
Add 'qcom/opensource/graphics-kernel/' from commit 'b4fdc4c04295ac59109ae19d64747522740c3f14'

git-subtree-dir: qcom/opensource/graphics-kernel
git-subtree-mainline: 992813d9c1682e9b78264e75b1f8aad0a3212ec0
git-subtree-split: b4fdc4c04295ac59109ae19d64747522740c3f14
Change-Id:
repo: https://git.codelinaro.org/clo/la/platform/vendor/qcom/opensource/graphics-kernel
tag: GRAPHICS.LA.14.0.r1-07700-lanai.0
David Wronek, 8 months ago
parent
current commit
880d405719
100 files changed, with 100803 insertions and 0 deletions
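The trailers above are what `git subtree add` writes when it imports an external repository into a subdirectory while preserving its history. As a rough sketch (the exact invocation is an assumption; only the prefix, repository URL, and tag come from the commit message), a commit like this one could be produced with:

    git subtree add --prefix=qcom/opensource/graphics-kernel \
        https://git.codelinaro.org/clo/la/platform/vendor/qcom/opensource/graphics-kernel \
        GRAPHICS.LA.14.0.r1-07700-lanai.0

Here git-subtree-dir records the prefix, git-subtree-mainline the parent on this branch, and git-subtree-split the imported upstream commit.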
  1. qcom/opensource/graphics-kernel/Android.bp (+35 -0)
  2. qcom/opensource/graphics-kernel/Android.mk (+57 -0)
  3. qcom/opensource/graphics-kernel/BUILD.bazel (+6 -0)
  4. qcom/opensource/graphics-kernel/Kbuild (+159 -0)
  5. qcom/opensource/graphics-kernel/Kconfig (+120 -0)
  6. qcom/opensource/graphics-kernel/Makefile (+17 -0)
  7. qcom/opensource/graphics-kernel/a3xx_reg.h (+564 -0)
  8. qcom/opensource/graphics-kernel/a5xx_reg.h (+902 -0)
  9. qcom/opensource/graphics-kernel/a6xx_reg.h (+1242 -0)
  10. qcom/opensource/graphics-kernel/adreno-gpulist.h (+2839 -0)
  11. qcom/opensource/graphics-kernel/adreno.c (+3837 -0)
  12. qcom/opensource/graphics-kernel/adreno.h (+2067 -0)
  13. qcom/opensource/graphics-kernel/adreno_a3xx.c (+1514 -0)
  14. qcom/opensource/graphics-kernel/adreno_a3xx.h (+76 -0)
  15. qcom/opensource/graphics-kernel/adreno_a3xx_coresight.c (+65 -0)
  16. qcom/opensource/graphics-kernel/adreno_a3xx_perfcounter.c (+411 -0)
  17. qcom/opensource/graphics-kernel/adreno_a3xx_ringbuffer.c (+458 -0)
  18. qcom/opensource/graphics-kernel/adreno_a3xx_snapshot.c (+449 -0)
  19. qcom/opensource/graphics-kernel/adreno_a5xx.c (+2500 -0)
  20. qcom/opensource/graphics-kernel/adreno_a5xx.h (+314 -0)
  21. qcom/opensource/graphics-kernel/adreno_a5xx_coresight.c (+208 -0)
  22. qcom/opensource/graphics-kernel/adreno_a5xx_packets.h (+1406 -0)
  23. qcom/opensource/graphics-kernel/adreno_a5xx_perfcounter.c (+695 -0)
  24. qcom/opensource/graphics-kernel/adreno_a5xx_preempt.c (+548 -0)
  25. qcom/opensource/graphics-kernel/adreno_a5xx_ringbuffer.c (+530 -0)
  26. qcom/opensource/graphics-kernel/adreno_a5xx_snapshot.c (+1219 -0)
  27. qcom/opensource/graphics-kernel/adreno_a6xx.c (+2486 -0)
  28. qcom/opensource/graphics-kernel/adreno_a6xx.h (+450 -0)
  29. qcom/opensource/graphics-kernel/adreno_a6xx_coresight.c (+432 -0)
  30. qcom/opensource/graphics-kernel/adreno_a6xx_gmu.c (+3863 -0)
  31. qcom/opensource/graphics-kernel/adreno_a6xx_gmu.h (+451 -0)
  32. qcom/opensource/graphics-kernel/adreno_a6xx_gmu_snapshot.c (+469 -0)
  33. qcom/opensource/graphics-kernel/adreno_a6xx_hfi.c (+852 -0)
  34. qcom/opensource/graphics-kernel/adreno_a6xx_hfi.h (+188 -0)
  35. qcom/opensource/graphics-kernel/adreno_a6xx_hwsched.c (+1407 -0)
  36. qcom/opensource/graphics-kernel/adreno_a6xx_hwsched.h (+100 -0)
  37. qcom/opensource/graphics-kernel/adreno_a6xx_hwsched_hfi.c (+2162 -0)
  38. qcom/opensource/graphics-kernel/adreno_a6xx_hwsched_hfi.h (+152 -0)
  39. qcom/opensource/graphics-kernel/adreno_a6xx_perfcounter.c (+1010 -0)
  40. qcom/opensource/graphics-kernel/adreno_a6xx_preempt.c (+793 -0)
  41. qcom/opensource/graphics-kernel/adreno_a6xx_rgmu.c (+1425 -0)
  42. qcom/opensource/graphics-kernel/adreno_a6xx_rgmu.h (+106 -0)
  43. qcom/opensource/graphics-kernel/adreno_a6xx_ringbuffer.c (+578 -0)
  44. qcom/opensource/graphics-kernel/adreno_a6xx_rpmh.c (+515 -0)
  45. qcom/opensource/graphics-kernel/adreno_a6xx_snapshot.c (+2306 -0)
  46. qcom/opensource/graphics-kernel/adreno_compat.c (+193 -0)
  47. qcom/opensource/graphics-kernel/adreno_compat.h (+46 -0)
  48. qcom/opensource/graphics-kernel/adreno_coresight.c (+333 -0)
  49. qcom/opensource/graphics-kernel/adreno_coresight.h (+164 -0)
  50. qcom/opensource/graphics-kernel/adreno_cp_parser.c (+1047 -0)
  51. qcom/opensource/graphics-kernel/adreno_cp_parser.h (+175 -0)
  52. qcom/opensource/graphics-kernel/adreno_debugfs.c (+680 -0)
  53. qcom/opensource/graphics-kernel/adreno_dispatch.c (+2884 -0)
  54. qcom/opensource/graphics-kernel/adreno_dispatch.h (+112 -0)
  55. qcom/opensource/graphics-kernel/adreno_drawctxt.c (+678 -0)
  56. qcom/opensource/graphics-kernel/adreno_drawctxt.h (+206 -0)
  57. qcom/opensource/graphics-kernel/adreno_gen7.c (+2253 -0)
  58. qcom/opensource/graphics-kernel/adreno_gen7.h (+519 -0)
  59. qcom/opensource/graphics-kernel/adreno_gen7_0_0_snapshot.h (+927 -0)
  60. qcom/opensource/graphics-kernel/adreno_gen7_11_0_snapshot.h (+1276 -0)
  61. qcom/opensource/graphics-kernel/adreno_gen7_2_0_snapshot.h (+752 -0)
  62. qcom/opensource/graphics-kernel/adreno_gen7_9_0_snapshot.h (+1425 -0)
  63. qcom/opensource/graphics-kernel/adreno_gen7_coresight.c (+473 -0)
  64. qcom/opensource/graphics-kernel/adreno_gen7_gmu.c (+3437 -0)
  65. qcom/opensource/graphics-kernel/adreno_gen7_gmu.h (+510 -0)
  66. qcom/opensource/graphics-kernel/adreno_gen7_gmu_snapshot.c (+317 -0)
  67. qcom/opensource/graphics-kernel/adreno_gen7_hfi.c (+870 -0)
  68. qcom/opensource/graphics-kernel/adreno_gen7_hfi.h (+234 -0)
  69. qcom/opensource/graphics-kernel/adreno_gen7_hwsched.c (+2063 -0)
  70. qcom/opensource/graphics-kernel/adreno_gen7_hwsched.h (+106 -0)
  71. qcom/opensource/graphics-kernel/adreno_gen7_hwsched_hfi.c (+4302 -0)
  72. qcom/opensource/graphics-kernel/adreno_gen7_hwsched_hfi.h (+363 -0)
  73. qcom/opensource/graphics-kernel/adreno_gen7_perfcounter.c (+1216 -0)
  74. qcom/opensource/graphics-kernel/adreno_gen7_preempt.c (+802 -0)
  75. qcom/opensource/graphics-kernel/adreno_gen7_ringbuffer.c (+647 -0)
  76. qcom/opensource/graphics-kernel/adreno_gen7_rpmh.c (+519 -0)
  77. qcom/opensource/graphics-kernel/adreno_gen7_snapshot.c (+1811 -0)
  78. qcom/opensource/graphics-kernel/adreno_gen7_snapshot.h (+383 -0)
  79. qcom/opensource/graphics-kernel/adreno_gen8.c (+2777 -0)
  80. qcom/opensource/graphics-kernel/adreno_gen8.h (+615 -0)
  81. qcom/opensource/graphics-kernel/adreno_gen8_3_0_snapshot.h (+2108 -0)
  82. qcom/opensource/graphics-kernel/adreno_gen8_gmu.c (+3332 -0)
  83. qcom/opensource/graphics-kernel/adreno_gen8_gmu.h (+505 -0)
  84. qcom/opensource/graphics-kernel/adreno_gen8_gmu_snapshot.c (+310 -0)
  85. qcom/opensource/graphics-kernel/adreno_gen8_hfi.c (+831 -0)
  86. qcom/opensource/graphics-kernel/adreno_gen8_hfi.h (+235 -0)
  87. qcom/opensource/graphics-kernel/adreno_gen8_hwsched.c (+1905 -0)
  88. qcom/opensource/graphics-kernel/adreno_gen8_hwsched.h (+106 -0)
  89. qcom/opensource/graphics-kernel/adreno_gen8_hwsched_hfi.c (+3938 -0)
  90. qcom/opensource/graphics-kernel/adreno_gen8_hwsched_hfi.h (+359 -0)
  91. qcom/opensource/graphics-kernel/adreno_gen8_perfcounter.c (+1135 -0)
  92. qcom/opensource/graphics-kernel/adreno_gen8_preempt.c (+807 -0)
  93. qcom/opensource/graphics-kernel/adreno_gen8_ringbuffer.c (+649 -0)
  94. qcom/opensource/graphics-kernel/adreno_gen8_rpmh.c (+517 -0)
  95. qcom/opensource/graphics-kernel/adreno_gen8_snapshot.c (+1802 -0)
  96. qcom/opensource/graphics-kernel/adreno_gen8_snapshot.h (+656 -0)
  97. qcom/opensource/graphics-kernel/adreno_hfi.h (+1414 -0)
  98. qcom/opensource/graphics-kernel/adreno_hwsched.c (+2510 -0)
  99. qcom/opensource/graphics-kernel/adreno_hwsched.h (+273 -0)
  100. qcom/opensource/graphics-kernel/adreno_ioctl.c (+313 -0)

+ 35 - 0
qcom/opensource/graphics-kernel/Android.bp

@@ -0,0 +1,35 @@
+headers_src = [
+    "include/uapi/linux/*.h",
+]
+
+gfx_headers_out = [
+    "linux/msm_kgsl.h",
+]
+
+gfx_kernel_headers_verbose = "--verbose "
+genrule {
+    name: "qti_generate_gfx_kernel_headers",
+    tools: ["headers_install.sh",
+            "unifdef"
+    ],
+    tool_files: [
+         "gfx_kernel_headers.py",
+    ],
+    srcs: headers_src,
+    cmd: "python3 -u $(location gfx_kernel_headers.py) " +
+        gfx_kernel_headers_verbose +
+        "--header_arch arm64 " +
+        "--gen_dir $(genDir) " +
+        "--gfx_include_uapi $(locations include/uapi/linux/*.h) " +
+        "--unifdef $(location unifdef) " +
+        "--headers_install $(location headers_install.sh)",
+    out: gfx_headers_out,
+}
+
+cc_library_headers {
+    name: "qti_gfx_kernel_uapi",
+    generated_headers: ["qti_generate_gfx_kernel_headers"],
+    export_generated_headers: ["qti_generate_gfx_kernel_headers"],
+    vendor: true,
+    recovery_available: true
+}

+ 57 - 0
qcom/opensource/graphics-kernel/Android.mk

@@ -0,0 +1,57 @@
+ifeq ($(TARGET_USES_QMAA),true)
+	KGSL_ENABLED := false
+	ifeq ($(TARGET_USES_QMAA_OVERRIDE_GFX),true)
+		KGSL_ENABLED := true
+	endif # TARGET_USES_QMAA_OVERRIDE_GFX
+else
+	KGSL_ENABLED := true
+endif # TARGET_USES_QMAA
+
+ifeq ($(ENABLE_HYP), true)
+        KGSL_ENABLED := false
+endif
+
+LOCAL_MODULE_DDK_BUILD := true
+LOCAL_MODULE_DDK_ALLOW_UNSAFE_HEADERS := true
+
+ifeq ($(KGSL_ENABLED),true)
+KGSL_SELECT := CONFIG_QCOM_KGSL=m
+
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+# This makefile is only for DLKM
+ifneq ($(findstring vendor,$(LOCAL_PATH)),)
+
+ifeq ($(BOARD_COMMON_DIR),)
+	BOARD_COMMON_DIR := device/qcom/common
+endif
+
+DLKM_DIR   := $(BOARD_COMMON_DIR)/dlkm
+
+KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM)
+KBUILD_OPTIONS += $(KGSL_SELECT)
+KBUILD_OPTIONS += MODNAME=msm_kgsl
+ifeq ($(TARGET_BOARD_PLATFORM), pineapple)
+	KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS+=$(PWD)/$(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers
+endif
+
+include $(CLEAR_VARS)
+# For incremental compilation
+LOCAL_SRC_FILES   := $(wildcard $(LOCAL_PATH)/**/*) $(wildcard $(LOCAL_PATH)/*)
+LOCAL_MODULE      := msm_kgsl.ko
+LOCAL_MODULE_KBUILD_NAME  := msm_kgsl.ko
+LOCAL_MODULE_TAGS         := optional
+LOCAL_MODULE_DEBUG_ENABLE := true
+LOCAL_MODULE_PATH := $(KERNEL_MODULES_OUT)
+
+ifeq ($(TARGET_BOARD_PLATFORM), pineapple)
+	LOCAL_REQUIRED_MODULES    := hw-fence-module-symvers
+	LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers
+endif
+# Include msm_kgsl.ko in the /vendor/lib/modules (vendor.img)
+BOARD_VENDOR_KERNEL_MODULES += $(LOCAL_MODULE_PATH)/$(LOCAL_MODULE)
+include $(DLKM_DIR)/Build_external_kernelmodule.mk
+
+endif # DLKM check
+endif # KGSL_ENABLED

+ 6 - 0
qcom/opensource/graphics-kernel/BUILD.bazel

@@ -0,0 +1,6 @@
+load(":build/kgsl_defs.bzl", "define_target_module")
+
+define_target_module("pineapple")
+define_target_module("sun")
+define_target_module("blair")
+define_target_module("monaco")

+ 159 - 0
qcom/opensource/graphics-kernel/Kbuild

@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+KDIR := $(TOP)/kernel_platform/common
+
+ifeq ($(KGSL_PATH),)
+KGSL_PATH=$(src)
+endif
+
+# If we're not GVM and not in an Android tree, select KGSL config
+ifeq ($(CONFIG_QTI_QUIN_GVM),)
+	ifeq ($(ANDROID_BUILD_TOP),)
+		CONFIG_QCOM_KGSL = m
+	endif
+endif
+
+ifeq ($(CONFIG_ARCH_WAIPIO), y)
+	include $(KGSL_PATH)/config/gki_waipiodisp.conf
+endif
+ifeq ($(CONFIG_ARCH_KALAMA), y)
+	include $(KGSL_PATH)/config/gki_kalama.conf
+endif
+ifeq ($(CONFIG_ARCH_PINEAPPLE), y)
+	include $(KGSL_PATH)/config/gki_pineapple.conf
+endif
+ifeq ($(CONFIG_ARCH_BLAIR), y)
+	include $(KGSL_PATH)/config/gki_blair.conf
+endif
+ifeq ($(CONFIG_ARCH_PITTI), y)
+	include $(KGSL_PATH)/config/gki_pitti.conf
+endif
+ifeq ($(CONFIG_ARCH_SA8155), y)
+	include $(KGSL_PATH)/config/gki_sa8155.conf
+endif
+ifeq ($(CONFIG_ARCH_KHAJE), y)
+	include $(KGSL_PATH)/config/gki_khajedisp.conf
+endif
+ifeq ($(CONFIG_ARCH_SA8195), y)
+	include $(KGSL_PATH)/config/gki_sa8155.conf
+endif
+ifeq ($(CONFIG_ARCH_SA6155), y)
+	include $(KGSL_PATH)/config/gki_sa8155.conf
+endif
+ifeq ($(CONFIG_ARCH_MONACO), y)
+	include $(KGSL_PATH)/config/gki_monaco.conf
+endif
+ifeq ($(CONFIG_ARCH_LEMANS), y)
+	include $(KGSL_PATH)/config/gki_lemans.conf
+endif
+ifeq ($(CONFIG_ARCH_KONA), y)
+        include $(KGSL_PATH)/config/gki_kona.conf
+endif
+ifeq ($(CONFIG_ARCH_TRINKET), y)
+	include $(KGSL_PATH)/config/gki_trinket.conf
+endif
+ifeq ($(CONFIG_ARCH_QCS405), y)
+	include $(KGSL_PATH)/config/gki_qcs405.conf
+endif
+ifeq ($(CONFIG_ARCH_HOLI), y)
+	include $(KGSL_PATH)/config/gki_blair.conf
+endif
+
+ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq
+
+obj-$(CONFIG_QCOM_KGSL) += msm_kgsl.o
+
+msm_kgsl-y = \
+	kgsl.o \
+	kgsl_bus.o \
+	kgsl_drawobj.o \
+	kgsl_events.o \
+	kgsl_eventlog.o \
+	kgsl_gmu_core.o \
+	kgsl_ioctl.o \
+	kgsl_mmu.o \
+	kgsl_pwrctrl.o \
+	kgsl_pwrscale.o \
+	kgsl_regmap.o \
+	kgsl_sharedmem.o \
+	kgsl_snapshot.o \
+	kgsl_timeline.o \
+	kgsl_trace.o \
+	kgsl_util.o \
+	kgsl_vbo.o
+
+msm_kgsl-$(CONFIG_COMPAT) += kgsl_compat.o
+msm_kgsl-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o
+msm_kgsl-$(CONFIG_ARM_SMMU) += kgsl_iommu.o
+msm_kgsl-$(CONFIG_SYNC_FILE) += kgsl_sync.o
+msm_kgsl-$(CONFIG_QCOM_KGSL_PROCESS_RECLAIM) += kgsl_reclaim.o
+
+ifndef CONFIG_QCOM_KGSL_USE_SHMEM
+	msm_kgsl-y += kgsl_pool.o
+endif
+
+msm_kgsl-y += \
+	adreno.o \
+	adreno_a3xx.o \
+	adreno_a3xx_perfcounter.o \
+	adreno_a3xx_ringbuffer.o \
+	adreno_a3xx_snapshot.o \
+	adreno_a5xx.o \
+	adreno_a5xx_perfcounter.o \
+	adreno_a5xx_preempt.o \
+	adreno_a5xx_ringbuffer.o \
+	adreno_a5xx_snapshot.o \
+	adreno_a6xx.o \
+	adreno_a6xx_gmu.o \
+	adreno_a6xx_gmu_snapshot.o \
+	adreno_a6xx_hfi.o \
+	adreno_a6xx_hwsched.o \
+	adreno_a6xx_hwsched_hfi.o \
+	adreno_a6xx_perfcounter.o \
+	adreno_a6xx_preempt.o \
+	adreno_a6xx_rgmu.o \
+	adreno_a6xx_ringbuffer.o \
+	adreno_a6xx_rpmh.o \
+	adreno_a6xx_snapshot.o \
+	adreno_cp_parser.o \
+	adreno_dispatch.o \
+	adreno_drawctxt.o \
+	adreno_gen7.o \
+	adreno_gen7_gmu.o \
+	adreno_gen7_gmu_snapshot.o \
+	adreno_gen7_hfi.o \
+	adreno_gen7_hwsched.o \
+	adreno_gen7_hwsched_hfi.o \
+	adreno_gen7_perfcounter.o \
+	adreno_gen7_preempt.o \
+	adreno_gen7_ringbuffer.o \
+	adreno_gen7_rpmh.o \
+	adreno_gen7_snapshot.o \
+	adreno_gen8.o \
+	adreno_gen8_gmu.o \
+	adreno_gen8_gmu_snapshot.o \
+	adreno_gen8_hfi.o \
+	adreno_gen8_hwsched.o \
+	adreno_gen8_hwsched_hfi.o \
+	adreno_gen8_perfcounter.o \
+	adreno_gen8_preempt.o \
+	adreno_gen8_ringbuffer.o \
+	adreno_gen8_rpmh.o \
+	adreno_gen8_snapshot.o \
+	adreno_hwsched.o \
+	adreno_ioctl.o \
+	adreno_perfcounter.o \
+	adreno_ringbuffer.o \
+	adreno_snapshot.o \
+	adreno_sysfs.o \
+	adreno_trace.o \
+	governor_msm_adreno_tz.o \
+	governor_gpubw_mon.o
+
+msm_kgsl-$(CONFIG_COMPAT) += adreno_compat.o
+msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_coresight.o
+msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a3xx_coresight.o
+msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a5xx_coresight.o
+msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a6xx_coresight.o
+msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_gen7_coresight.o
+msm_kgsl-$(CONFIG_DEBUG_FS) += adreno_debugfs.o adreno_profile.o

+ 120 - 0
qcom/opensource/graphics-kernel/Kconfig

@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config QCOM_KGSL
+	tristate "Qualcomm Technologies, Inc. 3D Graphics driver"
+	depends on ARCH_QCOM
+	depends on NVMEM_QCOM_QFPROM || QCOM_QFPROM
+	select QCOM_MDT_LOADER
+	select INTERVAL_TREE
+	select TRACE_GPU_MEM
+	help
+	  3D graphics driver for the Adreno family of GPUs from QTI.
+	  Required to use hardware accelerated OpenGL, compute and Vulkan
+	  on QTI targets. This includes power management, memory management,
+	  and scheduling for the Adreno GPUs.
+
+config DEVFREQ_GOV_QCOM_ADRENO_TZ
+	tristate "Qualcomm Technologies, Inc. GPU frequency governor"
+	depends on PM_DEVFREQ
+	help
+	  GPU frequency governor for the Adreno GPU. Sets the frequency
+	  using an "on demand" algorithm in conjunction with other
+	  components on Adreno platforms. This is not useful for non-Adreno
+	  devices.
+
+config DEVFREQ_GOV_QCOM_GPUBW_MON
+	tristate "Qualcomm Technologies, Inc. GPU bandwidth governor"
+	depends on DEVFREQ_GOV_QCOM_ADRENO_TZ
+	help
+	  This governor works together with the Adreno GPU governor to
+	  select bus frequency votes using an "on-demand" algorithm.
+	  This governor will not be useful for non-Adreno based
+	  targets.
+
+config QCOM_KGSL_FENCE_TRACE
+	bool "Enable built-in tracing for adreno fence timeouts"
+	help
+	  A boolean flag used to create a KGSL-specific tracing instance
+	  under <tracefs>/tracing/instances/kgsl-fence that can be used
+	  for debugging timeouts for fences between KGSL-contexts and
+	  sync-point blocks. If unsure, say 'N' here.
+
+config QCOM_ADRENO_DEFAULT_GOVERNOR
+	string "devfreq governor for the adreno core"
+	default "msm-adreno-tz"
+
+config QCOM_KGSL_CORESIGHT
+	bool "Enable coresight support for the Adreno GPU"
+	depends on CORESIGHT
+	default y
+	help
+	  When enabled, the Adreno GPU is available as a source for Coresight
+	  data. On a6xx targets there are two sources available for the GX and
+	  CX domains respectively. Debug kernels should say 'Y' here.
+
+config QCOM_KGSL_IOCOHERENCY_DEFAULT
+	bool "Enable I/O coherency on cached GPU memory by default"
+	default y if ARCH_LAHAINA
+	help
+	 Say 'Y' here to enable I/O cache coherency by default on targets that
+	 support hardware I/O coherency. If enabled all cached GPU memory
+	 will use I/O coherency regardless of the user flags. If not enabled
+	 the user can still selectively enable I/O coherency with a flag.
+
+config QCOM_KGSL_IDLE_TIMEOUT
+	int
+	default 80
+	help
+	  GPU idle timeout for Adreno GPU. This value decides after how
+	  long the GPU will go into slumber. A higher value will mean that
+	  the GPU is powered ON for a longer duration which will have
+	  power costs.
+
+config QCOM_KGSL_CONTEXT_DEBUG
+	bool "Log kgsl context information for all processes"
+	help
+	  When enabled, total number of KGSL contexts, number of attached and
+	  detached contexts are dumped into kernel log for all the processes.
+	  This gives insight about the number of contexts held by each process.
+
+config QCOM_KGSL_SORT_POOL
+	bool "Sort pool page list based on physical address"
+	default y
+	help
+	  When enabled, the pool page list is sorted based on physical
+	  addresses. This can be turned on for targets where better DDR
+	  efficiency is attained on accesses for adjacent memory.
+
+config QCOM_KGSL_QDSS_STM
+	bool "Enable support for QDSS STM for Adreno GPU"
+	depends on CORESIGHT
+	help
+	  When enabled, the Adreno GPU QDSS STM support is enabled. GPU QDSS STM
+	  memory will be mapped to GPU and QDSS clock needed to access this memory
+	  is voted. Debug kernels should say 'Y' here.
+
+config QCOM_KGSL_USE_SHMEM
+	bool "Enable using shmem for memory allocations"
+	depends on SHMEM
+	help
+	  Say 'Y' to enable using shmem for memory allocations. If enabled,
+	  there will be no support for the memory pools and higher order pages.
+	  But using shmem will help in making kgsl pages available for
+	  reclaiming.
+
+config QCOM_KGSL_PROCESS_RECLAIM
+	bool "Make driver pages available for reclaim"
+	select QCOM_KGSL_USE_SHMEM
+	help
+	  Say 'Y' to make driver pages available for reclaiming. If enabled,
+	  shmem will be used for allocation. kgsl would know the process
+	  foreground/background activity through the sysfs entry exposed per
+	  process. Based on this kgsl can unpin given number of pages from
+	  background processes and make them available to the shrinker.
+
+config QCOM_KGSL_HIBERNATION
+	bool "Enable Hibernation support in KGSL"
+	depends on HIBERNATION
+	help
+	  Say 'Y' to enable hibernation support in kgsl. If enabled, kgsl
+	  will register necessary power manager callbacks to support
+	  hibernation.

+ 17 - 0
qcom/opensource/graphics-kernel/Makefile

@@ -0,0 +1,17 @@
+ifeq ($(KGSL_MODULE_ROOT),)
+CUR_MKFILE = $(abspath $(lastword $(MAKEFILE_LIST)))
+KGSL_MODULE_ROOT = $(dir $(CUR_MKFILE))
+endif
+
+KBUILD_OPTIONS+=KGSL_PATH=$(KGSL_MODULE_ROOT)
+
+all: modules
+
+modules_install:
+	$(MAKE) INSTALL_MOD_STRIP=1 -C $(KERNEL_SRC) M=$(M) modules_install
+
+clean:
+	rm -f *.cmd *.d *.mod *.o *.ko *.mod.c *.mod.o Module.symvers modules.order
+
+%:
+	$(MAKE) -C $(KERNEL_SRC) M=$(M) $@ $(KBUILD_OPTIONS)

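The Makefile above is a thin wrapper around the standard out-of-tree module build: every target is forwarded to the kernel build system with M= pointing at this directory, and KGSL_PATH is passed through KBUILD_OPTIONS so the Kbuild file can locate its sources and config fragments. A minimal sketch of a manual build, assuming a prebuilt kernel tree at an illustrative path, might look like:

    # Run from inside qcom/opensource/graphics-kernel; the kernel path is an assumption.
    make KERNEL_SRC=/path/to/kernel_platform/common M=$(pwd) modules

In the Android build, the DLKM makefiles included from Android.mk are expected to supply KERNEL_SRC, M, and the extra KBUILD_OPTIONS instead of a manual invocation.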
+ 564 - 0
qcom/opensource/graphics-kernel/a3xx_reg.h

@@ -0,0 +1,564 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _A300_REG_H
+#define _A300_REG_H
+
+/* Interrupt bit positions within RBBM_INT_0 */
+
+#define A3XX_INT_RBBM_GPU_IDLE 0
+#define A3XX_INT_RBBM_AHB_ERROR 1
+#define A3XX_INT_RBBM_REG_TIMEOUT 2
+#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3
+#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4
+#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5
+#define A3XX_INT_VFD_ERROR 6
+#define A3XX_INT_CP_SW_INT 7
+#define A3XX_INT_CP_T0_PACKET_IN_IB 8
+#define A3XX_INT_CP_OPCODE_ERROR 9
+#define A3XX_INT_CP_RESERVED_BIT_ERROR 10
+#define A3XX_INT_CP_HW_FAULT 11
+#define A3XX_INT_CP_DMA 12
+#define A3XX_INT_CP_IB2_INT 13
+#define A3XX_INT_CP_IB1_INT 14
+#define A3XX_INT_CP_RB_INT 15
+#define A3XX_INT_CP_REG_PROTECT_FAULT 16
+#define A3XX_INT_CP_RB_DONE_TS 17
+#define A3XX_INT_CP_VS_DONE_TS 18
+#define A3XX_INT_CP_PS_DONE_TS 19
+#define A3XX_INT_CACHE_FLUSH_TS 20
+#define A3XX_INT_CP_AHB_ERROR_HALT 21
+#define A3XX_INT_MISC_HANG_DETECT 24
+#define A3XX_INT_UCHE_OOB_ACCESS 25
+
+/* Register definitions */
+
+#define A3XX_RBBM_CLOCK_CTL 0x010
+#define A3XX_RBBM_SP_HYST_CNT 0x012
+#define A3XX_RBBM_SW_RESET_CMD 0x018
+#define A3XX_RBBM_AHB_CTL0 0x020
+#define A3XX_RBBM_AHB_CTL1 0x021
+#define A3XX_RBBM_AHB_CMD 0x022
+#define A3XX_RBBM_AHB_ERROR_STATUS 0x027
+#define A3XX_RBBM_GPR0_CTL 0x02E
+/* This is the same register as on A2XX, just in a different place */
+#define A3XX_RBBM_STATUS 0x030
+#define A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x33
+#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50
+#define A3XX_RBBM_INT_CLEAR_CMD 0x061
+#define A3XX_RBBM_INT_0_MASK 0x063
+#define A3XX_RBBM_INT_0_STATUS 0x064
+#define A3XX_RBBM_PERFCTR_CTL 0x80
+#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81
+#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82
+#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84
+#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85
+#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86
+#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87
+#define A3XX_RBBM_GPU_BUSY_MASKED 0x88
+#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90
+#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91
+#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92
+#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93
+#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94
+#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95
+#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96
+#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97
+#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98
+#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99
+#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A
+#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B
+#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C
+#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D
+#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E
+#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F
+#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0
+#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1
+#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2
+#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3
+#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4
+#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5
+#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6
+#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7
+#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8
+#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9
+#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA
+#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB
+#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC
+#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD
+#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE
+#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF
+#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0
+#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1
+#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2
+#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3
+#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4
+#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5
+#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6
+#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7
+#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8
+#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9
+#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA
+#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB
+#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC
+#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD
+#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE
+#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF
+#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0
+#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1
+#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2
+#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3
+#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4
+#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5
+#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6
+#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7
+#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8
+#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9
+#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA
+#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB
+#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC
+#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD
+#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE
+#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF
+#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0
+#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1
+#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2
+#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3
+#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4
+#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5
+#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6
+#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7
+#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8
+#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9
+#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA
+#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB
+#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC
+#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD
+#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE
+#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF
+#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0
+#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1
+#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2
+#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3
+#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4
+#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5
+
+#define A3XX_RBBM_RBBM_CTL 0x100
+#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA
+#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB
+#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC
+#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED
+#define A3XX_RBBM_DEBUG_BUS_CTL 0x111
+#define A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x112
+#define A3XX_RBBM_DEBUG_BUS_STB_CTL0 0x11B
+#define A3XX_RBBM_DEBUG_BUS_STB_CTL1 0x11C
+#define A3XX_RBBM_INT_TRACE_BUS_CTL 0x11D
+#define A3XX_RBBM_EXT_TRACE_BUS_CTL 0x11E
+#define A3XX_RBBM_EXT_TRACE_STOP_CNT 0x11F
+#define A3XX_RBBM_EXT_TRACE_START_CNT 0x120
+#define A3XX_RBBM_EXT_TRACE_PERIOD_CNT 0x121
+#define A3XX_RBBM_EXT_TRACE_CMD 0x122
+#define A3XX_CP_RB_BASE 0x01C0
+#define A3XX_CP_RB_CNTL 0x01C1
+#define A3XX_CP_RB_RPTR 0x01C4
+#define A3XX_CP_RB_WPTR 0x01C5
+/* The following two are the same as on A2XX, just in a different place */
+#define A3XX_CP_PFP_UCODE_ADDR 0x1C9
+#define A3XX_CP_PFP_UCODE_DATA 0x1CA
+#define A3XX_CP_ROQ_ADDR 0x1CC
+#define A3XX_CP_ROQ_DATA 0x1CD
+#define A3XX_CP_MERCIU_ADDR 0x1D1
+#define A3XX_CP_MERCIU_DATA 0x1D2
+#define A3XX_CP_MERCIU_DATA2 0x1D3
+#define A3XX_CP_QUEUE_THRESHOLDS 0x01D5
+#define A3XX_CP_MEQ_ADDR 0x1DA
+#define A3XX_CP_MEQ_DATA 0x1DB
+#define A3XX_CP_STATE_DEBUG_INDEX 0x01EC
+#define A3XX_CP_STATE_DEBUG_DATA 0x01ED
+#define A3XX_CP_CNTL 0x01F4
+#define A3XX_CP_WFI_PEND_CTR 0x01F5
+#define A3XX_CP_ME_CNTL 0x01F6
+#define A3XX_CP_ME_STATUS 0x01F7
+#define A3XX_CP_ME_RAM_WADDR 0x01F8
+#define A3XX_CP_ME_RAM_RADDR 0x01F9
+#define A3XX_CP_ME_RAM_DATA 0x01FA
+#define A3XX_CP_DEBUG 0x01FC
+
+#define A3XX_RBBM_PM_OVERRIDE2 0x039D
+
+#define A3XX_CP_PERFCOUNTER_SELECT 0x445
+#define A3XX_CP_IB1_BASE 0x0458
+#define A3XX_CP_IB1_BUFSZ 0x0459
+#define A3XX_CP_IB2_BASE 0x045A
+#define A3XX_CP_IB2_BUFSZ 0x045B
+
+#define A3XX_CP_HW_FAULT  0x45C
+#define A3XX_CP_PROTECT_CTRL 0x45E
+#define A3XX_CP_PROTECT_STATUS 0x45F
+#define A3XX_CP_PROTECT_REG_0 0x460
+#define A3XX_CP_STAT 0x047F
+#define A3XX_CP_SCRATCH_REG0 0x578
+#define A3XX_CP_SCRATCH_REG6 0x57E
+#define A3XX_CP_SCRATCH_REG7 0x57F
+#define A3XX_VSC_SIZE_ADDRESS 0xC02
+#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07
+#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08
+#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A
+#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B
+#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D
+#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E
+#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10
+#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11
+#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13
+#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14
+#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16
+#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17
+#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19
+#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A
+#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C
+#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D
+#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48
+#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49
+#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A
+#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B
+#define A3XX_GRAS_TSE_DEBUG_ECO 0xC81
+#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88
+#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89
+#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A
+#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B
+#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0
+#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1
+#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2
+#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3
+#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4
+#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5
+#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6
+#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7
+#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8
+#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9
+#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA
+#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB
+#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC
+#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD
+#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE
+#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF
+#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0
+#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1
+#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2
+#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3
+#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4
+#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5
+#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6
+#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7
+#define A3XX_RB_GMEM_BASE_ADDR 0xCC0
+#define A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0xCC1
+#define A3XX_RB_PERFCOUNTER0_SELECT   0xCC6
+#define A3XX_RB_PERFCOUNTER1_SELECT   0xCC7
+#define A3XX_RB_FRAME_BUFFER_DIMENSION 0xCE0
+#define A3XX_SQ_GPR_MANAGEMENT 0x0D00
+#define A3XX_SQ_INST_STORE_MANAGEMENT 0x0D02
+#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00
+#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01
+#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02
+#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03
+#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04
+#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05
+#define A3XX_TP0_CHICKEN 0x0E1E
+#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44
+#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45
+#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61
+#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62
+#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64
+#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65
+#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82
+#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84
+#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85
+#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86
+#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87
+#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88
+#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89
+#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG 0xEA1
+#define A3XX_UCHE_CACHE_WAYS_VFD 0xEA6
+#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4
+#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5
+#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6
+#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7
+#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8
+#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9
+#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA
+#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB
+#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04
+#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05
+#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06
+#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07
+#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08
+#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09
+#define A3XX_GRAS_CL_CLIP_CNTL 0x2040
+#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044
+#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048
+#define A3XX_GRAS_CL_VPORT_XSCALE 0x2049
+#define A3XX_GRAS_CL_VPORT_YOFFSET 0x204A
+#define A3XX_GRAS_CL_VPORT_YSCALE 0x204B
+#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C
+#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D
+#define A3XX_GRAS_SU_POINT_MINMAX 0x2068
+#define A3XX_GRAS_SU_POINT_SIZE 0x2069
+#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C
+#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D
+#define A3XX_GRAS_SU_MODE_CONTROL 0x2070
+#define A3XX_GRAS_SC_CONTROL 0x2072
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A
+#define A3XX_RB_MODE_CONTROL 0x20C0
+#define A3XX_RB_RENDER_CONTROL 0x20C1
+#define A3XX_RB_MSAA_CONTROL 0x20C2
+#define A3XX_RB_ALPHA_REFERENCE 0x20C3
+#define A3XX_RB_MRT_CONTROL0 0x20C4
+#define A3XX_RB_MRT_BUF_INFO0 0x20C5
+#define A3XX_RB_MRT_BUF_BASE0 0x20C6
+#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7
+#define A3XX_RB_MRT_CONTROL1 0x20C8
+#define A3XX_RB_MRT_BUF_INFO1 0x20C9
+#define A3XX_RB_MRT_BUF_BASE1 0x20CA
+#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB
+#define A3XX_RB_MRT_CONTROL2 0x20CC
+#define A3XX_RB_MRT_BUF_INFO2 0x20CD
+#define A3XX_RB_MRT_BUF_BASE2 0x20CE
+#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF
+#define A3XX_RB_MRT_CONTROL3 0x20D0
+#define A3XX_RB_MRT_BUF_INFO3 0x20D1
+#define A3XX_RB_MRT_BUF_BASE3 0x20D2
+#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3
+#define A3XX_RB_BLEND_RED 0x20E4
+#define A3XX_RB_BLEND_GREEN 0x20E5
+#define A3XX_RB_BLEND_BLUE 0x20E6
+#define A3XX_RB_BLEND_ALPHA 0x20E7
+#define A3XX_RB_CLEAR_COLOR_DW0 0x20E8
+#define A3XX_RB_CLEAR_COLOR_DW1 0x20E9
+#define A3XX_RB_CLEAR_COLOR_DW2 0x20EA
+#define A3XX_RB_CLEAR_COLOR_DW3 0x20EB
+#define A3XX_RB_COPY_CONTROL 0x20EC
+#define A3XX_RB_COPY_DEST_BASE 0x20ED
+#define A3XX_RB_COPY_DEST_PITCH 0x20EE
+#define A3XX_RB_COPY_DEST_INFO 0x20EF
+#define A3XX_RB_DEPTH_CONTROL 0x2100
+#define A3XX_RB_DEPTH_CLEAR 0x2101
+#define A3XX_RB_DEPTH_BUF_INFO 0x2102
+#define A3XX_RB_DEPTH_BUF_PITCH 0x2103
+#define A3XX_RB_STENCIL_CONTROL 0x2104
+#define A3XX_RB_STENCIL_CLEAR 0x2105
+#define A3XX_RB_STENCIL_BUF_INFO 0x2106
+#define A3XX_RB_STENCIL_BUF_PITCH 0x2107
+#define A3XX_RB_STENCIL_REF_MASK 0x2108
+#define A3XX_RB_STENCIL_REF_MASK_BF 0x2109
+#define A3XX_RB_LRZ_VSC_CONTROL 0x210C
+#define A3XX_RB_WINDOW_OFFSET 0x210E
+#define A3XX_RB_SAMPLE_COUNT_CONTROL 0x2110
+#define A3XX_RB_SAMPLE_COUNT_ADDR 0x2111
+#define A3XX_RB_Z_CLAMP_MIN 0x2114
+#define A3XX_RB_Z_CLAMP_MAX 0x2115
+#define A3XX_HLSQ_CONTROL_0_REG 0x2200
+#define A3XX_HLSQ_CONTROL_1_REG 0x2201
+#define A3XX_HLSQ_CONTROL_2_REG 0x2202
+#define A3XX_HLSQ_CONTROL_3_REG 0x2203
+#define A3XX_HLSQ_VS_CONTROL_REG 0x2204
+#define A3XX_HLSQ_FS_CONTROL_REG 0x2205
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x2206
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207
+#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A
+#define A3XX_HLSQ_CL_NDRANGE_1_REG 0x220B
+#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C
+#define A3XX_HLSQ_CL_NDRANGE_3_REG 0x220D
+#define A3XX_HLSQ_CL_NDRANGE_4_REG 0x220E
+#define A3XX_HLSQ_CL_NDRANGE_5_REG 0x220F
+#define A3XX_HLSQ_CL_NDRANGE_6_REG 0x2210
+#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211
+#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212
+#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214
+#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215
+#define A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x2216
+#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217
+#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A
+#define A3XX_VFD_FETCH_INSTR_1_0 0x2247
+#define A3XX_VFD_FETCH_INSTR_1_1 0x2249
+#define A3XX_VFD_FETCH_INSTR_1_2 0x224B
+#define A3XX_VFD_FETCH_INSTR_1_3 0x224D
+#define A3XX_VFD_FETCH_INSTR_1_4 0x224F
+#define A3XX_VFD_FETCH_INSTR_1_5 0x2251
+#define A3XX_VFD_FETCH_INSTR_1_6 0x2253
+#define A3XX_VFD_FETCH_INSTR_1_7 0x2255
+#define A3XX_VFD_FETCH_INSTR_1_8 0x2257
+#define A3XX_VFD_FETCH_INSTR_1_9 0x2259
+#define A3XX_VFD_FETCH_INSTR_1_A 0x225B
+#define A3XX_VFD_FETCH_INSTR_1_B 0x225D
+#define A3XX_VFD_FETCH_INSTR_1_C 0x225F
+#define A3XX_VFD_FETCH_INSTR_1_D 0x2261
+#define A3XX_VFD_FETCH_INSTR_1_E 0x2263
+#define A3XX_VFD_FETCH_INSTR_1_F 0x2265
+#define A3XX_SP_SP_CTRL_REG 0x22C0
+#define A3XX_SP_VS_CTRL_REG0 0x22C4
+#define A3XX_SP_VS_CTRL_REG1 0x22C5
+#define A3XX_SP_VS_PARAM_REG 0x22C6
+#define A3XX_SP_VS_OUT_REG_0 0x22C7
+#define A3XX_SP_VS_OUT_REG_1 0x22C8
+#define A3XX_SP_VS_OUT_REG_2 0x22C9
+#define A3XX_SP_VS_OUT_REG_3 0x22CA
+#define A3XX_SP_VS_OUT_REG_4 0x22CB
+#define A3XX_SP_VS_OUT_REG_5 0x22CC
+#define A3XX_SP_VS_OUT_REG_6 0x22CD
+#define A3XX_SP_VS_OUT_REG_7 0x22CE
+#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0
+#define A3XX_SP_VS_VPC_DST_REG_1 0x22D1
+#define A3XX_SP_VS_VPC_DST_REG_2 0x22D2
+#define A3XX_SP_VS_VPC_DST_REG_3 0x22D3
+#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4
+#define A3XX_SP_VS_OBJ_START_REG 0x22D5
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG 0x22D6
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7
+#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8
+#define A3XX_SP_VS_LENGTH_REG 0x22DF
+#define A3XX_SP_FS_CTRL_REG0 0x22E0
+#define A3XX_SP_FS_CTRL_REG1 0x22E1
+#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2
+#define A3XX_SP_FS_OBJ_START_REG 0x22E3
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG 0x22E4
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5
+#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6
+#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8
+#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9
+#define A3XX_SP_FS_OUTPUT_REG 0x22EC
+#define A3XX_SP_FS_MRT_REG_0 0x22F0
+#define A3XX_SP_FS_MRT_REG_1 0x22F1
+#define A3XX_SP_FS_MRT_REG_2 0x22F2
+#define A3XX_SP_FS_MRT_REG_3 0x22F3
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_1 0x22F5
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_2 0x22F6
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7
+#define A3XX_SP_FS_LENGTH_REG 0x22FF
+#define A3XX_PA_SC_AA_CONFIG 0x2301
+#define A3XX_VBIF_CLKON 0x3001
+#define A3XX_VBIF_ABIT_SORT 0x301C
+#define A3XX_VBIF_ABIT_SORT_CONF 0x301D
+#define A3XX_VBIF_GATE_OFF_WRREQ_EN 0x302A
+#define A3XX_VBIF_IN_RD_LIM_CONF0 0x302C
+#define A3XX_VBIF_IN_RD_LIM_CONF1 0x302D
+#define A3XX_VBIF_IN_WR_LIM_CONF0 0x3030
+#define A3XX_VBIF_IN_WR_LIM_CONF1 0x3031
+#define A3XX_VBIF_OUT_RD_LIM_CONF0 0x3034
+#define A3XX_VBIF_OUT_WR_LIM_CONF0 0x3035
+#define A3XX_VBIF_DDR_OUT_MAX_BURST 0x3036
+#define A3XX_VBIF_ARB_CTL 0x303C
+#define A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049
+#define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E
+#define A3XX_VBIF_OUT_AXI_AOOO 0x305F
+#define A3XX_VBIF_PERF_CNT0_LO 0x3073
+#define A3XX_VBIF_PERF_CNT0_HI 0x3074
+#define A3XX_VBIF_PERF_CNT1_LO 0x3075
+#define A3XX_VBIF_PERF_CNT1_HI 0x3076
+#define A3XX_VBIF_PERF_PWR_CNT0_LO 0x3077
+#define A3XX_VBIF_PERF_PWR_CNT0_HI 0x3078
+#define A3XX_VBIF_PERF_PWR_CNT1_LO 0x3079
+#define A3XX_VBIF_PERF_PWR_CNT1_HI 0x307a
+#define A3XX_VBIF_PERF_PWR_CNT2_LO 0x307b
+#define A3XX_VBIF_PERF_PWR_CNT2_HI 0x307c
+
+#define A3XX_VBIF_XIN_HALT_CTRL0 0x3080
+#define A3XX_VBIF_XIN_HALT_CTRL0_MASK 0x3F
+#define A30X_VBIF_XIN_HALT_CTRL0_MASK 0x7
+
+#define A3XX_VBIF_XIN_HALT_CTRL1 0x3081
+
+/* VBIF register offsets for A306 */
+#define A3XX_VBIF2_PERF_CNT_SEL0 0x30d0
+#define A3XX_VBIF2_PERF_CNT_SEL1 0x30d1
+#define A3XX_VBIF2_PERF_CNT_SEL2 0x30d2
+#define A3XX_VBIF2_PERF_CNT_SEL3 0x30d3
+#define A3XX_VBIF2_PERF_CNT_LOW0 0x30d8
+#define A3XX_VBIF2_PERF_CNT_LOW1 0x30d9
+#define A3XX_VBIF2_PERF_CNT_LOW2 0x30da
+#define A3XX_VBIF2_PERF_CNT_LOW3 0x30db
+#define A3XX_VBIF2_PERF_CNT_HIGH0 0x30e0
+#define A3XX_VBIF2_PERF_CNT_HIGH1 0x30e1
+#define A3XX_VBIF2_PERF_CNT_HIGH2 0x30e2
+#define A3XX_VBIF2_PERF_CNT_HIGH3 0x30e3
+
+#define A3XX_VBIF2_PERF_PWR_CNT_EN0 0x3100
+#define A3XX_VBIF2_PERF_PWR_CNT_EN1 0x3101
+#define A3XX_VBIF2_PERF_PWR_CNT_EN2 0x3102
+#define A3XX_VBIF2_PERF_PWR_CNT_LOW0 0x3110
+#define A3XX_VBIF2_PERF_PWR_CNT_LOW1 0x3111
+#define A3XX_VBIF2_PERF_PWR_CNT_LOW2 0x3112
+#define A3XX_VBIF2_PERF_PWR_CNT_HIGH0 0x3118
+#define A3XX_VBIF2_PERF_PWR_CNT_HIGH1 0x3119
+#define A3XX_VBIF2_PERF_PWR_CNT_HIGH2 0x311a
+
+#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0 0x3800
+#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1 0x3801
+
+/* RBBM Debug bus block IDs */
+#define RBBM_BLOCK_ID_CP               0x1
+#define RBBM_BLOCK_ID_RBBM             0x2
+#define RBBM_BLOCK_ID_VBIF             0x3
+#define RBBM_BLOCK_ID_HLSQ             0x4
+#define RBBM_BLOCK_ID_UCHE             0x5
+#define RBBM_BLOCK_ID_PC               0x8
+#define RBBM_BLOCK_ID_VFD              0x9
+#define RBBM_BLOCK_ID_VPC              0xa
+#define RBBM_BLOCK_ID_TSE              0xb
+#define RBBM_BLOCK_ID_RAS              0xc
+#define RBBM_BLOCK_ID_VSC              0xd
+#define RBBM_BLOCK_ID_SP_0             0x10
+#define RBBM_BLOCK_ID_SP_1             0x11
+#define RBBM_BLOCK_ID_SP_2             0x12
+#define RBBM_BLOCK_ID_SP_3             0x13
+#define RBBM_BLOCK_ID_TPL1_0           0x18
+#define RBBM_BLOCK_ID_TPL1_1           0x19
+#define RBBM_BLOCK_ID_TPL1_2           0x1a
+#define RBBM_BLOCK_ID_TPL1_3           0x1b
+#define RBBM_BLOCK_ID_RB_0             0x20
+#define RBBM_BLOCK_ID_RB_1             0x21
+#define RBBM_BLOCK_ID_RB_2             0x22
+#define RBBM_BLOCK_ID_RB_3             0x23
+#define RBBM_BLOCK_ID_MARB_0           0x28
+#define RBBM_BLOCK_ID_MARB_1           0x29
+#define RBBM_BLOCK_ID_MARB_2           0x2a
+#define RBBM_BLOCK_ID_MARB_3           0x2b
+
+/* RBBM_CLOCK_CTL default value */
+#define A3XX_RBBM_CLOCK_CTL_DEFAULT   0xAAAAAAAA
+#define A320_RBBM_CLOCK_CTL_DEFAULT   0xBFFFFFFF
+#define A330_RBBM_CLOCK_CTL_DEFAULT   0xBFFCFFFF
+
+#define A330_RBBM_GPR0_CTL_DEFAULT    0x00000000
+#define A330v2_RBBM_GPR0_CTL_DEFAULT  0x05515455
+#define A310_RBBM_GPR0_CTL_DEFAULT    0x000000AA
+
+/* COUNTABLE FOR SP PERFCOUNTER */
+#define SP_ALU_ACTIVE_CYCLES           0x1D
+#define SP0_ICL1_MISSES                0x1A
+#define SP_FS_CFLOW_INSTRUCTIONS       0x0C
+
+/* COUNTABLE FOR TSE PERFCOUNTER */
+#define TSE_INPUT_PRIM_NUM             0x0
+
+/* VBIF countables */
+#define VBIF_AXI_TOTAL_BEATS 85
+
+/* VBIF Recoverable HALT bit value */
+#define VBIF_RECOVERABLE_HALT_CTRL 0x1
+
+/*
+ * CP DEBUG settings for A3XX core:
+ * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control
+ * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF
+ */
+#define A3XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))
+
+
+#endif

+ 902 - 0
qcom/opensource/graphics-kernel/a5xx_reg.h

@@ -0,0 +1,902 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2014-2016,2019, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _A5XX_REG_H
+#define _A5XX_REG_H
+
+/* A5XX interrupt bits */
+#define A5XX_INT_RBBM_GPU_IDLE           0
+#define A5XX_INT_RBBM_AHB_ERROR          1
+#define A5XX_INT_RBBM_TRANSFER_TIMEOUT   2
+#define A5XX_INT_RBBM_ME_MS_TIMEOUT      3
+#define A5XX_INT_RBBM_PFP_MS_TIMEOUT     4
+#define A5XX_INT_RBBM_ETS_MS_TIMEOUT     5
+#define A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW 6
+#define A5XX_INT_RBBM_GPC_ERROR          7
+#define A5XX_INT_CP_SW                   8
+#define A5XX_INT_CP_HW_ERROR             9
+#define A5XX_INT_CP_CCU_FLUSH_DEPTH_TS   10
+#define A5XX_INT_CP_CCU_FLUSH_COLOR_TS   11
+#define A5XX_INT_CP_CCU_RESOLVE_TS       12
+#define A5XX_INT_CP_IB2                  13
+#define A5XX_INT_CP_IB1                  14
+#define A5XX_INT_CP_RB                   15
+#define A5XX_INT_CP_UNUSED_1             16
+#define A5XX_INT_CP_RB_DONE_TS           17
+#define A5XX_INT_CP_WT_DONE_TS           18
+#define A5XX_INT_UNKNOWN_1               19
+#define A5XX_INT_CP_CACHE_FLUSH_TS       20
+#define A5XX_INT_UNUSED_2                21
+#define A5XX_INT_RBBM_ATB_BUS_OVERFLOW   22
+#define A5XX_INT_MISC_HANG_DETECT        23
+#define A5XX_INT_UCHE_OOB_ACCESS         24
+#define A5XX_INT_UCHE_TRAP_INTR          25
+#define A5XX_INT_DEBBUS_INTR_0           26
+#define A5XX_INT_DEBBUS_INTR_1           27
+#define A5XX_INT_GPMU_VOLTAGE_DROOP      28
+#define A5XX_INT_GPMU_FIRMWARE           29
+#define A5XX_INT_ISDB_CPU_IRQ            30
+#define A5XX_INT_ISDB_UNDER_DEBUG        31
+
+/* CP Interrupt bits */
+#define A5XX_CP_OPCODE_ERROR               0
+#define A5XX_CP_RESERVED_BIT_ERROR         1
+#define A5XX_CP_HW_FAULT_ERROR             2
+#define A5XX_CP_DMA_ERROR                  3
+#define A5XX_CP_REGISTER_PROTECTION_ERROR  4
+#define A5XX_CP_AHB_ERROR                  5
+
+/* CP registers */
+#define A5XX_CP_RB_BASE                  0x800
+#define A5XX_CP_RB_BASE_HI               0x801
+#define A5XX_CP_RB_CNTL                  0x802
+#define A5XX_CP_RB_RPTR_ADDR_LO          0x804
+#define A5XX_CP_RB_RPTR_ADDR_HI          0x805
+#define A5XX_CP_RB_RPTR                  0x806
+#define A5XX_CP_RB_WPTR                  0x807
+#define A5XX_CP_PFP_STAT_ADDR            0x808
+#define A5XX_CP_PFP_STAT_DATA            0x809
+#define A5XX_CP_DRAW_STATE_ADDR          0x80B
+#define A5XX_CP_DRAW_STATE_DATA          0x80C
+#define A5XX_CP_CRASH_SCRIPT_BASE_LO     0x817
+#define A5XX_CP_CRASH_SCRIPT_BASE_HI     0x818
+#define A5XX_CP_CRASH_DUMP_CNTL          0x819
+#define A5XX_CP_ME_STAT_ADDR             0x81A
+#define A5XX_CP_ROQ_THRESHOLDS_1         0x81F
+#define A5XX_CP_ROQ_THRESHOLDS_2         0x820
+#define A5XX_CP_ROQ_DBG_ADDR             0x821
+#define A5XX_CP_ROQ_DBG_DATA             0x822
+#define A5XX_CP_MEQ_DBG_ADDR             0x823
+#define A5XX_CP_MEQ_DBG_DATA             0x824
+#define A5XX_CP_MEQ_THRESHOLDS           0x825
+#define A5XX_CP_MERCIU_SIZE              0x826
+#define A5XX_CP_MERCIU_DBG_ADDR          0x827
+#define A5XX_CP_MERCIU_DBG_DATA_1        0x828
+#define A5XX_CP_MERCIU_DBG_DATA_2        0x829
+#define A5XX_CP_PFP_UCODE_DBG_ADDR       0x82A
+#define A5XX_CP_PFP_UCODE_DBG_DATA       0x82B
+#define A5XX_CP_ME_UCODE_DBG_ADDR        0x82F
+#define A5XX_CP_ME_UCODE_DBG_DATA        0x830
+#define A5XX_CP_CNTL                     0x831
+#define A5XX_CP_ME_CNTL                  0x832
+#define A5XX_CP_CHICKEN_DBG              0x833
+#define A5XX_CP_PFP_INSTR_BASE_LO        0x835
+#define A5XX_CP_PFP_INSTR_BASE_HI        0x836
+#define A5XX_CP_PM4_INSTR_BASE_LO        0x838
+#define A5XX_CP_PM4_INSTR_BASE_HI        0x839
+#define A5XX_CP_CONTEXT_SWITCH_CNTL      0x83B
+#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO   0x83C
+#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI   0x83D
+#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO   0x83E
+#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI   0x83F
+#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO   0x840
+#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI   0x841
+#define A5XX_CP_ADDR_MODE_CNTL           0x860
+#define A5XX_CP_ME_STAT_DATA             0xB14
+#define A5XX_CP_WFI_PEND_CTR             0xB15
+#define A5XX_CP_INTERRUPT_STATUS         0xB18
+#define A5XX_CP_HW_FAULT                 0xB1A
+#define A5XX_CP_PROTECT_STATUS           0xB1C
+#define A5XX_CP_IB1_BASE                 0xB1F
+#define A5XX_CP_IB1_BASE_HI              0xB20
+#define A5XX_CP_IB1_BUFSZ                0xB21
+#define A5XX_CP_IB2_BASE                 0xB22
+#define A5XX_CP_IB2_BASE_HI              0xB23
+#define A5XX_CP_IB2_BUFSZ                0xB24
+#define A5XX_CP_PROTECT_REG_0            0x880
+#define A5XX_CP_PROTECT_CNTL             0x8A0
+#define A5XX_CP_AHB_FAULT                0xB1B
+#define A5XX_CP_PERFCTR_CP_SEL_0         0xBB0
+#define A5XX_CP_PERFCTR_CP_SEL_1         0xBB1
+#define A5XX_CP_PERFCTR_CP_SEL_2         0xBB2
+#define A5XX_CP_PERFCTR_CP_SEL_3         0xBB3
+#define A5XX_CP_PERFCTR_CP_SEL_4         0xBB4
+#define A5XX_CP_PERFCTR_CP_SEL_5         0xBB5
+#define A5XX_CP_PERFCTR_CP_SEL_6         0xBB6
+#define A5XX_CP_PERFCTR_CP_SEL_7         0xBB7
+
+#define A5XX_VSC_ADDR_MODE_CNTL          0xBC1
+
+/* CP Power Counter Registers Select */
+#define A5XX_CP_POWERCTR_CP_SEL_0        0xBBA
+#define A5XX_CP_POWERCTR_CP_SEL_1        0xBBB
+#define A5XX_CP_POWERCTR_CP_SEL_2        0xBBC
+#define A5XX_CP_POWERCTR_CP_SEL_3        0xBBD
+
+/* RBBM registers */
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A               0x4
+#define A5XX_RBBM_CFG_DBGBUS_SEL_B               0x5
+#define A5XX_RBBM_CFG_DBGBUS_SEL_C               0x6
+#define A5XX_RBBM_CFG_DBGBUS_SEL_D               0x7
+#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT    0x0
+#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT  0x8
+
+#define A5XX_RBBM_CFG_DBGBUS_CNTLT               0x8
+#define A5XX_RBBM_CFG_DBGBUS_CNTLM               0x9
+#define A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT  0x18
+#define A5XX_RBBM_CFG_DBGBUS_OPL                 0xA
+#define A5XX_RBBM_CFG_DBGBUS_OPE                 0xB
+#define A5XX_RBBM_CFG_DBGBUS_IVTL_0              0xC
+#define A5XX_RBBM_CFG_DBGBUS_IVTL_1              0xD
+#define A5XX_RBBM_CFG_DBGBUS_IVTL_2              0xE
+#define A5XX_RBBM_CFG_DBGBUS_IVTL_3              0xF
+#define A5XX_RBBM_CFG_DBGBUS_MASKL_0             0x10
+#define A5XX_RBBM_CFG_DBGBUS_MASKL_1             0x11
+#define A5XX_RBBM_CFG_DBGBUS_MASKL_2             0x12
+#define A5XX_RBBM_CFG_DBGBUS_MASKL_3             0x13
+#define A5XX_RBBM_CFG_DBGBUS_BYTEL_0             0x14
+#define A5XX_RBBM_CFG_DBGBUS_BYTEL_1             0x15
+#define A5XX_RBBM_CFG_DBGBUS_IVTE_0              0x16
+#define A5XX_RBBM_CFG_DBGBUS_IVTE_1              0x17
+#define A5XX_RBBM_CFG_DBGBUS_IVTE_2              0x18
+#define A5XX_RBBM_CFG_DBGBUS_IVTE_3              0x19
+#define A5XX_RBBM_CFG_DBGBUS_MASKE_0             0x1A
+#define A5XX_RBBM_CFG_DBGBUS_MASKE_1             0x1B
+#define A5XX_RBBM_CFG_DBGBUS_MASKE_2             0x1C
+#define A5XX_RBBM_CFG_DBGBUS_MASKE_3             0x1D
+#define A5XX_RBBM_CFG_DBGBUS_NIBBLEE             0x1E
+#define A5XX_RBBM_CFG_DBGBUS_PTRC0               0x1F
+#define A5XX_RBBM_CFG_DBGBUS_PTRC1               0x20
+#define A5XX_RBBM_CFG_DBGBUS_LOADREG             0x21
+#define A5XX_RBBM_CFG_DBGBUS_IDX                 0x22
+#define A5XX_RBBM_CFG_DBGBUS_CLRC                0x23
+#define A5XX_RBBM_CFG_DBGBUS_LOADIVT             0x24
+#define A5XX_RBBM_INTERFACE_HANG_INT_CNTL        0x2F
+#define A5XX_RBBM_INT_CLEAR_CMD                  0x37
+#define A5XX_RBBM_INT_0_MASK                     0x38
+#define A5XX_RBBM_AHB_DBG_CNTL                   0x3F
+#define A5XX_RBBM_EXT_VBIF_DBG_CNTL              0x41
+#define A5XX_RBBM_SW_RESET_CMD                   0x43
+#define A5XX_RBBM_BLOCK_SW_RESET_CMD             0x45
+#define A5XX_RBBM_BLOCK_SW_RESET_CMD2            0x46
+#define A5XX_RBBM_DBG_LO_HI_GPIO                 0x48
+#define A5XX_RBBM_EXT_TRACE_BUS_CNTL             0x49
+#define A5XX_RBBM_CLOCK_CNTL_TP0                 0x4A
+#define A5XX_RBBM_CLOCK_CNTL_TP1                 0x4B
+#define A5XX_RBBM_CLOCK_CNTL_TP2                 0x4C
+#define A5XX_RBBM_CLOCK_CNTL_TP3                 0x4D
+#define A5XX_RBBM_CLOCK_CNTL2_TP0                0x4E
+#define A5XX_RBBM_CLOCK_CNTL2_TP1                0x4F
+#define A5XX_RBBM_CLOCK_CNTL2_TP2                0x50
+#define A5XX_RBBM_CLOCK_CNTL2_TP3                0x51
+#define A5XX_RBBM_CLOCK_CNTL3_TP0                0x52
+#define A5XX_RBBM_CLOCK_CNTL3_TP1                0x53
+#define A5XX_RBBM_CLOCK_CNTL3_TP2                0x54
+#define A5XX_RBBM_CLOCK_CNTL3_TP3                0x55
+#define A5XX_RBBM_READ_AHB_THROUGH_DBG           0x59
+#define A5XX_RBBM_CLOCK_CNTL_UCHE                0x5A
+#define A5XX_RBBM_CLOCK_CNTL2_UCHE               0x5B
+#define A5XX_RBBM_CLOCK_CNTL3_UCHE               0x5C
+#define A5XX_RBBM_CLOCK_CNTL4_UCHE               0x5D
+#define A5XX_RBBM_CLOCK_HYST_UCHE                0x5E
+#define A5XX_RBBM_CLOCK_DELAY_UCHE               0x5F
+#define A5XX_RBBM_CLOCK_MODE_GPC                 0x60
+#define A5XX_RBBM_CLOCK_DELAY_GPC                0x61
+#define A5XX_RBBM_CLOCK_HYST_GPC                 0x62
+#define A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM        0x63
+#define A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM        0x64
+#define A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM       0x65
+#define A5XX_RBBM_CLOCK_DELAY_HLSQ               0x66
+#define A5XX_RBBM_CLOCK_CNTL                     0x67
+#define A5XX_RBBM_CLOCK_CNTL_SP0                 0x68
+#define A5XX_RBBM_CLOCK_CNTL_SP1                 0x69
+#define A5XX_RBBM_CLOCK_CNTL_SP2                 0x6A
+#define A5XX_RBBM_CLOCK_CNTL_SP3                 0x6B
+#define A5XX_RBBM_CLOCK_CNTL2_SP0                0x6C
+#define A5XX_RBBM_CLOCK_CNTL2_SP1                0x6D
+#define A5XX_RBBM_CLOCK_CNTL2_SP2                0x6E
+#define A5XX_RBBM_CLOCK_CNTL2_SP3                0x6F
+#define A5XX_RBBM_CLOCK_HYST_SP0                 0x70
+#define A5XX_RBBM_CLOCK_HYST_SP1                 0x71
+#define A5XX_RBBM_CLOCK_HYST_SP2                 0x72
+#define A5XX_RBBM_CLOCK_HYST_SP3                 0x73
+#define A5XX_RBBM_CLOCK_DELAY_SP0                0x74
+#define A5XX_RBBM_CLOCK_DELAY_SP1                0x75
+#define A5XX_RBBM_CLOCK_DELAY_SP2                0x76
+#define A5XX_RBBM_CLOCK_DELAY_SP3                0x77
+#define A5XX_RBBM_CLOCK_CNTL_RB0                 0x78
+#define A5XX_RBBM_CLOCK_CNTL_RB1                 0x79
+#define A5XX_RBBM_CLOCK_CNTL_RB2                 0x7a
+#define A5XX_RBBM_CLOCK_CNTL_RB3                 0x7B
+#define A5XX_RBBM_CLOCK_CNTL2_RB0                0x7C
+#define A5XX_RBBM_CLOCK_CNTL2_RB1                0x7D
+#define A5XX_RBBM_CLOCK_CNTL2_RB2                0x7E
+#define A5XX_RBBM_CLOCK_CNTL2_RB3                0x7F
+#define A5XX_RBBM_CLOCK_HYST_RAC                 0x80
+#define A5XX_RBBM_CLOCK_DELAY_RAC                0x81
+#define A5XX_RBBM_CLOCK_CNTL_CCU0                0x82
+#define A5XX_RBBM_CLOCK_CNTL_CCU1                0x83
+#define A5XX_RBBM_CLOCK_CNTL_CCU2                0x84
+#define A5XX_RBBM_CLOCK_CNTL_CCU3                0x85
+#define A5XX_RBBM_CLOCK_HYST_RB_CCU0             0x86
+#define A5XX_RBBM_CLOCK_HYST_RB_CCU1             0x87
+#define A5XX_RBBM_CLOCK_HYST_RB_CCU2             0x88
+#define A5XX_RBBM_CLOCK_HYST_RB_CCU3             0x89
+#define A5XX_RBBM_CLOCK_CNTL_RAC                 0x8A
+#define A5XX_RBBM_CLOCK_CNTL2_RAC                0x8B
+#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0        0x8C
+#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1        0x8D
+#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2        0x8E
+#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3        0x8F
+#define A5XX_RBBM_CLOCK_HYST_VFD                 0x90
+#define A5XX_RBBM_CLOCK_MODE_VFD                 0x91
+#define A5XX_RBBM_CLOCK_DELAY_VFD                0x92
+#define A5XX_RBBM_AHB_CNTL0                      0x93
+#define A5XX_RBBM_AHB_CNTL1                      0x94
+#define A5XX_RBBM_AHB_CNTL2                      0x95
+#define A5XX_RBBM_AHB_CMD                        0x96
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11     0x9C
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12     0x9D
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13     0x9E
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14     0x9F
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15     0xA0
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16     0xA1
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17     0xA2
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18     0xA3
+#define A5XX_RBBM_CLOCK_DELAY_TP0                0xA4
+#define A5XX_RBBM_CLOCK_DELAY_TP1                0xA5
+#define A5XX_RBBM_CLOCK_DELAY_TP2                0xA6
+#define A5XX_RBBM_CLOCK_DELAY_TP3                0xA7
+#define A5XX_RBBM_CLOCK_DELAY2_TP0               0xA8
+#define A5XX_RBBM_CLOCK_DELAY2_TP1               0xA9
+#define A5XX_RBBM_CLOCK_DELAY2_TP2               0xAA
+#define A5XX_RBBM_CLOCK_DELAY2_TP3               0xAB
+#define A5XX_RBBM_CLOCK_DELAY3_TP0               0xAC
+#define A5XX_RBBM_CLOCK_DELAY3_TP1               0xAD
+#define A5XX_RBBM_CLOCK_DELAY3_TP2               0xAE
+#define A5XX_RBBM_CLOCK_DELAY3_TP3               0xAF
+#define A5XX_RBBM_CLOCK_HYST_TP0                 0xB0
+#define A5XX_RBBM_CLOCK_HYST_TP1                 0xB1
+#define A5XX_RBBM_CLOCK_HYST_TP2                 0xB2
+#define A5XX_RBBM_CLOCK_HYST_TP3                 0xB3
+#define A5XX_RBBM_CLOCK_HYST2_TP0                0xB4
+#define A5XX_RBBM_CLOCK_HYST2_TP1                0xB5
+#define A5XX_RBBM_CLOCK_HYST2_TP2                0xB6
+#define A5XX_RBBM_CLOCK_HYST2_TP3                0xB7
+#define A5XX_RBBM_CLOCK_HYST3_TP0                0xB8
+#define A5XX_RBBM_CLOCK_HYST3_TP1                0xB9
+#define A5XX_RBBM_CLOCK_HYST3_TP2                0xBA
+#define A5XX_RBBM_CLOCK_HYST3_TP3                0xBB
+#define A5XX_RBBM_CLOCK_CNTL_GPMU                0xC8
+#define A5XX_RBBM_CLOCK_DELAY_GPMU               0xC9
+#define A5XX_RBBM_CLOCK_HYST_GPMU                0xCA
+#define A5XX_RBBM_PERFCTR_CP_0_LO                0x3A0
+#define A5XX_RBBM_PERFCTR_CP_0_HI                0x3A1
+#define A5XX_RBBM_PERFCTR_CP_1_LO                0x3A2
+#define A5XX_RBBM_PERFCTR_CP_1_HI                0x3A3
+#define A5XX_RBBM_PERFCTR_CP_2_LO                0x3A4
+#define A5XX_RBBM_PERFCTR_CP_2_HI                0x3A5
+#define A5XX_RBBM_PERFCTR_CP_3_LO                0x3A6
+#define A5XX_RBBM_PERFCTR_CP_3_HI                0x3A7
+#define A5XX_RBBM_PERFCTR_CP_4_LO                0x3A8
+#define A5XX_RBBM_PERFCTR_CP_4_HI                0x3A9
+#define A5XX_RBBM_PERFCTR_CP_5_LO                0x3AA
+#define A5XX_RBBM_PERFCTR_CP_5_HI                0x3AB
+#define A5XX_RBBM_PERFCTR_CP_6_LO                0x3AC
+#define A5XX_RBBM_PERFCTR_CP_6_HI                0x3AD
+#define A5XX_RBBM_PERFCTR_CP_7_LO                0x3AE
+#define A5XX_RBBM_PERFCTR_CP_7_HI                0x3AF
+#define A5XX_RBBM_PERFCTR_RBBM_0_LO              0x3B0
+#define A5XX_RBBM_PERFCTR_RBBM_0_HI              0x3B1
+#define A5XX_RBBM_PERFCTR_RBBM_1_LO              0x3B2
+#define A5XX_RBBM_PERFCTR_RBBM_1_HI              0x3B3
+#define A5XX_RBBM_PERFCTR_RBBM_2_LO              0x3B4
+#define A5XX_RBBM_PERFCTR_RBBM_2_HI              0x3B5
+#define A5XX_RBBM_PERFCTR_RBBM_3_LO              0x3B6
+#define A5XX_RBBM_PERFCTR_RBBM_3_HI              0x3B7
+#define A5XX_RBBM_PERFCTR_PC_0_LO                0x3B8
+#define A5XX_RBBM_PERFCTR_PC_0_HI                0x3B9
+#define A5XX_RBBM_PERFCTR_PC_1_LO                0x3BA
+#define A5XX_RBBM_PERFCTR_PC_1_HI                0x3BB
+#define A5XX_RBBM_PERFCTR_PC_2_LO                0x3BC
+#define A5XX_RBBM_PERFCTR_PC_2_HI                0x3BD
+#define A5XX_RBBM_PERFCTR_PC_3_LO                0x3BE
+#define A5XX_RBBM_PERFCTR_PC_3_HI                0x3BF
+#define A5XX_RBBM_PERFCTR_PC_4_LO                0x3C0
+#define A5XX_RBBM_PERFCTR_PC_4_HI                0x3C1
+#define A5XX_RBBM_PERFCTR_PC_5_LO                0x3C2
+#define A5XX_RBBM_PERFCTR_PC_5_HI                0x3C3
+#define A5XX_RBBM_PERFCTR_PC_6_LO                0x3C4
+#define A5XX_RBBM_PERFCTR_PC_6_HI                0x3C5
+#define A5XX_RBBM_PERFCTR_PC_7_LO                0x3C6
+#define A5XX_RBBM_PERFCTR_PC_7_HI                0x3C7
+#define A5XX_RBBM_PERFCTR_VFD_0_LO               0x3C8
+#define A5XX_RBBM_PERFCTR_VFD_0_HI               0x3C9
+#define A5XX_RBBM_PERFCTR_VFD_1_LO               0x3CA
+#define A5XX_RBBM_PERFCTR_VFD_1_HI               0x3CB
+#define A5XX_RBBM_PERFCTR_VFD_2_LO               0x3CC
+#define A5XX_RBBM_PERFCTR_VFD_2_HI               0x3CD
+#define A5XX_RBBM_PERFCTR_VFD_3_LO               0x3CE
+#define A5XX_RBBM_PERFCTR_VFD_3_HI               0x3CF
+#define A5XX_RBBM_PERFCTR_VFD_4_LO               0x3D0
+#define A5XX_RBBM_PERFCTR_VFD_4_HI               0x3D1
+#define A5XX_RBBM_PERFCTR_VFD_5_LO               0x3D2
+#define A5XX_RBBM_PERFCTR_VFD_5_HI               0x3D3
+#define A5XX_RBBM_PERFCTR_VFD_6_LO               0x3D4
+#define A5XX_RBBM_PERFCTR_VFD_6_HI               0x3D5
+#define A5XX_RBBM_PERFCTR_VFD_7_LO               0x3D6
+#define A5XX_RBBM_PERFCTR_VFD_7_HI               0x3D7
+#define A5XX_RBBM_PERFCTR_HLSQ_0_LO              0x3D8
+#define A5XX_RBBM_PERFCTR_HLSQ_0_HI              0x3D9
+#define A5XX_RBBM_PERFCTR_HLSQ_1_LO              0x3DA
+#define A5XX_RBBM_PERFCTR_HLSQ_1_HI              0x3DB
+#define A5XX_RBBM_PERFCTR_HLSQ_2_LO              0x3DC
+#define A5XX_RBBM_PERFCTR_HLSQ_2_HI              0x3DD
+#define A5XX_RBBM_PERFCTR_HLSQ_3_LO              0x3DE
+#define A5XX_RBBM_PERFCTR_HLSQ_3_HI              0x3DF
+#define A5XX_RBBM_PERFCTR_HLSQ_4_LO              0x3E0
+#define A5XX_RBBM_PERFCTR_HLSQ_4_HI              0x3E1
+#define A5XX_RBBM_PERFCTR_HLSQ_5_LO              0x3E2
+#define A5XX_RBBM_PERFCTR_HLSQ_5_HI              0x3E3
+#define A5XX_RBBM_PERFCTR_HLSQ_6_LO              0x3E4
+#define A5XX_RBBM_PERFCTR_HLSQ_6_HI              0x3E5
+#define A5XX_RBBM_PERFCTR_HLSQ_7_LO              0x3E6
+#define A5XX_RBBM_PERFCTR_HLSQ_7_HI              0x3E7
+#define A5XX_RBBM_PERFCTR_VPC_0_LO               0x3E8
+#define A5XX_RBBM_PERFCTR_VPC_0_HI               0x3E9
+#define A5XX_RBBM_PERFCTR_VPC_1_LO               0x3EA
+#define A5XX_RBBM_PERFCTR_VPC_1_HI               0x3EB
+#define A5XX_RBBM_PERFCTR_VPC_2_LO               0x3EC
+#define A5XX_RBBM_PERFCTR_VPC_2_HI               0x3ED
+#define A5XX_RBBM_PERFCTR_VPC_3_LO               0x3EE
+#define A5XX_RBBM_PERFCTR_VPC_3_HI               0x3EF
+#define A5XX_RBBM_PERFCTR_CCU_0_LO               0x3F0
+#define A5XX_RBBM_PERFCTR_CCU_0_HI               0x3F1
+#define A5XX_RBBM_PERFCTR_CCU_1_LO               0x3F2
+#define A5XX_RBBM_PERFCTR_CCU_1_HI               0x3F3
+#define A5XX_RBBM_PERFCTR_CCU_2_LO               0x3F4
+#define A5XX_RBBM_PERFCTR_CCU_2_HI               0x3F5
+#define A5XX_RBBM_PERFCTR_CCU_3_LO               0x3F6
+#define A5XX_RBBM_PERFCTR_CCU_3_HI               0x3F7
+#define A5XX_RBBM_PERFCTR_TSE_0_LO               0x3F8
+#define A5XX_RBBM_PERFCTR_TSE_0_HI               0x3F9
+#define A5XX_RBBM_PERFCTR_TSE_1_LO               0x3FA
+#define A5XX_RBBM_PERFCTR_TSE_1_HI               0x3FB
+#define A5XX_RBBM_PERFCTR_TSE_2_LO               0x3FC
+#define A5XX_RBBM_PERFCTR_TSE_2_HI               0x3FD
+#define A5XX_RBBM_PERFCTR_TSE_3_LO               0x3FE
+#define A5XX_RBBM_PERFCTR_TSE_3_HI               0x3FF
+#define A5XX_RBBM_PERFCTR_RAS_0_LO               0x400
+#define A5XX_RBBM_PERFCTR_RAS_0_HI               0x401
+#define A5XX_RBBM_PERFCTR_RAS_1_LO               0x402
+#define A5XX_RBBM_PERFCTR_RAS_1_HI               0x403
+#define A5XX_RBBM_PERFCTR_RAS_2_LO               0x404
+#define A5XX_RBBM_PERFCTR_RAS_2_HI               0x405
+#define A5XX_RBBM_PERFCTR_RAS_3_LO               0x406
+#define A5XX_RBBM_PERFCTR_RAS_3_HI               0x407
+#define A5XX_RBBM_PERFCTR_UCHE_0_LO              0x408
+#define A5XX_RBBM_PERFCTR_UCHE_0_HI              0x409
+#define A5XX_RBBM_PERFCTR_UCHE_1_LO              0x40A
+#define A5XX_RBBM_PERFCTR_UCHE_1_HI              0x40B
+#define A5XX_RBBM_PERFCTR_UCHE_2_LO              0x40C
+#define A5XX_RBBM_PERFCTR_UCHE_2_HI              0x40D
+#define A5XX_RBBM_PERFCTR_UCHE_3_LO              0x40E
+#define A5XX_RBBM_PERFCTR_UCHE_3_HI              0x40F
+#define A5XX_RBBM_PERFCTR_UCHE_4_LO              0x410
+#define A5XX_RBBM_PERFCTR_UCHE_4_HI              0x411
+#define A5XX_RBBM_PERFCTR_UCHE_5_LO              0x412
+#define A5XX_RBBM_PERFCTR_UCHE_5_HI              0x413
+#define A5XX_RBBM_PERFCTR_UCHE_6_LO              0x414
+#define A5XX_RBBM_PERFCTR_UCHE_6_HI              0x415
+#define A5XX_RBBM_PERFCTR_UCHE_7_LO              0x416
+#define A5XX_RBBM_PERFCTR_UCHE_7_HI              0x417
+#define A5XX_RBBM_PERFCTR_TP_0_LO                0x418
+#define A5XX_RBBM_PERFCTR_TP_0_HI                0x419
+#define A5XX_RBBM_PERFCTR_TP_1_LO                0x41A
+#define A5XX_RBBM_PERFCTR_TP_1_HI                0x41B
+#define A5XX_RBBM_PERFCTR_TP_2_LO                0x41C
+#define A5XX_RBBM_PERFCTR_TP_2_HI                0x41D
+#define A5XX_RBBM_PERFCTR_TP_3_LO                0x41E
+#define A5XX_RBBM_PERFCTR_TP_3_HI                0x41F
+#define A5XX_RBBM_PERFCTR_TP_4_LO                0x420
+#define A5XX_RBBM_PERFCTR_TP_4_HI                0x421
+#define A5XX_RBBM_PERFCTR_TP_5_LO                0x422
+#define A5XX_RBBM_PERFCTR_TP_5_HI                0x423
+#define A5XX_RBBM_PERFCTR_TP_6_LO                0x424
+#define A5XX_RBBM_PERFCTR_TP_6_HI                0x425
+#define A5XX_RBBM_PERFCTR_TP_7_LO                0x426
+#define A5XX_RBBM_PERFCTR_TP_7_HI                0x427
+#define A5XX_RBBM_PERFCTR_SP_0_LO                0x428
+#define A5XX_RBBM_PERFCTR_SP_0_HI                0x429
+#define A5XX_RBBM_PERFCTR_SP_1_LO                0x42A
+#define A5XX_RBBM_PERFCTR_SP_1_HI                0x42B
+#define A5XX_RBBM_PERFCTR_SP_2_LO                0x42C
+#define A5XX_RBBM_PERFCTR_SP_2_HI                0x42D
+#define A5XX_RBBM_PERFCTR_SP_3_LO                0x42E
+#define A5XX_RBBM_PERFCTR_SP_3_HI                0x42F
+#define A5XX_RBBM_PERFCTR_SP_4_LO                0x430
+#define A5XX_RBBM_PERFCTR_SP_4_HI                0x431
+#define A5XX_RBBM_PERFCTR_SP_5_LO                0x432
+#define A5XX_RBBM_PERFCTR_SP_5_HI                0x433
+#define A5XX_RBBM_PERFCTR_SP_6_LO                0x434
+#define A5XX_RBBM_PERFCTR_SP_6_HI                0x435
+#define A5XX_RBBM_PERFCTR_SP_7_LO                0x436
+#define A5XX_RBBM_PERFCTR_SP_7_HI                0x437
+#define A5XX_RBBM_PERFCTR_SP_8_LO                0x438
+#define A5XX_RBBM_PERFCTR_SP_8_HI                0x439
+#define A5XX_RBBM_PERFCTR_SP_9_LO                0x43A
+#define A5XX_RBBM_PERFCTR_SP_9_HI                0x43B
+#define A5XX_RBBM_PERFCTR_SP_10_LO               0x43C
+#define A5XX_RBBM_PERFCTR_SP_10_HI               0x43D
+#define A5XX_RBBM_PERFCTR_SP_11_LO               0x43E
+#define A5XX_RBBM_PERFCTR_SP_11_HI               0x43F
+#define A5XX_RBBM_PERFCTR_RB_0_LO                0x440
+#define A5XX_RBBM_PERFCTR_RB_0_HI                0x441
+#define A5XX_RBBM_PERFCTR_RB_1_LO                0x442
+#define A5XX_RBBM_PERFCTR_RB_1_HI                0x443
+#define A5XX_RBBM_PERFCTR_RB_2_LO                0x444
+#define A5XX_RBBM_PERFCTR_RB_2_HI                0x445
+#define A5XX_RBBM_PERFCTR_RB_3_LO                0x446
+#define A5XX_RBBM_PERFCTR_RB_3_HI                0x447
+#define A5XX_RBBM_PERFCTR_RB_4_LO                0x448
+#define A5XX_RBBM_PERFCTR_RB_4_HI                0x449
+#define A5XX_RBBM_PERFCTR_RB_5_LO                0x44A
+#define A5XX_RBBM_PERFCTR_RB_5_HI                0x44B
+#define A5XX_RBBM_PERFCTR_RB_6_LO                0x44C
+#define A5XX_RBBM_PERFCTR_RB_6_HI                0x44D
+#define A5XX_RBBM_PERFCTR_RB_7_LO                0x44E
+#define A5XX_RBBM_PERFCTR_RB_7_HI                0x44F
+#define A5XX_RBBM_PERFCTR_VSC_0_LO               0x450
+#define A5XX_RBBM_PERFCTR_VSC_0_HI               0x451
+#define A5XX_RBBM_PERFCTR_VSC_1_LO               0x452
+#define A5XX_RBBM_PERFCTR_VSC_1_HI               0x453
+#define A5XX_RBBM_PERFCTR_LRZ_0_LO               0x454
+#define A5XX_RBBM_PERFCTR_LRZ_0_HI               0x455
+#define A5XX_RBBM_PERFCTR_LRZ_1_LO               0x456
+#define A5XX_RBBM_PERFCTR_LRZ_1_HI               0x457
+#define A5XX_RBBM_PERFCTR_LRZ_2_LO               0x458
+#define A5XX_RBBM_PERFCTR_LRZ_2_HI               0x459
+#define A5XX_RBBM_PERFCTR_LRZ_3_LO               0x45A
+#define A5XX_RBBM_PERFCTR_LRZ_3_HI               0x45B
+#define A5XX_RBBM_PERFCTR_CMP_0_LO               0x45C
+#define A5XX_RBBM_PERFCTR_CMP_0_HI               0x45D
+#define A5XX_RBBM_PERFCTR_CMP_1_LO               0x45E
+#define A5XX_RBBM_PERFCTR_CMP_1_HI               0x45F
+#define A5XX_RBBM_PERFCTR_CMP_2_LO               0x460
+#define A5XX_RBBM_PERFCTR_CMP_2_HI               0x461
+#define A5XX_RBBM_PERFCTR_CMP_3_LO               0x462
+#define A5XX_RBBM_PERFCTR_CMP_3_HI               0x463
+#define A5XX_RBBM_ALWAYSON_COUNTER_LO            0x4D2
+#define A5XX_RBBM_ALWAYSON_COUNTER_HI            0x4D3
+#define A5XX_RBBM_STATUS                         0x4F5
+#define A5XX_RBBM_STATUS3                        0x530
+#define A5XX_RBBM_INT_0_STATUS                   0x4E1
+#define A5XX_RBBM_AHB_ME_SPLIT_STATUS            0x4F0
+#define A5XX_RBBM_AHB_PFP_SPLIT_STATUS           0x4F1
+#define A5XX_RBBM_AHB_ERROR_STATUS               0x4F4
+#define A5XX_RBBM_PERFCTR_CNTL                   0x464
+#define A5XX_RBBM_PERFCTR_LOAD_CMD0              0x465
+#define A5XX_RBBM_PERFCTR_LOAD_CMD1              0x466
+#define A5XX_RBBM_PERFCTR_LOAD_CMD2              0x467
+#define A5XX_RBBM_PERFCTR_LOAD_CMD3              0x468
+#define A5XX_RBBM_PERFCTR_LOAD_VALUE_LO          0x469
+#define A5XX_RBBM_PERFCTR_LOAD_VALUE_HI          0x46A
+#define A5XX_RBBM_PERFCTR_RBBM_SEL_0             0x46B
+#define A5XX_RBBM_PERFCTR_RBBM_SEL_1             0x46C
+#define A5XX_RBBM_PERFCTR_RBBM_SEL_2             0x46D
+#define A5XX_RBBM_PERFCTR_RBBM_SEL_3             0x46E
+#define A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED        0x46F
+#define A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC         0x504
+#define A5XX_RBBM_CFG_DBGBUS_OVER                0x505
+#define A5XX_RBBM_CFG_DBGBUS_COUNT0              0x506
+#define A5XX_RBBM_CFG_DBGBUS_COUNT1              0x507
+#define A5XX_RBBM_CFG_DBGBUS_COUNT2              0x508
+#define A5XX_RBBM_CFG_DBGBUS_COUNT3              0x509
+#define A5XX_RBBM_CFG_DBGBUS_COUNT4              0x50A
+#define A5XX_RBBM_CFG_DBGBUS_COUNT5              0x50B
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR          0x50C
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0          0x50D
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1          0x50E
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2          0x50F
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3          0x510
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4          0x511
+#define A5XX_RBBM_CFG_DBGBUS_MISR0               0x512
+#define A5XX_RBBM_CFG_DBGBUS_MISR1               0x513
+#define A5XX_RBBM_ISDB_CNT                       0x533
+#define A5XX_RBBM_SECVID_TRUST_CONFIG            0xF000
+#define A5XX_RBBM_SECVID_TRUST_CNTL              0xF400
+#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO     0xF800
+#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI     0xF801
+#define A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE        0xF802
+#define A5XX_RBBM_SECVID_TSB_CNTL                0xF803
+#define A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL      0xF810
+
+/* VSC registers */
+#define A5XX_VSC_PERFCTR_VSC_SEL_0          0xC60
+#define A5XX_VSC_PERFCTR_VSC_SEL_1          0xC61
+
+#define A5XX_GRAS_ADDR_MODE_CNTL            0xC81
+
+/* TSE registers */
+#define A5XX_GRAS_PERFCTR_TSE_SEL_0         0xC90
+#define A5XX_GRAS_PERFCTR_TSE_SEL_1         0xC91
+#define A5XX_GRAS_PERFCTR_TSE_SEL_2         0xC92
+#define A5XX_GRAS_PERFCTR_TSE_SEL_3         0xC93
+
+/* RAS registers */
+#define A5XX_GRAS_PERFCTR_RAS_SEL_0         0xC94
+#define A5XX_GRAS_PERFCTR_RAS_SEL_1         0xC95
+#define A5XX_GRAS_PERFCTR_RAS_SEL_2         0xC96
+#define A5XX_GRAS_PERFCTR_RAS_SEL_3         0xC97
+
+/* LRZ registers */
+#define A5XX_GRAS_PERFCTR_LRZ_SEL_0         0xC98
+#define A5XX_GRAS_PERFCTR_LRZ_SEL_1         0xC99
+#define A5XX_GRAS_PERFCTR_LRZ_SEL_2         0xC9A
+#define A5XX_GRAS_PERFCTR_LRZ_SEL_3         0xC9B
+
+
+/* RB registers */
+#define A5XX_RB_DBG_ECO_CNT                 0xCC4
+#define A5XX_RB_ADDR_MODE_CNTL              0xCC5
+#define A5XX_RB_MODE_CNTL                   0xCC6
+#define A5XX_RB_PERFCTR_RB_SEL_0            0xCD0
+#define A5XX_RB_PERFCTR_RB_SEL_1            0xCD1
+#define A5XX_RB_PERFCTR_RB_SEL_2            0xCD2
+#define A5XX_RB_PERFCTR_RB_SEL_3            0xCD3
+#define A5XX_RB_PERFCTR_RB_SEL_4            0xCD4
+#define A5XX_RB_PERFCTR_RB_SEL_5            0xCD5
+#define A5XX_RB_PERFCTR_RB_SEL_6            0xCD6
+#define A5XX_RB_PERFCTR_RB_SEL_7            0xCD7
+
+/* CCU registers */
+#define A5XX_RB_PERFCTR_CCU_SEL_0           0xCD8
+#define A5XX_RB_PERFCTR_CCU_SEL_1           0xCD9
+#define A5XX_RB_PERFCTR_CCU_SEL_2           0xCDA
+#define A5XX_RB_PERFCTR_CCU_SEL_3           0xCDB
+
+/* RB Power Counter RB Registers Select */
+#define A5XX_RB_POWERCTR_RB_SEL_0           0xCE0
+#define A5XX_RB_POWERCTR_RB_SEL_1           0xCE1
+#define A5XX_RB_POWERCTR_RB_SEL_2           0xCE2
+#define A5XX_RB_POWERCTR_RB_SEL_3           0xCE3
+
+/* RB Power Counter CCU Registers Select */
+#define A5XX_RB_POWERCTR_CCU_SEL_0          0xCE4
+#define A5XX_RB_POWERCTR_CCU_SEL_1          0xCE5
+
+/* CMP registers */
+#define A5XX_RB_PERFCTR_CMP_SEL_0           0xCEC
+#define A5XX_RB_PERFCTR_CMP_SEL_1           0xCED
+#define A5XX_RB_PERFCTR_CMP_SEL_2           0xCEE
+#define A5XX_RB_PERFCTR_CMP_SEL_3           0xCEF
+
+/* PC registers */
+#define A5XX_PC_DBG_ECO_CNTL                0xD00
+#define A5XX_PC_ADDR_MODE_CNTL              0xD01
+#define A5XX_PC_PERFCTR_PC_SEL_0            0xD10
+#define A5XX_PC_PERFCTR_PC_SEL_1            0xD11
+#define A5XX_PC_PERFCTR_PC_SEL_2            0xD12
+#define A5XX_PC_PERFCTR_PC_SEL_3            0xD13
+#define A5XX_PC_PERFCTR_PC_SEL_4            0xD14
+#define A5XX_PC_PERFCTR_PC_SEL_5            0xD15
+#define A5XX_PC_PERFCTR_PC_SEL_6            0xD16
+#define A5XX_PC_PERFCTR_PC_SEL_7            0xD17
+
+/* HLSQ registers */
+#define A5XX_HLSQ_DBG_ECO_CNTL		    0xE04
+#define A5XX_HLSQ_ADDR_MODE_CNTL            0xE05
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_0        0xE10
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_1        0xE11
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_2        0xE12
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_3        0xE13
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_4        0xE14
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_5        0xE15
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_6        0xE16
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_7        0xE17
+#define A5XX_HLSQ_DBG_READ_SEL              0xBC00
+#define A5XX_HLSQ_DBG_AHB_READ_APERTURE     0xA000
+
+/* VFD registers */
+#define A5XX_VFD_ADDR_MODE_CNTL             0xE41
+#define A5XX_VFD_PERFCTR_VFD_SEL_0          0xE50
+#define A5XX_VFD_PERFCTR_VFD_SEL_1          0xE51
+#define A5XX_VFD_PERFCTR_VFD_SEL_2          0xE52
+#define A5XX_VFD_PERFCTR_VFD_SEL_3          0xE53
+#define A5XX_VFD_PERFCTR_VFD_SEL_4          0xE54
+#define A5XX_VFD_PERFCTR_VFD_SEL_5          0xE55
+#define A5XX_VFD_PERFCTR_VFD_SEL_6          0xE56
+#define A5XX_VFD_PERFCTR_VFD_SEL_7          0xE57
+
+/* VPC registers */
+#define A5XX_VPC_DBG_ECO_CNTL		    0xE60
+#define A5XX_VPC_ADDR_MODE_CNTL             0xE61
+#define A5XX_VPC_PERFCTR_VPC_SEL_0          0xE64
+#define A5XX_VPC_PERFCTR_VPC_SEL_1          0xE65
+#define A5XX_VPC_PERFCTR_VPC_SEL_2          0xE66
+#define A5XX_VPC_PERFCTR_VPC_SEL_3          0xE67
+
+/* UCHE registers */
+#define A5XX_UCHE_ADDR_MODE_CNTL            0xE80
+#define A5XX_UCHE_MODE_CNTL                 0xE81
+#define A5XX_UCHE_WRITE_THRU_BASE_LO        0xE87
+#define A5XX_UCHE_WRITE_THRU_BASE_HI        0xE88
+#define A5XX_UCHE_TRAP_BASE_LO              0xE89
+#define A5XX_UCHE_TRAP_BASE_HI              0xE8A
+#define A5XX_UCHE_GMEM_RANGE_MIN_LO         0xE8B
+#define A5XX_UCHE_GMEM_RANGE_MIN_HI         0xE8C
+#define A5XX_UCHE_GMEM_RANGE_MAX_LO         0xE8D
+#define A5XX_UCHE_GMEM_RANGE_MAX_HI         0xE8E
+#define A5XX_UCHE_DBG_ECO_CNTL_2            0xE8F
+#define A5XX_UCHE_INVALIDATE0               0xE95
+#define A5XX_UCHE_CACHE_WAYS                0xE96
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_0        0xEA0
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_1        0xEA1
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_2        0xEA2
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_3        0xEA3
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_4        0xEA4
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_5        0xEA5
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_6        0xEA6
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_7        0xEA7
+
+/* UCHE Power Counter UCHE Registers Select */
+#define A5XX_UCHE_POWERCTR_UCHE_SEL_0       0xEA8
+#define A5XX_UCHE_POWERCTR_UCHE_SEL_1       0xEA9
+#define A5XX_UCHE_POWERCTR_UCHE_SEL_2       0xEAA
+#define A5XX_UCHE_POWERCTR_UCHE_SEL_3       0xEAB
+
+/* SP registers */
+#define A5XX_SP_DBG_ECO_CNTL                0xEC0
+#define A5XX_SP_ADDR_MODE_CNTL              0xEC1
+#define A5XX_SP_PERFCTR_SP_SEL_0            0xED0
+#define A5XX_SP_PERFCTR_SP_SEL_1            0xED1
+#define A5XX_SP_PERFCTR_SP_SEL_2            0xED2
+#define A5XX_SP_PERFCTR_SP_SEL_3            0xED3
+#define A5XX_SP_PERFCTR_SP_SEL_4            0xED4
+#define A5XX_SP_PERFCTR_SP_SEL_5            0xED5
+#define A5XX_SP_PERFCTR_SP_SEL_6            0xED6
+#define A5XX_SP_PERFCTR_SP_SEL_7            0xED7
+#define A5XX_SP_PERFCTR_SP_SEL_8            0xED8
+#define A5XX_SP_PERFCTR_SP_SEL_9            0xED9
+#define A5XX_SP_PERFCTR_SP_SEL_10           0xEDA
+#define A5XX_SP_PERFCTR_SP_SEL_11           0xEDB
+
+/* SP Power Counter SP Registers Select */
+#define A5XX_SP_POWERCTR_SP_SEL_0           0xEDC
+#define A5XX_SP_POWERCTR_SP_SEL_1           0xEDD
+#define A5XX_SP_POWERCTR_SP_SEL_2           0xEDE
+#define A5XX_SP_POWERCTR_SP_SEL_3           0xEDF
+
+/* TP registers */
+#define A5XX_TPL1_ADDR_MODE_CNTL            0xF01
+#define A5XX_TPL1_MODE_CNTL                 0xF02
+#define A5XX_TPL1_PERFCTR_TP_SEL_0          0xF10
+#define A5XX_TPL1_PERFCTR_TP_SEL_1          0xF11
+#define A5XX_TPL1_PERFCTR_TP_SEL_2          0xF12
+#define A5XX_TPL1_PERFCTR_TP_SEL_3          0xF13
+#define A5XX_TPL1_PERFCTR_TP_SEL_4          0xF14
+#define A5XX_TPL1_PERFCTR_TP_SEL_5          0xF15
+#define A5XX_TPL1_PERFCTR_TP_SEL_6          0xF16
+#define A5XX_TPL1_PERFCTR_TP_SEL_7          0xF17
+
+/* TP Power Counter TP Registers Select */
+#define A5XX_TPL1_POWERCTR_TP_SEL_0         0xF18
+#define A5XX_TPL1_POWERCTR_TP_SEL_1         0xF19
+#define A5XX_TPL1_POWERCTR_TP_SEL_2         0xF1A
+#define A5XX_TPL1_POWERCTR_TP_SEL_3         0xF1B
+
+/* VBIF registers */
+#define A5XX_VBIF_VERSION                       0x3000
+#define A5XX_VBIF_CLKON                         0x3001
+#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK   0x1
+#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT  0x1
+
+#define A5XX_VBIF_ROUND_ROBIN_QOS_ARB      0x3049
+#define A5XX_VBIF_GATE_OFF_WRREQ_EN        0x302A
+
+#define A5XX_VBIF_XIN_HALT_CTRL0	   0x3080
+#define A5XX_VBIF_XIN_HALT_CTRL0_MASK	   0xF
+#define A510_VBIF_XIN_HALT_CTRL0_MASK	   0x7
+#define A5XX_VBIF_XIN_HALT_CTRL1	   0x3081
+
+#define A5XX_VBIF_TEST_BUS_OUT_CTRL            0x3084
+#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK    0x1
+#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT   0x0
+
+#define A5XX_VBIF_TEST_BUS1_CTRL0                0x3085
+#define A5XX_VBIF_TEST_BUS1_CTRL1                0x3086
+#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK  0xF
+#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0
+
+#define A5XX_VBIF_TEST_BUS2_CTRL0                   0x3087
+#define A5XX_VBIF_TEST_BUS2_CTRL1                   0x3088
+#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK     0x1FF
+#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT    0x0
+
+#define A5XX_VBIF_TEST_BUS_OUT             0x308c
+
+#define A5XX_VBIF_PERF_CNT_SEL0            0x30D0
+#define A5XX_VBIF_PERF_CNT_SEL1            0x30D1
+#define A5XX_VBIF_PERF_CNT_SEL2            0x30D2
+#define A5XX_VBIF_PERF_CNT_SEL3            0x30D3
+#define A5XX_VBIF_PERF_CNT_LOW0            0x30D8
+#define A5XX_VBIF_PERF_CNT_LOW1            0x30D9
+#define A5XX_VBIF_PERF_CNT_LOW2            0x30DA
+#define A5XX_VBIF_PERF_CNT_LOW3            0x30DB
+#define A5XX_VBIF_PERF_CNT_HIGH0           0x30E0
+#define A5XX_VBIF_PERF_CNT_HIGH1           0x30E1
+#define A5XX_VBIF_PERF_CNT_HIGH2           0x30E2
+#define A5XX_VBIF_PERF_CNT_HIGH3           0x30E3
+
+#define A5XX_VBIF_PERF_PWR_CNT_EN0         0x3100
+#define A5XX_VBIF_PERF_PWR_CNT_EN1         0x3101
+#define A5XX_VBIF_PERF_PWR_CNT_EN2         0x3102
+
+#define A5XX_VBIF_PERF_PWR_CNT_LOW0        0x3110
+#define A5XX_VBIF_PERF_PWR_CNT_LOW1        0x3111
+#define A5XX_VBIF_PERF_PWR_CNT_LOW2        0x3112
+
+#define A5XX_VBIF_PERF_PWR_CNT_HIGH0       0x3118
+#define A5XX_VBIF_PERF_PWR_CNT_HIGH1       0x3119
+#define A5XX_VBIF_PERF_PWR_CNT_HIGH2       0x311A
+
+/* GPMU registers */
+#define A5XX_GPMU_INST_RAM_BASE            0x8800
+#define A5XX_GPMU_DATA_RAM_BASE            0x9800
+#define A5XX_GPMU_SP_POWER_CNTL            0xA881
+#define A5XX_GPMU_RBCCU_CLOCK_CNTL         0xA886
+#define A5XX_GPMU_RBCCU_POWER_CNTL         0xA887
+#define A5XX_GPMU_SP_PWR_CLK_STATUS        0xA88B
+#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS     0xA88D
+#define A5XX_GPMU_PWR_COL_STAGGER_DELAY    0xA891
+#define A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0xA892
+#define A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0xA893
+#define A5XX_GPMU_PWR_COL_BINNING_CTRL     0xA894
+#define A5XX_GPMU_CLOCK_THROTTLE_CTRL      0xA8A3
+#define A5XX_GPMU_WFI_CONFIG               0xA8C1
+#define A5XX_GPMU_RBBM_INTR_INFO           0xA8D6
+#define A5XX_GPMU_CM3_SYSRESET             0xA8D8
+#define A5XX_GPMU_GENERAL_0                0xA8E0
+#define A5XX_GPMU_GENERAL_1                0xA8E1
+
+/* COUNTABLE FOR SP PERFCOUNTER */
+#define A5XX_SP_ALU_ACTIVE_CYCLES          0x1
+#define A5XX_SP0_ICL1_MISSES               0x35
+#define A5XX_SP_FS_CFLOW_INSTRUCTIONS      0x27
+
+/* COUNTABLE FOR TSE PERFCOUNTER */
+#define A5XX_TSE_INPUT_PRIM_NUM            0x6
+
+/* COUNTABLE FOR RBBM PERFCOUNTER */
+#define A5XX_RBBM_ALWAYS_COUNT		0x0
+
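The COUNTABLE values listed above are selector codes, not register offsets. As a brief illustration, the hedged sketch below (an assumption about usage, not code from this driver) writes a countable into one of the *_PERFCTR_*_SEL_n registers; `reg_write_fn`, `wr`, and `select_sp_counter0` are hypothetical names, while the offsets and the countable value come from the listing above.

#include <stdint.h>

#define A5XX_SP_PERFCTR_SP_SEL_0   0xED0  /* SP select register, defined above */
#define A5XX_SP_ALU_ACTIVE_CYCLES  0x1    /* SP countable, defined above */

/* Hypothetical register-write callback standing in for the driver's accessor. */
typedef void (*reg_write_fn)(uint32_t dword_offset, uint32_t value);

/* Program SP counter 0 to count ALU-active cycles. */
static inline void select_sp_counter0(reg_write_fn wr)
{
	wr(A5XX_SP_PERFCTR_SP_SEL_0, A5XX_SP_ALU_ACTIVE_CYCLES);
	/*
	 * The accumulated count would later be read back from the matching
	 * A5XX_RBBM_PERFCTR_SP_0_LO / A5XX_RBBM_PERFCTR_SP_0_HI pair.
	 */
}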
+/* GPMU POWER COUNTERS */
+#define A5XX_SP_POWER_COUNTER_0_LO		0xA840
+#define A5XX_SP_POWER_COUNTER_0_HI		0xA841
+#define A5XX_SP_POWER_COUNTER_1_LO		0xA842
+#define A5XX_SP_POWER_COUNTER_1_HI		0xA843
+#define A5XX_SP_POWER_COUNTER_2_LO		0xA844
+#define A5XX_SP_POWER_COUNTER_2_HI		0xA845
+#define A5XX_SP_POWER_COUNTER_3_LO		0xA846
+#define A5XX_SP_POWER_COUNTER_3_HI		0xA847
+
+#define A5XX_TP_POWER_COUNTER_0_LO		0xA848
+#define A5XX_TP_POWER_COUNTER_0_HI		0xA849
+#define A5XX_TP_POWER_COUNTER_1_LO		0xA84A
+#define A5XX_TP_POWER_COUNTER_1_HI		0xA84B
+#define A5XX_TP_POWER_COUNTER_2_LO		0xA84C
+#define A5XX_TP_POWER_COUNTER_2_HI		0xA84D
+#define A5XX_TP_POWER_COUNTER_3_LO		0xA84E
+#define A5XX_TP_POWER_COUNTER_3_HI		0xA84F
+
+#define A5XX_RB_POWER_COUNTER_0_LO		0xA850
+#define A5XX_RB_POWER_COUNTER_0_HI		0xA851
+#define A5XX_RB_POWER_COUNTER_1_LO		0xA852
+#define A5XX_RB_POWER_COUNTER_1_HI		0xA853
+#define A5XX_RB_POWER_COUNTER_2_LO		0xA854
+#define A5XX_RB_POWER_COUNTER_2_HI		0xA855
+#define A5XX_RB_POWER_COUNTER_3_LO		0xA856
+#define A5XX_RB_POWER_COUNTER_3_HI		0xA857
+
+#define A5XX_CCU_POWER_COUNTER_0_LO		0xA858
+#define A5XX_CCU_POWER_COUNTER_0_HI		0xA859
+#define A5XX_CCU_POWER_COUNTER_1_LO		0xA85A
+#define A5XX_CCU_POWER_COUNTER_1_HI		0xA85B
+
+#define A5XX_UCHE_POWER_COUNTER_0_LO		0xA85C
+#define A5XX_UCHE_POWER_COUNTER_0_HI		0xA85D
+#define A5XX_UCHE_POWER_COUNTER_1_LO		0xA85E
+#define A5XX_UCHE_POWER_COUNTER_1_HI		0xA85F
+#define A5XX_UCHE_POWER_COUNTER_2_LO		0xA860
+#define A5XX_UCHE_POWER_COUNTER_2_HI		0xA861
+#define A5XX_UCHE_POWER_COUNTER_3_LO		0xA862
+#define A5XX_UCHE_POWER_COUNTER_3_HI		0xA863
+
+#define A5XX_CP_POWER_COUNTER_0_LO		0xA864
+#define A5XX_CP_POWER_COUNTER_0_HI		0xA865
+#define A5XX_CP_POWER_COUNTER_1_LO		0xA866
+#define A5XX_CP_POWER_COUNTER_1_HI		0xA867
+#define A5XX_CP_POWER_COUNTER_2_LO		0xA868
+#define A5XX_CP_POWER_COUNTER_2_HI		0xA869
+#define A5XX_CP_POWER_COUNTER_3_LO		0xA86A
+#define A5XX_CP_POWER_COUNTER_3_HI		0xA86B
+
+#define A5XX_GPMU_POWER_COUNTER_0_LO		0xA86C
+#define A5XX_GPMU_POWER_COUNTER_0_HI		0xA86D
+#define A5XX_GPMU_POWER_COUNTER_1_LO		0xA86E
+#define A5XX_GPMU_POWER_COUNTER_1_HI		0xA86F
+#define A5XX_GPMU_POWER_COUNTER_2_LO		0xA870
+#define A5XX_GPMU_POWER_COUNTER_2_HI		0xA871
+#define A5XX_GPMU_POWER_COUNTER_3_LO		0xA872
+#define A5XX_GPMU_POWER_COUNTER_3_HI		0xA873
+#define A5XX_GPMU_POWER_COUNTER_4_LO		0xA874
+#define A5XX_GPMU_POWER_COUNTER_4_HI		0xA875
+#define A5XX_GPMU_POWER_COUNTER_5_LO		0xA876
+#define A5XX_GPMU_POWER_COUNTER_5_HI		0xA877
+
+#define A5XX_GPMU_POWER_COUNTER_ENABLE		0xA878
+#define A5XX_GPMU_ALWAYS_ON_COUNTER_LO		0xA879
+#define A5XX_GPMU_ALWAYS_ON_COUNTER_HI		0xA87A
+#define A5XX_GPMU_ALWAYS_ON_COUNTER_RESET	0xA87B
+#define A5XX_GPMU_POWER_COUNTER_SELECT_0	0xA87C
+#define A5XX_GPMU_POWER_COUNTER_SELECT_1	0xA87D
+#define A5XX_GPMU_GPMU_SP_CLOCK_CONTROL		0xA880
+
+#define A5XX_GPMU_CLOCK_THROTTLE_CTRL		0xA8A3
+#define A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL	0xA8A8
+
+#define A5XX_GPMU_TEMP_SENSOR_ID		0xAC00
+#define A5XX_GPMU_TEMP_SENSOR_CONFIG		0xAC01
+#define A5XX_GPMU_DELTA_TEMP_THRESHOLD		0xAC03
+#define A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK	0xAC06
+
+#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1	0xAC40
+#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3	0xAC41
+#define A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1		0xAC42
+#define A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3		0xAC43
+#define A5XX_GPMU_BASE_LEAKAGE			0xAC46
+
+#define A5XX_GPMU_GPMU_VOLTAGE			0xAC60
+#define A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS	0xAC61
+#define A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK	0xAC62
+#define A5XX_GPMU_GPMU_PWR_THRESHOLD		0xAC80
+#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL	0xACC4
+#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS	0xACC5
+#define A5XX_GPMU_GPMU_ISENSE_CTRL		0xACD0
+
+#define A5XX_GDPM_CONFIG1			0xB80C
+#define A5XX_GDPM_INT_EN			0xB80F
+#define A5XX_GDPM_INT_MASK			0xB811
+#define A5XX_GPMU_BEC_ENABLE			0xB9A0
+
+/* ISENSE registers */
+#define A5XX_GPU_CS_DECIMAL_ALIGN		0xC16A
+#define A5XX_GPU_CS_SENSOR_PARAM_CORE_1	0xC126
+#define A5XX_GPU_CS_SENSOR_PARAM_CORE_2	0xC127
+#define A5XX_GPU_CS_SW_OV_FUSE_EN		0xC168
+#define A5XX_GPU_CS_SENSOR_GENERAL_STATUS	0xC41A
+#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0	0xC41D
+#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2	0xC41F
+#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4	0xC421
+#define A5XX_GPU_CS_ENABLE_REG			0xC520
+#define A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1	0xC557
+#define A5XX_GPU_CS_AMP_CALIBRATION_DONE	0xC565
+#define A5XX_GPU_CS_ENDPOINT_CALIBRATION_DONE   0xC556
+#endif /* _A5XX_REG_H */
+
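Every performance and power counter above is exposed as a _LO/_HI pair of 32-bit registers. The following is a minimal hedged sketch of assembling the 64-bit value from such a pair; `reg_read_fn`, `rd`, and `read_counter64` are hypothetical names, and the retry-on-rollover loop is one common convention rather than necessarily what this driver does.

#include <stdint.h>

/* Hypothetical register-read callback standing in for the driver's accessor. */
typedef uint32_t (*reg_read_fn)(uint32_t dword_offset);

/* Combine a _LO/_HI counter pair, re-reading HI to guard against rollover. */
static inline uint64_t read_counter64(reg_read_fn rd, uint32_t lo, uint32_t hi)
{
	uint32_t hi1, hi2, lo_val;

	do {
		hi1 = rd(hi);
		lo_val = rd(lo);
		hi2 = rd(hi);
	} while (hi1 != hi2);

	return ((uint64_t)hi1 << 32) | lo_val;
}

/* Usage: read_counter64(rd, A5XX_RBBM_PERFCTR_CP_0_LO, A5XX_RBBM_PERFCTR_CP_0_HI) */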

+ 1242 - 0
qcom/opensource/graphics-kernel/a6xx_reg.h

@@ -0,0 +1,1242 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _A6XX_REG_H
+#define _A6XX_REG_H
+
+/* A6XX interrupt bits */
+#define A6XX_INT_RBBM_GPU_IDLE          0
+#define A6XX_INT_CP_AHB_ERROR           1
+#define A6XX_INT_ATB_ASYNCFIFO_OVERFLOW 6
+#define A6XX_INT_RBBM_GPC_ERROR         7
+#define A6XX_INT_CP_SW                  8
+#define A6XX_INT_CP_HW_ERROR            9
+#define A6XX_INT_CP_CCU_FLUSH_DEPTH_TS  10
+#define A6XX_INT_CP_CCU_FLUSH_COLOR_TS  11
+#define A6XX_INT_CP_CCU_RESOLVE_TS      12
+#define A6XX_INT_CP_IB2                 13
+#define A6XX_INT_CP_IB1                 14
+#define A6XX_INT_CP_RB                  15
+#define A6XX_INT_CP_RB_DONE_TS          17
+#define A6XX_INT_CP_WT_DONE_TS          18
+#define A6XX_INT_CP_CACHE_FLUSH_TS      20
+#define A6XX_INT_RBBM_ATB_BUS_OVERFLOW  22
+#define A6XX_INT_RBBM_HANG_DETECT       23
+#define A6XX_INT_UCHE_OOB_ACCESS        24
+#define A6XX_INT_UCHE_TRAP_INTR         25
+#define A6XX_INT_DEBBUS_INTR_0          26
+#define A6XX_INT_DEBBUS_INTR_1          27
+#define A6XX_INT_TSB_WRITE_ERROR	28
+#define A6XX_INT_ISDB_CPU_IRQ           30
+#define A6XX_INT_ISDB_UNDER_DEBUG       31
+
+/* CP Interrupt bits */
+#define A6XX_CP_OPCODE_ERROR                    0
+#define A6XX_CP_UCODE_ERROR                     1
+#define A6XX_CP_HW_FAULT_ERROR                  2
+#define A6XX_CP_REGISTER_PROTECTION_ERROR       4
+#define A6XX_CP_AHB_ERROR                       5
+#define A6XX_CP_VSD_PARITY_ERROR                6
+#define A6XX_CP_ILLEGAL_INSTR_ERROR             7
+
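The A6XX_INT_* and A6XX_CP_* values above are bit positions rather than ready-made masks. Below is a hedged sketch of combining them into an enable mask; `a6xx_int_mask_example` is a hypothetical name and this particular bit selection is illustrative only.

#include <stdint.h>

#define A6XX_INT_CP_HW_ERROR       9   /* bit positions, defined above */
#define A6XX_INT_RBBM_HANG_DETECT  23

/* Illustrative mask: enable only CP hardware-error and hang-detect interrupts. */
static inline uint32_t a6xx_int_mask_example(void)
{
	return (1u << A6XX_INT_CP_HW_ERROR) |
	       (1u << A6XX_INT_RBBM_HANG_DETECT);
}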
+/* CP registers */
+#define A6XX_CP_RB_BASE                  0x800
+#define A6XX_CP_RB_BASE_HI               0x801
+#define A6XX_CP_RB_CNTL                  0x802
+#define A6XX_CP_RB_RPTR_ADDR_LO          0x804
+#define A6XX_CP_RB_RPTR_ADDR_HI          0x805
+#define A6XX_CP_RB_RPTR                  0x806
+#define A6XX_CP_RB_WPTR                  0x807
+#define A6XX_CP_SQE_CNTL                 0x808
+#define A6XX_CP_CP2GMU_STATUS            0x812
+#define A6XX_CP_HW_FAULT                 0x821
+#define A6XX_CP_INTERRUPT_STATUS         0x823
+#define A6XX_CP_PROTECT_STATUS           0x824
+#define A6XX_CP_STATUS_1                 0x825
+#define A6XX_CP_SQE_INSTR_BASE_LO        0x830
+#define A6XX_CP_SQE_INSTR_BASE_HI        0x831
+#define A6XX_CP_MISC_CNTL                0x840
+#define A6XX_CP_APRIV_CNTL               0x844
+#define A6XX_CP_ROQ_THRESHOLDS_1         0x8C1
+#define A6XX_CP_ROQ_THRESHOLDS_2         0x8C2
+#define A6XX_CP_MEM_POOL_SIZE            0x8C3
+#define A6XX_CP_CHICKEN_DBG              0x841
+#define A6XX_CP_ADDR_MODE_CNTL           0x842
+#define A6XX_CP_DBG_ECO_CNTL             0x843
+#define A6XX_CP_PROTECT_CNTL             0x84F
+#define A6XX_CP_PROTECT_REG              0x850
+#define A6XX_CP_CONTEXT_SWITCH_CNTL      0x8A0
+#define A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO   0x8A1
+#define A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI   0x8A2
+#define A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO   0x8A3
+#define A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI   0x8A4
+#define A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO   0x8A5
+#define A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI   0x8A6
+#define A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO   0x8A7
+#define A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI   0x8A8
+#define A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS 0x8AB
+#define A6XX_CP_PERFCTR_CP_SEL_0         0x8D0
+#define A6XX_CP_PERFCTR_CP_SEL_1         0x8D1
+#define A6XX_CP_PERFCTR_CP_SEL_2         0x8D2
+#define A6XX_CP_PERFCTR_CP_SEL_3         0x8D3
+#define A6XX_CP_PERFCTR_CP_SEL_4         0x8D4
+#define A6XX_CP_PERFCTR_CP_SEL_5         0x8D5
+#define A6XX_CP_PERFCTR_CP_SEL_6         0x8D6
+#define A6XX_CP_PERFCTR_CP_SEL_7         0x8D7
+#define A6XX_CP_PERFCTR_CP_SEL_8         0x8D8
+#define A6XX_CP_PERFCTR_CP_SEL_9         0x8D9
+#define A6XX_CP_PERFCTR_CP_SEL_10        0x8DA
+#define A6XX_CP_PERFCTR_CP_SEL_11        0x8DB
+#define A6XX_CP_PERFCTR_CP_SEL_12        0x8DC
+#define A6XX_CP_PERFCTR_CP_SEL_13        0x8DD
+#define A6XX_CP_CRASH_SCRIPT_BASE_LO     0x900
+#define A6XX_CP_CRASH_SCRIPT_BASE_HI     0x901
+#define A6XX_CP_CRASH_DUMP_CNTL          0x902
+#define A6XX_CP_CRASH_DUMP_STATUS        0x903
+#define A6XX_CP_SQE_STAT_ADDR            0x908
+#define A6XX_CP_SQE_STAT_DATA            0x909
+#define A6XX_CP_DRAW_STATE_ADDR          0x90A
+#define A6XX_CP_DRAW_STATE_DATA          0x90B
+#define A6XX_CP_ROQ_DBG_ADDR             0x90C
+#define A6XX_CP_ROQ_DBG_DATA             0x90D
+#define A6XX_CP_MEM_POOL_DBG_ADDR        0x90E
+#define A6XX_CP_MEM_POOL_DBG_DATA        0x90F
+#define A6XX_CP_SQE_UCODE_DBG_ADDR       0x910
+#define A6XX_CP_SQE_UCODE_DBG_DATA       0x911
+#define A6XX_CP_IB1_BASE                 0x928
+#define A6XX_CP_IB1_BASE_HI              0x929
+#define A6XX_CP_IB1_REM_SIZE             0x92A
+#define A6XX_CP_IB2_BASE                 0x92B
+#define A6XX_CP_IB2_BASE_HI              0x92C
+#define A6XX_CP_IB2_REM_SIZE             0x92D
+#define A6XX_CP_ALWAYS_ON_COUNTER_LO     0x980
+#define A6XX_CP_ALWAYS_ON_COUNTER_HI     0x981
+#define A6XX_CP_ALWAYS_ON_CONTEXT_LO     0x982
+#define A6XX_CP_ALWAYS_ON_CONTEXT_HI     0x983
+#define A6XX_CP_AHB_CNTL                 0x98D
+#define A6XX_CP_APERTURE_CNTL_HOST       0xA00
+#define A6XX_CP_APERTURE_CNTL_CD         0xA03
+#define A6XX_VSC_ADDR_MODE_CNTL          0xC01
+
+/* LPAC registers */
+#define A6XX_CP_LPAC_DRAW_STATE_ADDR     0xB0A
+#define A6XX_CP_LPAC_DRAW_STATE_DATA     0xB0B
+#define A6XX_CP_LPAC_ROQ_DBG_ADDR        0xB0C
+#define A6XX_CP_SQE_AC_UCODE_DBG_ADDR    0xB27
+#define A6XX_CP_SQE_AC_UCODE_DBG_DATA    0xB28
+#define A6XX_CP_SQE_AC_STAT_ADDR         0xB29
+#define A6XX_CP_SQE_AC_STAT_DATA         0xB2A
+#define A6XX_CP_LPAC_ROQ_THRESHOLDS_1    0xB32
+#define A6XX_CP_LPAC_ROQ_THRESHOLDS_2    0xB33
+#define A6XX_CP_LPAC_PROG_FIFO_SIZE      0xB34
+#define A6XX_CP_LPAC_ROQ_DBG_DATA        0xB35
+#define A6XX_CP_LPAC_FIFO_DBG_DATA       0xB36
+#define A6XX_CP_LPAC_FIFO_DBG_ADDR       0xB40
+
+/* RBBM registers */
+#define A6XX_RBBM_INT_0_STATUS                   0x201
+#define A6XX_RBBM_STATUS                         0x210
+#define A6XX_RBBM_STATUS3                        0x213
+#define A6XX_RBBM_VBIF_GX_RESET_STATUS           0x215
+#define A6XX_RBBM_PERFCTR_CP_0_LO                0x400
+#define A6XX_RBBM_PERFCTR_CP_0_HI                0x401
+#define A6XX_RBBM_PERFCTR_CP_1_LO                0x402
+#define A6XX_RBBM_PERFCTR_CP_1_HI                0x403
+#define A6XX_RBBM_PERFCTR_CP_2_LO                0x404
+#define A6XX_RBBM_PERFCTR_CP_2_HI                0x405
+#define A6XX_RBBM_PERFCTR_CP_3_LO                0x406
+#define A6XX_RBBM_PERFCTR_CP_3_HI                0x407
+#define A6XX_RBBM_PERFCTR_CP_4_LO                0x408
+#define A6XX_RBBM_PERFCTR_CP_4_HI                0x409
+#define A6XX_RBBM_PERFCTR_CP_5_LO                0x40a
+#define A6XX_RBBM_PERFCTR_CP_5_HI                0x40b
+#define A6XX_RBBM_PERFCTR_CP_6_LO                0x40c
+#define A6XX_RBBM_PERFCTR_CP_6_HI                0x40d
+#define A6XX_RBBM_PERFCTR_CP_7_LO                0x40e
+#define A6XX_RBBM_PERFCTR_CP_7_HI                0x40f
+#define A6XX_RBBM_PERFCTR_CP_8_LO                0x410
+#define A6XX_RBBM_PERFCTR_CP_8_HI                0x411
+#define A6XX_RBBM_PERFCTR_CP_9_LO                0x412
+#define A6XX_RBBM_PERFCTR_CP_9_HI                0x413
+#define A6XX_RBBM_PERFCTR_CP_10_LO               0x414
+#define A6XX_RBBM_PERFCTR_CP_10_HI               0x415
+#define A6XX_RBBM_PERFCTR_CP_11_LO               0x416
+#define A6XX_RBBM_PERFCTR_CP_11_HI               0x417
+#define A6XX_RBBM_PERFCTR_CP_12_LO               0x418
+#define A6XX_RBBM_PERFCTR_CP_12_HI               0x419
+#define A6XX_RBBM_PERFCTR_CP_13_LO               0x41a
+#define A6XX_RBBM_PERFCTR_CP_13_HI               0x41b
+#define A6XX_RBBM_PERFCTR_RBBM_0_LO              0x41c
+#define A6XX_RBBM_PERFCTR_RBBM_0_HI              0x41d
+#define A6XX_RBBM_PERFCTR_RBBM_1_LO              0x41e
+#define A6XX_RBBM_PERFCTR_RBBM_1_HI              0x41f
+#define A6XX_RBBM_PERFCTR_RBBM_2_LO              0x420
+#define A6XX_RBBM_PERFCTR_RBBM_2_HI              0x421
+#define A6XX_RBBM_PERFCTR_RBBM_3_LO              0x422
+#define A6XX_RBBM_PERFCTR_RBBM_3_HI              0x423
+#define A6XX_RBBM_PERFCTR_PC_0_LO                0x424
+#define A6XX_RBBM_PERFCTR_PC_0_HI                0x425
+#define A6XX_RBBM_PERFCTR_PC_1_LO                0x426
+#define A6XX_RBBM_PERFCTR_PC_1_HI                0x427
+#define A6XX_RBBM_PERFCTR_PC_2_LO                0x428
+#define A6XX_RBBM_PERFCTR_PC_2_HI                0x429
+#define A6XX_RBBM_PERFCTR_PC_3_LO                0x42a
+#define A6XX_RBBM_PERFCTR_PC_3_HI                0x42b
+#define A6XX_RBBM_PERFCTR_PC_4_LO                0x42c
+#define A6XX_RBBM_PERFCTR_PC_4_HI                0x42d
+#define A6XX_RBBM_PERFCTR_PC_5_LO                0x42e
+#define A6XX_RBBM_PERFCTR_PC_5_HI                0x42f
+#define A6XX_RBBM_PERFCTR_PC_6_LO                0x430
+#define A6XX_RBBM_PERFCTR_PC_6_HI                0x431
+#define A6XX_RBBM_PERFCTR_PC_7_LO                0x432
+#define A6XX_RBBM_PERFCTR_PC_7_HI                0x433
+#define A6XX_RBBM_PERFCTR_VFD_0_LO               0x434
+#define A6XX_RBBM_PERFCTR_VFD_0_HI               0x435
+#define A6XX_RBBM_PERFCTR_VFD_1_LO               0x436
+#define A6XX_RBBM_PERFCTR_VFD_1_HI               0x437
+#define A6XX_RBBM_PERFCTR_VFD_2_LO               0x438
+#define A6XX_RBBM_PERFCTR_VFD_2_HI               0x439
+#define A6XX_RBBM_PERFCTR_VFD_3_LO               0x43a
+#define A6XX_RBBM_PERFCTR_VFD_3_HI               0x43b
+#define A6XX_RBBM_PERFCTR_VFD_4_LO               0x43c
+#define A6XX_RBBM_PERFCTR_VFD_4_HI               0x43d
+#define A6XX_RBBM_PERFCTR_VFD_5_LO               0x43e
+#define A6XX_RBBM_PERFCTR_VFD_5_HI               0x43f
+#define A6XX_RBBM_PERFCTR_VFD_6_LO               0x440
+#define A6XX_RBBM_PERFCTR_VFD_6_HI               0x441
+#define A6XX_RBBM_PERFCTR_VFD_7_LO               0x442
+#define A6XX_RBBM_PERFCTR_VFD_7_HI               0x443
+#define A6XX_RBBM_PERFCTR_HLSQ_0_LO              0x444
+#define A6XX_RBBM_PERFCTR_HLSQ_0_HI              0x445
+#define A6XX_RBBM_PERFCTR_HLSQ_1_LO              0x446
+#define A6XX_RBBM_PERFCTR_HLSQ_1_HI              0x447
+#define A6XX_RBBM_PERFCTR_HLSQ_2_LO              0x448
+#define A6XX_RBBM_PERFCTR_HLSQ_2_HI              0x449
+#define A6XX_RBBM_PERFCTR_HLSQ_3_LO              0x44a
+#define A6XX_RBBM_PERFCTR_HLSQ_3_HI              0x44b
+#define A6XX_RBBM_PERFCTR_HLSQ_4_LO              0x44c
+#define A6XX_RBBM_PERFCTR_HLSQ_4_HI              0x44d
+#define A6XX_RBBM_PERFCTR_HLSQ_5_LO              0x44e
+#define A6XX_RBBM_PERFCTR_HLSQ_5_HI              0x44f
+#define A6XX_RBBM_PERFCTR_VPC_0_LO               0x450
+#define A6XX_RBBM_PERFCTR_VPC_0_HI               0x451
+#define A6XX_RBBM_PERFCTR_VPC_1_LO               0x452
+#define A6XX_RBBM_PERFCTR_VPC_1_HI               0x453
+#define A6XX_RBBM_PERFCTR_VPC_2_LO               0x454
+#define A6XX_RBBM_PERFCTR_VPC_2_HI               0x455
+#define A6XX_RBBM_PERFCTR_VPC_3_LO               0x456
+#define A6XX_RBBM_PERFCTR_VPC_3_HI               0x457
+#define A6XX_RBBM_PERFCTR_VPC_4_LO               0x458
+#define A6XX_RBBM_PERFCTR_VPC_4_HI               0x459
+#define A6XX_RBBM_PERFCTR_VPC_5_LO               0x45a
+#define A6XX_RBBM_PERFCTR_VPC_5_HI               0x45b
+#define A6XX_RBBM_PERFCTR_CCU_0_LO               0x45c
+#define A6XX_RBBM_PERFCTR_CCU_0_HI               0x45d
+#define A6XX_RBBM_PERFCTR_CCU_1_LO               0x45e
+#define A6XX_RBBM_PERFCTR_CCU_1_HI               0x45f
+#define A6XX_RBBM_PERFCTR_CCU_2_LO               0x460
+#define A6XX_RBBM_PERFCTR_CCU_2_HI               0x461
+#define A6XX_RBBM_PERFCTR_CCU_3_LO               0x462
+#define A6XX_RBBM_PERFCTR_CCU_3_HI               0x463
+#define A6XX_RBBM_PERFCTR_CCU_4_LO               0x464
+#define A6XX_RBBM_PERFCTR_CCU_4_HI               0x465
+#define A6XX_RBBM_PERFCTR_TSE_0_LO               0x466
+#define A6XX_RBBM_PERFCTR_TSE_0_HI               0x467
+#define A6XX_RBBM_PERFCTR_TSE_1_LO               0x468
+#define A6XX_RBBM_PERFCTR_TSE_1_HI               0x469
+#define A6XX_RBBM_PERFCTR_TSE_2_LO               0x46a
+#define A6XX_RBBM_PERFCTR_TSE_2_HI               0x46b
+#define A6XX_RBBM_PERFCTR_TSE_3_LO               0x46c
+#define A6XX_RBBM_PERFCTR_TSE_3_HI               0x46d
+#define A6XX_RBBM_PERFCTR_RAS_0_LO               0x46e
+#define A6XX_RBBM_PERFCTR_RAS_0_HI               0x46f
+#define A6XX_RBBM_PERFCTR_RAS_1_LO               0x470
+#define A6XX_RBBM_PERFCTR_RAS_1_HI               0x471
+#define A6XX_RBBM_PERFCTR_RAS_2_LO               0x472
+#define A6XX_RBBM_PERFCTR_RAS_2_HI               0x473
+#define A6XX_RBBM_PERFCTR_RAS_3_LO               0x474
+#define A6XX_RBBM_PERFCTR_RAS_3_HI               0x475
+#define A6XX_RBBM_PERFCTR_UCHE_0_LO              0x476
+#define A6XX_RBBM_PERFCTR_UCHE_0_HI              0x477
+#define A6XX_RBBM_PERFCTR_UCHE_1_LO              0x478
+#define A6XX_RBBM_PERFCTR_UCHE_1_HI              0x479
+#define A6XX_RBBM_PERFCTR_UCHE_2_LO              0x47a
+#define A6XX_RBBM_PERFCTR_UCHE_2_HI              0x47b
+#define A6XX_RBBM_PERFCTR_UCHE_3_LO              0x47c
+#define A6XX_RBBM_PERFCTR_UCHE_3_HI              0x47d
+#define A6XX_RBBM_PERFCTR_UCHE_4_LO              0x47e
+#define A6XX_RBBM_PERFCTR_UCHE_4_HI              0x47f
+#define A6XX_RBBM_PERFCTR_UCHE_5_LO              0x480
+#define A6XX_RBBM_PERFCTR_UCHE_5_HI              0x481
+#define A6XX_RBBM_PERFCTR_UCHE_6_LO              0x482
+#define A6XX_RBBM_PERFCTR_UCHE_6_HI              0x483
+#define A6XX_RBBM_PERFCTR_UCHE_7_LO              0x484
+#define A6XX_RBBM_PERFCTR_UCHE_7_HI              0x485
+#define A6XX_RBBM_PERFCTR_UCHE_8_LO              0x486
+#define A6XX_RBBM_PERFCTR_UCHE_8_HI              0x487
+#define A6XX_RBBM_PERFCTR_UCHE_9_LO              0x488
+#define A6XX_RBBM_PERFCTR_UCHE_9_HI              0x489
+#define A6XX_RBBM_PERFCTR_UCHE_10_LO             0x48a
+#define A6XX_RBBM_PERFCTR_UCHE_10_HI             0x48b
+#define A6XX_RBBM_PERFCTR_UCHE_11_LO             0x48c
+#define A6XX_RBBM_PERFCTR_UCHE_11_HI             0x48d
+#define A6XX_RBBM_PERFCTR_TP_0_LO                0x48e
+#define A6XX_RBBM_PERFCTR_TP_0_HI                0x48f
+#define A6XX_RBBM_PERFCTR_TP_1_LO                0x490
+#define A6XX_RBBM_PERFCTR_TP_1_HI                0x491
+#define A6XX_RBBM_PERFCTR_TP_2_LO                0x492
+#define A6XX_RBBM_PERFCTR_TP_2_HI                0x493
+#define A6XX_RBBM_PERFCTR_TP_3_LO                0x494
+#define A6XX_RBBM_PERFCTR_TP_3_HI                0x495
+#define A6XX_RBBM_PERFCTR_TP_4_LO                0x496
+#define A6XX_RBBM_PERFCTR_TP_4_HI                0x497
+#define A6XX_RBBM_PERFCTR_TP_5_LO                0x498
+#define A6XX_RBBM_PERFCTR_TP_5_HI                0x499
+#define A6XX_RBBM_PERFCTR_TP_6_LO                0x49a
+#define A6XX_RBBM_PERFCTR_TP_6_HI                0x49b
+#define A6XX_RBBM_PERFCTR_TP_7_LO                0x49c
+#define A6XX_RBBM_PERFCTR_TP_7_HI                0x49d
+#define A6XX_RBBM_PERFCTR_TP_8_LO                0x49e
+#define A6XX_RBBM_PERFCTR_TP_8_HI                0x49f
+#define A6XX_RBBM_PERFCTR_TP_9_LO                0x4a0
+#define A6XX_RBBM_PERFCTR_TP_9_HI                0x4a1
+#define A6XX_RBBM_PERFCTR_TP_10_LO               0x4a2
+#define A6XX_RBBM_PERFCTR_TP_10_HI               0x4a3
+#define A6XX_RBBM_PERFCTR_TP_11_LO               0x4a4
+#define A6XX_RBBM_PERFCTR_TP_11_HI               0x4a5
+#define A6XX_RBBM_PERFCTR_SP_0_LO                0x4a6
+#define A6XX_RBBM_PERFCTR_SP_0_HI                0x4a7
+#define A6XX_RBBM_PERFCTR_SP_1_LO                0x4a8
+#define A6XX_RBBM_PERFCTR_SP_1_HI                0x4a9
+#define A6XX_RBBM_PERFCTR_SP_2_LO                0x4aa
+#define A6XX_RBBM_PERFCTR_SP_2_HI                0x4ab
+#define A6XX_RBBM_PERFCTR_SP_3_LO                0x4ac
+#define A6XX_RBBM_PERFCTR_SP_3_HI                0x4ad
+#define A6XX_RBBM_PERFCTR_SP_4_LO                0x4ae
+#define A6XX_RBBM_PERFCTR_SP_4_HI                0x4af
+#define A6XX_RBBM_PERFCTR_SP_5_LO                0x4b0
+#define A6XX_RBBM_PERFCTR_SP_5_HI                0x4b1
+#define A6XX_RBBM_PERFCTR_SP_6_LO                0x4b2
+#define A6XX_RBBM_PERFCTR_SP_6_HI                0x4b3
+#define A6XX_RBBM_PERFCTR_SP_7_LO                0x4b4
+#define A6XX_RBBM_PERFCTR_SP_7_HI                0x4b5
+#define A6XX_RBBM_PERFCTR_SP_8_LO                0x4b6
+#define A6XX_RBBM_PERFCTR_SP_8_HI                0x4b7
+#define A6XX_RBBM_PERFCTR_SP_9_LO                0x4b8
+#define A6XX_RBBM_PERFCTR_SP_9_HI                0x4b9
+#define A6XX_RBBM_PERFCTR_SP_10_LO               0x4ba
+#define A6XX_RBBM_PERFCTR_SP_10_HI               0x4bb
+#define A6XX_RBBM_PERFCTR_SP_11_LO               0x4bc
+#define A6XX_RBBM_PERFCTR_SP_11_HI               0x4bd
+#define A6XX_RBBM_PERFCTR_SP_12_LO               0x4be
+#define A6XX_RBBM_PERFCTR_SP_12_HI               0x4bf
+#define A6XX_RBBM_PERFCTR_SP_13_LO               0x4c0
+#define A6XX_RBBM_PERFCTR_SP_13_HI               0x4c1
+#define A6XX_RBBM_PERFCTR_SP_14_LO               0x4c2
+#define A6XX_RBBM_PERFCTR_SP_14_HI               0x4c3
+#define A6XX_RBBM_PERFCTR_SP_15_LO               0x4c4
+#define A6XX_RBBM_PERFCTR_SP_15_HI               0x4c5
+#define A6XX_RBBM_PERFCTR_SP_16_LO               0x4c6
+#define A6XX_RBBM_PERFCTR_SP_16_HI               0x4c7
+#define A6XX_RBBM_PERFCTR_SP_17_LO               0x4c8
+#define A6XX_RBBM_PERFCTR_SP_17_HI               0x4c9
+#define A6XX_RBBM_PERFCTR_SP_18_LO               0x4ca
+#define A6XX_RBBM_PERFCTR_SP_18_HI               0x4cb
+#define A6XX_RBBM_PERFCTR_SP_19_LO               0x4cc
+#define A6XX_RBBM_PERFCTR_SP_19_HI               0x4cd
+#define A6XX_RBBM_PERFCTR_SP_20_LO               0x4ce
+#define A6XX_RBBM_PERFCTR_SP_20_HI               0x4cf
+#define A6XX_RBBM_PERFCTR_SP_21_LO               0x4d0
+#define A6XX_RBBM_PERFCTR_SP_21_HI               0x4d1
+#define A6XX_RBBM_PERFCTR_SP_22_LO               0x4d2
+#define A6XX_RBBM_PERFCTR_SP_22_HI               0x4d3
+#define A6XX_RBBM_PERFCTR_SP_23_LO               0x4d4
+#define A6XX_RBBM_PERFCTR_SP_23_HI               0x4d5
+#define A6XX_RBBM_PERFCTR_RB_0_LO                0x4d6
+#define A6XX_RBBM_PERFCTR_RB_0_HI                0x4d7
+#define A6XX_RBBM_PERFCTR_RB_1_LO                0x4d8
+#define A6XX_RBBM_PERFCTR_RB_1_HI                0x4d9
+#define A6XX_RBBM_PERFCTR_RB_2_LO                0x4da
+#define A6XX_RBBM_PERFCTR_RB_2_HI                0x4db
+#define A6XX_RBBM_PERFCTR_RB_3_LO                0x4dc
+#define A6XX_RBBM_PERFCTR_RB_3_HI                0x4dd
+#define A6XX_RBBM_PERFCTR_RB_4_LO                0x4de
+#define A6XX_RBBM_PERFCTR_RB_4_HI                0x4df
+#define A6XX_RBBM_PERFCTR_RB_5_LO                0x4e0
+#define A6XX_RBBM_PERFCTR_RB_5_HI                0x4e1
+#define A6XX_RBBM_PERFCTR_RB_6_LO                0x4e2
+#define A6XX_RBBM_PERFCTR_RB_6_HI                0x4e3
+#define A6XX_RBBM_PERFCTR_RB_7_LO                0x4e4
+#define A6XX_RBBM_PERFCTR_RB_7_HI                0x4e5
+#define A6XX_RBBM_PERFCTR_VSC_0_LO               0x4e6
+#define A6XX_RBBM_PERFCTR_VSC_0_HI               0x4e7
+#define A6XX_RBBM_PERFCTR_VSC_1_LO               0x4e8
+#define A6XX_RBBM_PERFCTR_VSC_1_HI               0x4e9
+#define A6XX_RBBM_PERFCTR_LRZ_0_LO               0x4ea
+#define A6XX_RBBM_PERFCTR_LRZ_0_HI               0x4eb
+#define A6XX_RBBM_PERFCTR_LRZ_1_LO               0x4ec
+#define A6XX_RBBM_PERFCTR_LRZ_1_HI               0x4ed
+#define A6XX_RBBM_PERFCTR_LRZ_2_LO               0x4ee
+#define A6XX_RBBM_PERFCTR_LRZ_2_HI               0x4ef
+#define A6XX_RBBM_PERFCTR_LRZ_3_LO               0x4f0
+#define A6XX_RBBM_PERFCTR_LRZ_3_HI               0x4f1
+#define A6XX_RBBM_PERFCTR_CMP_0_LO               0x4f2
+#define A6XX_RBBM_PERFCTR_CMP_0_HI               0x4f3
+#define A6XX_RBBM_PERFCTR_CMP_1_LO               0x4f4
+#define A6XX_RBBM_PERFCTR_CMP_1_HI               0x4f5
+#define A6XX_RBBM_PERFCTR_CMP_2_LO               0x4f6
+#define A6XX_RBBM_PERFCTR_CMP_2_HI               0x4f7
+#define A6XX_RBBM_PERFCTR_CMP_3_LO               0x4f8
+#define A6XX_RBBM_PERFCTR_CMP_3_HI               0x4f9
+#define A6XX_RBBM_PERFCTR_CNTL                   0x500
+#define A6XX_RBBM_PERFCTR_LOAD_CMD0              0x501
+#define A6XX_RBBM_PERFCTR_LOAD_CMD1              0x502
+#define A6XX_RBBM_PERFCTR_LOAD_CMD2              0x503
+#define A6XX_RBBM_PERFCTR_LOAD_CMD3              0x504
+#define A6XX_RBBM_PERFCTR_LOAD_VALUE_LO          0x505
+#define A6XX_RBBM_PERFCTR_LOAD_VALUE_HI          0x506
+#define A6XX_RBBM_PERFCTR_RBBM_SEL_0             0x507
+#define A6XX_RBBM_PERFCTR_RBBM_SEL_1             0x508
+#define A6XX_RBBM_PERFCTR_RBBM_SEL_2             0x509
+#define A6XX_RBBM_PERFCTR_RBBM_SEL_3             0x50A
+#define A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED        0x50B
+#define A6XX_RBBM_PERFCTR_SRAM_INIT_CMD          0x50e
+#define A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS       0x50f
+
+#define A6XX_RBBM_ISDB_CNT                       0x533
+#define A6XX_RBBM_NC_MODE_CNTL                   0x534
+#define A6XX_RBBM_SNAPSHOT_STATUS                0x535
+#define A6XX_RBBM_LPAC_GBIF_CLIENT_QOS_CNTL      0x5ff
+
+#define A6XX_RBBM_SECVID_TRUST_CNTL              0xF400
+#define A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO     0xF800
+#define A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI     0xF801
+#define A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE        0xF802
+#define A6XX_RBBM_SECVID_TSB_CNTL                0xF803
+#define A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL      0xF810
+
+#define A6XX_RBBM_VBIF_CLIENT_QOS_CNTL   0x00010
+#define A6XX_RBBM_GBIF_CLIENT_QOS_CNTL   0x00011
+#define A6XX_RBBM_GBIF_HALT              0x00016
+#define A6XX_RBBM_GBIF_HALT_ACK          0x00017
+#define A6XX_RBBM_GPR0_CNTL              0x00018
+#define A6XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0001f
+#define A6XX_RBBM_INT_CLEAR_CMD          0x00037
+#define A6XX_RBBM_INT_0_MASK             0x00038
+#define A6XX_RBBM_INT_2_MASK             0x0003A
+#define A6XX_RBBM_SP_HYST_CNT            0x00042
+#define A6XX_RBBM_SW_RESET_CMD           0x00043
+#define A6XX_RBBM_RAC_THRESHOLD_CNT      0x00044
+#define A6XX_RBBM_BLOCK_SW_RESET_CMD     0x00045
+#define A6XX_RBBM_BLOCK_SW_RESET_CMD2    0x00046
+#define A6XX_RBBM_BLOCK_GX_RETENTION_CNTL 0x00050
+#define A6XX_RBBM_CLOCK_CNTL             0x000ae
+#define A6XX_RBBM_CLOCK_CNTL_SP0         0x000b0
+#define A6XX_RBBM_CLOCK_CNTL_SP1         0x000b1
+#define A6XX_RBBM_CLOCK_CNTL_SP2         0x000b2
+#define A6XX_RBBM_CLOCK_CNTL_SP3         0x000b3
+#define A6XX_RBBM_CLOCK_CNTL2_SP0        0x000b4
+#define A6XX_RBBM_CLOCK_CNTL2_SP1        0x000b5
+#define A6XX_RBBM_CLOCK_CNTL2_SP2        0x000b6
+#define A6XX_RBBM_CLOCK_CNTL2_SP3        0x000b7
+#define A6XX_RBBM_CLOCK_DELAY_SP0        0x000b8
+#define A6XX_RBBM_CLOCK_DELAY_SP1        0x000b9
+#define A6XX_RBBM_CLOCK_DELAY_SP2        0x000ba
+#define A6XX_RBBM_CLOCK_DELAY_SP3        0x000bb
+#define A6XX_RBBM_CLOCK_HYST_SP0         0x000bc
+#define A6XX_RBBM_CLOCK_HYST_SP1         0x000bd
+#define A6XX_RBBM_CLOCK_HYST_SP2         0x000be
+#define A6XX_RBBM_CLOCK_HYST_SP3         0x000bf
+#define A6XX_RBBM_CLOCK_CNTL_TP0         0x000c0
+#define A6XX_RBBM_CLOCK_CNTL_TP1         0x000c1
+#define A6XX_RBBM_CLOCK_CNTL_TP2         0x000c2
+#define A6XX_RBBM_CLOCK_CNTL_TP3         0x000c3
+#define A6XX_RBBM_CLOCK_CNTL2_TP0        0x000c4
+#define A6XX_RBBM_CLOCK_CNTL2_TP1        0x000c5
+#define A6XX_RBBM_CLOCK_CNTL2_TP2        0x000c6
+#define A6XX_RBBM_CLOCK_CNTL2_TP3        0x000c7
+#define A6XX_RBBM_CLOCK_CNTL3_TP0        0x000c8
+#define A6XX_RBBM_CLOCK_CNTL3_TP1        0x000c9
+#define A6XX_RBBM_CLOCK_CNTL3_TP2        0x000ca
+#define A6XX_RBBM_CLOCK_CNTL3_TP3        0x000cb
+#define A6XX_RBBM_CLOCK_CNTL4_TP0        0x000cc
+#define A6XX_RBBM_CLOCK_CNTL4_TP1        0x000cd
+#define A6XX_RBBM_CLOCK_CNTL4_TP2        0x000ce
+#define A6XX_RBBM_CLOCK_CNTL4_TP3        0x000cf
+#define A6XX_RBBM_CLOCK_DELAY_TP0        0x000d0
+#define A6XX_RBBM_CLOCK_DELAY_TP1        0x000d1
+#define A6XX_RBBM_CLOCK_DELAY_TP2        0x000d2
+#define A6XX_RBBM_CLOCK_DELAY_TP3        0x000d3
+#define A6XX_RBBM_CLOCK_DELAY2_TP0       0x000d4
+#define A6XX_RBBM_CLOCK_DELAY2_TP1       0x000d5
+#define A6XX_RBBM_CLOCK_DELAY2_TP2       0x000d6
+#define A6XX_RBBM_CLOCK_DELAY2_TP3       0x000d7
+#define A6XX_RBBM_CLOCK_DELAY3_TP0       0x000d8
+#define A6XX_RBBM_CLOCK_DELAY3_TP1       0x000d9
+#define A6XX_RBBM_CLOCK_DELAY3_TP2       0x000da
+#define A6XX_RBBM_CLOCK_DELAY3_TP3       0x000db
+#define A6XX_RBBM_CLOCK_DELAY4_TP0       0x000dc
+#define A6XX_RBBM_CLOCK_DELAY4_TP1       0x000dd
+#define A6XX_RBBM_CLOCK_DELAY4_TP2       0x000de
+#define A6XX_RBBM_CLOCK_DELAY4_TP3       0x000df
+#define A6XX_RBBM_CLOCK_HYST_TP0         0x000e0
+#define A6XX_RBBM_CLOCK_HYST_TP1         0x000e1
+#define A6XX_RBBM_CLOCK_HYST_TP2         0x000e2
+#define A6XX_RBBM_CLOCK_HYST_TP3         0x000e3
+#define A6XX_RBBM_CLOCK_HYST2_TP0        0x000e4
+#define A6XX_RBBM_CLOCK_HYST2_TP1        0x000e5
+#define A6XX_RBBM_CLOCK_HYST2_TP2        0x000e6
+#define A6XX_RBBM_CLOCK_HYST2_TP3        0x000e7
+#define A6XX_RBBM_CLOCK_HYST3_TP0        0x000e8
+#define A6XX_RBBM_CLOCK_HYST3_TP1        0x000e9
+#define A6XX_RBBM_CLOCK_HYST3_TP2        0x000ea
+#define A6XX_RBBM_CLOCK_HYST3_TP3        0x000eb
+#define A6XX_RBBM_CLOCK_HYST4_TP0        0x000ec
+#define A6XX_RBBM_CLOCK_HYST4_TP1        0x000ed
+#define A6XX_RBBM_CLOCK_HYST4_TP2        0x000ee
+#define A6XX_RBBM_CLOCK_HYST4_TP3        0x000ef
+#define A6XX_RBBM_CLOCK_CNTL_RB0         0x000f0
+#define A6XX_RBBM_CLOCK_CNTL_RB1         0x000f1
+#define A6XX_RBBM_CLOCK_CNTL_RB2         0x000f2
+#define A6XX_RBBM_CLOCK_CNTL_RB3         0x000f3
+#define A6XX_RBBM_CLOCK_CNTL2_RB0        0x000f4
+#define A6XX_RBBM_CLOCK_CNTL2_RB1        0x000f5
+#define A6XX_RBBM_CLOCK_CNTL2_RB2        0x000f6
+#define A6XX_RBBM_CLOCK_CNTL2_RB3        0x000f7
+#define A6XX_RBBM_CLOCK_CNTL_CCU0        0x000f8
+#define A6XX_RBBM_CLOCK_CNTL_CCU1        0x000f9
+#define A6XX_RBBM_CLOCK_CNTL_CCU2        0x000fa
+#define A6XX_RBBM_CLOCK_CNTL_CCU3        0x000fb
+#define A6XX_RBBM_CLOCK_HYST_RB_CCU0     0x00100
+#define A6XX_RBBM_CLOCK_HYST_RB_CCU1     0x00101
+#define A6XX_RBBM_CLOCK_HYST_RB_CCU2     0x00102
+#define A6XX_RBBM_CLOCK_HYST_RB_CCU3     0x00103
+#define A6XX_RBBM_CLOCK_CNTL_RAC         0x00104
+#define A6XX_RBBM_CLOCK_CNTL2_RAC        0x00105
+#define A6XX_RBBM_CLOCK_DELAY_RAC        0x00106
+#define A6XX_RBBM_CLOCK_HYST_RAC         0x00107
+#define A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00108
+#define A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00109
+#define A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0010a
+#define A6XX_RBBM_CLOCK_CNTL_UCHE        0x0010b
+#define A6XX_RBBM_CLOCK_CNTL2_UCHE       0x0010c
+#define A6XX_RBBM_CLOCK_CNTL3_UCHE       0x0010d
+#define A6XX_RBBM_CLOCK_CNTL4_UCHE       0x0010e
+#define A6XX_RBBM_CLOCK_DELAY_UCHE       0x0010f
+#define A6XX_RBBM_CLOCK_HYST_UCHE        0x00110
+#define A6XX_RBBM_CLOCK_MODE_VFD         0x00111
+#define A6XX_RBBM_CLOCK_DELAY_VFD        0x00112
+#define A6XX_RBBM_CLOCK_HYST_VFD         0x00113
+#define A6XX_RBBM_CLOCK_MODE_GPC         0x00114
+#define A6XX_RBBM_CLOCK_DELAY_GPC        0x00115
+#define A6XX_RBBM_CLOCK_HYST_GPC         0x00116
+#define A6XX_RBBM_CLOCK_DELAY_HLSQ_2	 0x00117
+#define A6XX_RBBM_CLOCK_CNTL_GMU_GX      0x00118
+#define A6XX_RBBM_CLOCK_DELAY_GMU_GX     0x00119
+#define A6XX_RBBM_CLOCK_CNTL_TEX_FCHE    0x00120
+#define A6XX_RBBM_CLOCK_DELAY_TEX_FCHE   0x00121
+#define A6XX_RBBM_CLOCK_HYST_TEX_FCHE    0x00122
+#define A6XX_RBBM_CLOCK_HYST_GMU_GX      0x0011a
+#define A6XX_RBBM_CLOCK_MODE_HLSQ	 0x0011b
+#define A6XX_RBBM_CLOCK_DELAY_HLSQ       0x0011c
+#define A6XX_RBBM_CLOCK_HYST_HLSQ        0x0011d
+#define A6XX_RBBM_CLOCK_CNTL_FCHE        0x00123
+#define A6XX_RBBM_CLOCK_DELAY_FCHE       0x00124
+#define A6XX_RBBM_CLOCK_HYST_FCHE        0x00125
+#define A6XX_RBBM_CLOCK_CNTL_MHUB        0x00126
+#define A6XX_RBBM_CLOCK_DELAY_MHUB       0x00127
+#define A6XX_RBBM_CLOCK_HYST_MHUB        0x00128
+#define A6XX_RBBM_CLOCK_DELAY_GLC        0x00129
+#define A6XX_RBBM_CLOCK_HYST_GLC         0x0012a
+#define A6XX_RBBM_CLOCK_CNTL_GLC         0x0012b
+
+#define A6XX_GMUAO_GMU_CGC_MODE_CNTL     0x23b09
+#define A6XX_GMUAO_GMU_CGC_DELAY_CNTL    0x23b0a
+#define A6XX_GMUAO_GMU_CGC_HYST_CNTL     0x23b0b
+#define A6XX_GMUCX_GMU_WFI_CONFIG        0x1f802
+#define A6XX_GMUGX_GMU_SP_RF_CONTROL_0   0x1a883
+#define A6XX_GMUGX_GMU_SP_RF_CONTROL_1   0x1a884
+
+/* DBGC_CFG registers */
+#define A6XX_DBGC_CFG_DBGBUS_SEL_A                  0x600
+#define A6XX_DBGC_CFG_DBGBUS_SEL_B                  0x601
+#define A6XX_DBGC_CFG_DBGBUS_SEL_C                  0x602
+#define A6XX_DBGC_CFG_DBGBUS_SEL_D                  0x603
+#define A6XX_DBGC_CFG_DBGBUS_SEL_PING_INDEX_SHIFT   0x0
+#define A6XX_DBGC_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8
+#define A6XX_DBGC_CFG_DBGBUS_CNTLT                  0x604
+#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN_SHIFT    0x0
+#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU_SHIFT      0xC
+#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT_SHIFT       0x1C
+#define A6XX_DBGC_CFG_DBGBUS_CNTLM                  0x605
+#define A6XX_DBGC_CFG_DBGBUS_CTLTM_ENABLE_SHIFT     0x18
+#define A6XX_DBGC_CFG_DBGBUS_OPL                    0x606
+#define A6XX_DBGC_CFG_DBGBUS_OPE                    0x607
+#define A6XX_DBGC_CFG_DBGBUS_IVTL_0                 0x608
+#define A6XX_DBGC_CFG_DBGBUS_IVTL_1                 0x609
+#define A6XX_DBGC_CFG_DBGBUS_IVTL_2                 0x60a
+#define A6XX_DBGC_CFG_DBGBUS_IVTL_3                 0x60b
+#define A6XX_DBGC_CFG_DBGBUS_MASKL_0                0x60c
+#define A6XX_DBGC_CFG_DBGBUS_MASKL_1                0x60d
+#define A6XX_DBGC_CFG_DBGBUS_MASKL_2                0x60e
+#define A6XX_DBGC_CFG_DBGBUS_MASKL_3                0x60f
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0                0x610
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1                0x611
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL0_SHIFT           0x0
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL1_SHIFT           0x4
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL2_SHIFT           0x8
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL3_SHIFT           0xC
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL4_SHIFT           0x10
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL5_SHIFT           0x14
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL6_SHIFT           0x18
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL7_SHIFT           0x1C
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL8_SHIFT           0x0
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL9_SHIFT           0x4
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL10_SHIFT          0x8
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL11_SHIFT          0xC
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL12_SHIFT          0x10
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL13_SHIFT          0x14
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL14_SHIFT          0x18
+#define A6XX_DBGC_CFG_DBGBUS_BYTEL15_SHIFT          0x1C
+#define A6XX_DBGC_CFG_DBGBUS_IVTE_0                 0x612
+#define A6XX_DBGC_CFG_DBGBUS_IVTE_1                 0x613
+#define A6XX_DBGC_CFG_DBGBUS_IVTE_2                 0x614
+#define A6XX_DBGC_CFG_DBGBUS_IVTE_3                 0x615
+#define A6XX_DBGC_CFG_DBGBUS_MASKE_0                0x616
+#define A6XX_DBGC_CFG_DBGBUS_MASKE_1                0x617
+#define A6XX_DBGC_CFG_DBGBUS_MASKE_2                0x618
+#define A6XX_DBGC_CFG_DBGBUS_MASKE_3                0x619
+#define A6XX_DBGC_CFG_DBGBUS_NIBBLEE                0x61a
+#define A6XX_DBGC_CFG_DBGBUS_PTRC0                  0x61b
+#define A6XX_DBGC_CFG_DBGBUS_PTRC1                  0x61c
+#define A6XX_DBGC_CFG_DBGBUS_LOADREG                0x61d
+#define A6XX_DBGC_CFG_DBGBUS_IDX                    0x61e
+#define A6XX_DBGC_CFG_DBGBUS_CLRC                   0x61f
+#define A6XX_DBGC_CFG_DBGBUS_LOADIVT                0x620
+#define A6XX_DBGC_VBIF_DBG_CNTL                     0x621
+#define A6XX_DBGC_DBG_LO_HI_GPIO                    0x622
+#define A6XX_DBGC_EXT_TRACE_BUS_CNTL                0x623
+#define A6XX_DBGC_READ_AHB_THROUGH_DBG              0x624
+#define A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1             0x62f
+#define A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2             0x630
+#define A6XX_DBGC_EVT_CFG                           0x640
+#define A6XX_DBGC_EVT_INTF_SEL_0                    0x641
+#define A6XX_DBGC_EVT_INTF_SEL_1                    0x642
+#define A6XX_DBGC_PERF_ATB_CFG                      0x643
+#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_0            0x644
+#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_1            0x645
+#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_2            0x646
+#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_3            0x647
+#define A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_0          0x648
+#define A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_1          0x649
+#define A6XX_DBGC_PERF_ATB_DRAIN_CMD                0x64a
+#define A6XX_DBGC_ECO_CNTL                          0x650
+#define A6XX_DBGC_AHB_DBG_CNTL                      0x651
+
+/* VSC registers */
+#define A6XX_VSC_PERFCTR_VSC_SEL_0          0xCD8
+#define A6XX_VSC_PERFCTR_VSC_SEL_1          0xCD9
+
+/* GRAS registers */
+#define A6XX_GRAS_ADDR_MODE_CNTL            0x8601
+#define A6XX_GRAS_PERFCTR_TSE_SEL_0         0x8610
+#define A6XX_GRAS_PERFCTR_TSE_SEL_1         0x8611
+#define A6XX_GRAS_PERFCTR_TSE_SEL_2         0x8612
+#define A6XX_GRAS_PERFCTR_TSE_SEL_3         0x8613
+#define A6XX_GRAS_PERFCTR_RAS_SEL_0         0x8614
+#define A6XX_GRAS_PERFCTR_RAS_SEL_1         0x8615
+#define A6XX_GRAS_PERFCTR_RAS_SEL_2         0x8616
+#define A6XX_GRAS_PERFCTR_RAS_SEL_3         0x8617
+#define A6XX_GRAS_PERFCTR_LRZ_SEL_0         0x8618
+#define A6XX_GRAS_PERFCTR_LRZ_SEL_1         0x8619
+#define A6XX_GRAS_PERFCTR_LRZ_SEL_2         0x861A
+#define A6XX_GRAS_PERFCTR_LRZ_SEL_3         0x861B
+
+/* RB registers */
+#define A6XX_RB_ADDR_MODE_CNTL              0x8E05
+#define A6XX_RB_NC_MODE_CNTL                0x8E08
+#define A6XX_RB_PERFCTR_RB_SEL_0            0x8E10
+#define A6XX_RB_PERFCTR_RB_SEL_1            0x8E11
+#define A6XX_RB_PERFCTR_RB_SEL_2            0x8E12
+#define A6XX_RB_PERFCTR_RB_SEL_3            0x8E13
+#define A6XX_RB_PERFCTR_RB_SEL_4            0x8E14
+#define A6XX_RB_PERFCTR_RB_SEL_5            0x8E15
+#define A6XX_RB_PERFCTR_RB_SEL_6            0x8E16
+#define A6XX_RB_PERFCTR_RB_SEL_7            0x8E17
+#define A6XX_RB_PERFCTR_CCU_SEL_0           0x8E18
+#define A6XX_RB_PERFCTR_CCU_SEL_1           0x8E19
+#define A6XX_RB_PERFCTR_CCU_SEL_2           0x8E1A
+#define A6XX_RB_PERFCTR_CCU_SEL_3           0x8E1B
+#define A6XX_RB_PERFCTR_CCU_SEL_4           0x8E1C
+#define A6XX_RB_PERFCTR_CMP_SEL_0           0x8E2C
+#define A6XX_RB_PERFCTR_CMP_SEL_1           0x8E2D
+#define A6XX_RB_PERFCTR_CMP_SEL_2           0x8E2E
+#define A6XX_RB_PERFCTR_CMP_SEL_3           0x8E2F
+#define A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_HOST  0x8E3B
+#define A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD    0x8E3D
+#define A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x8E50
+
+/* PC registers */
+#define A6XX_PC_DBG_ECO_CNTL                0x9E00
+#define A6XX_PC_ADDR_MODE_CNTL              0x9E01
+#define A6XX_PC_PERFCTR_PC_SEL_0            0x9E34
+#define A6XX_PC_PERFCTR_PC_SEL_1            0x9E35
+#define A6XX_PC_PERFCTR_PC_SEL_2            0x9E36
+#define A6XX_PC_PERFCTR_PC_SEL_3            0x9E37
+#define A6XX_PC_PERFCTR_PC_SEL_4            0x9E38
+#define A6XX_PC_PERFCTR_PC_SEL_5            0x9E39
+#define A6XX_PC_PERFCTR_PC_SEL_6            0x9E3A
+#define A6XX_PC_PERFCTR_PC_SEL_7            0x9E3B
+
+/* HLSQ registers */
+#define A6XX_HLSQ_ADDR_MODE_CNTL            0xBE05
+#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_0        0xBE10
+#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_1        0xBE11
+#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_2        0xBE12
+#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_3        0xBE13
+#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_4        0xBE14
+#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_5        0xBE15
+#define A6XX_HLSQ_DBG_AHB_READ_APERTURE     0xC800
+#define A6XX_HLSQ_DBG_READ_SEL              0xD000
+
+/* VFD registers */
+#define A6XX_VFD_ADDR_MODE_CNTL             0xA601
+#define A6XX_VFD_PERFCTR_VFD_SEL_0          0xA610
+#define A6XX_VFD_PERFCTR_VFD_SEL_1          0xA611
+#define A6XX_VFD_PERFCTR_VFD_SEL_2          0xA612
+#define A6XX_VFD_PERFCTR_VFD_SEL_3          0xA613
+#define A6XX_VFD_PERFCTR_VFD_SEL_4          0xA614
+#define A6XX_VFD_PERFCTR_VFD_SEL_5          0xA615
+#define A6XX_VFD_PERFCTR_VFD_SEL_6          0xA616
+#define A6XX_VFD_PERFCTR_VFD_SEL_7          0xA617
+
+/* VPC registers */
+#define A6XX_VPC_ADDR_MODE_CNTL             0x9601
+#define A6XX_VPC_PERFCTR_VPC_SEL_0          0x9604
+#define A6XX_VPC_PERFCTR_VPC_SEL_1          0x9605
+#define A6XX_VPC_PERFCTR_VPC_SEL_2          0x9606
+#define A6XX_VPC_PERFCTR_VPC_SEL_3          0x9607
+#define A6XX_VPC_PERFCTR_VPC_SEL_4          0x9608
+#define A6XX_VPC_PERFCTR_VPC_SEL_5          0x9609
+
+/* UCHE registers */
+#define A6XX_UCHE_ADDR_MODE_CNTL            0xE00
+#define A6XX_UCHE_MODE_CNTL                 0xE01
+#define A6XX_UCHE_WRITE_RANGE_MAX_LO        0xE05
+#define A6XX_UCHE_WRITE_RANGE_MAX_HI        0xE06
+#define A6XX_UCHE_WRITE_THRU_BASE_LO        0xE07
+#define A6XX_UCHE_WRITE_THRU_BASE_HI        0xE08
+#define A6XX_UCHE_TRAP_BASE_LO              0xE09
+#define A6XX_UCHE_TRAP_BASE_HI              0xE0A
+#define A6XX_UCHE_GMEM_RANGE_MIN_LO         0xE0B
+#define A6XX_UCHE_GMEM_RANGE_MIN_HI         0xE0C
+#define A6XX_UCHE_GMEM_RANGE_MAX_LO         0xE0D
+#define A6XX_UCHE_GMEM_RANGE_MAX_HI         0xE0E
+#define A6XX_UCHE_CACHE_WAYS                0xE17
+#define A6XX_UCHE_FILTER_CNTL               0xE18
+#define A6XX_UCHE_CLIENT_PF                 0xE19
+#define A6XX_UCHE_CLIENT_PF_CLIENT_ID_MASK  0x7
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_0        0xE1C
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_1        0xE1D
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_2        0xE1E
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_3        0xE1F
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_4        0xE20
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_5        0xE21
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_6        0xE22
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_7        0xE23
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_8        0xE24
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_9        0xE25
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_10       0xE26
+#define A6XX_UCHE_PERFCTR_UCHE_SEL_11       0xE27
+#define A6XX_UCHE_GBIF_GX_CONFIG            0xE3A
+#define A6XX_UCHE_CMDQ_CONFIG               0xE3C
+
+/* SP registers */
+#define A6XX_SP_ADDR_MODE_CNTL              0xAE01
+#define A6XX_SP_NC_MODE_CNTL                0xAE02
+#define A6XX_SP_PERFCTR_SP_SEL_0            0xAE10
+#define A6XX_SP_PERFCTR_SP_SEL_1            0xAE11
+#define A6XX_SP_PERFCTR_SP_SEL_2            0xAE12
+#define A6XX_SP_PERFCTR_SP_SEL_3            0xAE13
+#define A6XX_SP_PERFCTR_SP_SEL_4            0xAE14
+#define A6XX_SP_PERFCTR_SP_SEL_5            0xAE15
+#define A6XX_SP_PERFCTR_SP_SEL_6            0xAE16
+#define A6XX_SP_PERFCTR_SP_SEL_7            0xAE17
+#define A6XX_SP_PERFCTR_SP_SEL_8            0xAE18
+#define A6XX_SP_PERFCTR_SP_SEL_9            0xAE19
+#define A6XX_SP_PERFCTR_SP_SEL_10           0xAE1A
+#define A6XX_SP_PERFCTR_SP_SEL_11           0xAE1B
+#define A6XX_SP_PERFCTR_SP_SEL_12           0xAE1C
+#define A6XX_SP_PERFCTR_SP_SEL_13           0xAE1D
+#define A6XX_SP_PERFCTR_SP_SEL_14           0xAE1E
+#define A6XX_SP_PERFCTR_SP_SEL_15           0xAE1F
+#define A6XX_SP_PERFCTR_SP_SEL_16           0xAE20
+#define A6XX_SP_PERFCTR_SP_SEL_17           0xAE21
+#define A6XX_SP_PERFCTR_SP_SEL_18           0xAE22
+#define A6XX_SP_PERFCTR_SP_SEL_19           0xAE23
+#define A6XX_SP_PERFCTR_SP_SEL_20           0xAE24
+#define A6XX_SP_PERFCTR_SP_SEL_21           0xAE25
+#define A6XX_SP_PERFCTR_SP_SEL_22           0xAE26
+#define A6XX_SP_PERFCTR_SP_SEL_23           0xAE27
+
+/* TP registers */
+#define A6XX_TPL1_ADDR_MODE_CNTL            0xB601
+#define A6XX_TPL1_NC_MODE_CNTL              0xB604
+#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0   0xB608
+#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1   0xB609
+#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2   0xB60A
+#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3   0xB60B
+#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4   0xB60C
+#define A6XX_TPL1_PERFCTR_TP_SEL_0          0xB610
+#define A6XX_TPL1_PERFCTR_TP_SEL_1          0xB611
+#define A6XX_TPL1_PERFCTR_TP_SEL_2          0xB612
+#define A6XX_TPL1_PERFCTR_TP_SEL_3          0xB613
+#define A6XX_TPL1_PERFCTR_TP_SEL_4          0xB614
+#define A6XX_TPL1_PERFCTR_TP_SEL_5          0xB615
+#define A6XX_TPL1_PERFCTR_TP_SEL_6          0xB616
+#define A6XX_TPL1_PERFCTR_TP_SEL_7          0xB617
+#define A6XX_TPL1_PERFCTR_TP_SEL_8          0xB618
+#define A6XX_TPL1_PERFCTR_TP_SEL_9          0xB619
+#define A6XX_TPL1_PERFCTR_TP_SEL_10         0xB61A
+#define A6XX_TPL1_PERFCTR_TP_SEL_11         0xB61B
+
+/* VBIF registers */
+#define A6XX_VBIF_VERSION                       0x3000
+#define A6XX_VBIF_CLKON                         0x3001
+#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK   0x1
+#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT  0x1
+#define A6XX_VBIF_GATE_OFF_WRREQ_EN             0x302A
+#define A6XX_VBIF_XIN_HALT_CTRL0                0x3080
+#define A6XX_VBIF_XIN_HALT_CTRL0_MASK           0xF
+#define A6XX_VBIF_XIN_HALT_CTRL1                0x3081
+#define A6XX_VBIF_TEST_BUS_OUT_CTRL             0x3084
+#define A6XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK     0x1
+#define A6XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT    0x0
+#define A6XX_VBIF_TEST_BUS1_CTRL0               0x3085
+#define A6XX_VBIF_TEST_BUS1_CTRL1               0x3086
+#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF
+#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0
+#define A6XX_VBIF_TEST_BUS2_CTRL0               0x3087
+#define A6XX_VBIF_TEST_BUS2_CTRL1               0x3088
+#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0x1FF
+#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0
+#define A6XX_VBIF_TEST_BUS_OUT                  0x308C
+#define A6XX_VBIF_PERF_CNT_SEL0                 0x30d0
+#define A6XX_VBIF_PERF_CNT_SEL1                 0x30d1
+#define A6XX_VBIF_PERF_CNT_SEL2                 0x30d2
+#define A6XX_VBIF_PERF_CNT_SEL3                 0x30d3
+#define A6XX_VBIF_PERF_CNT_LOW0                 0x30d8
+#define A6XX_VBIF_PERF_CNT_LOW1                 0x30d9
+#define A6XX_VBIF_PERF_CNT_LOW2                 0x30da
+#define A6XX_VBIF_PERF_CNT_LOW3                 0x30db
+#define A6XX_VBIF_PERF_CNT_HIGH0                0x30e0
+#define A6XX_VBIF_PERF_CNT_HIGH1                0x30e1
+#define A6XX_VBIF_PERF_CNT_HIGH2                0x30e2
+#define A6XX_VBIF_PERF_CNT_HIGH3                0x30e3
+#define A6XX_VBIF_PERF_PWR_CNT_EN0              0x3100
+#define A6XX_VBIF_PERF_PWR_CNT_EN1              0x3101
+#define A6XX_VBIF_PERF_PWR_CNT_EN2              0x3102
+#define A6XX_VBIF_PERF_PWR_CNT_LOW0             0x3110
+#define A6XX_VBIF_PERF_PWR_CNT_LOW1             0x3111
+#define A6XX_VBIF_PERF_PWR_CNT_LOW2             0x3112
+#define A6XX_VBIF_PERF_PWR_CNT_HIGH0            0x3118
+#define A6XX_VBIF_PERF_PWR_CNT_HIGH1            0x3119
+#define A6XX_VBIF_PERF_PWR_CNT_HIGH2            0x311a
+
+/* GBIF countables */
+#define GBIF_AXI0_READ_DATA_TOTAL_BEATS    34
+#define GBIF_AXI1_READ_DATA_TOTAL_BEATS    35
+#define GBIF_AXI0_WRITE_DATA_TOTAL_BEATS   46
+#define GBIF_AXI1_WRITE_DATA_TOTAL_BEATS   47
+
+/* GBIF registers */
+#define A6XX_GBIF_CX_CONFIG               0x3c00
+#define A6XX_GBIF_SCACHE_CNTL0            0x3c01
+#define A6XX_GBIF_SCACHE_CNTL1            0x3c02
+#define A6XX_GBIF_QSB_SIDE0               0x3c03
+#define A6XX_GBIF_QSB_SIDE1               0x3c04
+#define A6XX_GBIF_QSB_SIDE2               0x3c05
+#define A6XX_GBIF_QSB_SIDE3               0x3c06
+#define A6XX_GBIF_HALT                    0x3c45
+#define A6XX_GBIF_HALT_ACK                0x3c46
+
+#define A6XX_GBIF_CLIENT_HALT_MASK        BIT(0)
+#define A6XX_GBIF_ARB_HALT_MASK           BIT(1)
+#define A6XX_GBIF_GX_HALT_MASK            BIT(0)
+
+#define A6XX_GBIF_PERF_PWR_CNT_EN         0x3cc0
+#define A6XX_GBIF_PERF_CNT_SEL            0x3cc2
+#define A6XX_GBIF_PERF_PWR_CNT_SEL        0x3cc3
+#define A6XX_GBIF_PERF_CNT_LOW0           0x3cc4
+#define A6XX_GBIF_PERF_CNT_LOW1           0x3cc5
+#define A6XX_GBIF_PERF_CNT_LOW2           0x3cc6
+#define A6XX_GBIF_PERF_CNT_LOW3           0x3cc7
+#define A6XX_GBIF_PERF_CNT_HIGH0          0x3cc8
+#define A6XX_GBIF_PERF_CNT_HIGH1          0x3cc9
+#define A6XX_GBIF_PERF_CNT_HIGH2          0x3cca
+#define A6XX_GBIF_PERF_CNT_HIGH3          0x3ccb
+#define A6XX_GBIF_PWR_CNT_LOW0            0x3ccc
+#define A6XX_GBIF_PWR_CNT_LOW1            0x3ccd
+#define A6XX_GBIF_PWR_CNT_LOW2            0x3cce
+#define A6XX_GBIF_PWR_CNT_HIGH0           0x3ccf
+#define A6XX_GBIF_PWR_CNT_HIGH1           0x3cd0
+#define A6XX_GBIF_PWR_CNT_HIGH2           0x3cd1
+
+
+/* CX_DBGC_CFG registers */
+#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A                   0x18400
+#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_B                   0x18401
+#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_C                   0x18402
+#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_D                   0x18403
+#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_PING_INDEX_SHIFT    0x0
+#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT  0x8
+#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT                   0x18404
+#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN_SHIFT     0x0
+#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU_SHIFT       0xC
+#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT_SHIFT        0x1C
+#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM                   0x18405
+#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE_SHIFT      0x18
+#define A6XX_CX_DBGC_CFG_DBGBUS_OPL                     0x18406
+#define A6XX_CX_DBGC_CFG_DBGBUS_OPE                     0x18407
+#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0                  0x18408
+#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1                  0x18409
+#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2                  0x1840A
+#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3                  0x1840B
+#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0                 0x1840C
+#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1                 0x1840D
+#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2                 0x1840E
+#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3                 0x1840F
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0                 0x18410
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1                 0x18411
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL0_SHIFT            0x0
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL1_SHIFT            0x4
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL2_SHIFT            0x8
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL3_SHIFT            0xC
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL4_SHIFT            0x10
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL5_SHIFT            0x14
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL6_SHIFT            0x18
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL7_SHIFT            0x1C
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL8_SHIFT            0x0
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL9_SHIFT            0x4
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL10_SHIFT           0x8
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL11_SHIFT           0xC
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL12_SHIFT           0x10
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL13_SHIFT           0x14
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL14_SHIFT           0x18
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL15_SHIFT           0x1C
+#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_0                  0x18412
+#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_1                  0x18413
+#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_2                  0x18414
+#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_3                  0x18415
+#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_0                 0x18416
+#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_1                 0x18417
+#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_2                 0x18418
+#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_3                 0x18419
+#define A6XX_CX_DBGC_CFG_DBGBUS_NIBBLEE                 0x1841A
+#define A6XX_CX_DBGC_CFG_DBGBUS_PTRC0                   0x1841B
+#define A6XX_CX_DBGC_CFG_DBGBUS_PTRC1                   0x1841C
+#define A6XX_CX_DBGC_CFG_DBGBUS_LOADREG                 0x1841D
+#define A6XX_CX_DBGC_CFG_DBGBUS_IDX                     0x1841E
+#define A6XX_CX_DBGC_CFG_DBGBUS_CLRC                    0x1841F
+#define A6XX_CX_DBGC_CFG_DBGBUS_LOADIVT                 0x18420
+#define A6XX_CX_DBGC_VBIF_DBG_CNTL                      0x18421
+#define A6XX_CX_DBGC_DBG_LO_HI_GPIO                     0x18422
+#define A6XX_CX_DBGC_EXT_TRACE_BUS_CNTL                 0x18423
+#define A6XX_CX_DBGC_READ_AHB_THROUGH_DBG               0x18424
+#define A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1              0x1842F
+#define A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2              0x18430
+#define A6XX_CX_DBGC_EVT_CFG                            0x18440
+#define A6XX_CX_DBGC_EVT_INTF_SEL_0                     0x18441
+#define A6XX_CX_DBGC_EVT_INTF_SEL_1                     0x18442
+#define A6XX_CX_DBGC_PERF_ATB_CFG                       0x18443
+#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_0             0x18444
+#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_1             0x18445
+#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_2             0x18446
+#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_3             0x18447
+#define A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0           0x18448
+#define A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1           0x18449
+#define A6XX_CX_DBGC_PERF_ATB_DRAIN_CMD                 0x1844A
+#define A6XX_CX_DBGC_ECO_CNTL                           0x18450
+#define A6XX_CX_DBGC_AHB_DBG_CNTL                       0x18451
+
+/* GMU control registers */
+#define A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL   0x1A880
+#define A6XX_GMU_GX_SPTPRAC_POWER_CONTROL	0x1A881
+#define A6XX_GMU_CM3_ITCM_START			0x1B400
+#define A6XX_GMU_CM3_DTCM_START			0x1C400
+#define A6XX_GMU_NMI_CONTROL_STATUS		0x1CBF0
+#define A6XX_GMU_BOOT_SLUMBER_OPTION		0x1CBF8
+#define A6XX_GMU_GX_VOTE_IDX			0x1CBF9
+#define A6XX_GMU_MX_VOTE_IDX			0x1CBFA
+#define A6XX_GMU_DCVS_ACK_OPTION		0x1CBFC
+#define A6XX_GMU_DCVS_PERF_SETTING		0x1CBFD
+#define A6XX_GMU_DCVS_BW_SETTING		0x1CBFE
+#define A6XX_GMU_DCVS_RETURN			0x1CBFF
+#define A6XX_GMU_ICACHE_CONFIG			0x1F400
+#define A6XX_GMU_DCACHE_CONFIG			0x1F401
+#define A6XX_GMU_SYS_BUS_CONFIG			0x1F40F
+#define A6XX_GMU_CM3_SYSRESET			0x1F800
+#define A6XX_GMU_CM3_BOOT_CONFIG		0x1F801
+#define A6XX_GMU_CX_GMU_WFI_CONFIG		0x1F802
+#define A6XX_GMU_CX_GMU_WDOG_CTRL		0x1F813
+#define A6XX_GMU_CM3_FW_BUSY			0x1F81A
+#define A6XX_GMU_CM3_FW_INIT_RESULT		0x1F81C
+#define A6XX_GMU_CM3_CFG			0x1F82D
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE	0x1F840
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0	0x1F841
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1	0x1F842
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L	0x1F844
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H	0x1F845
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L	0x1F846
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H	0x1F847
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L	0x1F848
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H	0x1F849
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L	0x1F84A
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H	0x1F84B
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L	0x1F84C
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H	0x1F84D
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L	0x1F84E
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H	0x1F84F
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_L 0x1F850
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_H 0x1F851
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_L 0x1F852
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_H 0x1F853
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_2	0x1F860
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L	0x1F870
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H	0x1F871
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L	0x1F872
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H	0x1F873
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L	0x1F874
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H	0x1F875
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L	0x1F876
+#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H	0x1F877
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_ENABLE	0x1F8A0
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_0	0x1F8A1
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_1	0x1F8A2
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_0_L	0x1F8A4
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_0_H	0x1F8A5
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_1_L	0x1F8A6
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_1_H	0x1F8A7
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_2_L	0x1F8A8
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_2_H	0x1F8A9
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_3_L	0x1F8AA
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_3_H	0x1F8AB
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_4_L	0x1F8AC
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_4_H	0x1F8AD
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_5_L	0x1F8AE
+#define A6XX_GMU_CX_GMU_PERF_COUNTER_5_H	0x1F8AF
+#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL	0x1F8C0
+#define A6XX_GMU_PWR_COL_INTER_FRAME_HYST	0x1F8C1
+#define A6XX_GMU_PWR_COL_SPTPRAC_HYST		0x1F8C2
+#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS		0x1F8D0
+#define A6XX_GMU_GPU_NAP_CTRL			0x1F8E4
+#define A6XX_GMU_RPMH_CTRL			0x1F8E8
+#define A6XX_GMU_RPMH_HYST_CTRL			0x1F8E9
+#define A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE    0x1F8EC
+#define A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_MSG      0x1F900
+#define A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP     0x1F901
+#define A6XX_GMU_BOOT_KMD_LM_HANDSHAKE		0x1F9F0
+#define A6XX_GMU_LLM_GLM_SLEEP_CTRL		0x1F957
+#define A6XX_GMU_LLM_GLM_SLEEP_STATUS		0x1F958
+
+/* HFI registers */
+#define A6XX_GMU_ALWAYS_ON_COUNTER_L		0x1F888
+#define A6XX_GMU_ALWAYS_ON_COUNTER_H		0x1F889
+#define A6XX_GMU_GMU_PWR_COL_KEEPALIVE		0x1F8C3
+#define A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE      0x1F8C4
+#define A6XX_GMU_HFI_CTRL_STATUS		0x1F980
+#define A6XX_GMU_HFI_VERSION_INFO		0x1F981
+#define A6XX_GMU_HFI_SFR_ADDR			0x1F982
+#define A6XX_GMU_HFI_MMAP_ADDR			0x1F983
+#define A6XX_GMU_HFI_QTBL_INFO			0x1F984
+#define A6XX_GMU_HFI_QTBL_ADDR			0x1F985
+#define A6XX_GMU_HFI_CTRL_INIT			0x1F986
+#define A6XX_GMU_GMU2HOST_INTR_SET		0x1F990
+#define A6XX_GMU_GMU2HOST_INTR_CLR		0x1F991
+#define A6XX_GMU_GMU2HOST_INTR_INFO		0x1F992
+#define A6XX_GMU_GMU2HOST_INTR_MASK		0x1F993
+#define A6XX_GMU_HOST2GMU_INTR_SET		0x1F994
+#define A6XX_GMU_HOST2GMU_INTR_CLR		0x1F995
+#define A6XX_GMU_HOST2GMU_INTR_RAW_INFO		0x1F996
+#define A6XX_GMU_HOST2GMU_INTR_EN_0		0x1F997
+#define A6XX_GMU_HOST2GMU_INTR_EN_1		0x1F998
+#define A6XX_GMU_HOST2GMU_INTR_EN_2		0x1F999
+#define A6XX_GMU_HOST2GMU_INTR_EN_3		0x1F99A
+#define A6XX_GMU_HOST2GMU_INTR_INFO_0		0x1F99B
+#define A6XX_GMU_HOST2GMU_INTR_INFO_1		0x1F99C
+#define A6XX_GMU_HOST2GMU_INTR_INFO_2		0x1F99D
+#define A6XX_GMU_HOST2GMU_INTR_INFO_3		0x1F99E
+#define A6XX_GMU_GENERAL_0			0x1F9C5
+#define A6XX_GMU_GENERAL_1			0x1F9C6
+#define A6XX_GMU_GENERAL_6			0x1F9CB
+#define A6XX_GMU_GENERAL_7			0x1F9CC
+#define A6XX_GMU_GENERAL_11			0x1F9D0
+
+/* ISENSE registers */
+#define A6XX_GMU_ISENSE_CTRL			0x1F95D
+#define A6XX_GPU_GMU_CX_GMU_ISENSE_CTRL		0x1f95d
+#define A6XX_GPU_CS_ENABLE_REG			0x23120
+
+/* LM registers */
+#define A6XX_GPU_GMU_CX_GMU_PWR_THRESHOLD       0x1F94D
+
+/* FAL10 veto register */
+#define A6XX_GPU_GMU_CX_GMU_CX_FAL_INTF         0x1F8F0
+#define A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF     0x1F8F1
+
+#define A6XX_GMU_AO_INTERRUPT_EN		0x23B03
+#define A6XX_GMU_AO_HOST_INTERRUPT_CLR		0x23B04
+#define A6XX_GMU_AO_HOST_INTERRUPT_STATUS	0x23B05
+#define A6XX_GMU_AO_HOST_INTERRUPT_MASK		0x23B06
+#define A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL       0x23B09
+#define A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL      0x23B0A
+#define A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL       0x23B0B
+#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS	0x23B0C
+#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2	0x23B0D
+#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK	0x23B0E
+#define A6XX_GMU_AO_AHB_FENCE_CTRL		0x23B10
+#define A6XX_GMU_AHB_FENCE_STATUS		0x23B13
+#define A6XX_GMU_AHB_FENCE_STATUS_CLR           0x23B14
+#define A6XX_GMU_RBBM_INT_UNMASKED_STATUS	0x23B15
+#define A6XX_GMU_AO_SPARE_CNTL			0x23B16
+
+/* RGMU GLM registers */
+#define A6XX_GMU_AO_RGMU_GLM_SLEEP_CTRL		0x23B80
+#define A6XX_GMU_AO_RGMU_GLM_SLEEP_STATUS	0x23B81
+#define A6XX_GMU_AO_RGMU_GLM_HW_CRC_DISABLE	0x23B82
+
+/* GMU RSC control registers */
+#define A6XX_GMU_RSCC_CONTROL_REQ		0x23B07
+#define A6XX_GMU_RSCC_CONTROL_ACK		0x23B08
+
+/* FENCE control registers */
+#define A6XX_GMU_AHB_FENCE_RANGE_0		0x23B11
+#define A6XX_GMU_AHB_FENCE_RANGE_1		0x23B12
+
+/* GMU countables */
+#define A6XX_GMU_CM3_BUSY_CYCLES		0
+
+/* GPUCC registers */
+#define A6XX_GPU_CC_GX_GDSCR			0x24403
+#define A6XX_GPU_CC_GX_DOMAIN_MISC		0x24542
+#define A6XX_GPU_CC_GX_DOMAIN_MISC3		0x24563
+#define A6XX_GPU_CC_CX_CFG_GDSCR		0x2441C
+
+/* GPUCC offsets are different for A662 */
+#define A662_GPU_CC_GX_GDSCR			0x26417
+#define A662_GPU_CC_GX_DOMAIN_MISC3		0x26541
+#define A662_GPU_CC_CX_CFG_GDSCR		0x26443
+
+/* GPU CPR registers */
+#define A6XX_GPU_CPR_FSM_CTL			0x26801
+
+/* GPU RSC sequencer registers */
+#define A6XX_GPU_RSCC_RSC_STATUS0_DRV0			0x00004
+#define A6XX_RSCC_PDC_SEQ_START_ADDR			0x00008
+#define A6XX_RSCC_PDC_MATCH_VALUE_LO			0x00009
+#define A6XX_RSCC_PDC_MATCH_VALUE_HI			0x0000A
+#define A6XX_RSCC_PDC_SLAVE_ID_DRV0			0x0000B
+#define A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR			0x0000D
+#define A6XX_RSCC_HIDDEN_TCS_CMD0_DATA			0x0000E
+#define A6XX_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_L_DRV0	0x00082
+#define A6XX_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_H_DRV0	0x00083
+#define A6XX_RSCC_TIMESTAMP_UNIT1_EN_DRV0		0x00089
+#define A6XX_RSCC_TIMESTAMP_UNIT1_OUTPUT_DRV0		0x0008C
+#define A6XX_RSCC_OVERRIDE_START_ADDR			0x00100
+#define A6XX_RSCC_SEQ_BUSY_DRV0				0x00101
+#define A6XX_RSCC_SEQ_MEM_0_DRV0			0x00180
+#define A6XX_RSCC_TCS0_DRV0_STATUS			0x00346
+#define A6XX_RSCC_TCS1_DRV0_STATUS                      0x003EE
+#define A6XX_RSCC_TCS2_DRV0_STATUS                      0x00496
+#define A6XX_RSCC_TCS3_DRV0_STATUS                      0x0053E
+#define A6XX_RSCC_TCS4_DRV0_STATUS                      0x005E6
+#define A6XX_RSCC_TCS5_DRV0_STATUS                      0x0068E
+#define A6XX_RSCC_TCS6_DRV0_STATUS                      0x00736
+#define A6XX_RSCC_TCS7_DRV0_STATUS                      0x007DE
+#define A6XX_RSCC_TCS8_DRV0_STATUS                      0x00886
+#define A6XX_RSCC_TCS9_DRV0_STATUS                      0x0092E
+
+/* GPU PDC sequencer registers in AOSS.RPMh domain */
+#define PDC_GPU_ENABLE_PDC			0x1140
+#define PDC_GPU_SEQ_START_ADDR			0x1148
+#define PDC_GPU_TCS0_CONTROL			0x1540
+#define PDC_GPU_TCS0_CMD_ENABLE_BANK		0x1541
+#define PDC_GPU_TCS0_CMD_WAIT_FOR_CMPL_BANK	0x1542
+#define PDC_GPU_TCS0_CMD0_MSGID			0x1543
+#define PDC_GPU_TCS0_CMD0_ADDR			0x1544
+#define PDC_GPU_TCS0_CMD0_DATA			0x1545
+#define PDC_GPU_TCS1_CONTROL			0x1572
+#define PDC_GPU_TCS1_CMD_ENABLE_BANK		0x1573
+#define PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK	0x1574
+#define PDC_GPU_TCS1_CMD0_MSGID			0x1575
+#define PDC_GPU_TCS1_CMD0_ADDR			0x1576
+#define PDC_GPU_TCS1_CMD0_DATA			0x1577
+#define PDC_GPU_TCS2_CONTROL			0x15A4
+#define PDC_GPU_TCS2_CMD_ENABLE_BANK		0x15A5
+#define PDC_GPU_TCS2_CMD_WAIT_FOR_CMPL_BANK	0x15A6
+#define PDC_GPU_TCS2_CMD0_MSGID			0x15A7
+#define PDC_GPU_TCS2_CMD0_ADDR			0x15A8
+#define PDC_GPU_TCS2_CMD0_DATA			0x15A9
+#define PDC_GPU_TCS3_CONTROL			0x15D6
+#define PDC_GPU_TCS3_CMD_ENABLE_BANK		0x15D7
+#define PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK	0x15D8
+#define PDC_GPU_TCS3_CMD0_MSGID			0x15D9
+#define PDC_GPU_TCS3_CMD0_ADDR			0x15DA
+#define PDC_GPU_TCS3_CMD0_DATA			0x15DB
+
+/*
+ * Legacy DTSI used an offset from the start of the PDC resource for
+ * PDC SEQ programming. We now use PDC subsections, so the PDC SEQ
+ * offset starts at zero.
+ */
+#define PDC_GPU_SEQ_MEM_0			0x0
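A minimal sketch of what the zero-based convention above implies for a
sequencer load; pdc_seq_write(), struct gmu_dev and the seq_words payload
are assumed names for illustration, not this driver's actual API:

/*
 * Illustrative sketch only: with the PDC SEQ mapped as its own
 * subsection, programming starts at dword offset 0 (PDC_GPU_SEQ_MEM_0).
 */
static void pdc_load_sequence(struct gmu_dev *gmu, const u32 *seq_words,
		unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		pdc_seq_write(gmu, PDC_GPU_SEQ_MEM_0 + i, seq_words[i]);
}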
+
+/*
+ * The legacy RSCC register range was part of the GMU register space;
+ * we now use a separate section for RSCC registers. Add this offset
+ * for backward compatibility with the legacy layout.
+ */
+#define RSCC_OFFSET_LEGACY			0x23400
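A minimal sketch of how such a fallback could be applied when resolving
register offsets; the helper and the has_rscc_region flag are assumptions
for illustration, not the driver's actual interface:

/*
 * Illustrative sketch: has_rscc_region stands in for however the probe
 * code detects a dedicated "rscc" register resource in the devicetree.
 */
static u32 a6xx_rscc_offset(u32 offset, bool has_rscc_region)
{
	/* Dedicated RSCC region: the offsets above apply directly */
	if (has_rscc_region)
		return offset;

	/* Legacy devicetree layout: RSCC registers sit inside the GMU range */
	return offset + RSCC_OFFSET_LEGACY;
}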
+
+/* RGMU(PCC) registers in A6X_GMU_CX_0_NON_CONTEXT_DEC domain */
+#define A6XX_RGMU_CX_INTR_GEN_EN		0x1F80F
+#define A6XX_RGMU_CX_RGMU_TIMER0		0x1F834
+#define A6XX_RGMU_CX_RGMU_TIMER1		0x1F835
+#define A6XX_RGMU_CX_PCC_CTRL			0x1F838
+#define A6XX_RGMU_CX_PCC_INIT_RESULT		0x1F839
+#define A6XX_RGMU_CX_PCC_BKPT_CFG		0x1F83A
+#define A6XX_RGMU_CX_PCC_BKPT_ADDR		0x1F83B
+#define A6XX_RGMU_CX_PCC_STATUS			0x1F83C
+#define A6XX_RGMU_CX_PCC_DEBUG			0x1F83D
+
+/* GPU CX_MISC registers */
+#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0	0x1
+#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1	0x2
+#define A6XX_LLC_NUM_GPU_SCIDS			5
+#define A6XX_GPU_LLC_SCID_NUM_BITS		5
+#define A6XX_GPU_LLC_SCID_MASK \
+	((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
+#define A6XX_GPUHTW_LLC_SCID_SHIFT		25
+#define A6XX_GPUHTW_LLC_SCID_MASK \
+	(((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT)
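Expanding the two masks with the values above (five 5-bit GPU SCID fields,
plus one 5-bit field for the pagetable walker) gives, for reference:

/* A6XX_GPU_LLC_SCID_MASK    = (1 << 25) - 1 = 0x01FFFFFF  (bits 0..24)  */
/* A6XX_GPUHTW_LLC_SCID_MASK = 0x1F << 25    = 0x3E000000  (bits 25..29) */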
+
+/* FUSA registers */
+#define A6XX_GPU_FUSA_REG_BASE			0x3FC00
+#define A6XX_GPU_FUSA_REG_ECC_CTRL			0x3FC00
+#define A6XX_GPU_FUSA_REG_CSR_PRIY			0x3FC52
+#define A6XX_GPU_FUSA_DISABLE_NUM_BITS			4
+#define A6XX_GPU_FUSA_DISABLE_BITS			0x5
+#define A6XX_GPU_FUSA_DISABLE_MASK \
+	((1 << A6XX_GPU_FUSA_DISABLE_NUM_BITS) - 1)
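For reference, the disable-mask arithmetic works out as follows:

/*
 * A6XX_GPU_FUSA_DISABLE_MASK = (1 << 4) - 1 = 0xF; the expected field
 * value A6XX_GPU_FUSA_DISABLE_BITS (0x5) fits within this low nibble.
 */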
+
+#endif /* _A6XX_REG_H */
+

+ 2839 - 0
qcom/opensource/graphics-kernel/adreno-gpulist.h

@@ -0,0 +1,2839 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#define ANY_ID (~0)
+
+#define DEFINE_ADRENO_REV(_rev, _core, _major, _minor, _patchid) \
+	.gpurev = _rev, .core = _core, .major = _major, .minor = _minor, \
+	.patchid = _patchid
+
+#define DEFINE_DEPRECATED_CORE(_name, _rev, _core, _major, _minor, _patchid) \
+static const struct adreno_gpu_core adreno_gpu_core_##_name = { \
+	DEFINE_ADRENO_REV(_rev, _core, _major, _minor, _patchid), \
+	.features = ADRENO_DEPRECATED, \
+}
+
+#define MHZ_TO_KBPS(mhz, w) ((u64)(mhz * 1000000ULL * w) / (1024))
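A quick sanity check of the conversion above, using illustrative numbers
rather than values taken from any specific bandwidth table:

/* MHZ_TO_KBPS(1804, 4) = (1804 * 1000000 * 4) / 1024 = 7046875 KBps (~6.7 GiB/s) */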
+
+static const struct kgsl_regmap_list a306_vbif_regs[] = {
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000A },
+	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000A },
+};
+
+static const struct adreno_a3xx_core adreno_gpu_core_a306 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A306, 3, 0, 6, 0),
+		.features = ADRENO_SOFT_FAULT_DETECT,
+		.gpudev = &adreno_a3xx_gpudev,
+		.perfcounters = &adreno_a3xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_128K,
+		.bus_width = 0,
+		.snapshot_size = 600 * SZ_1K,
+	},
+	.pm4fw_name = "a300_pm4.fw",
+	.pfpfw_name = "a300_pfp.fw",
+	.vbif = a306_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a306_vbif_regs),
+};
+
+static const struct kgsl_regmap_list a306a_vbif_regs[] = {
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 },
+	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 },
+};
+
+static const struct adreno_a3xx_core adreno_gpu_core_a306a = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A306A, 3, 0, 6, 0x20),
+		.features = ADRENO_SOFT_FAULT_DETECT,
+		.gpudev = &adreno_a3xx_gpudev,
+		.perfcounters = &adreno_a3xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_128K,
+		.bus_width = 16,
+		.snapshot_size = 600 * SZ_1K,
+	},
+	.pm4fw_name = "a300_pm4.fw",
+	.pfpfw_name = "a300_pfp.fw",
+	.vbif = a306a_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a306a_vbif_regs),
+};
+
+static const struct kgsl_regmap_list a304_vbif_regs[] = {
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+};
+
+static const struct adreno_a3xx_core adreno_gpu_core_a304 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A304, 3, 0, 4, 0),
+		.features = ADRENO_SOFT_FAULT_DETECT,
+		.gpudev = &adreno_a3xx_gpudev,
+		.perfcounters = &adreno_a3xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = (SZ_64K + SZ_32K),
+		.bus_width = 0,
+		.snapshot_size = 600 * SZ_1K,
+	},
+	.pm4fw_name = "a300_pm4.fw",
+	.pfpfw_name = "a300_pfp.fw",
+	.vbif = a304_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a304_vbif_regs),
+};
+
+DEFINE_DEPRECATED_CORE(a405, ADRENO_REV_A405, 4, 0, 5, ANY_ID);
+DEFINE_DEPRECATED_CORE(a418, ADRENO_REV_A418, 4, 1, 8, ANY_ID);
+DEFINE_DEPRECATED_CORE(a420, ADRENO_REV_A420, 4, 2, 0, ANY_ID);
+DEFINE_DEPRECATED_CORE(a430, ADRENO_REV_A430, 4, 3, 0, ANY_ID);
+DEFINE_DEPRECATED_CORE(a530v1, ADRENO_REV_A530, 5, 3, 0, 0);
+
+static const struct kgsl_regmap_list a530_hwcg_regs[] = {
+	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
+	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
+	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
+	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
+	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
+	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+};
+
+/* VBIF control registers for a530, a510, a508, a505 and a506 */
+static const struct kgsl_regmap_list a530_vbif_regs[] = {
+	{A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003},
+};
+
+static const struct adreno_a5xx_core adreno_gpu_core_a530v2 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A530, 5, 3, 0, 1),
+		.features = ADRENO_SPTP_PC | ADRENO_LM |
+			ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION,
+		.gpudev = &adreno_a5xx_gpudev,
+		.perfcounters = &adreno_a5xx_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_1M,
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.gpmu_tsens = 0x00060007,
+	.max_power = 5448,
+	.pm4fw_name = "a530_pm4.fw",
+	.pfpfw_name = "a530_pfp.fw",
+	.gpmufw_name = "a530_gpmu.fw2",
+	.regfw_name = "a530v2_seq.fw2",
+	.zap_name = "a530_zap.mdt",
+	.hwcg = a530_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a530_hwcg_regs),
+	.vbif = a530_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
+	.highest_bank_bit = 15,
+};
+
+static const struct adreno_a5xx_core adreno_gpu_core_a530v3 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A530, 5, 3, 0, ANY_ID),
+		.features = ADRENO_SPTP_PC | ADRENO_LM |
+			ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION,
+		.gpudev = &adreno_a5xx_gpudev,
+		.perfcounters = &adreno_a5xx_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_1M,
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.gpmu_tsens = 0x00060007,
+	.max_power = 5448,
+	.pm4fw_name = "a530_pm4.fw",
+	.pfpfw_name = "a530_pfp.fw",
+	.gpmufw_name = "a530v3_gpmu.fw2",
+	.regfw_name = "a530v3_seq.fw2",
+	.zap_name = "a530_zap.mdt",
+	.hwcg = a530_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a530_hwcg_regs),
+	.vbif = a530_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
+	.highest_bank_bit = 15,
+};
+
+/* For a505, a506 and a508 */
+static const struct kgsl_regmap_list a50x_hwcg_regs[] = {
+	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
+	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
+};
+
+static const struct adreno_a5xx_core adreno_gpu_core_a505 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A505, 5, 0, 5, ANY_ID),
+		.features = ADRENO_PREEMPTION,
+		.gpudev = &adreno_a5xx_gpudev,
+		.perfcounters = &adreno_a5xx_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = (SZ_128K + SZ_8K),
+		.bus_width = 16,
+		.snapshot_size = SZ_1M,
+	},
+	.pm4fw_name = "a530_pm4.fw",
+	.pfpfw_name = "a530_pfp.fw",
+	.hwcg = a50x_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a50x_hwcg_regs),
+	.vbif = a530_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
+};
+
+static const struct adreno_a5xx_core adreno_gpu_core_a506 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A506, 5, 0, 6, ANY_ID),
+		.features = ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION,
+		.gpudev = &adreno_a5xx_gpudev,
+		.perfcounters = &adreno_a5xx_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = (SZ_128K + SZ_8K),
+		.bus_width = 16,
+		.snapshot_size = SZ_1M,
+	},
+	.pm4fw_name = "a530_pm4.fw",
+	.pfpfw_name = "a530_pfp.fw",
+	.zap_name = "a506_zap.mdt",
+	.hwcg = a50x_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a50x_hwcg_regs),
+	.vbif = a530_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
+	.highest_bank_bit = 14,
+};
+
+static const struct kgsl_regmap_list a510_hwcg_regs[] = {
+	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
+	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
+	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
+	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
+	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+};
+
+static const struct adreno_a5xx_core adreno_gpu_core_a510 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A510, 5, 1, 0, ANY_ID),
+		.gpudev = &adreno_a5xx_gpudev,
+		.perfcounters = &adreno_a5xx_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_256K,
+		.bus_width = 16,
+		.snapshot_size = SZ_1M,
+	},
+	.pm4fw_name = "a530_pm4.fw",
+	.pfpfw_name = "a530_pfp.fw",
+	.hwcg = a510_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a510_hwcg_regs),
+	.vbif = a530_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
+};
+
+DEFINE_DEPRECATED_CORE(a540v1, ADRENO_REV_A540, 5, 4, 0, 0);
+
+static const struct kgsl_regmap_list a540_hwcg_regs[] = {
+	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
+	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
+	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
+	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
+	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
+	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_GPMU, 0x00000222},
+	{A5XX_RBBM_CLOCK_DELAY_GPMU, 0x00000770},
+	{A5XX_RBBM_CLOCK_HYST_GPMU, 0x00000004},
+};
+
+static const struct kgsl_regmap_list a540_vbif_regs[] = {
+	{A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003},
+	{A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009},
+};
+
+static const struct adreno_a5xx_core adreno_gpu_core_a540v2 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A540, 5, 4, 0, ANY_ID),
+		.features = ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION |
+			ADRENO_SPTP_PC,
+		.gpudev = &adreno_a5xx_gpudev,
+		.perfcounters = &adreno_a5xx_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_1M,
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.gpmu_tsens = 0x000c000d,
+	.max_power = 5448,
+	.pm4fw_name = "a530_pm4.fw",
+	.pfpfw_name = "a530_pfp.fw",
+	.gpmufw_name = "a540_gpmu.fw2",
+	.zap_name = "a540_zap.mdt",
+	.hwcg = a540_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a540_hwcg_regs),
+	.vbif = a540_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a540_vbif_regs),
+	.highest_bank_bit = 15,
+};
+
+static const struct kgsl_regmap_list a512_hwcg_regs[] = {
+	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
+	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
+	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
+	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
+	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+};
+
+static const struct adreno_a5xx_core adreno_gpu_core_a512 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A512, 5, 1, 2, ANY_ID),
+		.features = ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION,
+		.gpudev = &adreno_a5xx_gpudev,
+		.perfcounters = &adreno_a5xx_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = (SZ_256K + SZ_16K),
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.pm4fw_name = "a530_pm4.fw",
+	.pfpfw_name = "a530_pfp.fw",
+	.zap_name = "a512_zap.mdt",
+	.hwcg = a512_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a512_hwcg_regs),
+	.highest_bank_bit = 14,
+};
+
+static const struct adreno_a5xx_core adreno_gpu_core_a508 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A508, 5, 0, 8, ANY_ID),
+		.features = ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION,
+		.gpudev = &adreno_a5xx_gpudev,
+		.perfcounters = &adreno_a5xx_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = (SZ_128K + SZ_8K),
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.pm4fw_name = "a530_pm4.fw",
+	.pfpfw_name = "a530_pfp.fw",
+	.zap_name = "a508_zap.mdt",
+	.hwcg = a50x_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a50x_hwcg_regs),
+	.vbif = a530_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a530_vbif_regs),
+	.highest_bank_bit = 14,
+};
+
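+/* a630 v1 is only listed as a deprecated core; the driver rejects it at probe time */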
+DEFINE_DEPRECATED_CORE(a630v1, ADRENO_REV_A630, 6, 3, 0, 0);
+
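+/* HWCG tables are {register, value} pairs programmed when hardware clock gating is enabled */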
+static const struct kgsl_regmap_list a630_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220},
+	{A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220},
+	{A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220},
+	{A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+	{A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
+	{A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
+	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222},
+	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
+	{A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777},
+	{A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
+	{A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
+	{A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+};
+
+static const struct kgsl_regmap_list a630_vbif_regs[] = {
+	{A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009},
+	{A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3},
+};
+
+/* For a615, a616, a618, a619, a630, a640 and a680 */
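+/*
+ * Each entry is { CP_PROTECT register, start offset, end offset, noaccess };
+ * noaccess = 1 means the range is read-protected as well, not just write-protected.
+ */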
+static const struct adreno_protected_regs a630_protected_regs[] = {
+	{ A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 },
+	{ A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 },
+	{ A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 },
+	{ A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 },
+	{ A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 },
+	{ A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 },
+	{ A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 },
+	{ A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 },
+	{ A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 },
+	{ A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 },
+	{ A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 },
+	{ A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 },
+	{ A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 },
+	{ A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 },
+	{ A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 },
+	{ A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 },
+	{ A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 },
+	{ A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 },
+	{ A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 },
+	{ A6XX_CP_PROTECT_REG + 20, 0x09624, 0x097ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 21, 0x09e70, 0x09e71, 1 },
+	{ A6XX_CP_PROTECT_REG + 22, 0x09e78, 0x09fff, 1 },
+	{ A6XX_CP_PROTECT_REG + 23, 0x0a630, 0x0a7ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 24, 0x0ae02, 0x0ae02, 1 },
+	{ A6XX_CP_PROTECT_REG + 25, 0x0ae50, 0x0b17f, 1 },
+	{ A6XX_CP_PROTECT_REG + 26, 0x0b604, 0x0b604, 1 },
+	{ A6XX_CP_PROTECT_REG + 27, 0x0be02, 0x0be03, 1 },
+	{ A6XX_CP_PROTECT_REG + 28, 0x0be20, 0x0d5ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 29, 0x0f000, 0x0fbff, 1 },
+	{ A6XX_CP_PROTECT_REG + 30, 0x0fc00, 0x11bff, 0 },
+	{ A6XX_CP_PROTECT_REG + 31, 0x11c00, 0x11c00, 1 },
+	{ 0 },
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a630v2 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A630, 6, 3, 0, ANY_ID),
+		.features = ADRENO_IFPC | ADRENO_CONTENT_PROTECTION |
+			ADRENO_IOCOHERENT | ADRENO_PREEMPTION,
+		.gpudev = &adreno_a630_gpudev.base,
+		.perfcounters = &adreno_a630_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_1M,
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.prim_fifo_threshold = 0x0018000,
+	.gmu_major = 1,
+	.gmu_minor = 3,
+	.sqefw_name = "a630_sqe.fw",
+	.gmufw_name = "a630_gmu.bin",
+	.zap_name = "a630_zap.mdt",
+	.hwcg = a630_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a630_hwcg_regs),
+	.vbif = a630_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a630_vbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = a630_protected_regs,
+	.highest_bank_bit = 15,
+};
+
+/* For a615, a616, a618 and a619 */
+static const struct kgsl_regmap_list a615_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0,  0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A6XX_RBBM_CLOCK_HYST_SP0,  0x0000F3CF},
+	{A6XX_RBBM_CLOCK_CNTL_TP0,  0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL_TP1,  0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
+	{A6XX_RBBM_CLOCK_HYST_TP0,  0x77777777},
+	{A6XX_RBBM_CLOCK_HYST_TP1,  0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE,  0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE,  0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
+	{A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
+	{A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}
+};
+
+/* For a615, a616, a618 and a619 */
+static const struct kgsl_regmap_list a615_gbif_regs[] = {
+	{A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3},
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a615 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A615, 6, 1, 5, ANY_ID),
+		.features = ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION | ADRENO_IFPC |
+			ADRENO_IOCOHERENT,
+		.gpudev = &adreno_a630_gpudev.base,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = 600 * SZ_1K,
+	},
+	.prim_fifo_threshold = 0x0018000,
+	.gmu_major = 1,
+	.gmu_minor = 3,
+	.sqefw_name = "a630_sqe.fw",
+	.gmufw_name = "a630_gmu.bin",
+	.zap_name = "a615_zap.mdt",
+	.hwcg = a615_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a615_hwcg_regs),
+	.vbif = a615_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a615_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = a630_protected_regs,
+	.highest_bank_bit = 14,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a618 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A618, 6, 1, 8, ANY_ID),
+		.features = ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION | ADRENO_IFPC |
+			ADRENO_IOCOHERENT,
+		.gpudev = &adreno_a630_gpudev.base,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.prim_fifo_threshold = 0x0018000,
+	.gmu_major = 1,
+	.gmu_minor = 7,
+	.sqefw_name = "a630_sqe.fw",
+	.gmufw_name = "a630_gmu.bin",
+	.zap_name = "a615_zap.mdt",
+	.hwcg = a615_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a615_hwcg_regs),
+	.vbif = a615_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a615_gbif_regs),
+	.hang_detect_cycles = 0x3fffff,
+	.protected_regs = a630_protected_regs,
+	.highest_bank_bit = 14,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a619 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A619, 6, 1, 9, ANY_ID),
+		.features = ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION | ADRENO_IFPC |
+			ADRENO_IOCOHERENT,
+		.gpudev = &adreno_a630_gpudev.base,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_2M,
+	},
+	.prim_fifo_threshold = 0x0018000,
+	.gmu_major = 1,
+	.gmu_minor = 9,
+	.sqefw_name = "a630_sqe.fw",
+	.gmufw_name = "a619_gmu.bin",
+	.zap_name = "a615_zap.mdt",
+	.hwcg = a615_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a615_hwcg_regs),
+	.vbif = a615_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a615_gbif_regs),
+	.hang_detect_cycles = 0x3fffff,
+	.protected_regs = a630_protected_regs,
+	.highest_bank_bit = 14,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a619_variant = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A619, 6, 1, 9, ANY_ID),
+		.compatible = "qcom,adreno-gpu-a619-holi",
+		.features = ADRENO_PREEMPTION | ADRENO_CONTENT_PROTECTION,
+		.gpudev = &adreno_a619_holi_gpudev,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_2M,
+	},
+	.prim_fifo_threshold = 0x0018000,
+	.sqefw_name = "a630_sqe.fw",
+	.zap_name = "gen6_3_25_0_zap.mdt",
+	.hwcg = a615_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a615_hwcg_regs),
+	.vbif = a615_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a615_gbif_regs),
+	.hang_detect_cycles = 0x3fffff,
+	.protected_regs = a630_protected_regs,
+	.gx_cpr_toggle = true,
+	.highest_bank_bit = 14,
+};
+
+static const struct kgsl_regmap_list a620_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+};
+
+/* a620, a621 and a650 */
+static const struct kgsl_regmap_list a650_gbif_regs[] = {
+	{A6XX_GBIF_QSB_SIDE0, 0x00071620},
+	{A6XX_GBIF_QSB_SIDE1, 0x00071620},
+	{A6XX_GBIF_QSB_SIDE2, 0x00071620},
+	{A6XX_GBIF_QSB_SIDE3, 0x00071620},
+	{A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3},
+};
+
+/* These are for a620, a621 and a650 */
+static const struct adreno_protected_regs a620_protected_regs[] = {
+	{ A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 },
+	{ A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 },
+	{ A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 },
+	{ A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 },
+	{ A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 },
+	{ A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 },
+	{ A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 },
+	{ A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 },
+	{ A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 },
+	{ A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 },
+	{ A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 },
+	{ A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 },
+	{ A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 },
+	{ A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 },
+	{ A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 },
+	{ A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 },
+	{ A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 },
+	{ A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 },
+	{ A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 },
+	{ A6XX_CP_PROTECT_REG + 20, 0x08e80, 0x090ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 21, 0x09624, 0x097ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 22, 0x09e60, 0x09e71, 1 },
+	{ A6XX_CP_PROTECT_REG + 23, 0x09e78, 0x09fff, 1 },
+	{ A6XX_CP_PROTECT_REG + 24, 0x0a630, 0x0a7ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 25, 0x0ae02, 0x0ae02, 1 },
+	{ A6XX_CP_PROTECT_REG + 26, 0x0ae50, 0x0b17f, 1 },
+	{ A6XX_CP_PROTECT_REG + 27, 0x0b604, 0x0b604, 1 },
+	{ A6XX_CP_PROTECT_REG + 28, 0x0b608, 0x0b60f, 1 },
+	{ A6XX_CP_PROTECT_REG + 29, 0x0be02, 0x0be03, 1 },
+	{ A6XX_CP_PROTECT_REG + 30, 0x0be20, 0x0d5ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 31, 0x0f000, 0x0fbff, 1 },
+	{ A6XX_CP_PROTECT_REG + 32, 0x0fc00, 0x11bff, 0 },
+	{ A6XX_CP_PROTECT_REG + 33, 0x18400, 0x1a3ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 34, 0x1a800, 0x1c7ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 35, 0x1c800, 0x1e7ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 36, 0x1f400, 0x1f843, 1 },
+	{ A6XX_CP_PROTECT_REG + 37, 0x1f844, 0x1f8bf, 0 },
+	{ A6XX_CP_PROTECT_REG + 38, 0x1f887, 0x1f8a2, 1 },
+	{ A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 },
+	{ 0 },
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a620 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A620, 6, 2, 0, ANY_ID),
+		.features = ADRENO_CONTENT_PROTECTION | ADRENO_IOCOHERENT |
+			ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD |
+			ADRENO_APRIV,
+		.gpudev = &adreno_a630_gpudev.base,
+		.perfcounters = &adreno_a6xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = 2 * SZ_1M,
+	},
+	.prim_fifo_threshold = 0x0010000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a650_sqe.fw",
+	.gmufw_name = "a650_gmu.bin",
+	.zap_name = "a620_zap.mdt",
+	.hwcg = a620_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a620_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.veto_fal10 = true,
+	.hang_detect_cycles = 0x3ffff,
+	.protected_regs = a620_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 14,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a621 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A621, 6, 2, 1, ANY_ID),
+		.compatible = "qcom,adreno-gpu-a621",
+		.features = ADRENO_CONTENT_PROTECTION | ADRENO_IOCOHERENT |
+			ADRENO_APRIV | ADRENO_LSR | ADRENO_PREEMPTION |
+			ADRENO_IFPC,
+		.gpudev = &adreno_a6xx_hwsched_gpudev.base,
+		.perfcounters = &adreno_a6xx_hwsched_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = 2 * SZ_1M,
+	},
+	.prim_fifo_threshold = 0x0010000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a650_sqe.fw",
+	.gmufw_name = "a621_gmu.bin",
+	.zap_name = "a620_zap.mdt",
+	.hwcg = a620_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a620_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.veto_fal10 = true,
+	.pdc_in_aop = true,
+	.hang_detect_cycles = 0x3ffff,
+	.protected_regs = a620_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 13,
+};
+
+static const struct kgsl_regmap_list a640_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+};
+
+static const struct kgsl_regmap_list a680_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_HYST_TP0, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_CLOCK_CNTL, 0x8AA8AA82},
+	{A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+	{A6XX_GMUGX_GMU_SP_RF_CONTROL_0, 0x00000001},
+	{A6XX_GMUGX_GMU_SP_RF_CONTROL_1, 0x00000001},
+};
+
+/* These apply to a640, a680, a612, a610 and a702 */
+static const struct kgsl_regmap_list a640_vbif_regs[] = {
+	{A6XX_GBIF_QSB_SIDE0, 0x00071620},
+	{A6XX_GBIF_QSB_SIDE1, 0x00071620},
+	{A6XX_GBIF_QSB_SIDE2, 0x00071620},
+	{A6XX_GBIF_QSB_SIDE3, 0x00071620},
+	{A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3},
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a640 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A640, 6, 4, 0, ANY_ID),
+		.features = ADRENO_CONTENT_PROTECTION | ADRENO_IOCOHERENT |
+			ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_L3_VOTE,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_1M, /* verified 1MB */
+		.bus_width = 32,
+		.snapshot_size = 2 * SZ_1M,
+	},
+	.prim_fifo_threshold = 0x00200000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a630_sqe.fw",
+	.gmufw_name = "a640_gmu.bin",
+	.zap_name = "a640_zap.mdt",
+	.hwcg = a640_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a640_hwcg_regs),
+	.vbif = a640_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a640_vbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = a630_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 15,
+};
+
+static const struct kgsl_regmap_list a650_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a650 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A650, 6, 5, 0, 0),
+		.features = ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION |
+			ADRENO_IFPC | ADRENO_APRIV | ADRENO_L3_VOTE,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_1M + SZ_128K, /* verified 1152kB */
+		.bus_width = 32,
+		.snapshot_size = 2 * SZ_1M,
+	},
+	.prim_fifo_threshold = 0x00300000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a650_sqe.fw",
+	.gmufw_name = "a650_gmu.bin",
+	.zap_name = "a650_zap.mdt",
+	.hwcg = a650_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a650_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.veto_fal10 = true,
+	.pdc_in_aop = true,
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = a620_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 16,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a650v2 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A650, 6, 5, 0, ANY_ID),
+		.features = ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION |
+			ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD |
+			ADRENO_LM | ADRENO_APRIV | ADRENO_L3_VOTE,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_1M + SZ_128K, /* verified 1152kB */
+		.bus_width = 32,
+		.snapshot_size = 2 * SZ_1M,
+	},
+	.prim_fifo_threshold = 0x00300000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a650_sqe.fw",
+	.gmufw_name = "a650_gmu.bin",
+	.zap_name = "a650_zap.mdt",
+	.hwcg = a650_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a650_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.veto_fal10 = true,
+	.pdc_in_aop = true,
+	.hang_detect_cycles = 0x3ffff,
+	.protected_regs = a620_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 16,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a680 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A680, 6, 8, 0, ANY_ID),
+		.features = ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION |
+			ADRENO_IFPC | ADRENO_PREEMPTION,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_2M,
+		.bus_width = 32,
+		.snapshot_size = SZ_2M,
+	},
+	.prim_fifo_threshold = 0x00400000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a630_sqe.fw",
+	.gmufw_name = "a640_gmu.bin",
+	.zap_name = "a640_zap.mdt",
+	.hwcg = a680_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a680_hwcg_regs),
+	.vbif = a640_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a640_vbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = a630_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 16,
+};
+
+static const struct kgsl_regmap_list a612_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000081},
+	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01202222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x05522022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a612 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A612, 6, 1, 2, ANY_ID),
+		.features = ADRENO_CONTENT_PROTECTION |
+			ADRENO_IOCOHERENT | ADRENO_PREEMPTION | ADRENO_IFPC,
+		.gpudev = &adreno_a6xx_rgmu_gpudev,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = (SZ_128K + SZ_4K),
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.prim_fifo_threshold = 0x00080000,
+	.sqefw_name = "a630_sqe.fw",
+	.gmufw_name = "a612_rgmu.bin",
+	.zap_name = "a612_zap.mdt",
+	.hwcg = a612_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a612_hwcg_regs),
+	.vbif = a640_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a640_vbif_regs),
+	.hang_detect_cycles = 0x3fffff,
+	.protected_regs = a630_protected_regs,
+	.highest_bank_bit = 14,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a616 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A616, 6, 1, 6, ANY_ID),
+		.features = ADRENO_PREEMPTION |
+			ADRENO_CONTENT_PROTECTION | ADRENO_IFPC |
+			ADRENO_IOCOHERENT,
+		.gpudev = &adreno_a630_gpudev.base,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.prim_fifo_threshold = 0x0018000,
+	.gmu_major = 1,
+	.gmu_minor = 3,
+	.sqefw_name = "a630_sqe.fw",
+	.gmufw_name = "a630_gmu.bin",
+	.zap_name = "a615_zap.mdt",
+	.hwcg = a615_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a615_hwcg_regs),
+	.vbif = a615_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a615_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = a630_protected_regs,
+	.highest_bank_bit = 14,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a610 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A610, 6, 1, 0, ANY_ID),
+		.compatible = "qcom,adreno-gpu-a610",
+		.features = ADRENO_CONTENT_PROTECTION |
+			ADRENO_PREEMPTION,
+		.gpudev = &adreno_a6xx_gpudev,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = (SZ_128K + SZ_4K),
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.prim_fifo_threshold = 0x00080000,
+	.sqefw_name = "a630_sqe.fw",
+	.zap_name = "a610_zap.mdt",
+	.hwcg = a612_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a612_hwcg_regs),
+	.vbif = a640_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a640_vbif_regs),
+	.hang_detect_cycles = 0x3ffff,
+	.protected_regs = a630_protected_regs,
+	.highest_bank_bit = 14,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a611 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A611, 6, 1, 1, ANY_ID),
+		.compatible = "qcom,adreno-gpu-a611",
+		.features = ADRENO_CONTENT_PROTECTION |
+			ADRENO_PREEMPTION,
+		.gpudev = &adreno_a6xx_gpudev,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.uche_gmem_alignment = SZ_1M,
+		.gmem_size = (SZ_128K + SZ_4K),
+		.bus_width = 32,
+		.snapshot_size = SZ_1M,
+	},
+	.prim_fifo_threshold = 0x00080000,
+	.sqefw_name = "a630_sqe.fw",
+	.zap_name = "a610_zap.mbn",
+	.hwcg = a612_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a612_hwcg_regs),
+	.vbif = a640_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a640_vbif_regs),
+	.hang_detect_cycles = 0x3ffff,
+	.protected_regs = a630_protected_regs,
+	.highest_bank_bit = 14,
+};
+
+static const struct kgsl_regmap_list a660_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+};
+
+/* A660 protected register list */
+static const struct adreno_protected_regs a660_protected_regs[] = {
+	{ A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 },
+	{ A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 },
+	{ A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 },
+	{ A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 },
+	{ A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 },
+	{ A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 },
+	{ A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 },
+	{ A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 },
+	{ A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 },
+	{ A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 },
+	{ A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 },
+	{ A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 },
+	{ A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 },
+	{ A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 },
+	{ A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 },
+	{ A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 },
+	{ A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 },
+	{ A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 },
+	{ A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 },
+	{ A6XX_CP_PROTECT_REG + 20, 0x08e80, 0x090ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 21, 0x09624, 0x097ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 22, 0x09e60, 0x09e71, 1 },
+	{ A6XX_CP_PROTECT_REG + 23, 0x09e78, 0x09fff, 1 },
+	{ A6XX_CP_PROTECT_REG + 24, 0x0a630, 0x0a7ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 25, 0x0ae02, 0x0ae02, 1 },
+	{ A6XX_CP_PROTECT_REG + 26, 0x0ae50, 0x0af7f, 1 },
+	{ A6XX_CP_PROTECT_REG + 27, 0x0b604, 0x0b604, 1 },
+	{ A6XX_CP_PROTECT_REG + 28, 0x0b608, 0x0b60e, 1 },
+	{ A6XX_CP_PROTECT_REG + 29, 0x0be02, 0x0be03, 1 },
+	{ A6XX_CP_PROTECT_REG + 30, 0x0be20, 0x0bf7f, 1 },
+	{ A6XX_CP_PROTECT_REG + 31, 0x0d000, 0x0d5ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 32, 0x0f000, 0x0fbff, 1 },
+	{ A6XX_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 },
+	{ A6XX_CP_PROTECT_REG + 34, 0x18400, 0x1a3ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 35, 0x1a400, 0x1c3ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 36, 0x1c400, 0x1e3ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 37, 0x1f400, 0x1f843, 1 },
+	{ A6XX_CP_PROTECT_REG + 38, 0x1f844, 0x1f8bf, 0 },
+	{ A6XX_CP_PROTECT_REG + 39, 0x1f860, 0x1f860, 1 },
+	{ A6XX_CP_PROTECT_REG + 40, 0x1f887, 0x1f8a2, 1 },
+	{ A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 },
+	{ 0 },
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a660 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A660, 6, 6, 0, 0),
+		.features = ADRENO_APRIV |
+				ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION |
+				ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_L3_VOTE,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_1M + SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_2M,
+	},
+	.prim_fifo_threshold = 0x00300000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a660_sqe.fw",
+	.gmufw_name = "a660_gmu.bin",
+	.zap_name = "a660_zap.mdt",
+	.hwcg = a660_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a660_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.veto_fal10 = true,
+	.protected_regs = a660_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 16,
+	.pdc_in_aop = true,
+	.ctxt_record_size = 2496 * 1024,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a660v2 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A660, 6, 6, 0, ANY_ID),
+		.features = ADRENO_APRIV |
+				ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION |
+				ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD |
+				ADRENO_L3_VOTE,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_1M + SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_2M,
+	},
+	.prim_fifo_threshold = 0x00300000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a660_sqe.fw",
+	.gmufw_name = "a660_gmu.bin",
+	.zap_name = "a660_zap.mdt",
+	.hwcg = a660_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a660_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.veto_fal10 = true,
+	.protected_regs = a660_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 16,
+	.pdc_in_aop = true,
+	.ctxt_record_size = 2496 * 1024,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a660_shima = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A660, 6, 6, 0, ANY_ID),
+		.compatible = "qcom,adreno-gpu-a660-shima",
+		.features = ADRENO_APRIV |
+				ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION |
+				ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_1M + SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_2M,
+	},
+	.prim_fifo_threshold = 0x00300000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a660_sqe.fw",
+	.gmufw_name = "a660_gmu.bin",
+	.zap_name = "a660_zap.mdt",
+	.hwcg = a660_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a660_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.hang_detect_cycles = 0x3ffff,
+	.veto_fal10 = true,
+	.protected_regs = a660_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 15,
+	.pdc_in_aop = true,
+	.ctxt_record_size = 2496 * 1024,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a635 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A635, 6, 3, 5, ANY_ID),
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT |
+				ADRENO_CONTENT_PROTECTION,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_2M,
+	},
+	.prim_fifo_threshold = 0x00200000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a660_sqe.fw",
+	.gmufw_name = "a660_gmu.bin",
+	.zap_name = "a660_zap.mdt",
+	.hwcg = a660_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a660_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.hang_detect_cycles = 0x3ffff,
+	.veto_fal10 = true,
+	.protected_regs = a660_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 15,
+	.pdc_in_aop = true,
+	.ctxt_record_size = 2496 * 1024,
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a662 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A662, ANY_ID, ANY_ID, ANY_ID, ANY_ID),
+		.compatible = "qcom,adreno-gpu-a662",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT |
+			ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION |
+			ADRENO_IFPC | ADRENO_BCL | ADRENO_ACD,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_1M + SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_2M,
+	},
+	.prim_fifo_threshold = 0x00300000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a660_sqe.fw",
+	.gmufw_name = "a662_gmu.bin",
+	.zap_name = "a662_zap.mdt",
+	.hwcg = a660_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a660_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.hang_detect_cycles = 0x3ffff,
+	.veto_fal10 = true,
+	.protected_regs = a660_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 15,
+	.pdc_in_aop = true,
+	.ctxt_record_size = 2496 * 1024,
+};
+
+extern const struct gen7_snapshot_block_list gen7_0_0_snapshot_block_list;
+
+static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = {
+	{ GEN7_GBIF_QSB_SIDE0, 0x00071620 },
+	{ GEN7_GBIF_QSB_SIDE1, 0x00071620 },
+	{ GEN7_GBIF_QSB_SIDE2, 0x00071620 },
+	{ GEN7_GBIF_QSB_SIDE3, 0x00071620 },
+	{ GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212 },
+};
+
+static const struct kgsl_regmap_list a702_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000081},
+	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf},
+	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01202222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x05522022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+	{A6XX_RBBM_CLOCK_CNTL_FCHE, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_FCHE, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_FCHE, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_GLC, 0x00222222},
+	{A6XX_RBBM_CLOCK_DELAY_GLC, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GLC, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_MHUB, 0x00000002},
+	{A6XX_RBBM_CLOCK_DELAY_MHUB, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_MHUB, 0x00000000},
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a702 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A702, 7, 0, 2, ANY_ID),
+		.features = ADRENO_CONTENT_PROTECTION |
+					ADRENO_APRIV | ADRENO_PREEMPTION,
+		.gpudev = &adreno_a6xx_gpudev,
+		.perfcounters = &adreno_a6xx_legacy_perfcounters,
+		.gmem_size = SZ_128K,
+		.bus_width = 16,
+		.snapshot_size = SZ_1M,
+	},
+	.prim_fifo_threshold = 0x0000c000,
+	.sqefw_name = "a702_sqe.fw",
+	.zap_name = "a702_zap.mdt",
+	.hwcg = a702_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a702_hwcg_regs),
+	.vbif = a640_vbif_regs,
+	.vbif_count = ARRAY_SIZE(a640_vbif_regs),
+	.hang_detect_cycles = 0x3ffff,
+	.protected_regs = a620_protected_regs,
+	.highest_bank_bit = 14,
+};
+
+static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = {
+	{ GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 },
+	{ GEN7_RBBM_CLOCK_CNTL2_SP0, 0x02022222 },
+	{ GEN7_RBBM_CLOCK_HYST_SP0, 0x0000f3cf },
+	{ GEN7_RBBM_CLOCK_DELAY_SP0, 0x00000080 },
+	{ GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222220 },
+	{ GEN7_RBBM_CLOCK_CNTL2_TP0, 0x22222222 },
+	{ GEN7_RBBM_CLOCK_CNTL3_TP0, 0x22222222 },
+	{ GEN7_RBBM_CLOCK_CNTL4_TP0, 0x00222222 },
+	{ GEN7_RBBM_CLOCK_HYST_TP0, 0x77777777 },
+	{ GEN7_RBBM_CLOCK_HYST2_TP0, 0x77777777 },
+	{ GEN7_RBBM_CLOCK_HYST3_TP0, 0x77777777 },
+	{ GEN7_RBBM_CLOCK_HYST4_TP0, 0x00077777 },
+	{ GEN7_RBBM_CLOCK_DELAY_TP0, 0x11111111 },
+	{ GEN7_RBBM_CLOCK_DELAY2_TP0, 0x11111111 },
+	{ GEN7_RBBM_CLOCK_DELAY3_TP0, 0x11111111 },
+	{ GEN7_RBBM_CLOCK_DELAY4_TP0, 0x00011111 },
+	{ GEN7_RBBM_CLOCK_CNTL_UCHE, 0x22222222 },
+	{ GEN7_RBBM_CLOCK_HYST_UCHE, 0x00000004 },
+	{ GEN7_RBBM_CLOCK_DELAY_UCHE, 0x00000002 },
+	{ GEN7_RBBM_CLOCK_CNTL_RB0, 0x22222222 },
+	{ GEN7_RBBM_CLOCK_CNTL2_RB0, 0x01002222 },
+	{ GEN7_RBBM_CLOCK_CNTL_CCU0, 0x00002220 },
+	{ GEN7_RBBM_CLOCK_HYST_RB_CCU0, 0x44000f00 },
+	{ GEN7_RBBM_CLOCK_CNTL_RAC, 0x25222022 },
+	{ GEN7_RBBM_CLOCK_CNTL2_RAC, 0x00555555 },
+	{ GEN7_RBBM_CLOCK_DELAY_RAC, 0x00000011 },
+	{ GEN7_RBBM_CLOCK_HYST_RAC, 0x00440044 },
+	{ GEN7_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 },
+	{ GEN7_RBBM_CLOCK_MODE2_GRAS, 0x00000222 },
+	{ GEN7_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 },
+	{ GEN7_RBBM_CLOCK_MODE_GPC, 0x02222223 },
+	{ GEN7_RBBM_CLOCK_MODE_VFD, 0x00002222 },
+	{ GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222222 },
+	{ GEN7_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 },
+	{ GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_HYST_GPC, 0x04104004 },
+	{ GEN7_RBBM_CLOCK_HYST_VFD, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000 },
+	{ GEN7_RBBM_CLOCK_DELAY_GPC, 0x00000200 },
+	{ GEN7_RBBM_CLOCK_DELAY_VFD, 0x00002222 },
+	{ GEN7_RBBM_CLOCK_MODE_HLSQ, 0x00002222 },
+	{ GEN7_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_HYST_HLSQ, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002 },
+	{ GEN7_RBBM_CLOCK_MODE_BV_LRZ, 0x55555552 },
+	{ GEN7_RBBM_CLOCK_MODE_CP, 0x00000223 },
+	{ GEN7_RBBM_CLOCK_CNTL, 0x8aa8aa82 },
+	{ GEN7_RBBM_ISDB_CNT, 0x00000182 },
+	{ GEN7_RBBM_RAC_THRESHOLD_CNT, 0x00000000 },
+	{ GEN7_RBBM_SP_HYST_CNT, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 },
+	{ GEN7_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 },
+	{ GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 },
+};
+
+static const struct kgsl_regmap_list gen7_0_0_ao_hwcg_regs[] = {
+	{ GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 0x00020000 },
+	{ GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x00010111 },
+	{ GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 0x00005555 },
+};
+
+/* GEN7_0_0 protected register list */
+static const struct gen7_protected_regs gen7_0_0_protected_regs[] = {
+	{ GEN7_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 },
+	{ GEN7_CP_PROTECT_REG + 1, 0x0050b, 0x00563, 0 },
+	{ GEN7_CP_PROTECT_REG + 2, 0x0050e, 0x0050e, 1 },
+	{ GEN7_CP_PROTECT_REG + 3, 0x00510, 0x00510, 1 },
+	{ GEN7_CP_PROTECT_REG + 4, 0x00534, 0x00534, 1 },
+	{ GEN7_CP_PROTECT_REG + 5, 0x005fb, 0x00698, 0 },
+	{ GEN7_CP_PROTECT_REG + 6, 0x00699, 0x00882, 1 },
+	{ GEN7_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 },
+	{ GEN7_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 },
+	{ GEN7_CP_PROTECT_REG + 9, 0x008d0, 0x00a40, 0 },
+	{ GEN7_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 },
+	{ GEN7_CP_PROTECT_REG + 11, 0x0098d, 0x00a3f, 1 },
+	{ GEN7_CP_PROTECT_REG + 12, 0x00a41, 0x00bff, 1 },
+	{ GEN7_CP_PROTECT_REG + 13, 0x00df0, 0x00df1, 1 },
+	{ GEN7_CP_PROTECT_REG + 14, 0x00e01, 0x00e01, 1 },
+	{ GEN7_CP_PROTECT_REG + 15, 0x00e07, 0x00e0f, 1 },
+	{ GEN7_CP_PROTECT_REG + 16, 0x03c00, 0x03cc3, 1 },
+	{ GEN7_CP_PROTECT_REG + 17, 0x03cc4, 0x05cc3, 0 },
+	{ GEN7_CP_PROTECT_REG + 18, 0x08630, 0x087ff, 1 },
+	{ GEN7_CP_PROTECT_REG + 19, 0x08e00, 0x08e00, 1 },
+	{ GEN7_CP_PROTECT_REG + 20, 0x08e08, 0x08e08, 1 },
+	{ GEN7_CP_PROTECT_REG + 21, 0x08e50, 0x08e6f, 1 },
+	{ GEN7_CP_PROTECT_REG + 22, 0x08e80, 0x09100, 1 },
+	{ GEN7_CP_PROTECT_REG + 23, 0x09624, 0x097ff, 1 },
+	{ GEN7_CP_PROTECT_REG + 24, 0x09e40, 0x09e40, 1 },
+	{ GEN7_CP_PROTECT_REG + 25, 0x09e64, 0x09e71, 1 },
+	{ GEN7_CP_PROTECT_REG + 26, 0x09e78, 0x09fff, 1 },
+	{ GEN7_CP_PROTECT_REG + 27, 0x0a630, 0x0a7ff, 1 },
+	{ GEN7_CP_PROTECT_REG + 28, 0x0ae02, 0x0ae02, 1 },
+	{ GEN7_CP_PROTECT_REG + 29, 0x0ae50, 0x0ae5f, 1 },
+	{ GEN7_CP_PROTECT_REG + 30, 0x0ae66, 0x0ae69, 1 },
+	{ GEN7_CP_PROTECT_REG + 31, 0x0ae6f, 0x0ae72, 1 },
+	{ GEN7_CP_PROTECT_REG + 32, 0x0b604, 0x0b607, 1 },
+	{ GEN7_CP_PROTECT_REG + 33, 0x0ec00, 0x0fbff, 1 },
+	{ GEN7_CP_PROTECT_REG + 34, 0x0fc00, 0x11bff, 0 },
+	{ GEN7_CP_PROTECT_REG + 35, 0x18400, 0x1844a, 1 },
+	{ GEN7_CP_PROTECT_REG + 36, 0x1844b, 0x1857f, 0 },
+	{ GEN7_CP_PROTECT_REG + 37, 0x1844c, 0x18453, 1 },
+	{ GEN7_CP_PROTECT_REG + 38, 0x18580, 0x1a57f, 1 },
+	{ GEN7_CP_PROTECT_REG + 39, 0x1a580, 0x1c57f, 1 },
+	{ GEN7_CP_PROTECT_REG + 40, 0x1c580, 0x1e57f, 1 },
+	{ GEN7_CP_PROTECT_REG + 41, 0x1f400, 0x1f843, 1 },
+	{ GEN7_CP_PROTECT_REG + 42, 0x1f844, 0x1f8bf, 0 },
+	{ GEN7_CP_PROTECT_REG + 43, 0x1f860, 0x1f860, 1 },
+	{ GEN7_CP_PROTECT_REG + 44, 0x1f87f, 0x1f8a2, 1 },
+	{ GEN7_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 },
+	{ 0 },
+};
+
+static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_GEN7_0_0,
+				UINT_MAX, UINT_MAX, UINT_MAX, 0),
+		.compatible = "qcom,adreno-gpu-gen7-0-0",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT |
+				ADRENO_CONTENT_PROTECTION | ADRENO_IFPC |
+				ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL |
+				ADRENO_PREEMPTION,
+		.gpudev = &adreno_gen7_gmu_gpudev.base,
+		.perfcounters = &adreno_gen7_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_2M,
+		.bus_width = 32,
+		.snapshot_size = SZ_4M,
+	},
+	.gmu_fw_version = GMU_VERSION(4, 0, 0),
+	.sqefw_name = "a730_sqe.fw",
+	.gmufw_name = "gmu_gen70000.bin",
+	.gmufw_bak_name = "c500_gmu.bin",
+	.zap_name = "a730_zap.mdt",
+	.hwcg = gen7_0_0_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs),
+	.ao_hwcg = gen7_0_0_ao_hwcg_regs,
+	.ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs),
+	.gbif = gen7_0_0_gbif_regs,
+	.gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = gen7_0_0_protected_regs,
+	.highest_bank_bit = 16,
+	.gen7_snapshot_block_list = &gen7_0_0_snapshot_block_list,
+	.preempt_level = 1,
+	.ctxt_record_size = (2860 * SZ_1K),
+	.fast_bus_hint = true,
+};
+
+static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_GEN7_0_1,
+				UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID),
+		.compatible = "qcom,adreno-gpu-gen7-0-1",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT |
+				ADRENO_CONTENT_PROTECTION | ADRENO_IFPC |
+				ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL |
+				ADRENO_PREEMPTION,
+		.gpudev = &adreno_gen7_gmu_gpudev.base,
+		.perfcounters = &adreno_gen7_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_2M,
+		.bus_width = 32,
+		.snapshot_size = SZ_4M,
+	},
+	.gmu_fw_version = GMU_VERSION(4, 0, 0),
+	.sqefw_name = "a730_sqe.fw",
+	.gmufw_name = "gmu_gen70000.bin",
+	.gmufw_bak_name = "c500_gmu.bin",
+	.zap_name = "a730_zap.mdt",
+	.hwcg = gen7_0_0_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs),
+	.ao_hwcg = gen7_0_0_ao_hwcg_regs,
+	.ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs),
+	.gbif = gen7_0_0_gbif_regs,
+	.gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = gen7_0_0_protected_regs,
+	.highest_bank_bit = 16,
+	.gen7_snapshot_block_list = &gen7_0_0_snapshot_block_list,
+	.preempt_level = 1,
+	.ctxt_record_size = (2860 * SZ_1K),
+	.fast_bus_hint = true,
+};
+
+extern const struct gen7_snapshot_block_list gen7_2_0_snapshot_block_list;
+
+static const struct kgsl_regmap_list gen7_2_0_gbif_regs[] = {
+	{ GEN7_GBIF_QSB_SIDE0, 0x00071620 },
+	{ GEN7_GBIF_QSB_SIDE1, 0x00071620 },
+	{ GEN7_GBIF_QSB_SIDE2, 0x00071620 },
+	{ GEN7_GBIF_QSB_SIDE3, 0x00071620 },
+	{ GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212 },
+	{ GEN7_GMU_CX_MRC_GBIF_QOS_CTRL, 0x33 },
+};
+
+static const struct kgsl_regmap_list gen7_2_0_hwcg_regs[] = {
+	{ GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 },
+	{ GEN7_RBBM_CLOCK_CNTL2_SP0, 0x22022222 },
+	{ GEN7_RBBM_CLOCK_HYST_SP0, 0x003cf3cf },
+	{ GEN7_RBBM_CLOCK_DELAY_SP0, 0x00000080 },
+	{ GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222220 },
+	{ GEN7_RBBM_CLOCK_CNTL2_TP0, 0x22222222 },
+	{ GEN7_RBBM_CLOCK_CNTL3_TP0, 0x22222222 },
+	{ GEN7_RBBM_CLOCK_CNTL4_TP0, 0x00222222 },
+	{ GEN7_RBBM_CLOCK_HYST_TP0, 0x77777777 },
+	{ GEN7_RBBM_CLOCK_HYST2_TP0, 0x77777777 },
+	{ GEN7_RBBM_CLOCK_HYST3_TP0, 0x77777777 },
+	{ GEN7_RBBM_CLOCK_HYST4_TP0, 0x00077777 },
+	{ GEN7_RBBM_CLOCK_DELAY_TP0, 0x11111111 },
+	{ GEN7_RBBM_CLOCK_DELAY2_TP0, 0x11111111 },
+	{ GEN7_RBBM_CLOCK_DELAY3_TP0, 0x11111111 },
+	{ GEN7_RBBM_CLOCK_DELAY4_TP0, 0x00011111 },
+	{ GEN7_RBBM_CLOCK_CNTL_UCHE, 0x22222222 },
+	{ GEN7_RBBM_CLOCK_CNTL2_UCHE, 0x00222222 },
+	{ GEN7_RBBM_CLOCK_HYST_UCHE, 0x00000444 },
+	{ GEN7_RBBM_CLOCK_DELAY_UCHE, 0x00000222 },
+	{ GEN7_RBBM_CLOCK_CNTL_RB0, 0x22222222 },
+	{ GEN7_RBBM_CLOCK_CNTL2_RB0, 0x01002222 },
+	{ GEN7_RBBM_CLOCK_CNTL_CCU0, 0x00002220 },
+	{ GEN7_RBBM_CLOCK_HYST_RB_CCU0, 0x44000f00 },
+	{ GEN7_RBBM_CLOCK_CNTL_RAC, 0x25222022 },
+	{ GEN7_RBBM_CLOCK_CNTL2_RAC, 0x00555555 },
+	{ GEN7_RBBM_CLOCK_DELAY_RAC, 0x00000011 },
+	{ GEN7_RBBM_CLOCK_HYST_RAC, 0x00440044 },
+	{ GEN7_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 },
+	{ GEN7_RBBM_CLOCK_MODE2_GRAS, 0x00000222 },
+	{ GEN7_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 },
+	{ GEN7_RBBM_CLOCK_MODE_GPC, 0x02222223 },
+	{ GEN7_RBBM_CLOCK_MODE_VFD, 0x00222222 },
+	{ GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222222 },
+	{ GEN7_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 },
+	{ GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_HYST_GPC, 0x04104004 },
+	{ GEN7_RBBM_CLOCK_HYST_VFD, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_DELAY_GPC, 0x00000200 },
+	{ GEN7_RBBM_CLOCK_DELAY_VFD, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_MODE_HLSQ, 0x00002222 },
+	{ GEN7_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_HYST_HLSQ, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_MODE_BV_LRZ, 0x55555552 },
+	{ GEN7_RBBM_CLOCK_HYST2_VFD, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_MODE_CP, 0x00000222 },
+	{ GEN7_RBBM_CLOCK_CNTL, 0x8aa8aa82 },
+	{ GEN7_RBBM_ISDB_CNT, 0x00000182 },
+	{ GEN7_RBBM_RAC_THRESHOLD_CNT, 0x00000000 },
+	{ GEN7_RBBM_SP_HYST_CNT, 0x00000000 },
+	{ GEN7_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 },
+	{ GEN7_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 },
+	{ GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 },
+};
+
+static const struct kgsl_regmap_list gen7_2_0_ao_hwcg_regs[] = {
+	{ GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 0x00020202 },
+	{ GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x00010111 },
+	{ GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 0x00005555 },
+};
+
+static const struct adreno_gen7_core adreno_gpu_core_gen7_2_0 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_GEN7_2_0,
+				UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID),
+		.compatible = "qcom,adreno-gpu-gen7-2-0",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_IFPC |
+				ADRENO_CONTENT_PROTECTION | ADRENO_ACD |
+				ADRENO_LPAC | ADRENO_BCL | ADRENO_L3_VOTE |
+				ADRENO_PREEMPTION | ADRENO_DMS,
+		.gpudev = &adreno_gen7_hwsched_gpudev.base,
+		.perfcounters = &adreno_gen7_hwsched_perfcounters,
+		.uche_gmem_alignment = SZ_16M,
+		.gmem_size = 3 * SZ_1M,
+		.bus_width = 32,
+		.snapshot_size = SZ_8M,
+	},
+	.gmu_fw_version = GMU_VERSION(4, 1, 0),
+	.sqefw_name = "a740_sqe.fw",
+	.gmufw_name = "gmu_gen70200.bin",
+	.zap_name = "a740_zap.mbn",
+	.hwcg = gen7_2_0_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs),
+	.ao_hwcg = gen7_2_0_ao_hwcg_regs,
+	.ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs),
+	.gbif = gen7_2_0_gbif_regs,
+	.gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = gen7_0_0_protected_regs,
+	.highest_bank_bit = 16,
+	.gmu_hub_clk_freq = 200000000,
+	.gen7_snapshot_block_list = &gen7_2_0_snapshot_block_list,
+	.bcl_data = 1,
+	.preempt_level = 1,
+	.ctxt_record_size = (4192 * SZ_1K),
+	.fast_bus_hint = true,
+};
+
+static const struct adreno_gen7_core adreno_gpu_core_gen7_2_1 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_GEN7_2_1,
+				UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID),
+		.compatible = "qcom,adreno-gpu-gen7-2-1",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_IFPC |
+				ADRENO_CONTENT_PROTECTION | ADRENO_LPAC |
+				ADRENO_BCL | ADRENO_L3_VOTE | ADRENO_ACD |
+				ADRENO_PREEMPTION | ADRENO_DMS,
+		.gpudev = &adreno_gen7_hwsched_gpudev.base,
+		.perfcounters = &adreno_gen7_hwsched_perfcounters,
+		.uche_gmem_alignment = SZ_16M,
+		.gmem_size = 3 * SZ_1M,
+		.bus_width = 32,
+		.snapshot_size = SZ_8M,
+	},
+	.gmu_fw_version = GMU_VERSION(4, 1, 0),
+	.sqefw_name = "a740_sqe.fw",
+	.gmufw_name = "gmu_gen70200.bin",
+	.zap_name = "a740_zap.mbn",
+	.hwcg = gen7_2_0_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs),
+	.ao_hwcg = gen7_2_0_ao_hwcg_regs,
+	.ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs),
+	.gbif = gen7_2_0_gbif_regs,
+	.gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = gen7_0_0_protected_regs,
+	.highest_bank_bit = 16,
+	.gmu_hub_clk_freq = 200000000,
+	.gen7_snapshot_block_list = &gen7_2_0_snapshot_block_list,
+	.bcl_data = 1,
+	.preempt_level = 1,
+	.ctxt_record_size = (4192 * SZ_1K),
+	.fast_bus_hint = true,
+};
+
+static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_GEN7_4_0,
+				UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID),
+		.compatible = "qcom,adreno-gpu-gen7-4-0",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT |
+				ADRENO_CONTENT_PROTECTION | ADRENO_L3_VOTE |
+				ADRENO_PREEMPTION | ADRENO_IFPC | ADRENO_ACD |
+				ADRENO_BCL,
+		.gpudev = &adreno_gen7_gmu_gpudev.base,
+		.perfcounters = &adreno_gen7_perfcounters,
+		.uche_gmem_alignment = 0,
+		.gmem_size = SZ_2M,
+		.bus_width = 32,
+		.snapshot_size = SZ_4M,
+	},
+	.gmu_fw_version = GMU_VERSION(4, 0, 7),
+	.sqefw_name = "a730_sqe.fw",
+	.gmufw_name = "gmu_gen70000.bin",
+	.gmufw_bak_name = "c500_gmu.bin",
+	.zap_name = "a730_zap.mdt",
+	.hwcg = gen7_0_0_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs),
+	.ao_hwcg = gen7_0_0_ao_hwcg_regs,
+	.ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs),
+	.gbif = gen7_0_0_gbif_regs,
+	.gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = gen7_0_0_protected_regs,
+	.highest_bank_bit = 16,
+	.gen7_snapshot_block_list = &gen7_0_0_snapshot_block_list,
+	.preempt_level = 1,
+	.ctxt_record_size = (2860 * SZ_1K),
+	.fast_bus_hint = true,
+};
+
+extern const struct gen7_snapshot_block_list gen7_9_0_snapshot_block_list;
+
+/* GEN7_9_0 protected register list */
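+/*
+ * Each entry below is { protect aperture register, first dword offset,
+ * last dword offset, flag }; judging by the other Adreno protect lists
+ * in this driver, the flag appears to select full no-access (1) versus
+ * write-protect only (0) for the range.
+ */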
+static const struct gen7_protected_regs gen7_9_0_protected_regs[] = {
+	{ GEN7_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 },
+	{ GEN7_CP_PROTECT_REG + 1, 0x0050b, 0x00563, 0 },
+	{ GEN7_CP_PROTECT_REG + 2, 0x00584, 0x006c1, 0 },
+	{ GEN7_CP_PROTECT_REG + 3, 0x00706, 0x00706, 0 },
+	{ GEN7_CP_PROTECT_REG + 4, 0x00720, 0x0073f, 0 },
+	{ GEN7_CP_PROTECT_REG + 5, 0x00760, 0x007ff, 0 },
+	{ GEN7_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 },
+	{ GEN7_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 },
+	{ GEN7_CP_PROTECT_REG + 8, 0x008ab, 0x00a40, 0 },
+	{ GEN7_CP_PROTECT_REG + 9, 0x00900, 0x0094d, 1 },
+	{ GEN7_CP_PROTECT_REG + 10, 0x0098d, 0x00a3f, 1 },
+	{ GEN7_CP_PROTECT_REG + 11, 0x00a41, 0x00bff, 1 },
+	{ GEN7_CP_PROTECT_REG + 12, 0x00df0, 0x00df1, 1 },
+	{ GEN7_CP_PROTECT_REG + 13, 0x00e01, 0x00e01, 1 },
+	{ GEN7_CP_PROTECT_REG + 14, 0x00e07, 0x00e0f, 1 },
+	{ GEN7_CP_PROTECT_REG + 15, 0x02840, 0x03cc3, 1 },
+	{ GEN7_CP_PROTECT_REG + 16, 0x03cc4, 0x05cc3, 0 },
+	{ GEN7_CP_PROTECT_REG + 17, 0x08630, 0x087ff, 1 },
+	{ GEN7_CP_PROTECT_REG + 18, 0x08e00, 0x08e00, 1 },
+	{ GEN7_CP_PROTECT_REG + 19, 0x08e08, 0x08e08, 1 },
+	{ GEN7_CP_PROTECT_REG + 20, 0x08e50, 0x08e6f, 1 },
+	{ GEN7_CP_PROTECT_REG + 21, 0x08e79, 0x09100, 1 },
+	{ GEN7_CP_PROTECT_REG + 22, 0x09624, 0x097ff, 1 },
+	{ GEN7_CP_PROTECT_REG + 23, 0x09b0b, 0x09dff, 0 },
+	{ GEN7_CP_PROTECT_REG + 24, 0x09e1a, 0x09e1b, 1 },
+	{ GEN7_CP_PROTECT_REG + 25, 0x09e40, 0x09e40, 1 },
+	{ GEN7_CP_PROTECT_REG + 26, 0x09e64, 0x09e64, 1 },
+	{ GEN7_CP_PROTECT_REG + 27, 0x09e70, 0x09e71, 1 },
+	{ GEN7_CP_PROTECT_REG + 28, 0x09e78, 0x09fff, 1 },
+	{ GEN7_CP_PROTECT_REG + 29, 0x0a630, 0x0a7ff, 1 },
+	{ GEN7_CP_PROTECT_REG + 30, 0x0ae02, 0x0ae02, 1 },
+	{ GEN7_CP_PROTECT_REG + 31, 0x0ae50, 0x0ae5f, 1 },
+	{ GEN7_CP_PROTECT_REG + 32, 0x0ae66, 0x0ae69, 1 },
+	{ GEN7_CP_PROTECT_REG + 33, 0x0ae6f, 0x0ae72, 1 },
+	{ GEN7_CP_PROTECT_REG + 34, 0x0b602, 0x0b607, 1 },
+	{ GEN7_CP_PROTECT_REG + 35, 0x0ec00, 0x0fbff, 1 },
+	{ GEN7_CP_PROTECT_REG + 36, 0x0fc00, 0x11bff, 0 },
+	{ GEN7_CP_PROTECT_REG + 37, 0x18400, 0x1857f, 0 },
+	{ GEN7_CP_PROTECT_REG + 38, 0x18580, 0x1a57f, 1 },
+	{ GEN7_CP_PROTECT_REG + 39, 0x1a580, 0x1c57f, 1 },
+	{ GEN7_CP_PROTECT_REG + 40, 0x1c580, 0x1e57f, 1 },
+	{ GEN7_CP_PROTECT_REG + 41, 0x1f400, 0x1f843, 1 },
+	{ GEN7_CP_PROTECT_REG + 42, 0x1f844, 0x1f8b7, 0 },
+	{ GEN7_CP_PROTECT_REG + 43, 0x1f87f, 0x1f8a2, 1 },
+	{ GEN7_CP_PROTECT_REG + 44, 0x1f8b8, 0x218b7, 1 },
+	{ GEN7_CP_PROTECT_REG + 45, 0x27800, 0x2787f, 1 },
+	{ GEN7_CP_PROTECT_REG + 46, 0x27880, 0x27c01, 0 },
+	{ GEN7_CP_PROTECT_REG + 47, 0x27c02, 0x27c02, 1 },
+	{ 0 },
+};
+
+static const struct adreno_gen7_core adreno_gpu_core_gen7_9_0 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_GEN7_9_0,
+				  UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID),
+		.compatible = "qcom,adreno-gpu-gen7-9-0",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_AQE |
+			ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_IFPC |
+			ADRENO_L3_VOTE | ADRENO_BCL | ADRENO_DMS |
+			ADRENO_HW_FENCE | ADRENO_PREEMPTION | ADRENO_ACD |
+			ADRENO_GMU_WARMBOOT,
+		.gpudev = &adreno_gen7_9_0_hwsched_gpudev.base,
+		.perfcounters = &adreno_gen7_9_0_hwsched_perfcounters,
+		.uche_gmem_alignment = SZ_16M,
+		.gmem_size = 3 * SZ_1M,
+		.bus_width = 32,
+		.snapshot_size = SZ_8M,
+		.num_ddr_channels = 4,
+	},
+	.aqefw_name = "gen70900_aqe.fw",
+	.sqefw_name = "gen70900_sqe.fw",
+	.gmufw_name = "gmu_gen70900.bin",
+	.zap_name = "gen70900_zap.mbn",
+	.ao_hwcg = gen7_2_0_ao_hwcg_regs,
+	.ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs),
+	.gbif = gen7_2_0_gbif_regs,
+	.gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = gen7_9_0_protected_regs,
+	.highest_bank_bit = 16,
+	.gmu_hub_clk_freq = 200000000,
+	.gen7_snapshot_block_list = &gen7_9_0_snapshot_block_list,
+	.bcl_data = 1,
+	.acv_perfmode_vote = BIT(2),
+	.acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4),
+	.ctxt_record_size = (4208 * SZ_1K),
+	.preempt_level = 1,
+	.fast_bus_hint = true,
+};
+
+static const struct adreno_gen7_core adreno_gpu_core_gen7_9_1 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_GEN7_9_1,
+				  UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID),
+		.compatible = "qcom,adreno-gpu-gen7-9-1",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_AQE |
+			ADRENO_CONTENT_PROTECTION | ADRENO_LPAC | ADRENO_IFPC |
+			ADRENO_L3_VOTE | ADRENO_BCL | ADRENO_DMS |
+			ADRENO_HW_FENCE | ADRENO_PREEMPTION | ADRENO_ACD |
+			ADRENO_GMU_WARMBOOT,
+		.gpudev = &adreno_gen7_9_0_hwsched_gpudev.base,
+		.perfcounters = &adreno_gen7_9_0_hwsched_perfcounters,
+		.uche_gmem_alignment = SZ_16M,
+		.gmem_size = 3 * SZ_1M,
+		.bus_width = 32,
+		.snapshot_size = SZ_8M,
+		.num_ddr_channels = 4,
+	},
+	.aqefw_name = "gen70900_aqe.fw",
+	.sqefw_name = "gen70900_sqe.fw",
+	.gmufw_name = "gmu_gen70900.bin",
+	.zap_name = "gen70900_zap.mbn",
+	.ao_hwcg = gen7_2_0_ao_hwcg_regs,
+	.ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs),
+	.gbif = gen7_0_0_gbif_regs,
+	.gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = gen7_9_0_protected_regs,
+	.highest_bank_bit = 16,
+	.gmu_hub_clk_freq = 200000000,
+	.gen7_snapshot_block_list = &gen7_9_0_snapshot_block_list,
+	.bcl_data = 1,
+	.acv_perfmode_vote = BIT(2),
+	.acv_perfmode_ddr_freq = MHZ_TO_KBPS(2736, 4),
+	.ctxt_record_size = (4208 * SZ_1K),
+	.preempt_level = 1,
+	.fast_bus_hint = true,
+};
+
+extern const struct gen7_snapshot_block_list gen7_11_0_snapshot_block_list;
+
+static const struct adreno_gen7_core adreno_gpu_core_gen7_11_0 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_GEN7_11_0,
+				  UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID),
+		.compatible = "qcom,adreno-gpu-gen7-11-0",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION |
+			ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_L3_VOTE |
+			ADRENO_DMS | ADRENO_BCL,
+		.gpudev = &adreno_gen7_hwsched_gpudev.base,
+		.perfcounters = &adreno_gen7_hwsched_perfcounters,
+		.uche_gmem_alignment = SZ_16M,
+		.gmem_size = SZ_1M + SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_4M,
+		.num_ddr_channels = 4,
+	},
+	.sqefw_name = "gen71100_sqe.fw",
+	.gmufw_name = "gen71100_gmu.bin",
+	.zap_name = "gen71100_zap.mbn",
+	.hwcg = gen7_2_0_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs),
+	.ao_hwcg = gen7_2_0_ao_hwcg_regs,
+	.ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs),
+	.gbif = gen7_2_0_gbif_regs,
+	.gbif_count = ARRAY_SIZE(gen7_2_0_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = gen7_0_0_protected_regs,
+	.highest_bank_bit = 16,
+	.gmu_hub_clk_freq = 200000000,
+	.gen7_snapshot_block_list = &gen7_11_0_snapshot_block_list,
+	.preempt_level = 1,
+	.acv_perfmode_vote = BIT(2),
+	.bcl_data = 1,
+	.fast_bus_hint = true,
+	.ctxt_record_size = (2196 * SZ_1K),
+};
+
+static const struct kgsl_regmap_list a663_hwcg_regs[] = {
+	{A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
+	{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
+	{A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A6XX_RBBM_CLOCK_CNTL, 0x8AA8AA82},
+	{A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+	{A6XX_GMUAO_GMU_CGC_MODE_CNTL, 0x00020200},
+	{A6XX_GMUAO_GMU_CGC_DELAY_CNTL, 0x00010111},
+	{A6XX_GMUAO_GMU_CGC_HYST_CNTL, 0x00005555},
+	{A6XX_GMUCX_GMU_WFI_CONFIG, 0x00000000},
+};
+
+/* A663 protected register list */
+static const struct adreno_protected_regs a663_protected_regs[] = {
+	{ A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 },
+	{ A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 },
+	{ A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 },
+	{ A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 },
+	{ A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 },
+	{ A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 },
+	{ A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 },
+	{ A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 },
+	{ A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 },
+	{ A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 },
+	{ A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 },
+	{ A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 },
+	{ A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 },
+	{ A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 },
+	{ A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 },
+	{ A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 },
+	{ A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 },
+	{ A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 },
+	{ A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 },
+	{ A6XX_CP_PROTECT_REG + 20, 0x08e80, 0x090ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 21, 0x09624, 0x097ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 22, 0x09e60, 0x09e71, 1 },
+	{ A6XX_CP_PROTECT_REG + 23, 0x09e78, 0x09fff, 1 },
+	{ A6XX_CP_PROTECT_REG + 24, 0x0a630, 0x0a7ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 25, 0x0ae02, 0x0ae02, 1 },
+	{ A6XX_CP_PROTECT_REG + 26, 0x0ae50, 0x0af7f, 1 },
+	{ A6XX_CP_PROTECT_REG + 27, 0x0b604, 0x0b604, 1 },
+	{ A6XX_CP_PROTECT_REG + 28, 0x0b608, 0x0b60e, 1 },
+	{ A6XX_CP_PROTECT_REG + 29, 0x0be02, 0x0be03, 1 },
+	{ A6XX_CP_PROTECT_REG + 30, 0x0be20, 0x0bf7f, 1 },
+	{ A6XX_CP_PROTECT_REG + 31, 0x0d000, 0x0d5ff, 1 },
+	{ A6XX_CP_PROTECT_REG + 32, 0x0f000, 0x0fbff, 1 },
+	{ A6XX_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 },
+	/* Note 1: The lastspanunbound feature is enabled in
+	 *         CP_PROTECT_CNTL, so this last protect
+	 *         register (REG_47) has an infinite span.
+	 *
+	 * Note 2: Although the SMMU range is protected here,
+	 *         the CP register protection interrupt will
+	 *         not fire for this range because the GPU RAP
+	 *         only covers the GPU's 18-bit dword address
+	 *         space (max address offset 0x3FFFF). The
+	 *         address-in-violation field in CP_PROT_STATUS
+	 *         is likewise only 18 bits wide.
+	 */
+	{ A6XX_CP_PROTECT_REG + 47, 0x11c00, 0x00000, 1 },
+	{ 0 },
+};
+
+static const struct adreno_a6xx_core adreno_gpu_core_a663 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_A663, 6, 6, 3, ANY_ID),
+		.features = ADRENO_APRIV |
+				ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION |
+				ADRENO_PREEMPTION | ADRENO_ACD,
+		.gpudev = &adreno_a6xx_gmu_gpudev.base,
+		.perfcounters = &adreno_a6xx_perfcounters,
+		.gmem_size = SZ_1M + SZ_512K,
+		.bus_width = 32,
+		.snapshot_size = SZ_2M,
+	},
+	.prim_fifo_threshold = 0x00300000,
+	.gmu_major = 2,
+	.gmu_minor = 0,
+	.sqefw_name = "a660_sqe.fw",
+	.gmufw_name = "a663_gmu.bin",
+	.zap_name = "a663_zap.mdt",
+	.hwcg = a663_hwcg_regs,
+	.hwcg_count = ARRAY_SIZE(a663_hwcg_regs),
+	.vbif = a650_gbif_regs,
+	.vbif_count = ARRAY_SIZE(a650_gbif_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.veto_fal10 = true,
+	.protected_regs = a663_protected_regs,
+	.disable_tseskip = true,
+	.highest_bank_bit = 13,
+	.pdc_in_aop = true,
+	.ctxt_record_size = 2496 * 1024,
+};
+
+extern const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list;
+
+static const struct kgsl_regmap_list gen8_3_0_gbif_cx_regs[] = {
+	{ GEN8_GBIF_QSB_SIDE0, 0x00071e20 },
+	{ GEN8_GBIF_QSB_SIDE1, 0x00071e20 },
+	{ GEN8_GBIF_QSB_SIDE2, 0x00071e20 },
+	{ GEN8_GBIF_QSB_SIDE3, 0x00071e20 },
+	{ GEN8_GBIF_CX_CONFIG, 0x20023000 },
+};
+
+/* GEN8_3_0 noncontext register list */
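+/*
+ * Each entry is { register, value, pipe mask }; the pipe mask
+ * (BIT(PIPE_BR), BIT(PIPE_BV) or BIT(PIPE_NONE)) presumably selects
+ * which pipes the write is applied to.
+ */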
+static const struct gen8_nonctxt_regs gen8_3_0_nonctxt_regs[] = {
+	{ GEN8_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) },
+	{ GEN8_GRAS_DBG_ECO_CNTL, 0x00f80800, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_PC_AUTO_VERTEX_STRIDE, 0x00000001, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_PC_VIS_STREAM_CNTL, 0x10010000, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, 0x00000002, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_PC_CHICKEN_BITS_1, 0x00000003, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_PC_CHICKEN_BITS_2, 0x00000200, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_PC_CHICKEN_BITS_3, 0x00500000, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_PC_CHICKEN_BITS_4, 0x00500050, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_RB_CCU_CNTL, 0x00000068, BIT(PIPE_BR) },
+	{ GEN8_RB_RESOLVE_PREFETCH_CNTL, 0x00000007, BIT(PIPE_BR) },
+	{ GEN8_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) },
+	{ GEN8_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) },
+	{ GEN8_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) },
+	{ GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL, 0x00000030, BIT(PIPE_NONE) },
+	{ GEN8_RBBM_WAIT_IDLE_CLOCKS_CNTL2, 0x00000030, BIT(PIPE_NONE) },
+	{ GEN8_UCHE_GBIF_GX_CONFIG, 0x010240e0, BIT(PIPE_NONE) },
+	{ GEN8_RBBM_GBIF_CLIENT_QOS_CNTL, 0x22122212, BIT(PIPE_NONE) },
+	{ GEN8_RBBM_CGC_P2S_CNTL, 0x00000040, BIT(PIPE_NONE) },
+	/*
+	 * BIT(22): Disable PS out-of-order retire
+	 * BIT(23): Enable half-wave mode; MM instruction src and dst are half precision
+	 */
+	{ GEN8_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) },
+	{ GEN8_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) },
+	{ GEN8_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) },
+	{ GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) },
+	{ GEN8_SP_READ_SEL, 0x0001ff00, BIT(PIPE_NONE) },
+	{ GEN8_TPL1_DBG_ECO_CNTL, 0x10000000, BIT(PIPE_NONE) },
+	{ GEN8_TPL1_DBG_ECO_CNTL1, 0x00000724, BIT(PIPE_NONE) },
+	{ GEN8_UCHE_MODE_CNTL, 0x00020000, BIT(PIPE_NONE) },
+	{ GEN8_UCHE_CCHE_MODE_CNTL, 0x00001000, BIT(PIPE_NONE) },
+	{ GEN8_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE) },
+	{ GEN8_UCHE_CACHE_WAYS, 0x00080000, BIT(PIPE_NONE) },
+	{ GEN8_UCHE_VARB_IDLE_TIMEOUT, 0x00000020, BIT(PIPE_NONE) },
+	{ GEN8_VFD_DBG_ECO_CNTL, 0x00008000, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_VFD_CB_BV_THRESHOLD, 0x00500050, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_VFD_CB_BR_THRESHOLD, 0x00600060, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_VFD_CB_LP_REQ_CNT, 0x00100020, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BV) | BIT(PIPE_BR) },
+	{ GEN8_VSC_BIN_SIZE, 0x00010001, BIT(PIPE_NONE) },
+	{ GEN8_RB_GC_GMEM_PROTECT, 0x00900000, BIT(PIPE_BR) },
+	{ 0 },
+};
+
+static const struct kgsl_regmap_list gen8_ao_hwcg_regs[] = {
+	{ GEN8_GMUAO_CGC_MODE_CNTL, 0x00020000 },
+	{ GEN8_GMUAO_CGC_DELAY_CNTL, 0x00010111 },
+	{ GEN8_GMUAO_CGC_HYST_CNTL, 0x00005555 },
+};
+
+/* GEN8_3_0 protected register list */
+static const struct gen8_protected_regs gen8_3_0_protected_regs[] = {
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 0, 0x00000, 0x003a3, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 1, 0x003b4, 0x0043f, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 2, 0x00440, 0x0045f, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 3, 0x00580, 0x005df, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 4, 0x005e0, 0x006ff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 5, 0x0074a, 0x0074f, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 6, 0x00759, 0x0077f, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 7, 0x00789, 0x00789, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 8, 0x0078c, 0x0079f, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 9, 0x00800, 0x00829, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 10, 0x00837, 0x008e6, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 11, 0x008e7, 0x009b0, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 12, 0x008ec, 0x009af, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 13, 0x009b1, 0x00c01, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 14, 0x00ce0, 0x00ce1, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 15, 0x00df0, 0x00df0, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 16, 0x00df1, 0x00df1, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 17, 0x00e01, 0x00e01, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 18, 0x00e03, 0x02e02, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 19, 0x03c00, 0x03cc5, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 20, 0x03cc6, 0x05cc5, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 21, 0x08600, 0x087ff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 22, 0x08e00, 0x08eff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 23, 0x08f00, 0x08f00, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 24, 0x08f01, 0x090bf, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 25, 0x09600, 0x097ff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 26, 0x0981a, 0x09aff, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 27, 0x09e00, 0x09fff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 28, 0x0a600, 0x0a7ff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 29, 0x0ae00, 0x0ae06, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 30, 0x0ae08, 0x0ae0e, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 31, 0x0ae10, 0x0b17f, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 32, 0x0b600, 0x0d5ff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 33, 0x0dc00, 0x0fbff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 34, 0x0fc00, 0x11bff, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 35, 0x18400, 0x1843f, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 36, 0x18440, 0x1857f, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 37, 0x18580, 0x1a57f, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 38, 0x1b400, 0x1d3ff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 39, 0x1f400, 0x1f877, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 40, 0x1f878, 0x1ffff, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 41, 0x1f930, 0x1fc59, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 42, 0x20000, 0x21fff, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 43, 0x27800, 0x2787f, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 44, 0x27880, 0x27c01, 0 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 45, 0x27882, 0x27883, 1 },
+	{ GEN8_CP_PROTECT_REG_GLOBAL + 63, 0x27c02, 0x27c02, 1 },
+	{ 0 },
+};
+
+static const struct adreno_gen8_core adreno_gpu_core_gen8_3_0 = {
+	.base = {
+		DEFINE_ADRENO_REV(ADRENO_REV_GEN8_3_0,
+				  UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID),
+		.compatible = "qcom,adreno-gpu-gen8-3-0",
+		.features = ADRENO_APRIV | ADRENO_IOCOHERENT |
+			ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | ADRENO_BCL |
+			ADRENO_PREEMPTION | ADRENO_ACD,
+		.gpudev = &adreno_gen8_hwsched_gpudev.base,
+		.perfcounters = &adreno_gen8_perfcounters,
+		.uche_gmem_alignment = SZ_64M,
+		.gmem_size = (SZ_512K + SZ_64K),
+		.bus_width = 32,
+		.snapshot_size = SZ_8M,
+		.num_ddr_channels = 2,
+	},
+	.sqefw_name = "gen80300_sqe.fw",
+	.gmufw_name = "gen80300_gmu.bin",
+	.zap_name = "gen80300_zap.mbn",
+	.ao_hwcg = gen8_ao_hwcg_regs,
+	.ao_hwcg_count = ARRAY_SIZE(gen8_ao_hwcg_regs),
+	.gbif = gen8_3_0_gbif_cx_regs,
+	.gbif_count = ARRAY_SIZE(gen8_3_0_gbif_cx_regs),
+	.hang_detect_cycles = 0xcfffff,
+	.protected_regs = gen8_3_0_protected_regs,
+	.nonctxt_regs = gen8_3_0_nonctxt_regs,
+	.highest_bank_bit = 15,
+	.gmu_hub_clk_freq = 200000000,
+	.gen8_snapshot_block_list = &gen8_3_0_snapshot_block_list,
+	.ctxt_record_size = (4558 * SZ_1K),
+	.bcl_data = 1,
+	.noc_timeout_us = 6800, /* 6.8 msec */
+};
+
+static const struct adreno_gpu_core *adreno_gpulist[] = {
+	&adreno_gpu_core_a306.base,
+	&adreno_gpu_core_a306a.base,
+	&adreno_gpu_core_a304.base,
+	&adreno_gpu_core_a405,		/* Deprecated */
+	&adreno_gpu_core_a418,		/* Deprecated */
+	&adreno_gpu_core_a420,		/* Deprecated */
+	&adreno_gpu_core_a430,		/* Deprecated */
+	&adreno_gpu_core_a530v1,	/* Deprecated */
+	&adreno_gpu_core_a530v2.base,
+	&adreno_gpu_core_a530v3.base,
+	&adreno_gpu_core_a505.base,
+	&adreno_gpu_core_a506.base,
+	&adreno_gpu_core_a510.base,
+	&adreno_gpu_core_a540v1,	/* Deprecated */
+	&adreno_gpu_core_a540v2.base,
+	&adreno_gpu_core_a512.base,
+	&adreno_gpu_core_a508.base,
+	&adreno_gpu_core_a630v1,	/* Deprecated */
+	&adreno_gpu_core_a630v2.base,
+	&adreno_gpu_core_a615.base,
+	&adreno_gpu_core_a618.base,
+	&adreno_gpu_core_a619.base,
+	&adreno_gpu_core_a619_variant.base,
+	&adreno_gpu_core_a620.base,
+	&adreno_gpu_core_a621.base,
+	&adreno_gpu_core_a635.base,
+	&adreno_gpu_core_a640.base,
+	&adreno_gpu_core_a650.base,
+	&adreno_gpu_core_a650v2.base,
+	&adreno_gpu_core_a660.base,
+	&adreno_gpu_core_a660v2.base,
+	&adreno_gpu_core_a663.base,
+	&adreno_gpu_core_a680.base,
+	&adreno_gpu_core_a612.base,
+	&adreno_gpu_core_a616.base,
+	&adreno_gpu_core_a610.base,
+	&adreno_gpu_core_a611.base,
+	&adreno_gpu_core_a660_shima.base,
+	&adreno_gpu_core_a702.base,
+	&adreno_gpu_core_gen7_0_0.base,
+	&adreno_gpu_core_gen7_0_1.base,
+	&adreno_gpu_core_a662.base,
+	&adreno_gpu_core_gen7_2_0.base,
+	&adreno_gpu_core_gen7_2_1.base,
+	&adreno_gpu_core_gen7_4_0.base,
+	&adreno_gpu_core_gen7_9_0.base,
+	&adreno_gpu_core_gen7_9_1.base,
+	&adreno_gpu_core_gen7_11_0.base,
+	&adreno_gpu_core_gen8_3_0.base,
+
+};

+ 3837 - 0
qcom/opensource/graphics-kernel/adreno.c

@@ -0,0 +1,3837 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/input.h>
+#include <linux/interconnect.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_fdt.h>
+#include <linux/module.h>
+#include <linux/msm_kgsl.h>
+#include <linux/regulator/consumer.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/reset.h>
+#include <linux/trace.h>
+#include <linux/units.h>
+#include <linux/version.h>
+#include <soc/qcom/dcvs.h>
+#include <soc/qcom/socinfo.h>
+#include <linux/suspend.h>
+
+#include "adreno.h"
+#include "adreno_a3xx.h"
+#include "adreno_a5xx.h"
+#include "adreno_a6xx.h"
+#include "adreno_compat.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+#include "kgsl_bus.h"
+#include "kgsl_reclaim.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+/* Include the master list of GPU cores that are supported */
+#include "adreno-gpulist.h"
+
+static void adreno_unbind(struct device *dev);
+static void adreno_input_work(struct work_struct *work);
+static int adreno_soft_reset(struct kgsl_device *device);
+static unsigned int counter_delta(struct kgsl_device *device,
+	unsigned int reg, unsigned int *counter);
+static struct device_node *
+	adreno_get_gpu_model_node(struct platform_device *pdev);
+
+static struct adreno_device device_3d0;
+static bool adreno_preemption_enable;
+
+/* Nice level for the higher priority GPU start thread */
+int adreno_wake_nice = -7;
+
+/* Number of milliseconds to stay active after a wake on touch */
+unsigned int adreno_wake_timeout = 100;
+
+static u32 get_ucode_version(const u32 *data)
+{
+	u32 version;
+
+	version = data[1];
+
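+	/*
+	 * A low nibble of 0xa means the bottom 12 bits of the version are
+	 * carried in bits 23..12 of data[3]; splice them in below.
+	 */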
+	if ((version & 0xf) != 0xa)
+		return version;
+
+	version &= ~0xfff;
+	return  version | ((data[3] & 0xfff000) >> 12);
+}
+
+int adreno_get_firmware(struct adreno_device *adreno_dev,
+		const char *fwfile, struct adreno_firmware *firmware)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct firmware *fw = NULL;
+	int ret;
+
+	if (!IS_ERR_OR_NULL(firmware->memdesc))
+		return 0;
+
+	ret = request_firmware(&fw, fwfile, &device->pdev->dev);
+
+	if (ret) {
+		dev_err(device->dev, "request_firmware(%s) failed: %d\n",
+				fwfile, ret);
+		return ret;
+	}
+
+	firmware->memdesc = kgsl_allocate_global(device, fw->size - 4, 0,
+				KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_UCODE,
+				"ucode");
+
+	ret = PTR_ERR_OR_ZERO(firmware->memdesc);
+	if (!ret) {
+		memcpy(firmware->memdesc->hostptr, &fw->data[4], fw->size - 4);
+		firmware->size = (fw->size - 4) / sizeof(u32);
+		firmware->version = get_ucode_version((u32 *)fw->data);
+	}
+
+	release_firmware(fw);
+	return ret;
+}
+
+
+int adreno_zap_shader_load(struct adreno_device *adreno_dev,
+		const char *name)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (!name || adreno_dev->zap_loaded)
+		return 0;
+
+	ret = kgsl_zap_shader_load(&device->pdev->dev, name);
+	if (!ret)
+		adreno_dev->zap_loaded = true;
+
+	return ret;
+}
+
+#if (IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION) || IS_ENABLED(CONFIG_DEEPSLEEP))
+static void adreno_zap_shader_unload(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (adreno_dev->zap_loaded) {
+		ret = kgsl_zap_shader_unload(&device->pdev->dev);
+		if (!ret)
+			adreno_dev->zap_loaded = false;
+	}
+}
+#endif
+
+/**
+ * adreno_readreg64() - Read a 64-bit register using the offsets from the
+ * offset array defined in the gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @lo:	Enum of the lower 32-bit register to read
+ * @hi:	Enum of the upper 32-bit register to read
+ * @val: Pointer where the combined 64-bit register value is placed
+ */
+void adreno_readreg64(struct adreno_device *adreno_dev,
+		enum adreno_regs lo, enum adreno_regs hi, uint64_t *val)
+{
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int val_lo = 0, val_hi = 0;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (adreno_checkreg_off(adreno_dev, lo))
+		kgsl_regread(device, gpudev->reg_offsets[lo], &val_lo);
+	if (adreno_checkreg_off(adreno_dev, hi))
+		kgsl_regread(device, gpudev->reg_offsets[hi], &val_hi);
+
+	*val = (val_lo | ((uint64_t)val_hi << 32));
+}
+
+/**
+ * adreno_get_rptr() - Get the current ringbuffer read pointer
+ * @rb: Pointer to the ringbuffer to query
+ *
+ * Get the latest rptr
+ */
+unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 rptr = 0;
+
+	if (adreno_is_a3xx(adreno_dev))
+		kgsl_regread(device, A3XX_CP_RB_RPTR, &rptr);
+	else
+		kgsl_sharedmem_readl(device->scratch, &rptr,
+				SCRATCH_RB_OFFSET(rb->id, rptr));
+
+	return rptr;
+}
+
+static void adreno_touch_wakeup(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/*
+	 * Don't schedule adreno_start in a high priority workqueue, we are
+	 * already in a workqueue which should be sufficient
+	 */
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+
+	/*
+	 * When waking up from a touch event we want to stay active long enough
+	 * for the user to send a draw command.  The default idle timer timeout
+	 * is shorter than we want so go ahead and push the idle timer out
+	 * further for this special case
+	 */
+	mod_timer(&device->idle_timer,
+		jiffies + msecs_to_jiffies(adreno_wake_timeout));
+
+}
+
+/*
+ * A workqueue callback responsible for actually turning on the GPU after a
+ * touch event. kgsl_pwrctrl_change_state(ACTIVE) is used without any
+ * active_count protection to avoid the need to maintain state.  Either
+ * somebody will start using the GPU or the idle timer will fire and put the
+ * GPU back into slumber.
+ */
+static void adreno_input_work(struct work_struct *work)
+{
+	struct adreno_device *adreno_dev = container_of(work,
+			struct adreno_device, input_work);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
+
+	mutex_lock(&device->mutex);
+
+	device->pwrctrl.wake_on_touch = true;
+
+	ops->touch_wakeup(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+/* Wake up the touch event kworker to initiate GPU wakeup */
+void adreno_touch_wake(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/*
+	 * Don't do anything if nothing has been rendered since the last time
+	 * we woke the GPU for a touch event
+	 */
+
+	if (device->pwrctrl.wake_on_touch)
+		return;
+
+	if (gmu_core_isenabled(device) || (device->state == KGSL_STATE_SLUMBER))
+		schedule_work(&adreno_dev->input_work);
+}
+
+/*
+ * Process input events and schedule work if needed. At this point we are
+ * only interested in handling EV_ABS touchscreen events
+ */
+static void adreno_input_event(struct input_handle *handle, unsigned int type,
+		unsigned int code, int value)
+{
+	struct kgsl_device *device = handle->handler->private;
+
+	/* Only consider EV_ABS (touch) events */
+	if (type == EV_ABS)
+		adreno_touch_wake(device);
+}
+
+#ifdef CONFIG_INPUT
+static int adreno_input_connect(struct input_handler *handler,
+		struct input_dev *dev, const struct input_device_id *id)
+{
+	struct input_handle *handle;
+	int ret;
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (handle == NULL)
+		return -ENOMEM;
+
+	handle->dev = dev;
+	handle->handler = handler;
+	handle->name = handler->name;
+
+	ret = input_register_handle(handle);
+	if (ret) {
+		kfree(handle);
+		return ret;
+	}
+
+	ret = input_open_device(handle);
+	if (ret) {
+		input_unregister_handle(handle);
+		kfree(handle);
+	}
+
+	return ret;
+}
+
+static void adreno_input_disconnect(struct input_handle *handle)
+{
+	input_close_device(handle);
+	input_unregister_handle(handle);
+	kfree(handle);
+}
+#else
+static int adreno_input_connect(struct input_handler *handler,
+		struct input_dev *dev, const struct input_device_id *id)
+{
+	return 0;
+}
+static void adreno_input_disconnect(struct input_handle *handle) {}
+#endif
+
+/*
+ * We are only interested in EV_ABS events so only register handlers for those
+ * input devices that have EV_ABS events
+ */
+static const struct input_device_id adreno_input_ids[] = {
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+		.evbit = { BIT_MASK(EV_ABS) },
+		/* assumption: MT_.._X & MT_.._Y are in the same long */
+		.absbit = { [BIT_WORD(ABS_MT_POSITION_X)] =
+				BIT_MASK(ABS_MT_POSITION_X) |
+				BIT_MASK(ABS_MT_POSITION_Y) },
+	},
+	{ },
+};
+
+static struct input_handler adreno_input_handler = {
+	.event = adreno_input_event,
+	.connect = adreno_input_connect,
+	.disconnect = adreno_input_disconnect,
+	.name = "kgsl",
+	.id_table = adreno_input_ids,
+};
+
+/*
+ * _soft_reset() - Soft reset GPU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * Soft reset the GPU by doing an AHB write of value 1 to the RBBM_SW_RESET
+ * register. This is used to reset the GPU without turning off the GFX
+ * power rail. While asserted, the reset resets all the HW logic, restores
+ * GPU registers to their default state and flushes out pending VBIF
+ * transactions.
+ */
+static void _soft_reset(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 1);
+	/*
+	 * Do a dummy read to get a brief read cycle delay for the
+	 * reset to take effect
+	 */
+	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, &reg);
+	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 0);
+
+	/* The SP/TP regulator gets turned off after a soft reset */
+
+	clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv);
+	if (gpudev->regulator_enable)
+		gpudev->regulator_enable(adreno_dev);
+}
+
+/**
+ * adreno_irqctrl() - Enable or disable the RBBM interrupt mask
+ * @adreno_dev: Pointer to an adreno_device
+ * @state: 1 to program the IRQ mask (enable interrupts), 0 to clear it
+ * Power: The caller of this function must make sure to use OOBs
+ * so that we know that the GPU is powered on
+ */
+void adreno_irqctrl(struct adreno_device *adreno_dev, int state)
+{
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (!adreno_dev->irq_mask)
+		return;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK,
+		state ? adreno_dev->irq_mask : 0);
+
+	if (gpudev->swfuse_irqctrl)
+		gpudev->swfuse_irqctrl(adreno_dev, state);
+}
+
+/*
+ * adreno_hang_int_callback() - ISR for fatal interrupts that hang the GPU
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit)
+{
+	dev_crit_ratelimited(KGSL_DEVICE(adreno_dev)->dev,
+				"MISC: GPU hang detected\n");
+	adreno_irqctrl(adreno_dev, 0);
+
+	/* Trigger a fault in the dispatcher - this will effect a restart */
+	adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT);
+}
+
+/*
+ * adreno_cp_callback() - CP interrupt handler
+ * @adreno_dev: Adreno device pointer
+ * @bit: Interrupt bit
+ *
+ * Handle the CP interrupt generated by the GPU.
+ */
+void adreno_cp_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	adreno_dispatcher_schedule(device);
+}
+
+static irqreturn_t adreno_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	irqreturn_t ret;
+
+	atomic_inc(&adreno_dev->pending_irq_refcnt);
+	/* Ensure this increment is done before the IRQ status is updated */
+	smp_mb__after_atomic();
+
+	ret = gpudev->irq_handler(adreno_dev);
+
+	/* Make sure the regwrites are done before the decrement */
+	smp_mb__before_atomic();
+	atomic_dec(&adreno_dev->pending_irq_refcnt);
+	/* Ensure other CPUs see the decrement */
+	smp_mb__after_atomic();
+
+	return ret;
+}
+
+static irqreturn_t adreno_freq_limiter_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+
+	KGSL_PWRCTRL_LOG_FREQLIM(device);
+
+	reset_control_reset(device->freq_limiter_irq_clear);
+
+	return IRQ_HANDLED;
+}
+
+irqreturn_t adreno_irq_callbacks(struct adreno_device *adreno_dev,
+		const struct adreno_irq_funcs *funcs, u32 status)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	irqreturn_t ret = IRQ_NONE;
+
+	/* Loop through all set interrupts and call respective handlers */
+	while (status) {
+		int i = fls(status) - 1;
+
+		if (funcs[i].func) {
+			if (adreno_dev->irq_mask & BIT(i))
+				funcs[i].func(adreno_dev, i);
+		} else
+			dev_crit_ratelimited(device->dev,
+				"Unhandled interrupt bit %x\n", i);
+
+		ret = IRQ_HANDLED;
+
+		status &= ~BIT(i);
+	}
+
+	return ret;
+}
+
+static int adreno_get_chipid(struct platform_device *pdev, u32 *chipid);
+
+static inline bool _rev_match(unsigned int id, unsigned int entry)
+{
+	return (entry == ANY_ID || entry == id);
+}
+
+static const struct adreno_gpu_core *
+_get_gpu_core(struct platform_device *pdev, u32 *chipid)
+{
+	int i;
+	struct device_node *node;
+
+	/*
+	 * When "qcom,gpu-models" is defined, use gpu model node to match
+	 * on a compatible string, otherwise match using legacy way.
+	 */
+	node = adreno_get_gpu_model_node(pdev);
+	if (!node || !of_find_property(node, "compatible", NULL))
+		node = pdev->dev.of_node;
+
+	*chipid = 0;
+
+	/* Check to see if any of the entries match on a compatible string */
+	for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) {
+		if (adreno_gpulist[i]->compatible &&
+				of_device_is_compatible(node,
+					adreno_gpulist[i]->compatible)) {
+			/*
+			 * We matched on a compatible string; set the chipid
+			 * from the dtsi, otherwise fail.
+			 */
+			if (!adreno_get_chipid(pdev, chipid))
+				return adreno_gpulist[i];
+
+			dev_crit(&pdev->dev,
+					"No chipid associated with %s\n",
+					adreno_gpulist[i]->compatible);
+			return NULL;
+		}
+	}
+
+	/* No compatible string so try and match on chipid */
+	if (!adreno_get_chipid(pdev, chipid)) {
+		unsigned int core = ADRENO_CHIPID_CORE(*chipid);
+		unsigned int major = ADRENO_CHIPID_MAJOR(*chipid);
+		unsigned int minor = ADRENO_CHIPID_MINOR(*chipid);
+		unsigned int patchid = ADRENO_CHIPID_PATCH(*chipid);
+
+		for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) {
+			if (core == adreno_gpulist[i]->core &&
+				_rev_match(major, adreno_gpulist[i]->major) &&
+				_rev_match(minor, adreno_gpulist[i]->minor) &&
+				_rev_match(patchid, adreno_gpulist[i]->patchid))
+				return adreno_gpulist[i];
+		}
+	}
+
+	dev_crit(&pdev->dev, "Unknown GPU chip ID %8.8x\n", *chipid);
+	return NULL;
+}
+
+static struct {
+	unsigned int quirk;
+	const char *prop;
+} adreno_quirks[] = {
+	 { ADRENO_QUIRK_TWO_PASS_USE_WFI, "qcom,gpu-quirk-two-pass-use-wfi" },
+	 { ADRENO_QUIRK_CRITICAL_PACKETS, "qcom,gpu-quirk-critical-packets" },
+	 { ADRENO_QUIRK_FAULT_DETECT_MASK, "qcom,gpu-quirk-fault-detect-mask" },
+	 { ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING,
+			"qcom,gpu-quirk-dp2clockgating-disable" },
+	 { ADRENO_QUIRK_DISABLE_LMLOADKILL,
+			"qcom,gpu-quirk-lmloadkill-disable" },
+	{ ADRENO_QUIRK_HFI_USE_REG, "qcom,gpu-quirk-hfi-use-reg" },
+	{ ADRENO_QUIRK_SECVID_SET_ONCE, "qcom,gpu-quirk-secvid-set-once" },
+	{ ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW,
+			"qcom,gpu-quirk-limit-uche-gbif-rw" },
+	{ ADRENO_QUIRK_CX_GDSC, "qcom,gpu-quirk-cx-gdsc" },
+};
+
+static int adreno_get_chipid(struct platform_device *pdev, u32 *chipid)
+{
+	u32 id;
+
+	if (!of_property_read_u32(pdev->dev.of_node, "qcom,chipid", chipid))
+		return 0;
+
+	id = socinfo_get_partinfo_chip_id(SOCINFO_PART_GPU);
+	if (id)
+		*chipid = id;
+
+	return id ? 0 : -EINVAL;
+}
+
+static void
+adreno_update_soc_hw_revision_quirks(struct adreno_device *adreno_dev,
+		struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	int i;
+
+	/* update quirk */
+	for (i = 0; i < ARRAY_SIZE(adreno_quirks); i++) {
+		if (of_property_read_bool(node, adreno_quirks[i].prop))
+			adreno_dev->quirks |= adreno_quirks[i].quirk;
+	}
+}
+
+static const struct adreno_gpu_core *
+adreno_identify_gpu(struct platform_device *pdev, u32 *chipid)
+{
+	const struct adreno_gpu_core *gpucore;
+
+	gpucore = _get_gpu_core(pdev, chipid);
+	if (!gpucore)
+		return ERR_PTR(-ENODEV);
+
+	/*
+	 * Identify no-longer-supported targets and spins and print a helpful
+	 * message
+	 */
+	if (gpucore->features & ADRENO_DEPRECATED) {
+		if (gpucore->compatible)
+			dev_err(&pdev->dev,
+				"Support for GPU %s has been deprecated\n",
+				gpucore->compatible);
+		else
+			dev_err(&pdev->dev,
+				"Support for GPU %x.%d.%x.%d has been deprecated\n",
+				gpucore->core, gpucore->major,
+				gpucore->minor, gpucore->patchid);
+		return ERR_PTR(-ENODEV);
+	}
+
+	return gpucore;
+}
+
+static const struct of_device_id adreno_match_table[] = {
+	{ .compatible = "qcom,kgsl-3d0", .data = &device_3d0 },
+	{ },
+};
+
+MODULE_DEVICE_TABLE(of, adreno_match_table);
+
+/* Dynamically build the OPP table for the GPU device */
+static void adreno_build_opp_table(struct device *dev, struct kgsl_pwrctrl *pwr)
+{
+	int i;
+
+	/* Skip if the table has already been populated */
+	if (dev_pm_opp_get_opp_count(dev) > 0)
+		return;
+
+	/* Add all the supported frequencies into the tree */
+	for (i = 0; i < pwr->num_pwrlevels; i++)
+		dev_pm_opp_add(dev, pwr->pwrlevels[i].gpu_freq, 0);
+}
+
+static int adreno_of_parse_pwrlevels(struct adreno_device *adreno_dev,
+		struct device_node *node)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct device_node *child;
+	int ret;
+
+	pwr->num_pwrlevels = 0;
+
+	for_each_child_of_node(node, child) {
+		u32 index, freq = 0, voltage, bus;
+		struct kgsl_pwrlevel *level;
+
+		ret = of_property_read_u32(child, "reg", &index);
+		if (ret) {
+			dev_err(device->dev, "%pOF: powerlevel index not found\n",
+				child);
+			goto out;
+		}
+
+		ret = of_property_read_u32(child, "qcom,gpu-freq", &freq);
+		if (ret) {
+			dev_err(device->dev, "%pOF: Unable to read qcom,gpu-freq\n",
+				child);
+			goto out;
+		}
+
+		/* Ignore "zero" powerlevels */
+		if (!freq)
+			continue;
+
+		ret = of_property_read_u32(child, "qcom,level", &voltage);
+		if (ret) {
+			dev_err(device->dev, "%pOF: Unable to read qcom,level\n",
+				child);
+			goto out;
+		}
+
+		ret = kgsl_of_property_read_ddrtype(child, "qcom,bus-freq",
+			&bus);
+		if (ret) {
+			dev_err(device->dev, "%pOF:Unable to read qcom,bus-freq\n",
+				child);
+			goto out;
+		}
+
+		if (index >= ARRAY_SIZE(pwr->pwrlevels)) {
+			dev_err(device->dev, "%pOF: Pwrlevel index %d is out of range\n",
+				child, index);
+			continue;
+		}
+
+		if (index >= pwr->num_pwrlevels)
+			pwr->num_pwrlevels = index + 1;
+
+		level = &pwr->pwrlevels[index];
+
+		level->gpu_freq = freq;
+		level->bus_freq = bus;
+		level->voltage_level = voltage;
+		level->cx_level = 0xffffffff;
+
+		of_property_read_u32(child, "qcom,acd-level",
+			&level->acd_level);
+
+		of_property_read_u32(child, "qcom,cx-level",
+			&level->cx_level);
+
+		level->bus_min = level->bus_freq;
+		kgsl_of_property_read_ddrtype(child,
+			"qcom,bus-min", &level->bus_min);
+
+		level->bus_max = level->bus_freq;
+		kgsl_of_property_read_ddrtype(child,
+			"qcom,bus-max", &level->bus_max);
+	}
+
+	adreno_build_opp_table(&device->pdev->dev, pwr);
+	return 0;
+out:
+	of_node_put(child);
+	return ret;
+}
+
+static void adreno_of_get_initial_pwrlevels(struct kgsl_pwrctrl *pwr,
+		struct device_node *node)
+{
+	int level;
+
+	/* Get and set the initial power level */
+	if (of_property_read_u32(node, "qcom,initial-pwrlevel", &level))
+		level = 1;
+
+	if (level < 0 || level >= pwr->num_pwrlevels)
+		level = 1;
+
+	pwr->active_pwrlevel = level;
+	pwr->default_pwrlevel = level;
+
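+	/*
+	 * Power levels are indexed from the fastest (0) down to the slowest,
+	 * so the ceiling is index 0 and the floor is the last index.
+	 */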
+	/* Set the max power level */
+	pwr->max_pwrlevel = 0;
+
+	/* Get and set the min power level */
+	if (of_property_read_u32(node, "qcom,initial-min-pwrlevel", &level))
+		level = pwr->num_pwrlevels - 1;
+
+	if (level < 0 || level >= pwr->num_pwrlevels || level < pwr->default_pwrlevel)
+		level = pwr->num_pwrlevels - 1;
+
+	pwr->min_render_pwrlevel = level;
+	pwr->min_pwrlevel = level;
+}
+
+static void adreno_of_get_limits(struct adreno_device *adreno_dev,
+		struct device_node *node)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwrctrl = &device->pwrctrl;
+	unsigned int throttle_level;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) || of_property_read_u32(node,
+				"qcom,throttle-pwrlevel", &throttle_level))
+		return;
+
+	throttle_level = min(throttle_level, pwrctrl->num_pwrlevels - 1);
+
+	pwrctrl->throttle_mask = GENMASK(pwrctrl->num_pwrlevels - 1,
+			pwrctrl->num_pwrlevels - 1 - throttle_level);
+
+	adreno_dev->lm_enabled = true;
+}
+
+static int adreno_of_get_legacy_pwrlevels(struct adreno_device *adreno_dev,
+		struct device_node *parent)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct device_node *node;
+	int ret;
+
+	node = of_find_node_by_name(parent, "qcom,gpu-pwrlevels");
+
+	if (node == NULL) {
+		dev_err(&device->pdev->dev,
+			"Unable to find 'qcom,gpu-pwrlevels'\n");
+		return -EINVAL;
+	}
+
+	ret = adreno_of_parse_pwrlevels(adreno_dev, node);
+
+	if (!ret) {
+		adreno_of_get_initial_pwrlevels(&device->pwrctrl, parent);
+		adreno_of_get_limits(adreno_dev, parent);
+	}
+
+	of_node_put(node);
+	return ret;
+}
+
+static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev,
+		struct device_node *parent)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct device_node *node, *child;
+	int feature_code, pcode;
+
+	node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins");
+	if (node == NULL)
+		return adreno_of_get_legacy_pwrlevels(adreno_dev, parent);
+
+	feature_code = max_t(int, socinfo_get_feature_code(), SOCINFO_FC_UNKNOWN);
+	pcode = (feature_code >= SOCINFO_FC_Y0 && feature_code < SOCINFO_FC_INT_RESERVE) ?
+		max_t(int, socinfo_get_pcode(), SOCINFO_PCODE_UNKNOWN) : SOCINFO_PCODE_UNKNOWN;
+
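+	/*
+	 * Pack the pcode into bits 31:16 and the feature code into bits 15:0;
+	 * this combined value is matched against the qcom,sku-codes entries
+	 * below.
+	 */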
+	device->soc_code = FIELD_PREP(GENMASK(31, 16), pcode) |
+					FIELD_PREP(GENMASK(15, 0), feature_code);
+
+	for_each_child_of_node(node, child) {
+		bool match = false;
+		int tbl_size;
+		u32 bin = 0;
+
+		/* Check if the bin has a speed-bin requirement */
+		if (!of_property_read_u32(child, "qcom,speed-bin", &bin))
+			match = (bin == device->speed_bin);
+
+		/* Check if the bin has a sku-code requirement */
+		if (of_get_property(child, "qcom,sku-codes", &tbl_size)) {
+			int num_codes = tbl_size / sizeof(u32);
+			int i;
+			u32 sku_code;
+
+			/*
+			 * If we have a speed-bin requirement that did not match,
+			 * keep searching.
+			 */
+			if (bin && !match)
+				continue;
+
+			/* Check if the soc_code matches any of the sku codes */
+			match = false;
+			for (i = 0; i < num_codes; i++) {
+				if (!of_property_read_u32_index(child, "qcom,sku-codes",
+								i, &sku_code) &&
+					(sku_code == 0 || device->soc_code == sku_code)) {
+					match = true;
+					break;
+				}
+			}
+		}
+
+		if (match) {
+			int ret;
+
+			ret = adreno_of_parse_pwrlevels(adreno_dev, child);
+			if (ret) {
+				of_node_put(child);
+				return ret;
+			}
+
+			adreno_of_get_initial_pwrlevels(&device->pwrctrl, child);
+
+			/*
+			 * Check for global throttle-pwrlevel first and override
+			 * with speedbin specific one if found.
+			 */
+			adreno_of_get_limits(adreno_dev, parent);
+			adreno_of_get_limits(adreno_dev, child);
+
+			of_node_put(child);
+			return 0;
+		}
+	}
+
+	dev_err(&device->pdev->dev,
+		"No match for speed_bin:%d and soc_code:0x%x\n",
+		device->speed_bin, device->soc_code);
+	return -ENODEV;
+}
+
+static int register_l3_voter(struct kgsl_device *device)
+{
+	int ret = 0;
+
+	mutex_lock(&device->mutex);
+
+	if (!device->l3_vote)
+		goto done;
+
+	/* This indicates that we are already set up */
+	if (device->num_l3_pwrlevels != 0)
+		goto done;
+
+	memset(device->l3_freq, 0x0, sizeof(device->l3_freq));
+
+	ret = qcom_dcvs_register_voter(KGSL_L3_DEVICE, DCVS_L3, DCVS_SLOW_PATH);
+	if (ret) {
+		dev_err_once(&device->pdev->dev,
+			"Unable to register l3 dcvs voter: %d\n", ret);
+		goto done;
+	}
+
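+	/*
+	 * l3_freq[0] is left at zero (no vote); indices 1 and 2 appear to
+	 * receive the hardware min and max L3 frequencies respectively.
+	 */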
+	ret = qcom_dcvs_hw_minmax_get(DCVS_L3, &device->l3_freq[1],
+		&device->l3_freq[2]);
+	if (ret) {
+		dev_err_once(&device->pdev->dev,
+			"Unable to get min/max for l3 dcvs: %d\n", ret);
+		qcom_dcvs_unregister_voter(KGSL_L3_DEVICE, DCVS_L3,
+			DCVS_SLOW_PATH);
+		memset(device->l3_freq, 0x0, sizeof(device->l3_freq));
+		goto done;
+	}
+
+	device->num_l3_pwrlevels = 3;
+
+done:
+	mutex_unlock(&device->mutex);
+
+	return ret;
+}
+
+static int adreno_of_get_power(struct adreno_device *adreno_dev,
+		struct platform_device *pdev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = adreno_of_get_pwrlevels(adreno_dev, pdev->dev.of_node);
+	if (ret)
+		return ret;
+
+	device->pwrctrl.interval_timeout = CONFIG_QCOM_KGSL_IDLE_TIMEOUT;
+
+	/* Set default bus control to true on all targets */
+	device->pwrctrl.bus_control = true;
+
+	return 0;
+}
+
+static void adreno_cx_misc_probe(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct resource *res;
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+					   "cx_misc");
+
+	if (res == NULL)
+		return;
+
+	adreno_dev->cx_misc_len = resource_size(res);
+	adreno_dev->cx_misc_virt = devm_ioremap(&device->pdev->dev,
+					res->start, adreno_dev->cx_misc_len);
+}
+
+static void adreno_isense_probe(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct resource *res;
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+			"isense_cntl");
+	if (res == NULL)
+		return;
+
+	adreno_dev->isense_base = res->start - device->regmap.base->start;
+	adreno_dev->isense_len = resource_size(res);
+	adreno_dev->isense_virt = devm_ioremap(&device->pdev->dev, res->start,
+					adreno_dev->isense_len);
+	if (adreno_dev->isense_virt == NULL)
+		dev_warn(device->dev, "isense ioremap failed\n");
+}
+
+/* Read the fuse through the new and fancy nvmem method */
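+/*
+ * A missing speed_bin fuse is not treated as an error: -ENOENT simply
+ * selects the default bin (0).
+ */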
+static int adreno_read_speed_bin(struct platform_device *pdev)
+{
+	struct nvmem_cell *cell = nvmem_cell_get(&pdev->dev, "speed_bin");
+	int ret = PTR_ERR_OR_ZERO(cell);
+	void *buf;
+	int val = 0;
+	size_t len;
+
+	if (ret) {
+		if (ret == -ENOENT)
+			return 0;
+
+		return ret;
+	}
+
+	buf = nvmem_cell_read(cell, &len);
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	memcpy(&val, buf, min(len, sizeof(val)));
+	kfree(buf);
+
+	return val;
+}
+
+static int adreno_read_gpu_model_fuse(struct platform_device *pdev)
+{
+	struct nvmem_cell *cell = nvmem_cell_get(&pdev->dev, "gpu_model");
+	void *buf;
+	int val = 0;
+	size_t len;
+
+	if (IS_ERR(cell))
+		return PTR_ERR(cell);
+
+	buf = nvmem_cell_read(cell, &len);
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	memcpy(&val, buf, min(len, sizeof(val)));
+	kfree(buf);
+
+	return val;
+}
+
+static struct device_node *
+adreno_get_gpu_model_node(struct platform_device *pdev)
+{
+	struct device_node *node, *child;
+	int fuse_model = adreno_read_gpu_model_fuse(pdev);
+
+	if (fuse_model < 0)
+		return NULL;
+
+	node = of_find_node_by_name(pdev->dev.of_node, "qcom,gpu-models");
+	if (node == NULL)
+		return NULL;
+
+	for_each_child_of_node(node, child) {
+		u32 model;
+
+		if (of_property_read_u32(child, "qcom,gpu-model-id", &model))
+			continue;
+
+		if (model == fuse_model) {
+			of_node_put(node);
+			return child;
+		}
+	}
+
+	of_node_put(node);
+
+	return NULL;
+}
+
+const char *adreno_get_gpu_model(struct kgsl_device *device)
+{
+	struct device_node *node;
+	static char gpu_model[32];
+	const char *model;
+	int ret;
+
+	if (strlen(gpu_model))
+		return gpu_model;
+
+	node = adreno_get_gpu_model_node(device->pdev);
+	if (!node)
+		node = of_node_get(device->pdev->dev.of_node);
+
+	ret = of_property_read_string(node, "qcom,gpu-model", &model);
+	of_node_put(node);
+
+	if (!ret)
+		goto done;
+
+	model = socinfo_get_partinfo_part_name(SOCINFO_PART_GPU);
+	if (model)
+		goto done;
+
+	scnprintf(gpu_model, sizeof(gpu_model), "Adreno%u%u%uv%u",
+		(u32)ADRENO_CHIPID_CORE(ADRENO_DEVICE(device)->chipid),
+		(u32)ADRENO_CHIPID_MAJOR(ADRENO_DEVICE(device)->chipid),
+		(u32)ADRENO_CHIPID_MINOR(ADRENO_DEVICE(device)->chipid),
+		(u32)ADRENO_CHIPID_PATCH(ADRENO_DEVICE(device)->chipid) + 1);
+
+	return gpu_model;
+
+done:
+	strscpy(gpu_model, model, sizeof(gpu_model));
+	return gpu_model;
+}
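+
+/*
+ * Illustrative note (the numbers here are hypothetical, not from a real fuse
+ * map): when neither the device tree nor socinfo provides a name, the
+ * fallback above builds one from the chipid fields, e.g. core 6, major 3,
+ * minor 5, patch 0 produces the string "Adreno635v1".
+ */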
+
+static u32 adreno_get_vk_device_id(struct kgsl_device *device)
+{
+	struct device_node *node;
+	static u32 device_id;
+	u32 vk_id;
+	int ret;
+
+	if (device_id)
+		return device_id;
+
+	node = adreno_get_gpu_model_node(device->pdev);
+	if (!node)
+		node = of_node_get(device->pdev->dev.of_node);
+
+	ret = of_property_read_u32(node, "qcom,vk-device-id", &device_id);
+	of_node_put(node);
+	if (!ret)
+		return device_id;
+
+	vk_id = socinfo_get_partinfo_vulkan_id(SOCINFO_PART_GPU);
+	device_id = vk_id ? vk_id : ADRENO_DEVICE(device)->chipid;
+
+	return device_id;
+}
+
+#if IS_ENABLED(CONFIG_QCOM_LLCC)
+static int adreno_probe_llcc(struct adreno_device *adreno_dev,
+		struct platform_device *pdev)
+{
+	int ret;
+
+	/* Get the system cache slice descriptor for GPU */
+	adreno_dev->gpu_llc_slice = llcc_slice_getd(LLCC_GPU);
+	ret = PTR_ERR_OR_ZERO(adreno_dev->gpu_llc_slice);
+
+	if (ret) {
+		/* Propagate EPROBE_DEFER back to the probe function */
+		if (ret == -EPROBE_DEFER)
+			return ret;
+
+		if (ret != -ENOENT)
+			dev_warn(&pdev->dev,
+				"Unable to get the GPU LLC slice: %d\n", ret);
+	} else
+		adreno_dev->gpu_llc_slice_enable = true;
+
+	/* Get the system cache slice descriptor for GPU pagetables */
+	adreno_dev->gpuhtw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
+	ret = PTR_ERR_OR_ZERO(adreno_dev->gpuhtw_llc_slice);
+	if (ret) {
+		if (ret == -EPROBE_DEFER) {
+			llcc_slice_putd(adreno_dev->gpu_llc_slice);
+			return ret;
+		}
+
+		if (ret != -ENOENT)
+			dev_warn(&pdev->dev,
+				"Unable to get GPU HTW LLC slice: %d\n", ret);
+	} else
+		adreno_dev->gpuhtw_llc_slice_enable = true;
+
+	return 0;
+}
+#else
+static int adreno_probe_llcc(struct adreno_device *adreno_dev,
+		struct platform_device *pdev)
+{
+	return 0;
+}
+#endif
+
+static void adreno_regmap_op_preaccess(struct kgsl_regmap_region *region)
+{
+	struct kgsl_device *device = region->priv;
+
+	/*
+	 * The kgsl panic notifier is called in atomic context to take a GPU
+	 * snapshot, and the panic handler skips snapshot dumping in case the
+	 * GPU is in SLUMBER state, so we can safely skip kgsl_pre_hwaccess()
+	 * here.
+	 */
+	if (!device->snapshot_atomic && !in_interrupt())
+		kgsl_pre_hwaccess(device);
+}
+
+static const struct kgsl_regmap_ops adreno_regmap_ops = {
+	.preaccess = adreno_regmap_op_preaccess,
+};
+
+static const struct kgsl_functable adreno_functable;
+
+static void adreno_setup_device(struct adreno_device *adreno_dev)
+{
+	u32 i;
+
+	adreno_dev->dev.name = "kgsl-3d0";
+	adreno_dev->dev.ftbl = &adreno_functable;
+
+	init_completion(&adreno_dev->dev.hwaccess_gate);
+	init_completion(&adreno_dev->dev.halt_gate);
+
+	idr_init(&adreno_dev->dev.context_idr);
+
+	mutex_init(&adreno_dev->dev.mutex);
+	INIT_LIST_HEAD(&adreno_dev->dev.globals);
+
+	/* Set the fault tolerance policy to replay, skip, throttle */
+	adreno_dev->ft_policy = BIT(KGSL_FT_REPLAY) |
+		BIT(KGSL_FT_SKIPCMD) | BIT(KGSL_FT_THROTTLE);
+
+	/* Enable command timeouts by default */
+	adreno_dev->long_ib_detect = true;
+
+	INIT_WORK(&adreno_dev->input_work, adreno_input_work);
+
+	INIT_LIST_HEAD(&adreno_dev->active_list);
+	spin_lock_init(&adreno_dev->active_list_lock);
+
+	for (i = 0; i < ARRAY_SIZE(adreno_dev->ringbuffers); i++) {
+		struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[i];
+
+		INIT_LIST_HEAD(&rb->events.group);
+	}
+
+	/*
+	 * Some GPUs need a specific alignment for the UCHE GMEM base address.
+	 * Configure the UCHE GMEM base from the GMEM size and align it
+	 * accordingly so that the RB and UCHE GMEM ranges do not overlap.
+	 */
+	if (adreno_dev->gpucore->uche_gmem_alignment)
+		adreno_dev->uche_gmem_base =
+			ALIGN(adreno_dev->gpucore->gmem_size,
+				adreno_dev->gpucore->uche_gmem_alignment);
+}
+
+static const struct of_device_id adreno_component_match[] = {
+	{ .compatible = "qcom,gen8-gmu" },
+	{ .compatible = "qcom,gen7-gmu" },
+	{ .compatible = "qcom,gpu-gmu" },
+	{ .compatible = "qcom,gpu-rgmu" },
+	{ .compatible = "qcom,kgsl-smmu-v2" },
+	{ .compatible = "qcom,smmu-kgsl-cb" },
+	{},
+};
+
+static int adreno_irq_setup(struct platform_device *pdev,
+		struct adreno_device *adreno_dev)
+{
+	if (!adreno_dev->irq_mask)
+		return 0;
+
+	return kgsl_request_irq(pdev, "kgsl_3d0_irq", adreno_irq_handler, KGSL_DEVICE(adreno_dev));
+}
+
+int adreno_device_probe(struct platform_device *pdev,
+		struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct device *dev = &pdev->dev;
+	unsigned int priv = 0;
+	int status;
+	u32 size;
+
+	KGSL_BOOT_MARKER("GPU Init");
+
+	/* Initialize the adreno device structure */
+	adreno_setup_device(adreno_dev);
+
+	dev_set_drvdata(dev, device);
+
+	device->pdev = pdev;
+
+	adreno_update_soc_hw_revision_quirks(adreno_dev, pdev);
+
+	status = adreno_read_speed_bin(pdev);
+	if (status < 0)
+		goto err;
+
+	device->speed_bin = status;
+
+	status = adreno_of_get_power(adreno_dev, pdev);
+	if (status)
+		goto err;
+
+	status = kgsl_bus_init(device, pdev);
+	if (status)
+		goto err;
+
+	status = kgsl_regmap_init(pdev, &device->regmap, "kgsl_3d0_reg_memory",
+		&adreno_regmap_ops, device);
+	if (status)
+		goto err_bus_close;
+
+	/*
+	 * The SMMU APIs use unsigned long for virtual addresses which means
+	 * that we cannot use 64 bit virtual addresses on a 32 bit kernel even
+	 * though the hardware and the rest of the KGSL driver support it.
+	 */
+	if (adreno_support_64bit(adreno_dev))
+		kgsl_mmu_set_feature(device, KGSL_MMU_64BIT);
+
+	/*
+	 * Set the SMMU aperture on A6XX and newer targets to use per-process
+	 * pagetables.
+	 */
+	if (ADRENO_GPUREV(adreno_dev) >= 600)
+		kgsl_mmu_set_feature(device, KGSL_MMU_SMMU_APERTURE);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_IOCOHERENT))
+		kgsl_mmu_set_feature(device, KGSL_MMU_IO_COHERENT);
+
+	/*
+	 * Support VBOs on hardware where the HLOS has access to the PRR
+	 * register configuration.
+	 */
+	if (!adreno_is_a650(adreno_dev))
+		kgsl_mmu_set_feature(device, KGSL_MMU_SUPPORT_VBO);
+
+	if (adreno_preemption_enable)
+		adreno_dev->preempt_override = true;
+
+	device->pwrctrl.bus_width = adreno_dev->gpucore->bus_width;
+
+	device->mmu.secured = (IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) &&
+		ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION));
+
+	/* Probe the LLCC - this could return -EPROBE_DEFER */
+	status = adreno_probe_llcc(adreno_dev, pdev);
+	if (status)
+		goto err_bus_close;
+
+	/*
+	 * If the GPU HTW slice was successfully obtained, set the MMU feature
+	 * so the domain can set the appropriate attributes.
+	 */
+	if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice))
+		kgsl_mmu_set_feature(device, KGSL_MMU_LLCC_ENABLE);
+
+	/* Bind the components before doing the KGSL platform probe. */
+	status = component_bind_all(dev, NULL);
+	if (status)
+		goto err_remove_llcc;
+
+	status = adreno_irq_setup(pdev, adreno_dev);
+	if (status < 0)
+		goto err_unbind;
+
+	device->pwrctrl.interrupt_num = status;
+
+	device->freq_limiter_intr_num = kgsl_request_irq_optional(pdev, "freq_limiter_irq",
+				adreno_freq_limiter_irq_handler, device);
+
+	device->freq_limiter_irq_clear =
+		devm_reset_control_get(&pdev->dev, "freq_limiter_irq_clear");
+
+	status = kgsl_device_platform_probe(device);
+	if (status)
+		goto err_unbind;
+
+	adreno_fence_trace_array_init(device);
+
+	/* Add the CX_DBGC block to the regmap */
+	kgsl_regmap_add_region(&device->regmap, pdev, "cx_dbgc", NULL, NULL);
+
+	/* Probe for the optional CX_MISC block */
+	adreno_cx_misc_probe(device);
+
+	adreno_isense_probe(device);
+
+	/* Allocate the memstore for storing timestamps and other useful info */
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
+		priv |= KGSL_MEMDESC_PRIVILEGED;
+
+	device->memstore = kgsl_allocate_global(device,
+		KGSL_MEMSTORE_SIZE, 0, 0, priv, "memstore");
+
+	status = PTR_ERR_OR_ZERO(device->memstore);
+	if (status) {
+		trace_array_put(device->fence_trace_array);
+		kgsl_device_platform_remove(device);
+		goto err_unbind;
+	}
+
+	/* Initialize the snapshot engine */
+	size = adreno_dev->gpucore->snapshot_size;
+
+	/*
+	 * Use a default size if one wasn't specified, but print a warning so
+	 * the developer knows to fix it
+	 */
+
+	if (WARN(!size, "The snapshot size was not specified in the gpucore\n"))
+		size = SZ_1M;
+
+	kgsl_device_snapshot_probe(device, size);
+
+	adreno_debugfs_init(adreno_dev);
+	adreno_profile_init(adreno_dev);
+
+	adreno_dev->perfcounter = false;
+
+	adreno_sysfs_init(adreno_dev);
+
+	/* Ignore return value, as driver can still function without pwrscale enabled */
+	kgsl_pwrscale_init(device, pdev, CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_L3_VOTE))
+		device->l3_vote = true;
+
+#ifdef CONFIG_INPUT
+
+	if (!of_property_read_bool(pdev->dev.of_node,
+			"qcom,disable-wake-on-touch")) {
+		adreno_input_handler.private = device;
+		/*
+		 * It isn't fatal if we cannot register the input handler; just
+		 * log the failure and carry on.
+		 */
+		if (input_register_handler(&adreno_input_handler)) {
+			adreno_input_handler.private = NULL;
+			dev_err(device->dev,
+				     "Unable to register the input handler\n");
+		}
+	}
+#endif
+
+	kgsl_qcom_va_md_register(device);
+
+	KGSL_BOOT_MARKER("GPU Ready");
+
+	return 0;
+
+err_unbind:
+	component_unbind_all(dev, NULL);
+
+err_remove_llcc:
+	if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice))
+		llcc_slice_putd(adreno_dev->gpu_llc_slice);
+
+	if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice))
+		llcc_slice_putd(adreno_dev->gpuhtw_llc_slice);
+
+err_bus_close:
+	kgsl_bus_close(device);
+
+err:
+	device->pdev = NULL;
+	dev_err_probe(&pdev->dev, status, "adreno device probe failed\n");
+	return status;
+}
+
+static int adreno_bind(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	const struct adreno_gpu_core *gpucore;
+	int ret;
+	u32 chipid;
+
+	gpucore = adreno_identify_gpu(pdev, &chipid);
+	if (IS_ERR(gpucore))
+		return PTR_ERR(gpucore);
+
+	ret = gpucore->gpudev->probe(pdev, chipid, gpucore);
+
+	if (!ret) {
+		struct kgsl_device *device = dev_get_drvdata(dev);
+
+		device->pdev_loaded = true;
+		srcu_init_notifier_head(&device->nh);
+	} else {
+		/*
+		 * Handle resource cleanup through unbind, instead of a
+		 * lengthy goto error path.
+		 */
+		adreno_unbind(dev);
+	}
+
+	return ret;
+}
+
+static void adreno_unbind(struct device *dev)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	const struct adreno_gpudev *gpudev;
+
+	device = dev_get_drvdata(dev);
+	if (!device)
+		return;
+
+	/* Return if cleanup happens in adreno_device_probe */
+	if (!device->pdev)
+		return;
+
+	if (device->pdev_loaded) {
+		srcu_cleanup_notifier_head(&device->nh);
+		device->pdev_loaded = false;
+	}
+
+	adreno_dev = ADRENO_DEVICE(device);
+	gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	trace_array_put(device->fence_trace_array);
+
+	if (gpudev->remove != NULL)
+		gpudev->remove(adreno_dev);
+
+#ifdef CONFIG_INPUT
+	if (adreno_input_handler.private)
+		input_unregister_handler(&adreno_input_handler);
+#endif
+
+	kgsl_qcom_va_md_unregister(device);
+	adreno_coresight_remove(adreno_dev);
+	adreno_profile_close(adreno_dev);
+
+	/* Release the system cache slice descriptor */
+	if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice))
+		llcc_slice_putd(adreno_dev->gpu_llc_slice);
+
+	if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice))
+		llcc_slice_putd(adreno_dev->gpuhtw_llc_slice);
+
+	kgsl_pwrscale_close(device);
+
+	if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->close)
+		adreno_dev->dispatch_ops->close(adreno_dev);
+
+	kgsl_device_platform_remove(device);
+
+	component_unbind_all(dev, NULL);
+
+	kgsl_bus_close(device);
+	device->pdev = NULL;
+
+	if (device->num_l3_pwrlevels != 0)
+		qcom_dcvs_unregister_voter(KGSL_L3_DEVICE, DCVS_L3,
+			DCVS_SLOW_PATH);
+
+	clear_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv);
+	clear_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv);
+}
+
+static void adreno_resume(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (device->state == KGSL_STATE_SUSPEND) {
+		adreno_put_gpu_halt(adreno_dev);
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+	} else if (device->state != KGSL_STATE_INIT) {
+		/*
+		 * This is an error situation so wait for the device to idle and
+		 * then put the device in SLUMBER state.  This will get us to
+		 * the right place when we resume.
+		 */
+		if (device->state == KGSL_STATE_ACTIVE)
+			adreno_idle(device);
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+		dev_err(device->dev, "resume invoked without a suspend\n");
+	}
+}
+
+static int adreno_pm_resume(struct device *dev)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+	struct adreno_device *adreno_dev;
+	const struct adreno_power_ops *ops;
+
+	if (!device)
+		return 0;
+
+	adreno_dev = ADRENO_DEVICE(device);
+	ops = ADRENO_POWER_OPS(adreno_dev);
+
+#if IS_ENABLED(CONFIG_DEEPSLEEP)
+	if (pm_suspend_via_firmware()) {
+		struct kgsl_iommu *iommu = &device->mmu.iommu;
+		int status = kgsl_set_smmu_aperture(device, &iommu->user_context);
+
+		if (status)
+			return status;
+
+		status = kgsl_set_smmu_lpac_aperture(device, &iommu->lpac_context);
+		if (status < 0)
+			return status;
+	}
+#endif
+
+	mutex_lock(&device->mutex);
+	ops->pm_resume(adreno_dev);
+	mutex_unlock(&device->mutex);
+
+	kgsl_reclaim_start();
+	return 0;
+}
+
+static int adreno_suspend(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int status = kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND);
+
+	if (!status && device->state == KGSL_STATE_SUSPEND)
+		adreno_get_gpu_halt(adreno_dev);
+
+	return status;
+}
+
+static int adreno_pm_suspend(struct device *dev)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+	struct adreno_device *adreno_dev;
+	const struct adreno_power_ops *ops;
+	int status;
+
+	if (!device)
+		return 0;
+
+	adreno_dev = ADRENO_DEVICE(device);
+	ops = ADRENO_POWER_OPS(adreno_dev);
+
+	mutex_lock(&device->mutex);
+	status = ops->pm_suspend(adreno_dev);
+
+#if IS_ENABLED(CONFIG_DEEPSLEEP)
+	if (!status && pm_suspend_via_firmware())
+		adreno_zap_shader_unload(adreno_dev);
+#endif
+
+	mutex_unlock(&device->mutex);
+
+	if (status)
+		return status;
+
+	/*
+	 * When the device enters the suspend state, the CX can be collapsed, causing
+	 * the GPU CX timer to pause. Clear the ADRENO_DEVICE_CX_TIMER_INITIALIZED
+	 * flag to ensure that the CX timer is reseeded during resume.
+	 */
+	clear_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv);
+	kgsl_reclaim_close();
+	kthread_flush_worker(device->events_worker);
+	flush_workqueue(kgsl_driver.lockless_workqueue);
+
+	return status;
+}
+
+void adreno_create_profile_buffer(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int priv = 0;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
+		priv = KGSL_MEMDESC_PRIVILEGED;
+
+	adreno_allocate_global(device, &adreno_dev->profile_buffer,
+		PAGE_SIZE, 0, 0, priv, "alwayson");
+
+	adreno_dev->profile_index = 0;
+
+	if (!IS_ERR(adreno_dev->profile_buffer))
+		set_bit(ADRENO_DEVICE_DRAWOBJ_PROFILE,
+			&adreno_dev->priv);
+}
+
+static int adreno_init(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret;
+
+	ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
+	if (ret)
+		return ret;
+
+	/*
+	 * Initialization only needs to be done once, until the
+	 * device is shut down.
+	 */
+	if (test_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv))
+		return 0;
+
+	ret = gpudev->init(adreno_dev);
+	if (ret)
+		return ret;
+
+	set_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv);
+
+	return 0;
+}
+
+static bool regulators_left_on(struct kgsl_device *device)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	if (gmu_core_gpmu_isenabled(device))
+		return false;
+
+	if (!IS_ERR_OR_NULL(pwr->cx_gdsc))
+		if (regulator_is_enabled(pwr->cx_gdsc))
+			return true;
+
+	if (!IS_ERR_OR_NULL(pwr->gx_gdsc))
+		return regulator_is_enabled(pwr->gx_gdsc);
+
+	return false;
+}
+
+void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev)
+{
+	int i;
+	struct adreno_ringbuffer *rb;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		if (rb->drawctxt_active)
+			kgsl_context_put(&(rb->drawctxt_active->base));
+		rb->drawctxt_active = NULL;
+
+		kgsl_sharedmem_writel(device->scratch,
+			SCRATCH_RB_OFFSET(rb->id, current_rb_ptname),
+			0);
+	}
+}
+
+static int adreno_open(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/*
+	 * active_cnt special case: we are starting up for the first
+	 * time, so use this sequence instead of the kgsl_pwrctrl_wake()
+	 * which will be called by adreno_active_count_get().
+	 */
+	atomic_inc(&device->active_cnt);
+
+	memset(device->memstore->hostptr, 0, device->memstore->size);
+
+	ret = adreno_init(device);
+	if (ret)
+		goto err;
+
+	ret = adreno_start(device, 0);
+	if (ret)
+		goto err;
+
+	complete_all(&device->hwaccess_gate);
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+	adreno_active_count_put(adreno_dev);
+
+	return 0;
+err:
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
+	atomic_dec(&device->active_cnt);
+
+	return ret;
+}
+
+static int adreno_first_open(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
+
+	if (!device->pdev_loaded)
+		return -ENODEV;
+
+	return ops->first_open(adreno_dev);
+}
+
+static int adreno_close(struct adreno_device *adreno_dev)
+{
+	return kgsl_pwrctrl_change_state(KGSL_DEVICE(adreno_dev),
+			KGSL_STATE_INIT);
+}
+
+static int adreno_last_close(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
+
+	/*
+	 * Wait up to 1 second for the active count to go low
+	 * and then start complaining about it
+	 */
+	if (kgsl_active_count_wait(device, 0, HZ)) {
+		dev_err(device->dev,
+			"Waiting for the active count to become 0\n");
+
+		while (kgsl_active_count_wait(device, 0, HZ))
+			dev_err(device->dev,
+				"Still waiting for the active count\n");
+	}
+
+	return ops->last_close(adreno_dev);
+}
+
+static int adreno_pwrctrl_active_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	if ((atomic_read(&device->active_cnt) == 0) &&
+		(device->state != KGSL_STATE_ACTIVE)) {
+		mutex_unlock(&device->mutex);
+		wait_for_completion(&device->hwaccess_gate);
+		mutex_lock(&device->mutex);
+		device->pwrctrl.superfast = true;
+		ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+	}
+	if (ret == 0)
+		atomic_inc(&device->active_cnt);
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+	return ret;
+}
+
+static void adreno_pwrctrl_active_count_put(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (WARN(atomic_read(&device->active_cnt) == 0,
+			"Unbalanced get/put calls to KGSL active count\n"))
+		return;
+
+	if (atomic_dec_and_test(&device->active_cnt)) {
+		kgsl_pwrscale_update_stats(device);
+		kgsl_pwrscale_update(device);
+
+		kgsl_start_idle_timer(device);
+	}
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	wake_up(&device->active_cnt_wq);
+}
+
+int adreno_active_count_get(struct adreno_device *adreno_dev)
+{
+	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
+
+	return ops->active_count_get(adreno_dev);
+}
+
+void adreno_active_count_put(struct adreno_device *adreno_dev)
+{
+	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
+
+	ops->active_count_put(adreno_dev);
+}
+
+void adreno_get_bus_counters(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+
+	if (!device->pwrctrl.bus_control)
+		return;
+
+	/* VBIF waiting for RAM */
+	ret |= adreno_perfcounter_kernel_get(adreno_dev,
+		KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 0,
+		&adreno_dev->starved_ram_lo, NULL);
+
+	/* Target has GBIF */
+	if (adreno_is_gen8(adreno_dev) || adreno_is_gen7(adreno_dev) ||
+		(adreno_is_a6xx(adreno_dev) && !adreno_is_a630(adreno_dev))) {
+		ret |= adreno_perfcounter_kernel_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 1,
+			&adreno_dev->starved_ram_lo_ch1, NULL);
+
+		ret |= adreno_perfcounter_kernel_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_VBIF,
+			GBIF_AXI0_READ_DATA_TOTAL_BEATS,
+			&adreno_dev->ram_cycles_lo, NULL);
+
+		ret |= adreno_perfcounter_kernel_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_VBIF,
+			GBIF_AXI1_READ_DATA_TOTAL_BEATS,
+			&adreno_dev->ram_cycles_lo_ch1_read, NULL);
+
+		ret |= adreno_perfcounter_kernel_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_VBIF,
+			GBIF_AXI0_WRITE_DATA_TOTAL_BEATS,
+			&adreno_dev->ram_cycles_lo_ch0_write, NULL);
+
+		ret |= adreno_perfcounter_kernel_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_VBIF,
+			GBIF_AXI1_WRITE_DATA_TOTAL_BEATS,
+			&adreno_dev->ram_cycles_lo_ch1_write, NULL);
+	} else {
+		/* VBIF DDR cycles */
+		ret |= adreno_perfcounter_kernel_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_VBIF,
+			VBIF_AXI_TOTAL_BEATS,
+			&adreno_dev->ram_cycles_lo, NULL);
+	}
+
+	if (ret)
+		dev_err(KGSL_DEVICE(adreno_dev)->dev,
+			"Unable to get perf counters for bus DCVS\n");
+}
+
+#define ADRENO_AHB_MIN_TIMEOUT_VAL_USEC 1000
+
+u32 adreno_get_ahb_timeout_val(struct adreno_device *adreno_dev, u32 noc_timeout_us)
+{
+	u64 cycles, hub_clk_freq = adreno_dev->gmu_hub_clk_freq;
+	u32 timeout_val;
+
+	if (!noc_timeout_us)
+		return 0;
+
+	do_div(hub_clk_freq, HZ_PER_MHZ);
+	cycles = hub_clk_freq * noc_timeout_us;
+
+	/*
+	 * Get the maximum possible AHB timeout value that is still less than the GPU NOC
+	 * timeout value. When the cycle count is an exact power of two, the calculated AHB
+	 * timeout would equal the GPU config NOC timeout, so drop one cycle to make sure the
+	 * AHB timeout is never programmed equal to the GPU config NOC timeout.
+	 */
+	if (is_power_of_2(cycles))
+		cycles -= 1;
+
+	timeout_val = ilog2(cycles);
+
+	/*
+	 * Make sure the AHB timeout value fits into the bit field and is not so
+	 * low that it causes false timeouts.
+	 */
+	if ((timeout_val > GENMASK(4, 0)) ||
+		((ADRENO_AHB_MIN_TIMEOUT_VAL_USEC * hub_clk_freq) > (1 << timeout_val))) {
+		dev_warn(adreno_dev->dev.dev, "Invalid AHB timeout_val %u\n", timeout_val);
+		return 0;
+	}
+
+	/*
+	 * Return (timeout_val - 1). With a programmed value of N, the hardware flags a
+	 * timeout if an AHB transaction is not completed within 2 ^ (N + 1) cycles, i.e.
+	 * 2 ^ timeout_val cycles here.
+	 */
+	return (timeout_val - 1);
+}
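+
+/*
+ * Worked example (illustrative only; the clock and timeout values below are
+ * assumptions, not taken from any particular target): with a 200 MHz GMU hub
+ * clock and noc_timeout_us = 5000, cycles = 200 * 5000 = 1,000,000 and
+ * ilog2(cycles) = 19. That fits in the 5-bit field and clears the 1000 us
+ * minimum (1000 * 200 = 200,000 <= 2^19), so the function returns 18.
+ * Programming 18 makes the AHB timeout fire after 2^19 cycles (~2621 us),
+ * safely below the 5000 us GPU NOC timeout.
+ */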
+
+/**
+ * _adreno_start - Power up the GPU and prepare to accept commands
+ * @adreno_dev: Pointer to an adreno_device structure
+ *
+ * The core function that powers up and initializes the GPU.  This function is
+ * called at init and after coming out of SLUMBER
+ */
+static int _adreno_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int status;
+	unsigned int state = device->state;
+	bool regulator_left_on;
+
+	/* make sure ADRENO_DEVICE_STARTED is not set here */
+	WARN_ON(test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv));
+
+	regulator_left_on = regulators_left_on(device);
+
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
+	/* Put the GPU in a responsive state */
+	status = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);
+	if (status)
+		goto error_pwr_off;
+
+	/* Set any stale active contexts to NULL */
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	/* Set the bit to indicate that we've just powered on */
+	set_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv);
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data));
+
+	/* Soft reset the GPU if a regulator is stuck on */
+	if (regulator_left_on)
+		_soft_reset(adreno_dev);
+
+	/* Start the GPU */
+	status = gpudev->start(adreno_dev);
+	if (status)
+		goto error_pwr_off;
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 1);
+
+	adreno_perfcounter_start(adreno_dev);
+
+	/* Clear FSR here in case it is set from a previous pagefault */
+	kgsl_mmu_clear_fsr(&device->mmu);
+
+	status = gpudev->rb_start(adreno_dev);
+	if (status)
+		goto error_pwr_off;
+
+	/*
+	 * At this point it is safe to assume that we recovered. Setting
+	 * this field allows us to take a new snapshot for the next failure
+	 * if we are prioritizing the first unrecoverable snapshot.
+	 */
+	if (device->snapshot)
+		device->snapshot->recovered = true;
+
+	/* Start the dispatcher */
+	adreno_dispatcher_start(device);
+
+	device->reset_counter++;
+
+	set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+	/*
+	 * There is a possible deadlock scenario during kgsl firmware reading
+	 * (request_firmware) and devfreq update calls. During first boot, kgsl
+	 * device mutex is held and then request_firmware is called for reading
+	 * firmware. request_firmware internally takes dev_pm_qos_mtx lock.
+	 * Whereas in case of devfreq update calls triggered by thermal/bcl or
+	 * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then
+	 * tries to take kgsl device mutex as part of get_dev_status/target
+	 * calls. This results in a deadlock when both threads are unable to acquire
+	 * the mutex held by the other thread. Enable devfreq updates now as we are
+	 * done reading all firmware files.
+	 */
+	device->pwrscale.devfreq_enabled = true;
+
+	return 0;
+
+error_pwr_off:
+	/* set the state back to original state */
+	kgsl_pwrctrl_change_state(device, state);
+
+	return status;
+}
+
+/**
+ * adreno_start() - Power up and initialize the GPU
+ * @device: Pointer to the KGSL device to power up
+ * @priority: Boolean flag to specify if the start should be scheduled in a low
+ * latency work queue
+ *
+ * Power up the GPU and initialize it.  If priority is specified then elevate
+ * the thread priority for the duration of the start operation
+ */
+int adreno_start(struct kgsl_device *device, int priority)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int nice = task_nice(current);
+	int ret;
+
+	if (priority && (adreno_wake_nice < nice))
+		set_user_nice(current, adreno_wake_nice);
+
+	ret = _adreno_start(adreno_dev);
+
+	if (priority)
+		set_user_nice(current, nice);
+
+	return ret;
+}
+
+static int adreno_stop(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int error = 0;
+
+	if (!test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv))
+		return 0;
+
+	kgsl_pwrscale_update_stats(device);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+	/* Save active coresight registers if applicable */
+	adreno_coresight_stop(adreno_dev);
+
+	/* Save physical performance counter values before GPU power down */
+	adreno_perfcounter_save(adreno_dev);
+
+	if (gpudev->clear_pending_transactions)
+		gpudev->clear_pending_transactions(adreno_dev);
+
+	adreno_dispatcher_stop(adreno_dev);
+
+	adreno_ringbuffer_stop(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+	return error;
+}
+
+/**
+ * adreno_reset() - Helper function to reset the GPU
+ * @device: Pointer to the KGSL device structure for the GPU
+ * @fault: Type of fault. Needed to skip soft reset for MMU fault
+ *
+ * Try to reset the GPU to recover from a fault.  First, try to do a low latency
+ * soft reset.  If the soft reset fails for some reason, then fall back to a
+ * full power cycle by toggling the footswitch.
+ */
+int adreno_reset(struct kgsl_device *device, int fault)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret = -EINVAL;
+	int i;
+
+	if (gpudev->reset)
+		return gpudev->reset(adreno_dev);
+
+	/*
+	 * Try a soft reset first. Do not do a soft reset for an IOMMU fault
+	 * (because the IOMMU hardware needs a reset too).
+	 */
+
+	if (!(fault & ADRENO_IOMMU_PAGE_FAULT))
+		ret = adreno_soft_reset(device);
+
+	if (ret) {
+		/* If soft reset failed/skipped, then pull the power */
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
+		/* Since the device is officially off now, clear the start bit */
+		clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+		/* Try to reset the device */
+		ret = adreno_start(device, 0);
+
+		for (i = 0; ret && i < 4; i++) {
+			msleep(20);
+			ret = adreno_start(device, 0);
+		}
+
+		if (ret)
+			return ret;
+
+		if (i != 0)
+			dev_warn(device->dev,
+			      "Device hard reset took %d tries\n", i);
+	}
+
+	/*
+	 * If active_cnt is non-zero then the system was active before
+	 * going into a reset - put it back in that state
+	 */
+
+	if (atomic_read(&device->active_cnt))
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+
+	return ret;
+}
+
+static int copy_prop(struct kgsl_device_getproperty *param,
+		void *src, size_t size)
+{
+	if (copy_to_user(param->value, src,
+		min_t(u32, size, param->sizebytes)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int adreno_prop_device_info(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_devinfo devinfo = {
+		.device_id = device->id + 1,
+		.chip_id = adreno_dev->chipid,
+		.mmu_enabled = kgsl_mmu_has_feature(device, KGSL_MMU_PAGED),
+		.gmem_gpubaseaddr = 0,
+		.gmem_sizebytes = adreno_dev->gpucore->gmem_size,
+	};
+
+	return copy_prop(param, &devinfo, sizeof(devinfo));
+}
+
+static int adreno_prop_gpu_model(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	struct kgsl_gpu_model model = {0};
+
+	strscpy(model.gpu_model, adreno_get_gpu_model(device),
+			sizeof(model.gpu_model));
+
+	return copy_prop(param, &model, sizeof(model));
+}
+
+static int adreno_prop_device_shadow(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	struct kgsl_shadowprop shadowprop = { 0 };
+
+	if (device->memstore->hostptr) {
+		/* Pass a dummy address to identify memstore */
+		shadowprop.gpuaddr =  KGSL_MEMSTORE_TOKEN_ADDRESS;
+		shadowprop.size = device->memstore->size;
+
+		shadowprop.flags = KGSL_FLAGS_INITIALIZED |
+			KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS;
+	}
+
+	return copy_prop(param, &shadowprop, sizeof(shadowprop));
+}
+
+static int adreno_prop_device_qdss_stm(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	struct kgsl_qdss_stm_prop qdssprop = {0};
+
+	if (!IS_ERR_OR_NULL(device->qdss_desc)) {
+		qdssprop.gpuaddr = device->qdss_desc->gpuaddr;
+		qdssprop.size = device->qdss_desc->size;
+	}
+
+	return copy_prop(param, &qdssprop, sizeof(qdssprop));
+}
+
+static int adreno_prop_device_qtimer(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	struct kgsl_qtimer_prop qtimerprop = {0};
+
+	if (!IS_ERR_OR_NULL(device->qtimer_desc)) {
+		qtimerprop.gpuaddr = device->qtimer_desc->gpuaddr;
+		qtimerprop.size = device->qtimer_desc->size;
+	}
+
+	return copy_prop(param, &qtimerprop, sizeof(qtimerprop));
+}
+
+static int adreno_prop_s32(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	int val = 0;
+
+	if (param->type == KGSL_PROP_MMU_ENABLE)
+		val = kgsl_mmu_has_feature(device, KGSL_MMU_PAGED);
+	else if (param->type == KGSL_PROP_INTERRUPT_WAITS)
+		val = 1;
+
+	return copy_prop(param, &val, sizeof(val));
+}
+
+static int adreno_prop_uche_gmem_addr(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	return copy_prop(param, &adreno_dev->uche_gmem_base,
+		sizeof(adreno_dev->uche_gmem_base));
+}
+
+static int adreno_prop_ucode_version(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_ucode_version ucode = {
+		.pfp = adreno_dev->fw[ADRENO_FW_PFP].version,
+		.pm4 = adreno_dev->fw[ADRENO_FW_PM4].version,
+	};
+
+	return copy_prop(param, &ucode, sizeof(ucode));
+}
+
+static int adreno_prop_gaming_bin(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	void *buf;
+	size_t len;
+	int ret;
+	struct nvmem_cell *cell;
+
+	cell = nvmem_cell_get(&device->pdev->dev, "gaming_bin");
+	if (IS_ERR(cell))
+		return -EINVAL;
+
+	buf = nvmem_cell_read(cell, &len);
+	nvmem_cell_put(cell);
+
+	if (!IS_ERR(buf)) {
+		ret = copy_prop(param, buf, len);
+		kfree(buf);
+		return ret;
+	}
+
+	dev_err(device->dev, "failed to read gaming_bin nvmem cell\n");
+	return -EINVAL;
+}
+
+static int adreno_prop_u32(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 val = 0;
+
+	if (param->type == KGSL_PROP_HIGHEST_BANK_BIT) {
+		val = adreno_dev->highest_bank_bit;
+	} else if (param->type == KGSL_PROP_MIN_ACCESS_LENGTH)
+		of_property_read_u32(device->pdev->dev.of_node,
+			"qcom,min-access-length", &val);
+	else if (param->type == KGSL_PROP_UBWC_MODE)
+		of_property_read_u32(device->pdev->dev.of_node,
+			"qcom,ubwc-mode", &val);
+	else if (param->type == KGSL_PROP_DEVICE_BITNESS)
+		val = adreno_support_64bit(adreno_dev) ? 48 : 32;
+	else if (param->type == KGSL_PROP_SPEED_BIN)
+		val = device->speed_bin;
+	else if (param->type == KGSL_PROP_VK_DEVICE_ID)
+		val = adreno_get_vk_device_id(device);
+	else if (param->type == KGSL_PROP_IS_LPAC_ENABLED)
+		val = adreno_dev->lpac_enabled ? 1 : 0;
+	else if (param->type == KGSL_PROP_IS_RAYTRACING_ENABLED)
+		val =  adreno_dev->raytracing_enabled ? 1 : 0;
+	else if (param->type == KGSL_PROP_IS_FASTBLEND_ENABLED)
+		val = adreno_dev->fastblend_enabled ? 1 : 0;
+	else if (param->type == KGSL_PROP_IS_AQE_ENABLED)
+		val = ADRENO_FEATURE(adreno_dev, ADRENO_AQE) ? 1 : 0;
+
+	return copy_prop(param, &val, sizeof(val));
+}
+
+static int adreno_prop_uche_trap_base(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u64 val = 0;
+
+	if (!gpudev->get_uche_trap_base)
+		return -EINVAL;
+
+	val = gpudev->get_uche_trap_base();
+
+	return copy_prop(param, &val, sizeof(val));
+}
+
+static const struct {
+	int type;
+	int (*func)(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param);
+} adreno_property_funcs[] = {
+	{ KGSL_PROP_DEVICE_INFO, adreno_prop_device_info },
+	{ KGSL_PROP_DEVICE_SHADOW, adreno_prop_device_shadow },
+	{ KGSL_PROP_DEVICE_QDSS_STM, adreno_prop_device_qdss_stm },
+	{ KGSL_PROP_DEVICE_QTIMER, adreno_prop_device_qtimer },
+	{ KGSL_PROP_MMU_ENABLE, adreno_prop_s32 },
+	{ KGSL_PROP_INTERRUPT_WAITS, adreno_prop_s32 },
+	{ KGSL_PROP_UCHE_GMEM_VADDR, adreno_prop_uche_gmem_addr },
+	{ KGSL_PROP_UCODE_VERSION, adreno_prop_ucode_version },
+	{ KGSL_PROP_HIGHEST_BANK_BIT, adreno_prop_u32 },
+	{ KGSL_PROP_MIN_ACCESS_LENGTH, adreno_prop_u32 },
+	{ KGSL_PROP_UBWC_MODE, adreno_prop_u32 },
+	{ KGSL_PROP_DEVICE_BITNESS, adreno_prop_u32 },
+	{ KGSL_PROP_SPEED_BIN, adreno_prop_u32 },
+	{ KGSL_PROP_GAMING_BIN, adreno_prop_gaming_bin },
+	{ KGSL_PROP_GPU_MODEL, adreno_prop_gpu_model},
+	{ KGSL_PROP_VK_DEVICE_ID, adreno_prop_u32},
+	{ KGSL_PROP_IS_LPAC_ENABLED, adreno_prop_u32 },
+	{ KGSL_PROP_IS_RAYTRACING_ENABLED, adreno_prop_u32},
+	{ KGSL_PROP_IS_FASTBLEND_ENABLED, adreno_prop_u32},
+	{ KGSL_PROP_UCHE_TRAP_BASE, adreno_prop_uche_trap_base },
+	{ KGSL_PROP_IS_AQE_ENABLED, adreno_prop_u32 },
+};
+
+static int adreno_getproperty(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(adreno_property_funcs); i++) {
+		if (param->type == adreno_property_funcs[i].type)
+			return adreno_property_funcs[i].func(device, param);
+	}
+
+	return -ENODEV;
+}
+
+static int adreno_query_property_list(struct kgsl_device *device, u32 *list,
+		u32 count)
+{
+	int i;
+
+	if (!list)
+		return ARRAY_SIZE(adreno_property_funcs);
+
+	for (i = 0; i < count && i < ARRAY_SIZE(adreno_property_funcs); i++)
+		list[i] = adreno_property_funcs[i].type;
+
+	return i;
+}
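+
+/*
+ * Usage sketch (hypothetical caller, shown only to illustrate the contract
+ * of adreno_query_property_list()):
+ *
+ *	int count = adreno_query_property_list(device, NULL, 0);
+ *	u32 *list = kcalloc(count, sizeof(*list), GFP_KERNEL);
+ *
+ *	if (list)
+ *		adreno_query_property_list(device, list, count);
+ *
+ * Passing a NULL list returns the number of supported properties; a second
+ * call then fills the caller's buffer with the property type IDs.
+ */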
+
+int adreno_set_constraint(struct kgsl_device *device,
+				struct kgsl_context *context,
+				struct kgsl_device_constraint *constraint)
+{
+	int status = 0;
+
+	switch (constraint->type) {
+	case KGSL_CONSTRAINT_PWRLEVEL: {
+		struct kgsl_device_constraint_pwrlevel pwr;
+
+		if (constraint->size != sizeof(pwr)) {
+			status = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(&pwr,
+				(void __user *)constraint->data,
+				sizeof(pwr))) {
+			status = -EFAULT;
+			break;
+		}
+		if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS) {
+			status = -EINVAL;
+			break;
+		}
+
+		context->pwr_constraint.type =
+				KGSL_CONSTRAINT_PWRLEVEL;
+		context->pwr_constraint.sub_type = pwr.level;
+		trace_kgsl_user_pwrlevel_constraint(device,
+			context->id,
+			context->pwr_constraint.type,
+			context->pwr_constraint.sub_type);
+		}
+		break;
+	case KGSL_CONSTRAINT_NONE:
+		if (context->pwr_constraint.type == KGSL_CONSTRAINT_PWRLEVEL)
+			trace_kgsl_user_pwrlevel_constraint(device,
+				context->id,
+				KGSL_CONSTRAINT_NONE,
+				context->pwr_constraint.sub_type);
+		context->pwr_constraint.type = KGSL_CONSTRAINT_NONE;
+		break;
+	case KGSL_CONSTRAINT_L3_PWRLEVEL: {
+		struct kgsl_device_constraint_pwrlevel pwr;
+
+		if (constraint->size != sizeof(pwr)) {
+			status = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(&pwr, constraint->data, sizeof(pwr))) {
+			status = -EFAULT;
+			break;
+		}
+
+		status = register_l3_voter(device);
+		if (status)
+			break;
+
+		if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS)
+			pwr.level = KGSL_CONSTRAINT_PWR_MAXLEVELS - 1;
+
+		context->l3_pwr_constraint.type = KGSL_CONSTRAINT_L3_PWRLEVEL;
+		context->l3_pwr_constraint.sub_type = pwr.level;
+		trace_kgsl_user_pwrlevel_constraint(device, context->id,
+			context->l3_pwr_constraint.type,
+			context->l3_pwr_constraint.sub_type);
+		}
+		break;
+	case KGSL_CONSTRAINT_L3_NONE: {
+		unsigned int type = context->l3_pwr_constraint.type;
+
+		if (type == KGSL_CONSTRAINT_L3_PWRLEVEL)
+			trace_kgsl_user_pwrlevel_constraint(device, context->id,
+				KGSL_CONSTRAINT_L3_NONE,
+				context->l3_pwr_constraint.sub_type);
+		context->l3_pwr_constraint.type = KGSL_CONSTRAINT_L3_NONE;
+		}
+		break;
+	default:
+		status = -EINVAL;
+		break;
+	}
+
+	/* If a new constraint has been set for a context, cancel the old one */
+	if ((status == 0) &&
+		(context->id == device->pwrctrl.constraint.owner_id)) {
+		trace_kgsl_constraint(device, device->pwrctrl.constraint.type,
+					device->pwrctrl.active_pwrlevel, 0);
+		device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE;
+	}
+
+	return status;
+}
+
+static int adreno_setproperty(struct kgsl_device_private *dev_priv,
+				unsigned int type,
+				void __user *value,
+				unsigned int sizebytes)
+{
+	int status = -EINVAL;
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	switch (type) {
+	case KGSL_PROP_PWR_CONSTRAINT:
+	case KGSL_PROP_L3_PWR_CONSTRAINT: {
+			struct kgsl_device_constraint constraint;
+			struct kgsl_context *context;
+
+			if (sizebytes != sizeof(constraint))
+				break;
+
+			if (copy_from_user(&constraint, value,
+				sizeof(constraint))) {
+				status = -EFAULT;
+				break;
+			}
+
+			context = kgsl_context_get_owner(dev_priv,
+							constraint.context_id);
+
+			if (context == NULL)
+				break;
+
+			status = adreno_set_constraint(device, context,
+								&constraint);
+
+			kgsl_context_put(context);
+		}
+		break;
+	default:
+		status = gpudev->setproperty(dev_priv, type, value, sizebytes);
+		break;
+	}
+
+	return status;
+}
+
+/*
+ * adreno_soft_reset -  Do a soft reset of the GPU hardware
+ * @device: KGSL device to soft reset
+ *
+ * "Soft reset" the GPU hardware - this is a fast-path GPU reset. The GPU
+ * hardware is reset but we never pull power, so we can skip a lot of the
+ * standard adreno_stop/adreno_start sequence.
+ */
+static int adreno_soft_reset(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret;
+
+	/*
+	 * Don't allow a soft reset for a304 because the SMMU needs to be hard
+	 * reset
+	 */
+	if (adreno_is_a304(adreno_dev))
+		return -ENODEV;
+
+	if (gpudev->clear_pending_transactions) {
+		ret = gpudev->clear_pending_transactions(adreno_dev);
+		if (ret)
+			return ret;
+	}
+
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+	adreno_clear_gpu_fault(adreno_dev);
+	/* Since the device is officially off now, clear the start bit */
+	clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+	/* save physical performance counter values before GPU soft reset */
+	adreno_perfcounter_save(adreno_dev);
+
+	_soft_reset(adreno_dev);
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	adreno_dev->busy_data.gpu_busy = 0;
+	adreno_dev->busy_data.bif_ram_cycles = 0;
+	adreno_dev->busy_data.bif_ram_cycles_read_ch1 = 0;
+	adreno_dev->busy_data.bif_ram_cycles_write_ch0 = 0;
+	adreno_dev->busy_data.bif_ram_cycles_write_ch1 = 0;
+	adreno_dev->busy_data.bif_starved_ram = 0;
+	adreno_dev->busy_data.bif_starved_ram_ch1 = 0;
+
+	/* Reinitialize the GPU */
+	gpudev->start(adreno_dev);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	/* Enable IRQ */
+	adreno_irqctrl(adreno_dev, 1);
+
+	/* stop all ringbuffers to cancel RB events */
+	adreno_ringbuffer_stop(adreno_dev);
+
+	/* Start the ringbuffer(s) again */
+	ret = gpudev->rb_start(adreno_dev);
+	if (ret == 0) {
+		device->reset_counter++;
+		set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+	}
+
+	/* Restore physical performance counter values after soft reset */
+	adreno_perfcounter_restore(adreno_dev);
+
+	if (ret)
+		dev_err(device->dev, "Device soft reset failed: %d\n", ret);
+
+	return ret;
+}
+
+bool adreno_isidle(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb;
+	int i;
+
+	if (!kgsl_state_is_awake(KGSL_DEVICE(adreno_dev)))
+		return true;
+
+	/*
+	 * wptr is updated when we add commands to the ringbuffer; add a barrier
+	 * to make sure the updated wptr is compared against rptr.
+	 */
+	smp_mb();
+
+	/*
+	 * The GPU is truly idle only when the read and write pointers of all
+	 * ringbuffers are equal.
+	 */
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		if (!adreno_rb_empty(rb))
+			return false;
+	}
+
+	return gpudev->hw_isidle(adreno_dev);
+}
+
+/**
+ * adreno_spin_idle() - Spin wait for the GPU to idle
+ * @adreno_dev: Pointer to an adreno device
+ * @timeout: milliseconds to wait before returning error
+ *
+ * Spin the CPU waiting for the RBBM status to return idle
+ */
+int adreno_spin_idle(struct adreno_device *adreno_dev, unsigned int timeout)
+{
+	unsigned long wait = jiffies + msecs_to_jiffies(timeout);
+
+	do {
+		/*
+		 * If we fault, stop waiting and return an error. The dispatcher
+		 * will clean up the fault from the work queue, but we need to
+		 * make sure we don't block it by waiting for an idle that
+		 * will never come.
+		 */
+
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			return -EDEADLK;
+
+		if (adreno_isidle(adreno_dev))
+			return 0;
+
+	} while (time_before(jiffies, wait));
+
+	/*
+	 * Under rare conditions, preemption can cause the while loop to exit
+	 * without checking if the GPU is idle. Check one last time before we
+	 * return failure.
+	 */
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return -EDEADLK;
+
+	if (adreno_isidle(adreno_dev))
+		return 0;
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * adreno_idle() - wait for the GPU hardware to go idle
+ * @device: Pointer to the KGSL device structure for the GPU
+ *
+ * Wait up to ADRENO_IDLE_TIMEOUT milliseconds for the GPU hardware to go quiet.
+ * Caller must hold the device mutex, and must not hold the dispatcher mutex.
+ */
+
+int adreno_idle(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret;
+
+	/*
+	 * Make sure the device mutex is held so the dispatcher can't send any
+	 * more commands to the hardware
+	 */
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EDEADLK;
+
+	/* Check if we are already idle before idling dispatcher */
+	if (adreno_isidle(adreno_dev))
+		return 0;
+	/*
+	 * Wait for the dispatcher to finish the commands that have
+	 * already been submitted.
+	 */
+	ret = adreno_dispatcher_idle(adreno_dev);
+	if (ret)
+		return ret;
+
+	return adreno_spin_idle(adreno_dev, ADRENO_IDLE_TIMEOUT);
+}
+
+static int adreno_drain_and_idle(struct kgsl_device *device)
+{
+	int ret;
+
+	reinit_completion(&device->halt_gate);
+
+	ret = kgsl_active_count_wait(device, 0, HZ);
+	if (ret)
+		return ret;
+
+	return adreno_idle(device);
+}
+
+/* Caller must hold the device mutex. */
+int adreno_suspend_context(struct kgsl_device *device)
+{
+	/* process any profiling results that are available */
+	adreno_profile_process_results(ADRENO_DEVICE(device));
+
+	/* Wait for the device to go idle */
+	return adreno_idle(device);
+}
+
+void adreno_cx_misc_regread(struct adreno_device *adreno_dev,
+	unsigned int offsetwords, unsigned int *value)
+{
+	unsigned int cx_misc_offset;
+
+	WARN_ONCE(!adreno_dev->cx_misc_virt,
+		  "cx_misc region is not defined in device tree");
+
+	cx_misc_offset = (offsetwords << 2);
+	if (!adreno_dev->cx_misc_virt ||
+		(cx_misc_offset >= adreno_dev->cx_misc_len))
+		return;
+
+	*value = __raw_readl(adreno_dev->cx_misc_virt + cx_misc_offset);
+
+	/*
+	 * ensure this read finishes before the next one.
+	 * i.e. act like normal readl()
+	 */
+	rmb();
+}
+
+void adreno_isense_regread(struct adreno_device *adreno_dev,
+	unsigned int offsetwords, unsigned int *value)
+{
+	unsigned int isense_offset;
+
+	isense_offset = (offsetwords << 2);
+	if (!adreno_dev->isense_virt ||
+		(isense_offset >= adreno_dev->isense_len))
+		return;
+
+	*value =  __raw_readl(adreno_dev->isense_virt + isense_offset);
+
+	/*
+	 * ensure this read finishes before the next one.
+	 * i.e. act like normal readl()
+	 */
+	rmb();
+}
+
+bool adreno_gx_is_on(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	return gpudev->gx_is_on(adreno_dev);
+}
+
+void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev,
+	unsigned int offsetwords, unsigned int value)
+{
+	unsigned int cx_misc_offset;
+
+	WARN_ONCE(!adreno_dev->cx_misc_virt,
+		  "cx_misc region is not defined in device tree");
+
+	cx_misc_offset = (offsetwords << 2);
+	if (!adreno_dev->cx_misc_virt ||
+		(cx_misc_offset >= adreno_dev->cx_misc_len))
+		return;
+
+	/*
+	 * ensure previous writes post before this one,
+	 * i.e. act like normal writel()
+	 */
+	wmb();
+	__raw_writel(value, adreno_dev->cx_misc_virt + cx_misc_offset);
+}
+
+void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev,
+		unsigned int offsetwords,
+		unsigned int mask, unsigned int bits)
+{
+	unsigned int val = 0;
+
+	adreno_cx_misc_regread(adreno_dev, offsetwords, &val);
+	val &= ~mask;
+	adreno_cx_misc_regwrite(adreno_dev, offsetwords, val | bits);
+}
+
+void adreno_profile_submit_time(struct adreno_submit_time *time)
+{
+	struct kgsl_drawobj *drawobj;
+	struct kgsl_drawobj_cmd *cmdobj;
+	struct kgsl_mem_entry *entry;
+	struct kgsl_drawobj_profiling_buffer *profile_buffer;
+
+	if (!time)
+		return;
+
+	drawobj = time->drawobj;
+	if (drawobj == NULL)
+		return;
+
+	cmdobj = CMDOBJ(drawobj);
+	entry = cmdobj->profiling_buf_entry;
+	if (!entry)
+		return;
+
+	profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc,
+			cmdobj->profiling_buffer_gpuaddr);
+
+	if (profile_buffer == NULL)
+		return;
+
+	/* Return kernel clock time to the client if requested */
+	if (drawobj->flags & KGSL_DRAWOBJ_PROFILING_KTIME) {
+		u64 secs = time->ktime;
+
+		profile_buffer->wall_clock_ns =
+			do_div(secs, NSEC_PER_SEC);
+		profile_buffer->wall_clock_s = secs;
+	} else {
+		profile_buffer->wall_clock_s = time->utime.tv_sec;
+		profile_buffer->wall_clock_ns = time->utime.tv_nsec;
+	}
+
+	profile_buffer->gpu_ticks_queued = time->ticks;
+
+	kgsl_memdesc_unmap(&entry->memdesc);
+}
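+
+/*
+ * Note on the KTIME path above: do_div() divides the u64 in place and returns
+ * the remainder, so for an illustrative (made up) ktime of 3,700,000,000 ns
+ * the profiling buffer ends up with wall_clock_s = 3 and
+ * wall_clock_ns = 700,000,000.
+ */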
+
+/**
+ * adreno_waittimestamp - sleep while waiting for the specified timestamp
+ * @device: pointer to a KGSL device structure
+ * @context: pointer to the active kgsl context
+ * @timestamp: GPU timestamp to wait for
+ * @msecs: amount of time to wait (in milliseconds)
+ *
+ * Wait up to 'msecs' milliseconds for the specified timestamp to expire.
+ */
+static int adreno_waittimestamp(struct kgsl_device *device,
+		struct kgsl_context *context,
+		unsigned int timestamp,
+		unsigned int msecs)
+{
+	int ret;
+
+	if (context == NULL) {
+		/* If someone is still using the deprecated call, complain once */
+		dev_WARN_ONCE(device->dev, 1,
+			"IOCTL_KGSL_DEVICE_WAITTIMESTAMP is deprecated\n");
+		return -ENOTTY;
+	}
+
+	/* Return -ENOENT if the context has been detached */
+	if (kgsl_context_detached(context))
+		return -ENOENT;
+
+	ret = adreno_drawctxt_wait(ADRENO_DEVICE(device), context,
+		timestamp, msecs);
+
+	/* If the context got invalidated then return a specific error */
+	if (kgsl_context_invalid(context))
+		ret = -EDEADLK;
+
+	/*
+	 * Return -EPROTO if the device has faulted since the last time we
+	 * checked.  Userspace uses this as a marker for performing post
+	 * fault activities
+	 */
+
+	if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv))
+		ret = -EPROTO;
+
+	return ret;
+}
+
+/**
+ * __adreno_readtimestamp() - Reads the timestamp from memstore memory
+ * @adreno_dev: Pointer to an adreno device
+ * @index: Index into the memstore memory
+ * @type: Type of timestamp to read
+ * @timestamp: The out parameter where the timestamp is read
+ */
+static int __adreno_readtimestamp(struct adreno_device *adreno_dev, int index,
+				int type, unsigned int *timestamp)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int status = 0;
+
+	switch (type) {
+	case KGSL_TIMESTAMP_CONSUMED:
+		kgsl_sharedmem_readl(device->memstore, timestamp,
+			KGSL_MEMSTORE_OFFSET(index, soptimestamp));
+		break;
+	case KGSL_TIMESTAMP_RETIRED:
+		kgsl_sharedmem_readl(device->memstore, timestamp,
+			KGSL_MEMSTORE_OFFSET(index, eoptimestamp));
+		break;
+	default:
+		status = -EINVAL;
+		*timestamp = 0;
+		break;
+	}
+	return status;
+}
+
+/**
+ * adreno_rb_readtimestamp(): Return the value of given type of timestamp
+ * for a RB
+ * @adreno_dev: adreno device whose timestamp values are being queried
+ * @priv: The object being queried for a timestamp (expected to be a rb pointer)
+ * @type: The type of timestamp (one of 3) to be read
+ * @timestamp: Pointer to where the read timestamp is to be written to
+ *
+ * CONSUMED and RETIRED type timestamps are sorted by id and are constantly
+ * updated by the GPU through shared memstore memory. QUEUED type timestamps
+ * are read directly from the ringbuffer struct.
+ *
+ * The function returns 0 on success and the timestamp value at the *timestamp
+ * address and returns -EINVAL on any read error/invalid type and timestamp = 0.
+ */
+int adreno_rb_readtimestamp(struct adreno_device *adreno_dev,
+		void *priv, enum kgsl_timestamp_type type,
+		unsigned int *timestamp)
+{
+	int status = 0;
+	struct adreno_ringbuffer *rb = priv;
+
+	if (type == KGSL_TIMESTAMP_QUEUED)
+		*timestamp = rb->timestamp;
+	else
+		status = __adreno_readtimestamp(adreno_dev,
+				rb->id + KGSL_MEMSTORE_MAX,
+				type, timestamp);
+
+	return status;
+}
+
+/**
+ * adreno_readtimestamp(): Return the value of given type of timestamp
+ * @device: GPU device whose timestamp values are being queried
+ * @priv: The object being queried for a timestamp (expected to be a context)
+ * @type: The type of timestamp (one of 3) to be read
+ * @timestamp: Pointer to where the read timestamp is to be written to
+ *
+ * CONSUMED and RETIRED type timestamps are sorted by id and are constantly
+ * updated by the GPU through shared memstore memory. QUEUED type timestamps
+ * are read directly from the context struct.
+ *
+ * The function returns 0 on success and the timestamp value at the *timestamp
+ * address and returns -EINVAL on any read error/invalid type and timestamp = 0.
+ */
+static int adreno_readtimestamp(struct kgsl_device *device,
+		void *priv, enum kgsl_timestamp_type type,
+		unsigned int *timestamp)
+{
+	int status = 0;
+	struct kgsl_context *context = priv;
+
+	if (type == KGSL_TIMESTAMP_QUEUED) {
+		struct adreno_context *ctxt = ADRENO_CONTEXT(context);
+
+		*timestamp = ctxt->timestamp;
+	} else
+		status = __adreno_readtimestamp(ADRENO_DEVICE(device),
+				context->id, type, timestamp);
+
+	return status;
+}
+
+/**
+ * adreno_device_private_create(): Allocate an adreno_device_private structure
+ */
+static struct kgsl_device_private *adreno_device_private_create(void)
+{
+	struct adreno_device_private *adreno_priv =
+			kzalloc(sizeof(*adreno_priv), GFP_KERNEL);
+
+	if (adreno_priv) {
+		INIT_LIST_HEAD(&adreno_priv->perfcounter_list);
+		return &adreno_priv->dev_priv;
+	}
+	return NULL;
+}
+
+/**
+ * adreno_device_private_destroy(): Destroy an adreno_device_private structure
+ * and release the perfcounters held by the kgsl fd.
+ * @dev_priv: The kgsl device private structure
+ */
+static void adreno_device_private_destroy(struct kgsl_device_private *dev_priv)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_device_private *adreno_priv =
+		container_of(dev_priv, struct adreno_device_private,
+		dev_priv);
+	struct adreno_perfcounter_list_node *p, *tmp;
+
+	mutex_lock(&device->mutex);
+	list_for_each_entry_safe(p, tmp, &adreno_priv->perfcounter_list, node) {
+		adreno_perfcounter_put(adreno_dev, p->groupid,
+					p->countable, PERFCOUNTER_FLAG_NONE);
+		list_del(&p->node);
+		kfree(p);
+	}
+	mutex_unlock(&device->mutex);
+
+	kfree(adreno_priv);
+}
+
+/**
+ * adreno_power_stats() - Reads the counters needed for freq decisions
+ * @device: Pointer to device whose counters are read
+ * @stats: Pointer to stats set that needs updating
+ * Power: The caller is expected to be in a clock-enabled state as this
+ * function performs register reads
+ */
+static void adreno_power_stats(struct kgsl_device *device,
+				struct kgsl_power_stats *stats)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	return gpudev->power_stats(adreno_dev, stats);
+}
+
+static int adreno_regulator_enable(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->regulator_enable)
+		return gpudev->regulator_enable(adreno_dev);
+
+	return 0;
+}
+
+static bool adreno_is_hw_collapsible(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (!gpudev->is_hw_collapsible(adreno_dev))
+		return false;
+
+	if (gpudev->clear_pending_transactions(adreno_dev))
+		return false;
+
+	adreno_dispatcher_stop_fault_timer(device);
+
+	return true;
+}
+
+static void adreno_regulator_disable(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->regulator_disable)
+		gpudev->regulator_disable(adreno_dev);
+}
+
+static void adreno_pwrlevel_change_settings(struct kgsl_device *device,
+		unsigned int prelevel, unsigned int postlevel, bool post)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->pwrlevel_change_settings)
+		gpudev->pwrlevel_change_settings(adreno_dev, prelevel,
+					postlevel, post);
+}
+
+static bool adreno_is_hwcg_on(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	return adreno_dev->hwcg_enabled;
+}
+
+static int adreno_queue_cmds(struct kgsl_device_private *dev_priv,
+	struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+	u32 count, u32 *timestamp)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (WARN_ON(!adreno_dev->dispatch_ops || !adreno_dev->dispatch_ops->queue_cmds))
+		return -ENODEV;
+
+	return adreno_dev->dispatch_ops->queue_cmds(dev_priv, context, drawobj,
+		count, timestamp);
+}
+
+static inline bool _verify_ib(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_memobj_node *ib)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+
+	/* The maximum allowable size for an IB in the CP is 0xFFFFF dwords */
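+	/* In bytes that is at most 0xFFFFF * 4 = 0x3FFFFC, just under 4 MB */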
+	if (ib->size == 0 || ((ib->size >> 2) > 0xFFFFF)) {
+		pr_context(device, context, "ctxt %u invalid ib size %lld\n",
+			context->id, ib->size);
+		return false;
+	}
+
+	/* Make sure that the address is in range and dword aligned */
+	if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr,
+		ib->size) || !IS_ALIGNED(ib->gpuaddr, 4)) {
+		pr_context(device, context, "ctxt %u invalid ib gpuaddr %llX\n",
+			context->id, ib->gpuaddr);
+		return false;
+	}
+
+	return true;
+}
+
+int adreno_verify_cmdobj(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+		uint32_t count)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_memobj_node *ib;
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		/* Verify the IBs before they get queued */
+		if (drawobj[i]->type == CMDOBJ_TYPE) {
+			struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj[i]);
+
+			list_for_each_entry(ib, &cmdobj->cmdlist, node)
+				if (!_verify_ib(dev_priv,
+					&ADRENO_CONTEXT(context)->base, ib))
+					return -EINVAL;
+
+			/*
+			 * Clear the wake on touch bit to indicate an IB has
+			 * been submitted since the last time we set it.
+			 * But only clear it when we have rendering commands.
+			 */
+			device->pwrctrl.wake_on_touch = false;
+		}
+
+		/* A3XX does not have support for drawobj profiling */
+		if (adreno_is_a3xx(ADRENO_DEVICE(device)) &&
+			(drawobj[i]->flags & KGSL_DRAWOBJ_PROFILING))
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int adreno_queue_recurring_cmd(struct kgsl_device_private *dev_priv,
+	struct kgsl_context *context, struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj);
+	int ret;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LSR))
+		return -EOPNOTSUPP;
+
+	if (!gpudev->send_recurring_cmdobj)
+		return -ENODEV;
+
+	ret = adreno_verify_cmdobj(dev_priv, context, &drawobj, 1);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+
+	/* Only one recurring command allowed */
+	if (hwsched->recurring_cmdobj) {
+		mutex_unlock(&device->mutex);
+		return -EINVAL;
+	}
+
+	ret = kgsl_check_context_state(context);
+	if (ret) {
+		mutex_unlock(&device->mutex);
+		return ret;
+	}
+
+	set_bit(CMDOBJ_RECURRING_START, &cmdobj->priv);
+
+	ret = gpudev->send_recurring_cmdobj(adreno_dev, cmdobj);
+	mutex_unlock(&device->mutex);
+
+	if (!ret)
+		srcu_notifier_call_chain(&device->nh, GPU_GMU_READY, NULL);
+
+	return ret;
+}
+
+static int adreno_dequeue_recurring_cmd(struct kgsl_device *device,
+	struct kgsl_context *context)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_drawobj *recurring_drawobj;
+	int ret;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LSR))
+		return -EOPNOTSUPP;
+
+	if (!gpudev->send_recurring_cmdobj)
+		return -ENODEV;
+
+	mutex_lock(&device->mutex);
+
+	/* We can safely return here as the recurring workload is already untracked */
+	if (hwsched->recurring_cmdobj == NULL) {
+		mutex_unlock(&device->mutex);
+		return -EINVAL;
+	}
+
+	recurring_drawobj = DRAWOBJ(hwsched->recurring_cmdobj);
+
+	/* Check if the recurring command is for the same context or not */
+	if (recurring_drawobj->context != context) {
+		mutex_unlock(&device->mutex);
+		return -EINVAL;
+	}
+
+	ret = kgsl_check_context_state(context);
+	if (ret) {
+		mutex_unlock(&device->mutex);
+		return ret;
+	}
+
+	clear_bit(CMDOBJ_RECURRING_START, &hwsched->recurring_cmdobj->priv);
+	set_bit(CMDOBJ_RECURRING_STOP, &hwsched->recurring_cmdobj->priv);
+
+	ret = gpudev->send_recurring_cmdobj(adreno_dev, hwsched->recurring_cmdobj);
+
+	mutex_unlock(&device->mutex);
+
+	if (!ret)
+		srcu_notifier_call_chain(&device->nh, GPU_GMU_STOP, NULL);
+
+	return ret;
+}
+
+static void adreno_set_isdb_breakpoint_registers(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->set_isdb_breakpoint_registers)
+		gpudev->set_isdb_breakpoint_registers(adreno_dev);
+}
+
+static void adreno_drawctxt_sched(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (WARN_ON(!adreno_dev->dispatch_ops || !adreno_dev->dispatch_ops->queue_context))
+		return;
+
+	adreno_dev->dispatch_ops->queue_context(adreno_dev,
+		ADRENO_CONTEXT(context));
+}
+
+void adreno_mark_for_coldboot(struct adreno_device *adreno_dev)
+{
+	if (!adreno_dev->warmboot_enabled)
+		return;
+
+	set_bit(ADRENO_DEVICE_FORCE_COLDBOOT, &adreno_dev->priv);
+}
+
+bool adreno_smmu_is_stalled(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_mmu *mmu = &device->mmu;
+	u32 fault, val;
+
+	/*
+	 * RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) tells whether the GPU
+	 * encountered a pagefault. On Gen8, the page fault status is checked
+	 * via a software condition since RBBM_STATUS3 is not available.
+	 */
+	if (ADRENO_GPUREV(adreno_dev) < 0x080000) {
+		adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val);
+		return (val & BIT(24));
+	}
+
+	if (WARN_ON(!adreno_dev->dispatch_ops || !adreno_dev->dispatch_ops->get_fault))
+		return false;
+
+	fault = adreno_dev->dispatch_ops->get_fault(adreno_dev);
+
+	return ((fault & ADRENO_IOMMU_PAGE_FAULT) &&
+		test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy)) ? true : false;
+}
+
+int adreno_power_cycle(struct adreno_device *adreno_dev,
+	void (*callback)(struct adreno_device *adreno_dev, void *priv),
+	void *priv)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
+	int ret;
+
+	mutex_lock(&device->mutex);
+	ret = ops->pm_suspend(adreno_dev);
+
+	if (!ret) {
+		callback(adreno_dev, priv);
+		adreno_mark_for_coldboot(adreno_dev);
+		ops->pm_resume(adreno_dev);
+	}
+
+	mutex_unlock(&device->mutex);
+
+	return ret;
+}
+
+struct cycle_data {
+	void *ptr;
+	void *val;
+};
+
+static void cycle_set_bool(struct adreno_device *adreno_dev, void *priv)
+{
+	struct cycle_data *data = priv;
+
+	*((bool *) data->ptr) = *((bool *) data->val);
+}
+
+int adreno_power_cycle_bool(struct adreno_device *adreno_dev,
+	bool *flag, bool val)
+{
+	struct cycle_data data = { .ptr = flag, .val = &val };
+
+	return adreno_power_cycle(adreno_dev, cycle_set_bool, &data);
+}
+
+static void cycle_set_u32(struct adreno_device *adreno_dev, void *priv)
+{
+	struct cycle_data *data = priv;
+
+	*((u32 *) data->ptr) = *((u32 *) data->val);
+}
+
+int adreno_power_cycle_u32(struct adreno_device *adreno_dev,
+	u32 *flag, u32 val)
+{
+	struct cycle_data data = { .ptr = flag, .val = &val };
+
+	return adreno_power_cycle(adreno_dev, cycle_set_u32, &data);
+}
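+
+/*
+ * Usage sketch (illustrative): a sysfs/debugfs store handler that needs the
+ * GPU power cycled before a new setting takes effect can do
+ *
+ *	ret = adreno_power_cycle_bool(adreno_dev, &adreno_dev->lm_enabled, val);
+ *
+ * which suspends the GPU, updates the flag, marks the device for coldboot and
+ * resumes it.
+ */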
+
+static int adreno_gpu_clock_set(struct kgsl_device *device, u32 pwrlevel)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct kgsl_pwrlevel *pl = &pwr->pwrlevels[pwrlevel];
+	int ret;
+
+	if (ops->gpu_clock_set)
+		return ops->gpu_clock_set(adreno_dev, pwrlevel);
+
+	ret = clk_set_rate(pwr->grp_clks[0], pl->gpu_freq);
+	if (ret)
+		dev_err(device->dev, "GPU clk freq set failure: %d\n", ret);
+
+	return ret;
+}
+
+static int adreno_interconnect_bus_set(struct adreno_device *adreno_dev,
+	int level, u32 ab)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	if ((level == pwr->cur_buslevel) && (ab == pwr->cur_ab))
+		return 0;
+
+	kgsl_icc_set_tag(pwr, level);
+	pwr->cur_buslevel = level;
+	pwr->cur_ab = ab;
+
+	icc_set_bw(pwr->icc_path, MBps_to_icc(ab),
+		kBps_to_icc(pwr->ddr_table[level]));
+
+	trace_kgsl_buslevel(device, pwr->active_pwrlevel, level, ab);
+
+	return 0;
+}
+
+static int adreno_gpu_bus_set(struct kgsl_device *device, int level, u32 ab)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev);
+
+	if (ops->gpu_bus_set)
+		return ops->gpu_bus_set(adreno_dev, level, ab);
+
+	return adreno_interconnect_bus_set(adreno_dev, level, ab);
+}
+
+static void adreno_deassert_gbif_halt(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->deassert_gbif_halt)
+		gpudev->deassert_gbif_halt(adreno_dev);
+}
+
+static void adreno_create_hw_fence(struct kgsl_device *device, struct kgsl_sync_fence *kfence)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (WARN_ON(!adreno_dev->dispatch_ops))
+		return;
+
+	if (adreno_dev->dispatch_ops->create_hw_fence)
+		adreno_dev->dispatch_ops->create_hw_fence(adreno_dev, kfence);
+}
+
+u64 adreno_read_cx_timer(struct adreno_device *adreno_dev)
+{
+	/* Check if the CX timer is initialized */
+	if (!test_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv))
+		return 0;
+
+	/* Since the GPU CX and CPU timers are synchronized return the CPU timer */
+	return arch_timer_read_counter();
+}
+
+static const struct kgsl_functable adreno_functable = {
+	/* Mandatory functions */
+	.suspend_context = adreno_suspend_context,
+	.first_open = adreno_first_open,
+	.start = adreno_start,
+	.stop = adreno_stop,
+	.last_close = adreno_last_close,
+	.getproperty = adreno_getproperty,
+	.getproperty_compat = adreno_getproperty_compat,
+	.waittimestamp = adreno_waittimestamp,
+	.readtimestamp = adreno_readtimestamp,
+	.queue_cmds = adreno_queue_cmds,
+	.ioctl = adreno_ioctl,
+	.compat_ioctl = adreno_compat_ioctl,
+	.power_stats = adreno_power_stats,
+	.snapshot = adreno_snapshot,
+	.drain_and_idle = adreno_drain_and_idle,
+	.device_private_create = adreno_device_private_create,
+	.device_private_destroy = adreno_device_private_destroy,
+	/* Optional functions */
+	.drawctxt_create = adreno_drawctxt_create,
+	.drawctxt_detach = adreno_drawctxt_detach,
+	.drawctxt_destroy = adreno_drawctxt_destroy,
+	.drawctxt_dump = adreno_drawctxt_dump,
+	.setproperty = adreno_setproperty,
+	.setproperty_compat = adreno_setproperty_compat,
+	.drawctxt_sched = adreno_drawctxt_sched,
+	.resume = adreno_dispatcher_start,
+	.regulator_enable = adreno_regulator_enable,
+	.is_hw_collapsible = adreno_is_hw_collapsible,
+	.regulator_disable = adreno_regulator_disable,
+	.pwrlevel_change_settings = adreno_pwrlevel_change_settings,
+	.query_property_list = adreno_query_property_list,
+	.is_hwcg_on = adreno_is_hwcg_on,
+	.gpu_clock_set = adreno_gpu_clock_set,
+	.gpu_bus_set = adreno_gpu_bus_set,
+	.deassert_gbif_halt = adreno_deassert_gbif_halt,
+	.queue_recurring_cmd = adreno_queue_recurring_cmd,
+	.dequeue_recurring_cmd = adreno_dequeue_recurring_cmd,
+	.set_isdb_breakpoint_registers = adreno_set_isdb_breakpoint_registers,
+	.create_hw_fence = adreno_create_hw_fence,
+};
+
+static const struct component_master_ops adreno_ops = {
+	.bind = adreno_bind,
+	.unbind = adreno_unbind,
+};
+
+const struct adreno_power_ops adreno_power_operations = {
+	.first_open = adreno_open,
+	.last_close = adreno_close,
+	.active_count_get = adreno_pwrctrl_active_count_get,
+	.active_count_put = adreno_pwrctrl_active_count_put,
+	.pm_suspend = adreno_suspend,
+	.pm_resume = adreno_resume,
+	.touch_wakeup = adreno_touch_wakeup,
+};
+
+static int _compare_of(struct device *dev, void *data)
+{
+	return (dev->of_node == data);
+}
+
+static void _release_of(struct device *dev, void *data)
+{
+	of_node_put(data);
+}
+
+static void adreno_add_components(struct device *dev,
+		struct component_match **match)
+{
+	struct device_node *node;
+
+	/*
+	 * Add kgsl-smmu, context banks and gmu as components, if supported.
+	 * Master bind (adreno_bind) will be called only once all added
+	 * components are available.
+	 */
+	for_each_matching_node(node, adreno_component_match) {
+		if (!of_device_is_available(node))
+			continue;
+
+		component_match_add_release(dev, match, _release_of, _compare_of, node);
+	}
+}
+
+static int adreno_probe(struct platform_device *pdev)
+{
+	struct component_match *match = NULL;
+
+	adreno_add_components(&pdev->dev, &match);
+
+	if (!match)
+		return -ENODEV;
+
+	return component_master_add_with_match(&pdev->dev,
+			&adreno_ops, match);
+}
+
+static int adreno_remove(struct platform_device *pdev)
+{
+	component_master_del(&pdev->dev, &adreno_ops);
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION)
+#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER)
+/*
+ * Issue a hyp_assign call to assign unused internal/userspace secure
+ * buffers back to the kernel.
+ */
+static int adreno_secure_pt_hibernate(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_process_private *process;
+	struct kgsl_mem_entry *entry;
+	struct kgsl_global_memdesc *md;
+	struct kgsl_memdesc *memdesc;
+	int ret, id;
+
+	read_lock(&kgsl_driver.proclist_lock);
+	list_for_each_entry(process, &kgsl_driver.process_list, list) {
+		idr_for_each_entry(&process->mem_idr, entry, id) {
+			memdesc = &entry->memdesc;
+			if (!kgsl_memdesc_is_secured(memdesc) ||
+				(memdesc->flags & KGSL_MEMFLAGS_USERMEM_ION) ||
+				(memdesc->priv & KGSL_MEMDESC_HYPASSIGNED_HLOS))
+				continue;
+
+			read_unlock(&kgsl_driver.proclist_lock);
+
+			if (kgsl_unlock_sgt(memdesc->sgt))
+				dev_err(device->dev, "kgsl_unlock_sgt failed\n");
+
+			memdesc->priv |= KGSL_MEMDESC_HYPASSIGNED_HLOS;
+
+			read_lock(&kgsl_driver.proclist_lock);
+		}
+	}
+	read_unlock(&kgsl_driver.proclist_lock);
+
+	list_for_each_entry(md, &device->globals, node) {
+		memdesc = &md->memdesc;
+		if (kgsl_memdesc_is_secured(memdesc) &&
+			!(memdesc->priv & KGSL_MEMDESC_HYPASSIGNED_HLOS)) {
+			ret = kgsl_unlock_sgt(memdesc->sgt);
+			if (ret) {
+				dev_err(device->dev, "kgsl_unlock_sgt failed ret %d\n", ret);
+				goto fail;
+			}
+			memdesc->priv |= KGSL_MEMDESC_HYPASSIGNED_HLOS;
+		}
+	}
+
+	return 0;
+
+fail:
+	list_for_each_entry(md, &device->globals, node) {
+		memdesc = &md->memdesc;
+		if (kgsl_memdesc_is_secured(memdesc) &&
+			(memdesc->priv & KGSL_MEMDESC_HYPASSIGNED_HLOS)) {
+			kgsl_lock_sgt(memdesc->sgt, memdesc->size);
+			memdesc->priv &= ~KGSL_MEMDESC_HYPASSIGNED_HLOS;
+		}
+	}
+
+	return -EBUSY;
+}
+
+static int adreno_secure_pt_restore(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_process_private *process;
+	struct kgsl_mem_entry *entry;
+	struct kgsl_memdesc *memdesc;
+	struct kgsl_global_memdesc *md;
+	int ret, id;
+
+	list_for_each_entry(md, &device->globals, node) {
+		memdesc = &md->memdesc;
+		if (kgsl_memdesc_is_secured(memdesc) &&
+			(memdesc->priv & KGSL_MEMDESC_HYPASSIGNED_HLOS)) {
+			ret = kgsl_lock_sgt(memdesc->sgt, memdesc->size);
+			if (ret) {
+				dev_err(device->dev, "kgsl_lock_sgt failed ret %d\n", ret);
+				return ret;
+			}
+			memdesc->priv &= ~KGSL_MEMDESC_HYPASSIGNED_HLOS;
+		}
+	}
+
+	read_lock(&kgsl_driver.proclist_lock);
+	list_for_each_entry(process, &kgsl_driver.process_list, list) {
+		idr_for_each_entry(&process->mem_idr, entry, id) {
+			memdesc = &entry->memdesc;
+			if (!kgsl_memdesc_is_secured(memdesc) ||
+				(memdesc->flags & KGSL_MEMFLAGS_USERMEM_ION) ||
+				!(memdesc->priv & KGSL_MEMDESC_HYPASSIGNED_HLOS))
+				continue;
+
+			read_unlock(&kgsl_driver.proclist_lock);
+
+			ret = kgsl_lock_sgt(memdesc->sgt, memdesc->size);
+			if (ret) {
+				dev_err(device->dev, "kgsl_lock_sgt failed ret %d\n", ret);
+				return ret;
+			}
+			memdesc->priv &= ~KGSL_MEMDESC_HYPASSIGNED_HLOS;
+
+			read_lock(&kgsl_driver.proclist_lock);
+		}
+	}
+	read_unlock(&kgsl_driver.proclist_lock);
+
+	return 0;
+}
+#else
+static int adreno_secure_pt_hibernate(struct adreno_device *adreno_dev)
+{
+	return 0;
+}
+
+static int adreno_secure_pt_restore(struct adreno_device *adreno_dev)
+{
+	return 0;
+}
+#endif /* IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) */
+
+static int adreno_hibernation_suspend(struct device *dev)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+	struct adreno_device *adreno_dev;
+	const struct adreno_power_ops *ops;
+	int status;
+
+	if (!device)
+		return 0;
+
+	adreno_dev = ADRENO_DEVICE(device);
+	ops = ADRENO_POWER_OPS(adreno_dev);
+
+	mutex_lock(&device->mutex);
+
+	status = ops->pm_suspend(adreno_dev);
+	if (status)
+		goto err;
+
+	/*
+	 * When the device enters hibernation, CX will be collapsed, causing
+	 * the GPU CX timer to pause. Clear the ADRENO_DEVICE_CX_TIMER_INITIALIZED flag
+	 * to ensure that the CX timer is reseeded during resume.
+	 */
+	clear_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv);
+
+	/*
+	 * Unload the zap shader during device hibernation and reload it
+	 * during resume, as there is a possibility that the TZ driver
+	 * is not aware of the hibernation.
+	 */
+	adreno_zap_shader_unload(adreno_dev);
+	status = adreno_secure_pt_hibernate(adreno_dev);
+
+err:
+	mutex_unlock(&device->mutex);
+	return status;
+}
+
+static int adreno_hibernation_resume(struct device *dev)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+	struct kgsl_iommu *iommu;
+	struct kgsl_pwrscale *pwrscale;
+	struct adreno_device *adreno_dev;
+	const struct adreno_power_ops *ops;
+	int ret;
+
+	if (!device)
+		return 0;
+
+	iommu = &device->mmu.iommu;
+	pwrscale = &device->pwrscale;
+	adreno_dev = ADRENO_DEVICE(device);
+	ops = ADRENO_POWER_OPS(adreno_dev);
+
+	mutex_lock(&device->mutex);
+
+	ret = adreno_secure_pt_restore(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = kgsl_set_smmu_aperture(device, &iommu->user_context);
+	if (ret)
+		goto err;
+
+	ret = kgsl_set_smmu_lpac_aperture(device, &iommu->lpac_context);
+	if (ret < 0)
+		goto err;
+
+	gmu_core_dev_force_first_boot(device);
+
+	msm_adreno_tz_reinit(pwrscale->devfreqptr);
+
+	ops->pm_resume(adreno_dev);
+
+err:
+	mutex_unlock(&device->mutex);
+	return ret;
+}
+
+static const struct dev_pm_ops adreno_pm_ops = {
+	.suspend  = adreno_pm_suspend,
+	.resume = adreno_pm_resume,
+	.freeze = adreno_hibernation_suspend,
+	.thaw = adreno_hibernation_resume,
+	.poweroff = adreno_hibernation_suspend,
+	.restore = adreno_hibernation_resume,
+};
+#else
+static const struct dev_pm_ops adreno_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(adreno_pm_suspend, adreno_pm_resume)
+};
+#endif /* IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION) */
+
+static struct platform_driver adreno_platform_driver = {
+	.probe = adreno_probe,
+	.remove = adreno_remove,
+	.driver = {
+		.name = "kgsl-3d",
+		.pm = &adreno_pm_ops,
+		.of_match_table = of_match_ptr(adreno_match_table),
+	}
+};
+
+static int __init kgsl_3d_init(void)
+{
+	int ret;
+
+	ret = kgsl_core_init();
+	if (ret)
+		return ret;
+
+	ret = kgsl_mmu_init();
+	if (ret) {
+		kgsl_core_exit();
+		return ret;
+	}
+
+	gmu_core_register();
+	ret = platform_driver_register(&adreno_platform_driver);
+	if (ret) {
+		gmu_core_unregister();
+		kgsl_mmu_exit();
+		kgsl_core_exit();
+	}
+
+	return ret;
+}
+
+static void __exit kgsl_3d_exit(void)
+{
+	platform_driver_unregister(&adreno_platform_driver);
+	gmu_core_unregister();
+	kgsl_mmu_exit();
+	kgsl_core_exit();
+}
+
+module_param_named(preempt_enable, adreno_preemption_enable, bool, 0600);
+MODULE_PARM_DESC(preempt_enable, "Enable GPU HW Preemption");
+
+module_init(kgsl_3d_init);
+module_exit(kgsl_3d_exit);
+
+MODULE_DESCRIPTION("3D Graphics driver");
+MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: arm_smmu nvmem_qfprom socinfo");
+#if (KERNEL_VERSION(5, 18, 0) <= LINUX_VERSION_CODE)
+MODULE_IMPORT_NS(DMA_BUF);
+#endif

+ 2067 - 0
qcom/opensource/graphics-kernel/adreno.h

@@ -0,0 +1,2067 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_H
+#define __ADRENO_H
+
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+#include "adreno_coresight.h"
+#include "adreno_dispatch.h"
+#include "adreno_drawctxt.h"
+#include "adreno_hfi.h"
+#include "adreno_hwsched.h"
+#include "adreno_perfcounter.h"
+#include "adreno_profile.h"
+#include "adreno_ringbuffer.h"
+#include "kgsl_sharedmem.h"
+
+/* Used to point CP to the SMMU record during preemption */
+#define SET_PSEUDO_SMMU_INFO 0
+/* Used to inform CP where to save preemption data at the time of switch out */
+#define SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR 1
+/* Used to inform CP where to save secure preemption data at the time of switch out */
+#define SET_PSEUDO_PRIV_SECURE_SAVE_ADDR 2
+/* Used to inform CP where to save per context non-secure data at the time of switch out */
+#define SET_PSEUDO_NON_PRIV_SAVE_ADDR 3
+/* Used to inform CP where to save preemption counter data at the time of switch out */
+#define SET_PSEUDO_COUNTER 4
+
+/* Index to preemption scratch buffer to store current QOS value */
+#define QOS_VALUE_IDX KGSL_PRIORITY_MAX_RB_LEVELS
+
+/* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */
+#define ADRENO_DEVICE(device) \
+		container_of(device, struct adreno_device, dev)
+
+/* KGSL_DEVICE - given an adreno_device, return the KGSL device struct */
+#define KGSL_DEVICE(_dev) (&((_dev)->dev))
+
+/* ADRENO_CONTEXT - Given a context return the adreno context struct */
+#define ADRENO_CONTEXT(context) \
+		container_of(context, struct adreno_context, base)
+
+/* ADRENO_GPU_DEVICE - Given an adreno device return the GPU specific struct */
+#define ADRENO_GPU_DEVICE(_a) ((_a)->gpucore->gpudev)
+
+/*
+ * ADRENO_POWER_OPS - Given an adreno device return the GPU specific power
+ * ops
+ */
+#define ADRENO_POWER_OPS(_a) ((_a)->gpucore->gpudev->power_ops)
+
+#define ADRENO_CHIPID_CORE(_id) FIELD_GET(GENMASK(31, 24), _id)
+#define ADRENO_CHIPID_MAJOR(_id) FIELD_GET(GENMASK(23, 16), _id)
+#define ADRENO_CHIPID_MINOR(_id) FIELD_GET(GENMASK(15, 8), _id)
+#define ADRENO_CHIPID_PATCH(_id) FIELD_GET(GENMASK(7, 0), _id)
+
+#define ADRENO_GMU_CHIPID(_id) \
+	(FIELD_PREP(GENMASK(31, 24), ADRENO_CHIPID_CORE(_id)) | \
+	 FIELD_PREP(GENMASK(23, 16), ADRENO_CHIPID_MAJOR(_id)) | \
+	 FIELD_PREP(GENMASK(15, 12), ADRENO_CHIPID_MINOR(_id)) | \
+	 FIELD_PREP(GENMASK(11, 8), ADRENO_CHIPID_PATCH(_id)))
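+/*
+ * For example (illustrative value), a chipid of 0x06030500 (core 6, major 3,
+ * minor 5, patch 0) repacks to a GMU chipid of 0x06035000, with the minor and
+ * patch values squeezed into 4-bit fields.
+ */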
+
+#define ADRENO_REV_MAJOR(_rev) FIELD_GET(GENMASK(23, 16), _rev)
+#define ADRENO_REV_MINOR(_rev) FIELD_GET(GENMASK(15, 8), _rev)
+#define ADRENO_REV_PATCH(_rev) FIELD_GET(GENMASK(7, 0), _rev)
+
+#define ADRENO_GMU_REV(_rev) \
+	(FIELD_PREP(GENMASK(31, 24), ADRENO_REV_MAJOR(_rev)) | \
+	 FIELD_PREP(GENMASK(23, 16), ADRENO_REV_MINOR(_rev)) | \
+	 FIELD_PREP(GENMASK(15, 8), ADRENO_REV_PATCH(_rev)))
+
+/* ADRENO_GPUREV - Return the GPU ID for the given adreno_device */
+#define ADRENO_GPUREV(_a) ((_a)->gpucore->gpurev)
+
+/*
+ * ADRENO_FEATURE - return true if the specified feature is supported by the GPU
+ * core
+ */
+#define ADRENO_FEATURE(_dev, _bit) \
+	((_dev)->gpucore->features & (_bit))
+
+/**
+ * ADRENO_QUIRK - return true if the specified quirk is required by the GPU
+ */
+#define ADRENO_QUIRK(_dev, _bit) \
+	((_dev)->quirks & (_bit))
+
+#define ADRENO_FW(a, f)		(&(a->fw[f]))
+
+/* Adreno core features */
+/* The core supports SP/TP hw controlled power collapse */
+#define ADRENO_SPTP_PC BIT(0)
+/* The GPU supports content protection */
+#define ADRENO_CONTENT_PROTECTION BIT(1)
+/* The GPU supports preemption */
+#define ADRENO_PREEMPTION BIT(2)
+/* The GPMU supports Limits Management */
+#define ADRENO_LM BIT(3)
+/* The GPU supports retention for cpz registers */
+#define ADRENO_CPZ_RETENTION BIT(4)
+/* The core has soft fault detection available */
+#define ADRENO_SOFT_FAULT_DETECT BIT(5)
+/* The GMU supports IFPC power management */
+#define ADRENO_IFPC BIT(6)
+/* The core supports IO-coherent memory */
+#define ADRENO_IOCOHERENT BIT(7)
+/*
+ * The GMU supports Adaptive Clock Distribution (ACD)
+ * for droop mitigation
+ */
+#define ADRENO_ACD BIT(8)
+/* Cooperative reset enabled GMU */
+#define ADRENO_COOP_RESET BIT(9)
+/* Indicates that the specific target is no longer supported */
+#define ADRENO_DEPRECATED BIT(10)
+/* The target supports ringbuffer level APRIV */
+#define ADRENO_APRIV BIT(11)
+/* The GMU supports Battery Current Limiting */
+#define ADRENO_BCL BIT(12)
+/* L3 voting is supported with L3 constraints */
+#define ADRENO_L3_VOTE BIT(13)
+/* LPAC is supported  */
+#define ADRENO_LPAC BIT(14)
+/* Late Stage Reprojection (LSR) enablement for GMU */
+#define ADRENO_LSR BIT(15)
+/* GMU and kernel supports hardware fences */
+#define ADRENO_HW_FENCE BIT(16)
+/* Dynamic Mode Switching supported on this target */
+#define ADRENO_DMS BIT(17)
+/* AQE supported on this target */
+#define ADRENO_AQE BIT(18)
+/* Warm Boot supported on this target */
+#define ADRENO_GMU_WARMBOOT BIT(19)
+/* The GPU supports CLX */
+#define ADRENO_CLX BIT(20)
+
+/*
+ * Adreno GPU quirks - control bits for various workarounds
+ */
+
+/* Set TWOPASSUSEWFI in PC_DBG_ECO_CNTL (5XX/6XX) */
+#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0)
+/* Submit critical packets at GPU wake up */
+#define ADRENO_QUIRK_CRITICAL_PACKETS BIT(1)
+/* Mask out RB1-3 activity signals from HW hang detection logic */
+#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(2)
+/* Disable RB sampler datapath clock gating optimization */
+#define ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING BIT(3)
+/* Disable local memory(LM) feature to avoid corner case error */
+#define ADRENO_QUIRK_DISABLE_LMLOADKILL BIT(4)
+/* Allow HFI to use registers to send message to GMU */
+#define ADRENO_QUIRK_HFI_USE_REG BIT(5)
+/* Only set protected SECVID registers once */
+#define ADRENO_QUIRK_SECVID_SET_ONCE BIT(6)
+/*
+ * Limit number of read and write transactions from
+ * UCHE block to GBIF to avoid possible deadlock
+ * between GBIF, SMMU and MEMNOC.
+ */
+#define ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW BIT(8)
+/* Do explicit mode control of cx gdsc */
+#define ADRENO_QUIRK_CX_GDSC BIT(9)
+
+/* Command identifiers */
+#define CONTEXT_TO_MEM_IDENTIFIER	0x2EADBEEF
+#define CMD_IDENTIFIER			0x2EEDFACE
+#define CMD_INTERNAL_IDENTIFIER		0x2EEDD00D
+#define START_IB_IDENTIFIER		0x2EADEABE
+#define END_IB_IDENTIFIER		0x2ABEDEAD
+#define START_PROFILE_IDENTIFIER	0x2DEFADE1
+#define END_PROFILE_IDENTIFIER		0x2DEFADE2
+#define PWRON_FIXUP_IDENTIFIER		0x2AFAFAFA
+
+/* One cannot wait forever for the core to idle, so set an upper limit to the
+ * amount of time to wait for the core to go idle
+ */
+#define ADRENO_IDLE_TIMEOUT (20 * 1000)
+
+#define ADRENO_FW_PFP 0
+#define ADRENO_FW_SQE 0
+#define ADRENO_FW_PM4 1
+#define ADRENO_FW_AQE 1
+
+#define ADRENO_GPUREV_VALUE(_major, _minor, _patchid) (((_major & 0xFF) << 16) | \
+						       ((_minor & 0xFF) << 8) | \
+						       (_patchid & 0xFF))
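+/*
+ * For example, ADRENO_GPUREV_VALUE(7, 2, 1) evaluates to 0x070201, the value
+ * used for ADRENO_REV_GEN7_2_1 below.
+ */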
+enum adreno_gpurev {
+	ADRENO_REV_UNKNOWN = 0,
+	ADRENO_REV_A304 = 304,
+	ADRENO_REV_A305 = 305,
+	ADRENO_REV_A305C = 306,
+	ADRENO_REV_A306 = 307,
+	ADRENO_REV_A306A = 308,
+	ADRENO_REV_A310 = 310,
+	ADRENO_REV_A320 = 320,
+	ADRENO_REV_A330 = 330,
+	ADRENO_REV_A305B = 335,
+	ADRENO_REV_A405 = 405,
+	ADRENO_REV_A418 = 418,
+	ADRENO_REV_A420 = 420,
+	ADRENO_REV_A430 = 430,
+	ADRENO_REV_A505 = 505,
+	ADRENO_REV_A506 = 506,
+	ADRENO_REV_A508 = 508,
+	ADRENO_REV_A510 = 510,
+	ADRENO_REV_A512 = 512,
+	ADRENO_REV_A530 = 530,
+	ADRENO_REV_A540 = 540,
+	ADRENO_REV_A610 = 610,
+	ADRENO_REV_A611 = 611,
+	ADRENO_REV_A612 = 612,
+	ADRENO_REV_A615 = 615,
+	ADRENO_REV_A616 = 616,
+	ADRENO_REV_A618 = 618,
+	ADRENO_REV_A619 = 619,
+	ADRENO_REV_A620 = 620,
+	ADRENO_REV_A621 = 621,
+	ADRENO_REV_A630 = 630,
+	ADRENO_REV_A635 = 635,
+	ADRENO_REV_A640 = 640,
+	ADRENO_REV_A650 = 650,
+	ADRENO_REV_A660 = 660,
+	ADRENO_REV_A662 = 662,
+	ADRENO_REV_A663 = 663,
+	ADRENO_REV_A680 = 680,
+	ADRENO_REV_A702 = 702,
+	/*
+	 * Gen7 and higher version numbers may exceed 1 digit
+	 * Bits 16-23: Major
+	 * Bits 8-15: Minor
+	 * Bits 0-7: Patch id
+	 */
+	ADRENO_REV_GEN7_0_0 = ADRENO_GPUREV_VALUE(7, 0, 0),
+	ADRENO_REV_GEN7_0_1 = ADRENO_GPUREV_VALUE(7, 0, 1),
+	ADRENO_REV_GEN7_2_0 = ADRENO_GPUREV_VALUE(7, 2, 0),
+	ADRENO_REV_GEN7_2_1 = ADRENO_GPUREV_VALUE(7, 2, 1),
+	ADRENO_REV_GEN7_4_0 = ADRENO_GPUREV_VALUE(7, 4, 0),
+	ADRENO_REV_GEN7_9_0 = ADRENO_GPUREV_VALUE(7, 9, 0),
+	ADRENO_REV_GEN7_9_1 = ADRENO_GPUREV_VALUE(7, 9, 1),
+	ADRENO_REV_GEN7_11_0 = ADRENO_GPUREV_VALUE(7, 11, 0),
+	ADRENO_REV_GEN8_3_0 = ADRENO_GPUREV_VALUE(8, 3, 0),
+};
+
+#define ADRENO_SOFT_FAULT BIT(0)
+#define ADRENO_HARD_FAULT BIT(1)
+#define ADRENO_TIMEOUT_FAULT BIT(2)
+#define ADRENO_IOMMU_PAGE_FAULT BIT(3)
+#define ADRENO_PREEMPT_FAULT BIT(4)
+#define ADRENO_GMU_FAULT BIT(5)
+#define ADRENO_CTX_DETATCH_TIMEOUT_FAULT BIT(6)
+#define ADRENO_GMU_FAULT_SKIP_SNAPSHOT BIT(7)
+
+enum adreno_pipe_type {
+	PIPE_NONE = 0,
+	PIPE_BR = 1,
+	PIPE_BV = 2,
+	PIPE_LPAC = 3,
+	PIPE_AQE0 = 4,
+	PIPE_AQE1 = 5,
+	PIPE_DDE_BR = 6,
+	PIPE_DDE_BV = 7,
+};
+/**
+ * Bit fields for GPU_CX_MISC_CX_AHB_*_CNTL registers
+ * AHB_TXFRTIMEOUTRELEASE	[8:8]
+ * AHB_TXFRTIMEOUTENABLE	[9:9]
+ * AHB_RESPONDERROR		[11:11]
+ * AHB_ERRORSTATUSENABLE	[12:12]
+ */
+#define ADRENO_AHB_CNTL_DEFAULT (BIT(12) | BIT(11) | BIT(9) | BIT(8))
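+/*
+ * The default above works out to 0x1B00: error status reporting and
+ * respond-on-error enabled, transfer timeout enabled with release.
+ */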
+
+/* number of throttle counters for DCVS adjustment */
+#define ADRENO_GPMU_THROTTLE_COUNTERS 4
+
+struct adreno_gpudev;
+
+/* Time to allow preemption to complete (in ms) */
+#define ADRENO_PREEMPT_TIMEOUT 10000
+
+#define PREEMPT_SCRATCH_OFFSET(id) (id * sizeof(u64))
+
+#define PREEMPT_SCRATCH_ADDR(dev, id) \
+	((dev)->preempt.scratch->gpuaddr + PREEMPT_SCRATCH_OFFSET(id))
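+/*
+ * Each ringbuffer id owns one u64 slot in the scratch buffer, e.g. id 2 maps
+ * to byte offset 16. QOS_VALUE_IDX above indexes the slot directly after the
+ * per-RB slots.
+ */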
+
+/**
+ * enum adreno_preempt_states
+ * ADRENO_PREEMPT_NONE: No preemption is scheduled
+ * ADRENO_PREEMPT_START: The S/W has started
+ * ADRENO_PREEMPT_TRIGGERED: A preempt has been triggered in the HW
+ * ADRENO_PREEMPT_FAULTED: The preempt timer has fired
+ * ADRENO_PREEMPT_PENDING: The H/W has signaled preemption complete
+ * ADRENO_PREEMPT_COMPLETE: Preemption could not be finished in the IRQ handler,
+ * worker has been scheduled
+ */
+enum adreno_preempt_states {
+	ADRENO_PREEMPT_NONE = 0,
+	ADRENO_PREEMPT_START,
+	ADRENO_PREEMPT_TRIGGERED,
+	ADRENO_PREEMPT_FAULTED,
+	ADRENO_PREEMPT_PENDING,
+	ADRENO_PREEMPT_COMPLETE,
+};
+
+/**
+ * struct adreno_protected_regs - container for a protect register span
+ */
+struct adreno_protected_regs {
+	/** @reg: Physical protected mode register to write to */
+	u32 reg;
+	/** @start: Dword offset of the starting register in the range */
+	u32 start;
+	/**
+	 * @end: Dword offset of the ending register in the range
+	 * (inclusive)
+	 */
+	u32 end;
+	/**
+	 * @noaccess: 1 if the register should not be accessible from
+	 * userspace, 0 if it can be read (but not written)
+	 */
+	u32 noaccess;
+};
+
+/**
+ * struct adreno_preemption
+ * @state: The current state of preemption
+ * @scratch: Per-target scratch memory for implementation specific functionality
+ * @timer: A timer to make sure preemption doesn't stall
+ * @work: A work struct for the preemption worker (for 5XX)
+ * @preempt_level: The level of preemption (for 6XX)
+ * @skipsaverestore: To skip saverestore during L1 preemption (for 6XX)
+ * @usesgmem: enable GMEM save/restore across preemption (for 6XX)
+ * @count: Track the number of preemptions triggered
+ */
+struct adreno_preemption {
+	atomic_t state;
+	struct kgsl_memdesc *scratch;
+	struct timer_list timer;
+	struct work_struct work;
+	unsigned int preempt_level;
+	bool skipsaverestore;
+	bool usesgmem;
+	unsigned int count;
+	/* @postamble_len: Number of dwords in KMD postamble pm4 packet */
+	u32 postamble_len;
+	/*
+	 * @postamble_bootup_len: Number of dwords in KMD postamble pm4 packet
+	 * that needs to be sent before first submission to GPU.
+	 * Note: Postambles are not preserved across slumber.
+	 */
+	u32 postamble_bootup_len;
+};
+
+struct adreno_busy_data {
+	unsigned int gpu_busy;
+	unsigned int bif_ram_cycles;
+	unsigned int bif_ram_cycles_read_ch1;
+	unsigned int bif_ram_cycles_write_ch0;
+	unsigned int bif_ram_cycles_write_ch1;
+	unsigned int bif_starved_ram;
+	unsigned int bif_starved_ram_ch1;
+	unsigned int num_ifpc;
+	unsigned int throttle_cycles[ADRENO_GPMU_THROTTLE_COUNTERS];
+	u32 bcl_throttle;
+};
+
+/**
+ * struct adreno_firmware - Struct holding fw details
+ * @fwvirt: Buffer which holds the ucode
+ * @size: Size of ucode buffer
+ * @version: Version of ucode
+ * @memdesc: Memory descriptor which holds ucode buffer info
+ */
+struct adreno_firmware {
+	unsigned int *fwvirt;
+	size_t size;
+	unsigned int version;
+	struct kgsl_memdesc *memdesc;
+};
+
+/**
+ * struct adreno_perfcounter_list_node - struct to store perfcounters
+ * allocated by a process on a kgsl fd.
+ * @groupid: groupid of the allocated perfcounter
+ * @countable: countable assigned to the allocated perfcounter
+ * @node: list node for perfcounter_list of a process
+ */
+struct adreno_perfcounter_list_node {
+	unsigned int groupid;
+	unsigned int countable;
+	struct list_head node;
+};
+
+/**
+ * struct adreno_device_private - Adreno private structure per fd
+ * @dev_priv: the kgsl device private structure
+ * @perfcounter_list: list of perfcounters used by the process
+ */
+struct adreno_device_private {
+	struct kgsl_device_private dev_priv;
+	struct list_head perfcounter_list;
+};
+
+/**
+ * struct adreno_reglist_list - A container for list of registers and
+ * number of registers in the list
+ */
+struct adreno_reglist_list {
+	/** @regs: List of registers **/
+	const u32 *regs;
+	/** @count: Number of registers in the list **/
+	u32 count;
+};
+
+/**
+ * struct adreno_power_ops - Container for target specific power up/down
+ * sequences
+ */
+struct adreno_power_ops {
+	/**
+	 * @first_open: Target specific function triggered when first kgsl
+	 * instance is opened
+	 */
+	int (*first_open)(struct adreno_device *adreno_dev);
+	/**
+	 * @last_close: Target specific function triggered when last kgsl
+	 * instance is closed
+	 */
+	int (*last_close)(struct adreno_device *adreno_dev);
+	/**
+	 * @active_count_get: Target specific function to keep gpu from power
+	 * collapsing
+	 */
+	int (*active_count_get)(struct adreno_device *adreno_dev);
+	/**
+	 * @active_count_put: Target specific function to allow gpu to power
+	 * collapse
+	 */
+	void (*active_count_put)(struct adreno_device *adreno_dev);
+	/** @pm_suspend: Target specific function to suspend the driver */
+	int (*pm_suspend)(struct adreno_device *adreno_dev);
+	/** @pm_resume: Target specific function to resume the driver */
+	void (*pm_resume)(struct adreno_device *adreno_dev);
+	/**
+	 * @touch_wakeup: Target specific function to start gpu on touch event
+	 */
+	void (*touch_wakeup)(struct adreno_device *adreno_dev);
+	/** @gpu_clock_set: Target specific function to set gpu frequency */
+	int (*gpu_clock_set)(struct adreno_device *adreno_dev, u32 pwrlevel);
+	/** @gpu_bus_set: Target specific function to set gpu bandwidth */
+	int (*gpu_bus_set)(struct adreno_device *adreno_dev, int bus_level,
+		u32 ab);
+};
+
+/**
+ * struct adreno_gpu_core - A specific GPU core definition
+ * @gpurev: Unique GPU revision identifier
+ * @core: Match for the core version of the GPU
+ * @major: Match for the major version of the GPU
+ * @minor: Match for the minor version of the GPU
+ * @patchid: Match for the patch revision of the GPU
+ * @features: Common adreno features supported by this core
+ * @gpudev: Pointer to the GPU family specific functions for this core
+ * @uche_gmem_alignment: Alignment required for UCHE GMEM base
+ * @gmem_size: Amount of binning memory (GMEM/OCMEM) to reserve for the core
+ * @bus_width: Bytes transferred in 1 cycle
+ */
+struct adreno_gpu_core {
+	enum adreno_gpurev gpurev;
+	unsigned int core, major, minor, patchid;
+	/**
+	 * @compatible: If specified, use the compatible string to match the
+	 * device
+	 */
+	const char *compatible;
+	unsigned long features;
+	const struct adreno_gpudev *gpudev;
+	const struct adreno_perfcounters *perfcounters;
+	u32 uche_gmem_alignment;
+	size_t gmem_size;
+	u32 bus_width;
+	/** @snapshot_size: Size of the static snapshot region in bytes */
+	u32 snapshot_size;
+	/** @num_ddr_channels: Number of DDR channels */
+	u32 num_ddr_channels;
+};
+
+/**
+ * struct adreno_dispatch_ops - Common functions for dispatcher operations
+ */
+struct adreno_dispatch_ops {
+	/* @close: Shut down the dispatcher */
+	void (*close)(struct adreno_device *adreno_dev);
+	/* @queue_cmds: Queue a command on the context */
+	int (*queue_cmds)(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+		u32 count, u32 *timestamp);
+	/* @queue_context: Queue a context to be dispatched */
+	void (*queue_context)(struct adreno_device *adreno_dev,
+			struct adreno_context *drawctxt);
+	void (*setup_context)(struct adreno_device *adreno_dev,
+			struct adreno_context *drawctxt);
+	void (*fault)(struct adreno_device *adreno_dev, u32 fault);
+	/* @create_hw_fence: Create a hardware fence */
+	void (*create_hw_fence)(struct adreno_device *adreno_dev, struct kgsl_sync_fence *kfence);
+	/* @get_fault: Get the GPU fault status */
+	u32 (*get_fault)(struct adreno_device *adreno_dev);
+};
+
+/**
+ * struct adreno_device - The mothership structure for all adreno related info
+ * @dev: Reference to struct kgsl_device
+ * @priv: Holds the private flags specific to the adreno_device
+ * @chipid: Chip ID specific to the GPU
+ * @cx_misc_len: Length of the CX MISC register block
+ * @cx_misc_virt: Pointer where the CX MISC block is mapped
+ * @isense_base: Base physical address of isense block
+ * @isense_len: Length of the isense register block
+ * @isense_virt: Pointer where isense block is mapped
+ * @gpucore: Pointer to the adreno_gpu_core structure
+ * @gpmu_cmds_size: Length of gpmu cmd stream
+ * @gpmu_cmds: gpmu cmd stream
+ * @ringbuffers: Array of pointers to adreno_ringbuffers
+ * @num_ringbuffers: Number of ringbuffers for the GPU
+ * @cur_rb: Pointer to the current ringbuffer
+ * @next_rb: Ringbuffer we are switching to during preemption
+ * @prev_rb: Ringbuffer we are switching from during preemption
+ * @fast_hang_detect: Software fault detection availability
+ * @ft_policy: Defines the fault tolerance policy
+ * @long_ib_detect: Long IB detection availability
+ * @cooperative_reset: Indicates if graceful death handshake is enabled
+ * between GMU and GPU
+ * @profile: Container for adreno profiler information
+ * @dispatcher: Container for adreno GPU dispatcher
+ * @pwron_fixup: Command buffer to run a post-power collapse shader workaround
+ * @pwron_fixup_dwords: Number of dwords in the command buffer
+ * @input_work: Work struct for turning on the GPU after a touch event
+ * @busy_data: Struct holding GPU VBIF busy stats
+ * @ram_cycles_lo: Number of DDR clock cycles for the monitor session (Only
+ * DDR channel 0 read cycles in case of GBIF)
+ * @ram_cycles_lo_ch1_read: Number of DDR channel 1 Read clock cycles for
+ * the monitor session
+ * @ram_cycles_lo_ch0_write: Number of DDR channel 0 Write clock cycles for
+ * the monitor session
+ * @ram_cycles_lo_ch1_write: Number of DDR channel 1 Write clock cycles for
+ * the monitor session
+ * @starved_ram_lo: Number of cycles VBIF/GBIF is stalled by DDR (Only channel 0
+ * stall cycles in case of GBIF)
+ * @starved_ram_lo_ch1: Number of cycles GBIF is stalled by DDR channel 1
+ * @halt: Atomic variable to check whether the GPU is currently halted
+ * @pending_irq_refcnt: Atomic variable to keep track of running IRQ handlers
+ * @ctx_d_debugfs: Context debugfs node
+ * @profile_buffer: Memdesc holding the drawobj profiling buffer
+ * @profile_index: Index to store the start/stop ticks in the profiling
+ * buffer
+ * @pwrup_reglist: Memdesc holding the power up register list
+ * which is used by CP during preemption and IFPC
+ * @lm_sequence: Pointer to the start of the register write sequence for LM
+ * @lm_size: The dword size of the LM sequence
+ * @lm_limit: limiting value for LM
+ * @lm_threshold_count: register value for counter for lm threshold breakin
+ * @lm_threshold_cross: number of current peaks exceeding threshold
+ * @ifpc_count: Number of times the GPU went into IFPC
+ * @highest_bank_bit: Value of the highest bank bit
+ * @gpmu_throttle_counters: counters for number of throttled clocks
+ * @irq_storm_work: Worker to handle possible interrupt storms
+ * @active_list: List to track active contexts
+ * @active_list_lock: Lock to protect active_list
+ * @gpu_llc_slice: GPU system cache slice descriptor
+ * @gpu_llc_slice_enable: To enable the GPU system cache slice or not
+ * @gpuhtw_llc_slice: GPU pagetables system cache slice descriptor
+ * @gpuhtw_llc_slice_enable: To enable the GPUHTW system cache slice or not
+ * @zap_loaded: Used to track if zap was successfully loaded or not
+ */
+struct adreno_device {
+	struct kgsl_device dev;    /* Must be first field in this struct */
+	unsigned long priv;
+	unsigned int chipid;
+	/** @uche_gmem_base: Base address of GMEM for UCHE access */
+	u64 uche_gmem_base;
+	unsigned int cx_misc_len;
+	void __iomem *cx_misc_virt;
+	unsigned long isense_base;
+	unsigned int isense_len;
+	void __iomem *isense_virt;
+	const struct adreno_gpu_core *gpucore;
+	struct adreno_firmware fw[2];
+	size_t gpmu_cmds_size;
+	unsigned int *gpmu_cmds;
+	struct adreno_ringbuffer ringbuffers[KGSL_PRIORITY_MAX_RB_LEVELS];
+	int num_ringbuffers;
+	struct adreno_ringbuffer *cur_rb;
+	struct adreno_ringbuffer *next_rb;
+	struct adreno_ringbuffer *prev_rb;
+	unsigned int fast_hang_detect;
+	unsigned long ft_policy;
+	bool long_ib_detect;
+	bool cooperative_reset;
+	struct adreno_profile profile;
+	struct adreno_dispatcher dispatcher;
+	struct kgsl_memdesc *pwron_fixup;
+	unsigned int pwron_fixup_dwords;
+	struct work_struct input_work;
+	struct adreno_busy_data busy_data;
+	unsigned int ram_cycles_lo;
+	unsigned int ram_cycles_lo_ch1_read;
+	unsigned int ram_cycles_lo_ch0_write;
+	unsigned int ram_cycles_lo_ch1_write;
+	unsigned int starved_ram_lo;
+	unsigned int starved_ram_lo_ch1;
+	atomic_t halt;
+	atomic_t pending_irq_refcnt;
+	struct dentry *ctx_d_debugfs;
+	/** @lm_enabled: True if limits management is enabled for this target */
+	bool lm_enabled;
+	/** @acd_enabled: True if acd is enabled for this target */
+	bool acd_enabled;
+	/** @hwcg_enabled: True if hardware clock gating is enabled */
+	bool hwcg_enabled;
+	/** @throttling_enabled: True if LM throttling is enabled on a5xx */
+	bool throttling_enabled;
+	/** @sptp_pc_enabled: True if SPTP power collapse is enabled on a5xx */
+	bool sptp_pc_enabled;
+	/** @bcl_enabled: True if BCL is enabled */
+	bool bcl_enabled;
+	/** @clx_enabled: True if CLX is enabled */
+	bool clx_enabled;
+	/** @lpac_enabled: True if LPAC is enabled */
+	bool lpac_enabled;
+	/** @dms_enabled: True if DMS is enabled */
+	bool dms_enabled;
+	/** @warmboot_enabled: True if warmboot is enabled */
+	bool warmboot_enabled;
+	/** @preempt_override: True if command line param enables preemption */
+	bool preempt_override;
+	struct kgsl_memdesc *profile_buffer;
+	unsigned int profile_index;
+	struct kgsl_memdesc *pwrup_reglist;
+	uint32_t *lm_sequence;
+	uint32_t lm_size;
+	struct adreno_preemption preempt;
+	struct work_struct gpmu_work;
+	uint32_t lm_leakage;
+	uint32_t lm_limit;
+	uint32_t lm_threshold_count;
+	uint32_t lm_threshold_cross;
+	uint32_t ifpc_count;
+
+	unsigned int highest_bank_bit;
+	unsigned int quirks;
+
+#ifdef CONFIG_QCOM_KGSL_CORESIGHT
+	/** @gx_coresight:  A coresight instance for GX */
+	struct adreno_coresight_device gx_coresight;
+	/** @cx_coresight: A coresight instance for CX */
+	struct adreno_coresight_device cx_coresight;
+	/** @funnel_gfx:  A coresight instance for gfx funnel */
+	struct adreno_funnel_device funnel_gfx;
+#endif
+
+	uint32_t gpmu_throttle_counters[ADRENO_GPMU_THROTTLE_COUNTERS];
+	struct work_struct irq_storm_work;
+
+	struct list_head active_list;
+	spinlock_t active_list_lock;
+
+	void *gpu_llc_slice;
+	bool gpu_llc_slice_enable;
+	void *gpuhtw_llc_slice;
+	bool gpuhtw_llc_slice_enable;
+	unsigned int zap_loaded;
+	/**
+	 * @critpkts: Memory descriptor for 5xx critical packets if applicable
+	 */
+	struct kgsl_memdesc *critpkts;
+	/**
+	 * @critpkts_secure: Memory descriptor for 5xx secure critical packets
+	 */
+	struct kgsl_memdesc *critpkts_secure;
+	/** @irq_mask: The current interrupt mask for the GPU device */
+	u32 irq_mask;
+	/*
+	 * @soft_ft_regs: an array of registers for soft fault detection on a3xx
+	 * targets
+	 */
+	u32 *soft_ft_regs;
+	/*
+	 * @soft_ft_vals: an array of register values for soft fault detection
+	 * on a3xx targets
+	 */
+	u32 *soft_ft_vals;
+	/*
+	 * @soft_ft_count: number of elements in @soft_ft_regs and @soft_ft_vals
+	 */
+	int soft_ft_count;
+	/* @dispatch_ops: A pointer to a set of adreno dispatch ops */
+	const struct adreno_dispatch_ops *dispatch_ops;
+	/** @hwsched: Container for the hardware dispatcher */
+	struct adreno_hwsched hwsched;
+	/*
+	 * @perfcounter: Flag to clear perfcounters across contexts and
+	 * controls perfcounter ioctl read
+	 */
+	bool perfcounter;
+	/** @gmu_hub_clk_freq: Gmu hub interface clock frequency */
+	u64 gmu_hub_clk_freq;
+	/* @patch_reglist: If false power up register list needs to be patched */
+	bool patch_reglist;
+	/*
+	 * @uche_client_pf: uche_client_pf client register configuration
+	 * for pf debugging
+	 */
+	u32 uche_client_pf;
+	/**
+	 * @bcl_data: bit 0 contains response type for bcl alarms and bits 1:24 controls
+	 * throttle level for bcl alarm levels 0-2. If not set, gmu fw sets default throttle levels.
+	 */
+	u32 bcl_data;
+	/*
+	 * @bcl_debugfs_dir: Debugfs directory node for bcl related nodes
+	 */
+	struct dentry *bcl_debugfs_dir;
+	/** @bcl_throttle_time_us: Total time in us spent in BCL throttling */
+	u32 bcl_throttle_time_us;
+	/* @preemption_debugfs_dir: Debugfs directory node for preemption related nodes */
+	struct dentry *preemption_debugfs_dir;
+	/* @hwsched_enabled: If true, hwsched is enabled */
+	bool hwsched_enabled;
+	/* @fastblend_enabled: True if fastblend feature is enabled */
+	bool fastblend_enabled;
+	/* @raytracing_enabled: True if raytracing feature is enabled */
+	bool raytracing_enabled;
+	/* @feature_fuse: feature fuse value read from HW */
+	u32 feature_fuse;
+	/** @gmu_ab: Track if GMU supports ab vote */
+	bool gmu_ab;
+	/** @ifpc_hyst: IFPC long hysteresis value */
+	u32 ifpc_hyst;
+	/** @ifpc_hyst_floor: IFPC long hysteresis floor value */
+	u32 ifpc_hyst_floor;
+	/** @cx_misc_base: CX MISC register block base offset */
+	u32 cx_misc_base;
+	/*
+	 * @no_restore_count: Keep track of perfcounter requests that don't have
+	 * ADRENO_PERFCOUNTER_GROUP_RESTORE flag set
+	 */
+	u32 no_restore_count;
+	/*
+	 * @ahb_timeout_val: AHB transaction timeout value.
+	 * If set, a timeout will occur in 2 ^ (ahb_timeout_val + 1) cycles.
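+	 * For example, a value of 19 would give a timeout after 2^20
+	 * (roughly one million) AHB clock cycles.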
+	 */
+	u32 ahb_timeout_val;
+};
+
+/**
+ * enum adreno_device_flags - Private flags for the adreno_device
+ * @ADRENO_DEVICE_PWRON - Set during init after a power collapse
+ * @ADRENO_DEVICE_PWRON_FIXUP - Set if the target requires the shader fixup
+ * after power collapse
+ * @ADRENO_DEVICE_STARTED - Set if the device start sequence is in progress
+ * @ADRENO_DEVICE_FAULT - Set if the device is currently in fault (and shouldn't
+ * send any more commands to the ringbuffer)
+ * @ADRENO_DEVICE_DRAWOBJ_PROFILE - Set if the device supports drawobj
+ * profiling via the ALWAYSON counter
+ * @ADRENO_DEVICE_PREEMPTION - Turn on/off preemption
+ * @ADRENO_DEVICE_SOFT_FAULT_DETECT - Set if soft fault detect is enabled
+ * @ADRENO_DEVICE_GPMU_INITIALIZED - Set if GPMU firmware initialization succeeded
+ * @ADRENO_DEVICE_ISDB_ENABLED - Set if the Integrated Shader DeBugger is
+ * attached and enabled
+ * @ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED - Set if a CACHE_FLUSH_TS irq storm
+ * is in progress
+ */
+enum adreno_device_flags {
+	ADRENO_DEVICE_PWRON = 0,
+	ADRENO_DEVICE_PWRON_FIXUP = 1,
+	ADRENO_DEVICE_INITIALIZED = 2,
+	ADRENO_DEVICE_STARTED = 5,
+	ADRENO_DEVICE_FAULT = 6,
+	ADRENO_DEVICE_DRAWOBJ_PROFILE = 7,
+	ADRENO_DEVICE_GPU_REGULATOR_ENABLED = 8,
+	ADRENO_DEVICE_PREEMPTION = 9,
+	ADRENO_DEVICE_SOFT_FAULT_DETECT = 10,
+	ADRENO_DEVICE_GPMU_INITIALIZED = 11,
+	ADRENO_DEVICE_ISDB_ENABLED = 12,
+	ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED = 13,
+	/** @ADRENO_DEVICE_DMS: Set if DMS is enabled */
+	ADRENO_DEVICE_DMS = 14,
+	/** @ADRENO_DEVICE_GMU_AB: Set if AB vote via GMU is enabled */
+	ADRENO_DEVICE_GMU_AB = 15,
+	/*
+	 * @ADRENO_DEVICE_FORCE_COLDBOOT: Set if a feature is toggled
+	 * via sysfs/debugfs or when we are doing fault recovery
+	 */
+	ADRENO_DEVICE_FORCE_COLDBOOT = 16,
+	/** @ADRENO_DEVICE_CX_TIMER_INITIALIZED: Set if the CX timer is initialized */
+	ADRENO_DEVICE_CX_TIMER_INITIALIZED = 17,
+};
+
+/**
+ * struct adreno_drawobj_profile_entry - a single drawobj entry in the
+ * kernel profiling buffer
+ * @started: Number of GPU ticks at start of the drawobj
+ * @retired: Number of GPU ticks at the end of the drawobj
+ * @ctx_start: CP_ALWAYS_ON_CONTEXT tick at start of the drawobj
+ * @ctx_end: CP_ALWAYS_ON_CONTEXT tick at end of the drawobj
+ */
+struct adreno_drawobj_profile_entry {
+	uint64_t started;
+	uint64_t retired;
+	uint64_t ctx_start;
+	uint64_t ctx_end;
+};
+
+#define ADRENO_DRAWOBJ_PROFILE_OFFSET(_index, _member) \
+	 ((_index) * sizeof(struct adreno_drawobj_profile_entry) \
+	  + offsetof(struct adreno_drawobj_profile_entry, _member))
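+/*
+ * Each profile entry is four u64s (32 bytes), so for example
+ * ADRENO_DRAWOBJ_PROFILE_OFFSET(3, retired) is 3 * 32 + 8 = 104 bytes into
+ * the profiling buffer.
+ */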
+
+
+/**
+ * adreno_regs: List of registers that are used in kgsl driver for all
+ * 3D devices. Each device type has different offset value for the same
+ * register, so an array of register offsets are declared for every device
+ * and are indexed by the enumeration values defined in this enum
+ */
+enum adreno_regs {
+	ADRENO_REG_CP_ME_RAM_DATA,
+	ADRENO_REG_CP_RB_BASE,
+	ADRENO_REG_CP_RB_BASE_HI,
+	ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+	ADRENO_REG_CP_RB_RPTR_ADDR_HI,
+	ADRENO_REG_CP_RB_RPTR,
+	ADRENO_REG_CP_RB_WPTR,
+	ADRENO_REG_CP_ME_CNTL,
+	ADRENO_REG_CP_RB_CNTL,
+	ADRENO_REG_CP_IB1_BASE,
+	ADRENO_REG_CP_IB1_BASE_HI,
+	ADRENO_REG_CP_IB1_BUFSZ,
+	ADRENO_REG_CP_IB2_BASE,
+	ADRENO_REG_CP_IB2_BASE_HI,
+	ADRENO_REG_CP_IB2_BUFSZ,
+	ADRENO_REG_CP_TIMESTAMP,
+	ADRENO_REG_CP_SCRATCH_REG6,
+	ADRENO_REG_CP_SCRATCH_REG7,
+	ADRENO_REG_CP_PROTECT_STATUS,
+	ADRENO_REG_CP_PREEMPT,
+	ADRENO_REG_CP_PREEMPT_DEBUG,
+	ADRENO_REG_CP_PREEMPT_DISABLE,
+	ADRENO_REG_CP_PROTECT_REG_0,
+	ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+	ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+	ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO,
+	ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI,
+	ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO,
+	ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI,
+	ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO,
+	ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI,
+	ADRENO_REG_CP_PREEMPT_LEVEL_STATUS,
+	ADRENO_REG_RBBM_STATUS,
+	ADRENO_REG_RBBM_STATUS3,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3,
+	ADRENO_REG_RBBM_PERFCTR_PWR_1_LO,
+	ADRENO_REG_RBBM_INT_0_MASK,
+	ADRENO_REG_RBBM_PM_OVERRIDE2,
+	ADRENO_REG_RBBM_SW_RESET_CMD,
+	ADRENO_REG_RBBM_CLOCK_CTL,
+	ADRENO_REG_PA_SC_AA_CONFIG,
+	ADRENO_REG_SQ_GPR_MANAGEMENT,
+	ADRENO_REG_SQ_INST_STORE_MANAGEMENT,
+	ADRENO_REG_TP0_CHICKEN,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
+	ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK,
+	ADRENO_REG_GMU_AHB_FENCE_STATUS,
+	ADRENO_REG_GMU_GMU2HOST_INTR_MASK,
+	ADRENO_REG_GPMU_POWER_COUNTER_ENABLE,
+	ADRENO_REG_REGISTER_MAX,
+};
+
+#define ADRENO_REG_UNUSED	0xFFFFFFFF
+#define ADRENO_REG_SKIP	0xFFFFFFFE
+#define ADRENO_REG_DEFINE(_offset, _reg)[_offset] = _reg
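+/*
+ * ADRENO_REG_DEFINE expands to a designated array initializer; for example
+ * ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A6XX_CP_RB_BASE) becomes
+ * [ADRENO_REG_CP_RB_BASE] = A6XX_CP_RB_BASE in a per-target reg_offsets table.
+ */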
+
+struct adreno_irq_funcs {
+	void (*func)(struct adreno_device *adreno_dev, int mask);
+};
+#define ADRENO_IRQ_CALLBACK(_c) { .func = _c }
+
+/*
+ * struct adreno_debugbus_block - Holds info about debug buses of a chip
+ * @block_id: Bus identifier
+ * @dwords: Number of dwords of data that this block holds
+ */
+struct adreno_debugbus_block {
+	unsigned int block_id;
+	unsigned int dwords;
+};
+
+enum adreno_cp_marker_type {
+	IFPC_DISABLE,
+	IFPC_ENABLE,
+	IB1LIST_START,
+	IB1LIST_END,
+};
+
+struct adreno_gpudev {
+	/*
+	 * These registers are in a different location on different devices,
+	 * so define them in the structure and use them as variables.
+	 */
+	unsigned int *const reg_offsets;
+
+	/* GPU specific function hooks */
+	int (*probe)(struct platform_device *pdev, u32 chipid,
+		const struct adreno_gpu_core *gpucore);
+	void (*snapshot)(struct adreno_device *adreno_dev,
+				struct kgsl_snapshot *snapshot);
+	irqreturn_t (*irq_handler)(struct adreno_device *adreno_dev);
+	int (*init)(struct adreno_device *adreno_dev);
+	void (*remove)(struct adreno_device *adreno_dev);
+	int (*rb_start)(struct adreno_device *adreno_dev);
+	int (*start)(struct adreno_device *adreno_dev);
+	int (*regulator_enable)(struct adreno_device *adreno_dev);
+	void (*regulator_disable)(struct adreno_device *adreno_dev);
+	void (*pwrlevel_change_settings)(struct adreno_device *adreno_dev,
+				unsigned int prelevel, unsigned int postlevel,
+				bool post);
+	void (*preemption_schedule)(struct adreno_device *adreno_dev);
+	int (*preemption_context_init)(struct kgsl_context *context);
+	void (*context_detach)(struct adreno_context *drawctxt);
+	void (*pre_reset)(struct adreno_device *adreno_dev);
+	void (*gpu_keepalive)(struct adreno_device *adreno_dev,
+			bool state);
+	bool (*hw_isidle)(struct adreno_device *adreno_dev);
+	const char *(*iommu_fault_block)(struct kgsl_device *device,
+				unsigned int fsynr1);
+	int (*reset)(struct adreno_device *adreno_dev);
+	/** @read_alwayson: Return the current value of the alwayson counter */
+	u64 (*read_alwayson)(struct adreno_device *adreno_dev);
+	/**
+	 * @power_ops: Target specific function pointers to power up/down the
+	 * gpu
+	 */
+	const struct adreno_power_ops *power_ops;
+	int (*clear_pending_transactions)(struct adreno_device *adreno_dev);
+	void (*deassert_gbif_halt)(struct adreno_device *adreno_dev);
+	int (*ringbuffer_submitcmd)(struct adreno_device *adreno_dev,
+			struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+			struct adreno_submit_time *time);
+	/**
+	 * @is_hw_collapsible: Return true if the hardware can be collapsed.
+	 * Only used by non GMU/RGMU targets
+	 */
+	bool (*is_hw_collapsible)(struct adreno_device *adreno_dev);
+	/**
+	 * @power_stats - Return the perfcounter statistics for DCVS
+	 */
+	void (*power_stats)(struct adreno_device *adreno_dev,
+			struct kgsl_power_stats *stats);
+	int (*setproperty)(struct kgsl_device_private *priv, u32 type,
+		void __user *value, u32 sizebytes);
+	int (*add_to_va_minidump)(struct adreno_device *adreno_dev);
+	/**
+	 * @gx_is_on - Return true if both gfx clock and gxgdsc are enabled.
+	 */
+	bool (*gx_is_on)(struct adreno_device *adreno_dev);
+	/**
+	 * @send_recurring_cmdobj - Target specific function to send recurring IBs to GMU
+	 */
+	int (*send_recurring_cmdobj)(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj);
+	/**
+	 * @perfcounter_remove: Remove perfcounter from the power up list
+	 */
+	int (*perfcounter_remove)(struct adreno_device *adreno_dev,
+			struct adreno_perfcount_register *reg, u32 groupid);
+	/**
+	 * @set_isdb_breakpoint_registers - Program isdb registers to issue break command
+	 */
+	void (*set_isdb_breakpoint_registers)(struct adreno_device *adreno_dev);
+	/**
+	 * @context_destroy: Target specific function called during context destruction
+	 */
+	void (*context_destroy)(struct adreno_device *adreno_dev, struct adreno_context *drawctxt);
+	/**
+	 * @swfuse_irqctrl: To enable/disable sw fuse violation interrupt
+	 */
+	void (*swfuse_irqctrl)(struct adreno_device *adreno_dev, bool state);
+	/**
+	 * @lpac_store: To enable/disable lpac at runtime
+	 */
+	int (*lpac_store)(struct adreno_device *adreno_dev, bool enable);
+	/*
+	 * @get_uche_trap_base: Return the UCHE_TRAP_BASE value
+	 */
+	u64 (*get_uche_trap_base)(void);
+	/**
+	 * @fault_header: Print fault header
+	 */
+	void (*fault_header)(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj);
+	/**
+	 * @lpac_fault_header: Print LPAC fault header
+	 */
+	void (*lpac_fault_header)(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj);
+};
+
+/**
+ * enum kgsl_ft_policy_bits - KGSL fault tolerance policy bits
+ * @KGSL_FT_OFF: Disable fault detection (not used)
+ * @KGSL_FT_REPLAY: Replay the faulting command
+ * @KGSL_FT_SKIPIB: Skip the faulting indirect buffer
+ * @KGSL_FT_SKIPFRAME: Skip the frame containing the faulting IB
+ * @KGSL_FT_DISABLE: Tells the dispatcher to disable FT for the command obj
+ * @KGSL_FT_TEMP_DISABLE: Disables FT for all commands
+ * @KGSL_FT_THROTTLE: Disable the context if it faults too often
+ * @KGSL_FT_SKIPCMD: Skip the command containing the faulting IB
+ */
+enum kgsl_ft_policy_bits {
+	KGSL_FT_OFF = 0,
+	KGSL_FT_REPLAY,
+	KGSL_FT_SKIPIB,
+	KGSL_FT_SKIPFRAME,
+	KGSL_FT_DISABLE,
+	KGSL_FT_TEMP_DISABLE,
+	KGSL_FT_THROTTLE,
+	KGSL_FT_SKIPCMD,
+	/* KGSL_FT_MAX_BITS is used to calculate the mask */
+	KGSL_FT_MAX_BITS,
+	/* Internal bits - set during GFT */
+	/* Skip the PM dump on replayed command obj's */
+	KGSL_FT_SKIP_PMDUMP = 31,
+};
+
+#define KGSL_FT_POLICY_MASK GENMASK(KGSL_FT_MAX_BITS - 1, 0)
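+
+/*
+ * Illustrative example (editor's sketch, not part of the driver): a fault
+ * tolerance policy is a bitmask built from the enum bit positions above,
+ * e.g.
+ *
+ *	unsigned long policy = BIT(KGSL_FT_REPLAY) | BIT(KGSL_FT_SKIPCMD);
+ *
+ * KGSL_FT_POLICY_MASK covers only the bits below KGSL_FT_MAX_BITS.
+ */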
+
+#define FOR_EACH_RINGBUFFER(_dev, _rb, _i)			\
+	for ((_i) = 0, (_rb) = &((_dev)->ringbuffers[0]);	\
+		(_i) < (_dev)->num_ringbuffers;			\
+		(_i)++, (_rb)++)
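+
+/*
+ * Illustrative usage (editor's sketch, not part of the driver):
+ *
+ *	struct adreno_ringbuffer *rb;
+ *	int i;
+ *
+ *	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+ *		if (!adreno_rb_empty(rb))
+ *			... ringbuffer still has pending commands ...
+ *	}
+ */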
+
+extern const struct adreno_power_ops adreno_power_operations;
+
+extern const struct adreno_gpudev adreno_a3xx_gpudev;
+extern const struct adreno_gpudev adreno_a5xx_gpudev;
+extern const struct adreno_gpudev adreno_a6xx_gpudev;
+extern const struct adreno_gpudev adreno_a6xx_rgmu_gpudev;
+extern const struct adreno_gpudev adreno_a619_holi_gpudev;
+extern const struct adreno_gpudev adreno_a611_gpudev;
+
+extern int adreno_wake_nice;
+extern unsigned int adreno_wake_timeout;
+
+int adreno_start(struct kgsl_device *device, int priority);
+long adreno_ioctl(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, unsigned long arg);
+
+long adreno_ioctl_helper(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, unsigned long arg,
+		const struct kgsl_ioctl *cmds, int len);
+
+int adreno_spin_idle(struct adreno_device *device, unsigned int timeout);
+int adreno_idle(struct kgsl_device *device);
+
+int adreno_set_constraint(struct kgsl_device *device,
+				struct kgsl_context *context,
+				struct kgsl_device_constraint *constraint);
+
+void adreno_snapshot(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		struct kgsl_context *context, struct kgsl_context *context_lpac);
+
+int adreno_reset(struct kgsl_device *device, int fault);
+
+void adreno_fault_skipcmd_detached(struct adreno_device *adreno_dev,
+					 struct adreno_context *drawctxt,
+					 struct kgsl_drawobj *drawobj);
+
+void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit);
+void adreno_cp_callback(struct adreno_device *adreno_dev, int bit);
+
+int adreno_sysfs_init(struct adreno_device *adreno_dev);
+
+void adreno_irqctrl(struct adreno_device *adreno_dev, int state);
+
+long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data);
+
+long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data);
+
+void adreno_cx_misc_regread(struct adreno_device *adreno_dev,
+		unsigned int offsetwords, unsigned int *value);
+void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev,
+		unsigned int offsetwords, unsigned int value);
+void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev,
+		unsigned int offsetwords,
+		unsigned int mask, unsigned int bits);
+void adreno_isense_regread(struct adreno_device *adreno_dev,
+		unsigned int offsetwords, unsigned int *value);
+bool adreno_gx_is_on(struct adreno_device *adreno_dev);
+
+u64 adreno_read_cx_timer(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_active_count_get - Wrapper for target specific active count get
+ * @adreno_dev: pointer to the adreno device
+ *
+ * Increase the active count for the KGSL device and execute slumber exit
+ * sequence if this is the first reference. Code paths that need to touch the
+ * hardware or wait for the hardware to complete an operation must hold an
+ * active count reference until they are finished. The device mutex must be held
+ * while calling this function.
+ *
+ * Return: 0 on success or negative error on failure to wake up the device
+ */
+int adreno_active_count_get(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_active_count_put - Wrapper for target specific active count put
+ * @adreno_dev: pointer to the adreno device
+ *
+ * Decrease the active count for the KGSL device and schedule the idle
+ * thread to execute the slumber sequence if there are no remaining
+ * references. The device mutex must be held while calling this function.
+ */
+void adreno_active_count_put(struct adreno_device *adreno_dev);
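+
+/*
+ * Typical pairing (editor's sketch, not part of the driver): callers hold
+ * the device mutex and bracket hardware access with a get/put pair, e.g.
+ *
+ *	int ret;
+ *
+ *	mutex_lock(&device->mutex);
+ *	ret = adreno_active_count_get(adreno_dev);
+ *	if (!ret) {
+ *		... touch the hardware ...
+ *		adreno_active_count_put(adreno_dev);
+ *	}
+ *	mutex_unlock(&device->mutex);
+ */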
+
+#define ADRENO_TARGET(_name, _id) \
+static inline int adreno_is_##_name(struct adreno_device *adreno_dev) \
+{ \
+	return (ADRENO_GPUREV(adreno_dev) == (_id)); \
+}
+
+static inline int adreno_is_a3xx(struct adreno_device *adreno_dev)
+{
+	return ((ADRENO_GPUREV(adreno_dev) >= 300) &&
+		(ADRENO_GPUREV(adreno_dev) < 400));
+}
+
+ADRENO_TARGET(a304, ADRENO_REV_A304)
+ADRENO_TARGET(a306, ADRENO_REV_A306)
+ADRENO_TARGET(a306a, ADRENO_REV_A306A)
+
+static inline int adreno_is_a5xx(struct adreno_device *adreno_dev)
+{
+	return ADRENO_GPUREV(adreno_dev) >= 500 &&
+			ADRENO_GPUREV(adreno_dev) < 600;
+}
+
+ADRENO_TARGET(a505, ADRENO_REV_A505)
+ADRENO_TARGET(a506, ADRENO_REV_A506)
+ADRENO_TARGET(a508, ADRENO_REV_A508)
+ADRENO_TARGET(a510, ADRENO_REV_A510)
+ADRENO_TARGET(a512, ADRENO_REV_A512)
+ADRENO_TARGET(a530, ADRENO_REV_A530)
+ADRENO_TARGET(a540, ADRENO_REV_A540)
+
+static inline int adreno_is_a530v2(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1);
+}
+
+static inline int adreno_is_a530v3(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 2);
+}
+
+static inline int adreno_is_a505_or_a506(struct adreno_device *adreno_dev)
+{
+	return ADRENO_GPUREV(adreno_dev) >= 505 &&
+			ADRENO_GPUREV(adreno_dev) <= 506;
+}
+
+static inline int adreno_is_a6xx(struct adreno_device *adreno_dev)
+{
+	return ADRENO_GPUREV(adreno_dev) >= 600 &&
+			ADRENO_GPUREV(adreno_dev) <= 702;
+}
+
+static inline int adreno_is_a660_shima(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A660) &&
+		(adreno_dev->gpucore->compatible &&
+		!strcmp(adreno_dev->gpucore->compatible,
+		"qcom,adreno-gpu-a660-shima"));
+}
+
+ADRENO_TARGET(a610, ADRENO_REV_A610)
+ADRENO_TARGET(a611, ADRENO_REV_A611)
+ADRENO_TARGET(a612, ADRENO_REV_A612)
+ADRENO_TARGET(a618, ADRENO_REV_A618)
+ADRENO_TARGET(a619, ADRENO_REV_A619)
+ADRENO_TARGET(a621, ADRENO_REV_A621)
+ADRENO_TARGET(a630, ADRENO_REV_A630)
+ADRENO_TARGET(a635, ADRENO_REV_A635)
+ADRENO_TARGET(a662, ADRENO_REV_A662)
+ADRENO_TARGET(a640, ADRENO_REV_A640)
+ADRENO_TARGET(a650, ADRENO_REV_A650)
+ADRENO_TARGET(a663, ADRENO_REV_A663)
+ADRENO_TARGET(a680, ADRENO_REV_A680)
+ADRENO_TARGET(a702, ADRENO_REV_A702)
+
+/* A635 is derived from A660 and shares the same logic */
+static inline int adreno_is_a660(struct adreno_device *adreno_dev)
+{
+	unsigned int rev = ADRENO_GPUREV(adreno_dev);
+
+	return (rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635 ||
+			rev == ADRENO_REV_A662);
+}
+
+/*
+ * All chipsets derived from A615, such as A616, A618, A619, etc.,
+ * need to be added to this list.
+ */
+static inline int adreno_is_a615_family(struct adreno_device *adreno_dev)
+{
+	unsigned int rev = ADRENO_GPUREV(adreno_dev);
+
+	return (rev == ADRENO_REV_A615 || rev == ADRENO_REV_A616 ||
+			rev == ADRENO_REV_A618 || rev == ADRENO_REV_A619);
+}
+
+/*
+ * GPUs derived from A640 need to be added to this list.
+ * A640 and A680 belong to this family.
+ */
+static inline int adreno_is_a640_family(struct adreno_device *adreno_dev)
+{
+	unsigned int rev = ADRENO_GPUREV(adreno_dev);
+
+	return (rev == ADRENO_REV_A640 || rev == ADRENO_REV_A680);
+}
+
+/*
+ * GPUs derived from A650 need to be added to this list.
+ * A650 is derived from A640, but its register specs have
+ * changed, so it does not belong to the A640 family. A620,
+ * A621, A660, A663, and A690 follow the register specs of
+ * A650.
+ */
+static inline int adreno_is_a650_family(struct adreno_device *adreno_dev)
+{
+	unsigned int rev = ADRENO_GPUREV(adreno_dev);
+
+	return (rev == ADRENO_REV_A650 || rev == ADRENO_REV_A620 ||
+		rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635 ||
+		rev == ADRENO_REV_A662 || rev == ADRENO_REV_A621 ||
+		rev == ADRENO_REV_A663);
+}
+
+static inline int adreno_is_a619_holi(struct adreno_device *adreno_dev)
+{
+	return of_device_is_compatible(adreno_dev->dev.pdev->dev.of_node,
+		"qcom,adreno-gpu-a619-holi");
+}
+
+static inline int adreno_is_a620(struct adreno_device *adreno_dev)
+{
+	unsigned int rev = ADRENO_GPUREV(adreno_dev);
+
+	return (rev == ADRENO_REV_A620 || rev == ADRENO_REV_A621);
+}
+
+static inline int adreno_is_a610_family(struct adreno_device *adreno_dev)
+{
+	unsigned int rev = ADRENO_GPUREV(adreno_dev);
+
+	return (rev == ADRENO_REV_A610 || rev == ADRENO_REV_A611);
+}
+
+static inline int adreno_is_a640v2(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A640) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1);
+}
+
+static inline int adreno_is_gen7(struct adreno_device *adreno_dev)
+{
+	return ADRENO_GPUREV(adreno_dev) >= 0x070000 &&
+			ADRENO_GPUREV(adreno_dev) < 0x080000;
+}
+
+static inline int adreno_is_gen8(struct adreno_device *adreno_dev)
+{
+	return ADRENO_GPUREV(adreno_dev) >= 0x080000 &&
+			ADRENO_GPUREV(adreno_dev) < 0x090000;
+}
+
+ADRENO_TARGET(gen7_0_0, ADRENO_REV_GEN7_0_0)
+ADRENO_TARGET(gen7_0_1, ADRENO_REV_GEN7_0_1)
+ADRENO_TARGET(gen7_2_0, ADRENO_REV_GEN7_2_0)
+ADRENO_TARGET(gen7_2_1, ADRENO_REV_GEN7_2_1)
+ADRENO_TARGET(gen7_4_0, ADRENO_REV_GEN7_4_0)
+ADRENO_TARGET(gen7_9_0, ADRENO_REV_GEN7_9_0)
+ADRENO_TARGET(gen7_9_1, ADRENO_REV_GEN7_9_1)
+ADRENO_TARGET(gen7_11_0, ADRENO_REV_GEN7_11_0)
+ADRENO_TARGET(gen8_3_0, ADRENO_REV_GEN8_3_0)
+
+static inline int adreno_is_gen7_9_x(struct adreno_device *adreno_dev)
+{
+	return adreno_is_gen7_9_0(adreno_dev) || adreno_is_gen7_9_1(adreno_dev);
+}
+
+static inline int adreno_is_gen7_0_x_family(struct adreno_device *adreno_dev)
+{
+	return adreno_is_gen7_0_0(adreno_dev) || adreno_is_gen7_0_1(adreno_dev) ||
+		adreno_is_gen7_4_0(adreno_dev);
+}
+
+static inline int adreno_is_gen7_2_x_family(struct adreno_device *adreno_dev)
+{
+	return adreno_is_gen7_2_0(adreno_dev) || adreno_is_gen7_2_1(adreno_dev) ||
+		adreno_is_gen7_9_x(adreno_dev) || adreno_is_gen7_11_0(adreno_dev);
+}
+
+/*
+ * adreno_checkreg_off() - Checks the validity of a register enum
+ * @adreno_dev: Pointer to adreno device
+ * @offset_name: The register enum that is checked
+ */
+static inline bool adreno_checkreg_off(struct adreno_device *adreno_dev,
+					enum adreno_regs offset_name)
+{
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (offset_name >= ADRENO_REG_REGISTER_MAX ||
+		gpudev->reg_offsets[offset_name] == ADRENO_REG_UNUSED)
+		return false;
+
+	/*
+	 * GPU register programming is kept as common as possible
+	 * across the cores. Use ADRENO_REG_SKIP when certain register
+	 * programming needs to be skipped for certain GPU cores.
+	 * Example: certain registers on a5xx, like IB1_BASE, are 64 bit.
+	 * The common code programs the full 64-bit register, but the
+	 * upper 32 bits are skipped on a3xx using ADRENO_REG_SKIP.
+	 */
+	if (gpudev->reg_offsets[offset_name] == ADRENO_REG_SKIP)
+		return false;
+
+	return true;
+}
+
+/*
+ * adreno_readreg() - Read a register by getting its offset from the
+ * offset array defined in gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum that is to be read
+ * @val: Register value read is placed here
+ */
+static inline void adreno_readreg(struct adreno_device *adreno_dev,
+				enum adreno_regs offset_name, unsigned int *val)
+{
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (adreno_checkreg_off(adreno_dev, offset_name))
+		kgsl_regread(KGSL_DEVICE(adreno_dev),
+				gpudev->reg_offsets[offset_name], val);
+	else
+		*val = 0;
+}
+
+/*
+ * adreno_writereg() - Write a register by getting its offset from the
+ * offset array defined in gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum that is to be written
+ * @val: Value to write
+ */
+static inline void adreno_writereg(struct adreno_device *adreno_dev,
+				enum adreno_regs offset_name, unsigned int val)
+{
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (adreno_checkreg_off(adreno_dev, offset_name))
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev),
+				gpudev->reg_offsets[offset_name], val);
+}
+
+/*
+ * adreno_getreg() - Returns the offset value of a register from the
+ * register offset array in the gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum whose offset is returned
+ */
+static inline unsigned int adreno_getreg(struct adreno_device *adreno_dev,
+				enum adreno_regs offset_name)
+{
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (!adreno_checkreg_off(adreno_dev, offset_name))
+		return ADRENO_REG_REGISTER_MAX;
+	return gpudev->reg_offsets[offset_name];
+}
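+
+/*
+ * Example (editor's sketch, not part of the driver): read a register
+ * through the per-target offset table, e.g.
+ *
+ *	unsigned int status;
+ *
+ *	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status);
+ *
+ * If the target marks the enum as ADRENO_REG_UNUSED, status is set to 0.
+ */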
+
+/*
+ * adreno_write_gmureg() - Write a GMU register by getting its offset from the
+ * offset array defined in gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum that is to be written
+ * @val: Value to write
+ */
+static inline void adreno_write_gmureg(struct adreno_device *adreno_dev,
+				enum adreno_regs offset_name, unsigned int val)
+{
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (adreno_checkreg_off(adreno_dev, offset_name))
+		gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
+				gpudev->reg_offsets[offset_name], val);
+}
+
+/**
+ * adreno_gpu_fault() - Return the current state of the GPU
+ * @adreno_dev: A pointer to the adreno_device to query
+ *
+ * Return 0 if there is no fault, or a positive value encoding the type of
+ * the last fault that occurred
+ */
+static inline unsigned int adreno_gpu_fault(struct adreno_device *adreno_dev)
+{
+	/* make sure we're reading the latest value */
+	smp_rmb();
+	return atomic_read(&adreno_dev->dispatcher.fault);
+}
+
+/**
+ * adreno_set_gpu_fault() - Set the current fault status of the GPU
+ * @adreno_dev: A pointer to the adreno_device to set
+ * @state: fault state to set
+ *
+ */
+static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev,
+	int state)
+{
+	/* only set the fault bit w/o overwriting other bits */
+	atomic_or(state, &adreno_dev->dispatcher.fault);
+
+	/* make sure other CPUs see the update */
+	smp_wmb();
+}
+
+/**
+ * adreno_clear_gpu_fault() - Clear the GPU fault register
+ * @adreno_dev: A pointer to an adreno_device structure
+ *
+ * Clear the GPU fault status for the adreno device
+ */
+
+static inline void adreno_clear_gpu_fault(struct adreno_device *adreno_dev)
+{
+	atomic_set(&adreno_dev->dispatcher.fault, 0);
+
+	/* make sure other CPUs see the update */
+	smp_wmb();
+}
+
+/**
+ * adreno_gpu_halt() - Return the GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline int adreno_gpu_halt(struct adreno_device *adreno_dev)
+{
+	/* make sure we're reading the latest value */
+	smp_rmb();
+	return atomic_read(&adreno_dev->halt);
+}
+
+
+/**
+ * adreno_clear_gpu_halt() - Clear the GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline void adreno_clear_gpu_halt(struct adreno_device *adreno_dev)
+{
+	atomic_set(&adreno_dev->halt, 0);
+
+	/* make sure other CPUs see the update */
+	smp_wmb();
+}
+
+/**
+ * adreno_get_gpu_halt() - Increment GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline void adreno_get_gpu_halt(struct adreno_device *adreno_dev)
+{
+	atomic_inc(&adreno_dev->halt);
+}
+
+/**
+ * adreno_put_gpu_halt() - Decrement GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline void adreno_put_gpu_halt(struct adreno_device *adreno_dev)
+{
+	/* Make sure the refcount is good */
+	int ret = atomic_dec_if_positive(&adreno_dev->halt);
+
+	WARN(ret < 0, "GPU halt refcount unbalanced\n");
+}
+
+
+#ifdef CONFIG_DEBUG_FS
+void adreno_debugfs_init(struct adreno_device *adreno_dev);
+void adreno_context_debugfs_init(struct adreno_device *adreno_dev,
+				struct adreno_context *ctx);
+#else
+static inline void adreno_debugfs_init(struct adreno_device *adreno_dev) { }
+static inline void adreno_context_debugfs_init(struct adreno_device *device,
+						struct adreno_context *context)
+{
+	context->debug_root = NULL;
+}
+#endif
+
+/**
+ * adreno_compare_pm4_version() - Compare the PM4 microcode version
+ * @adreno_dev: Pointer to the adreno_device struct
+ * @version: Version number to compare against
+ *
+ * Compare the current version against the specified version and return -1 if
+ * the current code is older, 0 if equal or 1 if newer.
+ */
+static inline int adreno_compare_pm4_version(struct adreno_device *adreno_dev,
+	unsigned int version)
+{
+	if (adreno_dev->fw[ADRENO_FW_PM4].version == version)
+		return 0;
+
+	return (adreno_dev->fw[ADRENO_FW_PM4].version > version) ? 1 : -1;
+}
+
+/**
+ * adreno_compare_pfp_version() - Compare the PFP microcode version
+ * @adreno_dev: Pointer to the adreno_device struct
+ * @version: Version number to compare against
+ *
+ * Compare the current version against the specified version and return -1 if
+ * the current code is older, 0 if equal or 1 if newer.
+ */
+static inline int adreno_compare_pfp_version(struct adreno_device *adreno_dev,
+	unsigned int version)
+{
+	if (adreno_dev->fw[ADRENO_FW_PFP].version == version)
+		return 0;
+
+	return (adreno_dev->fw[ADRENO_FW_PFP].version > version) ? 1 : -1;
+}
+
+/**
+ * adreno_in_preempt_state() - Check if preemption state is equal to given state
+ * @adreno_dev: Device whose preemption state is checked
+ * @state: State to compare against
+ */
+static inline bool adreno_in_preempt_state(struct adreno_device *adreno_dev,
+			enum adreno_preempt_states state)
+{
+	return atomic_read(&adreno_dev->preempt.state) == state;
+}
+/**
+ * adreno_set_preempt_state() - Set the specified preemption state
+ * @adreno_dev: Device to change preemption state
+ * @state: State to set
+ */
+static inline void adreno_set_preempt_state(struct adreno_device *adreno_dev,
+		enum adreno_preempt_states state)
+{
+	/*
+	 * atomic_set doesn't use barriers, so we need to do it ourselves.  One
+	 * before...
+	 */
+	smp_wmb();
+	atomic_set(&adreno_dev->preempt.state, state);
+
+	/* ... and one after */
+	smp_wmb();
+}
+
+static inline bool adreno_is_preemption_enabled(
+				struct adreno_device *adreno_dev)
+{
+	return test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+}
+
+
+/**
+ * adreno_preemption_feature_set() - Check whether adreno preemption feature is statically enabled
+ * either via adreno feature bit, or via the cmdline override
+ * @adreno_dev: Device whose preemption state is checked
+ */
+static inline bool adreno_preemption_feature_set(struct adreno_device *adreno_dev)
+{
+	return ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION) || adreno_dev->preempt_override;
+}
+
+/*
+ * adreno_compare_prio_level() - Compares 2 priority levels based on enum values
+ * @p1: First priority level
+ * @p2: Second priority level
+ *
+ * Returns a value greater than 0 if p1 is higher priority, 0 if the levels
+ * are equal, or less than 0 otherwise
+ */
+static inline int adreno_compare_prio_level(int p1, int p2)
+{
+	return p2 - p1;
+}
+
+void adreno_readreg64(struct adreno_device *adreno_dev,
+		enum adreno_regs lo, enum adreno_regs hi, uint64_t *val);
+
+void adreno_writereg64(struct adreno_device *adreno_dev,
+		enum adreno_regs lo, enum adreno_regs hi, uint64_t val);
+
+unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb);
+
+void adreno_touch_wake(struct kgsl_device *device);
+
+static inline bool adreno_rb_empty(struct adreno_ringbuffer *rb)
+{
+	return (adreno_get_rptr(rb) == rb->wptr);
+}
+
+static inline bool adreno_soft_fault_detect(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->fast_hang_detect &&
+		!test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
+}
+
+static inline bool adreno_long_ib_detect(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->long_ib_detect &&
+		!test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
+}
+
+/**
+ * adreno_support_64bit - Return true if the GPU supports 64 bit addressing
+ * @adreno_dev: An Adreno GPU device handle
+ *
+ * Return: True if the device supports 64 bit addressing
+ */
+static inline bool adreno_support_64bit(struct adreno_device *adreno_dev)
+{
+	/*
+	 * The IOMMU API takes an unsigned long for the iova, so we can't
+	 * support 64 bit addresses when the kernel is in 32 bit mode even
+	 * if we wanted to. Check that we are using a5xx or newer and that
+	 * unsigned long is big enough for our purposes.
+	 */
+	return (BITS_PER_LONG > 32 && ADRENO_GPUREV(adreno_dev) >= 500);
+}
+
+static inline void adreno_ringbuffer_set_pagetable(struct kgsl_device *device,
+	struct adreno_ringbuffer *rb, struct kgsl_pagetable *pt)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+
+	kgsl_sharedmem_writel(device->scratch,
+		SCRATCH_RB_OFFSET(rb->id, current_rb_ptname), pt->name);
+
+	kgsl_sharedmem_writeq(device->scratch,
+		SCRATCH_RB_OFFSET(rb->id, ttbr0),
+		kgsl_mmu_pagetable_get_ttbr0(pt));
+
+	kgsl_sharedmem_writel(device->scratch,
+		SCRATCH_RB_OFFSET(rb->id, contextidr), 0);
+
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+}
+
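+/*
+ * Worked example (editor's note): counter_delta() below handles 32-bit
+ * wraparound. If *counter was 0xFFFFFFF0 and the register now reads
+ * 0x00000010, the helper returns (UINT_MAX - 0xFFFFFFF0) + 0x10 = 0x1F
+ * and stores 0x10 back into *counter for the next call.
+ */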
+static inline u32 counter_delta(struct kgsl_device *device,
+			unsigned int reg, unsigned int *counter)
+{
+	u32 val, ret = 0;
+
+	if (!reg)
+		return 0;
+
+	kgsl_regread(device, reg, &val);
+
+	if (*counter) {
+		if (val >= *counter)
+			ret = val - *counter;
+		else
+			ret = (UINT_MAX - *counter) + val;
+	}
+
+	*counter = val;
+	return ret;
+}
+
+static inline int adreno_perfcntr_active_oob_get(
+	struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = adreno_active_count_get(adreno_dev);
+
+	if (!ret) {
+		ret = gmu_core_dev_oob_set(device, oob_perfcntr);
+		if (ret)
+			adreno_active_count_put(adreno_dev);
+	}
+
+	return ret;
+}
+
+static inline void adreno_perfcntr_active_oob_put(
+	struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	gmu_core_dev_oob_clear(device, oob_perfcntr);
+	adreno_active_count_put(adreno_dev);
+}
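+
+/*
+ * Illustrative pairing (editor's sketch, not part of the driver):
+ * perfcounter accesses that need the GPU awake are bracketed as
+ *
+ *	if (!adreno_perfcntr_active_oob_get(adreno_dev)) {
+ *		... read or program perfcounter registers ...
+ *		adreno_perfcntr_active_oob_put(adreno_dev);
+ *	}
+ */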
+
+/**
+ * adreno_wait_for_halt_ack - Wait for acknowledgement of a bus halt request
+ * @device: Pointer to the KGSL device
+ * @ack_reg: Register offset to poll for the acknowledgement
+ * @mask: A mask value to wait for
+ *
+ * Return: 0 on success or -ETIMEDOUT if the request timed out
+ */
+static inline int adreno_wait_for_halt_ack(struct kgsl_device *device,
+	int ack_reg, unsigned int mask)
+{
+	u32 val;
+	int ret = kgsl_regmap_read_poll_timeout(&device->regmap, ack_reg,
+		val, (val & mask) == mask, 100, 100 * 1000);
+
+	if (ret)
+		dev_err(device->dev,
+			"GBIF/VBIF Halt ack timeout: reg=%08x mask=%08x status=%08x\n",
+			ack_reg, mask, val);
+
+	return ret;
+}
+
+/**
+ * adreno_move_preempt_state - Update the preemption state
+ * @adreno_dev: An Adreno GPU device handle
+ * @old: The current state of the preemption
+ * @new: The new state of the preemption
+ *
+ * Return: True if the state was updated or false if not
+ */
+static inline bool adreno_move_preempt_state(struct adreno_device *adreno_dev,
+	enum adreno_preempt_states old, enum adreno_preempt_states new)
+{
+	return (atomic_cmpxchg(&adreno_dev->preempt.state, old, new) == old);
+}
+
+/**
+ * adreno_reg_offset_init - Helper function to initialize reg_offsets
+ * @reg_offsets: Pointer to an array of register offsets
+ *
+ * Helper function to set up reg_offsets for a target. Go through the list
+ * and set ADRENO_REG_UNUSED for all unused entries.
+ */
+static inline void adreno_reg_offset_init(u32 *reg_offsets)
+{
+	int i;
+
+	/*
+	 * Initialize uninitialized GPU registers; this only needs to be done
+	 * once. Set all offsets that are not initialized to ADRENO_REG_UNUSED.
+	 */
+	for (i = 0; i < ADRENO_REG_REGISTER_MAX; i++) {
+		if (!reg_offsets[i])
+			reg_offsets[i] = ADRENO_REG_UNUSED;
+	}
+}
+
+static inline u32 adreno_get_level(struct kgsl_context *context)
+{
+	u32 level;
+
+	if (kgsl_context_is_lpac(context))
+		return KGSL_LPAC_RB_ID;
+
+	level = context->priority / KGSL_PRIORITY_MAX_RB_LEVELS;
+
+	return min_t(u32, level, KGSL_PRIORITY_MAX_RB_LEVELS - 1);
+}
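+
+/*
+ * Worked example (editor's note, assuming KGSL_PRIORITY_MAX_RB_LEVELS is 4):
+ * a non-LPAC context with priority 5 maps to level 5 / 4 = 1, and any
+ * quotient above 3 is clamped to KGSL_PRIORITY_MAX_RB_LEVELS - 1 = 3.
+ */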
+
+
+/**
+ * adreno_get_firmware - Load firmware into an adreno_firmware struct
+ * @adreno_dev: An Adreno GPU device handle
+ * @fwfile: Firmware file to load
+ * @firmware: A &struct adreno_firmware container for the firmware.
+ *
+ * Load the specified firmware file into the memdesc in &struct adreno_firmware
+ * and get the size and version from the data.
+ *
+ * Return: 0 on success or negative on failure
+ */
+int adreno_get_firmware(struct adreno_device *adreno_dev,
+		const char *fwfile, struct adreno_firmware *firmware);
+/**
+ * adreno_zap_shader_load - Helper function for loading the zap shader
+ * @adreno_dev: A handle to an Adreno GPU device
+ * @name: Name of the zap shader to load
+ *
+ * A target independent helper function for loading the zap shader.
+ *
+ * Return: 0 on success or negative on failure.
+ */
+int adreno_zap_shader_load(struct adreno_device *adreno_dev,
+		const char *name);
+
+/**
+ * adreno_irq_callbacks - Helper function to handle IRQ callbacks
+ * @adreno_dev: Adreno GPU device handle
+ * @funcs: List of callback functions
+ * @status: Interrupt status
+ *
+ * Walk the bits in the interrupt status and call any applicable callbacks.
+ * Return: IRQ_HANDLED if one or more interrupt callbacks were called.
+ */
+irqreturn_t adreno_irq_callbacks(struct adreno_device *adreno_dev,
+		const struct adreno_irq_funcs *funcs, u32 status);
+
+
+/**
+ * adreno_device_probe - Generic adreno device probe function
+ * @pdev: Pointer to the platform device
+ * @adreno_dev: Adreno GPU device handle
+ *
+ * Do the generic setup for the Adreno device. Called from the target specific
+ * probe functions.
+ *
+ * Return: 0 on success or negative on failure
+ */
+int adreno_device_probe(struct platform_device *pdev,
+		struct adreno_device *adreno_dev);
+
+/**
+ * adreno_power_cycle - Suspend and resume the device
+ * @adreno_dev: Pointer to the adreno device
+ * @callback: Function that needs to be executed
+ * @priv: Argument to be passed to the callback
+ *
+ * Certain properties that can be set via sysfs need to power
+ * cycle the device to take effect. This function suspends
+ * the device, executes the callback, and resumes the device.
+ *
+ * Return: 0 on success or negative on failure
+ */
+int adreno_power_cycle(struct adreno_device *adreno_dev,
+	void (*callback)(struct adreno_device *adreno_dev, void *priv),
+	void *priv);
+
+/**
+ * adreno_power_cycle_bool - Power cycle the device to change device setting
+ * @adreno_dev: Pointer to the adreno device
+ * @flag: Flag that needs to be set
+ * @val: The value flag should be set to
+ *
+ * Certain properties that can be set via sysfs need to power cycle the device
+ * to take effect. This function suspends the device, sets the flag, and
+ * resumes the device.
+ *
+ * Return: 0 on success or negative on failure
+ */
+int adreno_power_cycle_bool(struct adreno_device *adreno_dev,
+	bool *flag, bool val);
+
+/**
+ * adreno_power_cycle_u32 - Power cycle the device to change device setting
+ * @adreno_dev: Pointer to the adreno device
+ * @flag: Flag that needs to be set
+ * @val: The value flag should be set to
+ *
+ * Certain properties that can be set via sysfs need to power cycle the device
+ * to take effect. This function suspends the device, sets the flag, and
+ * resumes the device.
+ *
+ * Return: 0 on success or negative on failure
+ */
+int adreno_power_cycle_u32(struct adreno_device *adreno_dev,
+	u32 *flag, u32 val);
+
+/**
+ * adreno_set_active_ctxs_null - Give up active context refcount
+ * @adreno_dev: Adreno GPU device handle
+ *
+ * This puts back the reference for the last active context on
+ * each ringbuffer when going in and out of slumber.
+ */
+void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_get_bus_counters - Allocate the bus dcvs counters
+ * @adreno_dev: Adreno GPU device handle
+ *
+ * This function allocates the various gpu counters to measure
+ * gpu bus usage for bus dcvs
+ */
+void adreno_get_bus_counters(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_suspend_context - Make sure device is idle
+ * @device: Pointer to the kgsl device
+ *
+ * This function processes the profiling results and checks if the
+ * device is idle so that it can be turned off safely
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int adreno_suspend_context(struct kgsl_device *device);
+
+/*
+ * adreno_profile_submit_time - Populate profiling buffer with timestamps
+ * @time: Container for the statistics
+ *
+ * Populate the draw object user profiling buffer with the timestamps
+ * recorded in the adreno_submit_time structure at the time of draw object
+ * submission.
+ */
+void adreno_profile_submit_time(struct adreno_submit_time *time);
+
+void adreno_preemption_timer(struct timer_list *t);
+
+/**
+ * adreno_create_profile_buffer - Create a buffer to store profiling data
+ * @adreno_dev: Adreno GPU device handle
+ */
+void adreno_create_profile_buffer(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_isidle - return true if the hardware is idle
+ * @adreno_dev: Adreno GPU device handle
+ *
+ * Return: True if the hardware is idle
+ */
+bool adreno_isidle(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_allocate_global - Helper function to allocate a global GPU object
+ * @device: A GPU device handle
+ * @memdesc: Pointer to a &struct kgsl_memdesc pointer
+ * @size: Size of the allocation in bytes
+ * @padding: Amount of extra padding to add to the VA allocation
+ * @flags: Control flags for the allocation
+ * @priv: Internal flags for the allocation
+ * @name: Name of the allocation (for the debugfs file)
+ *
+ * Allocate a global object if it hasn't already been allocated and put it in
+ * the pointer pointed to by @memdesc.
+ * Return: 0 on success or negative on error
+ */
+static inline int adreno_allocate_global(struct kgsl_device *device,
+		struct kgsl_memdesc **memdesc, u64 size, u32 padding, u64 flags,
+		u32 priv, const char *name)
+{
+	if (!IS_ERR_OR_NULL(*memdesc))
+		return 0;
+
+	*memdesc = kgsl_allocate_global(device, size, padding, flags, priv, name);
+	return PTR_ERR_OR_ZERO(*memdesc);
+}
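+
+/*
+ * Example (editor's sketch; the descriptor and name are hypothetical):
+ *
+ *	static struct kgsl_memdesc *example_md;
+ *	int ret;
+ *
+ *	ret = adreno_allocate_global(device, &example_md, PAGE_SIZE, 0,
+ *			KGSL_MEMFLAGS_GPUREADONLY, 0, "example");
+ *
+ * Subsequent calls return 0 without reallocating because the descriptor
+ * is already valid.
+ */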
+
+static inline void adreno_set_dispatch_ops(struct adreno_device *adreno_dev,
+		const struct adreno_dispatch_ops *ops)
+{
+	adreno_dev->dispatch_ops = ops;
+}
+
+#ifdef CONFIG_QCOM_KGSL_FENCE_TRACE
+/**
+ * adreno_fence_trace_array_init - Initialize an always on trace array
+ * @device: A GPU device handle
+ *
+ * Register an always-on trace array for fence timeout debugging
+ */
+void adreno_fence_trace_array_init(struct kgsl_device *device);
+#else
+static inline void adreno_fence_trace_array_init(struct kgsl_device *device) {}
+#endif
+
+/*
+ * adreno_drawobj_set_constraint - Set a power constraint
+ * @device: Pointer to a KGSL device structure
+ * @drawobj: Draw object for which constraint is to be set
+ *
+ * Set the power constraint if requested by this context
+ */
+void adreno_drawobj_set_constraint(struct kgsl_device *device,
+		struct kgsl_drawobj *drawobj);
+
+/**
+ * adreno_get_gpu_model - Get the GPU model name from the device tree or chipid
+ * @device: A GPU device handle
+ *
+ * Return: GPU model name string
+ */
+const char *adreno_get_gpu_model(struct kgsl_device *device);
+
+int adreno_verify_cmdobj(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+		uint32_t count);
+
+/**
+ * adreno_mark_for_coldboot - Set a flag to coldboot gpu in the slumber exit
+ * @adreno_dev: Adreno device handle
+ *
+ */
+void adreno_mark_for_coldboot(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_smmu_is_stalled() - Check whether smmu is stalled or not
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: True if the SMMU is stalled, false otherwise
+ */
+bool adreno_smmu_is_stalled(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_get_ahb_timeout_val() - Get the ahb_timeout value
+ * @adreno_dev: Adreno device handle
+ * @noc_timeout_us: GPU config NOC timeout value in usec
+ *
+ * Return: AHB timeout value to be programmed in the AHB CNTL registers
+ */
+u32 adreno_get_ahb_timeout_val(struct adreno_device *adreno_dev, u32 noc_timeout_us);
+
+/**
+ * adreno_llcc_slice_deactivate - Deactivate GPU and GPUHTW llcc slices
+ * @adreno_dev: Adreno device handle
+ */
+static inline void adreno_llcc_slice_deactivate(struct adreno_device *adreno_dev)
+{
+	if (adreno_dev->gpu_llc_slice_enable && !IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice))
+		llcc_slice_deactivate(adreno_dev->gpu_llc_slice);
+
+	if (adreno_dev->gpuhtw_llc_slice_enable && !IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice))
+		llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice);
+}
+
+/**
+ * adreno_irq_free - Free an interrupt allocated for GPU
+ * @adreno_dev: Adreno device handle
+ */
+static inline void adreno_irq_free(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!(adreno_dev->irq_mask || device->pwrctrl.interrupt_num))
+		return;
+
+	devm_free_irq(&device->pdev->dev, device->pwrctrl.interrupt_num, device);
+	adreno_dev->irq_mask = 0;
+	device->pwrctrl.interrupt_num = 0;
+}
+#endif /*__ADRENO_H */

+ 1514 - 0
qcom/opensource/graphics-kernel/adreno_a3xx.c

@@ -0,0 +1,1514 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk/qcom.h>
+#include <linux/clk-provider.h>
+#include <linux/firmware.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+#include "adreno.h"
+#include "adreno_cp_parser.h"
+#include "adreno_a3xx.h"
+#include "adreno_pm4types.h"
+#include "adreno_snapshot.h"
+#include "adreno_trace.h"
+
+/*
+ * Define registers for a3xx that contain addresses used by the
+ * cp parser logic
+ */
+const unsigned int a3xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0,
+				A3XX_VSC_PIPE_DATA_ADDRESS_0),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0,
+				A3XX_VSC_PIPE_DATA_LENGTH_0),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1,
+				A3XX_VSC_PIPE_DATA_ADDRESS_1),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1,
+				A3XX_VSC_PIPE_DATA_LENGTH_1),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2,
+				A3XX_VSC_PIPE_DATA_ADDRESS_2),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2,
+				A3XX_VSC_PIPE_DATA_LENGTH_2),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3,
+				A3XX_VSC_PIPE_DATA_ADDRESS_3),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3,
+				A3XX_VSC_PIPE_DATA_LENGTH_3),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4,
+				A3XX_VSC_PIPE_DATA_ADDRESS_4),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4,
+				A3XX_VSC_PIPE_DATA_LENGTH_4),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5,
+				A3XX_VSC_PIPE_DATA_ADDRESS_5),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5,
+				A3XX_VSC_PIPE_DATA_LENGTH_5),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6,
+				A3XX_VSC_PIPE_DATA_ADDRESS_6),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6,
+				A3XX_VSC_PIPE_DATA_LENGTH_6),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7,
+				A3XX_VSC_PIPE_DATA_ADDRESS_7),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7,
+				A3XX_VSC_PIPE_DATA_LENGTH_7),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0,
+				A3XX_VFD_FETCH_INSTR_1_0),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1,
+				A3XX_VFD_FETCH_INSTR_1_1),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2,
+				A3XX_VFD_FETCH_INSTR_1_2),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3,
+				A3XX_VFD_FETCH_INSTR_1_3),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4,
+				A3XX_VFD_FETCH_INSTR_1_4),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5,
+				A3XX_VFD_FETCH_INSTR_1_5),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6,
+				A3XX_VFD_FETCH_INSTR_1_6),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7,
+				A3XX_VFD_FETCH_INSTR_1_7),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8,
+				A3XX_VFD_FETCH_INSTR_1_8),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9,
+				A3XX_VFD_FETCH_INSTR_1_9),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10,
+				A3XX_VFD_FETCH_INSTR_1_A),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11,
+				A3XX_VFD_FETCH_INSTR_1_B),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12,
+				A3XX_VFD_FETCH_INSTR_1_C),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13,
+				A3XX_VFD_FETCH_INSTR_1_D),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14,
+				A3XX_VFD_FETCH_INSTR_1_E),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15,
+				A3XX_VFD_FETCH_INSTR_1_F),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS,
+				A3XX_VSC_SIZE_ADDRESS),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR,
+				A3XX_SP_VS_PVT_MEM_ADDR_REG),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR,
+				A3XX_SP_FS_PVT_MEM_ADDR_REG),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG,
+				A3XX_SP_VS_OBJ_START_REG),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG,
+				A3XX_SP_FS_OBJ_START_REG),
+};
+
+static const unsigned int _a3xx_pwron_fixup_fs_instructions[] = {
+	0x00000000, 0x302CC300, 0x00000000, 0x302CC304,
+	0x00000000, 0x302CC308, 0x00000000, 0x302CC30C,
+	0x00000000, 0x302CC310, 0x00000000, 0x302CC314,
+	0x00000000, 0x302CC318, 0x00000000, 0x302CC31C,
+	0x00000000, 0x302CC320, 0x00000000, 0x302CC324,
+	0x00000000, 0x302CC328, 0x00000000, 0x302CC32C,
+	0x00000000, 0x302CC330, 0x00000000, 0x302CC334,
+	0x00000000, 0x302CC338, 0x00000000, 0x302CC33C,
+	0x00000000, 0x00000400, 0x00020000, 0x63808003,
+	0x00060004, 0x63828007, 0x000A0008, 0x6384800B,
+	0x000E000C, 0x6386800F, 0x00120010, 0x63888013,
+	0x00160014, 0x638A8017, 0x001A0018, 0x638C801B,
+	0x001E001C, 0x638E801F, 0x00220020, 0x63908023,
+	0x00260024, 0x63928027, 0x002A0028, 0x6394802B,
+	0x002E002C, 0x6396802F, 0x00320030, 0x63988033,
+	0x00360034, 0x639A8037, 0x003A0038, 0x639C803B,
+	0x003E003C, 0x639E803F, 0x00000000, 0x00000400,
+	0x00000003, 0x80D60003, 0x00000007, 0x80D60007,
+	0x0000000B, 0x80D6000B, 0x0000000F, 0x80D6000F,
+	0x00000013, 0x80D60013, 0x00000017, 0x80D60017,
+	0x0000001B, 0x80D6001B, 0x0000001F, 0x80D6001F,
+	0x00000023, 0x80D60023, 0x00000027, 0x80D60027,
+	0x0000002B, 0x80D6002B, 0x0000002F, 0x80D6002F,
+	0x00000033, 0x80D60033, 0x00000037, 0x80D60037,
+	0x0000003B, 0x80D6003B, 0x0000003F, 0x80D6003F,
+	0x00000000, 0x03000000, 0x00000000, 0x00000000,
+};
+
+#define A3XX_INT_MASK \
+	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
+	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
+	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
+	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
+	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
+	 (1 << A3XX_INT_CP_HW_FAULT) |           \
+	 (1 << A3XX_INT_CP_IB1_INT) |            \
+	 (1 << A3XX_INT_CP_IB2_INT) |            \
+	 (1 << A3XX_INT_CP_RB_INT) |             \
+	 (1 << A3XX_INT_CACHE_FLUSH_TS) |	 \
+	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
+	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
+	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
+
+/**
+ * _a3xx_pwron_fixup() - Initialize a special command buffer to run a
+ * post-power collapse shader workaround
+ * @adreno_dev: Pointer to an adreno_device struct
+ *
+ * Some targets require a special workaround shader to be executed after
+ * power collapse. Construct the IB once at init time and keep it
+ * handy.
+ *
+ * Returns: 0 on success or negative on error
+ */
+static int _a3xx_pwron_fixup(struct adreno_device *adreno_dev)
+{
+	unsigned int *cmds;
+	int count = ARRAY_SIZE(_a3xx_pwron_fixup_fs_instructions);
+
+	/* Return if the fixup is already in place */
+	if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
+		return 0;
+
+	adreno_dev->pwron_fixup = kgsl_allocate_global(KGSL_DEVICE(adreno_dev),
+		PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "pwron_fixup");
+
+	if (IS_ERR(adreno_dev->pwron_fixup))
+		return PTR_ERR(adreno_dev->pwron_fixup);
+
+	cmds = adreno_dev->pwron_fixup->hostptr;
+
+	*cmds++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x90000000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
+	*cmds++ = A3XX_RBBM_CLOCK_CTL;
+	*cmds++ = 0xFFFCFFFF;
+	*cmds++ = 0x00010000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
+	*cmds++ = 0x1E000150;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
+	*cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
+	*cmds++ = 0x1E000150;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
+	*cmds++ = 0x1E000150;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_1_REG, 1);
+	*cmds++ = 0x00000040;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_2_REG, 1);
+	*cmds++ = 0x80000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_3_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_VS_CONTROL_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_FS_CONTROL_REG, 1);
+	*cmds++ = 0x0D001002;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_0_REG, 1);
+	*cmds++ = 0x00401101;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_1_REG, 1);
+	*cmds++ = 0x00000400;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_2_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_3_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_4_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_5_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_6_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_1_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_CONST_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG, 1);
+	*cmds++ = 0x00000010;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_WG_OFFSET_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_SP_CTRL_REG, 1);
+	*cmds++ = 0x00040000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
+	*cmds++ = 0x0000000A;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG1, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_PARAM_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_6, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_7, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_OFFSET_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_START_REG, 1);
+	*cmds++ = 0x00000004;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_PARAM_REG, 1);
+	*cmds++ = 0x04008001;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_ADDR_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_LENGTH_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
+	*cmds++ = 0x0DB0400A;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG1, 1);
+	*cmds++ = 0x00300402;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_OFFSET_REG, 1);
+	*cmds++ = 0x00010000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_START_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_PARAM_REG, 1);
+	*cmds++ = 0x04008001;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_ADDR_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_OUTPUT_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_LENGTH_REG, 1);
+	*cmds++ = 0x0000000D;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_CLIP_CNTL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XOFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XSCALE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YOFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YSCALE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZOFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZSCALE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_MINMAX, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_SIZE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_OFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_SCALE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_MODE_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_BR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_BR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_TSE_DEBUG_ECO, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER0_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER1_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER2_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER3_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MODE_CONTROL, 1);
+	*cmds++ = 0x00008000;
+	*cmds++ = cp_type0_packet(A3XX_RB_RENDER_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MSAA_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_ALPHA_REFERENCE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_BLEND_RED, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_BLEND_GREEN, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_BLEND_BLUE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_BLEND_ALPHA, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_COPY_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_BASE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_PITCH, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_INFO, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CLEAR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_INFO, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_PITCH, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CLEAR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_INFO, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_PITCH, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK_BF, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_LRZ_VSC_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_WINDOW_OFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_ADDR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MIN, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MAX, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_GMEM_BASE_ADDR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEBUG_ECO_CONTROLS_ADDR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER0_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER1_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
+	*cmds++ = (1 << CP_LOADSTATE_DSTOFFSET_SHIFT) |
+		(0 << CP_LOADSTATE_STATESRC_SHIFT) |
+		(6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) |
+		(1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+	*cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT) |
+		(0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
+	*cmds++ = 0x00400000;
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
+	*cmds++ = (2 << CP_LOADSTATE_DSTOFFSET_SHIFT) |
+		(6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) |
+		(1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+	*cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT);
+	*cmds++ = 0x00400220;
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
+	*cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) |
+		(1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+	*cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT);
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count);
+	*cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) |
+		(13 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+	*cmds++ = 0x00000000;
+
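+	/* Copy the canned power-on fixup fragment shader instructions into the IB */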
+	memcpy(cmds, _a3xx_pwron_fixup_fs_instructions, count << 2);
+
+	cmds += count;
+
+	*cmds++ = cp_type3_packet(CP_EXEC_CL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
+	*cmds++ = 0x1E000150;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
+	*cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
+	*cmds++ = 0x1E000050;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
+	*cmds++ = A3XX_RBBM_CLOCK_CTL;
+	*cmds++ = 0xFFFCFFFF;
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+
+	/*
+	 * Remember the number of dwords in the command buffer for when we
+	 * program the indirect buffer call in the ringbuffer
+	 */
+	adreno_dev->pwron_fixup_dwords =
+		(cmds - (unsigned int *) adreno_dev->pwron_fixup->hostptr);
+
+	/* Mark the flag in ->priv to show that we have the fix */
+	set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv);
+	return 0;
+}
+
+static int a3xx_probe(struct platform_device *pdev,
+		u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	int ret;
+
+	adreno_dev = (struct adreno_device *)
+		of_device_get_match_data(&pdev->dev);
+
+	memset(adreno_dev, 0, sizeof(*adreno_dev));
+
+	adreno_dev->gpucore = gpucore;
+	adreno_dev->chipid = chipid;
+
+	adreno_reg_offset_init(gpucore->gpudev->reg_offsets);
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	timer_setup(&device->idle_timer, kgsl_timer, 0);
+
+	INIT_WORK(&device->idle_check_ws, kgsl_idle_check);
+
+	adreno_dev->irq_mask = A3XX_INT_MASK;
+
+	ret = adreno_device_probe(pdev, adreno_dev);
+	if (ret)
+		return ret;
+
+	a3xx_coresight_init(adreno_dev);
+
+	return adreno_dispatcher_init(adreno_dev);
+}
+
+static int a3xx_send_me_init(struct adreno_device *adreno_dev,
+			 struct adreno_ringbuffer *rb)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int *cmds;
+	int ret;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 18);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	*cmds++ = cp_type3_packet(CP_ME_INIT, 17);
+
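+	/*
+	 * The first dword is a mask of which of the ME_INIT ordinals below are
+	 * valid; the remaining values are the stock a3xx defaults.
+	 */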
+	*cmds++ = 0x000003f7;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000080;
+	*cmds++ = 0x00000100;
+	*cmds++ = 0x00000180;
+	*cmds++ = 0x00006600;
+	*cmds++ = 0x00000150;
+	*cmds++ = 0x0000014e;
+	*cmds++ = 0x00000154;
+	*cmds++ = 0x00000001;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+
+	/* Enable protected mode registers for A3XX */
+	*cmds++ = 0x20000000;
+
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+
+	/* Submit the command to the ringbuffer */
+	kgsl_pwrscale_busy(device);
+	kgsl_regwrite(device, A3XX_CP_RB_WPTR, rb->_wptr);
+	rb->wptr = rb->_wptr;
+
+	ret = adreno_spin_idle(adreno_dev, 2000);
+	if (ret) {
+		dev_err(device->dev, "CP initialization failed to idle\n");
+		kgsl_device_snapshot(device, NULL, NULL, false);
+	}
+
+	return ret;
+}
+
+static void a3xx_microcode_load(struct adreno_device *adreno_dev);
+
+static int a3xx_rb_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+
+	memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE);
+	rb->wptr = 0;
+	rb->_wptr = 0;
+	rb->wptr_preempt_end = ~0;
+
+	/*
+	 * The size of the ringbuffer in the hardware is the log2
+	 * representation of the size in quadwords (sizedwords / 2).
+	 * Also disable the host RPTR shadow register as it might be unreliable
+	 * in certain circumstances.
+	 */
+
+	kgsl_regwrite(device, A3XX_CP_RB_CNTL,
+		(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F) |
+		(1 << 27));
+
+	kgsl_regwrite(device, A3XX_CP_RB_BASE, rb->buffer_desc->gpuaddr);
+
+	a3xx_microcode_load(adreno_dev);
+
+	/* clear ME_HALT to start micro engine */
+	kgsl_regwrite(device, A3XX_CP_ME_CNTL, 0);
+
+	return a3xx_send_me_init(adreno_dev, rb);
+}
+
+/*
+ * a3xx soft fault detection
+ *
+ * a3xx targets do not have hardware fault detection, so we do it the
+ * old-fashioned way by periodically reading a set of registers and counters
+ * and checking that they are advancing. There are six registers and four
+ * 64-bit counters that we keep an eye on.
+ */
+
+#define A3XX_SOFT_FAULT_DETECT_REGS 6
+#define A3XX_SOFT_FAULT_DETECT_COUNTERS 4
+#define A3XX_SOFT_FAULT_DETECT_COUNT \
+	(A3XX_SOFT_FAULT_DETECT_REGS + (A3XX_SOFT_FAULT_DETECT_COUNTERS * 2))
+
+static bool a3xx_soft_fault_detect_isidle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 reg;
+
+	if (kgsl_state_is_awake(device)) {
+		if (!adreno_rb_empty(adreno_dev->cur_rb))
+			return false;
+
+		/* only check the RBBM status to determine if the GPU is idle */
+		kgsl_regread(device, A3XX_RBBM_STATUS, &reg);
+
+		if (reg & 0x7ffffffe)
+			return false;
+	}
+
+	memset(adreno_dev->soft_ft_vals, 0, A3XX_SOFT_FAULT_DETECT_COUNT << 2);
+	return true;
+}
+
+/* Read the fault detect registers and compare them to the stored version */
+static int a3xx_soft_fault_detect_read_compare(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+	int i, ret = 0;
+	unsigned int ts;
+
+	if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv))
+		return 1;
+
+	/* Check to see if the device is idle - if so report no hang */
+	if (a3xx_soft_fault_detect_isidle(adreno_dev))
+		ret = 1;
+
+	for (i = 0; i < A3XX_SOFT_FAULT_DETECT_COUNT; i++) {
+		unsigned int val;
+
+		if (!adreno_dev->soft_ft_regs[i])
+			continue;
+
+		kgsl_regread(device, adreno_dev->soft_ft_regs[i], &val);
+		if (val != adreno_dev->soft_ft_vals[i])
+			ret = 1;
+		adreno_dev->soft_ft_vals[i] = val;
+	}
+
+	if (!adreno_rb_readtimestamp(adreno_dev, adreno_dev->cur_rb,
+				KGSL_TIMESTAMP_RETIRED, &ts)) {
+		if (ts != rb->fault_detect_ts)
+			ret = 1;
+
+		rb->fault_detect_ts = ts;
+	}
+
+	return ret;
+}
+
+/*
+ * This is called on a regular basis while cmdobjs are in flight. The fault
+ * detection registers are read and compared to the stored values - if they
+ * changed then the GPU is still running. If they are the same between
+ * subsequent calls then the GPU may have faulted.
+ */
+static void a3xx_soft_fault_timer(struct timer_list *t)
+{
+	struct adreno_dispatcher *dispatcher = from_timer(dispatcher,
+							t, fault_timer);
+	struct adreno_device *adreno_dev = container_of(dispatcher,
+					struct adreno_device, dispatcher);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Leave if the user decided to turn off fast hang detection */
+	if (!adreno_soft_fault_detect(adreno_dev))
+		return;
+
+	if (adreno_gpu_fault(adreno_dev)) {
+		adreno_dispatcher_schedule(device);
+		return;
+	}
+
+	/*
+	 * Read the fault detect registers - if the compare returns 0 then
+	 * nothing has changed, so mark the dispatcher as faulted and schedule
+	 * the work loop.
+	 */
+
+	if (!a3xx_soft_fault_detect_read_compare(adreno_dev))
+		adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT);
+	else if (dispatcher->inflight > 0)
+		adreno_dispatcher_start_fault_timer(adreno_dev);
+}
+
+/*
+ * Start fault detection. The counters are only assigned while fault detection
+ * is running so that they can be used for other purposes if fault detection is
+ * disabled
+ */
+static void a3xx_soft_fault_detect_start(struct adreno_device *adreno_dev)
+{
+	u32 *regs = &adreno_dev->soft_ft_regs[A3XX_SOFT_FAULT_DETECT_REGS];
+	int ret = 0;
+
+	if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv))
+		return;
+
+	if (adreno_dev->fast_hang_detect == 1)
+		return;
+
+	ret |= adreno_perfcounter_kernel_get(adreno_dev,
+		KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES,
+		&regs[0], &regs[1]);
+
+	ret |= adreno_perfcounter_kernel_get(adreno_dev,
+		KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES,
+		&regs[2], &regs[3]);
+
+	ret |= adreno_perfcounter_kernel_get(adreno_dev,
+		KGSL_PERFCOUNTER_GROUP_SP, SP_FS_CFLOW_INSTRUCTIONS,
+		&regs[4], &regs[5]);
+
+	ret |= adreno_perfcounter_kernel_get(adreno_dev,
+		KGSL_PERFCOUNTER_GROUP_TSE, TSE_INPUT_PRIM_NUM,
+		&regs[6], &regs[7]);
+
+	WARN(ret, "Unable to allocate one or more fault detect counters\n");
+	adreno_dev->fast_hang_detect = 1;
+}
+
+/* Helper function to put back a counter */
+static void put_counter(struct adreno_device *adreno_dev,
+		int group, int countable, u32 *lo, u32 *hi)
+{
+	adreno_perfcounter_put(adreno_dev, group, countable,
+		PERFCOUNTER_FLAG_KERNEL);
+
+	*lo = 0;
+	*hi = 0;
+}
+
+/* Stop fault detection and return the counters */
+static void a3xx_soft_fault_detect_stop(struct adreno_device *adreno_dev)
+{
+	u32 *regs = &adreno_dev->soft_ft_regs[A3XX_SOFT_FAULT_DETECT_REGS];
+
+	if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv))
+		return;
+
+	if (!adreno_dev->fast_hang_detect)
+		return;
+
+	put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES,
+			&regs[0], &regs[1]);
+
+	put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES,
+			&regs[2], &regs[3]);
+
+	put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+			SP_FS_CFLOW_INSTRUCTIONS, &regs[4], &regs[5]);
+
+	put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_TSE, TSE_INPUT_PRIM_NUM,
+			&regs[6], &regs[7]);
+
+	adreno_dev->fast_hang_detect = 0;
+}
+
+/* Initialize the registers and set up the data structures */
+static void a3xx_soft_fault_detect_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT))
+		return;
+
+	/* Disable the fast hang detect bit until we know it's a go */
+	adreno_dev->fast_hang_detect = 0;
+
+	adreno_dev->soft_ft_regs = devm_kcalloc(&device->pdev->dev,
+		A3XX_SOFT_FAULT_DETECT_COUNT, sizeof(u32), GFP_KERNEL);
+
+	adreno_dev->soft_ft_vals = devm_kcalloc(&device->pdev->dev,
+		A3XX_SOFT_FAULT_DETECT_COUNT, sizeof(u32), GFP_KERNEL);
+
+	if (!adreno_dev->soft_ft_regs || !adreno_dev->soft_ft_vals)
+		return;
+
+	adreno_dev->soft_ft_count = A3XX_SOFT_FAULT_DETECT_COUNT;
+
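+	/* The first six entries are registers; the counter lo/hi pairs follow */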
+	adreno_dev->soft_ft_regs[0] = A3XX_RBBM_STATUS;
+	adreno_dev->soft_ft_regs[1] = A3XX_CP_RB_RPTR;
+	adreno_dev->soft_ft_regs[2] = A3XX_CP_IB1_BASE;
+	adreno_dev->soft_ft_regs[3] = A3XX_CP_IB1_BUFSZ;
+	adreno_dev->soft_ft_regs[4] = A3XX_CP_IB2_BASE;
+	adreno_dev->soft_ft_regs[5] = A3XX_CP_IB2_BUFSZ;
+
+	set_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv);
+
+	a3xx_soft_fault_detect_start(adreno_dev);
+}
+
+static void a3xx_remove(struct adreno_device *adreno_dev)
+{
+	a3xx_soft_fault_detect_stop(adreno_dev);
+}
+
+static int a3xx_microcode_read(struct adreno_device *adreno_dev);
+
+/*
+ * a3xx_init() - Initialize gpu specific data
+ * @adreno_dev: Pointer to adreno device
+ */
+static int a3xx_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int ret;
+
+	/*
+	 * Set up the a3xx-only soft fault timer before heading into the generic
+	 * dispatcher setup
+	 */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT))
+		timer_setup(&dispatcher->fault_timer, a3xx_soft_fault_timer, 0);
+
+	ret = a3xx_ringbuffer_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a3xx_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	_a3xx_pwron_fixup(adreno_dev);
+
+	ret = adreno_allocate_global(device, &iommu->setstate, PAGE_SIZE,
+			0, KGSL_MEMFLAGS_GPUREADONLY, 0, "setstate");
+
+	if (!ret)
+		kgsl_sharedmem_writel(iommu->setstate,
+			KGSL_IOMMU_SETSTATE_NOP_OFFSET,
+			cp_type3_packet(CP_NOP, 1));
+
+	kgsl_mmu_set_feature(device, KGSL_MMU_NEED_GUARD_PAGE);
+
+	/* Put the hardware in a responsive state to set up fault detection */
+	ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);
+	if (ret)
+		return ret;
+
+	a3xx_soft_fault_detect_init(adreno_dev);
+
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+	return 0;
+}
+
+/*
+ * a3xx_err_callback() - Call back for a3xx error interrupts
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	switch (bit) {
+	case A3XX_INT_RBBM_AHB_ERROR: {
+		kgsl_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
+
+		/*
+		 * Return the word address of the erroring register so that it
+		 * matches the register specification
+		 */
+		dev_crit_ratelimited(device->dev,
+					"RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
+					reg & (1 << 28) ? "WRITE" : "READ",
+					(reg & 0xFFFFF) >> 2,
+					(reg >> 20) & 0x3,
+					(reg >> 24) & 0xF);
+
+		/* Clear the error */
+		kgsl_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
+		break;
+	}
+	case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
+		dev_crit_ratelimited(device->dev,
+					"RBBM: ATB bus overflow\n");
+		break;
+	case A3XX_INT_CP_T0_PACKET_IN_IB:
+		dev_crit_ratelimited(device->dev,
+					"ringbuffer T0 packet in IB interrupt\n");
+		break;
+	case A3XX_INT_CP_OPCODE_ERROR:
+		dev_crit_ratelimited(device->dev,
+					"ringbuffer opcode error interrupt\n");
+		break;
+	case A3XX_INT_CP_RESERVED_BIT_ERROR:
+		dev_crit_ratelimited(device->dev,
+					"ringbuffer reserved bit error interrupt\n");
+		break;
+	case A3XX_INT_CP_HW_FAULT:
+		kgsl_regread(device, A3XX_CP_HW_FAULT, &reg);
+		dev_crit_ratelimited(device->dev,
+					"CP | Ringbuffer HW fault | status=%x\n",
+					reg);
+		break;
+	case A3XX_INT_CP_REG_PROTECT_FAULT:
+		kgsl_regread(device, A3XX_CP_PROTECT_STATUS, &reg);
+		dev_crit_ratelimited(device->dev,
+					"CP | Protected mode error| %s | addr=%x\n",
+					reg & (1 << 24) ? "WRITE" : "READ",
+					(reg & 0xFFFFF) >> 2);
+		break;
+	case A3XX_INT_CP_AHB_ERROR_HALT:
+		dev_crit_ratelimited(device->dev,
+					"ringbuffer AHB error interrupt\n");
+		break;
+	case A3XX_INT_UCHE_OOB_ACCESS:
+		dev_crit_ratelimited(device->dev,
+					"UCHE: Out of bounds access\n");
+		break;
+	default:
+		dev_crit_ratelimited(device->dev, "Unknown interrupt\n");
+	}
+}
+
+static const struct adreno_irq_funcs a3xx_irq_funcs[32] = {
+	ADRENO_IRQ_CALLBACK(NULL),                    /* 0 - RBBM_GPU_IDLE */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL),  /* 2 - RBBM_REG_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(NULL),  /* 3 - RBBM_ME_MS_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(NULL),  /* 4 - RBBM_PFP_MS_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 5 - RBBM_ATB_BUS_OVERFLOW */
+	ADRENO_IRQ_CALLBACK(NULL),  /* 6 - RBBM_VFD_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 8 - CP_T0_PACKET_IN_IB */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 9 - CP_OPCODE_ERROR */
+	/* 10 - CP_RESERVED_BIT_ERROR */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 11 - CP_HW_FAULT */
+	ADRENO_IRQ_CALLBACK(NULL),	             /* 12 - CP_DMA */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback),   /* 13 - CP_IB2_INT */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback),   /* 14 - CP_IB1_INT */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback),   /* 15 - CP_RB_INT */
+	/* 16 - CP_REG_PROTECT_FAULT */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 17 - CP_RB_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 18 - CP_VS_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 19 - CP_PS_DONE_TS */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
+	/* 21 - CP_AHB_ERROR_FAULT */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 22 - Unused */
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 23 - Unused */
+	/* 24 - MISC_HANG_DETECT */
+	ADRENO_IRQ_CALLBACK(adreno_hang_int_callback),
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 25 - UCHE_OOB_ACCESS */
+};
+
+static struct {
+	u32 reg;
+	u32 base;
+	u32 count;
+} a3xx_protected_blocks[] = {
+	/* RBBM */
+	{ A3XX_CP_PROTECT_REG_0,      0x0018, 0 },
+	{ A3XX_CP_PROTECT_REG_0 + 1,  0x0020, 2 },
+	{ A3XX_CP_PROTECT_REG_0 + 2,  0x0033, 0 },
+	{ A3XX_CP_PROTECT_REG_0 + 3,  0x0042, 0 },
+	{ A3XX_CP_PROTECT_REG_0 + 4,  0x0050, 4 },
+	{ A3XX_CP_PROTECT_REG_0 + 5,  0x0063, 0 },
+	{ A3XX_CP_PROTECT_REG_0 + 6,  0x0100, 4 },
+	/* CP */
+	{ A3XX_CP_PROTECT_REG_0 + 7,  0x01c0, 5 },
+	{ A3XX_CP_PROTECT_REG_0 + 8,  0x01ec, 1 },
+	{ A3XX_CP_PROTECT_REG_0 + 9,  0x01f6, 1 },
+	{ A3XX_CP_PROTECT_REG_0 + 10, 0x01f8, 2 },
+	{ A3XX_CP_PROTECT_REG_0 + 11, 0x045e, 2 },
+	{ A3XX_CP_PROTECT_REG_0 + 12, 0x0460, 4 },
+	/* RB */
+	{ A3XX_CP_PROTECT_REG_0 + 13, 0x0cc0, 0 },
+	/* VBIF */
+	{ A3XX_CP_PROTECT_REG_0 + 14, 0x3000, 6 },
+	/*
+	 * SMMU
+	 * For A3xx, base offset for smmu region is 0xa000 and length is
+	 * 0x1000 bytes. Offset must be in dword and length of the block
+	 * must be ilog2(dword length).
+	 * 0xa000 >> 2 = 0x2800, ilog2(0x1000 >> 2) = 10.
+	 */
+	{ A3XX_CP_PROTECT_REG_0 + 15, 0x2800, 10 },
+	/* There are no remaining protected mode registers for a3xx */
+};
+
+static void a3xx_protect_init(struct kgsl_device *device)
+{
+	int i;
+
+	kgsl_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007);
+
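+	/*
+	 * Each CP_PROTECT entry packs the enable bits (0x60000000) together
+	 * with the ilog2 block length (count << 24) and the block base
+	 * (base << 2), as described for the SMMU entry above.
+	 */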
+	for (i = 0; i < ARRAY_SIZE(a3xx_protected_blocks); i++) {
+		u32 val = 0x60000000 |
+			(a3xx_protected_blocks[i].count << 24) |
+			(a3xx_protected_blocks[i].base << 2);
+
+		kgsl_regwrite(device, a3xx_protected_blocks[i].reg, val);
+	}
+}
+
+bool a3xx_gx_is_on(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	bool gdsc_on, clk_on;
+
+	clk_on = __clk_is_enabled(pwr->grp_clks[0]);
+
+	gdsc_on = regulator_is_enabled(pwr->gx_gdsc);
+
+	return (gdsc_on && clk_on);
+}
+
+static int a3xx_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a3xx_core *a3xx_core = to_a3xx_core(adreno_dev);
+	int ret;
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+	adreno_perfcounter_restore(adreno_dev);
+
+	if (adreno_dev->soft_ft_regs)
+		memset(adreno_dev->soft_ft_regs, 0,
+			adreno_dev->soft_ft_count << 2);
+
+	/* Set up VBIF registers from the GPU core definition */
+	kgsl_regmap_multi_write(&device->regmap, a3xx_core->vbif,
+		a3xx_core->vbif_count);
+
+	/* Make all blocks contribute to the GPU BUSY perf counter */
+	kgsl_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);
+
+	/* Tune the hysteresis counters for SP and CP idle detection */
+	kgsl_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
+	kgsl_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
+
+	/*
+	 * Enable the RBBM error reporting bits.  This lets us get
+	 * useful information on failure
+	 */
+
+	kgsl_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);
+
+	/* Enable AHB error reporting */
+	kgsl_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);
+
+	/* Turn on the power counters */
+	kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);
+
+	/*
+	 * Turn on hang detection - this spews a lot of useful information
+	 * into the RBBM registers on a hang
+	 */
+	kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
+		(1 << 16) | 0xFFF);
+
+	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0). */
+	kgsl_regwrite(device, A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
+
+	/* Enable VFD to access most of the UCHE (7 ways out of 8) */
+	kgsl_regwrite(device, A3XX_UCHE_CACHE_WAYS_VFD, 0x07);
+
+	/* Enable Clock gating */
+	kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL_DEFAULT);
+
+	/* Turn on protection */
+	a3xx_protect_init(device);
+
+	/* Turn on performance counters */
+	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01);
+
+	kgsl_regwrite(device, A3XX_CP_DEBUG, A3XX_CP_DEBUG_DEFAULT);
+
+	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
+	kgsl_regwrite(device, A3XX_CP_QUEUE_THRESHOLDS, 0x000E0602);
+	return 0;
+}
+
+/* Register offset defines for A3XX */
+static unsigned int a3xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A3XX_CP_RB_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A3XX_CP_RB_RPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A3XX_CP_RB_WPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A3XX_CP_ME_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A3XX_CP_RB_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A3XX_CP_IB1_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A3XX_CP_IB1_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A3XX_CP_IB2_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A3XX_CP_IB2_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_TIMESTAMP, A3XX_CP_SCRATCH_REG0),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A3XX_CP_SCRATCH_REG6),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A3XX_CP_SCRATCH_REG7),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A3XX_CP_PROTECT_REG_0),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A3XX_RBBM_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO,
+					A3XX_RBBM_PERFCTR_PWR_1_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A3XX_RBBM_INT_0_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_PA_SC_AA_CONFIG, A3XX_PA_SC_AA_CONFIG),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PM_OVERRIDE2, A3XX_RBBM_PM_OVERRIDE2),
+	ADRENO_REG_DEFINE(ADRENO_REG_SQ_GPR_MANAGEMENT, A3XX_SQ_GPR_MANAGEMENT),
+	ADRENO_REG_DEFINE(ADRENO_REG_SQ_INST_STORE_MANAGEMENT,
+				A3XX_SQ_INST_STORE_MANAGEMENT),
+	ADRENO_REG_DEFINE(ADRENO_REG_TP0_CHICKEN, A3XX_TP0_CHICKEN),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A3XX_RBBM_SW_RESET_CMD),
+};
+
+static int _load_firmware(struct kgsl_device *device, const char *fwfile,
+			  void **buf, int *len)
+{
+	const struct firmware *fw = NULL;
+	int ret;
+
+	ret = request_firmware(&fw, fwfile, &device->pdev->dev);
+
+	if (ret) {
+		dev_err(&device->pdev->dev, "request_firmware(%s) failed: %d\n",
+			     fwfile, ret);
+		return ret;
+	}
+
+	if (!fw)
+		return -EINVAL;
+
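+	/* Make a devm-managed copy so the firmware image lives with the device */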
+	*buf = devm_kmemdup(&device->pdev->dev, fw->data, fw->size, GFP_KERNEL);
+	*len = fw->size;
+
+	release_firmware(fw);
+	return (*buf) ? 0 : -ENOMEM;
+}
+
+static int a3xx_microcode_read(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4);
+	struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP);
+	const struct adreno_a3xx_core *a3xx_core = to_a3xx_core(adreno_dev);
+
+	if (pm4_fw->fwvirt == NULL) {
+		int len;
+		void *ptr;
+
+		int ret = _load_firmware(device,
+			a3xx_core->pm4fw_name, &ptr, &len);
+
+		if (ret) {
+			dev_err(device->dev,  "Failed to read pm4 ucode %s\n",
+				a3xx_core->pm4fw_name);
+			return ret;
+		}
+
+		/* PM4 size is 3 dword aligned plus 1 dword of version */
+		if (len % ((sizeof(uint32_t) * 3)) != sizeof(uint32_t)) {
+			dev_err(device->dev,
+				     "Bad pm4 microcode size: %d\n",
+				     len);
+			devm_kfree(&device->pdev->dev, ptr);
+			return -ENOMEM;
+		}
+
+		pm4_fw->size = len / sizeof(uint32_t);
+		pm4_fw->fwvirt = ptr;
+		pm4_fw->version = pm4_fw->fwvirt[1];
+	}
+
+	if (pfp_fw->fwvirt == NULL) {
+		int len;
+		void *ptr;
+
+		int ret = _load_firmware(device,
+			a3xx_core->pfpfw_name, &ptr, &len);
+		if (ret) {
+			dev_err(device->dev, "Failed to read pfp ucode %s\n",
+					   a3xx_core->pfpfw_name);
+			return ret;
+		}
+
+		/* PFP size should be dword aligned */
+		if (len % sizeof(uint32_t) != 0) {
+			dev_err(device->dev,
+						"Bad PFP microcode size: %d\n",
+						len);
+			devm_kfree(&device->pdev->dev, ptr);
+			return -ENOMEM;
+		}
+
+		pfp_fw->size = len / sizeof(uint32_t);
+		pfp_fw->fwvirt = ptr;
+		pfp_fw->version = pfp_fw->fwvirt[1];
+	}
+
+	return 0;
+}
+
+static void a3xx_microcode_load(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	size_t pm4_size = adreno_dev->fw[ADRENO_FW_PM4].size;
+	size_t pfp_size = adreno_dev->fw[ADRENO_FW_PFP].size;
+	int i;
+
+	/* load the CP ucode using AHB writes */
+	kgsl_regwrite(device, A3XX_CP_ME_RAM_WADDR, 0);
+
+	for (i = 1; i < pm4_size; i++)
+		kgsl_regwrite(device, A3XX_CP_ME_RAM_DATA,
+				adreno_dev->fw[ADRENO_FW_PM4].fwvirt[i]);
+
+	kgsl_regwrite(device, A3XX_CP_PFP_UCODE_ADDR, 0);
+
+	for (i = 1; i < pfp_size; i++)
+		kgsl_regwrite(device, A3XX_CP_PFP_UCODE_DATA,
+				adreno_dev->fw[ADRENO_FW_PFP].fwvirt[i]);
+}
+
+static u64 a3xx_read_alwayson(struct adreno_device *adreno_dev)
+{
+	/* A3XX does not have an always-on timer */
+	return 0;
+}
+
+static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	irqreturn_t ret;
+	u32 status;
+
+	/* Get the current interrupt status */
+	kgsl_regread(device, A3XX_RBBM_INT_0_STATUS, &status);
+
+	/*
+	 * Clear all the interrupt bits except A3XX_INT_RBBM_AHB_ERROR.
+	 * The interrupt will stay asserted until it is cleared by the handler
+	 * so don't touch it yet to avoid a storm
+	 */
+
+	kgsl_regwrite(device, A3XX_RBBM_INT_CLEAR_CMD,
+		status & ~A3XX_INT_RBBM_AHB_ERROR);
+
+	/* Call the helper to execute the callbacks */
+	ret = adreno_irq_callbacks(adreno_dev, a3xx_irq_funcs, status);
+
+	trace_kgsl_a3xx_irq_status(adreno_dev, status);
+
+	/* Now clear AHB_ERROR if it was set */
+	if (status & A3XX_INT_RBBM_AHB_ERROR)
+		kgsl_regwrite(device, A3XX_RBBM_INT_CLEAR_CMD,
+			A3XX_INT_RBBM_AHB_ERROR);
+
+	return ret;
+}
+
+static bool a3xx_hw_isidle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	kgsl_regread(device, A3XX_RBBM_STATUS, &status);
+
+	if (status & 0x7ffffffe)
+		return false;
+
+	kgsl_regread(device, A3XX_RBBM_INT_0_STATUS, &status);
+
+	/* Return busy if an interrupt is pending */
+	return !((status & adreno_dev->irq_mask) ||
+		atomic_read(&adreno_dev->pending_irq_refcnt));
+}
+
+static int a3xx_clear_pending_transactions(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 mask = A30X_VBIF_XIN_HALT_CTRL0_MASK;
+	int ret;
+
+	kgsl_regwrite(device, A3XX_VBIF_XIN_HALT_CTRL0, mask);
+	ret = adreno_wait_for_halt_ack(device, A3XX_VBIF_XIN_HALT_CTRL1, mask);
+	kgsl_regwrite(device, A3XX_VBIF_XIN_HALT_CTRL0, 0);
+
+	return ret;
+}
+
+static bool a3xx_is_hw_collapsible(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/*
+	 * Skip power collapse for A304 if the power ctrl flag is set to a
+	 * non-zero value. soft_reset does not work on A304, so power collapse
+	 * needs to be disabled to avoid triggering it.
+	 */
+	if (adreno_is_a304(adreno_dev) && device->pwrctrl.ctrl_flags)
+		return false;
+
+	return adreno_isidle(adreno_dev);
+}
+
+static void a3xx_power_stats(struct adreno_device *adreno_dev,
+		struct kgsl_power_stats *stats)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_busy_data *busy = &adreno_dev->busy_data;
+	s64 freq = kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000;
+	u64 gpu_busy;
+
+	/* Set the GPU busy counter for frequency scaling */
+	gpu_busy = counter_delta(device, A3XX_RBBM_PERFCTR_PWR_1_LO,
+		&busy->gpu_busy);
+
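+	/* Busy cycles divided by the frequency in MHz gives busy time in usec */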
+	stats->busy_time = gpu_busy / freq;
+
+	if (!device->pwrctrl.bus_control)
+		return;
+
+	stats->ram_time = counter_delta(device, adreno_dev->ram_cycles_lo,
+		&busy->bif_ram_cycles);
+
+	stats->ram_wait = counter_delta(device, adreno_dev->starved_ram_lo,
+		&busy->bif_starved_ram);
+}
+
+static int a3xx_setproperty(struct kgsl_device_private *dev_priv,
+		u32 type, void __user *value, u32 sizebytes)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 enable;
+
+	if (type != KGSL_PROP_PWRCTRL)
+		return -ENODEV;
+
+	if (sizebytes != sizeof(enable))
+		return -EINVAL;
+
+	if (copy_from_user(&enable, value, sizeof(enable)))
+		return -EFAULT;
+
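+	/*
+	 * Enabling restores power scaling and soft fault detection; disabling
+	 * forces the GPU active and pins it on with KGSL_PWR_ON.
+	 */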
+	mutex_lock(&device->mutex);
+	if (enable) {
+		device->pwrctrl.ctrl_flags = 0;
+
+		if (!adreno_active_count_get(adreno_dev)) {
+			a3xx_soft_fault_detect_start(adreno_dev);
+			adreno_active_count_put(adreno_dev);
+		}
+
+		kgsl_pwrscale_enable(device);
+	} else {
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+		device->pwrctrl.ctrl_flags = KGSL_PWR_ON;
+
+		a3xx_soft_fault_detect_stop(adreno_dev);
+		kgsl_pwrscale_disable(device, true);
+	}
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+const struct adreno_gpudev adreno_a3xx_gpudev = {
+	.reg_offsets = a3xx_register_offsets,
+	.irq_handler = a3xx_irq_handler,
+	.probe = a3xx_probe,
+	.rb_start = a3xx_rb_start,
+	.init = a3xx_init,
+	.start = a3xx_start,
+	.snapshot = a3xx_snapshot,
+	.read_alwayson = a3xx_read_alwayson,
+	.hw_isidle = a3xx_hw_isidle,
+	.power_ops = &adreno_power_operations,
+	.clear_pending_transactions = a3xx_clear_pending_transactions,
+	.ringbuffer_submitcmd = a3xx_ringbuffer_submitcmd,
+	.is_hw_collapsible = a3xx_is_hw_collapsible,
+	.power_stats = a3xx_power_stats,
+	.setproperty = a3xx_setproperty,
+	.remove = a3xx_remove,
+	.gx_is_on = a3xx_gx_is_on,
+};

+ 76 - 0
qcom/opensource/graphics-kernel/adreno_a3xx.h

@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __A3XX_H
+#define __A3XX_H
+
+#include "a3xx_reg.h"
+/**
+ * struct adreno_a3xx_core - a3xx specific GPU core definitions
+ */
+struct adreno_a3xx_core {
+	/** @base: Container for the generic &struct adreno_gpu_core */
+	struct adreno_gpu_core base;
+	/** @pm4fw_name: Name of the PM4 microcode file */
+	const char *pm4fw_name;
+	/** @pfpfw_name: Name of the PFP microcode file */
+	const char *pfpfw_name;
+	/** @vbif: List of registers and values to write for VBIF */
+	const struct kgsl_regmap_list *vbif;
+	/** @vbif_count: Number of registers in @vbif */
+	u32 vbif_count;
+};
+
+struct adreno_device;
+
+/**
+ * to_a3xx_core - return the a3xx specific GPU core struct
+ * @adreno_dev: An Adreno GPU device handle
+ *
+ * Returns:
+ * A pointer to the a3xx specific GPU core struct
+ */
+static inline const struct adreno_a3xx_core *
+to_a3xx_core(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpu_core *core = adreno_dev->gpucore;
+
+	return container_of(core, struct adreno_a3xx_core, base);
+}
+
+void a3xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+
+extern const struct adreno_perfcounters adreno_a3xx_perfcounters;
+
+/**
+ * a3xx_ringbuffer_init - Initialize the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Initialize the ringbuffer for a3xx.
+ * Return: 0 on success or negative on failure
+ */
+int a3xx_ringbuffer_init(struct adreno_device *adreno_dev);
+
+/**
+ * a3xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ * @cmdobj: Pointer to a user command object
+ * @flags: Internal submit flags
+ * @time: Optional pointer to an adreno_submit_time container
+ *
+ * Return: 0 on success or negative on failure
+ */
+int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time);
+
+#ifdef CONFIG_QCOM_KGSL_CORESIGHT
+void a3xx_coresight_init(struct adreno_device *device);
+#else
+static inline void a3xx_coresight_init(struct adreno_device *device) { }
+#endif
+
+#endif /*__A3XX_H */

+ 65 - 0
qcom/opensource/graphics-kernel/adreno_a3xx_coresight.c

@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a3xx.h"
+#include "adreno_coresight.h"
+
+static struct adreno_coresight_register a3xx_coresight_registers[] = {
+	{ A3XX_RBBM_DEBUG_BUS_CTL, 0x0001093F },
+	{ A3XX_RBBM_EXT_TRACE_STOP_CNT, 0x00017fff },
+	{ A3XX_RBBM_EXT_TRACE_START_CNT, 0x0001000f },
+	{ A3XX_RBBM_EXT_TRACE_PERIOD_CNT, 0x0001ffff },
+	{ A3XX_RBBM_EXT_TRACE_CMD, 0x00000001 },
+	{ A3XX_RBBM_EXT_TRACE_BUS_CTL, 0x89100010 },
+	{ A3XX_RBBM_DEBUG_BUS_STB_CTL0, 0x00000000 },
+	{ A3XX_RBBM_DEBUG_BUS_STB_CTL1, 0xFFFFFFFE },
+	{ A3XX_RBBM_INT_TRACE_BUS_CTL, 0x00201111 },
+};
+
+static ADRENO_CORESIGHT_ATTR(config_debug_bus,
+	&a3xx_coresight_registers[0]);
+static ADRENO_CORESIGHT_ATTR(config_trace_stop_cnt,
+	&a3xx_coresight_registers[1]);
+static ADRENO_CORESIGHT_ATTR(config_trace_start_cnt,
+	&a3xx_coresight_registers[2]);
+static ADRENO_CORESIGHT_ATTR(config_trace_period_cnt,
+	&a3xx_coresight_registers[3]);
+static ADRENO_CORESIGHT_ATTR(config_trace_cmd,
+	&a3xx_coresight_registers[4]);
+static ADRENO_CORESIGHT_ATTR(config_trace_bus_ctl,
+	&a3xx_coresight_registers[5]);
+
+static struct attribute *a3xx_coresight_attrs[] = {
+	&coresight_attr_config_debug_bus.attr.attr,
+	&coresight_attr_config_trace_start_cnt.attr.attr,
+	&coresight_attr_config_trace_stop_cnt.attr.attr,
+	&coresight_attr_config_trace_period_cnt.attr.attr,
+	&coresight_attr_config_trace_cmd.attr.attr,
+	&coresight_attr_config_trace_bus_ctl.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group a3xx_coresight_group = {
+	.attrs = a3xx_coresight_attrs,
+};
+
+static const struct attribute_group *a3xx_coresight_groups[] = {
+	&a3xx_coresight_group,
+	NULL,
+};
+
+static const struct adreno_coresight a3xx_coresight = {
+	.registers = a3xx_coresight_registers,
+	.count = ARRAY_SIZE(a3xx_coresight_registers),
+	.groups = a3xx_coresight_groups,
+};
+
+void a3xx_coresight_init(struct adreno_device *adreno_dev)
+{
+	adreno_coresight_add_device(adreno_dev, "coresight-gfx",
+		&a3xx_coresight, &adreno_dev->gx_coresight);
+}

+ 411 - 0
qcom/opensource/graphics-kernel/adreno_a3xx_perfcounter.c

@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a3xx.h"
+#include "adreno_perfcounter.h"
+#include "kgsl_device.h"
+
+/* Bit flag for RBBM_PERFCTR_CTL */
+#define RBBM_PERFCTR_CTL_ENABLE		0x00000001
+#define VBIF2_PERF_CNT_SEL_MASK 0x7F
+/* offset of clear register from select register */
+#define VBIF2_PERF_CLR_REG_SEL_OFF 8
+/* offset of enable register from select register */
+#define VBIF2_PERF_EN_REG_SEL_OFF 16
+/* offset of clear register from the enable register */
+#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8
+
+static void a3xx_counter_load(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_register *reg)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int index = reg->load_bit / 32;
+	u32 enable = BIT(reg->load_bit & 31);
+
+	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_LO,
+		lower_32_bits(reg->value));
+
+	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_HI,
+		upper_32_bits(reg->value));
+
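+	/* Each counter owns one bit in LOAD_CMD0/1; set it to latch the value above */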
+	if (index == 0)
+		kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, enable);
+	else
+		kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, enable);
+}
+
+static int a3xx_counter_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	kgsl_regwrite(device, reg->select, countable);
+	reg->value = 0;
+
+	return 0;
+}
+
+static u64 a3xx_counter_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 val, hi, lo;
+
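+	/* Pause the counters so the LO/HI pair reads back as one consistent value */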
+	kgsl_regread(device, A3XX_RBBM_PERFCTR_CTL, &val);
+	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL,
+		val & ~RBBM_PERFCTR_CTL_ENABLE);
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
+
+	return (((u64) hi) << 32) | lo;
+}
+
+static int a3xx_counter_pwr_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	return 0;
+}
+
+static u64 a3xx_counter_pwr_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 val, hi, lo;
+
+	kgsl_regread(device, A3XX_RBBM_RBBM_CTL, &val);
+
+	/* Freeze the counter so we can read it */
+	if (!counter)
+		kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x10000);
+	else
+		kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x20000);
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val);
+
+	return ((((u64) hi) << 32) | lo) + reg->value;
+}
+
+static int a3xx_counter_vbif_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	if (countable > VBIF2_PERF_CNT_SEL_MASK)
+		return -EINVAL;
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regwrite(device,
+		reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1);
+	kgsl_regwrite(device,
+		reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0);
+	kgsl_regwrite(device,
+		reg->select, countable & VBIF2_PERF_CNT_SEL_MASK);
+	/* The enable reg is VBIF2_PERF_EN_REG_SEL_OFF dwords below the select reg */
+	kgsl_regwrite(device,
+		reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static u64 a3xx_counter_vbif_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 hi, lo;
+
+	/* freeze counter */
+	kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 0);
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	/* un-freeze counter */
+	kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1);
+
+	return ((((u64) hi) << 32) | lo) + reg->value;
+}
+
+static int a3xx_counter_vbif_pwr_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regwrite(device, reg->select +
+		VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1);
+	kgsl_regwrite(device, reg->select +
+		VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0);
+	kgsl_regwrite(device, reg->select, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static u64 a3xx_counter_vbif_pwr_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 hi, lo;
+
+	/* freeze counter */
+	kgsl_regwrite(device, reg->select, 0);
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	/* un-freeze counter */
+	kgsl_regwrite(device, reg->select, 1);
+
+	return ((((u64) hi) << 32) | lo) + reg->value;
+}
+
+/*
+ * Define the available perfcounter groups - these get used by
+ * adreno_perfcounter_get and adreno_perfcounter_put
+ */
+
+static struct adreno_perfcount_register a3xx_perfcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO,
+		A3XX_RBBM_PERFCTR_CP_0_HI, 0, A3XX_CP_PERFCOUNTER_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO,
+		A3XX_RBBM_PERFCTR_RBBM_0_HI, 1, A3XX_RBBM_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO,
+		A3XX_RBBM_PERFCTR_RBBM_1_HI, 2, A3XX_RBBM_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO,
+		A3XX_RBBM_PERFCTR_PC_0_HI, 3, A3XX_PC_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO,
+		A3XX_RBBM_PERFCTR_PC_1_HI, 4, A3XX_PC_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO,
+		A3XX_RBBM_PERFCTR_PC_2_HI, 5, A3XX_PC_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO,
+		A3XX_RBBM_PERFCTR_PC_3_HI, 6, A3XX_PC_PERFCOUNTER3_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO,
+		A3XX_RBBM_PERFCTR_VFD_0_HI, 7, A3XX_VFD_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO,
+		A3XX_RBBM_PERFCTR_VFD_1_HI, 8, A3XX_VFD_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_0_HI, 9,
+		A3XX_HLSQ_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_1_HI, 10,
+		A3XX_HLSQ_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_2_HI, 11,
+		A3XX_HLSQ_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_3_HI, 12,
+		A3XX_HLSQ_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_4_HI, 13,
+		A3XX_HLSQ_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_5_HI, 14,
+		A3XX_HLSQ_PERFCOUNTER5_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO,
+		A3XX_RBBM_PERFCTR_VPC_0_HI, 15, A3XX_VPC_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO,
+		A3XX_RBBM_PERFCTR_VPC_1_HI, 16, A3XX_VPC_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO,
+		A3XX_RBBM_PERFCTR_TSE_0_HI, 17, A3XX_GRAS_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO,
+		A3XX_RBBM_PERFCTR_TSE_1_HI, 18, A3XX_GRAS_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO,
+		A3XX_RBBM_PERFCTR_RAS_0_HI, 19, A3XX_GRAS_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO,
+		A3XX_RBBM_PERFCTR_RAS_1_HI, 20, A3XX_GRAS_PERFCOUNTER3_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO,
+		A3XX_RBBM_PERFCTR_UCHE_0_HI, 21,
+		A3XX_UCHE_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO,
+		A3XX_RBBM_PERFCTR_UCHE_1_HI, 22,
+		A3XX_UCHE_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO,
+		A3XX_RBBM_PERFCTR_UCHE_2_HI, 23,
+		A3XX_UCHE_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO,
+		A3XX_RBBM_PERFCTR_UCHE_3_HI, 24,
+		A3XX_UCHE_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO,
+		A3XX_RBBM_PERFCTR_UCHE_4_HI, 25,
+		A3XX_UCHE_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO,
+		A3XX_RBBM_PERFCTR_UCHE_5_HI, 26,
+		A3XX_UCHE_PERFCOUNTER5_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO,
+		A3XX_RBBM_PERFCTR_TP_0_HI, 27, A3XX_TP_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO,
+		A3XX_RBBM_PERFCTR_TP_1_HI, 28, A3XX_TP_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO,
+		A3XX_RBBM_PERFCTR_TP_2_HI, 29, A3XX_TP_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO,
+		A3XX_RBBM_PERFCTR_TP_3_HI, 30, A3XX_TP_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO,
+		A3XX_RBBM_PERFCTR_TP_4_HI, 31, A3XX_TP_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO,
+		A3XX_RBBM_PERFCTR_TP_5_HI, 32, A3XX_TP_PERFCOUNTER5_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO,
+		A3XX_RBBM_PERFCTR_SP_0_HI, 33, A3XX_SP_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO,
+		A3XX_RBBM_PERFCTR_SP_1_HI, 34, A3XX_SP_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO,
+		A3XX_RBBM_PERFCTR_SP_2_HI, 35, A3XX_SP_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO,
+		A3XX_RBBM_PERFCTR_SP_3_HI, 36, A3XX_SP_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO,
+		A3XX_RBBM_PERFCTR_SP_4_HI, 37, A3XX_SP_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO,
+		A3XX_RBBM_PERFCTR_SP_5_HI, 38, A3XX_SP_PERFCOUNTER5_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO,
+		A3XX_RBBM_PERFCTR_SP_6_HI, 39, A3XX_SP_PERFCOUNTER6_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO,
+		A3XX_RBBM_PERFCTR_SP_7_HI, 40, A3XX_SP_PERFCOUNTER7_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO,
+		A3XX_RBBM_PERFCTR_RB_0_HI, 41, A3XX_RB_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO,
+		A3XX_RBBM_PERFCTR_RB_1_HI, 42, A3XX_RB_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO,
+		A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 },
+	/*
+	 * A3XX_RBBM_PERFCTR_PWR_1_LO is used for frequency scaling and removed
+	 * from the pool of available counters
+	 */
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vbif2[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW0,
+		A3XX_VBIF2_PERF_CNT_HIGH0, -1, A3XX_VBIF2_PERF_CNT_SEL0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW1,
+		A3XX_VBIF2_PERF_CNT_HIGH1, -1, A3XX_VBIF2_PERF_CNT_SEL1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW2,
+		A3XX_VBIF2_PERF_CNT_HIGH2, -1, A3XX_VBIF2_PERF_CNT_SEL2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW3,
+		A3XX_VBIF2_PERF_CNT_HIGH3, -1, A3XX_VBIF2_PERF_CNT_SEL3 },
+};
+/*
+ * Place the EN register in the select field since the VBIF power counters
+ * don't have a select register to program
+ */
+static struct adreno_perfcount_register a3xx_perfcounters_vbif2_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0,
+		0, A3XX_VBIF2_PERF_PWR_CNT_LOW0,
+		A3XX_VBIF2_PERF_PWR_CNT_HIGH0, -1,
+		A3XX_VBIF2_PERF_PWR_CNT_EN0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0,
+		0, A3XX_VBIF2_PERF_PWR_CNT_LOW1,
+		A3XX_VBIF2_PERF_PWR_CNT_HIGH1, -1,
+		A3XX_VBIF2_PERF_PWR_CNT_EN1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0,
+		0, A3XX_VBIF2_PERF_PWR_CNT_LOW2,
+		A3XX_VBIF2_PERF_PWR_CNT_HIGH2, -1,
+		A3XX_VBIF2_PERF_PWR_CNT_EN2 },
+};
+
+#define A3XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \
+	ADRENO_PERFCOUNTER_GROUP(a3xx, offset, name, enable, read, load)
+
+#define A3XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, enable, read, load) \
+	ADRENO_PERFCOUNTER_GROUP_FLAGS(a3xx, offset, name, flags, enable, read, load)
+
+#define A3XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \
+	A3XX_PERFCOUNTER_GROUP(offset, name, a3xx_counter_enable,\
+		a3xx_counter_read, a3xx_counter_load)
+
+static const struct adreno_perfcount_group
+a3xx_perfcounter_groups[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	A3XX_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(RBBM, rbbm),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(PC, pc),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(TP, tp),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(SP, sp),
+	A3XX_REGULAR_PERFCOUNTER_GROUP(RB, rb),
+	A3XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		a3xx_counter_pwr_enable, a3xx_counter_pwr_read, NULL),
+	A3XX_PERFCOUNTER_GROUP(VBIF, vbif2,
+		a3xx_counter_vbif_enable, a3xx_counter_vbif_read, NULL),
+	A3XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif2_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		a3xx_counter_vbif_pwr_enable, a3xx_counter_vbif_pwr_read,
+		NULL),
+};
+
+const struct adreno_perfcounters adreno_a3xx_perfcounters = {
+	a3xx_perfcounter_groups,
+	ARRAY_SIZE(a3xx_perfcounter_groups),
+};

+ 458 - 0
qcom/opensource/graphics-kernel/adreno_a3xx_ringbuffer.c

@@ -0,0 +1,458 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a3xx.h"
+#include "adreno_pm4types.h"
+#include "adreno_ringbuffer.h"
+#include "adreno_trace.h"
+#include "kgsl_trace.h"
+
+static int a3xx_wait_reg(unsigned int *cmds, unsigned int addr,
+			unsigned int val, unsigned int mask,
+			unsigned int interval)
+{
+	cmds[0] = cp_type3_packet(CP_WAIT_REG_EQ, 4);
+	cmds[1] = addr;
+	cmds[2] = val;
+	cmds[3] = mask;
+	cmds[4] = interval;
+
+	return 5;
+}
+
+static int a3xx_vbif_lock(unsigned int *cmds)
+{
+	int count;
+
+	/*
+	 * glue commands together until next
+	 * WAIT_FOR_ME
+	 */
+	count = a3xx_wait_reg(cmds, A3XX_CP_WFI_PEND_CTR,
+			1, 0xFFFFFFFF, 0xF);
+
+	/* MMU-500 VBIF stall */
+	cmds[count++] = cp_type3_packet(CP_REG_RMW, 3);
+	cmds[count++] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0;
+	/* AND to unmask the HALT bit */
+	cmds[count++] = ~(VBIF_RECOVERABLE_HALT_CTRL);
+	/* OR to set the HALT bit */
+	cmds[count++] = 0x1;
+
+	/* Wait for acknowledgment */
+	count += a3xx_wait_reg(&cmds[count],
+			A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1,
+			1, 0xFFFFFFFF, 0xF);
+
+	return count;
+}
+
+static int a3xx_vbif_unlock(unsigned int *cmds)
+{
+	/* MMU-500 VBIF unstall */
+	cmds[0] = cp_type3_packet(CP_REG_RMW, 3);
+	cmds[1] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0;
+	/* AND to unmask the HALT bit */
+	cmds[2] = ~(VBIF_RECOVERABLE_HALT_CTRL);
+	/* OR to reset the HALT bit */
+	cmds[3] = 0;
+
+	/* release all commands since _vbif_lock() with wait_for_me */
+	cmds[4] = cp_type3_packet(CP_WAIT_FOR_ME, 1);
+	cmds[5] = 0;
+
+	return 6;
+}
+
+#define A3XX_GPU_OFFSET 0xa000
+
+static int a3xx_cp_smmu_reg(unsigned int *cmds,
+				u32 reg,
+				unsigned int num)
+{
+	cmds[0] = cp_type3_packet(CP_REG_WR_NO_CTXT, num + 1);
+	cmds[1] = (A3XX_GPU_OFFSET + reg) >> 2;
+
+	return 2;
+}
+
+/* This function is only needed for A3xx targets */
+static int a3xx_tlbiall(unsigned int *cmds)
+{
+	unsigned int tlbstatus = (A3XX_GPU_OFFSET +
+		KGSL_IOMMU_CTX_TLBSTATUS) >> 2;
+	int count;
+
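+	/* Issue TLBIALL, then TLBSYNC, and poll TLBSTATUS until the invalidate completes */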
+	count = a3xx_cp_smmu_reg(cmds, KGSL_IOMMU_CTX_TLBIALL, 1);
+	cmds[count++] = 1;
+
+	count += a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TLBSYNC, 1);
+	cmds[count++] = 0;
+
+	count += a3xx_wait_reg(&cmds[count], tlbstatus, 0,
+			KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE, 0xF);
+
+	return count;
+}
+
+/* offset at which a nop command is placed in setstate */
+#define KGSL_IOMMU_SETSTATE_NOP_OFFSET	1024
+
+static int a3xx_rb_pagetable_switch(struct adreno_device *adreno_dev,
+		struct kgsl_pagetable *pagetable, u32 *cmds)
+{
+	u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	int count = 0;
+
+	/* Skip pagetable switch if current context is using default PT. */
+	if (pagetable == device->mmu.defaultpagetable)
+		return 0;
+	/*
+	 * Adding an indirect buffer ensures that the prefetch stalls until
+	 * the commands in the indirect buffer have completed. We stall the
+	 * prefetch with a nop indirect buffer when updating pagetables
+	 * because it provides more stable synchronization.
+	 */
+	cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1);
+	cmds[count++] = 0;
+
+	cmds[count++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
+	cmds[count++] = lower_32_bits(iommu->setstate->gpuaddr);
+	cmds[count++] = 2;
+
+	cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	cmds[count++] = 0;
+
+	cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1);
+	cmds[count++] = 0;
+
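+	/* Halt VBIF while TTBR0 is reprogrammed, then resume and invalidate the TLB */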
+	count += a3xx_vbif_lock(&cmds[count]);
+
+	count += a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TTBR0, 2);
+	cmds[count++] = lower_32_bits(ttbr0);
+	cmds[count++] = upper_32_bits(ttbr0);
+
+	count += a3xx_vbif_unlock(&cmds[count]);
+
+	count += a3xx_tlbiall(&cmds[count]);
+
+	/* wait for me to finish the TLBI */
+	cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1);
+	cmds[count++] = 0;
+	cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	cmds[count++] = 0;
+
+	/* Invalidate the state */
+	cmds[count++] = cp_type3_packet(CP_INVALIDATE_STATE, 1);
+	cmds[count++] = 0x7ffff;
+
+	return count;
+}
+
+#define RB_SOPTIMESTAMP(device, rb) \
+	       MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
+#define CTXT_SOPTIMESTAMP(device, drawctxt) \
+	       MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
+
+#define RB_EOPTIMESTAMP(device, rb) \
+	       MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
+#define CTXT_EOPTIMESTAMP(device, drawctxt) \
+	       MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
+
+int a3xx_ringbuffer_init(struct adreno_device *adreno_dev)
+{
+	adreno_dev->num_ringbuffers = 1;
+
+	adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
+
+	return adreno_ringbuffer_setup(adreno_dev,
+		&adreno_dev->ringbuffers[0], 0);
+}
+
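+/*
+ * Upper bound on the number of dwords a3xx_ringbuffer_addcmds() adds around
+ * the caller-supplied commands (identifiers, timestamps, flushes and the
+ * optional power-on fixup); any unused space is given back by adjusting the
+ * write pointer at the end of the function.
+ */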
+#define A3XX_SUBMIT_MAX 55
+
+static int a3xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 flags, u32 *in, u32 dwords, u32 timestamp,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 size = A3XX_SUBMIT_MAX + dwords;
+	u32 *cmds, index = 0;
+	u64 profile_gpuaddr;
+	u32 profile_dwords;
+
+	if (adreno_drawctxt_detached(drawctxt))
+		return -ENOENT;
+
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return -EPROTO;
+
+	rb->timestamp++;
+
+	if (drawctxt)
+		drawctxt->internal_timestamp = rb->timestamp;
+
+	cmds = adreno_ringbuffer_allocspace(rb, size);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	/* Identify the start of a command */
+	cmds[index++] = cp_type3_packet(CP_NOP, 1);
+	cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
+
+	if (IS_PWRON_FIXUP(flags)) {
+		cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
+		cmds[index++] = 0;
+
+		cmds[index++] = cp_type3_packet(CP_NOP, 1);
+		cmds[index++] = PWRON_FIXUP_IDENTIFIER;
+
+		cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
+		cmds[index++] = lower_32_bits(adreno_dev->pwron_fixup->gpuaddr);
+		cmds[index++] = adreno_dev->pwron_fixup_dwords;
+
+		cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
+		cmds[index++] = 0;
+	}
+
+	profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
+		drawctxt, &profile_dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	if (drawctxt) {
+		cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2);
+		cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+	}
+
+	cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2);
+	cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
+	cmds[index++] = rb->timestamp;
+
+	if (IS_NOTPROTECTED(flags)) {
+		cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
+		cmds[index++] = 0;
+	}
+
+	memcpy(&cmds[index], in, dwords << 2);
+	index += dwords;
+
+	if (IS_NOTPROTECTED(flags)) {
+		cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
+		cmds[index++] = 1;
+	}
+
+	/*
+	 * Flush HLSQ lazy updates to make sure there are no resources pending
+	 * for indirect loads after the timestamp
+	 */
+
+	cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 1);
+	cmds[index++] = 0x07; /* HLSQ FLUSH */
+	cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	cmds[index++] = 0;
+
+	profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
+		drawctxt, &profile_dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	/*
+	 * If this is an internal command, just write the ringbuffer timestamp;
+	 * otherwise write both the context and ringbuffer timestamps
+	 */
+	if (!drawctxt) {
+		cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3);
+		cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	} else {
+		cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3);
+		cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
+		cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+
+		cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3);
+		cmds[index++] = CACHE_FLUSH_TS;
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	}
+
+	/* Trigger a context rollover */
+	cmds[index++] = cp_type3_packet(CP_SET_CONSTANT, 2);
+	cmds[index++] = (4 << 16) | (A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000);
+	cmds[index++] = 0;
+
+	if (IS_WFI(flags)) {
+		cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+		cmds[index++] = 0;
+	}
+
+	/* Adjust the write pointer for the number of dwords we actually wrote */
+	rb->_wptr -= (size - index);
+
+	kgsl_pwrscale_busy(device);
+	kgsl_regwrite(device, A3XX_CP_RB_WPTR, rb->_wptr);
+	rb->wptr = rb->_wptr;
+
+	return 0;
+}
+
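+/*
+ * a3xx_rb_context_switch() - Emit a pagetable switch (if the incoming context
+ * uses a different pagetable), record the incoming context id in the memstore
+ * and invalidate the UCHE. The commands are submitted as an internal (NULL
+ * drawctxt) ringbuffer submission.
+ */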
+static int a3xx_rb_context_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_pagetable *pagetable =
+		adreno_drawctxt_get_pagetable(drawctxt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int count = 0;
+	u32 cmds[64];
+
+	if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable)
+		count += a3xx_rb_pagetable_switch(adreno_dev, pagetable, cmds);
+
+	cmds[count++] = cp_type3_packet(CP_NOP, 1);
+	cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
+
+	cmds[count++] = cp_type3_packet(CP_MEM_WRITE, 2);
+	cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
+				current_context));
+	cmds[count++] = drawctxt->base.id;
+
+	cmds[count++] = cp_type3_packet(CP_MEM_WRITE, 2);
+	cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+		KGSL_MEMSTORE_GLOBAL, current_context));
+	cmds[count++] = drawctxt->base.id;
+
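+	/* Invalidate the UCHE so the new context does not see stale data */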
+	cmds[count++] = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+	cmds[count++] = 0;
+	cmds[count++] = 0x90000000;
+
+	return a3xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
+			cmds, count, 0, NULL);
+}
+
+static int a3xx_drawctxt_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (rb->drawctxt_active == drawctxt)
+		return 0;
+
+	if (kgsl_context_detached(&drawctxt->base))
+		return -ENOENT;
+
+	if (!_kgsl_context_get(&drawctxt->base))
+		return -ENOENT;
+
+	trace_adreno_drawctxt_switch(rb, drawctxt);
+
+	a3xx_rb_context_switch(adreno_dev, rb, drawctxt);
+
+	/* Release the current drawctxt as soon as the new one is switched */
+	adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
+		rb, rb->timestamp);
+
+	rb->drawctxt_active = drawctxt;
+	return 0;
+}
+
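+/*
+ * Fixed number of dwords needed by a3xx_ringbuffer_submitcmd(): the
+ * START_IB_IDENTIFIER and END_IB_IDENTIFIER NOPs (two dwords each). Each IB
+ * adds at most four more dwords on top of this.
+ */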
+#define A3XX_COMMAND_DWORDS 4
+
+int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	struct adreno_ringbuffer *rb = drawctxt->rb;
+	int ret = 0, numibs = 0, index = 0;
+	u32 *cmds;
+
+	/* Count the number of IBs (if we are not skipping) */
+	if (!IS_SKIP(flags)) {
+		struct list_head *tmp;
+
+		list_for_each(tmp, &cmdobj->cmdlist)
+			numibs++;
+	}
+
+	cmds = kmalloc((A3XX_COMMAND_DWORDS + (numibs * 4)) << 2, GFP_KERNEL);
+	if (!cmds) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	cmds[index++] = cp_type3_packet(CP_NOP, 1);
+	cmds[index++] = START_IB_IDENTIFIER;
+
+	if (numibs) {
+		struct kgsl_memobj_node *ib;
+
+		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
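+			/*
+			 * If this IB is to be skipped, emit a NOP with a
+			 * three dword payload so that the following
+			 * CP_INDIRECT_BUFFER_PFE packet is consumed as NOP
+			 * data instead of being executed.
+			 */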
+			if (ib->priv & MEMOBJ_SKIP ||
+			    (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE
+			     && !IS_PREAMBLE(flags)))
+				cmds[index++] = cp_type3_packet(CP_NOP, 3);
+
+			cmds[index++] =
+				cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
+			cmds[index++] = lower_32_bits(ib->gpuaddr);
+			cmds[index++] = ib->size >> 2;
+		}
+	}
+
+	cmds[index++] = cp_type3_packet(CP_NOP, 1);
+	cmds[index++] = END_IB_IDENTIFIER;
+
+	ret = a3xx_drawctxt_switch(adreno_dev, rb, drawctxt);
+
+	/*
+	 * In the unlikely event of an error in the drawctxt switch,
+	 * treat it like a hang
+	 */
+	if (ret) {
+		/*
+		 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
+		 * the upper layers know how to handle it
+		 */
+		if (ret != -ENOSPC && ret != -ENOENT)
+			dev_err(device->dev,
+				     "Unable to switch draw context: %d\n",
+				     ret);
+		goto done;
+	}
+
+	adreno_drawobj_set_constraint(device, drawobj);
+
+	ret = a3xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
+		flags, cmds, index, drawobj->timestamp, NULL);
+
+done:
+	trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
+		drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
+
+	kfree(cmds);
+	return ret;
+}

+ 449 - 0
qcom/opensource/graphics-kernel/adreno_a3xx_snapshot.c

@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/io.h>
+
+#include "adreno.h"
+#include "adreno_a3xx.h"
+#include "adreno_snapshot.h"
+#include "kgsl_device.h"
+
+/*
+ * Set of registers to dump for A3XX on snapshot.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+
+static const unsigned int a3xx_registers[] = {
+	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
+	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
+	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
+	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
+	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
+	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f6, 0x01f8, 0x01f9,
+	0x01fc, 0x01ff,
+	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
+	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
+	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
+	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
+	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
+	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5,
+	0x0e41, 0x0e45, 0x0e64, 0x0e65,
+	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
+	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
+	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
+	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
+	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
+	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
+	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
+	0x2240, 0x227e,
+	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
+	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
+	0x22ff, 0x22ff, 0x2340, 0x2343,
+	0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
+	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
+	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
+	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
+	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
+	0x25f0, 0x25f0,
+	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
+	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
+	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
+	0x300C, 0x300E, 0x301C, 0x301D,
+	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
+	0x303C, 0x303C, 0x305E, 0x305F,
+};
+
+/*
+ * HLSQ register ranges kept out of the main register list: reading them
+ * while the HLSQ block is busy may cause the device to hang, so they are
+ * only dumped after the debug bus confirms the block is idle (see
+ * _snapshot_hlsq_regs()).
+ */
+static const unsigned int a3xx_hlsq_registers[] = {
+	0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23,
+	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a,
+	0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
+};
+
+/* Shader memory size in words */
+#define SHADER_MEMORY_SIZE 0x4000
+
+/**
+ * _rbbm_debug_bus_read - Helper function to read data from the RBBM
+ * debug bus.
+ * @device: GPU device to read/write registers
+ * @block_id: Debug bus block to read from
+ * @index: Index in the debug bus block to read
+ * @val: Value of the register read
+ */
+static void _rbbm_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int block = (block_id << 8) | 1 << 16;
+
+	kgsl_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index);
+	kgsl_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val);
+}
+
+/**
+ * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader
+ * memory to the snapshot buffer.
+ * @device: GPU device whose shader memory is to be dumped
+ * @buf: Pointer to binary snapshot data blob being made
+ * @remain: Number of remaining bytes in the snapshot blob
+ * @priv: Unused parameter
+ */
+static size_t a3xx_snapshot_shader_memory(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	void *data = buf + sizeof(*header);
+	unsigned int shader_read_len = SHADER_MEMORY_SIZE;
+
+	if (remain < DEBUG_SECTION_SZ(shader_read_len)) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_SHADER_MEMORY;
+	header->size = shader_read_len;
+
+	/* Map shader memory to kernel, for dumping */
+	if (IS_ERR_OR_NULL(device->shader_mem_virt)) {
+		struct resource *res;
+
+		res = platform_get_resource_byname(device->pdev,
+			IORESOURCE_MEM, "kgsl_3d0_shader_memory");
+
+		if (res)
+			device->shader_mem_virt =
+				devm_ioremap_resource(&device->pdev->dev, res);
+	}
+
+	if (IS_ERR_OR_NULL(device->shader_mem_virt)) {
+		dev_err(device->dev, "Unable to map the shader memory\n");
+		return 0;
+	}
+
+	memcpy_fromio(data, device->shader_mem_virt, shader_read_len << 2);
+
+	return DEBUG_SECTION_SZ(shader_read_len);
+}
+
+static size_t a3xx_snapshot_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header
+		= (struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size;
+
+	size = (0x40 * sizeof(unsigned int)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = block->block_id;
+	header->count = 0x40;
+
+	for (i = 0; i < 0x40; i++)
+		_rbbm_debug_bus_read(device, block->block_id, i, &data[i]);
+
+	return size;
+}
+
+static struct adreno_debugbus_block debugbus_blocks[] = {
+	{ RBBM_BLOCK_ID_CP, 0x52, },
+	{ RBBM_BLOCK_ID_RBBM, 0x40, },
+	{ RBBM_BLOCK_ID_VBIF, 0x40, },
+	{ RBBM_BLOCK_ID_HLSQ, 0x40, },
+	{ RBBM_BLOCK_ID_UCHE, 0x40, },
+	{ RBBM_BLOCK_ID_PC, 0x40, },
+	{ RBBM_BLOCK_ID_VFD, 0x40, },
+	{ RBBM_BLOCK_ID_VPC, 0x40, },
+	{ RBBM_BLOCK_ID_TSE, 0x40, },
+	{ RBBM_BLOCK_ID_RAS, 0x40, },
+	{ RBBM_BLOCK_ID_VSC, 0x40, },
+	{ RBBM_BLOCK_ID_SP_0, 0x40, },
+	{ RBBM_BLOCK_ID_SP_1, 0x40, },
+	{ RBBM_BLOCK_ID_SP_2, 0x40, },
+	{ RBBM_BLOCK_ID_SP_3, 0x40, },
+	{ RBBM_BLOCK_ID_TPL1_0, 0x40, },
+	{ RBBM_BLOCK_ID_TPL1_1, 0x40, },
+	{ RBBM_BLOCK_ID_TPL1_2, 0x40, },
+	{ RBBM_BLOCK_ID_TPL1_3, 0x40, },
+	{ RBBM_BLOCK_ID_RB_0, 0x40, },
+	{ RBBM_BLOCK_ID_RB_1, 0x40, },
+	{ RBBM_BLOCK_ID_RB_2, 0x40, },
+	{ RBBM_BLOCK_ID_RB_3, 0x40, },
+	{ RBBM_BLOCK_ID_MARB_0, 0x40, },
+	{ RBBM_BLOCK_ID_MARB_1, 0x40, },
+	{ RBBM_BLOCK_ID_MARB_2, 0x40, },
+	{ RBBM_BLOCK_ID_MARB_3, 0x40, },
+};
+
+static void a3xx_snapshot_debugbus(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(debugbus_blocks); i++) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot,
+			a3xx_snapshot_debugbus_block,
+			(void *) &debugbus_blocks[i]);
+	}
+}
+
+static void _snapshot_hlsq_regs(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	unsigned int next_pif = 0;
+
+	/*
+	 * Trying to read HLSQ registers when the HLSQ block is busy
+	 * will cause the device to hang.  The RBBM_DEBUG_BUS has information
+	 * that will tell us if the HLSQ block is busy or not.  Read values
+	 * from the debug bus to ensure the HLSQ block is not busy (this
+	 * is hardware dependent).  If the HLSQ block is busy do not
+	 * dump the registers, otherwise dump the HLSQ registers.
+	 */
+
+	/*
+	 * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0]
+	 * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0]
+	 *
+	 * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10))
+	 * then dump HLSQ registers
+	 */
+
+	/* check tpif */
+	_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif);
+	next_pif &= 0x1f;
+	if (next_pif != 0 && next_pif != 1 && next_pif != 28)
+		return;
+
+	/* check spif */
+	_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif);
+	next_pif &= 0x3f;
+	if (next_pif != 0 && next_pif != 1 && next_pif != 10)
+		return;
+
+	SNAPSHOT_REGISTERS(device, snapshot, a3xx_hlsq_registers);
+}
+
+#define VPC_MEM_SIZE 512
+
+static size_t a3xx_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size = 4 * VPC_MEM_SIZE;
+	int bank, addr, i = 0;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "VPC MEMORY");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_VPC_MEMORY;
+	header->size = size;
+
+	for (bank = 0; bank < 4; bank++) {
+		for (addr = 0; addr < VPC_MEM_SIZE; addr++) {
+			unsigned int val = bank | (addr << 4);
+
+			kgsl_regwrite(device, A3XX_VPC_VPC_DEBUG_RAM_SEL, val);
+			kgsl_regread(device, A3XX_VPC_VPC_DEBUG_RAM_READ,
+				&data[i++]);
+		}
+	}
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+static size_t a3xx_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4);
+	size_t size = fw->size - 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PM4_RAM;
+	header->size = size;
+
+	/*
+	 * Read the firmware from the GPU rather than use our cache in order to
+	 * try to catch mis-programming or corruption in the hardware.  We do
+	 * use the cached version of the size, however, instead of trying to
+	 * maintain always changing hardcoded constants
+	 */
+	kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ME_RAM_RADDR,
+		A3XX_CP_ME_RAM_DATA, data, size);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+static size_t a3xx_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP);
+	int size = fw->size - 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PFP_RAM;
+	header->size = size;
+
+	/*
+	 * Read the firmware from the GPU rather than use our cache in order to
+	 * try to catch mis-programming or corruption in the hardware.  We do
+	 * use the cached version of the size, however, instead of trying to
+	 * maintain always changing hardcoded constants
+	 */
+	kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_PFP_UCODE_ADDR,
+		A3XX_CP_PFP_UCODE_DATA, data, size);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+static size_t a3xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf;
+	u32 *data = (u32 *) (buf + sizeof(*header));
+
+	if (remain < DEBUG_SECTION_SZ(128)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_ROQ;
+	header->size = 128;
+
+	kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ROQ_ADDR,
+		A3XX_CP_ROQ_DATA, data, 128);
+
+	return DEBUG_SECTION_SZ(128);
+}
+
+static size_t a3xx_snapshot_cp_meq(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf;
+	u32 *data = (u32 *) (buf + sizeof(*header));
+
+	if (remain < DEBUG_SECTION_SZ(16)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_MEQ;
+	header->size = 16;
+
+	kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_MEQ_ADDR,
+		A3XX_CP_MEQ_DATA, data, 16);
+
+	return DEBUG_SECTION_SZ(16);
+}
+
+/*
+ * a3xx_snapshot() - A3XX GPU snapshot function
+ * @adreno_dev: Device being snapshotted
+ * @snapshot: Snapshot metadata
+ *
+ * This is where all of the A3XX specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+void a3xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	/* Disable Clock gating temporarily for the debug bus to work */
+	kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, 0x0);
+
+	/* Save some CP information that the generic snapshot uses */
+	kgsl_regread(device, A3XX_CP_IB1_BASE, &reg);
+	snapshot->ib1base = (u64) reg;
+
+	kgsl_regread(device, A3XX_CP_IB2_BASE, &reg);
+	snapshot->ib2base = (u64) reg;
+
+	kgsl_regread(device, A3XX_CP_IB1_BUFSZ, &snapshot->ib1size);
+	kgsl_regread(device, A3XX_CP_IB2_BUFSZ, &snapshot->ib2size);
+
+	SNAPSHOT_REGISTERS(device, snapshot, a3xx_registers);
+
+	_snapshot_hlsq_regs(device, snapshot);
+
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A3XX_CP_STATE_DEBUG_INDEX, A3XX_CP_STATE_DEBUG_DATA, 0, 0x14);
+
+	/* CP_ME indexed registers */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A3XX_CP_ME_CNTL, A3XX_CP_ME_STATUS, 64, 44);
+
+	/* VPC memory */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a3xx_snapshot_vpc_memory, NULL);
+
+	/* CP MEQ */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot,
+		a3xx_snapshot_cp_meq, NULL);
+
+	/* Shader working/shadow memory */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a3xx_snapshot_shader_memory, NULL);
+
+	/* CP PFP and PM4 */
+
+	/*
+	 * Reading the microcode while the CP is running will
+	 * basically move the CP instruction pointer to
+	 * whatever address we read. Big badaboom ensues. Stop the CP
+	 * (if it isn't already stopped) to ensure that we are safe.
+	 * We do this here and not earlier to avoid corrupting the RBBM
+	 * status and CP registers - by the time we get here we don't
+	 * care about the contents of the CP anymore.
+	 */
+
+	kgsl_regread(device, A3XX_CP_ME_CNTL, &reg);
+	reg |= (1 << 27) | (1 << 28);
+	kgsl_regwrite(device, A3XX_CP_ME_CNTL, reg);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a3xx_snapshot_cp_pfp_ram, NULL);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a3xx_snapshot_cp_pm4_ram, NULL);
+
+	/* CP ROQ */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a3xx_snapshot_cp_roq, NULL);
+
+	a3xx_snapshot_debugbus(device, snapshot);
+}

+ 2500 - 0
qcom/opensource/graphics-kernel/adreno_a5xx.c

@@ -0,0 +1,2500 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk/qcom.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/qcom_scm.h>
+#include <linux/slab.h>
+
+#include "adreno.h"
+#include "adreno_a5xx.h"
+#include "adreno_a5xx_packets.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+#include "kgsl_trace.h"
+
+static int critical_packet_constructed;
+static unsigned int crit_pkts_dwords;
+
+static void a5xx_irq_storm_worker(struct work_struct *work);
+static int _read_fw2_block_header(struct kgsl_device *device,
+		uint32_t *header, uint32_t remain,
+		uint32_t id, uint32_t major, uint32_t minor);
+static void a5xx_gpmu_reset(struct work_struct *work);
+static int a5xx_gpmu_init(struct adreno_device *adreno_dev);
+
+/*
+ * Number of times to check if the regulator is enabled before
+ * giving up and returning failure.
+ */
+#define PWR_RETRY 100
+
+/*
+ * Number of times to check if the GPMU firmware is initialized before
+ * giving up and returning failure.
+ */
+#define GPMU_FW_INIT_RETRY 5000
+
+#define A530_QFPROM_RAW_PTE_ROW0_MSB 0x134
+#define A530_QFPROM_RAW_PTE_ROW2_MSB 0x144
+
+#define A5XX_INT_MASK \
+	((1 << A5XX_INT_RBBM_AHB_ERROR) |		\
+	 (1 << A5XX_INT_RBBM_TRANSFER_TIMEOUT) |		\
+	 (1 << A5XX_INT_RBBM_ME_MS_TIMEOUT) |		\
+	 (1 << A5XX_INT_RBBM_PFP_MS_TIMEOUT) |		\
+	 (1 << A5XX_INT_RBBM_ETS_MS_TIMEOUT) |		\
+	 (1 << A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW) |		\
+	 (1 << A5XX_INT_RBBM_GPC_ERROR) |		\
+	 (1 << A5XX_INT_CP_HW_ERROR) |	\
+	 (1 << A5XX_INT_CP_CACHE_FLUSH_TS) |		\
+	 (1 << A5XX_INT_RBBM_ATB_BUS_OVERFLOW) |	\
+	 (1 << A5XX_INT_MISC_HANG_DETECT) |		\
+	 (1 << A5XX_INT_UCHE_OOB_ACCESS) |		\
+	 (1 << A5XX_INT_UCHE_TRAP_INTR) |		\
+	 (1 << A5XX_INT_CP_SW) |			\
+	 (1 << A5XX_INT_GPMU_FIRMWARE) |                \
+	 (1 << A5XX_INT_GPMU_VOLTAGE_DROOP))
+
+static int a5xx_probe(struct platform_device *pdev,
+	u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	int ret;
+
+	adreno_dev = (struct adreno_device *)
+		of_device_get_match_data(&pdev->dev);
+
+	memset(adreno_dev, 0, sizeof(*adreno_dev));
+
+	adreno_dev->gpucore = gpucore;
+	adreno_dev->chipid = chipid;
+
+	adreno_reg_offset_init(gpucore->gpudev->reg_offsets);
+
+	adreno_dev->sptp_pc_enabled =
+		ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC);
+
+	if (adreno_is_a540(adreno_dev))
+		adreno_dev->throttling_enabled = true;
+
+	adreno_dev->hwcg_enabled = true;
+	adreno_dev->lm_enabled =
+		ADRENO_FEATURE(adreno_dev, ADRENO_LM);
+
+	/* Setup defaults that might get changed by the fuse bits */
+	adreno_dev->lm_leakage = 0x4e001a;
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	timer_setup(&device->idle_timer, kgsl_timer, 0);
+
+	INIT_WORK(&device->idle_check_ws, kgsl_idle_check);
+
+	adreno_dev->irq_mask = A5XX_INT_MASK;
+
+	ret = adreno_device_probe(pdev, adreno_dev);
+	if (ret)
+		return ret;
+
+	a5xx_coresight_init(adreno_dev);
+
+	return adreno_dispatcher_init(adreno_dev);
+}
+
+static void _do_fixup(const struct adreno_critical_fixup *fixups, int count,
+		uint64_t *gpuaddrs, unsigned int *buffer)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		buffer[fixups[i].lo_offset] =
+			lower_32_bits(gpuaddrs[fixups[i].buffer]) |
+			fixups[i].mem_offset;
+
+		buffer[fixups[i].hi_offset] =
+			upper_32_bits(gpuaddrs[fixups[i].buffer]);
+	}
+}
+
+static int a5xx_critical_packet_construct(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int *cmds;
+	uint64_t gpuaddrs[4];
+
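+	/*
+	 * The non-secure buffer is four pages: page 0 holds the critical
+	 * packet stream itself and pages 1-3 hold the mem01/mem02/mem03 data
+	 * that the fixup tables patch GPU addresses into. A separate single
+	 * secure page rounds out the four gpuaddrs entries.
+	 */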
+	adreno_dev->critpkts = kgsl_allocate_global(device,
+		PAGE_SIZE * 4, 0, 0, 0, "crit_pkts");
+	if (IS_ERR(adreno_dev->critpkts))
+		return PTR_ERR(adreno_dev->critpkts);
+
+	adreno_dev->critpkts_secure = kgsl_allocate_global(device,
+		PAGE_SIZE, 0, KGSL_MEMFLAGS_SECURE, 0, "crit_pkts_secure");
+	if (IS_ERR(adreno_dev->critpkts_secure))
+		return PTR_ERR(adreno_dev->critpkts_secure);
+
+	cmds = adreno_dev->critpkts->hostptr;
+
+	gpuaddrs[0] = adreno_dev->critpkts_secure->gpuaddr;
+	gpuaddrs[1] = adreno_dev->critpkts->gpuaddr + PAGE_SIZE;
+	gpuaddrs[2] = adreno_dev->critpkts->gpuaddr + (PAGE_SIZE * 2);
+	gpuaddrs[3] = adreno_dev->critpkts->gpuaddr + (PAGE_SIZE * 3);
+
+	crit_pkts_dwords = ARRAY_SIZE(_a5xx_critical_pkts);
+
+	memcpy(cmds, _a5xx_critical_pkts, crit_pkts_dwords << 2);
+
+	_do_fixup(critical_pkt_fixups, ARRAY_SIZE(critical_pkt_fixups),
+		gpuaddrs, cmds);
+
+	cmds = adreno_dev->critpkts->hostptr + PAGE_SIZE;
+	memcpy(cmds, _a5xx_critical_pkts_mem01,
+			ARRAY_SIZE(_a5xx_critical_pkts_mem01) << 2);
+
+	cmds = adreno_dev->critpkts->hostptr + (PAGE_SIZE * 2);
+	memcpy(cmds, _a5xx_critical_pkts_mem02,
+			ARRAY_SIZE(_a5xx_critical_pkts_mem02) << 2);
+
+	cmds = adreno_dev->critpkts->hostptr + (PAGE_SIZE * 3);
+	memcpy(cmds, _a5xx_critical_pkts_mem03,
+			ARRAY_SIZE(_a5xx_critical_pkts_mem03) << 2);
+
+	_do_fixup(critical_pkt_mem03_fixups,
+		ARRAY_SIZE(critical_pkt_mem03_fixups), gpuaddrs, cmds);
+
+	critical_packet_constructed = 1;
+
+	return 0;
+}
+
+static int a5xx_microcode_read(struct adreno_device *adreno_dev);
+
+static int a5xx_init(struct adreno_device *adreno_dev)
+{
+	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
+	int ret;
+
+	ret = a5xx_ringbuffer_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a5xx_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	if (a5xx_has_gpmu(adreno_dev))
+		INIT_WORK(&adreno_dev->gpmu_work, a5xx_gpmu_reset);
+
+	adreno_dev->highest_bank_bit = a5xx_core->highest_bank_bit;
+
+	INIT_WORK(&adreno_dev->irq_storm_work, a5xx_irq_storm_worker);
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS))
+		a5xx_critical_packet_construct(adreno_dev);
+
+	adreno_create_profile_buffer(adreno_dev);
+	a5xx_crashdump_init(adreno_dev);
+
+	return 0;
+}
+
+static const struct {
+	u32 reg;
+	u32 base;
+	u32 count;
+} a5xx_protected_blocks[] = {
+	/* RBBM */
+	{  A5XX_CP_PROTECT_REG_0,     0x004, 2 },
+	{  A5XX_CP_PROTECT_REG_0 + 1, 0x008, 3 },
+	{  A5XX_CP_PROTECT_REG_0 + 2, 0x010, 4 },
+	{  A5XX_CP_PROTECT_REG_0 + 3, 0x020, 5 },
+	{  A5XX_CP_PROTECT_REG_0 + 4, 0x040, 6 },
+	{  A5XX_CP_PROTECT_REG_0 + 5, 0x080, 6 },
+	/* Content protection */
+	{  A5XX_CP_PROTECT_REG_0 + 6, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 4 },
+	{  A5XX_CP_PROTECT_REG_0 + 7, A5XX_RBBM_SECVID_TRUST_CNTL, 1 },
+	/* CP */
+	{  A5XX_CP_PROTECT_REG_0 + 8, 0x800, 6 },
+	{  A5XX_CP_PROTECT_REG_0 + 9, 0x840, 3 },
+	{  A5XX_CP_PROTECT_REG_0 + 10, 0x880, 5 },
+	{  A5XX_CP_PROTECT_REG_0 + 11, 0xaa0, 0 },
+	/* RB */
+	{  A5XX_CP_PROTECT_REG_0 + 12, 0xcc0, 0 },
+	{  A5XX_CP_PROTECT_REG_0 + 13, 0xcf0, 1 },
+	/* VPC */
+	{  A5XX_CP_PROTECT_REG_0 + 14, 0xe68, 3 },
+	{  A5XX_CP_PROTECT_REG_0 + 15, 0xe70, 4 },
+	/* UCHE */
+	{  A5XX_CP_PROTECT_REG_0 + 16, 0xe80, 4 },
+	/* A5XX_CP_PROTECT_REG_17 will be used for SMMU */
+	/* A5XX_CP_PROTECT_REG_18 - A5XX_CP_PROTECT_REG_31 are available */
+};
+
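+/*
+ * Pack a single CP protect entry: the register base (shifted left by 2) and
+ * the log2 of the protected range size (shifted left by 24), combined with
+ * the 0x60000000 control bits used for every entry.
+ */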
+static void _setprotectreg(struct kgsl_device *device, u32 offset,
+		u32 base, u32 count)
+{
+	kgsl_regwrite(device, offset, 0x60000000 | (count << 24) | (base << 2));
+}
+
+static void a5xx_protect_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 reg;
+	int i;
+
+	/* enable access protection to privileged registers */
+	kgsl_regwrite(device, A5XX_CP_PROTECT_CNTL, 0x00000007);
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_protected_blocks); i++) {
+		reg = a5xx_protected_blocks[i].reg;
+
+		_setprotectreg(device, reg, a5xx_protected_blocks[i].base,
+			a5xx_protected_blocks[i].count);
+	}
+
+	/*
+	 * For a530 and a540 the SMMU region is 0x20000 bytes long and 0x10000
+	 * bytes on all other targets. The base offset for both is 0x40000.
+	 * Write it to the next available slot
+	 */
+	if (adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev))
+		_setprotectreg(device, reg + 1, 0x40000, ilog2(0x20000));
+	else
+		_setprotectreg(device, reg + 1, 0x40000, ilog2(0x10000));
+}
+
+/*
+ * _poll_gdsc_status() - Poll the GDSC status register
+ * @adreno_dev: The adreno device pointer
+ * @status_reg: Offset of the status register
+ * @status_value: The expected bit value
+ *
+ * Poll the status register till the power-on bit is equal to the
+ * expected value or the max retries are exceeded.
+ */
+static int _poll_gdsc_status(struct adreno_device *adreno_dev,
+				unsigned int status_reg,
+				unsigned int status_value)
+{
+	unsigned int reg, retry = PWR_RETRY;
+
+	/* Bit 20 is the power on bit of SPTP and RAC GDSC status register */
+	do {
+		udelay(1);
+		kgsl_regread(KGSL_DEVICE(adreno_dev), status_reg, &reg);
+	} while (((reg & BIT(20)) != (status_value << 20)) && retry--);
+	if ((reg & BIT(20)) != (status_value << 20))
+		return -ETIMEDOUT;
+	return 0;
+}
+
+static void a5xx_restore_isense_regs(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg, i, ramp = GPMU_ISENSE_SAVE;
+	static unsigned int isense_regs[6] = {0xFFFF}, isense_reg_addr[] = {
+		A5XX_GPU_CS_DECIMAL_ALIGN,
+		A5XX_GPU_CS_SENSOR_PARAM_CORE_1,
+		A5XX_GPU_CS_SENSOR_PARAM_CORE_2,
+		A5XX_GPU_CS_SW_OV_FUSE_EN,
+		A5XX_GPU_CS_ENDPOINT_CALIBRATION_DONE,
+		A5XX_GPMU_TEMP_SENSOR_CONFIG};
+
+	if (!adreno_is_a540(adreno_dev))
+		return;
+
+	/* read signature */
+	kgsl_regread(device, ramp++, &reg);
+
+	if (reg == 0xBABEFACE) {
+		/* store memory locations in buffer */
+		for (i = 0; i < ARRAY_SIZE(isense_regs); i++)
+			kgsl_regread(device, ramp + i, isense_regs + i);
+
+		/* clear signature */
+		kgsl_regwrite(device, GPMU_ISENSE_SAVE, 0x0);
+	}
+
+	/* if we never stored memory locations - do nothing */
+	if (isense_regs[0] == 0xFFFF)
+		return;
+
+	/* restore registers from memory */
+	for (i = 0; i < ARRAY_SIZE(isense_reg_addr); i++)
+		kgsl_regwrite(device, isense_reg_addr[i], isense_regs[i]);
+
+}
+
+/*
+ * a5xx_regulator_enable() - Enable any necessary HW regulators
+ * @adreno_dev: The adreno device pointer
+ *
+ * Some HW blocks may need their regulators explicitly enabled
+ * on a restart.  Clocks must be on during this call.
+ */
+static int a5xx_regulator_enable(struct adreno_device *adreno_dev)
+{
+	unsigned int ret;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED,
+			&adreno_dev->priv))
+		return 0;
+
+	if (!(adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev))) {
+		/* Halt the sp_input_clk at HM level */
+		kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x00000055);
+		a5xx_hwcg_set(adreno_dev, true);
+		/* Turn on sp_input_clk at HM level */
+		kgsl_regrmw(device, A5XX_RBBM_CLOCK_CNTL, 0xFF, 0);
+
+		set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED,
+			&adreno_dev->priv);
+		return 0;
+	}
+
+	/*
+	 * Turn on smaller power domain first to reduce voltage droop.
+	 * Set the default register values; set SW_COLLAPSE to 0.
+	 */
+	kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
+	/* Insert a delay between RAC and SPTP GDSC to reduce voltage droop */
+	udelay(3);
+	ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 1);
+	if (ret) {
+		dev_err(device->dev, "RBCCU GDSC enable failed\n");
+		return ret;
+	}
+
+	kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778000);
+	ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_SP_PWR_CLK_STATUS, 1);
+	if (ret) {
+		dev_err(device->dev, "SPTP GDSC enable failed\n");
+		return ret;
+	}
+
+	/* Disable SP clock */
+	kgsl_regrmw(device, A5XX_GPMU_GPMU_SP_CLOCK_CONTROL,
+		CNTL_IP_CLK_ENABLE, 0);
+	/* Enable hardware clockgating */
+	a5xx_hwcg_set(adreno_dev, true);
+	/* Enable SP clock */
+	kgsl_regrmw(device, A5XX_GPMU_GPMU_SP_CLOCK_CONTROL,
+		CNTL_IP_CLK_ENABLE, 1);
+
+	a5xx_restore_isense_regs(adreno_dev);
+
+	set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv);
+	return 0;
+}
+
+/*
+ * a5xx_regulator_disable() - Disable any necessary HW regulators
+ * @adreno_dev: The adreno device pointer
+ *
+ * Some HW blocks may need their regulators explicitly disabled
+ * on a power down to prevent current spikes.  Clocks must be on
+ * during this call.
+ */
+static void a5xx_regulator_disable(struct adreno_device *adreno_dev)
+{
+	unsigned int reg;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (adreno_is_a512(adreno_dev) || adreno_is_a508(adreno_dev))
+		return;
+
+	if (!test_and_clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED,
+		&adreno_dev->priv))
+		return;
+
+	/* If feature is not supported or not enabled */
+	if (!adreno_dev->sptp_pc_enabled) {
+		/* Set the default register values; set SW_COLLAPSE to 1 */
+		kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778001);
+		/*
+		 * Insert a delay between SPTP and RAC GDSC to reduce voltage
+		 * droop.
+		 */
+		udelay(3);
+		if (_poll_gdsc_status(adreno_dev,
+					A5XX_GPMU_SP_PWR_CLK_STATUS, 0))
+			dev_warn(device->dev, "SPTP GDSC disable failed\n");
+
+		kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778001);
+		if (_poll_gdsc_status(adreno_dev,
+					A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 0))
+			dev_warn(device->dev, "RBCCU GDSC disable failed\n");
+	} else if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED,
+			&adreno_dev->priv)) {
+		/* GPMU firmware is supposed to turn off SPTP & RAC GDSCs. */
+		kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, &reg);
+		if (reg & BIT(20))
+			dev_warn(device->dev, "SPTP GDSC is not disabled\n");
+		kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, &reg);
+		if (reg & BIT(20))
+			dev_warn(device->dev, "RBCCU GDSC is not disabled\n");
+		/*
+		 * GPMU firmware is supposed to set GMEM to non-retention.
+		 * Bit 14 is the memory core force on bit.
+		 */
+		kgsl_regread(device, A5XX_GPMU_RBCCU_CLOCK_CNTL, &reg);
+		if (reg & BIT(14))
+			dev_warn(device->dev, "GMEM is forced on\n");
+	}
+
+	if (adreno_is_a530(adreno_dev)) {
+		/* Reset VBIF before PC to avoid popping bogus FIFO entries */
+		kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD,
+			0x003C0000);
+		kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, 0);
+	}
+}
+
+/*
+ * a5xx_enable_pc() - Enable the GPMU based power collapse of the SPTP and RAC
+ * blocks
+ * @adreno_dev: The adreno device pointer
+ */
+static void a5xx_enable_pc(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_dev->sptp_pc_enabled)
+		return;
+
+	kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL, 0x0000007F);
+	kgsl_regwrite(device, A5XX_GPMU_PWR_COL_BINNING_CTRL, 0);
+	kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_HYST, 0x000A0080);
+	kgsl_regwrite(device, A5XX_GPMU_PWR_COL_STAGGER_DELAY, 0x00600040);
+
+	trace_adreno_sp_tp((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * The maximum payload of a type4 packet is the max size minus one for the
+ * opcode
+ */
+#define TYPE4_MAX_PAYLOAD (PM4_TYPE4_PKT_SIZE_MAX - 1)
+
+static int _gpmu_create_load_cmds(struct adreno_device *adreno_dev,
+	uint32_t *ucode, uint32_t size)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	uint32_t *start, *cmds;
+	uint32_t offset = 0;
+	uint32_t cmds_size = size;
+
+	/* Add a dword for each PM4 packet */
+	cmds_size += (size / TYPE4_MAX_PAYLOAD) + 1;
+
+	/* Add 4 dwords for the protected mode */
+	cmds_size += 4;
+
+	if (adreno_dev->gpmu_cmds != NULL)
+		return 0;
+
+	adreno_dev->gpmu_cmds = devm_kmalloc(&device->pdev->dev,
+		cmds_size << 2, GFP_KERNEL);
+	if (adreno_dev->gpmu_cmds == NULL)
+		return -ENOMEM;
+
+	cmds = adreno_dev->gpmu_cmds;
+	start = cmds;
+
+	/* Turn CP protection OFF */
+	cmds += cp_protected_mode(adreno_dev, cmds, 0);
+
+	/*
+	 * Prebuild the cmd stream to send to the GPU to load
+	 * the GPMU firmware
+	 */
+	while (size > 0) {
+		int tmp_size = size;
+
+		if (size >= TYPE4_MAX_PAYLOAD)
+			tmp_size = TYPE4_MAX_PAYLOAD;
+
+		*cmds++ = cp_type4_packet(
+				A5XX_GPMU_INST_RAM_BASE + offset,
+				tmp_size);
+
+		memcpy(cmds, &ucode[offset], tmp_size << 2);
+
+		cmds += tmp_size;
+		offset += tmp_size;
+		size -= tmp_size;
+	}
+
+	/* Turn CP protection ON */
+	cmds += cp_protected_mode(adreno_dev, cmds, 1);
+
+	adreno_dev->gpmu_cmds_size = (size_t) (cmds - start);
+
+	return 0;
+}
+
+/*
+ * _load_gpmu_firmware() - Load the ucode into the GPMU RAM
+ * @adreno_dev: Pointer to adreno device
+ */
+static int _load_gpmu_firmware(struct adreno_device *adreno_dev)
+{
+	uint32_t *data;
+	const struct firmware *fw = NULL;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
+	uint32_t *cmds, cmd_size;
+	int ret =  -EINVAL;
+	u32 gmu_major = 1;
+
+	if (!a5xx_has_gpmu(adreno_dev))
+		return 0;
+
+	/* A530 used GPMU fw major version 1 and A540 used major version 3 */
+	if (adreno_is_a540(adreno_dev))
+		gmu_major = 3;
+
+	/* gpmu fw already saved and verified so do nothing new */
+	if (adreno_dev->gpmu_cmds_size != 0)
+		return 0;
+
+	if (a5xx_core->gpmufw_name == NULL)
+		return 0;
+
+	ret = request_firmware(&fw, a5xx_core->gpmufw_name, &device->pdev->dev);
+	if (ret || fw == NULL) {
+		dev_err(&device->pdev->dev,
+			"request_firmware (%s) failed: %d\n",
+			a5xx_core->gpmufw_name, ret);
+		return ret;
+	}
+
+	data = (uint32_t *)fw->data;
+
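+	/*
+	 * Layout as parsed below: data[0] is the block length in dwords,
+	 * data[1] is the block ID (must be GPMU_FIRMWARE_ID), data[2] onwards
+	 * is the block header (data[2] holds its size) and the GPMU microcode
+	 * follows the header.
+	 */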
+	if (data[0] >= (fw->size / sizeof(uint32_t)) || data[0] < 2)
+		goto err;
+
+	if (data[1] != GPMU_FIRMWARE_ID)
+		goto err;
+	ret = _read_fw2_block_header(device, &data[2],
+		data[0] - 2, GPMU_FIRMWARE_ID, gmu_major, 0);
+	if (ret)
+		goto err;
+
+	/* Integer overflow check for cmd_size */
+	if (data[2] > (data[0] - 2))
+		goto err;
+
+	cmds = data + data[2] + 3;
+	cmd_size = data[0] - data[2] - 2;
+
+	if (cmd_size > GPMU_INST_RAM_SIZE) {
+		dev_err(device->dev,
+			"GPMU firmware block size is larger than RAM size\n");
+		goto err;
+	}
+
+	/* Everything is cool, so create some commands */
+	ret = _gpmu_create_load_cmds(adreno_dev, cmds, cmd_size);
+err:
+	if (fw)
+		release_firmware(fw);
+
+	return ret;
+}
+
+static void a5xx_spin_idle_debug(struct adreno_device *adreno_dev,
+				const char *str)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	unsigned int rptr, wptr;
+	unsigned int status, status3, intstatus;
+	unsigned int hwfault;
+
+	dev_err(device->dev, "%s", str);
+
+	kgsl_regread(device, A5XX_CP_RB_RPTR, &rptr);
+	kgsl_regread(device, A5XX_CP_RB_WPTR, &wptr);
+
+	kgsl_regread(device, A5XX_RBBM_STATUS, &status);
+	kgsl_regread(device, A5XX_RBBM_STATUS3, &status3);
+	kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &intstatus);
+	kgsl_regread(device, A5XX_CP_HW_FAULT, &hwfault);
+
+	dev_err(device->dev,
+		"rb=%d pos=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n",
+		adreno_dev->cur_rb->id, rptr, wptr, status, status3, intstatus);
+
+	dev_err(device->dev, " hwfault=%8.8X\n", hwfault);
+
+	kgsl_device_snapshot(device, NULL, NULL, false);
+}
+
+static int _gpmu_send_init_cmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	uint32_t *cmds;
+	uint32_t size = adreno_dev->gpmu_cmds_size;
+	int ret;
+
+	if (size == 0 || adreno_dev->gpmu_cmds == NULL)
+		return -EINVAL;
+
+	cmds = adreno_ringbuffer_allocspace(rb, size);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+	if (cmds == NULL)
+		return -ENOSPC;
+
+	/* Copy to the RB the predefined fw sequence cmds */
+	memcpy(cmds, adreno_dev->gpmu_cmds, size << 2);
+
+	ret = a5xx_ringbuffer_submit(rb, NULL, true);
+	if (!ret) {
+		ret = adreno_spin_idle(adreno_dev, 2000);
+		if (ret)
+			a5xx_spin_idle_debug(adreno_dev,
+				"gpmu initialization failed to idle\n");
+	}
+	return ret;
+}
+
+/*
+ * a5xx_gpmu_start() - Initialize and start the GPMU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * Load the GPMU microcode, set up any features such as hardware clock gating
+ * or IFPC, and take the GPMU out of reset.
+ */
+static int a5xx_gpmu_start(struct adreno_device *adreno_dev)
+{
+	int ret;
+	unsigned int reg, retry = GPMU_FW_INIT_RETRY;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!a5xx_has_gpmu(adreno_dev))
+		return 0;
+
+	ret = _gpmu_send_init_cmds(adreno_dev);
+	if (ret)
+		return ret;
+
+	if (adreno_is_a530(adreno_dev)) {
+		/* GPMU clock gating setup */
+		kgsl_regwrite(device, A5XX_GPMU_WFI_CONFIG, 0x00004014);
+	}
+	/* Kick off GPMU firmware */
+	kgsl_regwrite(device, A5XX_GPMU_CM3_SYSRESET, 0);
+	/*
+	 * The hardware team's estimation of GPMU firmware initialization
+	 * latency is about 3000 cycles, that's about 5 to 24 usec.
+	 */
+	do {
+		udelay(1);
+		kgsl_regread(device, A5XX_GPMU_GENERAL_0, &reg);
+	} while ((reg != 0xBABEFACE) && retry--);
+
+	if (reg != 0xBABEFACE) {
+		dev_err(device->dev,
+			"GPMU firmware initialization timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	if (!adreno_is_a530(adreno_dev)) {
+		kgsl_regread(device, A5XX_GPMU_GENERAL_1, &reg);
+
+		if (reg) {
+			dev_err(device->dev,
+				"GPMU firmware initialization failed: %d\n",
+				reg);
+			return -EIO;
+		}
+	}
+	set_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv);
+	/*
+	 * We are in the AWARE state and the IRQ line from the GPU to the
+	 * host is disabled. Read pending GPMU interrupts and clear
+	 * GPMU_RBBM_INTR_INFO.
+	 */
+	kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, &reg);
+	/*
+	 * Clear the GPMU firmware interrupt in the RBBM if any GPMU
+	 * interrupts are pending.
+	 */
+	if (reg)
+		kgsl_regwrite(device,
+			A5XX_RBBM_INT_CLEAR_CMD,
+			1 << A5XX_INT_GPMU_FIRMWARE);
+	return ret;
+}
+
+void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
+	int i;
+
+	if (!adreno_dev->hwcg_enabled)
+		return;
+
+	for (i = 0; i < a5xx_core->hwcg_count; i++)
+		kgsl_regwrite(device, a5xx_core->hwcg[i].offset,
+			on ? a5xx_core->hwcg[i].val : 0);
+
+	/* enable top level HWCG */
+	kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, on ? 0xAAA8AA00 : 0);
+	kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, on ? 0x00000182 : 0x00000180);
+}
+
+static int _read_fw2_block_header(struct kgsl_device *device,
+		uint32_t *header, uint32_t remain,
+		uint32_t id, uint32_t major, uint32_t minor)
+{
+	uint32_t header_size;
+	int i = 1;
+
+	if (header == NULL)
+		return -ENOMEM;
+
+	header_size = header[0];
+	/* Headers have limited size and always occur as pairs of words */
+	if (header_size >  MAX_HEADER_SIZE || header_size >= remain ||
+				header_size % 2 || header_size == 0)
+		return -EINVAL;
+	/* Sequences must have an identifying id first thing in their header */
+	if (id == GPMU_SEQUENCE_ID) {
+		if (header[i] != HEADER_SEQUENCE ||
+			(header[i + 1] >= MAX_SEQUENCE_ID))
+			return -EINVAL;
+		i += 2;
+	}
+	for (; i < header_size; i += 2) {
+		switch (header[i]) {
+		/* Major Version */
+		case HEADER_MAJOR:
+			if ((major > header[i + 1]) &&
+				header[i + 1]) {
+				dev_err(device->dev,
+					"GPMU major version mis-match %d, %d\n",
+					major, header[i + 1]);
+				return -EINVAL;
+			}
+			break;
+		case HEADER_MINOR:
+			if (minor > header[i + 1])
+				dev_err(device->dev,
+					"GPMU minor version mis-match %d %d\n",
+					minor, header[i + 1]);
+			break;
+		case HEADER_DATE:
+		case HEADER_TIME:
+			break;
+		default:
+			dev_err(device->dev, "GPMU unknown header ID %d\n",
+					header[i]);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Read in the register sequence file and save pointers to the
+ * necessary sequences.
+ *
+ * GPU sequence file format (one dword per field unless noted):
+ * Block 1 length (length dword field not inclusive)
+ * Block 1 type = Sequence = 3
+ * Block Header length (length dword field not inclusive)
+ * BH field ID = Sequence field ID
+ * BH field data = Sequence ID
+ * BH field ID
+ * BH field data
+ * ...
+ * Opcode 0 ID
+ * Opcode 0 data M words
+ * Opcode 1 ID
+ * Opcode 1 data N words
+ * ...
+ * Opcode X ID
+ * Opcode X data O words
+ * Block 2 length...
+ */
+static void _load_regfile(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
+	const struct firmware *fw;
+	uint64_t block_size = 0, block_total = 0;
+	uint32_t fw_size, *block;
+	int ret = -EINVAL;
+	u32 lm_major = 1;
+
+	if (!a5xx_core->regfw_name)
+		return;
+
+	ret = request_firmware(&fw, a5xx_core->regfw_name, &device->pdev->dev);
+	if (ret) {
+		dev_err(&device->pdev->dev, "request firmware failed %d, %s\n",
+				ret, a5xx_core->regfw_name);
+		return;
+	}
+
+	/* a530v2 lm_major was 3. a530v3 lm_major was 1 */
+	if (adreno_is_a530v2(adreno_dev))
+		lm_major = 3;
+
+	fw_size = fw->size / sizeof(uint32_t);
+	/* Min valid file of size 6, see file description */
+	if (fw_size < 6)
+		goto err;
+	block = (uint32_t *)fw->data;
+	/* All offset numbers calculated from file description */
+	while (block_total < fw_size) {
+		block_size = block[0];
+		if (((block_total + block_size) >= fw_size)
+				|| block_size < 5)
+			goto err;
+		if (block[1] != GPMU_SEQUENCE_ID)
+			goto err;
+
+		/* For now ignore blocks other than the LM sequence */
+		if (block[4] == LM_SEQUENCE_ID) {
+			ret = _read_fw2_block_header(device, &block[2],
+				block_size - 2, GPMU_SEQUENCE_ID,
+				lm_major, 0);
+			if (ret)
+				goto err;
+
+			if (block[2] > (block_size - 2))
+				goto err;
+			adreno_dev->lm_sequence = block + block[2] + 3;
+			adreno_dev->lm_size = block_size - block[2] - 2;
+		}
+		block_total += (block_size + 1);
+		block += (block_size + 1);
+	}
+	if (adreno_dev->lm_sequence)
+		return;
+
+err:
+	release_firmware(fw);
+	dev_err(device->dev,
+		     "Register file failed to load sz=%d bsz=%llu header=%d\n",
+		     fw_size, block_size, ret);
+}
+
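+/*
+ * Execute a GPMU register write sequence. Each entry starts with an opcode
+ * dword: opcode 1 is { 1, reg_lo, reg_hi, value } (32 bit write), opcode 2 is
+ * { 2, reg_lo, reg_hi, val_lo, val_hi } (64 bit write) and opcode 3 is
+ * { 3, usec } (delay). A hypothetical sequence { 1, 0x100, 0x0, 0xA } followed
+ * by { 3, 10 } would write 0xA to register 0x100 and then delay for 10 usec.
+ */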
+static int _execute_reg_sequence(struct adreno_device *adreno_dev,
+			uint32_t *opcode, uint32_t length)
+{
+	uint32_t *cur = opcode;
+	uint64_t reg, val;
+
+	/* todo double check the reg writes */
+	while ((cur - opcode) < length) {
+		if (cur[0] == 1 && (length - (cur - opcode) >= 4)) {
+			/* Write a 32 bit value to a 64 bit reg */
+			reg = cur[2];
+			reg = (reg << 32) | cur[1];
+			kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg, cur[3]);
+			cur += 4;
+		} else if (cur[0] == 2 && (length - (cur - opcode) >= 5)) {
+			/* Write a 64 bit value to a 64 bit reg */
+			reg = cur[2];
+			reg = (reg << 32) | cur[1];
+			val = cur[4];
+			val = (val << 32) | cur[3];
+			kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg, val);
+			cur += 5;
+		} else if (cur[0] == 3 && (length - (cur - opcode) >= 2)) {
+			/* Delay for X usec */
+			udelay(cur[1]);
+			cur += 2;
+		} else
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static uint32_t _write_voltage_table(struct adreno_device *adreno_dev,
+			unsigned int addr)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
+	int i;
+	struct dev_pm_opp *opp;
+	unsigned int mvolt = 0;
+
+	kgsl_regwrite(device, addr++, a5xx_core->max_power);
+	kgsl_regwrite(device, addr++, pwr->num_pwrlevels);
+
+	/* Write voltage in mV and frequency in MHz */
+	for (i = 0; i < pwr->num_pwrlevels; i++) {
+		opp = dev_pm_opp_find_freq_exact(&device->pdev->dev,
+				pwr->pwrlevels[i].gpu_freq, true);
+		/* _opp_get returns uV, convert to mV */
+		if (!IS_ERR(opp)) {
+			mvolt = dev_pm_opp_get_voltage(opp) / 1000;
+			dev_pm_opp_put(opp);
+		}
+		kgsl_regwrite(device, addr++, mvolt);
+		kgsl_regwrite(device, addr++,
+				pwr->pwrlevels[i].gpu_freq / 1000000);
+	}
+	return (pwr->num_pwrlevels * 2 + 2);
+}
+
+static uint32_t lm_limit(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (adreno_dev->lm_limit)
+		return adreno_dev->lm_limit;
+
+	if (of_property_read_u32(device->pdev->dev.of_node, "qcom,lm-limit",
+		&adreno_dev->lm_limit))
+		adreno_dev->lm_limit = LM_DEFAULT_LIMIT;
+
+	return adreno_dev->lm_limit;
+}
+/*
+ * a5xx_lm_init() - Initialize LM/DPM on the GPMU
+ * @adreno_dev: The adreno device pointer
+ */
+static void a530_lm_init(struct adreno_device *adreno_dev)
+{
+	uint32_t length;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
+
+	if (!adreno_dev->lm_enabled)
+		return;
+
+	/* If something was wrong with the sequence file, return */
+	if (adreno_dev->lm_sequence == NULL)
+		return;
+
+	/* Write LM registers including DPM ucode, coefficients, and config */
+	if (_execute_reg_sequence(adreno_dev, adreno_dev->lm_sequence,
+				adreno_dev->lm_size)) {
+		/* If the sequence is invalid, it's not getting better */
+		adreno_dev->lm_sequence = NULL;
+		dev_warn(device->dev,
+				"Invalid LM sequence\n");
+		return;
+	}
+
+	kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_ID, a5xx_core->gpmu_tsens);
+	kgsl_regwrite(device, A5XX_GPMU_DELTA_TEMP_THRESHOLD, 0x1);
+	kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, 0x1);
+
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE,
+			(0x80000000 | device->pwrctrl.active_pwrlevel));
+	/* use the leakage to set this value at runtime */
+	kgsl_regwrite(device, A5XX_GPMU_BASE_LEAKAGE,
+		adreno_dev->lm_leakage);
+
+	/* Enable the power threshold and set the limit from lm_limit() */
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD,
+		0x80000000 | lm_limit(adreno_dev));
+
+	kgsl_regwrite(device, A5XX_GPMU_BEC_ENABLE, 0x10001FFF);
+	kgsl_regwrite(device, A5XX_GDPM_CONFIG1, 0x00201FF1);
+
+	/* Send an initial message to the GPMU with the LM voltage table */
+	kgsl_regwrite(device, AGC_MSG_STATE, 1);
+	kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID);
+	length = _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD);
+	kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE, length * sizeof(uint32_t));
+	kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE);
+}
+
+/*
+ * a5xx_lm_enable() - Enable the LM/DPM feature on the GPMU
+ * @adreno_dev: The adreno device pointer
+ */
+static void a530_lm_enable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_dev->lm_enabled)
+		return;
+
+	/* If no sequence properly initialized, return */
+	if (adreno_dev->lm_sequence == NULL)
+		return;
+
+	kgsl_regwrite(device, A5XX_GDPM_INT_MASK, 0x00000000);
+	kgsl_regwrite(device, A5XX_GDPM_INT_EN, 0x0000000A);
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK, 0x00000001);
+	kgsl_regwrite(device, A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK,
+			0x00050000);
+	kgsl_regwrite(device, A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL,
+			0x00030000);
+
+	if (adreno_is_a530(adreno_dev))
+		/* Program throttle control, do not enable idle DCS on v3+ */
+		kgsl_regwrite(device, A5XX_GPMU_CLOCK_THROTTLE_CTRL,
+			adreno_is_a530v2(adreno_dev) ? 0x00060011 : 0x00000011);
+}
+
+static void a540_lm_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	uint32_t agc_lm_config = AGC_BCL_DISABLED |
+		((ADRENO_CHIPID_PATCH(adreno_dev->chipid) & 0x3)
+		<< AGC_GPU_VERSION_SHIFT);
+	unsigned int r;
+
+	if (!adreno_dev->throttling_enabled)
+		agc_lm_config |= AGC_THROTTLE_DISABLE;
+
+	if (adreno_dev->lm_enabled) {
+		agc_lm_config |=
+			AGC_LM_CONFIG_ENABLE_GPMU_ADAPTIVE |
+			AGC_LM_CONFIG_ISENSE_ENABLE;
+
+		kgsl_regread(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, &r);
+
+		if ((r & GPMU_ISENSE_STATUS) == GPMU_ISENSE_END_POINT_CAL_ERR) {
+			dev_err(device->dev,
+				"GPMU: ISENSE end point calibration failure\n");
+			agc_lm_config |= AGC_LM_CONFIG_ENABLE_ERROR;
+		}
+	}
+
+	kgsl_regwrite(device, AGC_MSG_STATE, 0x80000001);
+	kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID);
+	(void) _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD);
+	kgsl_regwrite(device, AGC_MSG_PAYLOAD + AGC_LM_CONFIG, agc_lm_config);
+	kgsl_regwrite(device, AGC_MSG_PAYLOAD + AGC_LEVEL_CONFIG,
+		(unsigned int) ~(GENMASK(LM_DCVS_LIMIT, 0) |
+				GENMASK(16+LM_DCVS_LIMIT, 16)));
+
+	kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE,
+		(AGC_LEVEL_CONFIG + 1) * sizeof(uint32_t));
+	kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE);
+
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE,
+		(0x80000000 | device->pwrctrl.active_pwrlevel));
+
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD,
+		PWR_THRESHOLD_VALID | lm_limit(adreno_dev));
+
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK,
+		VOLTAGE_INTR_EN);
+}
+
+static void a5xx_lm_enable(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a530(adreno_dev))
+		a530_lm_enable(adreno_dev);
+}
+
+static void a5xx_lm_init(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a530(adreno_dev))
+		a530_lm_init(adreno_dev);
+	else if (adreno_is_a540(adreno_dev))
+		a540_lm_init(adreno_dev);
+}
+
+static int gpmu_set_level(struct adreno_device *adreno_dev, unsigned int val)
+{
+	unsigned int reg;
+	int retry = 100;
+
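+	/*
+	 * Bit 31 of A5XX_GPMU_GPMU_VOLTAGE acts as a request-pending flag: it
+	 * is set by the value written below and is presumably cleared by the
+	 * GPMU once the level change has been processed, so poll until it
+	 * drops or the retries run out.
+	 */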
+	kgsl_regwrite(KGSL_DEVICE(adreno_dev), A5XX_GPMU_GPMU_VOLTAGE, val);
+
+	do {
+		kgsl_regread(KGSL_DEVICE(adreno_dev), A5XX_GPMU_GPMU_VOLTAGE,
+			&reg);
+	} while ((reg & 0x80000000) && retry--);
+
+	return (reg & 0x80000000) ? -ETIMEDOUT : 0;
+}
+
+/*
+ * a5xx_pwrlevel_change_settings() - Program the hardware during power level
+ * transitions
+ * @adreno_dev: The adreno device pointer
+ * @prelevel: The previous power level
+ * @postlevel: The new power level
+ * @post: True if called after the clock change has taken effect
+ */
+static void a5xx_pwrlevel_change_settings(struct adreno_device *adreno_dev,
+				unsigned int prelevel, unsigned int postlevel,
+				bool post)
+{
+	/*
+	 * On pre-A540 hardware only call through if LM is supported and
+	 * enabled; always call through for A540
+	 */
+	if (!adreno_is_a540(adreno_dev) && !adreno_dev->lm_enabled)
+		return;
+
+	if (!post) {
+		if (gpmu_set_level(adreno_dev, (0x80000010 | postlevel)))
+			dev_err(KGSL_DEVICE(adreno_dev)->dev,
+				"GPMU pre powerlevel did not stabilize\n");
+	} else {
+		if (gpmu_set_level(adreno_dev, (0x80000000 | postlevel)))
+			dev_err(KGSL_DEVICE(adreno_dev)->dev,
+				"GPMU post powerlevel did not stabilize\n");
+	}
+}
+
+/* FW driven idle 10% throttle */
+#define IDLE_10PCT 0
+/* number of cycles when clock is throttled by 50% (CRC) */
+#define CRC_50PCT  1
+/* number of cycles when clock is throttled by more than 50% (CRC) */
+#define CRC_MORE50PCT 2
+/* number of cycles when clock is throttled by less than 50% (CRC) */
+#define CRC_LESS50PCT 3
+
+static int64_t a5xx_read_throttling_counters(struct adreno_device *adreno_dev)
+{
+	int i;
+	int64_t adj;
+	uint32_t th[ADRENO_GPMU_THROTTLE_COUNTERS];
+	struct adreno_busy_data *busy = &adreno_dev->busy_data;
+
+	if (!adreno_dev->throttling_enabled)
+		return 0;
+
+	for (i = 0; i < ADRENO_GPMU_THROTTLE_COUNTERS; i++) {
+		if (!adreno_dev->gpmu_throttle_counters[i])
+			return 0;
+
+		th[i] = counter_delta(KGSL_DEVICE(adreno_dev),
+				adreno_dev->gpmu_throttle_counters[i],
+				&busy->throttle_cycles[i]);
+	}
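+	/*
+	 * Weight each throttling bucket by the approximate number of cycles
+	 * lost: cycles throttled by more than 50% (minus the FW-driven idle
+	 * throttle) count 3x, ~50% counts 1x and less than 50% counts 1/3.
+	 */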
+	adj = th[CRC_MORE50PCT] - th[IDLE_10PCT];
+	adj = th[CRC_50PCT] + th[CRC_LESS50PCT] / 3 + (adj < 0 ? 0 : adj) * 3;
+
+	trace_kgsl_clock_throttling(
+		th[IDLE_10PCT], th[CRC_50PCT],
+		th[CRC_MORE50PCT], th[CRC_LESS50PCT],
+		adj);
+	return adj;
+}
+
+/*
+ * a5xx_gpmu_reset() - Re-enable GPMU based power features and restart GPMU
+ * @work: Pointer to the work struct for gpmu reset
+ *
+ * Load the GPMU microcode, set up any features such as hardware clock gating
+ * or IFPC, and take the GPMU out of reset.
+ */
+static void a5xx_gpmu_reset(struct work_struct *work)
+{
+	struct adreno_device *adreno_dev = container_of(work,
+			struct adreno_device, gpmu_work);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv))
+		return;
+
+	/*
+	 * If GPMU has already experienced a restart or is in the process of it
+	 * after the watchdog timeout, then there is no need to reset GPMU
+	 * again.
+	 */
+	if (device->state != KGSL_STATE_AWARE && device->state != KGSL_STATE_ACTIVE)
+		return;
+
+	mutex_lock(&device->mutex);
+
+	if (a5xx_regulator_enable(adreno_dev))
+		goto out;
+
+	/* Soft reset of the GPMU block */
+	kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, BIT(16));
+
+	/* GPU comes up in secured mode, make it unsecured by default */
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION))
+		kgsl_regwrite(device, A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+
+
+	a5xx_gpmu_init(adreno_dev);
+
+out:
+	mutex_unlock(&device->mutex);
+}
+
+static void _setup_throttling_counters(struct adreno_device *adreno_dev)
+{
+	int i, ret = 0;
+
+	if (!adreno_is_a540(adreno_dev))
+		return;
+
+	for (i = 0; i < ADRENO_GPMU_THROTTLE_COUNTERS; i++) {
+		/* Reset the throttled cycles value */
+		adreno_dev->busy_data.throttle_cycles[i] = 0;
+
+		/* Throttle countables start at offset 43 */
+		ret |= adreno_perfcounter_kernel_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 43 + i,
+			&adreno_dev->gpmu_throttle_counters[i], NULL);
+	}
+
+	WARN_ONCE(ret, "Unable to get one or more clock throttling registers\n");
+}
+
+/*
+ * a5xx_start() - Device start
+ * @adreno_dev: Pointer to adreno device
+ *
+ * a5xx device start
+ */
+static int a5xx_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
+	unsigned int bit;
+	int ret;
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+	adreno_perfcounter_restore(adreno_dev);
+
+	if (adreno_is_a530(adreno_dev) &&
+			ADRENO_FEATURE(adreno_dev, ADRENO_LM))
+		adreno_perfcounter_kernel_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 27,
+			&adreno_dev->lm_threshold_count, NULL);
+
+	/* Enable 64 bit addressing */
+	kgsl_regwrite(device, A5XX_CP_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_VSC_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_RB_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_PC_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_VFD_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_VPC_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_SP_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
+	_setup_throttling_counters(adreno_dev);
+
+	/* Set up VBIF registers from the GPU core definition */
+	kgsl_regmap_multi_write(&device->regmap, a5xx_core->vbif,
+		a5xx_core->vbif_count);
+
+	/* Make all blocks contribute to the GPU BUSY perf counter */
+	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
+
+	/* Program RBBM counter 0 to report GPU busy for frequency scaling */
+	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
+
+	/*
+	 * Enable the RBBM error reporting bits.  This lets us get
+	 * useful information on failure
+	 */
+	kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL0, 0x00000001);
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_FAULT_DETECT_MASK)) {
+		/*
+		 * We have 4 RB units, and only RB0 activity signals are
+		 * working correctly. Mask out RB1-3 activity signals
+		 * from the HW hang detection logic as per
+		 * recommendation of the hardware team.
+		 */
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
+				0xF0000000);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
+				0xFFFFFFFF);
+	}
+
+	/*
+	 * Set hang detection threshold to 4 million cycles
+	 * (0x3FFFF*16)
+	 */
+	kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
+				  (1 << 30) | 0x3FFFF);
+
+	/* Turn on performance counters */
+	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_CNTL, 0x01);
+
+	/*
+	 * This is to increase performance by restricting VFD's cache access,
+	 * so that LRZ and other data get evicted less.
+	 */
+	kgsl_regwrite(device, A5XX_UCHE_CACHE_WAYS, 0x02);
+
+	/*
+	 * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively
+	 * disabling L2 bypass
+	 */
+	kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_LO, 0xffff0000);
+	kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_HI, 0x0001ffff);
+	kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_LO, 0xffff0000);
+	kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);
+
+	/* Program the GMEM VA range for the UCHE path */
+	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_LO,
+			adreno_dev->uche_gmem_base);
+	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x0);
+	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_LO,
+			adreno_dev->uche_gmem_base +
+			adreno_dev->gpucore->gmem_size - 1);
+	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x0);
+
+	/*
+	 * Below CP registers are 0x0 by default, program init
+	 * values based on a5xx flavor.
+	 */
+	if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev)) {
+		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20);
+		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x400);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
+	} else if (adreno_is_a510(adreno_dev)) {
+		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20);
+		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x20);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
+	} else if (adreno_is_a540(adreno_dev) || adreno_is_a512(adreno_dev)) {
+		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x40);
+		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x400);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
+	} else {
+		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x40);
+		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x40);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
+	}
+
+	/*
+	 * The vtxFifo and primFifo threshold default values
+	 * differ between targets.
+	 */
+	if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev))
+		kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL,
+						(0x100 << 11 | 0x100 << 22));
+	else if (adreno_is_a510(adreno_dev) || adreno_is_a512(adreno_dev))
+		kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL,
+						(0x200 << 11 | 0x200 << 22));
+	else
+		kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL,
+						(0x400 << 11 | 0x300 << 22));
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI)) {
+		/*
+		 * Set TWOPASSUSEWFI in A5XX_PC_DBG_ECO_CNTL for
+		 * microcodes after v77
+		 */
+		if ((adreno_compare_pfp_version(adreno_dev, 0x5FF077) >= 0))
+			kgsl_regrmw(device, A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
+	}
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING)) {
+		/*
+		 * Disable RB sampler datapath DP2 clock gating
+		 * optimization for 1-SP GPUs; it is enabled by default.
+		 */
+		kgsl_regrmw(device, A5XX_RB_DBG_ECO_CNT, 0, (1 << 9));
+	}
+	/*
+	 * Disable UCHE global filter as SP can invalidate/flush
+	 * independently
+	 */
+	kgsl_regwrite(device, A5XX_UCHE_MODE_CNTL, BIT(29));
+	/* Set the USE_RETENTION_FLOPS chicken bit */
+	kgsl_regwrite(device, A5XX_CP_CHICKEN_DBG, 0x02000000);
+
+	/* Enable ISDB mode if requested */
+	if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv)) {
+		if (!adreno_active_count_get(adreno_dev)) {
+			/*
+			 * Disable ME/PFP split timeouts when the debugger is
+			 * enabled because the CP doesn't know when a shader is
+			 * in active debug
+			 */
+			kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0x06FFFFFF);
+
+			/* Force the SP0/SP1 clocks on to enable ISDB */
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP0, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP1, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP2, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP3, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP0, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP1, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP2, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP3, 0x0);
+
+			/* disable HWCG */
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, 0x0);
+		} else
+			dev_err(device->dev,
+				"Active count failed while turning on ISDB\n");
+	} else {
+		/* if not in ISDB mode enable ME/PFP split notification */
+		kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
+	}
+
+	kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL2, 0x0000003F);
+	bit = adreno_dev->highest_bank_bit ?
+		(adreno_dev->highest_bank_bit - 13) & 0x03 : 0;
+	/*
+	 * Program the highest DDR bank bit that was passed in
+	 * from the DT in a handful of registers. Some of these
+	 * registers will also be written by the UMD, but we
+	 * want to program them in case we happen to use the
+	 * UCHE before the UMD does
+	 */
+
+	kgsl_regwrite(device, A5XX_TPL1_MODE_CNTL, bit << 7);
+	kgsl_regwrite(device, A5XX_RB_MODE_CNTL, bit << 1);
+	if (adreno_is_a540(adreno_dev) || adreno_is_a512(adreno_dev))
+		kgsl_regwrite(device, A5XX_UCHE_DBG_ECO_CNTL_2, bit);
+
+	/* Disable All flat shading optimization */
+	kgsl_regrmw(device, A5XX_VPC_DBG_ECO_CNTL, 0, 0x1 << 10);
+
+	/*
+	 * VPC corner case with local memory load kill leads to corrupt
+	 * internal state. The normal disable does not work on all a5xx chips.
+	 * So do the following setting to disable it.
+	 */
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_DISABLE_LMLOADKILL)) {
+		kgsl_regrmw(device, A5XX_VPC_DBG_ECO_CNTL, 0, 0x1 << 23);
+		kgsl_regrmw(device, A5XX_HLSQ_DBG_ECO_CNTL, 0x1 << 18, 0);
+	}
+
+	if (device->mmu.secured) {
+		kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_CNTL, 0x0);
+		kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
+			lower_32_bits(KGSL_IOMMU_SECURE_BASE32));
+		kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI,
+			upper_32_bits(KGSL_IOMMU_SECURE_BASE32));
+		kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE,
+			FIELD_PREP(GENMASK(31, 12),
+			(KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K)));
+	}
+
+	a5xx_preemption_start(adreno_dev);
+	a5xx_protect_init(adreno_dev);
+
+	return 0;
+}
+
+/*
+ * Follow the ME_INIT sequence with a preemption yield to allow the GPU to move
+ * to a different ringbuffer, if desired
+ */
+static int _preemption_init(
+			struct adreno_device *adreno_dev,
+			struct adreno_ringbuffer *rb, unsigned int *cmds,
+			struct kgsl_context *context)
+{
+	unsigned int *cmds_orig = cmds;
+	uint64_t gpuaddr = rb->preemption_desc->gpuaddr;
+
+	/* Turn CP protection OFF */
+	cmds += cp_protected_mode(adreno_dev, cmds, 0);
+	/*
+	 * CP during context switch will save context switch info to
+	 * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR
+	 */
+	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1);
+	*cmds++ = lower_32_bits(gpuaddr);
+	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1);
+	*cmds++ = upper_32_bits(gpuaddr);
+
+	/* Turn CP protection ON */
+	cmds += cp_protected_mode(adreno_dev, cmds, 1);
+
+	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1);
+	*cmds++ = 0;
+
+	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
+	*cmds++ = 1;
+
+	/* Enable yield in RB only */
+	*cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1);
+	*cmds++ = 1;
+
+	*cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
+	*cmds++ = 0;
+	/* generate interrupt on preemption completion */
+	*cmds++ = 1;
+
+	return cmds - cmds_orig;
+}
+
+static int a5xx_post_start(struct adreno_device *adreno_dev)
+{
+	int ret;
+	unsigned int *cmds, *start;
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+
+	if (!adreno_is_a530(adreno_dev) &&
+		!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 42);
+	if (IS_ERR(cmds)) {
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		dev_err(device->dev,
+			     "error allocating preemption init cmds\n");
+		return PTR_ERR(cmds);
+	}
+	start = cmds;
+
+	/*
+	 * Send a pipeline stat event whenever the GPU gets powered up
+	 * to cause misbehaving perf counters to start ticking
+	 */
+	if (adreno_is_a530(adreno_dev)) {
+		*cmds++ = cp_packet(adreno_dev, CP_EVENT_WRITE, 1);
+		*cmds++ = 0xF;
+	}
+
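+	/*
+	 * 42 dwords were reserved above; return any dwords that were not
+	 * written back to the ringbuffer before submitting.
+	 */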
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		cmds += _preemption_init(adreno_dev, rb, cmds, NULL);
+		rb->_wptr = rb->_wptr - (42 - (cmds - start));
+		ret = a5xx_ringbuffer_submit(rb, NULL, false);
+	} else {
+		rb->_wptr = rb->_wptr - (42 - (cmds - start));
+		ret = a5xx_ringbuffer_submit(rb, NULL, true);
+	}
+
+	if (!ret) {
+		ret = adreno_spin_idle(adreno_dev, 2000);
+		if (ret)
+			a5xx_spin_idle_debug(adreno_dev,
+				"hw initialization failed to idle\n");
+	}
+
+	return ret;
+}
+
+static int a5xx_gpmu_init(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	/* Set up LM before initializing the GPMU */
+	a5xx_lm_init(adreno_dev);
+
+	/* Enable SPTP based power collapse before enabling GPMU */
+	a5xx_enable_pc(adreno_dev);
+
+	ret = a5xx_gpmu_start(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* Enable limits management */
+	a5xx_lm_enable(adreno_dev);
+	return 0;
+}
+
+static int a5xx_zap_shader_resume(struct kgsl_device *device)
+{
+	int ret = qcom_scm_set_remote_state(0, 13);
+
+	if (ret)
+		dev_err(device->dev,
+			"SCM zap resume call failed: %d\n", ret);
+
+	return ret;
+}
+
+/*
+ * a5xx_microcode_load() - Load microcode
+ * @adreno_dev: Pointer to adreno device
+ */
+static int a5xx_microcode_load(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4);
+	struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP);
+	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
+	uint64_t gpuaddr;
+
+	gpuaddr = pm4_fw->memdesc->gpuaddr;
+	kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_LO,
+				lower_32_bits(gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_HI,
+				upper_32_bits(gpuaddr));
+
+	gpuaddr = pfp_fw->memdesc->gpuaddr;
+	kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_LO,
+				lower_32_bits(gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_HI,
+				upper_32_bits(gpuaddr));
+
+	/*
+	 * Do not invoke to load zap shader if MMU does
+	 * not support secure mode.
+	 */
+	if (!device->mmu.secured)
+		return 0;
+
+	if (adreno_dev->zap_loaded && !(ADRENO_FEATURE(adreno_dev,
+		ADRENO_CPZ_RETENTION)))
+		return a5xx_zap_shader_resume(device);
+
+	return adreno_zap_shader_load(adreno_dev, a5xx_core->zap_name);
+}
+
+static int _me_init_ucode_workarounds(struct adreno_device *adreno_dev)
+{
+	switch (ADRENO_GPUREV(adreno_dev)) {
+	case ADRENO_REV_A510:
+		return 0x00000001; /* Ucode workaround for token end syncs */
+	case ADRENO_REV_A505:
+	case ADRENO_REV_A506:
+	case ADRENO_REV_A530:
+		/*
+		 * Ucode workarounds for token end syncs,
+		 * WFI after every direct-render 3D mode draw and
+		 * WFI after every 2D Mode 3 draw.
+		 */
+		return 0x0000000B;
+	default:
+		return 0x00000000; /* No ucode workarounds enabled */
+	}
+}
+
+/*
+ * The CP_INIT_MAX_CONTEXT bit tells whether multiple hardware contexts
+ * can be used at once or if they should be serialized
+ */
+#define CP_INIT_MAX_CONTEXT BIT(0)
+
+/* Enables register protection mode */
+#define CP_INIT_ERROR_DETECTION_CONTROL BIT(1)
+
+/* Header dump information */
+#define CP_INIT_HEADER_DUMP BIT(2) /* Reserved */
+
+/* Default Reset states enabled for PFP and ME */
+#define CP_INIT_DEFAULT_RESET_STATE BIT(3)
+
+/* Drawcall filter range */
+#define CP_INIT_DRAWCALL_FILTER_RANGE BIT(4)
+
+/* Ucode workaround masks */
+#define CP_INIT_UCODE_WORKAROUND_MASK BIT(5)
+
+#define CP_INIT_MASK (CP_INIT_MAX_CONTEXT | \
+		CP_INIT_ERROR_DETECTION_CONTROL | \
+		CP_INIT_HEADER_DUMP | \
+		CP_INIT_DEFAULT_RESET_STATE | \
+		CP_INIT_UCODE_WORKAROUND_MASK)
+
+static int a5xx_critical_packet_submit(struct adreno_device *adreno_dev,
+					struct adreno_ringbuffer *rb)
+{
+	unsigned int *cmds;
+	int ret;
+
+	if (!critical_packet_constructed)
+		return 0;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 4);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	*cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds, adreno_dev->critpkts->gpuaddr);
+	*cmds++ = crit_pkts_dwords;
+
+	ret = a5xx_ringbuffer_submit(rb, NULL, true);
+	if (!ret) {
+		ret = adreno_spin_idle(adreno_dev, 20);
+		if (ret)
+			a5xx_spin_idle_debug(adreno_dev,
+				"Critical packet submission failed to idle\n");
+	}
+
+	return ret;
+}
+
+/*
+ * a5xx_send_me_init() - Initialize ringbuffer
+ * @adreno_dev: Pointer to adreno device
+ * @rb: Pointer to the ringbuffer of device
+ *
+ * Submit commands for ME initialization.
+ */
+static int a5xx_send_me_init(struct adreno_device *adreno_dev,
+			 struct adreno_ringbuffer *rb)
+{
+	unsigned int *cmds;
+	int i = 0, ret;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 9);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	cmds[i++] = cp_type7_packet(CP_ME_INIT, 8);
+
+	/* Enabled ordinal mask */
+	cmds[i++] = CP_INIT_MASK;
+
+	if (CP_INIT_MASK & CP_INIT_MAX_CONTEXT)
+		cmds[i++] = 0x00000003;
+
+	if (CP_INIT_MASK & CP_INIT_ERROR_DETECTION_CONTROL)
+		cmds[i++] = 0x20000000;
+
+	if (CP_INIT_MASK & CP_INIT_HEADER_DUMP) {
+		/* Header dump address */
+		cmds[i++] = 0x00000000;
+		/* Header dump enable and dump size */
+		cmds[i++] = 0x00000000;
+	}
+
+	if (CP_INIT_MASK & CP_INIT_DRAWCALL_FILTER_RANGE) {
+		/* Start range */
+		cmds[i++] = 0x00000000;
+		/* End range (inclusive) */
+		cmds[i++] = 0x00000000;
+	}
+
+	if (CP_INIT_MASK & CP_INIT_UCODE_WORKAROUND_MASK)
+		cmds[i++] = _me_init_ucode_workarounds(adreno_dev);
+
+	ret = a5xx_ringbuffer_submit(rb, NULL, true);
+	if (!ret) {
+		ret = adreno_spin_idle(adreno_dev, 2000);
+		if (ret)
+			a5xx_spin_idle_debug(adreno_dev,
+				"CP initialization failed to idle\n");
+	}
+
+	return ret;
+}
+
+/*
+ * a5xx_rb_start() - Start the ringbuffer
+ * @adreno_dev: Pointer to adreno device
+ */
+static int a5xx_rb_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb;
+	uint64_t addr;
+	unsigned int *cmds;
+	int ret, i;
+
+	/* Clear all the ringbuffers */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE);
+		kgsl_sharedmem_writel(device->scratch,
+			SCRATCH_RB_OFFSET(rb->id, rptr), 0);
+
+		rb->wptr = 0;
+		rb->_wptr = 0;
+		rb->wptr_preempt_end = ~0;
+	}
+
+	/* Set up the current ringbuffer */
+	rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+	addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr);
+
+	kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr));
+	kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr));
+
+	/*
+	 * The size of the ringbuffer in the hardware is the log2
+	 * representation of the size in quadwords (sizedwords / 2).
+	 * Also disable the host RPTR shadow register as it might be unreliable
+	 * in certain circumstances.
+	 */
+
+	kgsl_regwrite(device, A5XX_CP_RB_CNTL,
+		A5XX_CP_RB_CNTL_DEFAULT);
+
+	kgsl_regwrite(device, A5XX_CP_RB_BASE,
+		lower_32_bits(rb->buffer_desc->gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_RB_BASE_HI,
+		upper_32_bits(rb->buffer_desc->gpuaddr));
+
+	ret = a5xx_microcode_load(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* clear ME_HALT to start micro engine */
+
+	kgsl_regwrite(device, A5XX_CP_ME_CNTL, 0);
+
+	ret = a5xx_send_me_init(adreno_dev, rb);
+	if (ret)
+		return ret;
+
+	/* Run the critical packets if we need to */
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS)) {
+		ret = a5xx_critical_packet_submit(adreno_dev, rb);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Try to execute the zap shader if it exists, otherwise just try
+	 * directly writing to the control register
+	 */
+	if (!adreno_dev->zap_loaded)
+		kgsl_regwrite(device, A5XX_RBBM_SECVID_TRUST_CNTL, 0);
+	else {
+		cmds = adreno_ringbuffer_allocspace(rb, 2);
+		if (IS_ERR(cmds))
+			return  PTR_ERR(cmds);
+
+		*cmds++ = cp_packet(adreno_dev, CP_SET_SECURE_MODE, 1);
+		*cmds++ = 0;
+
+		ret = a5xx_ringbuffer_submit(rb, NULL, true);
+		if (!ret) {
+			ret = adreno_spin_idle(adreno_dev, 2000);
+			if (ret) {
+				a5xx_spin_idle_debug(adreno_dev,
+					"Switch to unsecure failed to idle\n");
+				return ret;
+			}
+		}
+	}
+
+	ret = a5xx_gpmu_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	a5xx_post_start(adreno_dev);
+
+	return 0;
+}
+
+/*
+ * a5xx_microcode_read() - Read microcode
+ * @adreno_dev: Pointer to adreno device
+ */
+static int a5xx_microcode_read(struct adreno_device *adreno_dev)
+{
+	int ret;
+	struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4);
+	struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP);
+	const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev);
+
+	ret = adreno_get_firmware(adreno_dev, a5xx_core->pm4fw_name, pm4_fw);
+	if (ret)
+		return ret;
+
+	ret = adreno_get_firmware(adreno_dev, a5xx_core->pfpfw_name, pfp_fw);
+	if (ret)
+		return ret;
+
+	ret = _load_gpmu_firmware(adreno_dev);
+	if (ret)
+		return ret;
+
+	_load_regfile(adreno_dev);
+
+	return ret;
+}
+
+/* Register offset defines for A5XX, in order of enum adreno_regs */
+static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A5XX_CP_RB_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A5XX_CP_RB_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+			A5XX_CP_RB_RPTR_ADDR_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI,
+			A5XX_CP_RB_RPTR_ADDR_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A5XX_CP_RB_RPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A5XX_CP_RB_WPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A5XX_CP_ME_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A5XX_CP_RB_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A5XX_CP_IB1_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A5XX_CP_IB1_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A5XX_CP_IB1_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A5XX_CP_IB2_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A5XX_CP_IB2_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A5XX_CP_IB2_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A5XX_CP_PROTECT_REG_0),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A5XX_CP_CONTEXT_SWITCH_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+				A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+				A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A5XX_RBBM_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A5XX_RBBM_STATUS3),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A5XX_RBBM_INT_0_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A5XX_RBBM_CLOCK_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A5XX_RBBM_SW_RESET_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_GPMU_POWER_COUNTER_ENABLE,
+				A5XX_GPMU_POWER_COUNTER_ENABLE),
+};
+
+static void a5xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status1, status2;
+
+	kgsl_regread(device, A5XX_CP_INTERRUPT_STATUS, &status1);
+
+	if (status1 & BIT(A5XX_CP_OPCODE_ERROR)) {
+		unsigned int val;
+
+		kgsl_regwrite(device, A5XX_CP_PFP_STAT_ADDR, 0);
+
+		/*
+		 * A5XX_CP_PFP_STAT_DATA is indexed, so read it twice to get the
+		 * value we want
+		 */
+		kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val);
+		kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val);
+
+		dev_crit_ratelimited(device->dev,
+					"ringbuffer opcode error | possible opcode=0x%8.8X\n",
+					val);
+	}
+	if (status1 & BIT(A5XX_CP_RESERVED_BIT_ERROR))
+		dev_crit_ratelimited(device->dev,
+					"ringbuffer reserved bit error interrupt\n");
+	if (status1 & BIT(A5XX_CP_HW_FAULT_ERROR)) {
+		kgsl_regread(device, A5XX_CP_HW_FAULT, &status2);
+		dev_crit_ratelimited(device->dev,
+					"CP | Ringbuffer HW fault | status=%x\n",
+					status2);
+	}
+	if (status1 & BIT(A5XX_CP_DMA_ERROR))
+		dev_crit_ratelimited(device->dev, "CP | DMA error\n");
+	if (status1 & BIT(A5XX_CP_REGISTER_PROTECTION_ERROR)) {
+		kgsl_regread(device, A5XX_CP_PROTECT_STATUS, &status2);
+		dev_crit_ratelimited(device->dev,
+					"CP | Protected mode error| %s | addr=%x | status=%x\n",
+					status2 & (1 << 24) ? "WRITE" : "READ",
+					(status2 & 0xFFFFF) >> 2, status2);
+	}
+	if (status1 & BIT(A5XX_CP_AHB_ERROR)) {
+		kgsl_regread(device, A5XX_CP_AHB_FAULT, &status2);
+		dev_crit_ratelimited(device->dev,
+					"ringbuffer AHB error interrupt | status=%x\n",
+					status2);
+	}
+}
+
+static void a5xx_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	switch (bit) {
+	case A5XX_INT_RBBM_AHB_ERROR: {
+		kgsl_regread(device, A5XX_RBBM_AHB_ERROR_STATUS, &reg);
+
+		/*
+		 * Return the word address of the erroring register so that it
+		 * matches the register specification
+		 */
+		dev_crit_ratelimited(device->dev,
+					"RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
+					reg & (1 << 28) ? "WRITE" : "READ",
+					(reg & 0xFFFFF) >> 2,
+					(reg >> 20) & 0x3,
+					(reg >> 24) & 0xF);
+
+		/* Clear the error */
+		kgsl_regwrite(device, A5XX_RBBM_AHB_CMD, (1 << 4));
+		break;
+	}
+	case A5XX_INT_RBBM_TRANSFER_TIMEOUT:
+		dev_crit_ratelimited(device->dev,
+					"RBBM: AHB transfer timeout\n");
+		break;
+	case A5XX_INT_RBBM_ME_MS_TIMEOUT:
+		kgsl_regread(device, A5XX_RBBM_AHB_ME_SPLIT_STATUS, &reg);
+		dev_crit_ratelimited(device->dev,
+					"RBBM | ME master split timeout | status=%x\n",
+					reg);
+		break;
+	case A5XX_INT_RBBM_PFP_MS_TIMEOUT:
+		kgsl_regread(device, A5XX_RBBM_AHB_PFP_SPLIT_STATUS, &reg);
+		dev_crit_ratelimited(device->dev,
+					"RBBM | PFP master split timeout | status=%x\n",
+					reg);
+		break;
+	case A5XX_INT_RBBM_ETS_MS_TIMEOUT:
+		dev_crit_ratelimited(device->dev,
+					"RBBM: ME master split timeout\n");
+		break;
+	case A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW:
+		dev_crit_ratelimited(device->dev,
+					"RBBM: ATB ASYNC overflow\n");
+		break;
+	case A5XX_INT_RBBM_ATB_BUS_OVERFLOW:
+		dev_crit_ratelimited(device->dev,
+					"RBBM: ATB bus overflow\n");
+		break;
+	case A5XX_INT_UCHE_OOB_ACCESS:
+		dev_crit_ratelimited(device->dev,
+					"UCHE: Out of bounds access\n");
+		break;
+	case A5XX_INT_UCHE_TRAP_INTR:
+		dev_crit_ratelimited(device->dev, "UCHE: Trap interrupt\n");
+		break;
+	case A5XX_INT_GPMU_VOLTAGE_DROOP:
+		dev_crit_ratelimited(device->dev, "GPMU: Voltage droop\n");
+		break;
+	default:
+		dev_crit_ratelimited(device->dev, "Unknown interrupt %d\n",
+					bit);
+	}
+}
+
+static void a5xx_irq_storm_worker(struct work_struct *work)
+{
+	struct adreno_device *adreno_dev = container_of(work,
+			struct adreno_device, irq_storm_work);
+	struct kgsl_device *device = &adreno_dev->dev;
+	unsigned int status;
+
+	mutex_lock(&device->mutex);
+
+	/* Wait for the storm to clear up */
+	do {
+		kgsl_regwrite(device, A5XX_RBBM_INT_CLEAR_CMD,
+				BIT(A5XX_INT_CP_CACHE_FLUSH_TS));
+		kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &status);
+	} while (status & BIT(A5XX_INT_CP_CACHE_FLUSH_TS));
+
+	/* Re-enable the interrupt bit in the mask */
+	adreno_dev->irq_mask |= BIT(A5XX_INT_CP_CACHE_FLUSH_TS);
+	kgsl_regwrite(device, A5XX_RBBM_INT_0_MASK, adreno_dev->irq_mask);
+	clear_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv);
+
+	dev_warn(device->dev, "Re-enabled A5XX_INT_CP_CACHE_FLUSH_TS\n");
+	mutex_unlock(&device->mutex);
+
+	/* Reschedule just to make sure everything retires */
+	adreno_dispatcher_schedule(device);
+}
+
+static void a5xx_cp_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	unsigned int cur;
+	static unsigned int count;
+	static unsigned int prev;
+
+	if (test_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv))
+		return;
+
+	kgsl_sharedmem_readl(device->memstore, &cur,
+			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+				ref_wait_ts));
+
+	/*
+	 * prev holds a previously read value
+	 * from memory.  It should be changed by the GPU with every
+	 * interrupt. If the value we know about and the value we just
+	 * read are the same, then we are likely in a storm.
+	 * If this happens twice, disable the interrupt in the mask
+	 * so the dispatcher can take care of the issue. It is then
+	 * up to the dispatcher to re-enable the mask once all work
+	 * is done and the storm has ended.
+	 */
+	if (prev == cur) {
+		count++;
+		if (count == 2) {
+			/* disable interrupt from the mask */
+			set_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED,
+					&adreno_dev->priv);
+
+			adreno_dev->irq_mask &=
+				~BIT(A5XX_INT_CP_CACHE_FLUSH_TS);
+
+			kgsl_regwrite(device, A5XX_RBBM_INT_0_MASK,
+				adreno_dev->irq_mask);
+
+			kgsl_schedule_work(&adreno_dev->irq_storm_work);
+
+			return;
+		}
+	} else {
+		count = 0;
+		prev = cur;
+	}
+
+	a5xx_preemption_trigger(adreno_dev);
+	adreno_dispatcher_schedule(device);
+}
+
+static const char *gpmu_int_msg[32] = {
+	[FW_INTR_INFO] = "FW_INTR_INFO",
+	[LLM_ACK_ERR_INTR] = "LLM_ACK_ERR_INTR",
+	[ISENS_TRIM_ERR_INTR] = "ISENS_TRIM_ERR_INTR",
+	[ISENS_ERR_INTR] = "ISENS_ERR_INTR",
+	[ISENS_IDLE_ERR_INTR] = "ISENS_IDLE_ERR_INTR",
+	[ISENS_PWR_ON_ERR_INTR] = "ISENS_PWR_ON_ERR_INTR",
+	[6 ... 30] = "",
+	[WDOG_EXPITED] = "WDOG_EXPITED"};
+
+static void a5xx_gpmu_int_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg, i;
+
+	kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, &reg);
+
+	if (reg & (~VALID_GPMU_IRQ)) {
+		dev_crit_ratelimited(device->dev,
+					"GPMU: Unknown IRQ mask 0x%08lx in 0x%08x\n",
+					reg & (~VALID_GPMU_IRQ), reg);
+	}
+
+	for (i = 0; i < 32; i++)
+		switch (reg & BIT(i)) {
+		case BIT(WDOG_EXPITED):
+			if (test_and_clear_bit(ADRENO_DEVICE_GPMU_INITIALIZED,
+				&adreno_dev->priv)) {
+				/* Stop GPMU */
+				kgsl_regwrite(device,
+					A5XX_GPMU_CM3_SYSRESET, 1);
+				kgsl_schedule_work(&adreno_dev->gpmu_work);
+			}
+			fallthrough;
+		case BIT(FW_INTR_INFO):
+			fallthrough;
+		case BIT(LLM_ACK_ERR_INTR):
+			fallthrough;
+		case BIT(ISENS_TRIM_ERR_INTR):
+			fallthrough;
+		case BIT(ISENS_ERR_INTR):
+			fallthrough;
+		case BIT(ISENS_IDLE_ERR_INTR):
+			fallthrough;
+		case BIT(ISENS_PWR_ON_ERR_INTR):
+			dev_crit_ratelimited(device->dev,
+						"GPMU: interrupt %s(%08lx)\n",
+						gpmu_int_msg[i],
+						BIT(i));
+			break;
+	}
+}
+
+/*
+ * a5x_gpc_err_int_callback() - ISR for GPC error interrupts
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+static void a5x_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/*
+	 * GPC error is typically the result of a SW programming mistake.
+	 * Force GPU fault for this interrupt so that we can debug it
+	 * with help of register dump.
+	 */
+
+	dev_crit(device->dev, "RBBM: GPC error\n");
+	adreno_irqctrl(adreno_dev, 0);
+
+	/* Trigger a fault in the dispatcher - this will effect a restart */
+	adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT);
+	adreno_dispatcher_schedule(device);
+}
+
+u64 a5xx_read_alwayson(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 lo = 0, hi = 0;
+
+	kgsl_regread(device, A5XX_RBBM_ALWAYSON_COUNTER_LO, &lo);
+
+	/* The upper 32 bits are only reliable on A540 targets */
+	if (adreno_is_a540(adreno_dev))
+		kgsl_regread(device, A5XX_RBBM_ALWAYSON_COUNTER_HI, &hi);
+
+	return (((u64) hi) << 32) | lo;
+}
+
+
+static const struct adreno_irq_funcs a5xx_irq_funcs[32] = {
+	ADRENO_IRQ_CALLBACK(NULL),              /* 0 - RBBM_GPU_IDLE */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 1 - RBBM_AHB_ERROR */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 2 - RBBM_TRANSFER_TIMEOUT */
+	/* 3 - RBBM_ME_MASTER_SPLIT_TIMEOUT  */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback),
+	/* 4 - RBBM_PFP_MASTER_SPLIT_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback),
+	 /* 5 - RBBM_ETS_MASTER_SPLIT_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback),
+	/* 6 - RBBM_ATB_ASYNC_OVERFLOW */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback),
+	ADRENO_IRQ_CALLBACK(a5x_gpc_err_int_callback), /* 7 - GPC_ERR */
+	ADRENO_IRQ_CALLBACK(a5xx_preempt_callback),/* 8 - CP_SW */
+	ADRENO_IRQ_CALLBACK(a5xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */
+	/* 10 - CP_CCU_FLUSH_DEPTH_TS */
+	ADRENO_IRQ_CALLBACK(NULL),
+	 /* 11 - CP_CCU_FLUSH_COLOR_TS */
+	ADRENO_IRQ_CALLBACK(NULL),
+	 /* 12 - CP_CCU_RESOLVE_TS */
+	ADRENO_IRQ_CALLBACK(NULL),
+	ADRENO_IRQ_CALLBACK(NULL), /* 13 - CP_IB2_INT */
+	ADRENO_IRQ_CALLBACK(NULL), /* 14 - CP_IB1_INT */
+	ADRENO_IRQ_CALLBACK(NULL), /* 15 - CP_RB_INT */
+	/* 16 - CP_UNUSED_1 */
+	ADRENO_IRQ_CALLBACK(NULL),
+	ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNKNOWN_1 */
+	ADRENO_IRQ_CALLBACK(a5xx_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
+	/* 21 - UNUSED_2 */
+	ADRENO_IRQ_CALLBACK(NULL),
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */
+	/* 23 - MISC_HANG_DETECT */
+	ADRENO_IRQ_CALLBACK(adreno_hang_int_callback),
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 24 - UCHE_OOB_ACCESS */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 25 - UCHE_TRAP_INTR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */
+	ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 28 - GPMU_VOLTAGE_DROOP */
+	ADRENO_IRQ_CALLBACK(a5xx_gpmu_int_callback), /* 29 - GPMU_FIRMWARE */
+	ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */
+	ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */
+};
+
+static irqreturn_t a5xx_irq_handler(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	irqreturn_t ret;
+	u32 status;
+
+	kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &status);
+
+	/*
+	 * Clear all the interrupt bits except A5XX_INT_RBBM_AHB_ERROR.
+	 * The interrupt will stay asserted until it is cleared by the handler
+	 * so don't touch it yet to avoid a storm
+	 */
+	kgsl_regwrite(device, A5XX_RBBM_INT_CLEAR_CMD,
+		status & ~A5XX_INT_RBBM_AHB_ERROR);
+
+	/* Call the helper function for callbacks */
+	ret = adreno_irq_callbacks(adreno_dev, a5xx_irq_funcs, status);
+
+	trace_kgsl_a5xx_irq_status(adreno_dev, status);
+
+	/* Now clear AHB_ERROR if it was set */
+	if (status & A5XX_INT_RBBM_AHB_ERROR)
+		kgsl_regwrite(device, A5XX_RBBM_INT_CLEAR_CMD,
+			A5XX_INT_RBBM_AHB_ERROR);
+
+	return ret;
+}
+
+static bool a5xx_hw_isidle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	/*
+	 * Due to CRC idle throttling the GPU idle hysteresis on a540 can take
+	 * up to 5uS to expire
+	 */
+	if (adreno_is_a540(adreno_dev))
+		udelay(5);
+
+	kgsl_regread(device, A5XX_RBBM_STATUS, &status);
+
+	if (status & 0xfffffffe)
+		return false;
+
+	kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &status);
+
+	/* Return busy if an interrupt is pending */
+	return !((status & adreno_dev->irq_mask) ||
+		atomic_read(&adreno_dev->pending_irq_refcnt));
+}
+
+static int a5xx_clear_pending_transactions(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 mask = A5XX_VBIF_XIN_HALT_CTRL0_MASK;
+	int ret;
+
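+	/*
+	 * Halt all VBIF XIN transactions, wait for the halt ack, then
+	 * release the halt again.
+	 */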
+	kgsl_regwrite(device, A5XX_VBIF_XIN_HALT_CTRL0, mask);
+	ret = adreno_wait_for_halt_ack(device, A5XX_VBIF_XIN_HALT_CTRL1, mask);
+	kgsl_regwrite(device, A5XX_VBIF_XIN_HALT_CTRL0, 0);
+
+	return ret;
+}
+
+static bool a5xx_is_hw_collapsible(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	if (!adreno_isidle(adreno_dev))
+		return false;
+
+	/* If feature is not supported or enabled, no worry */
+	if (!adreno_dev->sptp_pc_enabled)
+		return true;
+	kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, &reg);
+	if (reg & BIT(20))
+		return false;
+	kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, &reg);
+	return !(reg & BIT(20));
+}
+
+static void a5xx_remove(struct adreno_device *adreno_dev)
+{
+	if (adreno_preemption_feature_set(adreno_dev))
+		del_timer(&adreno_dev->preempt.timer);
+}
+
+static void a5xx_power_stats(struct adreno_device *adreno_dev,
+		struct kgsl_power_stats *stats)
+{
+	static u32 rbbm0_hi;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	s64 freq = kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000;
+	struct adreno_busy_data *busy = &adreno_dev->busy_data;
+	s64 gpu_busy = 0;
+	u32 lo, hi;
+	s64 adj;
+
+	/* Sometimes this counter can go backwards, so try to detect that */
+	kgsl_regread(device, A5XX_RBBM_PERFCTR_RBBM_0_LO, &lo);
+	kgsl_regread(device, A5XX_RBBM_PERFCTR_RBBM_0_HI, &hi);
+
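+	/*
+	 * If the low word moved backwards while the high word stayed the
+	 * same, the sample is bogus; if the high word advanced, the low
+	 * word legitimately wrapped, so account for the rollover.
+	 */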
+	if (busy->gpu_busy) {
+		if (lo < busy->gpu_busy) {
+			if (hi == rbbm0_hi) {
+				dev_warn_once(device->dev,
+					"abnormal value from RBBM_0 perfcounter: %x %x\n",
+					lo, busy->gpu_busy);
+				gpu_busy = 0;
+			} else {
+				gpu_busy = (UINT_MAX - busy->gpu_busy) + lo;
+				rbbm0_hi = hi;
+			}
+		} else
+			gpu_busy = lo - busy->gpu_busy;
+	} else {
+		gpu_busy = 0;
+		rbbm0_hi = 0;
+	}
+
+	busy->gpu_busy = lo;
+
+	adj = a5xx_read_throttling_counters(adreno_dev);
+	if (-adj <= gpu_busy)
+		gpu_busy += adj;
+	else
+		gpu_busy = 0;
+
+	stats->busy_time = gpu_busy / freq;
+
+	if (adreno_is_a530(adreno_dev) && adreno_dev->lm_threshold_count)
+		kgsl_regread(device, adreno_dev->lm_threshold_count,
+			&adreno_dev->lm_threshold_cross);
+	else if (adreno_is_a540(adreno_dev))
+		adreno_dev->lm_threshold_cross = adj;
+
+	if (!device->pwrctrl.bus_control)
+		return;
+
+	stats->ram_time = counter_delta(device, adreno_dev->ram_cycles_lo,
+		&busy->bif_ram_cycles);
+
+	stats->ram_wait = counter_delta(device, adreno_dev->starved_ram_lo,
+		&busy->bif_starved_ram);
+}
+
+static int a5xx_setproperty(struct kgsl_device_private *dev_priv,
+		u32 type, void __user *value, u32 sizebytes)
+{
+	struct kgsl_device *device = dev_priv->device;
+	u32 enable;
+
+	if (type != KGSL_PROP_PWRCTRL)
+		return -ENODEV;
+
+	if (sizebytes != sizeof(enable))
+		return -EINVAL;
+
+	if (copy_from_user(&enable, value, sizeof(enable)))
+		return -EFAULT;
+
+	mutex_lock(&device->mutex);
+
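+	/*
+	 * A non-zero value restores normal power management and scaling;
+	 * zero forces the GPU active and keeps it powered by disabling
+	 * power scaling.
+	 */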
+	if (enable) {
+		device->pwrctrl.ctrl_flags = 0;
+		kgsl_pwrscale_enable(device);
+	} else {
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+		device->pwrctrl.ctrl_flags = KGSL_PWR_ON;
+		kgsl_pwrscale_disable(device, true);
+	}
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+const struct adreno_gpudev adreno_a5xx_gpudev = {
+	.reg_offsets = a5xx_register_offsets,
+	.probe = a5xx_probe,
+	.start = a5xx_start,
+	.snapshot = a5xx_snapshot,
+	.init = a5xx_init,
+	.irq_handler = a5xx_irq_handler,
+	.rb_start = a5xx_rb_start,
+	.regulator_enable = a5xx_regulator_enable,
+	.regulator_disable = a5xx_regulator_disable,
+	.pwrlevel_change_settings = a5xx_pwrlevel_change_settings,
+	.preemption_schedule = a5xx_preemption_schedule,
+	.read_alwayson = a5xx_read_alwayson,
+	.hw_isidle = a5xx_hw_isidle,
+	.power_ops = &adreno_power_operations,
+	.clear_pending_transactions = a5xx_clear_pending_transactions,
+	.remove = a5xx_remove,
+	.ringbuffer_submitcmd = a5xx_ringbuffer_submitcmd,
+	.is_hw_collapsible = a5xx_is_hw_collapsible,
+	.power_stats = a5xx_power_stats,
+	.setproperty = a5xx_setproperty,
+};

+ 314 - 0
qcom/opensource/graphics-kernel/adreno_a5xx.h

@@ -0,0 +1,314 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015-2017,2019-2020 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022,2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_A5XX_H_
+#define _ADRENO_A5XX_H_
+
+#include "a5xx_reg.h"
+
+/**
+ * struct adreno_a5xx_core - a5xx specific GPU core definitions
+ */
+struct adreno_a5xx_core {
+	/** @base: Container for the generic &struct adreno_gpu_core */
+	struct adreno_gpu_core base;
+	/** @gpmu_tsens: ID for the temperature sensor used by the GPMU */
+	unsigned int gpmu_tsens;
+	/** @max_power: Max possible power draw of a core */
+	unsigned int max_power;
+	/** @pm4fw_name: Name of the PM4 microcode file */
+	const char *pm4fw_name;
+	/** @pfpfw_name: Name of the PFP microcode file */
+	const char *pfpfw_name;
+	/** @gpmufw_name: Name of the GPMU microcode file */
+	const char *gpmufw_name;
+	/** @regfw_name: Filename for the LM registers if applicable */
+	const char *regfw_name;
+	/** @zap_name: Name of the CPZ zap file */
+	const char *zap_name;
+	/** @hwcg: List of registers and values to write for HWCG */
+	const struct kgsl_regmap_list *hwcg;
+	/** @hwcg_count: Number of registers in @hwcg */
+	u32 hwcg_count;
+	/** @vbif: List of registers and values to write for VBIF */
+	const struct kgsl_regmap_list *vbif;
+	/** @vbif_count: Number of registers in @vbif */
+	u32 vbif_count;
+	/** @highest_bank_bit: The bit of the highest DDR bank */
+	u32 highest_bank_bit;
+};
+
+#define A5XX_CP_CTXRECORD_MAGIC_REF     0x27C4BAFCUL
+/* Size of each CP preemption record */
+#define A5XX_CP_CTXRECORD_SIZE_IN_BYTES     0x10000
+/* Size of the preemption counter block (in bytes) */
+#define A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE   (16 * 4)
+
+/**
+ * struct a5xx_cp_preemption_record - CP context record for
+ * preemption.
+ * @magic: (00) Value at this offset must be equal to
+ * A5XX_CP_CTXRECORD_MAGIC_REF.
+ * @info: (04) Type of record. Written non-zero (usually) by CP.
+ * We must set it to zero for all ringbuffers.
+ * @data: (08) DATA field in SET_RENDER_MODE or checkpoint packets.
+ * Written by CP when switching out. Not used on switch-in.
+ * We must initialize it to zero.
+ * @cntl: (12) RB_CNTL, saved and restored by CP.
+ * @rptr: (16) RB_RPTR, saved and restored by CP.
+ * @wptr: (20) RB_WPTR, saved and restored by CP.
+ * @rptr_addr: (24) RB_RPTR_ADDR_LO|HI saved and restored.
+ * @rbase: (32) RB_BASE_LO|HI saved and restored.
+ * @counter: (40) Pointer to the preemption counter.
+ */
+struct a5xx_cp_preemption_record {
+	uint32_t  magic;
+	uint32_t  info;
+	uint32_t  data;
+	uint32_t  cntl;
+	uint32_t  rptr;
+	uint32_t  wptr;
+	uint64_t  rptr_addr;
+	uint64_t  rbase;
+	uint64_t  counter;
+};
+
+#define A5XX_CP_SMMU_INFO_MAGIC_REF     0x3618CDA3UL
+
+/**
+ * struct a5xx_cp_smmu_info - CP preemption SMMU info.
+ * @magic: (00) The value at this offset must be equal to
+ * A5XX_CP_SMMU_INFO_MAGIC_REF.
+ * @_pad4: (04) Reserved/padding
+ * @ttbr0: (08) Base address of the page table for the
+ * incoming context.
+ * @asid: (16) Address space identifier.
+ * @context_idr: (20) Context Identification Register value.
+ */
+struct a5xx_cp_smmu_info {
+	uint32_t  magic;
+	uint32_t  _pad4;
+	uint64_t  ttbr0;
+	uint32_t  asid;
+	uint32_t  context_idr;
+};
+
+void a5xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+unsigned int a5xx_num_registers(void);
+
+void a5xx_crashdump_init(struct adreno_device *adreno_dev);
+
+void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on);
+
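+/*
+ * Default RB_CNTL value: ring size as log2 of the size in quadwords,
+ * block size of log2(4) quadwords, and bit 27 set to disable the host
+ * RPTR shadow (see the comment in a5xx_rb_start()).
+ */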
+#define A5XX_CP_RB_CNTL_DEFAULT ((1 << 27) | ((ilog2(4) << 8) & 0x1F00) | \
+		(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F))
+/* GPMU interrupt multiplexor */
+#define FW_INTR_INFO			(0)
+#define LLM_ACK_ERR_INTR		(1)
+#define ISENS_TRIM_ERR_INTR		(2)
+#define ISENS_ERR_INTR			(3)
+#define ISENS_IDLE_ERR_INTR		(4)
+#define ISENS_PWR_ON_ERR_INTR		(5)
+#define WDOG_EXPITED			(31)
+
+#define VALID_GPMU_IRQ (\
+	BIT(FW_INTR_INFO) | \
+	BIT(LLM_ACK_ERR_INTR) | \
+	BIT(ISENS_TRIM_ERR_INTR) | \
+	BIT(ISENS_ERR_INTR) | \
+	BIT(ISENS_IDLE_ERR_INTR) | \
+	BIT(ISENS_PWR_ON_ERR_INTR) | \
+	BIT(WDOG_EXPITED))
+
+/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL */
+#define STATE_OF_CHILD			GENMASK(5, 4)
+#define STATE_OF_CHILD_01		BIT(4)
+#define STATE_OF_CHILD_11		(BIT(4) | BIT(5))
+#define IDLE_FULL_LM_SLEEP		BIT(0)
+
+/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS */
+#define WAKEUP_ACK			BIT(1)
+#define IDLE_FULL_ACK			BIT(0)
+
+/* A5XX_GPMU_GPMU_ISENSE_CTRL */
+#define	ISENSE_CGC_EN_DISABLE		BIT(0)
+
+/* A5XX_GPMU_TEMP_SENSOR_CONFIG */
+#define GPMU_BCL_ENABLED		BIT(4)
+#define GPMU_LLM_ENABLED		BIT(9)
+#define GPMU_ISENSE_STATUS		GENMASK(3, 0)
+#define GPMU_ISENSE_END_POINT_CAL_ERR	BIT(0)
+
+#define AMP_CALIBRATION_RETRY_CNT	3
+#define AMP_CALIBRATION_TIMEOUT		6
+
+/* A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK */
+#define VOLTAGE_INTR_EN			BIT(0)
+
+/* A5XX_GPMU_GPMU_PWR_THRESHOLD */
+#define PWR_THRESHOLD_VALID		0x80000000
+
+/* A5XX_GPMU_GPMU_SP_CLOCK_CONTROL */
+#define CNTL_IP_CLK_ENABLE		BIT(0)
+/* AGC */
+#define AGC_INIT_BASE			A5XX_GPMU_DATA_RAM_BASE
+#define AGC_INIT_MSG_MAGIC		(AGC_INIT_BASE + 5)
+#define AGC_MSG_BASE			(AGC_INIT_BASE + 7)
+
+#define AGC_MSG_STATE			(AGC_MSG_BASE + 0)
+#define AGC_MSG_COMMAND			(AGC_MSG_BASE + 1)
+#define AGC_MSG_PAYLOAD_SIZE		(AGC_MSG_BASE + 3)
+#define AGC_MSG_PAYLOAD			(AGC_MSG_BASE + 5)
+
+#define AGC_INIT_MSG_VALUE		0xBABEFACE
+#define AGC_POWER_CONFIG_PRODUCTION_ID	1
+
+#define AGC_LM_CONFIG			(136/4)
+#define AGC_LM_CONFIG_ENABLE_GPMU_ADAPTIVE (1)
+
+#define AGC_LM_CONFIG_ENABLE_ERROR	(3 << 4)
+#define AGC_LM_CONFIG_ISENSE_ENABLE     (1 << 4)
+
+#define AGC_THROTTLE_SEL_DCS		(1 << 8)
+#define AGC_THROTTLE_DISABLE            (2 << 8)
+
+
+#define AGC_LLM_ENABLED			(1 << 16)
+#define	AGC_GPU_VERSION_MASK		GENMASK(18, 17)
+#define AGC_GPU_VERSION_SHIFT		17
+#define AGC_BCL_DISABLED		(1 << 24)
+
+
+#define AGC_LEVEL_CONFIG		(140/4)
+
+#define LM_DCVS_LIMIT			1
+/* FW file tags */
+#define GPMU_FIRMWARE_ID		2
+#define GPMU_SEQUENCE_ID		3
+#define GPMU_INST_RAM_SIZE		0xFFF
+
+#define HEADER_MAJOR			1
+#define HEADER_MINOR			2
+#define HEADER_DATE			3
+#define HEADER_TIME			4
+#define HEADER_SEQUENCE			5
+
+#define MAX_HEADER_SIZE			10
+
+#define LM_SEQUENCE_ID			1
+#define MAX_SEQUENCE_ID			3
+
+#define GPMU_ISENSE_SAVE	(A5XX_GPMU_DATA_RAM_BASE + 200/4)
+/* LM defaults */
+#define LM_DEFAULT_LIMIT		6000
+#define A530_DEFAULT_LEAKAGE		0x004E001A
+
+/**
+ * to_a5xx_core - return the a5xx specific GPU core struct
+ * @adreno_dev: An Adreno GPU device handle
+ *
+ * Returns:
+ * A pointer to the a5xx specific GPU core struct
+ */
+static inline const struct adreno_a5xx_core *
+to_a5xx_core(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpu_core *core = adreno_dev->gpucore;
+
+	return container_of(core, struct adreno_a5xx_core, base);
+}
+
+/* Preemption functions */
+void a5xx_preemption_trigger(struct adreno_device *adreno_dev);
+void a5xx_preemption_schedule(struct adreno_device *adreno_dev);
+void a5xx_preemption_start(struct adreno_device *adreno_dev);
+int a5xx_preemption_init(struct adreno_device *adreno_dev);
+
+/**
+ * a5xx_preemption_post_ibsubmit - Insert commands following a submission
+ * @adreno_dev: Adreno GPU handle
+ * @cmds: Pointer to the ringbuffer to insert opcodes
+ *
+ * Return: The number of opcodes written to @cmds
+ */
+u32 a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds);
+
+/**
+ * a5xx_preemption_pre_ibsubmit - Insert opcodes before a submission
+ * @adreno_dev: Adreno GPU handle
+ * @rb: The ringbuffer being written
+ * @drawctxt: The draw context being written
+ * @cmds: Pointer to the ringbuffer to insert opcodes
+ *
+ * Return: The number of opcodes written to @cmds
+ */
+u32 a5xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 *cmds);
+
+void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit);
+
+u64 a5xx_read_alwayson(struct adreno_device *adreno_dev);
+
+extern const struct adreno_perfcounters adreno_a5xx_perfcounters;
+
+/**
+ * a5xx_ringbuffer_init - Initialize the ringbuffers
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Initialize the ringbuffer(s) for a5xx.
+ * Return: 0 on success or negative on failure
+ */
+int a5xx_ringbuffer_init(struct adreno_device *adreno_dev);
+
+/**
+ * a5xx_ringbuffer_addcmds - Submit a command to the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ * @rb: Pointer to the ringbuffer to submit on
+ * @drawctxt: Pointer to the draw context for the submission, or NULL for
+ * internal submissions
+ * @flags: Flags for the submission
+ * @in: Commands to write to the ringbuffer
+ * @dwords: Size of @in (in dwords)
+ * @timestamp: Timestamp for the submission
+ * @time: Optional pointer to a submit time structure
+ *
+ * Submit a command to the ringbuffer.
+ * Return: 0 on success or negative on failure
+ */
+int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 flags, u32 *in, u32 dwords, u32 timestamp,
+		struct adreno_submit_time *time);
+
+/**
+ * a5xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ * @cmdobj: Pointer to a user command object
+ * @flags: Internal submit flags
+ * @time: Optional pointer to a adreno_submit_time container
+ *
+ * Return: 0 on success or negative on failure
+ */
+int a5xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time);
+
+int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time, bool sync);
+
+static inline bool a5xx_has_gpmu(struct adreno_device *adreno_dev)
+{
+	return (adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev));
+}
+
+#ifdef CONFIG_QCOM_KGSL_CORESIGHT
+void a5xx_coresight_init(struct adreno_device *device);
+#else
+static inline void a5xx_coresight_init(struct adreno_device *device) { }
+#endif
+
+#endif

+ 208 - 0
qcom/opensource/graphics-kernel/adreno_a5xx_coresight.c

@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a5xx.h"
+#include "adreno_coresight.h"
+
+static struct adreno_coresight_register a5xx_coresight_registers[] = {
+	{ A5XX_RBBM_CFG_DBGBUS_SEL_A },
+	{ A5XX_RBBM_CFG_DBGBUS_SEL_B },
+	{ A5XX_RBBM_CFG_DBGBUS_SEL_C },
+	{ A5XX_RBBM_CFG_DBGBUS_SEL_D },
+	{ A5XX_RBBM_CFG_DBGBUS_CNTLT },
+	{ A5XX_RBBM_CFG_DBGBUS_CNTLM },
+	{ A5XX_RBBM_CFG_DBGBUS_OPL },
+	{ A5XX_RBBM_CFG_DBGBUS_OPE },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTL_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTL_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTL_2 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTL_3 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKL_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKL_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKL_2 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKL_3 },
+	{ A5XX_RBBM_CFG_DBGBUS_BYTEL_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_BYTEL_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTE_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTE_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTE_2 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTE_3 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKE_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKE_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKE_2 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKE_3 },
+	{ A5XX_RBBM_CFG_DBGBUS_NIBBLEE },
+	{ A5XX_RBBM_CFG_DBGBUS_PTRC0 },
+	{ A5XX_RBBM_CFG_DBGBUS_PTRC1 },
+	{ A5XX_RBBM_CFG_DBGBUS_LOADREG },
+	{ A5XX_RBBM_CFG_DBGBUS_IDX },
+	{ A5XX_RBBM_CFG_DBGBUS_CLRC },
+	{ A5XX_RBBM_CFG_DBGBUS_LOADIVT },
+	{ A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC },
+	{ A5XX_RBBM_CFG_DBGBUS_OVER },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT0 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT1 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT2 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT3 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT4 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT5 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 },
+	{ A5XX_RBBM_CFG_DBGBUS_MISR0 },
+	{ A5XX_RBBM_CFG_DBGBUS_MISR1 },
+	{ A5XX_RBBM_AHB_DBG_CNTL },
+	{ A5XX_RBBM_READ_AHB_THROUGH_DBG },
+	{ A5XX_RBBM_DBG_LO_HI_GPIO },
+	{ A5XX_RBBM_EXT_TRACE_BUS_CNTL },
+	{ A5XX_RBBM_EXT_VBIF_DBG_CNTL },
+};
+
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a5xx_coresight_registers[0]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a5xx_coresight_registers[1]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a5xx_coresight_registers[2]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a5xx_coresight_registers[3]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a5xx_coresight_registers[4]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a5xx_coresight_registers[5]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a5xx_coresight_registers[6]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a5xx_coresight_registers[7]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a5xx_coresight_registers[8]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a5xx_coresight_registers[9]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a5xx_coresight_registers[10]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a5xx_coresight_registers[11]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a5xx_coresight_registers[12]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a5xx_coresight_registers[13]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a5xx_coresight_registers[14]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a5xx_coresight_registers[15]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a5xx_coresight_registers[16]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a5xx_coresight_registers[17]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a5xx_coresight_registers[18]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a5xx_coresight_registers[19]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a5xx_coresight_registers[20]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a5xx_coresight_registers[21]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a5xx_coresight_registers[22]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a5xx_coresight_registers[23]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a5xx_coresight_registers[24]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a5xx_coresight_registers[25]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a5xx_coresight_registers[26]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a5xx_coresight_registers[27]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a5xx_coresight_registers[28]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a5xx_coresight_registers[29]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a5xx_coresight_registers[30]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a5xx_coresight_registers[31]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a5xx_coresight_registers[32]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_event_logic,
+				&a5xx_coresight_registers[33]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_over, &a5xx_coresight_registers[34]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count0, &a5xx_coresight_registers[35]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count1, &a5xx_coresight_registers[36]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count2, &a5xx_coresight_registers[37]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count3, &a5xx_coresight_registers[38]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count4, &a5xx_coresight_registers[39]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count5, &a5xx_coresight_registers[40]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_addr,
+				&a5xx_coresight_registers[41]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf0,
+				&a5xx_coresight_registers[42]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1,
+				&a5xx_coresight_registers[43]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2,
+				&a5xx_coresight_registers[44]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf3,
+				&a5xx_coresight_registers[45]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf4,
+				&a5xx_coresight_registers[46]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr0, &a5xx_coresight_registers[47]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr1, &a5xx_coresight_registers[48]);
+static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a5xx_coresight_registers[49]);
+static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg,
+				&a5xx_coresight_registers[50]);
+static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a5xx_coresight_registers[51]);
+static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a5xx_coresight_registers[52]);
+static ADRENO_CORESIGHT_ATTR(ext_vbif_dbg_cntl, &a5xx_coresight_registers[53]);
+
+static struct attribute *a5xx_coresight_attrs[] = {
+	&coresight_attr_cfg_dbgbus_sel_a.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_b.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_c.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_d.attr.attr,
+	&coresight_attr_cfg_dbgbus_cntlt.attr.attr,
+	&coresight_attr_cfg_dbgbus_cntlm.attr.attr,
+	&coresight_attr_cfg_dbgbus_opl.attr.attr,
+	&coresight_attr_cfg_dbgbus_ope.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_bytel_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_bytel_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_nibblee.attr.attr,
+	&coresight_attr_cfg_dbgbus_ptrc0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ptrc1.attr.attr,
+	&coresight_attr_cfg_dbgbus_loadreg.attr.attr,
+	&coresight_attr_cfg_dbgbus_idx.attr.attr,
+	&coresight_attr_cfg_dbgbus_clrc.attr.attr,
+	&coresight_attr_cfg_dbgbus_loadivt.attr.attr,
+	&coresight_attr_cfg_dbgbus_event_logic.attr.attr,
+	&coresight_attr_cfg_dbgbus_over.attr.attr,
+	&coresight_attr_cfg_dbgbus_count0.attr.attr,
+	&coresight_attr_cfg_dbgbus_count1.attr.attr,
+	&coresight_attr_cfg_dbgbus_count2.attr.attr,
+	&coresight_attr_cfg_dbgbus_count3.attr.attr,
+	&coresight_attr_cfg_dbgbus_count4.attr.attr,
+	&coresight_attr_cfg_dbgbus_count5.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_addr.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf0.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf1.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf2.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf3.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf4.attr.attr,
+	&coresight_attr_cfg_dbgbus_misr0.attr.attr,
+	&coresight_attr_cfg_dbgbus_misr1.attr.attr,
+	&coresight_attr_ahb_dbg_cntl.attr.attr,
+	&coresight_attr_read_ahb_through_dbg.attr.attr,
+	&coresight_attr_dbg_lo_hi_gpio.attr.attr,
+	&coresight_attr_ext_trace_bus_cntl.attr.attr,
+	&coresight_attr_ext_vbif_dbg_cntl.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group a5xx_coresight_group = {
+	.attrs = a5xx_coresight_attrs,
+};
+
+static const struct attribute_group *a5xx_coresight_groups[] = {
+	&a5xx_coresight_group,
+	NULL,
+};
+
+static const struct adreno_coresight a5xx_coresight = {
+	.registers = a5xx_coresight_registers,
+	.count = ARRAY_SIZE(a5xx_coresight_registers),
+	.groups = a5xx_coresight_groups,
+};
+
+void a5xx_coresight_init(struct adreno_device *adreno_dev)
+{
+	adreno_coresight_add_device(adreno_dev, "coresight-gfx",
+		&a5xx_coresight, &adreno_dev->gx_coresight);
+}

+ 1406 - 0
qcom/opensource/graphics-kernel/adreno_a5xx_packets.h

@@ -0,0 +1,1406 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2016,2019, The Linux Foundation. All rights reserved.
+ */
+
+struct adreno_critical_fixup {
+	unsigned int lo_offset;
+	unsigned int hi_offset;
+	int buffer;
+	uint64_t mem_offset;
+};
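+
+/*
+ * Illustrative sketch, not part of the original source: one plausible way a
+ * single fixup entry could be applied.  The lo/hi dword slots in a packet
+ * buffer are assumed to receive the two halves of a GPU address built from
+ * the target buffer's base address plus mem_offset; the helper name is
+ * hypothetical.
+ */
+static inline void _apply_critical_fixup(unsigned int *cmds,
+	const struct adreno_critical_fixup *fixup, uint64_t target_gpuaddr)
+{
+	uint64_t gpuaddr = target_gpuaddr + fixup->mem_offset;
+
+	cmds[fixup->lo_offset] = lower_32_bits(gpuaddr);
+	cmds[fixup->hi_offset] = upper_32_bits(gpuaddr);
+}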
+
+static unsigned int _a5xx_critical_pkts[] = {
+	0x400E0601, /* [0x0000] == TYPE4 == */
+	0x00000002, /* [0x0001] A5X_HLSQ_MODE_CNTL (0x0E06)*/
+	0x40E78A01, /* [0x0002] == TYPE4 == */
+	0x000FFFFF, /* [0x0003] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/
+	0x48E78401, /* [0x0004] == TYPE4 == */
+	0x00000005, /* [0x0005] A5X_HLSQ_CNTL_0_CTX_0 (0xE784)*/
+	0x40E78501, /* [0x0006] == TYPE4 == */
+	0x00000009, /* [0x0007] A5X_HLSQ_CNTL_1_CTX_0 (0xE785)*/
+	0x48E78B85, /* [0x0008] == TYPE4 == */
+	0x00000001, /* [0x0009] A5X_HLSQ_VS_CONFIG_CTX_0 (0xE78B)*/
+	0x00002085, /* [0x000A] A5X_HLSQ_PS_CONFIG_CTX_0 (0xE78C)*/
+	0x00002084, /* [0x000B] A5X_HLSQ_HS_CONFIG_CTX_0 (0xE78D)*/
+	0x00002084, /* [0x000C] A5X_HLSQ_DS_CONFIG_CTX_0 (0xE78E)*/
+	0x00002084, /* [0x000D] A5X_HLSQ_GS_CONFIG_CTX_0 (0xE78F)*/
+	0x40E58485, /* [0x000E] == TYPE4 == */
+	0x00000001, /* [0x000F] A5X_SP_VS_CONFIG_CTX_0 (0xE584)*/
+	0x00002085, /* [0x0010] A5X_SP_PS_CONFIG_CTX_0 (0xE585)*/
+	0x00002084, /* [0x0011] A5X_SP_HS_CONFIG_CTX_0 (0xE586)*/
+	0x00002084, /* [0x0012] A5X_SP_DS_CONFIG_CTX_0 (0xE587)*/
+	0x00002084, /* [0x0013] A5X_SP_GS_CONFIG_CTX_0 (0xE588)*/
+	0x40E79101, /* [0x0014] == TYPE4 == */
+	0x00000004, /* [0x0015] A5X_HLSQ_VS_CNTL_CTX_0 (0xE791)*/
+	0x40E79201, /* [0x0016] == TYPE4 == */
+	0x00000002, /* [0x0017] A5X_HLSQ_PS_CNTL_CTX_0 (0xE792)*/
+	0x48E58001, /* [0x0018] == TYPE4 == */
+	0x00000010, /* [0x0019] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/
+	0x70B00043, /* [0x001A] == TYPE7: LOAD_STATE (30) == */
+	0x00A00000, /* [0x001B] */
+	0x00000000, /* [0x001C] */
+	0x00000000, /* [0x001D] */
+	0x20020003, /* [0x001E] */
+	0x56D81803, /* [0x001F] */
+	0x00000003, /* [0x0020] */
+	0x20150000, /* [0x0021] */
+	0x00000000, /* [0x0022] */
+	0x00000200, /* [0x0023] */
+	0x00000000, /* [0x0024] */
+	0x201100F4, /* [0x0025] */
+	0x00000000, /* [0x0026] */
+	0x00000500, /* [0x0027] */
+	0x00000C21, /* [0x0028] */
+	0x20154004, /* [0x0029] */
+	0x00000C20, /* [0x002A] */
+	0x20154003, /* [0x002B] */
+	0x00000C23, /* [0x002C] */
+	0x20154008, /* [0x002D] */
+	0x00000C22, /* [0x002E] */
+	0x20156007, /* [0x002F] */
+	0x00000000, /* [0x0030] */
+	0x20554005, /* [0x0031] */
+	0x3F800000, /* [0x0032] */
+	0x20554006, /* [0x0033] */
+	0x00000000, /* [0x0034] */
+	0x03000000, /* [0x0035] */
+	0x20050000, /* [0x0036] */
+	0x46F00009, /* [0x0037] */
+	0x201F0000, /* [0x0038] */
+	0x4398000A, /* [0x0039] */
+	0x201F0009, /* [0x003A] */
+	0x43980809, /* [0x003B] */
+	0x20180009, /* [0x003C] */
+	0x46100809, /* [0x003D] */
+	0x00091014, /* [0x003E] */
+	0x62050009, /* [0x003F] */
+	0x00000000, /* [0x0040] */
+	0x00000500, /* [0x0041] */
+	0x04800006, /* [0x0042] */
+	0xC2C61300, /* [0x0043] */
+	0x0280000E, /* [0x0044] */
+	0xC2C61310, /* [0x0045] */
+	0x00000000, /* [0x0046] */
+	0x04800000, /* [0x0047] */
+	0x00000000, /* [0x0048] */
+	0x05000000, /* [0x0049] */
+	0x00000000, /* [0x004A] */
+	0x00000000, /* [0x004B] */
+	0x00000000, /* [0x004C] */
+	0x00000000, /* [0x004D] */
+	0x00000000, /* [0x004E] */
+	0x00000000, /* [0x004F] */
+	0x00000000, /* [0x0050] */
+	0x00000000, /* [0x0051] */
+	0x00000000, /* [0x0052] */
+	0x00000000, /* [0x0053] */
+	0x00000000, /* [0x0054] */
+	0x00000000, /* [0x0055] */
+	0x00000000, /* [0x0056] */
+	0x00000000, /* [0x0057] */
+	0x00000000, /* [0x0058] */
+	0x00000000, /* [0x0059] */
+	0x00000000, /* [0x005A] */
+	0x00000000, /* [0x005B] */
+	0x00000000, /* [0x005C] */
+	0x00000000, /* [0x005D] */
+	0x70B00023, /* [0x005E] == TYPE7: LOAD_STATE (30) == */
+	0x00700000, /* [0x005F] */
+	0x00000000, /* [0x0060] */
+	0x00000000, /* [0x0061] */
+	0x00000000, /* [0x0062] */
+	0x03000000, /* [0x0063] */
+	0x00000000, /* [0x0064] */
+	0x00000000, /* [0x0065] */
+	0x00000000, /* [0x0066] */
+	0x00000000, /* [0x0067] */
+	0x00000000, /* [0x0068] */
+	0x00000000, /* [0x0069] */
+	0x00000000, /* [0x006A] */
+	0x00000000, /* [0x006B] */
+	0x00000000, /* [0x006C] */
+	0x00000000, /* [0x006D] */
+	0x00000000, /* [0x006E] */
+	0x00000000, /* [0x006F] */
+	0x00000000, /* [0x0070] */
+	0x00000000, /* [0x0071] */
+	0x00000000, /* [0x0072] */
+	0x00000000, /* [0x0073] */
+	0x00000000, /* [0x0074] */
+	0x00000000, /* [0x0075] */
+	0x00000000, /* [0x0076] */
+	0x00000000, /* [0x0077] */
+	0x00000000, /* [0x0078] */
+	0x00000000, /* [0x0079] */
+	0x00000000, /* [0x007A] */
+	0x00000000, /* [0x007B] */
+	0x00000000, /* [0x007C] */
+	0x00000000, /* [0x007D] */
+	0x00000000, /* [0x007E] */
+	0x00000000, /* [0x007F] */
+	0x00000000, /* [0x0080] */
+	0x00000000, /* [0x0081] */
+	0x70B08003, /* [0x0082] == TYPE7: LOAD_STATE (30) == */
+	0x00620000, /* [0x0083] */
+	0x00000000, /* [0x0084] */
+	0x00000000, /* [0x0085] */
+	0x70B08003, /* [0x0086] == TYPE7: LOAD_STATE (30) == */
+	0x01220008, /* [0x0087] */
+	0x00000000, /* [0x0088] */
+	0x00000000, /* [0x0089] */
+	0x70B0000B, /* [0x008A] == TYPE7: LOAD_STATE (30) == */
+	0x01180000, /* [0x008B] */
+	0x00000001, /* [0x008C] */
+	0x00000000, /* [0x008D] */
+	0x00000000, /* [0x008E] */
+	0x00000000, /* [0x008F] */
+	0x00000000, /* [0x0090] */
+	0x00000000, /* [0x0091] */
+	0x00000000, /* [0x0092] */
+	0x00000000, /* [0x0093] */
+	0x00000000, /* [0x0094] */
+	0x01400000, /* [0x0095] */
+	0x70460001, /* [0x0096] == TYPE7: EVENT_WRITE (46) == */
+	0x00000019, /* [0x0097] */
+	0x70460004, /* [0x0098] == TYPE7: EVENT_WRITE (46) == */
+	0x0000001D, /* [0x0099] */
+	0x00000000, /* [0x009A] */
+	0x00000000, /* [0x009B] */
+	0x00000001, /* [0x009C] */
+	0x70460004, /* [0x009D] == TYPE7: EVENT_WRITE (46) == */
+	0x0000001C, /* [0x009E] */
+	0x00000000, /* [0x009F] */
+	0x00000000, /* [0x00A0] */
+	0x00000001, /* [0x00A1] */
+	0x480E9185, /* [0x00A2] == TYPE4 == */
+	0x00000000, /* [0x00A3] A5X_UCHE_CACHE_INVALIDATE_MIN_LO (0x0E91)*/
+	0x00000000, /* [0x00A4] A5X_UCHE_CACHE_INVALIDATE_MIN_HI (0x0E92)*/
+	0x00000000, /* [0x00A5] A5X_UCHE_CACHE_INVALIDATE_MAX_LO (0x0E93)*/
+	0x00000000, /* [0x00A6] A5X_UCHE_CACHE_INVALIDATE_MAX_HI (0x0E94)*/
+	0x00000012, /* [0x00A7] A5X_UCHE_CACHE_INVALIDATE (0x0E95)*/
+	0x70268000, /* [0x00A8] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x40E78A01, /* [0x00A9] == TYPE4 == */
+	0x000FFFFF, /* [0x00AA] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/
+	0x70D08003, /* [0x00AB] == TYPE7: PERFCOUNTER_ACTION (50) == */
+	0x00000000, /* [0x00AC] */
+	0x00000000, /* [0x00AD] */
+	0x00000000, /* [0x00AE] */
+	0x70D08003, /* [0x00AF] == TYPE7: PERFCOUNTER_ACTION (50) == */
+	0x00000010, /* [0x00B0] */
+	0x00000000, /* [0x00B1] */
+	0x00000000, /* [0x00B2] */
+	0x70268000, /* [0x00B3] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x48E38C01, /* [0x00B4] == TYPE4 == */
+	0xFFFFFFFF, /* [0x00B5] A5X_PC_RESTART_INDEX_CTX_0 (0xE38C)*/
+	0x40E38801, /* [0x00B6] == TYPE4 == */
+	0x00000012, /* [0x00B7] A5X_PC_RASTER_CNTL_CTX_0 (0xE388)*/
+	0x48E09102, /* [0x00B8] == TYPE4 == */
+	0xFFC00010, /* [0x00B9] A5X_GRAS_SU_POINT_MINMAX_CTX_0 (0xE091)*/
+	0x00000008, /* [0x00BA] A5X_GRAS_SU_POINT_SIZE_CTX_0 (0xE092)*/
+	0x40E09901, /* [0x00BB] == TYPE4 == */
+	0x00000000, /* [0x00BC] A5X_GRAS_SU_CONSERVATIVE_RAS_CNTL_CTX_0
+		     * (0xE099)
+		     */
+	0x48E0A401, /* [0x00BD] == TYPE4 == */
+	0x00000000, /* [0x00BE] A5X_GRAS_SC_SCREEN_SCISSOR_CNTL_CTX_0 (0xE0A4)*/
+	0x48E58A01, /* [0x00BF] == TYPE4 == */
+	0x00000000, /* [0x00C0] A5X_SP_VS_CONFIG_MAX_CONST_CTX_0 (0xE58A)*/
+	0x40E58B01, /* [0x00C1] == TYPE4 == */
+	0x00000000, /* [0x00C2] A5X_SP_PS_CONFIG_MAX_CONST_CTX_0 (0xE58B)*/
+	0x480CC601, /* [0x00C3] == TYPE4 == */
+	0x00000044, /* [0x00C4] A5X_RB_MODE_CNTL (0x0CC6)*/
+	0x400CC401, /* [0x00C5] == TYPE4 == */
+	0x00100000, /* [0x00C6] A5X_RB_DBG_ECO_CNTL (0x0CC4)*/
+	0x400E4201, /* [0x00C7] == TYPE4 == */
+	0x00000000, /* [0x00C8] A5X_VFD_MODE_CNTL (0x0E42)*/
+	0x480D0201, /* [0x00C9] == TYPE4 == */
+	0x0000001F, /* [0x00CA] A5X_PC_MODE_CNTL (0x0D02)*/
+	0x480EC201, /* [0x00CB] == TYPE4 == */
+	0x0000001E, /* [0x00CC] A5X_SP_MODE_CNTL (0x0EC2)*/
+	0x400EC001, /* [0x00CD] == TYPE4 == */
+	0x40000800, /* [0x00CE] A5X_SP_DBG_ECO_CNTL (0x0EC0)*/
+	0x400F0201, /* [0x00CF] == TYPE4 == */
+	0x00000544, /* [0x00D0] A5X_TPL1_MODE_CNTL (0x0F02)*/
+	0x400E0002, /* [0x00D1] == TYPE4 == */
+	0x00000080, /* [0x00D2] A5X_HLSQ_TIMEOUT_THRESHOLD_0 (0x0E00)*/
+	0x00000000, /* [0x00D3] A5X_HLSQ_TIMEOUT_THRESHOLD_1 (0x0E01)*/
+	0x400E6001, /* [0x00D4] == TYPE4 == */
+	0x00000400, /* [0x00D5] A5X_VPC_DBG_ECO_CNTL (0x0E60)*/
+	0x400E0601, /* [0x00D6] == TYPE4 == */
+	0x00000001, /* [0x00D7] A5X_HLSQ_MODE_CNTL (0x0E06)*/
+	0x480E6201, /* [0x00D8] == TYPE4 == */
+	0x00000000, /* [0x00D9] A5X_VPC_MODE_CNTL (0x0E62)*/
+	0x70EC8005, /* [0x00DA] == TYPE7: SET_RENDER_MODE (6C) == */
+	0x00000002, /* [0x00DB] */
+	0x00000000, /* [0x00DC] */
+	0x00000000, /* [0x00DD] */
+	0x00000008, /* [0x00DE] */
+	0x00000001, /* [0x00DF] */
+	0x40E14001, /* [0x00E0] == TYPE4 == */
+	0x00000204, /* [0x00E1] A5X_RB_CNTL_CTX_0 (0xE140)*/
+	0x709D0001, /* [0x00E2] == TYPE7: SKIP_IB2_ENABLE_GLOBAL (1D) == */
+	0x00000000, /* [0x00E3] */
+	0x48E0EA02, /* [0x00E4] == TYPE4 == */
+	0x00000000, /* [0x00E5] A5X_GRAS_SC_WINDOW_SCISSOR_TL_CTX_0 (0xE0EA)*/
+	0x001F0073, /* [0x00E6] A5X_GRAS_SC_WINDOW_SCISSOR_BR_CTX_0 (0xE0EB)*/
+	0x48E21102, /* [0x00E7] == TYPE4 == */
+	0x00000000, /* [0x00E8] A5X_RB_RESOLVE_CNTL_1_CTX_0 (0xE211)*/
+	0x00000000, /* [0x00E9] A5X_RB_RESOLVE_CNTL_2_CTX_0 (0xE212)*/
+	0x480BC283, /* [0x00EA] == TYPE4 == */
+	0x00000204, /* [0x00EB] UNKNOWN (0x0BC2)*/
+	0x00000000, /* [0x00EC] UNKNOWN (0x0BC3)*/
+	0x00000000, /* [0x00ED] UNKNOWN (0x0BC4)*/
+	0x400BC502, /* [0x00EE] == TYPE4 == */
+	0x00000000, /* [0x00EF] UNKNOWN (0x0BC5)*/
+	0x00000000, /* [0x00F0] UNKNOWN (0x0BC6)*/
+	0x480BD001, /* [0x00F1] == TYPE4 == */
+	0x01100000, /* [0x00F2] UNKNOWN (0x0BD0)*/
+	0x480BE002, /* [0x00F3] == TYPE4 == */
+	0x00000000, /* [0x00F4] UNKNOWN (0x0BE0)*/
+	0x00000000, /* [0x00F5] UNKNOWN (0x0BE1)*/
+	0x480C0001, /* [0x00F6] == TYPE4 == */
+	0x00000020, /* [0x00F7] A5X_VSC_PIPE_DATA_LENGTH_0 (0x0C00)*/
+	0x48E3B001, /* [0x00F8] == TYPE4 == */
+	0x00000003, /* [0x00F9] A5X_PC_POWER_CNTL_CTX_0 (0xE3B0)*/
+	0x48E4F001, /* [0x00FA] == TYPE4 == */
+	0x00000003, /* [0x00FB] A5X_VFD_POWER_CNTL_CTX_0 (0xE4F0)*/
+	0x480E6201, /* [0x00FC] == TYPE4 == */
+	0x00000001, /* [0x00FD] A5X_VPC_MODE_CNTL (0x0E62)*/
+	0x70460001, /* [0x00FE] == TYPE7: EVENT_WRITE (46) == */
+	0x0000002C, /* [0x00FF] */
+	0x40E1D001, /* [0x0100] == TYPE4 == */
+	0x00000000, /* [0x0101] A5X_RB_WINDOW_OFFSET_CTX_0 (0xE1D0)*/
+	0x70BF8003, /* [0x0102] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */
+	0x00000000, /* [0x0103] */
+	0x00000000, /* [0x0104] */
+	0x000000A0, /* [0x0105] */
+	0x70460001, /* [0x0106] == TYPE7: EVENT_WRITE (46) == */
+	0x0000002D, /* [0x0107] */
+	0x70460004, /* [0x0108] == TYPE7: EVENT_WRITE (46) == */
+	0x00000004, /* [0x0109] */
+	0x00000000, /* [0x010A] */
+	0x00000000, /* [0x010B] */
+	0x00000000, /* [0x010C] */
+	0x70268000, /* [0x010D] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x480E6201, /* [0x010E] == TYPE4 == */
+	0x00000000, /* [0x010F] A5X_VPC_MODE_CNTL (0x0E62)*/
+	0x48E3B001, /* [0x0110] == TYPE4 == */
+	0x00000003, /* [0x0111] A5X_PC_POWER_CNTL_CTX_0 (0xE3B0)*/
+	0x48E4F001, /* [0x0112] == TYPE4 == */
+	0x00000003, /* [0x0113] A5X_VFD_POWER_CNTL_CTX_0 (0xE4F0)*/
+	0x70268000, /* [0x0114] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x400CC701, /* [0x0115] == TYPE4 == */
+	0x7C13C080, /* [0x0116] A5X_RB_CCU_CNTL (0x0CC7)*/
+	0x70EC8005, /* [0x0117] == TYPE7: SET_RENDER_MODE (6C) == */
+	0x00000001, /* [0x0118] */
+	0x00000000, /* [0x0119] */
+	0x00000000, /* [0x011A] */
+	0x00000010, /* [0x011B] */
+	0x00000001, /* [0x011C] */
+	0x70EA0001, /* [0x011D] == TYPE7: PREEMPT_ENABLE_LOCAL (6A) == */
+	0x00000000, /* [0x011E] */
+	0x48E0EA02, /* [0x011F] == TYPE4 == */
+	0x00000000, /* [0x0120] A5X_GRAS_SC_WINDOW_SCISSOR_TL_CTX_0 (0xE0EA)*/
+	0x001F0073, /* [0x0121] A5X_GRAS_SC_WINDOW_SCISSOR_BR_CTX_0 (0xE0EB)*/
+	0x48E21102, /* [0x0122] == TYPE4 == */
+	0x00000000, /* [0x0123] A5X_RB_RESOLVE_CNTL_1_CTX_0 (0xE211)*/
+	0x00030007, /* [0x0124] A5X_RB_RESOLVE_CNTL_2_CTX_0 (0xE212)*/
+	0x70138000, /* [0x0125] == TYPE7: WAIT_FOR_ME (13) == */
+	0x70640001, /* [0x0126] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */
+	0x00000000, /* [0x0127] */
+	0x702F8005, /* [0x0128] == TYPE7: SET_BIN_DATA (2F) == */
+	0x00010000, /* [0x0129] */
+	0x00000000, /* [0x012A] */
+	0x00000000, /* [0x012B] */
+	0x00000000, /* [0x012C] */
+	0x00000000, /* [0x012D] */
+	0x40E1D001, /* [0x012E] == TYPE4 == */
+	0x00000000, /* [0x012F] A5X_RB_WINDOW_OFFSET_CTX_0 (0xE1D0)*/
+	0x40E2A201, /* [0x0130] == TYPE4 == */
+	0x00000001, /* [0x0131] A5X_VPC_SO_OVERRIDE_CTX_0 (0xE2A2)*/
+	0x70640001, /* [0x0132] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */
+	0x00000000, /* [0x0133] */
+	0x48E1B285, /* [0x0134] == TYPE4 == */
+	0x00000001, /* [0x0135] A5X_RB_DEPTH_BUFFER_INFO_CTX_0 (0xE1B2)*/
+	0x00004000, /* [0x0136] A5X_RB_DEPTH_BUFFER_BASE_LO_CTX_0 (0xE1B3)*/
+	0x00000000, /* [0x0137] A5X_RB_DEPTH_BUFFER_BASE_HI_CTX_0 (0xE1B4)*/
+	0x00000004, /* [0x0138] A5X_RB_DEPTH_BUFFER_PITCH_CTX_0 (0xE1B5)*/
+	0x000000C0, /* [0x0139] A5X_RB_DEPTH_BUFFER_ARRAY_PITCH_CTX_0 (0xE1B6)*/
+	0x48E09801, /* [0x013A] == TYPE4 == */
+	0x00000001, /* [0x013B] A5X_GRAS_SU_DEPTH_BUFFER_INFO_CTX_0 (0xE098)*/
+	0x40E24083, /* [0x013C] == TYPE4 == */
+	0x00000000, /* [0x013D] A5X_RB_DEPTH_FLAG_BUFFER_BASE_LO_CTX_0
+		     * (0xE240)
+		     */
+	0x00000000, /* [0x013E] A5X_RB_DEPTH_FLAG_BUFFER_BASE_HI_CTX_0
+		     * (0xE241)
+		     */
+	0x00000000, /* [0x013F] A5X_RB_DEPTH_FLAG_BUFFER_PITCH_CTX_0 (0xE242)*/
+	0x40E15285, /* [0x0140] == TYPE4 == */
+	0x00001230, /* [0x0141] A5X_RB_MRT_BUFFER_INFO_0_CTX_0 (0xE152)*/
+	0x00000008, /* [0x0142] A5X_RB_MRT_BUFFER_PITCH_0_CTX_0 (0xE153)*/
+	0x00000100, /* [0x0143] A5X_RB_MRT_BUFFER_ARRAY_PITCH_0_CTX_0 (0xE154)*/
+	0x00000000, /* [0x0144] A5X_RB_MRT_BUFFER_BASE_LO_0_CTX_0 (0xE155)*/
+	0x00000000, /* [0x0145] A5X_RB_MRT_BUFFER_BASE_HI_0_CTX_0 (0xE156)*/
+	0x40E40801, /* [0x0146] == TYPE4 == */
+	0x00000000, /* [0x0147] A5X_VFD_INDEX_OFFSET_CTX_0 (0xE408)*/
+	0x48E40901, /* [0x0148] == TYPE4 == */
+	0x00000000, /* [0x0149] A5X_VFD_INSTANCE_START_OFFSET_CTX_0 (0xE409)*/
+	0x70BF8003, /* [0x014A] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */
+	0x00000000, /* [0x014B] */
+	0x00000000, /* [0x014C] */
+	0x00000112, /* [0x014D] */
+	0x70230001, /* [0x014E] == TYPE7: SKIP_IB2_ENABLE_LOCAL (23) == */
+	0x00000000, /* [0x014F] */
+	0x70BF8003, /* [0x0150] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */
+	0x00000000, /* [0x0151] */
+	0x00000000, /* [0x0152] */
+	0x0000001B, /* [0x0153] */
+	0x70EC8005, /* [0x0154] == TYPE7: SET_RENDER_MODE (6C) == */
+	0x00000001, /* [0x0155] */
+	0x00000000, /* [0x0156] */
+	0x00000000, /* [0x0157] */
+	0x00000000, /* [0x0158] */
+	0x00000001, /* [0x0159] */
+	0x70438003, /* [0x015A] == TYPE7: SET_DRAW_STATE (43) == */
+	0x00080059, /* [0x015B] */
+	0x00000000, /* [0x015C] */
+	0x00000000, /* [0x015D] */
+	0x70388003, /* [0x015E] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00000888, /* [0x015F] */
+	0x00000000, /* [0x0160] */
+	0x00000002, /* [0x0161] */
+	0x70A88003, /* [0x0162] == TYPE7: DRAW_INDIRECT (28) == */
+	0x00200884, /* [0x0163] */
+	0x00000000, /* [0x0164] */
+	0x00000000, /* [0x0165] */
+	0x70298006, /* [0x0166] == TYPE7: DRAW_INDX_INDIRECT (29) == */
+	0x00200404, /* [0x0167] */
+	0x00000000, /* [0x0168] */
+	0x00000000, /* [0x0169] */
+	0x00000006, /* [0x016A] */
+	0x00000000, /* [0x016B] */
+	0x00000000, /* [0x016C] */
+	0x40E2A783, /* [0x016D] == TYPE4 == */
+	0x00000000, /* [0x016E] A5X_VPC_SO_BUFFER_BASE_LO_0_CTX_0 (0xE2A7)*/
+	0x00000000, /* [0x016F] A5X_VPC_SO_BUFFER_BASE_HI_0_CTX_0 (0xE2A8)*/
+	0x00000004, /* [0x0170] A5X_VPC_SO_BUFFER_SIZE_0_CTX_0 (0xE2A9)*/
+	0x48E2AC02, /* [0x0171] == TYPE4 == */
+	0x00000000, /* [0x0172] A5X_VPC_SO_FLUSH_BASE_LO_0_CTX_0 (0xE2AC)*/
+	0x00000000, /* [0x0173] A5X_VPC_SO_FLUSH_BASE_HI_0_CTX_0 (0xE2AD)*/
+	0x70460001, /* [0x0174] == TYPE7: EVENT_WRITE (46) == */
+	0x00000011, /* [0x0175] */
+	0x48E10001, /* [0x0176] == TYPE4 == */
+	0x00000009, /* [0x0177] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/
+	0x70460001, /* [0x0178] == TYPE7: EVENT_WRITE (46) == */
+	0x00000026, /* [0x0179] */
+	0x48E10001, /* [0x017A] == TYPE4 == */
+	0x00000008, /* [0x017B] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/
+	0x40E10185, /* [0x017C] == TYPE4 == */
+	0x00000000, /* [0x017D] A5X_GRAS_LRZ_BUFFER_BASE_LO_CTX_0 (0xE101)*/
+	0x00000000, /* [0x017E] A5X_GRAS_LRZ_BUFFER_BASE_HI_CTX_0 (0xE102)*/
+	0x00000001, /* [0x017F] A5X_GRAS_LRZ_BUFFER_PITCH_CTX_0 (0xE103)*/
+	0x00000000, /* [0x0180] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO_CTX_0
+		     * (0xE104)
+		     */
+	0x00000000, /* [0x0181] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI_CTX_0
+		     * (0xE105)
+		     */
+	0x70460001, /* [0x0182] == TYPE7: EVENT_WRITE (46) == */
+	0x00000025, /* [0x0183] */
+	0x70460001, /* [0x0184] == TYPE7: EVENT_WRITE (46) == */
+	0x00000019, /* [0x0185] */
+	0x70460001, /* [0x0186] == TYPE7: EVENT_WRITE (46) == */
+	0x00000018, /* [0x0187] */
+	0x70EA0001, /* [0x0188] == TYPE7: PREEMPT_ENABLE_LOCAL (6A) == */
+	0x00000000, /* [0x0189] */
+	0x70EC0001, /* [0x018A] == TYPE7: SET_RENDER_MODE (6C) == */
+	0x00000006, /* [0x018B] */
+	0x70438003, /* [0x018C] == TYPE7: SET_DRAW_STATE (43) == */
+	0x00080059, /* [0x018D] */
+	0x00000000, /* [0x018E] */
+	0x00000000, /* [0x018F] */
+	0x70DC0002, /* [0x0190] == TYPE7: CONTEXT_REG_BUNCH (5C) == */
+	0x0000E2A1, /* [0x0191] */
+	0x00008001, /* [0x0192] */
+	0x709D0001, /* [0x0193] == TYPE7: SKIP_IB2_ENABLE_GLOBAL (1D) == */
+	0x00000000, /* [0x0194] */
+	0x70138000, /* [0x0195] == TYPE7: WAIT_FOR_ME (13) == */
+	0x70640001, /* [0x0196] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */
+	0x00000001, /* [0x0197] */
+	0x70380007, /* [0x0198] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00200506, /* [0x0199] */
+	0x00000000, /* [0x019A] */
+	0x00000004, /* [0x019B] */
+	0x00000000, /* [0x019C] */
+	0x00000000, /* [0x019D] */
+	0x00000000, /* [0x019E] */
+	0x00000004, /* [0x019F] */
+	0x703D8005, /* [0x01A0] == TYPE7: MEM_WRITE (3D) == */
+	0x00000000, /* [0x01A1] */
+	0x00000000, /* [0x01A2] */
+	0x00000001, /* [0x01A3] */
+	0x00000001, /* [0x01A4] */
+	0x00000001, /* [0x01A5] */
+	0x70928000, /* [0x01A6] == TYPE7: WAIT_MEM_WRITES (12) == */
+	0x70BF8003, /* [0x01A7] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */
+	0x00000000, /* [0x01A8] */
+	0x00000000, /* [0x01A9] */
+	0x00000028, /* [0x01AA] */
+	0x70C48006, /* [0x01AB] == TYPE7: COND_EXEC (44) == */
+	0x00000000, /* [0x01AC] */
+	0x00000000, /* [0x01AD] */
+	0x00000000, /* [0x01AE] */
+	0x00000000, /* [0x01AF] */
+	0x00000001, /* [0x01B0] */
+	0x00000002, /* [0x01B1] */
+	0x70100001, /* [0x01B2] == TYPE7: NOP (10) == */
+	0x00000000, /* [0x01B3] */
+	0x70C28003, /* [0x01B4] == TYPE7: MEM_TO_REG (42) == */
+	0xC000E2AB, /* [0x01B5] */
+	0x00000000, /* [0x01B6] */
+	0x00000000, /* [0x01B7] */
+	0x70230001, /* [0x01B8] == TYPE7: SKIP_IB2_ENABLE_LOCAL (23) == */
+	0x00000000, /* [0x01B9] */
+	0x70E90001, /* [0x01BA] == TYPE7: PREEMPT_ENABLE_GLOBAL (69) == */
+	0x00000000, /* [0x01BB] */
+	0x70BC8006, /* [0x01BC] == TYPE7: WAIT_REG_MEM (3C) == */
+	0x00000010, /* [0x01BD] */
+	0x00000000, /* [0x01BE] */
+	0x00000000, /* [0x01BF] */
+	0x00000001, /* [0x01C0] */
+	0xFFFFFFFF, /* [0x01C1] */
+	0x00000001, /* [0x01C2] */
+	0x70738009, /* [0x01C3] == TYPE7: MEM_TO_MEM (73) == */
+	0x20000004, /* [0x01C4] */
+	0x00000000, /* [0x01C5] */
+	0x00000000, /* [0x01C6] */
+	0x00000000, /* [0x01C7] */
+	0x00000000, /* [0x01C8] */
+	0x00000000, /* [0x01C9] */
+	0x00000000, /* [0x01CA] */
+	0x00000000, /* [0x01CB] */
+	0x00000000, /* [0x01CC] */
+	0x70738009, /* [0x01CD] == TYPE7: MEM_TO_MEM (73) == */
+	0xE0000004, /* [0x01CE] */
+	0x00000000, /* [0x01CF] */
+	0x00000000, /* [0x01D0] */
+	0x00000000, /* [0x01D1] */
+	0x00000000, /* [0x01D2] */
+	0x00000000, /* [0x01D3] */
+	0x00000000, /* [0x01D4] */
+	0x00000000, /* [0x01D5] */
+	0x00000000, /* [0x01D6] */
+	0x70B50001, /* [0x01D7] == TYPE7: SET_SUBDRAW_SIZE (35) == */
+	0x00000001, /* [0x01D8] */
+	0x40E78A01, /* [0x01D9] == TYPE4 == */
+	0x000FFFFF, /* [0x01DA] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/
+	0x70268000, /* [0x01DB] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x400E0601, /* [0x01DC] == TYPE4 == */
+	0x00000001, /* [0x01DD] A5X_HLSQ_MODE_CNTL (0x0E06)*/
+	0x706E0004, /* [0x01DE] == TYPE7: COMPUTE_CHECKPOINT (6E) == */
+	0x00000000, /* [0x01DF] */
+	0x00000000, /* [0x01E0] */
+	0x00000018, /* [0x01E1] */
+	0x00000001, /* [0x01E2] */
+	0x40E14001, /* [0x01E3] == TYPE4 == */
+	0x00020000, /* [0x01E4] A5X_RB_CNTL_CTX_0 (0xE140)*/
+	0x40E78A01, /* [0x01E5] == TYPE4 == */
+	0x01F00000, /* [0x01E6] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/
+	0x70268000, /* [0x01E7] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x48E38C01, /* [0x01E8] == TYPE4 == */
+	0xFFFFFFFF, /* [0x01E9] A5X_PC_RESTART_INDEX_CTX_0 (0xE38C)*/
+	0x480D0201, /* [0x01EA] == TYPE4 == */
+	0x0000001F, /* [0x01EB] A5X_PC_MODE_CNTL (0x0D02)*/
+	0x480EC201, /* [0x01EC] == TYPE4 == */
+	0x0000001E, /* [0x01ED] A5X_SP_MODE_CNTL (0x0EC2)*/
+	0x48E58001, /* [0x01EE] == TYPE4 == */
+	0x00000000, /* [0x01EF] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/
+	0x40E2A201, /* [0x01F0] == TYPE4 == */
+	0x00000001, /* [0x01F1] A5X_VPC_SO_OVERRIDE_CTX_0 (0xE2A2)*/
+	0x70640001, /* [0x01F2] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */
+	0x00000001, /* [0x01F3] */
+	0x48E78401, /* [0x01F4] == TYPE4 == */
+	0x00000881, /* [0x01F5] A5X_HLSQ_CNTL_0_CTX_0 (0xE784)*/
+	0x40E5F001, /* [0x01F6] == TYPE4 == */
+	0x00000C06, /* [0x01F7] A5X_SP_CS_CNTL_0_CTX_0 (0xE5F0)*/
+	0x48E79001, /* [0x01F8] == TYPE4 == */
+	0x00000001, /* [0x01F9] A5X_HLSQ_CS_CONFIG_CTX_0 (0xE790)*/
+	0x48E79601, /* [0x01FA] == TYPE4 == */
+	0x00000005, /* [0x01FB] A5X_HLSQ_CS_CNTL_CTX_0 (0xE796)*/
+	0x48E58901, /* [0x01FC] == TYPE4 == */
+	0x00000001, /* [0x01FD] A5X_SP_CS_CONFIG_CTX_0 (0xE589)*/
+	0x40E7DC01, /* [0x01FE] == TYPE4 == */
+	0x00000030, /* [0x01FF] A5X_HLSQ_CONTEXT_SWITCH_CS_SW_3_CTX_0 (0xE7DC)*/
+	0x48E7DD01, /* [0x0200] == TYPE4 == */
+	0x00000002, /* [0x0201] A5X_HLSQ_CONTEXT_SWITCH_CS_SW_4_CTX_0 (0xE7DD)*/
+	0x40E7B001, /* [0x0202] == TYPE4 == */
+	0x00000003, /* [0x0203] A5X_HLSQ_CS_NDRANGE_0_CTX_0 (0xE7B0)*/
+	0x48E7B702, /* [0x0204] == TYPE4 == */
+	0x00FCC0CF, /* [0x0205] A5X_HLSQ_CS_CNTL_0_CTX_0 (0xE7B7)*/
+	0x00000000, /* [0x0206] A5X_HLSQ_CS_CNTL_1_CTX_0 (0xE7B8)*/
+	0x40E7B983, /* [0x0207] == TYPE4 == */
+	0x00000001, /* [0x0208] A5X_HLSQ_CS_KERNEL_GROUP_X_CTX_0 (0xE7B9)*/
+	0x00000001, /* [0x0209] A5X_HLSQ_CS_KERNEL_GROUP_Y_CTX_0 (0xE7BA)*/
+	0x00000001, /* [0x020A] A5X_HLSQ_CS_KERNEL_GROUP_Z_CTX_0 (0xE7BB)*/
+	0x70B08003, /* [0x020B] == TYPE7: LOAD_STATE (30) == */
+	0x00B60000, /* [0x020C] */
+	0x00000000, /* [0x020D] */
+	0x00000000, /* [0x020E] */
+	0x70B08003, /* [0x020F] == TYPE7: LOAD_STATE (30) == */
+	0x01360008, /* [0x0210] */
+	0x00000000, /* [0x0211] */
+	0x00000000, /* [0x0212] */
+	0x70B0000B, /* [0x0213] == TYPE7: LOAD_STATE (30) == */
+	0x00BC0000, /* [0x0214] */
+	0x00000000, /* [0x0215] */
+	0x00000000, /* [0x0216] */
+	0x00000000, /* [0x0217] */
+	0x00000000, /* [0x0218] */
+	0x00000000, /* [0x0219] */
+	0x00000000, /* [0x021A] */
+	0x00000000, /* [0x021B] */
+	0x00000000, /* [0x021C] */
+	0x00000000, /* [0x021D] */
+	0x00000000, /* [0x021E] */
+	0x70B00007, /* [0x021F] == TYPE7: LOAD_STATE (30) == */
+	0x00BC0000, /* [0x0220] */
+	0x00000001, /* [0x0221] */
+	0x00000000, /* [0x0222] */
+	0x00040000, /* [0x0223] */
+	0x00000000, /* [0x0224] */
+	0x00040000, /* [0x0225] */
+	0x00000000, /* [0x0226] */
+	0x70B00007, /* [0x0227] == TYPE7: LOAD_STATE (30) == */
+	0x00BC0000, /* [0x0228] */
+	0x00000002, /* [0x0229] */
+	0x00000000, /* [0x022A] */
+	0x00000000, /* [0x022B] */
+	0x00000000, /* [0x022C] */
+	0x00000000, /* [0x022D] */
+	0x00000000, /* [0x022E] */
+	0x48E7B186, /* [0x022F] == TYPE4 == */
+	0x00000001, /* [0x0230] A5X_HLSQ_CS_NDRANGE_1_CTX_0 (0xE7B1)*/
+	0x00000000, /* [0x0231] A5X_HLSQ_CS_NDRANGE_2_CTX_0 (0xE7B2)*/
+	0x00000001, /* [0x0232] A5X_HLSQ_CS_NDRANGE_3_CTX_0 (0xE7B3)*/
+	0x00000000, /* [0x0233] A5X_HLSQ_CS_NDRANGE_4_CTX_0 (0xE7B4)*/
+	0x00000001, /* [0x0234] A5X_HLSQ_CS_NDRANGE_5_CTX_0 (0xE7B5)*/
+	0x00000000, /* [0x0235] A5X_HLSQ_CS_NDRANGE_6_CTX_0 (0xE7B6)*/
+	0x70B30004, /* [0x0236] == TYPE7: EXEC_CS (33) == */
+	0x00000000, /* [0x0237] */
+	0x00000001, /* [0x0238] */
+	0x00000001, /* [0x0239] */
+	0x00000001, /* [0x023A] */
+	0x480E6201, /* [0x023B] == TYPE4 == */
+	0x00000001, /* [0x023C] A5X_VPC_MODE_CNTL (0x0E62)*/
+};
+
+/*
+ * These are fixups for the addresses in _a5xx_critical_pkts[]. The first two
+ * numbers are dword offsets into the buffer above, the third number selects
+ * which target buffer is being patched in, and the final number is an offset
+ * into that buffer. An illustrative application loop is sketched after the
+ * table below.
+ */
+static const struct adreno_critical_fixup critical_pkt_fixups[] = {
+	{ 132, 133, 2, 0x0000 },
+	{ 136, 137, 2, 0x0001 },
+	{ 154, 155, 2, 0x0100 },
+	{ 159, 160, 2, 0x0104 },
+	{ 173, 174, 2, 0x0200 },
+	{ 177, 178, 2, 0x0300 },
+	{ 236, 237, 0, 0x0000 },
+	{ 244, 245, 0, 0x0040 },
+	{ 259, 260, 3, 0x0000 },
+	{ 266, 267, 2, 0x0108 },
+	{ 298, 299, 0, 0x0040 },
+	{ 300, 301, 2, 0x0080 },
+	{ 331, 332, 3, 0x02A0 },
+	{ 337, 338, 3, 0x0700 },
+	{ 348, 349, 3, 0x0920 },
+	{ 356, 357, 1, 0x008C },
+	{ 360, 361, 1, 0x0080 },
+	{ 363, 364, 1, 0x008C },
+	{ 366, 367, 0, 0x0100 },
+	{ 370, 371, 0, 0x0120 },
+	{ 381, 382, 1, 0x0480 },
+	{ 384, 385, 1, 0x0400 },
+	{ 398, 399, 3, 0x0920 },
+	{ 413, 414, 1, 0x0080 },
+	{ 417, 418, 1, 0x0300 },
+	{ 424, 425, 3, 0x0880 },
+	{ 428, 429, 1, 0x0300 },
+	{ 430, 431, 1, 0x0300 },
+	{ 438, 439, 1, 0x0300 },
+	{ 446, 447, 1, 0x0300 },
+	{ 453, 454, 1, 0x0320 },
+	{ 455, 456, 1, 0x0300 },
+	{ 457, 458, 1, 0x0304 },
+	{ 459, 460, 1, 0x0308 },
+	{ 463, 464, 1, 0x0320 },
+	{ 465, 466, 1, 0x0300 },
+	{ 467, 468, 1, 0x0304 },
+	{ 469, 470, 1, 0x0308 },
+	{ 525, 526, 1, 0x0160 },
+	{ 529, 530, 1, 0x0101 },
+	{ 535, 536, 1, 0x0140 },
+	{ 539, 540, 0, 0x0800 },
+	{ 555, 556, 1, 0x0140 },
+	{ 557, 558, 0, 0x0800 },
+};
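+
+/*
+ * Illustrative usage sketch, not part of the original source: walking the
+ * fixup table above with the hypothetical helper sketched after struct
+ * adreno_critical_fixup.  buffers[] stands in for the GPU base addresses of
+ * the scratch buffers selected by the ->buffer index.
+ */
+static inline void _patch_critical_pkts(unsigned int *cmds,
+	const struct adreno_critical_fixup *fixups, int count,
+	const uint64_t *buffers)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		_apply_critical_fixup(cmds, &fixups[i],
+			buffers[fixups[i].buffer]);
+}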
+
+static unsigned int _a5xx_critical_pkts_mem01[] = {
+	0xBECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x3ECCCCCD,
+	0xBECCCCCD, 0x00000000, 0xBECCCCCD, 0x3ECCCCCD, 0x3ECCCCCD, 0x00000000,
+	0xBECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0xBECCCCCD,
+	0xBECCCCCD, 0x00000000, 0xBECCCCCD, 0xBECCCCCD, 0x3ECCCCCD, 0x00000000,
+	0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000,
+	0x00000000, 0x00000000, 0x00040003, 0x00090005, 0x000B000A, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000003, 0x00000001,
+	0x00000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000003, 0x00000001, 0x00000003, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x3EF5C28F, 0x3ED1EB85, 0x3E6147AE, 0x3F800000,
+	0x3E947AE1, 0x3E6147AE, 0x3D4CCCCD, 0x3F800000, 0x00000000, 0x20554002,
+	0x3F800000, 0x20444003, 0x000000CF, 0x20044904, 0x00000000, 0x00000200,
+	0x00050001, 0x42300001, 0x00000002, 0x20154005, 0x00000020, 0x20244006,
+	0x00000000, 0x00000000, 0x10200001, 0x46500007, 0x20030004, 0x46D00004,
+	0x00000000, 0x20554008, 0x00070001, 0x61830806, 0x00061020, 0x61808001,
+	0x00040000, 0x42380800, 0x00010000, 0x42380800, 0x20040000, 0x46D80800,
+	0x00000000, 0x20154007, 0x20020000, 0x46F80000, 0x00000007, 0x20154001,
+	0x00000000, 0x00000200, 0x60030001, 0x43900004, 0x60030001, 0x43900001,
+	0x00000000, 0x00000400, 0x00013600, 0xC6E20004, 0x40040003, 0x50180104,
+	0x40060003, 0x40180803, 0x00000003, 0x20044006, 0x00000000, 0x00000500,
+	0x00003609, 0xC7260201, 0x00000000, 0x03000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000,
+};
+
+static unsigned int _a5xx_critical_pkts_mem02[] = {
+	0x00000000, 0x03000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x0000000C, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x8ACFE7F3, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+static unsigned int _a5xx_critical_pkts_mem03[] = {
+	0x70438003, /* [0x0000] == TYPE7: SET_DRAW_STATE (43) == */
+	0x0008003A, /* [0x0001] */
+	0x00000000, /* [0x0002] */
+	0x00000000, /* [0x0003] */
+	0x70B08003, /* [0x0004] == TYPE7: LOAD_STATE (30) == */
+	0x00620000, /* [0x0005] */
+	0x00000000, /* [0x0006] */
+	0x00000000, /* [0x0007] */
+	0x40E29801, /* [0x0008] == TYPE4 == */
+	0x0000FFFF, /* [0x0009] A5X_VPC_GS_SIV_CNTL_CTX_0 (0xE298)*/
+	0x48E2A001, /* [0x000A] == TYPE4 == */
+	0x000000FF, /* [0x000B] A5X_VPC_PS_PRIMITIVEID_CNTL_CTX_0 (0xE2A0)*/
+	0x40E40185, /* [0x000C] == TYPE4 == */
+	0x00FCFCFC, /* [0x000D] A5X_VFD_CNTL_1_CTX_0 (0xE401)*/
+	0x0000FCFC, /* [0x000E] A5X_VFD_CNTL_2_CTX_0 (0xE402)*/
+	0x0000FCFC, /* [0x000F] A5X_VFD_CNTL_3_CTX_0 (0xE403)*/
+	0x000000FC, /* [0x0010] A5X_VFD_CNTL_4_CTX_0 (0xE404)*/
+	0x00000000, /* [0x0011] A5X_VFD_CNTL_5_CTX_0 (0xE405)*/
+	0x48E38F01, /* [0x0012] == TYPE4 == */
+	0x00000000, /* [0x0013] A5X_PC_HS_PARAM_CTX_0 (0xE38F)*/
+	0x48E58001, /* [0x0014] == TYPE4 == */
+	0x00000010, /* [0x0015] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/
+	0x40E00001, /* [0x0016] == TYPE4 == */
+	0x00000080, /* [0x0017] A5X_GRAS_CL_CNTL_CTX_0 (0xE000)*/
+	0x40E09583, /* [0x0018] == TYPE4 == */
+	0x00000000, /* [0x0019] A5X_GRAS_SU_POLY_OFFSET_SCALE_CTX_0 (0xE095)*/
+	0x00000000, /* [0x001A] A5X_GRAS_SU_POLY_OFFSET_OFFSET_CTX_0 (0xE096)*/
+	0x00000000, /* [0x001B] A5X_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP_CTX_0
+		     * (0xE097)
+		     */
+	0x40E09001, /* [0x001C] == TYPE4 == */
+	0x00000010, /* [0x001D] A5X_GRAS_SU_CNTL_CTX_0 (0xE090)*/
+	0x40E0AA02, /* [0x001E] == TYPE4 == */
+	0x00000000, /* [0x001F] A5X_GRAS_SC_SCREEN_SCISSOR_TL_0_CTX_0 (0xE0AA)*/
+	0x001F0073, /* [0x0020] A5X_GRAS_SC_SCREEN_SCISSOR_BR_0_CTX_0 (0xE0AB)*/
+	0x48E01086, /* [0x0021] == TYPE4 == */
+	0x42680000, /* [0x0022] A5X_GRAS_CL_VIEWPORT_XOFFSET_0_CTX_0 (0xE010)*/
+	0x42680000, /* [0x0023] A5X_GRAS_CL_VIEWPORT_XSCALE_0_CTX_0 (0xE011)*/
+	0x41800000, /* [0x0024] A5X_GRAS_CL_VIEWPORT_YOFFSET_0_CTX_0 (0xE012)*/
+	0xC1800000, /* [0x0025] A5X_GRAS_CL_VIEWPORT_YSCALE_0_CTX_0 (0xE013)*/
+	0x3EFFFEE0, /* [0x0026] A5X_GRAS_CL_VIEWPORT_ZOFFSET_0_CTX_0 (0xE014)*/
+	0x3EFFFEE0, /* [0x0027] A5X_GRAS_CL_VIEWPORT_ZSCALE_0_CTX_0 (0xE015)*/
+	0x40E0CA02, /* [0x0028] == TYPE4 == */
+	0x00000000, /* [0x0029] A5X_GRAS_SC_VIEWPORT_SCISSOR_TL_0_CTX_0
+		     * (0xE0CA)
+		     */
+	0x001F0073, /* [0x002A] A5X_GRAS_SC_VIEWPORT_SCISSOR_BR_0_CTX_0
+		     * (0xE0CB)
+		     */
+	0x40E00601, /* [0x002B] == TYPE4 == */
+	0x0007FDFF, /* [0x002C] A5X_GRAS_CL_GUARDBAND_CLIP_ADJ_CTX_0 (0xE006)*/
+	0x40E70401, /* [0x002D] == TYPE4 == */
+	0x00000000, /* [0x002E] A5X_TPL1_TP_RAS_MSAA_CNTL_CTX_0 (0xE704)*/
+	0x48E70501, /* [0x002F] == TYPE4 == */
+	0x00000004, /* [0x0030] A5X_TPL1_TP_DEST_MSAA_CNTL_CTX_0 (0xE705)*/
+	0x48E14201, /* [0x0031] == TYPE4 == */
+	0x00000000, /* [0x0032] A5X_RB_RAS_MSAA_CNTL_CTX_0 (0xE142)*/
+	0x40E14301, /* [0x0033] == TYPE4 == */
+	0x00000004, /* [0x0034] A5X_RB_DEST_MSAA_CNTL_CTX_0 (0xE143)*/
+	0x40E78683, /* [0x0035] == TYPE4 == */
+	0xFCFCFCFC, /* [0x0036] A5X_HLSQ_CNTL_2_CTX_0 (0xE786)*/
+	0xFCFCFCFC, /* [0x0037] A5X_HLSQ_CNTL_3_CTX_0 (0xE787)*/
+	0xFCFCFCFC, /* [0x0038] A5X_HLSQ_CNTL_4_CTX_0 (0xE788)*/
+	0x48E0A201, /* [0x0039] == TYPE4 == */
+	0x00000000, /* [0x003A] A5X_GRAS_SC_RAS_MSAA_CNTL_CTX_0 (0xE0A2)*/
+	0x40E0A301, /* [0x003B] == TYPE4 == */
+	0x00000004, /* [0x003C] A5X_GRAS_SC_DEST_MSAA_CNTL_CTX_0 (0xE0A3)*/
+	0x48E14101, /* [0x003D] == TYPE4 == */
+	0x0000C089, /* [0x003E] A5X_RB_RENDER_CNTL_CTX_0 (0xE141)*/
+	0x40E0A001, /* [0x003F] == TYPE4 == */
+	0x00000009, /* [0x0040] A5X_GRAS_SC_CNTL_CTX_0 (0xE0A0)*/
+	0x40E28001, /* [0x0041] == TYPE4 == */
+	0x00010004, /* [0x0042] A5X_VPC_CNTL_0_CTX_0 (0xE280)*/
+	0x40E38401, /* [0x0043] == TYPE4 == */
+	0x00000404, /* [0x0044] A5X_PC_PRIMITIVE_CNTL_CTX_0 (0xE384)*/
+	0x40E78501, /* [0x0045] == TYPE4 == */
+	0x0000003F, /* [0x0046] A5X_HLSQ_CNTL_1_CTX_0 (0xE785)*/
+	0x48E5D301, /* [0x0047] == TYPE4 == */
+	0x00000030, /* [0x0048] A5X_SP_PS_MRT_0_CTX_0 (0xE5D3)*/
+	0x48E5CB01, /* [0x0049] == TYPE4 == */
+	0x00000100, /* [0x004A] A5X_SP_PS_OUTPUT_0_CTX_0 (0xE5CB)*/
+	0x40E5CA01, /* [0x004B] == TYPE4 == */
+	0x001F9F81, /* [0x004C] A5X_SP_PS_OUTPUT_CNTL_CTX_0 (0xE5CA)*/
+	0x40E14601, /* [0x004D] == TYPE4 == */
+	0x00000001, /* [0x004E] A5X_RB_PS_OUTPUT_CNTL_CTX_0 (0xE146)*/
+	0x40E38E01, /* [0x004F] == TYPE4 == */
+	0x00000000, /* [0x0050] A5X_PC_GS_PARAM_CTX_0 (0xE38E)*/
+	0x40E28A01, /* [0x0051] == TYPE4 == */
+	0x00000000, /* [0x0052] A5X_VPC_VARYING_REPLACE_MODE_0_CTX_0 (0xE28A)*/
+	0x48E1A901, /* [0x0053] == TYPE4 == */
+	0xFFFF0100, /* [0x0054] A5X_RB_BLEND_CNTL_CTX_0 (0xE1A9)*/
+	0x40E5C901, /* [0x0055] == TYPE4 == */
+	0x00000100, /* [0x0056] A5X_SP_BLEND_CNTL_CTX_0 (0xE5C9)*/
+	0x40E76401, /* [0x0057] == TYPE4 == */
+	0x00000000, /* [0x0058] A5X_TPL1_TP_PS_ROTATION_CNTL_CTX_0 (0xE764)*/
+	0x48E09401, /* [0x0059] == TYPE4 == */
+	0x00000000, /* [0x005A] A5X_GRAS_SU_DEPTH_PLANE_CNTL_CTX_0 (0xE094)*/
+	0x40E1B001, /* [0x005B] == TYPE4 == */
+	0x00000000, /* [0x005C] A5X_RB_DEPTH_PLANE_CNTL_CTX_0 (0xE1B0)*/
+	0x48E1B101, /* [0x005D] == TYPE4 == */
+	0x00000000, /* [0x005E] A5X_RB_DEPTH_CNTL_CTX_0 (0xE1B1)*/
+	0x48E40001, /* [0x005F] == TYPE4 == */
+	0x00000001, /* [0x0060] A5X_VFD_CNTL_0_CTX_0 (0xE400)*/
+	0x48E40A04, /* [0x0061] == TYPE4 == */
+	0x00000000, /* [0x0062] A5X_VFD_VERTEX_BUFFER_BASE_LO_0_CTX_0 (0xE40A)*/
+	0x00000000, /* [0x0063] A5X_VFD_VERTEX_BUFFER_BASE_HI_0_CTX_0 (0xE40B)*/
+	0x00000078, /* [0x0064] A5X_VFD_VERTEX_BUFFER_SIZE_0_CTX_0 (0xE40C)*/
+	0x00000008, /* [0x0065] A5X_VFD_VERTEX_BUFFER_STRIDE_0_CTX_0 (0xE40D)*/
+	0x40E48A02, /* [0x0066] == TYPE4 == */
+	0xC6700000, /* [0x0067] A5X_VFD_FETCH_INSTR_0_CTX_0 (0xE48A)*/
+	0x00000001, /* [0x0068] A5X_VFD_FETCH_INSTR_STEP_RATE_0_CTX_0 (0xE48B)*/
+	0x48E4CA01, /* [0x0069] == TYPE4 == */
+	0x0000000F, /* [0x006A] A5X_VFD_DEST_CNTL_0_CTX_0 (0xE4CA)*/
+	0x48E10001, /* [0x006B] == TYPE4 == */
+	0x00000008, /* [0x006C] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/
+	0x48E0A101, /* [0x006D] == TYPE4 == */
+	0x00000004, /* [0x006E] A5X_GRAS_SC_BIN_CNTL_CTX_0 (0xE0A1)*/
+	0x40E10185, /* [0x006F] == TYPE4 == */
+	0x00000000, /* [0x0070] A5X_GRAS_LRZ_BUFFER_BASE_LO_CTX_0 (0xE101)*/
+	0x00000000, /* [0x0071] A5X_GRAS_LRZ_BUFFER_BASE_HI_CTX_0 (0xE102)*/
+	0x00000001, /* [0x0072] A5X_GRAS_LRZ_BUFFER_PITCH_CTX_0 (0xE103)*/
+	0x00000000, /* [0x0073] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO_CTX_0
+		     * (0xE104)
+		     */
+	0x00000000, /* [0x0074] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI_CTX_0
+		     * (0xE105)
+		     */
+	0x70388003, /* [0x0075] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00200884, /* [0x0076] */
+	0x00000001, /* [0x0077] */
+	0x00000003, /* [0x0078] */
+	0x70380007, /* [0x0079] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00200404, /* [0x007A] */
+	0x00000001, /* [0x007B] */
+	0x00000003, /* [0x007C] */
+	0x00000000, /* [0x007D] */
+	0x00000000, /* [0x007E] */
+	0x00000000, /* [0x007F] */
+	0x00000006, /* [0x0080] */
+	0x70460004, /* [0x0081] == TYPE7: EVENT_WRITE (46) == */
+	0x00000004, /* [0x0082] */
+	0x00000000, /* [0x0083] */
+	0x00000000, /* [0x0084] */
+	0x00000001, /* [0x0085] */
+	0x70268000, /* [0x0086] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x70A88003, /* [0x0087] == TYPE7: DRAW_INDIRECT (28) == */
+	0x00200884, /* [0x0088] */
+	0x00000000, /* [0x0089] */
+	0x00000000, /* [0x008A] */
+	0x70460004, /* [0x008B] == TYPE7: EVENT_WRITE (46) == */
+	0x00000004, /* [0x008C] */
+	0x00000000, /* [0x008D] */
+	0x00000000, /* [0x008E] */
+	0x00000001, /* [0x008F] */
+	0x70268000, /* [0x0090] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x70298006, /* [0x0091] == TYPE7: DRAW_INDX_INDIRECT (29) == */
+	0x00200404, /* [0x0092] */
+	0x00000000, /* [0x0093] */
+	0x00000000, /* [0x0094] */
+	0x00000006, /* [0x0095] */
+	0x00000000, /* [0x0096] */
+	0x00000000, /* [0x0097] */
+	0x40E40801, /* [0x0098] == TYPE4 == */
+	0x0000000D, /* [0x0099] A5X_VFD_INDEX_OFFSET_CTX_0 (0xE408)*/
+	0x48E40901, /* [0x009A] == TYPE4 == */
+	0x00000000, /* [0x009B] A5X_VFD_INSTANCE_START_OFFSET_CTX_0 (0xE409)*/
+	0x70388003, /* [0x009C] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00200884, /* [0x009D] */
+	0x00000001, /* [0x009E] */
+	0x00000003, /* [0x009F] */
+	0x00000000, /* [0x00A0] */
+	0x00000000, /* [0x00A1] */
+	0x00000000, /* [0x00A2] */
+	0x00000000, /* [0x00A3] */
+	0x00000000, /* [0x00A4] */
+	0x00000000, /* [0x00A5] */
+	0x00000000, /* [0x00A6] */
+	0x00000000, /* [0x00A7] */
+	0x48E78401, /* [0x00A8] */
+	0x00000881, /* [0x00A9] */
+	0x40E5C001, /* [0x00AA] */
+	0x0004001E, /* [0x00AB] */
+	0x70438003, /* [0x00AC] */
+	0x0000003A, /* [0x00AD] */
+	0x00000000, /* [0x00AE] */
+	0x00000000, /* [0x00AF] */
+	0x70B00023, /* [0x00B0] */
+	0x00600000, /* [0x00B1] */
+	0x00000000, /* [0x00B2] */
+	0x00000000, /* [0x00B3] */
+	0x00000000, /* [0x00B4] */
+	0x03000000, /* [0x00B5] */
+	0x00000000, /* [0x00B6] */
+	0x00000000, /* [0x00B7] */
+	0x00000000, /* [0x00B8] */
+	0x00000000, /* [0x00B9] */
+	0x00000000, /* [0x00BA] */
+	0x00000000, /* [0x00BB] */
+	0x00000000, /* [0x00BC] */
+	0x00000000, /* [0x00BD] */
+	0x00000000, /* [0x00BE] */
+	0x00000000, /* [0x00BF] */
+	0x00000000, /* [0x00C0] */
+	0x00000000, /* [0x00C1] */
+	0x00000000, /* [0x00C2] */
+	0x00000000, /* [0x00C3] */
+	0x00000000, /* [0x00C4] */
+	0x00000000, /* [0x00C5] */
+	0x00000000, /* [0x00C6] */
+	0x00000000, /* [0x00C7] */
+	0x00000000, /* [0x00C8] */
+	0x00000000, /* [0x00C9] */
+	0x00000000, /* [0x00CA] */
+	0x00000000, /* [0x00CB] */
+	0x00000000, /* [0x00CC] */
+	0x00000000, /* [0x00CD] */
+	0x00000000, /* [0x00CE] */
+	0x00000000, /* [0x00CF] */
+	0x00000000, /* [0x00D0] */
+	0x00000000, /* [0x00D1] */
+	0x00000000, /* [0x00D2] */
+	0x00000000, /* [0x00D3] */
+	0x40E09301, /* [0x00D4] */
+	0x00000000, /* [0x00D5] */
+	0x40E38D01, /* [0x00D6] */
+	0x00000000, /* [0x00D7] */
+	0x40E29801, /* [0x00D8] */
+	0x0000FFFF, /* [0x00D9] */
+	0x48E28201, /* [0x00DA] */
+	0xEAEAEAEA, /* [0x00DB] */
+	0x40E29404, /* [0x00DC] */
+	0xFFFFFFFF, /* [0x00DD] */
+	0xFFFFFFFF, /* [0x00DE] */
+	0xFFFFFFFF, /* [0x00DF] */
+	0xFFFFFFFF, /* [0x00E0] */
+	0x40E5DB01, /* [0x00E1] */
+	0x00000000, /* [0x00E2] */
+	0x48E14701, /* [0x00E3] */
+	0x0000000F, /* [0x00E4] */
+	0x70B00023, /* [0x00E5] */
+	0x00700000, /* [0x00E6] */
+	0x00000000, /* [0x00E7] */
+	0x00000000, /* [0x00E8] */
+	0x00003C00, /* [0x00E9] */
+	0x20400000, /* [0x00EA] */
+	0x00000000, /* [0x00EB] */
+	0x20400001, /* [0x00EC] */
+	0x00000000, /* [0x00ED] */
+	0x20400002, /* [0x00EE] */
+	0x00003C00, /* [0x00EF] */
+	0x20400003, /* [0x00F0] */
+	0x00000000, /* [0x00F1] */
+	0x03000000, /* [0x00F2] */
+	0x00000000, /* [0x00F3] */
+	0x00000000, /* [0x00F4] */
+	0x00000000, /* [0x00F5] */
+	0x00000000, /* [0x00F6] */
+	0x00000000, /* [0x00F7] */
+	0x00000000, /* [0x00F8] */
+	0x00000000, /* [0x00F9] */
+	0x00000000, /* [0x00FA] */
+	0x00000000, /* [0x00FB] */
+	0x00000000, /* [0x00FC] */
+	0x00000000, /* [0x00FD] */
+	0x00000000, /* [0x00FE] */
+	0x00000000, /* [0x00FF] */
+	0x00000000, /* [0x0100] */
+	0x00000000, /* [0x0101] */
+	0x00000000, /* [0x0102] */
+	0x00000000, /* [0x0103] */
+	0x00000000, /* [0x0104] */
+	0x00000000, /* [0x0105] */
+	0x00000000, /* [0x0106] */
+	0x00000000, /* [0x0107] */
+	0x00000000, /* [0x0108] */
+	0x48E2A001, /* [0x0109] */
+	0x000000FF, /* [0x010A] */
+	0x40E40185, /* [0x010B] */
+	0x00FCFCFC, /* [0x010C] */
+	0x0000FCFC, /* [0x010D] */
+	0x0000FCFC, /* [0x010E] */
+	0x000000FC, /* [0x010F] */
+	0x00000000, /* [0x0110] */
+	0x48E38F01, /* [0x0111] */
+	0x00000000, /* [0x0112] */
+	0x48E58001, /* [0x0113] */
+	0x00000010, /* [0x0114] */
+	0x40E1A801, /* [0x0115] */
+	0x00000E00, /* [0x0116] */
+	0x48E15001, /* [0x0117] */
+	0x000007E0, /* [0x0118] */
+	0x40E15101, /* [0x0119] */
+	0x00000000, /* [0x011A] */
+	0x40E00001, /* [0x011B] */
+	0x00000080, /* [0x011C] */
+	0x40E09583, /* [0x011D] */
+	0x00000000, /* [0x011E] */
+	0x00000000, /* [0x011F] */
+	0x00000000, /* [0x0120] */
+	0x40E09001, /* [0x0121] */
+	0x00000010, /* [0x0122] */
+	0x40E0AA02, /* [0x0123] */
+	0x00000000, /* [0x0124] */
+	0x001F0073, /* [0x0125] */
+	0x48E01086, /* [0x0126] */
+	0x42680000, /* [0x0127] */
+	0x42680000, /* [0x0128] */
+	0x41800000, /* [0x0129] */
+	0xC1800000, /* [0x012A] */
+	0x3EFFFEE0, /* [0x012B] */
+	0x3EFFFEE0, /* [0x012C] */
+	0x40E0CA02, /* [0x012D] */
+	0x00000000, /* [0x012E] */
+	0x001F0073, /* [0x012F] */
+	0x40E00601, /* [0x0130] */
+	0x0007FDFF, /* [0x0131] */
+	0x40E70401, /* [0x0132] */
+	0x00000000, /* [0x0133] */
+	0x48E70501, /* [0x0134] */
+	0x00000004, /* [0x0135] */
+	0x48E14201, /* [0x0136] */
+	0x00000000, /* [0x0137] */
+	0x40E14301, /* [0x0138] */
+	0x00000004, /* [0x0139] */
+	0x40E78683, /* [0x013A] */
+	0xFCFCFCFC, /* [0x013B] */
+	0xFCFCFCFC, /* [0x013C] */
+	0xFCFCFCFC, /* [0x013D] */
+	0x48E0A201, /* [0x013E] */
+	0x00000000, /* [0x013F] */
+	0x40E0A301, /* [0x0140] */
+	0x00000004, /* [0x0141] */
+	0x48E1B285, /* [0x0142] */
+	0x00000001, /* [0x0143] */
+	0x00004000, /* [0x0144] */
+	0x00000000, /* [0x0145] */
+	0x00000004, /* [0x0146] */
+	0x000000C0, /* [0x0147] */
+	0x48E09801, /* [0x0148] */
+	0x00000001, /* [0x0149] */
+	0x48E00401, /* [0x014A] */
+	0x00000000, /* [0x014B] */
+	0x480CDD02, /* [0x014C] */
+	0x00200074, /* [0x014D] */
+	0x00000000, /* [0x014E] */
+	0x40E15285, /* [0x014F] */
+	0x00000A30, /* [0x0150] */
+	0x00000008, /* [0x0151] */
+	0x00000100, /* [0x0152] */
+	0x00000000, /* [0x0153] */
+	0x00000000, /* [0x0154] */
+	0x48E14101, /* [0x0155] */
+	0x0000C008, /* [0x0156] */
+	0x40E0A001, /* [0x0157] */
+	0x00000008, /* [0x0158] */
+	0x40E28001, /* [0x0159] */
+	0x00010004, /* [0x015A] */
+	0x40E38401, /* [0x015B] */
+	0x00000404, /* [0x015C] */
+	0x40E78501, /* [0x015D] */
+	0x0000003F, /* [0x015E] */
+	0x48E5D301, /* [0x015F] */
+	0x00000030, /* [0x0160] */
+	0x48E5CB01, /* [0x0161] */
+	0x00000100, /* [0x0162] */
+	0x40E5CA01, /* [0x0163] */
+	0x001F9F81, /* [0x0164] */
+	0x40E14601, /* [0x0165] */
+	0x00000001, /* [0x0166] */
+	0x40E38E01, /* [0x0167] */
+	0x00000000, /* [0x0168] */
+	0x40E28A01, /* [0x0169] */
+	0x00000000, /* [0x016A] */
+	0x48E1A901, /* [0x016B] */
+	0xFFFF0100, /* [0x016C] */
+	0x40E5C901, /* [0x016D] */
+	0x00000100, /* [0x016E] */
+	0x40E76401, /* [0x016F] */
+	0x00000000, /* [0x0170] */
+	0x48E09401, /* [0x0171] */
+	0x00000000, /* [0x0172] */
+	0x40E1B001, /* [0x0173] */
+	0x00000000, /* [0x0174] */
+	0x48E1B101, /* [0x0175] */
+	0x00000006, /* [0x0176] */
+	0x48E40001, /* [0x0177] */
+	0x00000001, /* [0x0178] */
+	0x48E40A04, /* [0x0179] */
+	0x00000000, /* [0x017A] */
+	0x00000000, /* [0x017B] */
+	0x00000078, /* [0x017C] */
+	0x00000008, /* [0x017D] */
+	0x40E48A02, /* [0x017E] */
+	0xC6700000, /* [0x017F] */
+	0x00000001, /* [0x0180] */
+	0x48E4CA01, /* [0x0181] */
+	0x0000000F, /* [0x0182] */
+	0x48E10001, /* [0x0183] */
+	0x00000008, /* [0x0184] */
+	0x48E0A101, /* [0x0185] */
+	0x00000000, /* [0x0186] */
+	0x40E10185, /* [0x0187] */
+	0x00000000, /* [0x0188] */
+	0x00000000, /* [0x0189] */
+	0x00000001, /* [0x018A] */
+	0x00000000, /* [0x018B] */
+	0x00000000, /* [0x018C] */
+	0x70230001, /* [0x018D] */
+	0x00000000, /* [0x018E] */
+	0x70388003, /* [0x018F] */
+	0x00200984, /* [0x0190] */
+	0x00000001, /* [0x0191] */
+	0x00000003, /* [0x0192] */
+	0x70380007, /* [0x0193] */
+	0x00200504, /* [0x0194] */
+	0x00000001, /* [0x0195] */
+	0x00000003, /* [0x0196] */
+	0x00000000, /* [0x0197] */
+	0x00000000, /* [0x0198] */
+	0x00000000, /* [0x0199] */
+	0x00000006, /* [0x019A] */
+	0x70460004, /* [0x019B] */
+	0x00000004, /* [0x019C] */
+	0x00000000, /* [0x019D] */
+	0x00000000, /* [0x019E] */
+	0x00000000, /* [0x019F] */
+	0x70268000, /* [0x01A0] */
+	0x70A88003, /* [0x01A1] */
+	0x00200984, /* [0x01A2] */
+	0x00000000, /* [0x01A3] */
+	0x00000000, /* [0x01A4] */
+	0x70460004, /* [0x01A5] */
+	0x00000004, /* [0x01A6] */
+	0x00000000, /* [0x01A7] */
+	0x00000000, /* [0x01A8] */
+	0x00000001, /* [0x01A9] */
+	0x70268000, /* [0x01AA] */
+	0x70298006, /* [0x01AB] */
+	0x00200504, /* [0x01AC] */
+	0x00000000, /* [0x01AD] */
+	0x00000000, /* [0x01AE] */
+	0x00000006, /* [0x01AF] */
+	0x00000000, /* [0x01B0] */
+	0x00000000, /* [0x01B1] */
+	0x40E40801, /* [0x01B2] */
+	0x0000000D, /* [0x01B3] */
+	0x48E40901, /* [0x01B4] */
+	0x00000000, /* [0x01B5] */
+	0x70388003, /* [0x01B6] */
+	0x00200984, /* [0x01B7] */
+	0x00000001, /* [0x01B8] */
+	0x00000003, /* [0x01B9] */
+	0x00000000, /* [0x01BA] */
+	0x00000000, /* [0x01BB] */
+	0x00000000, /* [0x01BC] */
+	0x00000000, /* [0x01BD] */
+	0x00000000, /* [0x01BE] */
+	0x00000000, /* [0x01BF] */
+	0x70EA0001, /* [0x01C0] */
+	0x00000000, /* [0x01C1] */
+	0x40E78A01, /* [0x01C2] */
+	0x000FFFFF, /* [0x01C3] */
+	0x40E09001, /* [0x01C4] */
+	0x00000000, /* [0x01C5] */
+	0x40E00501, /* [0x01C6] */
+	0x00000000, /* [0x01C7] */
+	0x40E00001, /* [0x01C8] */
+	0x00000181, /* [0x01C9] */
+	0x48E10001, /* [0x01CA] */
+	0x00000000, /* [0x01CB] */
+	0x40E21385, /* [0x01CC] */
+	0x00000004, /* [0x01CD] */
+	0x00000000, /* [0x01CE] */
+	0x00000000, /* [0x01CF] */
+	0x00000001, /* [0x01D0] */
+	0x00000001, /* [0x01D1] */
+	0x40E21C01, /* [0x01D2] */
+	0x00000000, /* [0x01D3] */
+	0x40E21001, /* [0x01D4] */
+	0x00000000, /* [0x01D5] */
+	0x70460004, /* [0x01D6] */
+	0x0000001E, /* [0x01D7] */
+	0x00000000, /* [0x01D8] */
+	0x00000000, /* [0x01D9] */
+	0x00000001, /* [0x01DA] */
+	0x00000000, /* [0x01DB] */
+	0x00000000, /* [0x01DC] */
+	0x00000000, /* [0x01DD] */
+	0x00000000, /* [0x01DE] */
+	0x00000000, /* [0x01DF] */
+	0x40E78A01, /* [0x01E0] */
+	0x020FFFFF, /* [0x01E1] */
+	0x48E78B85, /* [0x01E2] */
+	0x00000001, /* [0x01E3] */
+	0x00003F05, /* [0x01E4] */
+	0x00003F04, /* [0x01E5] */
+	0x00003F04, /* [0x01E6] */
+	0x00003F04, /* [0x01E7] */
+	0x48E79001, /* [0x01E8] */
+	0x00000000, /* [0x01E9] */
+	0x40E79101, /* [0x01EA] */
+	0x00000002, /* [0x01EB] */
+	0x40E79201, /* [0x01EC] */
+	0x00000002, /* [0x01ED] */
+	0x40E58485, /* [0x01EE] */
+	0x00000001, /* [0x01EF] */
+	0x00003F05, /* [0x01F0] */
+	0x00003F04, /* [0x01F1] */
+	0x00003F04, /* [0x01F2] */
+	0x00003F04, /* [0x01F3] */
+	0x48E58901, /* [0x01F4] */
+	0x00000000, /* [0x01F5] */
+	0x48E7C302, /* [0x01F6] */
+	0x00000002, /* [0x01F7] */
+	0x00000001, /* [0x01F8] */
+	0x48E7D702, /* [0x01F9] */
+	0x00000002, /* [0x01FA] */
+	0x00000001, /* [0x01FB] */
+	0x40E7C802, /* [0x01FC] */
+	0x00000000, /* [0x01FD] */
+	0x00000000, /* [0x01FE] */
+	0x40E7CD02, /* [0x01FF] */
+	0x00000000, /* [0x0200] */
+	0x00000000, /* [0x0201] */
+	0x48E7D202, /* [0x0202] */
+	0x00000000, /* [0x0203] */
+	0x00000000, /* [0x0204] */
+	0x40E7DC02, /* [0x0205] */
+	0x00000000, /* [0x0206] */
+	0x00000000, /* [0x0207] */
+	0x48E38901, /* [0x0208] */
+	0x00000000, /* [0x0209] */
+	0x48E29A01, /* [0x020A] */
+	0x00FFFF00, /* [0x020B] */
+	0x48E00101, /* [0x020C] */
+	0x00000000, /* [0x020D] */
+	0x40E29D01, /* [0x020E] */
+	0x0000FF00, /* [0x020F] */
+	0x40E59001, /* [0x0210] */
+	0x00000406, /* [0x0211] */
+	0x48E59201, /* [0x0212] */
+	0x00000001, /* [0x0213] */
+	0x40E59301, /* [0x0214] */
+	0x00000F00, /* [0x0215] */
+	0x40E5A301, /* [0x0216] */
+	0x00000000, /* [0x0217] */
+	0x48E38501, /* [0x0218] */
+	0x00000000, /* [0x0219] */
+	0x00000000, /* [0x021A] */
+	0x00000000, /* [0x021B] */
+	0x00000000, /* [0x021C] */
+	0x00000000, /* [0x021D] */
+	0x00000000, /* [0x021E] */
+	0x00000000, /* [0x021F] */
+	0x48210001, /* [0x0220] */
+	0x86000000, /* [0x0221] */
+	0x40218001, /* [0x0222] */
+	0x86000000, /* [0x0223] */
+	0x40211089, /* [0x0224] */
+	0x00001331, /* [0x0225] */
+	0x00000000, /* [0x0226] */
+	0x00000000, /* [0x0227] */
+	0x00020001, /* [0x0228] */
+	0x00000000, /* [0x0229] */
+	0x00000000, /* [0x022A] */
+	0x00000000, /* [0x022B] */
+	0x00000000, /* [0x022C] */
+	0x00000000, /* [0x022D] */
+	0x48218201, /* [0x022E] */
+	0x00001331, /* [0x022F] */
+	0x40214383, /* [0x0230] */
+	0x00000000, /* [0x0231] */
+	0x00000000, /* [0x0232] */
+	0x00000001, /* [0x0233] */
+	0x40210789, /* [0x0234] */
+	0x00000021, /* [0x0235] */
+	0x00000000, /* [0x0236] */
+	0x00000000, /* [0x0237] */
+	0x00020001, /* [0x0238] */
+	0x00000000, /* [0x0239] */
+	0x00000000, /* [0x023A] */
+	0x00000000, /* [0x023B] */
+	0x00000000, /* [0x023C] */
+	0x00000000, /* [0x023D] */
+	0x48218101, /* [0x023E] */
+	0x00000021, /* [0x023F] */
+	0x48218401, /* [0x0240] */
+	0x00000001, /* [0x0241] */
+	0x702C8005, /* [0x0242] */
+	0x00000002, /* [0x0243] */
+	0x00000000, /* [0x0244] */
+	0x00010001, /* [0x0245] */
+	0x00000000, /* [0x0246] */
+	0x00010001, /* [0x0247] */
+	0x70B00023, /* [0x0248] */
+	0x00600000, /* [0x0249] */
+	0x00000000, /* [0x024A] */
+	0x00000000, /* [0x024B] */
+	0x00000000, /* [0x024C] */
+	0x03000000, /* [0x024D] */
+	0x00000000, /* [0x024E] */
+	0x00000000, /* [0x024F] */
+	0x00000000, /* [0x0250] */
+	0x00000000, /* [0x0251] */
+	0x00000000, /* [0x0252] */
+	0x00000000, /* [0x0253] */
+	0x00000000, /* [0x0254] */
+	0x00000000, /* [0x0255] */
+	0x00000000, /* [0x0256] */
+	0x00000000, /* [0x0257] */
+	0x00000000, /* [0x0258] */
+	0x00000000, /* [0x0259] */
+	0x00000000, /* [0x025A] */
+	0x00000000, /* [0x025B] */
+	0x00000000, /* [0x025C] */
+	0x00000000, /* [0x025D] */
+	0x00000000, /* [0x025E] */
+	0x00000000, /* [0x025F] */
+	0x00000000, /* [0x0260] */
+	0x00000000, /* [0x0261] */
+	0x00000000, /* [0x0262] */
+	0x00000000, /* [0x0263] */
+	0x00000000, /* [0x0264] */
+	0x00000000, /* [0x0265] */
+	0x00000000, /* [0x0266] */
+	0x00000000, /* [0x0267] */
+	0x00000000, /* [0x0268] */
+	0x00000000, /* [0x0269] */
+	0x00000000, /* [0x026A] */
+	0x00000000, /* [0x026B] */
+	0x40E09301, /* [0x026C] */
+	0x00000000, /* [0x026D] */
+	0x40E38D01, /* [0x026E] */
+	0x00000000, /* [0x026F] */
+	0x40E29801, /* [0x0270] */
+	0x0000FFFF, /* [0x0271] */
+	0x48E28201, /* [0x0272] */
+	0xEAEAEAEA, /* [0x0273] */
+	0x40E29404, /* [0x0274] */
+	0xFFFFFFFF, /* [0x0275] */
+	0xFFFFFFFF, /* [0x0276] */
+	0xFFFFFFFF, /* [0x0277] */
+	0xFFFFFFFF, /* [0x0278] */
+	0x40E5DB01, /* [0x0279] */
+	0x00000000, /* [0x027A] */
+	0x48E14701, /* [0x027B] */
+	0x0000000F, /* [0x027C] */
+	0x70B00023, /* [0x027D] */
+	0x00700000, /* [0x027E] */
+	0x00000000, /* [0x027F] */
+	0x00000000, /* [0x0280] */
+	0x00003C00, /* [0x0281] */
+	0x20400000, /* [0x0282] */
+	0x00000000, /* [0x0283] */
+	0x20400001, /* [0x0284] */
+	0x00000000, /* [0x0285] */
+	0x20400002, /* [0x0286] */
+	0x00003C00, /* [0x0287] */
+	0x20400003, /* [0x0288] */
+	0x00000000, /* [0x0289] */
+	0x03000000, /* [0x028A] */
+	0x00000000, /* [0x028B] */
+	0x00000000, /* [0x028C] */
+	0x00000000, /* [0x028D] */
+	0x00000000, /* [0x028E] */
+	0x00000000, /* [0x028F] */
+	0x00000000, /* [0x0290] */
+	0x00000000, /* [0x0291] */
+	0x00000000, /* [0x0292] */
+	0x00000000, /* [0x0293] */
+	0x00000000, /* [0x0294] */
+	0x00000000, /* [0x0295] */
+	0x00000000, /* [0x0296] */
+	0x00000000, /* [0x0297] */
+	0x00000000, /* [0x0298] */
+	0x00000000, /* [0x0299] */
+	0x00000000, /* [0x029A] */
+	0x00000000, /* [0x029B] */
+	0x00000000, /* [0x029C] */
+	0x00000000, /* [0x029D] */
+	0x00000000, /* [0x029E] */
+	0x00000000, /* [0x029F] */
+	0x00000000, /* [0x02A0] */
+};
+
+/* Fixups for the IBs in _a5xx_critical_pkts_mem03 */
+static const struct adreno_critical_fixup critical_pkt_mem03_fixups[] = {
+	{ 2, 3, 3, 0x0780 },
+	{ 6, 7, 2, 0x0000 },
+	{ 98, 99, 1, 0x0000 },
+	{ 112, 113, 1, 0x0480 },
+	{ 115, 116, 1, 0x0400 },
+	{ 126, 127, 1, 0x0080 },
+	{ 131, 132, 2, 0x0108 },
+	{ 137, 138, 1, 0x00A0 },
+	{ 141, 142, 2, 0x0108 },
+	{ 147, 148, 1, 0x0080 },
+	{ 150, 151, 1, 0x00C0 },
+	{ 174, 175, 3, 0x0780 },
+	{ 378, 379, 1, 0x0000 },
+	{ 392, 393, 1, 0x0480 },
+	{ 395, 396, 1, 0x0400 },
+	{ 408, 409, 1, 0x0080 },
+	{ 413, 414, 2, 0x0108 },
+	{ 419, 420, 1, 0x00A0 },
+	{ 423, 424, 2, 0x0108 },
+	{ 429, 430, 1, 0x0080 },
+	{ 432, 433, 1, 0x00C0 },
+	{ 462, 463, 0, 0x0700 },
+	{ 472, 473, 2, 0x0110 },
+	{ 550, 551, 1, 0x0500 },
+	{ 561, 562, 1, 0x0600 },
+	{ 566, 567, 1, 0x0700 },
+};

+ 695 - 0
qcom/opensource/graphics-kernel/adreno_a5xx_perfcounter.c

@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a5xx.h"
+#include "adreno_perfcounter.h"
+#include "adreno_pm4types.h"
+#include "kgsl_device.h"
+
+#define VBIF2_PERF_CNT_SEL_MASK 0x7F
+/* offset of clear register from select register */
+#define VBIF2_PERF_CLR_REG_SEL_OFF 8
+/* offset of enable register from select register */
+#define VBIF2_PERF_EN_REG_SEL_OFF 16
+/* offset of clear register from the enable register */
+#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8
+
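+/*
+ * Restore a previously saved counter value after a power cycle: write the
+ * 64-bit value into the RBBM load value registers, then poke the load bit
+ * for this counter (load_bit selects both the LOAD_CMD register and the bit
+ * within it).
+ */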
+static void a5xx_counter_load(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_register *reg)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int index = reg->load_bit / 32;
+	u32 enable = BIT(reg->load_bit & 31);
+
+	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_VALUE_LO,
+		lower_32_bits(reg->value));
+
+	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_VALUE_HI,
+		upper_32_bits(reg->value));
+
+	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_CMD0 + index, enable);
+}
+
+static u64 a5xx_counter_read_norestore(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 hi, lo;
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	return ((((u64) hi) << 32) | lo) + reg->value;
+}
+
+static int a5xx_counter_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	kgsl_regwrite(device, reg->select, countable);
+	reg->value = 0;
+
+	return 0;
+}
+
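+/*
+ * Program the countable select "inline" through the highest priority
+ * ringbuffer when the GPU is active, so the register write is ordered
+ * behind a CP_WAIT_FOR_IDLE; fall back to a direct register write when the
+ * device is not active.
+ */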
+static int a5xx_counter_inline_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0];
+	u32 cmds[3];
+	int ret;
+
+	if (device->state != KGSL_STATE_ACTIVE)
+		return a5xx_counter_enable(adreno_dev, group, counter,
+			countable);
+
+	cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+	cmds[1] = cp_type4_packet(reg->select, 1);
+	cmds[2] = countable;
+
+	/* submit to highest priority RB always */
+	ret = a5xx_ringbuffer_addcmds(adreno_dev, rb, NULL,
+		F_NOTPROTECTED, cmds, 3, 0, NULL);
+
+	if (ret)
+		return ret;
+
+	/*
+	 * Schedule the dispatcher to make sure rb[0] runs: if the
+	 * current RB is not rb[0] and the GPU is idle, rb[0] will
+	 * not get scheduled to run otherwise
+	 */
+	if (adreno_dev->cur_rb != rb)
+		adreno_dispatcher_schedule(device);
+
+	/* wait for the above commands submitted to complete */
+	ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp,
+		ADRENO_IDLE_TIMEOUT);
+
+	if (ret) {
+		/*
+		 * If we were woken up because of cancelling rb events
+		 * either due to soft reset or adreno_stop, ignore the
+		 * error and return 0 here. The perfcounter is already
+		 * set up in software and it will be programmed in
+		 * hardware when we wake up or come up after soft reset,
+		 * by adreno_perfcounter_restore.
+		 */
+		if (ret == -EAGAIN)
+			ret = 0;
+		else
+			dev_err(device->dev,
+				     "Perfcounter %s/%u/%u start via commands failed %d\n",
+				     group->name, counter, countable, ret);
+	}
+
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
+
+static int a5xx_counter_rbbm_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	if (adreno_is_a540(adreno_dev) && countable == A5XX_RBBM_ALWAYS_COUNT)
+		return -EINVAL;
+
+	return a5xx_counter_inline_enable(adreno_dev, group, counter,
+			countable);
+}
+
+static u64 a5xx_counter_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 hi, lo;
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	return (((u64) hi) << 32) | lo;
+}
+
+static int a5xx_counter_vbif_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	if (countable > VBIF2_PERF_CNT_SEL_MASK)
+		return -EINVAL;
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regwrite(device,
+		reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1);
+	kgsl_regwrite(device,
+		reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0);
+	kgsl_regwrite(device,
+		reg->select, countable & VBIF2_PERF_CNT_SEL_MASK);
+	/* enable reg is 16 DWORDS before select reg */
+	kgsl_regwrite(device,
+		reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
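+/*
+ * The VBIF power counters have no countable select: enabling one is just a
+ * matter of toggling its CLR register and then setting the enable bit.
+ */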
+static int a5xx_counter_vbif_pwr_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regwrite(device, reg->select +
+		VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1);
+	kgsl_regwrite(device, reg->select +
+		VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0);
+	kgsl_regwrite(device, reg->select, 1);
+
+	reg->value = 0;
+
+	return 0;
+}
+
+static int a5xx_counter_alwayson_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	return 0;
+}
+
+static u64 a5xx_counter_alwayson_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	return a5xx_read_alwayson(adreno_dev) + reg->value;
+}
+
+static int a5xx_counter_pwr_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	kgsl_regwrite(device, reg->select, countable);
+	kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
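+/*
+ * Each GPMU power counter select register packs four 8-bit countable
+ * fields, so shift the countable into the byte slot for this counter and
+ * update it with a read-modify-write.
+ */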
+static int a5xx_counter_pwr_gpmu_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	unsigned int shift = (counter << 3) % (sizeof(unsigned int) * 8);
+
+	if (adreno_is_a530(adreno_dev)) {
+		if (countable > 43)
+			return -EINVAL;
+	} else if (adreno_is_a540(adreno_dev)) {
+		if (countable > 47)
+			return -EINVAL;
+	}
+
+	kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift);
+	kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static int a5xx_counter_pwr_alwayson_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	kgsl_regwrite(device, A5XX_GPMU_ALWAYS_ON_COUNTER_RESET, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static struct adreno_perfcount_register a5xx_perfcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_0_LO,
+		A5XX_RBBM_PERFCTR_CP_0_HI, 0, A5XX_CP_PERFCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_1_LO,
+		A5XX_RBBM_PERFCTR_CP_1_HI, 1, A5XX_CP_PERFCTR_CP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_2_LO,
+		A5XX_RBBM_PERFCTR_CP_2_HI, 2, A5XX_CP_PERFCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_3_LO,
+		A5XX_RBBM_PERFCTR_CP_3_HI, 3, A5XX_CP_PERFCTR_CP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_4_LO,
+		A5XX_RBBM_PERFCTR_CP_4_HI, 4, A5XX_CP_PERFCTR_CP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_5_LO,
+		A5XX_RBBM_PERFCTR_CP_5_HI, 5, A5XX_CP_PERFCTR_CP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_6_LO,
+		A5XX_RBBM_PERFCTR_CP_6_HI, 6, A5XX_CP_PERFCTR_CP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_7_LO,
+		A5XX_RBBM_PERFCTR_CP_7_HI, 7, A5XX_CP_PERFCTR_CP_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_rbbm[] = {
+	/*
+	 * A5XX_RBBM_PERFCTR_RBBM_0 is used for frequency scaling and omitted
+	 * from the pool of available counters
+	 */
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_1_LO,
+		A5XX_RBBM_PERFCTR_RBBM_1_HI, 9, A5XX_RBBM_PERFCTR_RBBM_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_2_LO,
+		A5XX_RBBM_PERFCTR_RBBM_2_HI, 10, A5XX_RBBM_PERFCTR_RBBM_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_3_LO,
+		A5XX_RBBM_PERFCTR_RBBM_3_HI, 11, A5XX_RBBM_PERFCTR_RBBM_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_0_LO,
+		A5XX_RBBM_PERFCTR_PC_0_HI, 12, A5XX_PC_PERFCTR_PC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_1_LO,
+		A5XX_RBBM_PERFCTR_PC_1_HI, 13, A5XX_PC_PERFCTR_PC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_2_LO,
+		A5XX_RBBM_PERFCTR_PC_2_HI, 14, A5XX_PC_PERFCTR_PC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_3_LO,
+		A5XX_RBBM_PERFCTR_PC_3_HI, 15, A5XX_PC_PERFCTR_PC_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_4_LO,
+		A5XX_RBBM_PERFCTR_PC_4_HI, 16, A5XX_PC_PERFCTR_PC_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_5_LO,
+		A5XX_RBBM_PERFCTR_PC_5_HI, 17, A5XX_PC_PERFCTR_PC_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_6_LO,
+		A5XX_RBBM_PERFCTR_PC_6_HI, 18, A5XX_PC_PERFCTR_PC_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_7_LO,
+		A5XX_RBBM_PERFCTR_PC_7_HI, 19, A5XX_PC_PERFCTR_PC_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_0_LO,
+		A5XX_RBBM_PERFCTR_VFD_0_HI, 20, A5XX_VFD_PERFCTR_VFD_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_1_LO,
+		A5XX_RBBM_PERFCTR_VFD_1_HI, 21, A5XX_VFD_PERFCTR_VFD_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_2_LO,
+		A5XX_RBBM_PERFCTR_VFD_2_HI, 22, A5XX_VFD_PERFCTR_VFD_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_3_LO,
+		A5XX_RBBM_PERFCTR_VFD_3_HI, 23, A5XX_VFD_PERFCTR_VFD_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_4_LO,
+		A5XX_RBBM_PERFCTR_VFD_4_HI, 24, A5XX_VFD_PERFCTR_VFD_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_5_LO,
+		A5XX_RBBM_PERFCTR_VFD_5_HI, 25, A5XX_VFD_PERFCTR_VFD_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_6_LO,
+		A5XX_RBBM_PERFCTR_VFD_6_HI, 26, A5XX_VFD_PERFCTR_VFD_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_7_LO,
+		A5XX_RBBM_PERFCTR_VFD_7_HI, 27, A5XX_VFD_PERFCTR_VFD_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_0_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_1_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_2_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_3_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_4_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_5_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_6_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_7_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_0_LO,
+		A5XX_RBBM_PERFCTR_VPC_0_HI, 36, A5XX_VPC_PERFCTR_VPC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_1_LO,
+		A5XX_RBBM_PERFCTR_VPC_1_HI, 37, A5XX_VPC_PERFCTR_VPC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_2_LO,
+		A5XX_RBBM_PERFCTR_VPC_2_HI, 38, A5XX_VPC_PERFCTR_VPC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_3_LO,
+		A5XX_RBBM_PERFCTR_VPC_3_HI, 39, A5XX_VPC_PERFCTR_VPC_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_ccu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_0_LO,
+		A5XX_RBBM_PERFCTR_CCU_0_HI, 40, A5XX_RB_PERFCTR_CCU_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_1_LO,
+		A5XX_RBBM_PERFCTR_CCU_1_HI, 41, A5XX_RB_PERFCTR_CCU_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_2_LO,
+		A5XX_RBBM_PERFCTR_CCU_2_HI, 42, A5XX_RB_PERFCTR_CCU_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_3_LO,
+		A5XX_RBBM_PERFCTR_CCU_3_HI, 43, A5XX_RB_PERFCTR_CCU_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_0_LO,
+		A5XX_RBBM_PERFCTR_TSE_0_HI, 44, A5XX_GRAS_PERFCTR_TSE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_1_LO,
+		A5XX_RBBM_PERFCTR_TSE_1_HI, 45, A5XX_GRAS_PERFCTR_TSE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_2_LO,
+		A5XX_RBBM_PERFCTR_TSE_2_HI, 46, A5XX_GRAS_PERFCTR_TSE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_3_LO,
+		A5XX_RBBM_PERFCTR_TSE_3_HI, 47, A5XX_GRAS_PERFCTR_TSE_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_0_LO,
+		A5XX_RBBM_PERFCTR_RAS_0_HI, 48, A5XX_GRAS_PERFCTR_RAS_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_1_LO,
+		A5XX_RBBM_PERFCTR_RAS_1_HI, 49, A5XX_GRAS_PERFCTR_RAS_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_2_LO,
+		A5XX_RBBM_PERFCTR_RAS_2_HI, 50, A5XX_GRAS_PERFCTR_RAS_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_3_LO,
+		A5XX_RBBM_PERFCTR_RAS_3_HI, 51, A5XX_GRAS_PERFCTR_RAS_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_0_LO,
+		A5XX_RBBM_PERFCTR_UCHE_0_HI, 52, A5XX_UCHE_PERFCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_1_LO,
+		A5XX_RBBM_PERFCTR_UCHE_1_HI, 53, A5XX_UCHE_PERFCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_2_LO,
+		A5XX_RBBM_PERFCTR_UCHE_2_HI, 54, A5XX_UCHE_PERFCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_3_LO,
+		A5XX_RBBM_PERFCTR_UCHE_3_HI, 55, A5XX_UCHE_PERFCTR_UCHE_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_4_LO,
+		A5XX_RBBM_PERFCTR_UCHE_4_HI, 56, A5XX_UCHE_PERFCTR_UCHE_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_5_LO,
+		A5XX_RBBM_PERFCTR_UCHE_5_HI, 57, A5XX_UCHE_PERFCTR_UCHE_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_6_LO,
+		A5XX_RBBM_PERFCTR_UCHE_6_HI, 58, A5XX_UCHE_PERFCTR_UCHE_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_7_LO,
+		A5XX_RBBM_PERFCTR_UCHE_7_HI, 59, A5XX_UCHE_PERFCTR_UCHE_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_0_LO,
+		A5XX_RBBM_PERFCTR_TP_0_HI, 60, A5XX_TPL1_PERFCTR_TP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_1_LO,
+		A5XX_RBBM_PERFCTR_TP_1_HI, 61, A5XX_TPL1_PERFCTR_TP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_2_LO,
+		A5XX_RBBM_PERFCTR_TP_2_HI, 62, A5XX_TPL1_PERFCTR_TP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_3_LO,
+		A5XX_RBBM_PERFCTR_TP_3_HI, 63, A5XX_TPL1_PERFCTR_TP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_4_LO,
+		A5XX_RBBM_PERFCTR_TP_4_HI, 64, A5XX_TPL1_PERFCTR_TP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_5_LO,
+		A5XX_RBBM_PERFCTR_TP_5_HI, 65, A5XX_TPL1_PERFCTR_TP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_6_LO,
+		A5XX_RBBM_PERFCTR_TP_6_HI, 66, A5XX_TPL1_PERFCTR_TP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_7_LO,
+		A5XX_RBBM_PERFCTR_TP_7_HI, 67, A5XX_TPL1_PERFCTR_TP_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_0_LO,
+		A5XX_RBBM_PERFCTR_SP_0_HI, 68, A5XX_SP_PERFCTR_SP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_1_LO,
+		A5XX_RBBM_PERFCTR_SP_1_HI, 69, A5XX_SP_PERFCTR_SP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_2_LO,
+		A5XX_RBBM_PERFCTR_SP_2_HI, 70, A5XX_SP_PERFCTR_SP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_3_LO,
+		A5XX_RBBM_PERFCTR_SP_3_HI, 71, A5XX_SP_PERFCTR_SP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_4_LO,
+		A5XX_RBBM_PERFCTR_SP_4_HI, 72, A5XX_SP_PERFCTR_SP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_5_LO,
+		A5XX_RBBM_PERFCTR_SP_5_HI, 73, A5XX_SP_PERFCTR_SP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_6_LO,
+		A5XX_RBBM_PERFCTR_SP_6_HI, 74, A5XX_SP_PERFCTR_SP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_7_LO,
+		A5XX_RBBM_PERFCTR_SP_7_HI, 75, A5XX_SP_PERFCTR_SP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_8_LO,
+		A5XX_RBBM_PERFCTR_SP_8_HI, 76, A5XX_SP_PERFCTR_SP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_9_LO,
+		A5XX_RBBM_PERFCTR_SP_9_HI, 77, A5XX_SP_PERFCTR_SP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_10_LO,
+		A5XX_RBBM_PERFCTR_SP_10_HI, 78, A5XX_SP_PERFCTR_SP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_11_LO,
+		A5XX_RBBM_PERFCTR_SP_11_HI, 79, A5XX_SP_PERFCTR_SP_SEL_11 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_0_LO,
+		A5XX_RBBM_PERFCTR_RB_0_HI, 80, A5XX_RB_PERFCTR_RB_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_1_LO,
+		A5XX_RBBM_PERFCTR_RB_1_HI, 81, A5XX_RB_PERFCTR_RB_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_2_LO,
+		A5XX_RBBM_PERFCTR_RB_2_HI, 82, A5XX_RB_PERFCTR_RB_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_3_LO,
+		A5XX_RBBM_PERFCTR_RB_3_HI, 83, A5XX_RB_PERFCTR_RB_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_4_LO,
+		A5XX_RBBM_PERFCTR_RB_4_HI, 84, A5XX_RB_PERFCTR_RB_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_5_LO,
+		A5XX_RBBM_PERFCTR_RB_5_HI, 85, A5XX_RB_PERFCTR_RB_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_6_LO,
+		A5XX_RBBM_PERFCTR_RB_6_HI, 86, A5XX_RB_PERFCTR_RB_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_7_LO,
+		A5XX_RBBM_PERFCTR_RB_7_HI, 87, A5XX_RB_PERFCTR_RB_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vsc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_0_LO,
+		A5XX_RBBM_PERFCTR_VSC_0_HI, 88, A5XX_VSC_PERFCTR_VSC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_1_LO,
+		A5XX_RBBM_PERFCTR_VSC_1_HI, 89, A5XX_VSC_PERFCTR_VSC_SEL_1 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_lrz[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_0_LO,
+		A5XX_RBBM_PERFCTR_LRZ_0_HI, 90, A5XX_GRAS_PERFCTR_LRZ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_1_LO,
+		A5XX_RBBM_PERFCTR_LRZ_1_HI, 91, A5XX_GRAS_PERFCTR_LRZ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_2_LO,
+		A5XX_RBBM_PERFCTR_LRZ_2_HI, 92, A5XX_GRAS_PERFCTR_LRZ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_3_LO,
+		A5XX_RBBM_PERFCTR_LRZ_3_HI, 93, A5XX_GRAS_PERFCTR_LRZ_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_cmp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_0_LO,
+		A5XX_RBBM_PERFCTR_CMP_0_HI, 94, A5XX_RB_PERFCTR_CMP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_1_LO,
+		A5XX_RBBM_PERFCTR_CMP_1_HI, 95, A5XX_RB_PERFCTR_CMP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_2_LO,
+		A5XX_RBBM_PERFCTR_CMP_2_HI, 96, A5XX_RB_PERFCTR_CMP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_3_LO,
+		A5XX_RBBM_PERFCTR_CMP_3_HI, 97, A5XX_RB_PERFCTR_CMP_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vbif[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW0,
+		A5XX_VBIF_PERF_CNT_HIGH0, -1, A5XX_VBIF_PERF_CNT_SEL0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW1,
+		A5XX_VBIF_PERF_CNT_HIGH1, -1, A5XX_VBIF_PERF_CNT_SEL1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW2,
+		A5XX_VBIF_PERF_CNT_HIGH2, -1, A5XX_VBIF_PERF_CNT_SEL2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW3,
+		A5XX_VBIF_PERF_CNT_HIGH3, -1, A5XX_VBIF_PERF_CNT_SEL3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vbif_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW0,
+		A5XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A5XX_VBIF_PERF_PWR_CNT_EN0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW1,
+		A5XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A5XX_VBIF_PERF_PWR_CNT_EN1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW2,
+		A5XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A5XX_VBIF_PERF_PWR_CNT_EN2 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_ALWAYSON_COUNTER_LO,
+		A5XX_RBBM_ALWAYSON_COUNTER_HI, -1 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_0_LO,
+		A5XX_SP_POWER_COUNTER_0_HI, -1, A5XX_SP_POWERCTR_SP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_1_LO,
+		A5XX_SP_POWER_COUNTER_1_HI, -1, A5XX_SP_POWERCTR_SP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_2_LO,
+		A5XX_SP_POWER_COUNTER_2_HI, -1, A5XX_SP_POWERCTR_SP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_3_LO,
+		A5XX_SP_POWER_COUNTER_3_HI, -1, A5XX_SP_POWERCTR_SP_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_0_LO,
+		A5XX_TP_POWER_COUNTER_0_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_1_LO,
+		A5XX_TP_POWER_COUNTER_1_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_2_LO,
+		A5XX_TP_POWER_COUNTER_2_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_3_LO,
+		A5XX_TP_POWER_COUNTER_3_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_0_LO,
+		A5XX_RB_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_RB_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_1_LO,
+		A5XX_RB_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_RB_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_2_LO,
+		A5XX_RB_POWER_COUNTER_2_HI, -1, A5XX_RB_POWERCTR_RB_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_3_LO,
+		A5XX_RB_POWER_COUNTER_3_HI, -1, A5XX_RB_POWERCTR_RB_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_ccu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_0_LO,
+		A5XX_CCU_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_1_LO,
+		A5XX_CCU_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_1 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_0_LO,
+		A5XX_UCHE_POWER_COUNTER_0_HI, -1,
+		A5XX_UCHE_POWERCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_1_LO,
+		A5XX_UCHE_POWER_COUNTER_1_HI, -1,
+		A5XX_UCHE_POWERCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_2_LO,
+		A5XX_UCHE_POWER_COUNTER_2_HI, -1,
+		A5XX_UCHE_POWERCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_3_LO,
+		A5XX_UCHE_POWER_COUNTER_3_HI, -1,
+		A5XX_UCHE_POWERCTR_UCHE_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_0_LO,
+		A5XX_CP_POWER_COUNTER_0_HI, -1, A5XX_CP_POWERCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_1_LO,
+		A5XX_CP_POWER_COUNTER_1_HI, -1, A5XX_CP_POWERCTR_CP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_2_LO,
+		A5XX_CP_POWER_COUNTER_2_HI, -1, A5XX_CP_POWERCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_3_LO,
+		A5XX_CP_POWER_COUNTER_3_HI, -1, A5XX_CP_POWERCTR_CP_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_gpmu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_0_LO,
+		A5XX_GPMU_POWER_COUNTER_0_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_1_LO,
+		A5XX_GPMU_POWER_COUNTER_1_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_2_LO,
+		A5XX_GPMU_POWER_COUNTER_2_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_3_LO,
+		A5XX_GPMU_POWER_COUNTER_3_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_4_LO,
+		A5XX_GPMU_POWER_COUNTER_4_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_5_LO,
+		A5XX_GPMU_POWER_COUNTER_5_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_1 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_alwayson[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_ALWAYS_ON_COUNTER_LO,
+		A5XX_GPMU_ALWAYS_ON_COUNTER_HI, -1 },
+};
+
+#define A5XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \
+	ADRENO_PERFCOUNTER_GROUP(a5xx, offset, name, enable, read, load)
+
+#define A5XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, enable, read, load) \
+	ADRENO_PERFCOUNTER_GROUP_FLAGS(a5xx, offset, name, flags, enable, \
+			read, load)
+
+#define A5XX_POWER_COUNTER_GROUP(offset, name, enable, read) \
+	[KGSL_PERFCOUNTER_GROUP_##offset##_PWR] = { a5xx_pwrcounters_##name, \
+	ARRAY_SIZE(a5xx_pwrcounters_##name), __stringify(name##_pwr), 0, \
+	enable, read, NULL }
+
+#define A5XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \
+	A5XX_PERFCOUNTER_GROUP(offset, name, a5xx_counter_inline_enable, \
+			a5xx_counter_read, a5xx_counter_load)
+
+static struct adreno_perfcount_group a5xx_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	A5XX_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	A5XX_PERFCOUNTER_GROUP(RBBM, rbbm,
+		a5xx_counter_rbbm_enable, a5xx_counter_read, a5xx_counter_load),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(PC, pc),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(TP, tp),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(SP, sp),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(RB, rb),
+	A5XX_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
+	A5XX_PERFCOUNTER_GROUP(VBIF, vbif,
+		a5xx_counter_vbif_enable, a5xx_counter_read_norestore, NULL),
+	A5XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		a5xx_counter_vbif_pwr_enable,
+		a5xx_counter_read_norestore, NULL),
+	A5XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		a5xx_counter_alwayson_enable, a5xx_counter_alwayson_read, NULL),
+	A5XX_POWER_COUNTER_GROUP(SP, sp,
+		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
+	A5XX_POWER_COUNTER_GROUP(TP, tp,
+		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
+	A5XX_POWER_COUNTER_GROUP(RB, rb,
+		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
+	A5XX_POWER_COUNTER_GROUP(CCU, ccu,
+		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
+	A5XX_POWER_COUNTER_GROUP(UCHE, uche,
+		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
+	A5XX_POWER_COUNTER_GROUP(CP, cp,
+		a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
+	A5XX_POWER_COUNTER_GROUP(GPMU, gpmu,
+		a5xx_counter_pwr_gpmu_enable, a5xx_counter_read_norestore),
+	A5XX_POWER_COUNTER_GROUP(ALWAYSON, alwayson,
+		a5xx_counter_pwr_alwayson_enable, a5xx_counter_read_norestore),
+};
+
+const struct adreno_perfcounters adreno_a5xx_perfcounters = {
+	a5xx_perfcounter_groups,
+	ARRAY_SIZE(a5xx_perfcounter_groups),
+};

+ 548 - 0
qcom/opensource/graphics-kernel/adreno_a5xx_preempt.c

@@ -0,0 +1,548 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2014-2017,2021 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a5xx.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+
+#define PREEMPT_RECORD(_field) \
+		offsetof(struct a5xx_cp_preemption_record, _field)
+
+#define PREEMPT_SMMU_RECORD(_field) \
+		offsetof(struct a5xx_cp_smmu_info, _field)
+
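+/*
+ * Re-sync the hardware WPTR with the software copy for the ringbuffer that
+ * just became current and, if requested (or if new work arrived while the
+ * preemption was in flight), restart the dispatch queue expiration timer.
+ */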
+static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	unsigned int wptr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+
+	kgsl_regread(device, A5XX_CP_RB_WPTR, &wptr);
+
+	if (wptr != rb->wptr) {
+		kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->wptr);
+		/*
+		 * In case something got submitted while preemption was
+		 * ongoing, reset the timer.
+		 */
+		reset_timer = true;
+	}
+
+	if (reset_timer)
+		rb->dispatch_q.expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+}
+
+static void _a5xx_preemption_done(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * In the very unlikely case that the power is off, do nothing - the
+	 * state will be reset on power up and everybody will be happy
+	 */
+
+	if (!kgsl_state_is_awake(device))
+		return;
+
+	kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);
+
+	if (status != 0) {
+		dev_err(device->dev,
+			     "Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
+			     status, adreno_dev->cur_rb->id,
+			     adreno_get_rptr(adreno_dev->cur_rb),
+			     adreno_dev->cur_rb->wptr,
+			     adreno_dev->next_rb->id,
+			     adreno_get_rptr(adreno_dev->next_rb),
+			     adreno_dev->next_rb->wptr);
+
+		/* Set a fault and restart */
+		adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+
+		return;
+	}
+
+	del_timer_sync(&adreno_dev->preempt.timer);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id, 0, 0);
+
+	/* Clean up all the bits */
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr for the new command queue */
+	_update_wptr(adreno_dev, true);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	/* Clear the preempt state */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+}
+
+static void _a5xx_preemption_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * If the power is on check the preemption status one more time - if it
+	 * was successful then just transition to the complete state
+	 */
+	if (kgsl_state_is_awake(device)) {
+		kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);
+
+		if (status == 0) {
+			adreno_set_preempt_state(adreno_dev,
+				ADRENO_PREEMPT_COMPLETE);
+
+			adreno_dispatcher_schedule(device);
+			return;
+		}
+	}
+
+	dev_err(device->dev,
+		     "Preemption timed out: cur=%d R/W=%X/%X, next=%d R/W=%X/%X\n",
+		     adreno_dev->cur_rb->id,
+		     adreno_get_rptr(adreno_dev->cur_rb),
+		     adreno_dev->cur_rb->wptr,
+		     adreno_dev->next_rb->id,
+		     adreno_get_rptr(adreno_dev->next_rb),
+		     adreno_dev->next_rb->wptr);
+
+	adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+}
+
+static void _a5xx_preemption_worker(struct work_struct *work)
+{
+	struct adreno_preemption *preempt = container_of(work,
+		struct adreno_preemption, work);
+	struct adreno_device *adreno_dev = container_of(preempt,
+		struct adreno_device, preempt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Need to take the mutex to make sure that the power stays on */
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
+		_a5xx_preemption_fault(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+/* Find the highest priority active ringbuffer */
+static struct adreno_ringbuffer *a5xx_next_ringbuffer(
+		struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb;
+	unsigned long flags;
+	unsigned int i;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		bool empty;
+
+		spin_lock_irqsave(&rb->preempt_lock, flags);
+		empty = adreno_rb_empty(rb);
+		spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+		if (!empty)
+			return rb;
+	}
+
+	return NULL;
+}
+
+void a5xx_preemption_trigger(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_ringbuffer *next;
+	uint64_t ttbr0;
+	unsigned int contextidr;
+	unsigned long flags;
+
+	/* Put ourselves into a possible trigger state */
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
+		return;
+
+	/* Get the next ringbuffer to preempt in */
+	next = a5xx_next_ringbuffer(adreno_dev);
+
+	/*
+	 * Nothing to do if every ringbuffer is empty or if the current
+	 * ringbuffer is the only active one
+	 */
+	if (next == NULL || next == adreno_dev->cur_rb) {
+		/*
+		 * Update the wptr and the dispatcher timer in case they were
+		 * skipped while we were looking for a new ringbuffer
+		 */
+
+		if (next != NULL) {
+			_update_wptr(adreno_dev, false);
+
+			mod_timer(&adreno_dev->dispatcher.timer,
+				adreno_dev->cur_rb->dispatch_q.expires);
+		}
+
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+		return;
+	}
+
+	/* Turn off the dispatcher timer */
+	del_timer(&adreno_dev->dispatcher.timer);
+
+	/*
+	 * This is the most critical section - we need to take care not to race
+	 * until we have programmed the CP for the switch
+	 */
+
+	spin_lock_irqsave(&next->preempt_lock, flags);
+
+	/* Get the pagetable from the pagetable info. */
+	kgsl_sharedmem_readq(device->scratch, &ttbr0,
+		SCRATCH_RB_OFFSET(next->id, ttbr0));
+	kgsl_sharedmem_readl(device->scratch, &contextidr,
+		SCRATCH_RB_OFFSET(next->id, contextidr));
+
+	kgsl_sharedmem_writel(next->preemption_desc,
+		PREEMPT_RECORD(wptr), next->wptr);
+
+	spin_unlock_irqrestore(&next->preempt_lock, flags);
+
+	/* And write it to the smmu info */
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		kgsl_sharedmem_writeq(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(context_idr), contextidr);
+	}
+
+	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
+		lower_32_bits(next->preemption_desc->gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
+		upper_32_bits(next->preemption_desc->gpuaddr));
+
+	adreno_dev->next_rb = next;
+
+	/* Start the timer to detect a stuck preemption */
+	mod_timer(&adreno_dev->preempt.timer,
+		jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
+
+	trace_adreno_preempt_trigger(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		1, 0);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
+
+	/* Trigger the preemption */
+	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_CNTL, 1);
+}
+
+void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
+		return;
+
+	kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);
+
+	if (status != 0) {
+		dev_err(KGSL_DEVICE(adreno_dev)->dev,
+			     "preempt interrupt with non-zero status: %X\n",
+			     status);
+
+		/*
+		 * Under the assumption that this is a race between the
+		 * interrupt and the register, schedule the worker to clean up.
+		 * If the status still hasn't resolved itself by the time we get
+		 * there then we have to assume something bad happened
+		 */
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
+		adreno_dispatcher_schedule(device);
+		return;
+	}
+
+	del_timer(&adreno_dev->preempt.timer);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id, 0, 0);
+
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr if it changed while preemption was ongoing */
+	_update_wptr(adreno_dev, true);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	a5xx_preemption_trigger(adreno_dev);
+}
+
+void a5xx_preemption_schedule(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
+		_a5xx_preemption_done(adreno_dev);
+
+	a5xx_preemption_trigger(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+u32 a5xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+			struct adreno_ringbuffer *rb,
+			struct adreno_context *drawctxt, u32 *cmds)
+{
+	unsigned int *cmds_orig = cmds;
+	uint64_t gpuaddr = rb->preemption_desc->gpuaddr;
+	unsigned int preempt_style = 0;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	if (drawctxt) {
+		/*
+		 * Preemption from secure to unsecure needs Zap shader to be
+		 * run to clear all secure content. CP does not know during
+		 * preemption if it is switching between secure and unsecure
+		 * contexts so restrict Secure contexts to be preempted at
+		 * ringbuffer level.
+		 */
+		if (drawctxt->base.flags & KGSL_CONTEXT_SECURE)
+			preempt_style = KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER;
+		else
+			preempt_style = FIELD_GET(KGSL_CONTEXT_PREEMPT_STYLE_MASK,
+				drawctxt->base.flags);
+	}
+
+	/*
+	 * CP_PREEMPT_ENABLE_GLOBAL (global preemption) can only be set by KMD
+	 * in the ringbuffer:
+	 * 1) Set global preemption to 0x0 to disable global preemption.
+	 *    Only RB level preemption is allowed in this mode.
+	 * 2) Set global preemption to defer (0x2) for finegrain preemption.
+	 *    When global preemption is set to defer (0x2),
+	 *    CP_PREEMPT_ENABLE_LOCAL (local preemption) determines the
+	 *    preemption point. Local preemption can be enabled by both
+	 *    UMD (within an IB) and KMD.
+	 */
+	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1);
+	*cmds++ = ((preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN)
+				? 2 : 0);
+
+	/* Turn CP protection OFF */
+	cmds += cp_protected_mode(adreno_dev, cmds, 0);
+
+	/*
+	 * CP during context switch will save context switch info to
+	 * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR
+	 */
+	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1);
+	*cmds++ = lower_32_bits(gpuaddr);
+	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1);
+	*cmds++ = upper_32_bits(gpuaddr);
+
+	/* Turn CP protection ON */
+	cmds += cp_protected_mode(adreno_dev, cmds, 1);
+
+	/*
+	 * Enable local preemption for finegrain preemption in case of
+	 * a misbehaving IB
+	 */
+	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
+	*cmds++ = (preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) ?
+		1 : 0;
+
+	/* Enable CP_CONTEXT_SWITCH_YIELD packets in the IB2s */
+	*cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1);
+	*cmds++ = 2;
+
+	return (unsigned int) (cmds - cmds_orig);
+}
+
+unsigned int a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+	unsigned int *cmds)
+{
+	int dwords = 0;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	cmds[dwords++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	/* Write NULL to the address to skip the data write */
+	dwords += cp_gpuaddr(adreno_dev, &cmds[dwords], 0x0);
+	cmds[dwords++] = 1;
+	/* generate interrupt on preemption completion */
+	cmds[dwords++] = 1;
+
+	return dwords;
+}
+
+void a5xx_preemption_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_ringbuffer *rb;
+	unsigned int i;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	/* Force the state to be clear */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	/* Only set up smmu info when per-process pagetables are enabled */
+
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		/* smmu_info is allocated and mapped in a5xx_preemption_init */
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(magic), A5XX_CP_SMMU_INFO_MAGIC_REF);
+		kgsl_sharedmem_writeq(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
+
+		/* The CP doesn't use the asid record, so poison it */
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(asid), 0xDECAFBAD);
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(context_idr), 0);
+
+		kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+			lower_32_bits(iommu->smmu_info->gpuaddr));
+
+		kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+			upper_32_bits(iommu->smmu_info->gpuaddr));
+	}
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		/*
+		 * preemption_desc is allocated and mapped at init time,
+		 * so no need to check sharedmem_writel return value
+		 */
+		kgsl_sharedmem_writel(rb->preemption_desc,
+			PREEMPT_RECORD(rptr), 0);
+		kgsl_sharedmem_writel(rb->preemption_desc,
+			PREEMPT_RECORD(wptr), 0);
+
+		adreno_ringbuffer_set_pagetable(device, rb,
+			device->mmu.defaultpagetable);
+	}
+}
+
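+/*
+ * Allocate (once) and seed the per-ringbuffer CP context record with the
+ * magic value, the default RB control value and the GPU addresses the CP
+ * needs when saving and restoring ringbuffer state during preemption.
+ */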
+static int a5xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, uint64_t counteraddr)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (IS_ERR_OR_NULL(rb->preemption_desc))
+		rb->preemption_desc = kgsl_allocate_global(device,
+			A5XX_CP_CTXRECORD_SIZE_IN_BYTES, SZ_16K, 0,
+			KGSL_MEMDESC_PRIVILEGED, "preemption_desc");
+
+	if (IS_ERR(rb->preemption_desc))
+		return PTR_ERR(rb->preemption_desc);
+
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(magic), A5XX_CP_CTXRECORD_MAGIC_REF);
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(info), 0);
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(data), 0);
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(cntl), A5XX_CP_RB_CNTL_DEFAULT);
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(rptr), 0);
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(wptr), 0);
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(device,
+			rb->id, rptr));
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr);
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(counter), counteraddr);
+
+	return 0;
+}
+
+int a5xx_preemption_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	struct adreno_ringbuffer *rb;
+	int ret;
+	unsigned int i;
+	uint64_t addr;
+
+	/* We are dependent on IOMMU to make preemption go on the CP side */
+	if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
+		return -ENODEV;
+
+	INIT_WORK(&preempt->work, _a5xx_preemption_worker);
+
+	/* Allocate mem for storing preemption counters */
+	if (IS_ERR_OR_NULL(preempt->scratch))
+		preempt->scratch = kgsl_allocate_global(device,
+			adreno_dev->num_ringbuffers *
+			A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0, 0,
+			"preemption_counters");
+
+	ret = PTR_ERR_OR_ZERO(preempt->scratch);
+	if (ret)
+		return ret;
+
+	addr = preempt->scratch->gpuaddr;
+
+	/* Allocate mem for storing preemption switch record */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		ret = a5xx_preemption_ringbuffer_init(adreno_dev, rb, addr);
+		if (ret)
+			return ret;
+
+		addr += A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
+	}
+
+	/* Allocate mem for storing preemption smmu record */
+	if (kgsl_mmu_is_perprocess(&device->mmu) && IS_ERR_OR_NULL(iommu->smmu_info))
+		iommu->smmu_info = kgsl_allocate_global(device, PAGE_SIZE, 0,
+			KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
+			"smmu_info");
+
+	if (IS_ERR(iommu->smmu_info))
+		return PTR_ERR(iommu->smmu_info);
+
+	set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+	return 0;
+}

+ 530 - 0
qcom/opensource/graphics-kernel/adreno_a5xx_ringbuffer.c

@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a5xx.h"
+#include "adreno_pm4types.h"
+#include "adreno_ringbuffer.h"
+#include "adreno_trace.h"
+#include "kgsl_trace.h"
+
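+/*
+ * Build the command stream for an IOMMU pagetable switch:
+ * CP_SMMU_TABLE_UPDATE programs the new TTBR0 and context ID, and a
+ * CP_MEM_WRITE mirrors the same values into the per-ringbuffer scratch slot
+ * so preemption can read them back. Returns the number of dwords written
+ * (0 for the default pagetable).
+ */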
+static int a5xx_rb_pagetable_switch(struct kgsl_device *device,
+		struct adreno_context *drawctxt,
+		struct adreno_ringbuffer *rb,
+		struct kgsl_pagetable *pagetable, u32 *cmds)
+{
+	u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
+	u32 id = drawctxt ? drawctxt->base.id : 0;
+
+	if (pagetable == device->mmu.defaultpagetable)
+		return 0;
+
+	cmds[0] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
+	cmds[1] = lower_32_bits(ttbr0);
+	cmds[2] = upper_32_bits(ttbr0);
+	cmds[3] = id;
+
+	cmds[4] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+	cmds[5] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
+	cmds[6] = cp_type4_packet(A5XX_CP_CNTL, 1);
+	cmds[7] = 1;
+
+	cmds[8] = cp_type7_packet(CP_MEM_WRITE, 5);
+	cmds[9] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
+			rb->id, ttbr0));
+	cmds[10] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
+			rb->id, ttbr0));
+	cmds[11] = lower_32_bits(ttbr0);
+	cmds[12] = upper_32_bits(ttbr0);
+	cmds[13] = id;
+
+	cmds[14] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+	cmds[15] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
+	cmds[16] = cp_type4_packet(A5XX_CP_CNTL, 1);
+	cmds[17] = 0;
+
+	return 18;
+}
+
+#define RB_SOPTIMESTAMP(device, rb) \
+	       MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
+#define CTXT_SOPTIMESTAMP(device, drawctxt) \
+	       MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
+
+#define RB_EOPTIMESTAMP(device, rb) \
+	       MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
+#define CTXT_EOPTIMESTAMP(device, drawctxt) \
+	       MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
+
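+/*
+ * Commit the ringbuffer write pointer to hardware. When "sync" is set,
+ * first append a CP_WHERE_AM_I packet so the CP reports its read pointer
+ * back into the scratch buffer. The WPTR register is only touched if no
+ * preemption is pending and this ringbuffer is the one currently executing.
+ */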
+int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time, bool sync)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned long flags;
+
+	adreno_get_submit_time(adreno_dev, rb, time);
+	adreno_profile_submit_time(time);
+
+	if (sync) {
+		u32 *cmds = adreno_ringbuffer_allocspace(rb, 3);
+
+		if (IS_ERR(cmds))
+			return PTR_ERR(cmds);
+
+		cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2);
+		cmds[1] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id,
+				rptr));
+		cmds[2] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id,
+				rptr));
+	}
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+		if (adreno_dev->cur_rb == rb) {
+			kgsl_pwrscale_busy(device);
+			kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->_wptr);
+		}
+	}
+
+	rb->wptr = rb->_wptr;
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+	return 0;
+}
+
+int a5xx_ringbuffer_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int i;
+
+	if (IS_ERR_OR_NULL(device->scratch))
+		device->scratch = kgsl_allocate_global(device, PAGE_SIZE,
+			0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
+			"scratch");
+
+	if (IS_ERR(device->scratch))
+		return PTR_ERR(device->scratch);
+
+	adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
+
+	if (!adreno_preemption_feature_set(adreno_dev)) {
+		adreno_dev->num_ringbuffers = 1;
+		return adreno_ringbuffer_setup(adreno_dev,
+			&adreno_dev->ringbuffers[0], 0);
+	}
+
+	adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);
+
+	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
+		int ret;
+
+		ret = adreno_ringbuffer_setup(adreno_dev,
+			&adreno_dev->ringbuffers[i], i);
+		if (ret)
+			return ret;
+	}
+
+	timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);
+	a5xx_preemption_init(adreno_dev);
+	return 0;
+}
+
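+/* Worst-case number of dwords added around a caller's commands */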
+#define A5XX_SUBMIT_MAX 64
+
+int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 flags, u32 *in, u32 dwords, u32 timestamp,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	static u32 sequence;
+	u32 size = A5XX_SUBMIT_MAX + dwords;
+	u32 *cmds, index = 0;
+	u64 profile_gpuaddr;
+	u32 profile_dwords;
+
+	if (adreno_drawctxt_detached(drawctxt))
+		return -ENOENT;
+
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return -EPROTO;
+
+	rb->timestamp++;
+
+	if (drawctxt)
+		drawctxt->internal_timestamp = rb->timestamp;
+
+	cmds = adreno_ringbuffer_allocspace(rb, size);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	/* Identify the start of a command */
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
+
+	/* 14 dwords */
+	index += a5xx_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
+		&cmds[index]);
+
+	profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
+		drawctxt, &profile_dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = upper_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	if (drawctxt) {
+		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
+		cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
+	cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
+	cmds[index++] = rb->timestamp;
+
+	if (IS_SECURE(flags)) {
+		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		cmds[index++] = 1;
+	}
+
+	if (IS_NOTPROTECTED(flags)) {
+		cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+		cmds[index++] = 0;
+	}
+
+	memcpy(&cmds[index], in, dwords << 2);
+	index += dwords;
+
+	if (IS_NOTPROTECTED(flags)) {
+		cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+		cmds[index++] = 1;
+	}
+
+	/* 4 dwords */
+	profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
+		drawctxt, &profile_dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = upper_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	if (!adreno_is_a510(adreno_dev) &&
+		test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE,
+			&device->mmu.pfpolicy))
+		cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);
+
+	/*
+	 * Do a unique memory write from the GPU to assist in early detection of
+	 * interrupt storms
+	 */
+
+	cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[index++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+				KGSL_MEMSTORE_GLOBAL, ref_wait_ts));
+	cmds[index++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+				KGSL_MEMSTORE_GLOBAL, ref_wait_ts));
+	cmds[index++] = ++sequence;
+
+	/*
+	 * If this is an internal command, just write the ringbuffer timestamp,
+	 * otherwise, write both
+	 */
+	if (!drawctxt) {
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	} else {
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
+		cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_FLUSH_TS;
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	}
+
+	if (IS_WFI(flags))
+		cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+
+	if (IS_SECURE(flags)) {
+		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		cmds[index++] = 0;
+	}
+
+	/* 5 dwords */
+	index += a5xx_preemption_post_ibsubmit(adreno_dev, &cmds[index]);
+
+	/* Adjust the write pointer for the number of dwords we actually wrote */
+	rb->_wptr -= (size - index);
+
+	a5xx_ringbuffer_submit(rb, time,
+		!adreno_is_preemption_enabled(adreno_dev));
+
+	return 0;
+}
+
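+/*
+ * Emit a CP_REG_TO_MEM that copies the RBBM always-on counter into the
+ * buffer at gpuaddr. Returns the number of dwords written (always 4)
+ */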
+static u32 a5xx_get_alwayson_counter(struct adreno_device *adreno_dev,
+		u32 *cmds, u64 gpuaddr)
+{
+	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
+	cmds[1] = A5XX_RBBM_ALWAYSON_COUNTER_LO;
+
+	/* On some targets the upper 32 bits are not reliable */
+	if (ADRENO_GPUREV(adreno_dev) > ADRENO_REV_A530)
+		cmds[1] |= (1 << 30) | (2 << 18);
+
+	cmds[2] = lower_32_bits(gpuaddr);
+	cmds[3] = upper_32_bits(gpuaddr);
+
+	return 4;
+}
+
+/* This is the maximum possible size for 64 bit targets */
+#define PROFILE_IB_DWORDS 4
+#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
+
+static u64 a5xx_get_user_profiling_ib(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct kgsl_drawobj_cmd *cmdobj,
+		u32 target_offset, u32 *cmds)
+{
+	u32 offset, *ib, dwords;
+	u64 gpuaddr;
+
+	if (IS_ERR(rb->profile_desc))
+		return 0;
+
+	offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
+	ib = rb->profile_desc->hostptr + offset;
+	gpuaddr = rb->profile_desc->gpuaddr + offset;
+	dwords = a5xx_get_alwayson_counter(adreno_dev, ib,
+		cmdobj->profiling_buffer_gpuaddr + target_offset);
+
+	cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+	cmds[1] = lower_32_bits(gpuaddr);
+	cmds[2] = upper_32_bits(gpuaddr);
+	cmds[3] = dwords;
+
+	rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
+
+	return 4;
+}
+
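+/*
+ * Switch the ringbuffer to a new drawctxt: emit a pagetable switch if the
+ * pagetable changed, record the new context id in the memstore and
+ * invalidate the UCHE
+ */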
+static int a5xx_rb_context_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_pagetable *pagetable =
+		adreno_drawctxt_get_pagetable(drawctxt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int count = 0;
+	u32 cmds[32];
+
+	if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable)
+		count += a5xx_rb_pagetable_switch(device, drawctxt,
+				rb, pagetable, cmds);
+
+	cmds[count++] = cp_type7_packet(CP_NOP, 1);
+	cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
+				current_context));
+	cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
+				current_context));
+	cmds[count++] = drawctxt->base.id;
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+		KGSL_MEMSTORE_GLOBAL, current_context));
+	cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+		KGSL_MEMSTORE_GLOBAL, current_context));
+	cmds[count++] = drawctxt->base.id;
+
+	cmds[count++] = cp_type4_packet(A5XX_UCHE_INVALIDATE0, 1);
+	cmds[count++] = 0x12;
+
+	return a5xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
+			cmds, count, 0, NULL);
+}
+
+static int a5xx_drawctxt_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (rb->drawctxt_active == drawctxt)
+		return 0;
+
+	if (kgsl_context_detached(&drawctxt->base))
+		return -ENOENT;
+
+	if (!_kgsl_context_get(&drawctxt->base))
+		return -ENOENT;
+
+	trace_adreno_drawctxt_switch(rb, drawctxt);
+
+	a5xx_rb_context_switch(adreno_dev, rb, drawctxt);
+
+	/* Release the current drawctxt as soon as the new one is switched */
+	adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
+		rb, rb->timestamp);
+
+	rb->drawctxt_active = drawctxt;
+	return 0;
+}
+
+
+#define A5XX_USER_PROFILE_IB(dev, rb, cmdobj, cmds, field) \
+	a5xx_get_user_profiling_ib((dev), (rb), (cmdobj), \
+		offsetof(struct kgsl_drawobj_profiling_buffer, field), \
+		(cmds))
+
+#define A5XX_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
+	a5xx_get_alwayson_counter((dev), (cmds), \
+		(dev)->profile_buffer->gpuaddr + \
+			ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
+				field))
+
+#define A5XX_COMMAND_DWORDS 32
+
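+/*
+ * Build the command stream for a command object (profiling counters, the
+ * user IBs and the render mode reset), switch the drawctxt if needed and
+ * hand the result to a5xx_ringbuffer_addcmds()
+ */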
+int a5xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	struct adreno_ringbuffer *rb = drawctxt->rb;
+	int ret = 0, numibs = 0, index = 0;
+	u32 *cmds;
+
+	/* Count the number of IBs (if we are not skipping) */
+	if (!IS_SKIP(flags)) {
+		struct list_head *tmp;
+
+		list_for_each(tmp, &cmdobj->cmdlist)
+			numibs++;
+	}
+
+	cmds = kmalloc((A5XX_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
+	if (!cmds) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = START_IB_IDENTIFIER;
+
+	/* Kernel profiling: 4 dwords */
+	if (IS_KERNEL_PROFILE(flags))
+		index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
+			started);
+
+	/* User profiling: 4 dwords */
+	if (IS_USER_PROFILE(flags))
+		index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj,
+			&cmds[index], gpu_ticks_submitted);
+
+	if (numibs) {
+		struct kgsl_memobj_node *ib;
+
+		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+			if (ib->priv & MEMOBJ_SKIP ||
+			    (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE
+			     && !IS_PREAMBLE(flags)))
+				cmds[index++] = cp_type7_packet(CP_NOP, 4);
+
+			cmds[index++] =
+				cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+			cmds[index++] = lower_32_bits(ib->gpuaddr);
+			cmds[index++] = upper_32_bits(ib->gpuaddr);
+
+			/* Double check that IB_PRIV is never set */
+			cmds[index++] = (ib->size >> 2) & 0xfffff;
+		}
+	}
+
+	/*
+	 * SRM -- set render mode (e.g. binning, direct render, etc.)
+	 * SRM is usually set by the UMD at the start of an IB to tell the CP
+	 * the type of preemption. The KMD needs to set SRM to NULL to tell
+	 * the CP that rendering by the IB is done.
+	 */
+	cmds[index++] = cp_type7_packet(CP_SET_RENDER_MODE, 5);
+	cmds[index++] = 0;
+	cmds[index++] = 0;
+	cmds[index++] = 0;
+	cmds[index++] = 0;
+	cmds[index++] = 0;
+
+	cmds[index++] = cp_type7_packet(CP_YIELD_ENABLE, 1);
+	cmds[index++] = 1;
+
+	/* 4 dwords */
+	if (IS_KERNEL_PROFILE(flags))
+		index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
+			retired);
+
+	/* 4 dwords */
+	if (IS_USER_PROFILE(flags))
+		index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj,
+			&cmds[index], gpu_ticks_retired);
+
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = END_IB_IDENTIFIER;
+
+	ret = a5xx_drawctxt_switch(adreno_dev, rb, drawctxt);
+
+	/*
+	 * In the unlikely event of an error in the drawctxt switch,
+	 * treat it like a hang
+	 */
+	if (ret) {
+		/*
+		 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
+		 * the upper layers know how to handle it
+		 */
+		if (ret != -ENOSPC && ret != -ENOENT)
+			dev_err(device->dev,
+				     "Unable to switch draw context: %d\n",
+				     ret);
+		goto done;
+	}
+
+	adreno_drawobj_set_constraint(device, drawobj);
+
+	ret = a5xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
+		flags, cmds, index, drawobj->timestamp, time);
+
+done:
+	trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
+		drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
+
+	kfree(cmds);
+	return ret;
+}

+ 1219 - 0
qcom/opensource/graphics-kernel/adreno_a5xx_snapshot.c

@@ -0,0 +1,1219 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2015-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a5xx.h"
+#include "adreno_snapshot.h"
+
+enum a5xx_rbbm_debbus_id {
+	A5XX_RBBM_DBGBUS_CP          = 0x1,
+	A5XX_RBBM_DBGBUS_RBBM        = 0x2,
+	A5XX_RBBM_DBGBUS_VBIF        = 0x3,
+	A5XX_RBBM_DBGBUS_HLSQ        = 0x4,
+	A5XX_RBBM_DBGBUS_UCHE        = 0x5,
+	A5XX_RBBM_DBGBUS_DPM         = 0x6,
+	A5XX_RBBM_DBGBUS_TESS        = 0x7,
+	A5XX_RBBM_DBGBUS_PC          = 0x8,
+	A5XX_RBBM_DBGBUS_VFDP        = 0x9,
+	A5XX_RBBM_DBGBUS_VPC         = 0xa,
+	A5XX_RBBM_DBGBUS_TSE         = 0xb,
+	A5XX_RBBM_DBGBUS_RAS         = 0xc,
+	A5XX_RBBM_DBGBUS_VSC         = 0xd,
+	A5XX_RBBM_DBGBUS_COM         = 0xe,
+	A5XX_RBBM_DBGBUS_DCOM        = 0xf,
+	A5XX_RBBM_DBGBUS_LRZ         = 0x10,
+	A5XX_RBBM_DBGBUS_A2D_DSP     = 0x11,
+	A5XX_RBBM_DBGBUS_CCUFCHE     = 0x12,
+	A5XX_RBBM_DBGBUS_GPMU        = 0x13,
+	A5XX_RBBM_DBGBUS_RBP         = 0x14,
+	A5XX_RBBM_DBGBUS_HM          = 0x15,
+	A5XX_RBBM_DBGBUS_RBBM_CFG    = 0x16,
+	A5XX_RBBM_DBGBUS_VBIF_CX     = 0x17,
+	A5XX_RBBM_DBGBUS_GPC         = 0x1d,
+	A5XX_RBBM_DBGBUS_LARC        = 0x1e,
+	A5XX_RBBM_DBGBUS_HLSQ_SPTP   = 0x1f,
+	A5XX_RBBM_DBGBUS_RB_0        = 0x20,
+	A5XX_RBBM_DBGBUS_RB_1        = 0x21,
+	A5XX_RBBM_DBGBUS_RB_2        = 0x22,
+	A5XX_RBBM_DBGBUS_RB_3        = 0x23,
+	A5XX_RBBM_DBGBUS_CCU_0       = 0x28,
+	A5XX_RBBM_DBGBUS_CCU_1       = 0x29,
+	A5XX_RBBM_DBGBUS_CCU_2       = 0x2a,
+	A5XX_RBBM_DBGBUS_CCU_3       = 0x2b,
+	A5XX_RBBM_DBGBUS_A2D_RAS_0   = 0x30,
+	A5XX_RBBM_DBGBUS_A2D_RAS_1   = 0x31,
+	A5XX_RBBM_DBGBUS_A2D_RAS_2   = 0x32,
+	A5XX_RBBM_DBGBUS_A2D_RAS_3   = 0x33,
+	A5XX_RBBM_DBGBUS_VFD_0       = 0x38,
+	A5XX_RBBM_DBGBUS_VFD_1       = 0x39,
+	A5XX_RBBM_DBGBUS_VFD_2       = 0x3a,
+	A5XX_RBBM_DBGBUS_VFD_3       = 0x3b,
+	A5XX_RBBM_DBGBUS_SP_0        = 0x40,
+	A5XX_RBBM_DBGBUS_SP_1        = 0x41,
+	A5XX_RBBM_DBGBUS_SP_2        = 0x42,
+	A5XX_RBBM_DBGBUS_SP_3        = 0x43,
+	A5XX_RBBM_DBGBUS_TPL1_0      = 0x48,
+	A5XX_RBBM_DBGBUS_TPL1_1      = 0x49,
+	A5XX_RBBM_DBGBUS_TPL1_2      = 0x4a,
+	A5XX_RBBM_DBGBUS_TPL1_3      = 0x4b
+};
+
+static const struct adreno_debugbus_block a5xx_debugbus_blocks[] = {
+	{  A5XX_RBBM_DBGBUS_CP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RBBM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VBIF, 0x100, },
+	{  A5XX_RBBM_DBGBUS_HLSQ, 0x100, },
+	{  A5XX_RBBM_DBGBUS_UCHE, 0x100, },
+	{  A5XX_RBBM_DBGBUS_DPM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TESS, 0x100, },
+	{  A5XX_RBBM_DBGBUS_PC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFDP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VPC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TSE, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RAS, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VSC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_COM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_DCOM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_LRZ, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_DSP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCUFCHE, 0x100, },
+	{  A5XX_RBBM_DBGBUS_GPMU, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RBP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_HM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RBBM_CFG, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VBIF_CX, 0x100, },
+	{  A5XX_RBBM_DBGBUS_GPC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_LARC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_HLSQ_SPTP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_3, 0x100, },
+};
+
+#define A5XX_NUM_AXI_ARB_BLOCKS	2
+#define A5XX_NUM_XIN_BLOCKS	4
+
+/* Width of A5XX_CP_DRAW_STATE_ADDR is 8 bits */
+#define A5XX_CP_DRAW_STATE_ADDR_WIDTH 8
+
+/* a5xx_snapshot_cp_pm4() - Dump PM4 data in snapshot */
+static size_t a5xx_snapshot_cp_pm4(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4);
+	size_t size = fw->size;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PM4_RAM;
+	header->size = size;
+
+	memcpy(data, fw->memdesc->hostptr, size * sizeof(uint32_t));
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/* a5xx_snapshot_cp_pfp() - Dump the PFP data on snapshot */
+static size_t a5xx_snapshot_cp_pfp(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP);
+	int size = fw->size;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PFP_RAM;
+	header->size = size;
+
+	memcpy(data, fw->memdesc->hostptr, size * sizeof(uint32_t));
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/* a5xx_rbbm_debug_bus_read() - Read data from trace bus */
+static void a5xx_rbbm_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg;
+
+	reg = (block_id << A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT) |
+			(index << A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT);
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_D, reg);
+
+	kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2, val);
+	val++;
+	kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1, val);
+
+}
+
+/* a5xx_snapshot_vbif_debugbus() - Dump the VBIF debug data */
+static size_t a5xx_snapshot_vbif_debugbus(struct kgsl_device *device,
+			u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i, j;
+	/*
+	 * Total number of VBIF data words considering 3 sections:
+	 * 2 arbiter blocks of 16 words
+	 * 4 AXI XIN blocks of 18 dwords each
+	 * 4 core clock side XIN blocks of 12 dwords each
+	 */
+	unsigned int dwords = (16 * A5XX_NUM_AXI_ARB_BLOCKS) +
+			(18 * A5XX_NUM_XIN_BLOCKS) + (12 * A5XX_NUM_XIN_BLOCKS);
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size;
+	unsigned int reg_clk;
+
+	size = (dwords * sizeof(unsigned int)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+	header->id = block->block_id;
+	header->count = dwords;
+
+	kgsl_regread(device, A5XX_VBIF_CLKON, &reg_clk);
+	kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk |
+			(A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK <<
+			A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT));
+	kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 0);
+	kgsl_regwrite(device, A5XX_VBIF_TEST_BUS_OUT_CTRL,
+			(A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK <<
+			A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT));
+	for (i = 0; i < A5XX_NUM_AXI_ARB_BLOCKS; i++) {
+		kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0,
+			(1 << (i + 16)));
+		for (j = 0; j < 16; j++) {
+			kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1,
+				((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK)
+				<< A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT,
+					data);
+			data++;
+		}
+	}
+
+	/* XIN blocks AXI side */
+	for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) {
+		kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, 1 << i);
+		for (j = 0; j < 18; j++) {
+			kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1,
+				((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK)
+				<< A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT,
+				data);
+			data++;
+		}
+	}
+
+	/* XIN blocks core clock side */
+	for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) {
+		kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 1 << i);
+		for (j = 0; j < 12; j++) {
+			kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL1,
+				((j & A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK)
+				<< A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT,
+				data);
+			data++;
+		}
+	}
+	/* restore the clock of VBIF */
+	kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk);
+	return size;
+}
+
+/* a5xx_snapshot_debugbus_block() - Capture debug data for a gpu block */
+static size_t a5xx_snapshot_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int dwords;
+	size_t size;
+
+	dwords = block->dwords;
+
+	/* For a5xx each debug bus data unit is 2 DWORDS */
+	size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = block->block_id;
+	header->count = dwords * 2;
+
+	for (i = 0; i < dwords; i++)
+		a5xx_rbbm_debug_bus_read(device, block->block_id, i,
+					&data[i*2]);
+
+	return size;
+}
+
+/* a5xx_snapshot_debugbus() - Capture debug bus data */
+static void a5xx_snapshot_debugbus(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	int i;
+
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_CNTLM,
+		0xf << A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT);
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_debugbus_blocks); i++) {
+		if (a5xx_debugbus_blocks[i].block_id == A5XX_RBBM_DBGBUS_VBIF)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, a5xx_snapshot_vbif_debugbus,
+				(void *) &a5xx_debugbus_blocks[i]);
+		else
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, a5xx_snapshot_debugbus_block,
+				(void *) &a5xx_debugbus_blocks[i]);
+	}
+}
+
+static const unsigned int a5xx_vbif_registers[] = {
+	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x302C, 0x3030, 0x3030,
+	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
+	0x3042, 0x3042, 0x3049, 0x3049, 0x3058, 0x3058, 0x305A, 0x3061,
+	0x3064, 0x3068, 0x306C, 0x306D, 0x3080, 0x3088, 0x308C, 0x308C,
+	0x3090, 0x3094, 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0,
+	0x30C8, 0x30C8, 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0,
+	0x3100, 0x3100, 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118,
+	0x3120, 0x3120, 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131,
+	0x340C, 0x340C, 0x3410, 0x3410, 0x3800, 0x3801,
+};
+
+/*
+ * Set of registers to dump for A5XX on snapshot.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+
+static const unsigned int a5xx_registers[] = {
+	/* RBBM */
+	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
+	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
+	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
+	0x04E0, 0x04F4, 0X04F8, 0x0529, 0x0531, 0x0533, 0x0540, 0x0555,
+	0xF400, 0xF400, 0xF800, 0xF807,
+	/* CP */
+	0x0800, 0x0803, 0x0806, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860,
+	0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0X0B1C, 0X0B1E, 0x0B28,
+	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD,
+	/* VSC */
+	0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61,
+	/* GRAS */
+	0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0,
+	0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
+	/* RB */
+	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
+	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
+	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
+	/* PC */
+	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
+	0x24C0, 0x24C0,
+	/* VFD */
+	0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
+	/* VPC */
+	0x0E60, 0x0E7C,
+	/* UCHE */
+	0x0E80, 0x0E8F, 0x0E90, 0x0E96, 0xEA0, 0xEA8, 0xEB0, 0xEB2,
+
+	/* RB CTX 0 */
+	0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6,
+	0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C,
+	0xE240, 0xE268,
+	/* GRAS CTX 0 */
+	0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB,
+	0xE100, 0xE105,
+	/* PC CTX 0 */
+	0xE380, 0xE38F, 0xE3B0, 0xE3B0,
+	/* VFD CTX 0 */
+	0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
+	/* VPC CTX 0 */
+	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2,
+
+	/* RB CTX 1 */
+	0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6,
+	0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C,
+	0xEA40, 0xEA68,
+	/* GRAS CTX 1 */
+	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
+	0xE900, 0xE905,
+	/* PC CTX 1 */
+	0xEB80, 0xEB8F, 0xEBB0, 0xEBB0,
+	/* VFD CTX 1 */
+	0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0,
+	/* VPC CTX 1 */
+	0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2,
+};
+
+/*
+ * GPMU registers to dump for A5XX on snapshot.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+
+static const unsigned int a5xx_gpmu_registers[] = {
+	/* GPMU */
+	0xA800, 0xA8FF, 0xAC60, 0xAC60,
+};
+
+/*
+ * Set of registers to dump for A5XX before actually triggering crash dumper.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+static const unsigned int a5xx_pre_crashdumper_registers[] = {
+	/* RBBM: RBBM_STATUS - RBBM_STATUS3 */
+	0x04F5, 0x04F7, 0x0530, 0x0530,
+	/* CP: CP_STATUS_1 */
+	0x0B1D, 0x0B1D,
+};
+
+
+struct a5xx_hlsq_sp_tp_regs {
+	unsigned int statetype;
+	unsigned int ahbaddr;
+	unsigned int size;
+	uint64_t offset;
+};
+
+static struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_sp_tp_registers[] = {
+	/* HLSQ non context. 0xe32 - 0xe3f are holes so don't include them */
+	{ 0x35, 0xE00, 0x32 },
+	/* HLSQ CTX 0 2D */
+	{ 0x31, 0x2080, 0x1 },
+	/* HLSQ CTX 1 2D */
+	{ 0x33, 0x2480, 0x1 },
+	/* HLSQ CTX 0 3D. 0xe7e2 - 0xe7ff are holes so don't include them */
+	{ 0x32, 0xE780, 0x62 },
+	/* HLSQ CTX 1 3D. 0xefe2 - 0xefff are holes so don't include them */
+	{ 0x34, 0xEF80, 0x62 },
+
+	/* SP non context */
+	{ 0x3f, 0x0EC0, 0x40 },
+	/* SP CTX 0 2D */
+	{ 0x3d, 0x2040, 0x1 },
+	/* SP CTX 1 2D */
+	{ 0x3b, 0x2440, 0x1 },
+	/* SP CTX 0 3D */
+	{ 0x3e, 0xE580, 0x180 },
+	/* SP CTX 1 3D */
+	{ 0x3c, 0xED80, 0x180 },
+
+	/* TP non context. 0x0f1c - 0x0f3f are holes so don't include them */
+	{ 0x3a, 0x0F00, 0x1c },
+	/* TP CTX 0 2D. 0x200a - 0x200f are holes so don't include them */
+	{ 0x38, 0x2000, 0xa },
+	/* TP CTX 1 2D.   0x240a - 0x240f are holes so don't include them */
+	{ 0x36, 0x2400, 0xa },
+	/* TP CTX 0 3D */
+	{ 0x39, 0xE700, 0x80 },
+	/* TP CTX 1 3D */
+	{ 0x37, 0xEF00, 0x80 },
+};
+
+
+#define A5XX_NUM_SHADER_BANKS 4
+#define A5XX_SHADER_STATETYPE_SHIFT 8
+
+enum a5xx_shader_obj {
+	A5XX_TP_W_MEMOBJ = 1,
+	A5XX_TP_W_SAMPLER = 2,
+	A5XX_TP_W_MIPMAP_BASE = 3,
+	A5XX_TP_W_MEMOBJ_TAG = 4,
+	A5XX_TP_W_SAMPLER_TAG = 5,
+	A5XX_TP_S_3D_MEMOBJ = 6,
+	A5XX_TP_S_3D_SAMPLER = 0x7,
+	A5XX_TP_S_3D_MEMOBJ_TAG = 0x8,
+	A5XX_TP_S_3D_SAMPLER_TAG = 0x9,
+	A5XX_TP_S_CS_MEMOBJ = 0xA,
+	A5XX_TP_S_CS_SAMPLER = 0xB,
+	A5XX_TP_S_CS_MEMOBJ_TAG = 0xC,
+	A5XX_TP_S_CS_SAMPLER_TAG = 0xD,
+	A5XX_SP_W_INSTR = 0xE,
+	A5XX_SP_W_CONST = 0xF,
+	A5XX_SP_W_UAV_SIZE = 0x10,
+	A5XX_SP_W_CB_SIZE = 0x11,
+	A5XX_SP_W_UAV_BASE = 0x12,
+	A5XX_SP_W_CB_BASE = 0x13,
+	A5XX_SP_W_INST_TAG = 0x14,
+	A5XX_SP_W_STATE = 0x15,
+	A5XX_SP_S_3D_INSTR = 0x16,
+	A5XX_SP_S_3D_CONST = 0x17,
+	A5XX_SP_S_3D_CB_BASE = 0x18,
+	A5XX_SP_S_3D_CB_SIZE = 0x19,
+	A5XX_SP_S_3D_UAV_BASE = 0x1A,
+	A5XX_SP_S_3D_UAV_SIZE = 0x1B,
+	A5XX_SP_S_CS_INSTR = 0x1C,
+	A5XX_SP_S_CS_CONST = 0x1D,
+	A5XX_SP_S_CS_CB_BASE = 0x1E,
+	A5XX_SP_S_CS_CB_SIZE = 0x1F,
+	A5XX_SP_S_CS_UAV_BASE = 0x20,
+	A5XX_SP_S_CS_UAV_SIZE = 0x21,
+	A5XX_SP_S_3D_INSTR_DIRTY = 0x22,
+	A5XX_SP_S_3D_CONST_DIRTY = 0x23,
+	A5XX_SP_S_3D_CB_BASE_DIRTY = 0x24,
+	A5XX_SP_S_3D_CB_SIZE_DIRTY = 0x25,
+	A5XX_SP_S_3D_UAV_BASE_DIRTY = 0x26,
+	A5XX_SP_S_3D_UAV_SIZE_DIRTY = 0x27,
+	A5XX_SP_S_CS_INSTR_DIRTY = 0x28,
+	A5XX_SP_S_CS_CONST_DIRTY = 0x29,
+	A5XX_SP_S_CS_CB_BASE_DIRTY = 0x2A,
+	A5XX_SP_S_CS_CB_SIZE_DIRTY = 0x2B,
+	A5XX_SP_S_CS_UAV_BASE_DIRTY = 0x2C,
+	A5XX_SP_S_CS_UAV_SIZE_DIRTY = 0x2D,
+	A5XX_HLSQ_ICB = 0x2E,
+	A5XX_HLSQ_ICB_DIRTY = 0x2F,
+	A5XX_HLSQ_ICB_CB_BASE_DIRTY = 0x30,
+	A5XX_SP_POWER_RESTORE_RAM = 0x40,
+	A5XX_SP_POWER_RESTORE_RAM_TAG = 0x41,
+	A5XX_TP_POWER_RESTORE_RAM = 0x42,
+	A5XX_TP_POWER_RESTORE_RAM_TAG = 0x43,
+
+};
+
+struct a5xx_shader_block {
+	unsigned int statetype;
+	unsigned int sz;
+	uint64_t offset;
+};
+
+struct a5xx_shader_block_info {
+	struct a5xx_shader_block *block;
+	unsigned int bank;
+	uint64_t offset;
+};
+
+static struct a5xx_shader_block a5xx_shader_blocks[] = {
+	{A5XX_TP_W_MEMOBJ,              0x200},
+	{A5XX_TP_W_MIPMAP_BASE,         0x3C0},
+	{A5XX_TP_W_SAMPLER_TAG,          0x40},
+	{A5XX_TP_S_3D_SAMPLER,           0x80},
+	{A5XX_TP_S_3D_SAMPLER_TAG,       0x20},
+	{A5XX_TP_S_CS_SAMPLER,           0x40},
+	{A5XX_TP_S_CS_SAMPLER_TAG,       0x10},
+	{A5XX_SP_W_CONST,               0x800},
+	{A5XX_SP_W_CB_SIZE,              0x30},
+	{A5XX_SP_W_CB_BASE,              0xF0},
+	{A5XX_SP_W_STATE,                 0x1},
+	{A5XX_SP_S_3D_CONST,            0x800},
+	{A5XX_SP_S_3D_CB_SIZE,           0x28},
+	{A5XX_SP_S_3D_UAV_SIZE,          0x80},
+	{A5XX_SP_S_CS_CONST,            0x400},
+	{A5XX_SP_S_CS_CB_SIZE,            0x8},
+	{A5XX_SP_S_CS_UAV_SIZE,          0x80},
+	{A5XX_SP_S_3D_CONST_DIRTY,       0x12},
+	{A5XX_SP_S_3D_CB_SIZE_DIRTY,      0x1},
+	{A5XX_SP_S_3D_UAV_SIZE_DIRTY,     0x2},
+	{A5XX_SP_S_CS_CONST_DIRTY,        0xA},
+	{A5XX_SP_S_CS_CB_SIZE_DIRTY,      0x1},
+	{A5XX_SP_S_CS_UAV_SIZE_DIRTY,     0x2},
+	{A5XX_HLSQ_ICB_DIRTY,             0xB},
+	{A5XX_SP_POWER_RESTORE_RAM_TAG,   0xA},
+	{A5XX_TP_POWER_RESTORE_RAM_TAG,   0xA},
+	{A5XX_TP_W_SAMPLER,              0x80},
+	{A5XX_TP_W_MEMOBJ_TAG,           0x40},
+	{A5XX_TP_S_3D_MEMOBJ,           0x200},
+	{A5XX_TP_S_3D_MEMOBJ_TAG,        0x20},
+	{A5XX_TP_S_CS_MEMOBJ,           0x100},
+	{A5XX_TP_S_CS_MEMOBJ_TAG,        0x10},
+	{A5XX_SP_W_INSTR,               0x800},
+	{A5XX_SP_W_UAV_SIZE,             0x80},
+	{A5XX_SP_W_UAV_BASE,             0x80},
+	{A5XX_SP_W_INST_TAG,             0x40},
+	{A5XX_SP_S_3D_INSTR,            0x800},
+	{A5XX_SP_S_3D_CB_BASE,           0xC8},
+	{A5XX_SP_S_3D_UAV_BASE,          0x80},
+	{A5XX_SP_S_CS_INSTR,            0x400},
+	{A5XX_SP_S_CS_CB_BASE,           0x28},
+	{A5XX_SP_S_CS_UAV_BASE,          0x80},
+	{A5XX_SP_S_3D_INSTR_DIRTY,        0x1},
+	{A5XX_SP_S_3D_CB_BASE_DIRTY,      0x5},
+	{A5XX_SP_S_3D_UAV_BASE_DIRTY,     0x2},
+	{A5XX_SP_S_CS_INSTR_DIRTY,        0x1},
+	{A5XX_SP_S_CS_CB_BASE_DIRTY,      0x1},
+	{A5XX_SP_S_CS_UAV_BASE_DIRTY,     0x2},
+	{A5XX_HLSQ_ICB,                 0x200},
+	{A5XX_HLSQ_ICB_CB_BASE_DIRTY,     0x4},
+	{A5XX_SP_POWER_RESTORE_RAM,     0x140},
+	{A5XX_TP_POWER_RESTORE_RAM,      0x40},
+};
+
+static struct kgsl_memdesc *capturescript;
+static struct kgsl_memdesc *registers;
+static bool crash_dump_valid;
+
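+/* Copy one bank of a shader block out of the crash dumper data buffer */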
+static size_t a5xx_snapshot_shader_memory(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_shader *header =
+		(struct kgsl_snapshot_shader *) buf;
+	struct a5xx_shader_block_info *info =
+		(struct a5xx_shader_block_info *) priv;
+	struct a5xx_shader_block *block = info->block;
+	unsigned int *data = (unsigned int *) (buf + sizeof(*header));
+
+	if (remain < SHADER_SECTION_SZ(block->sz)) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	header->type = block->statetype;
+	header->index = info->bank;
+	header->size = block->sz;
+
+	memcpy(data, registers->hostptr + info->offset,
+		block->sz * sizeof(unsigned int));
+
+	return SHADER_SECTION_SZ(block->sz);
+}
+
+static void a5xx_snapshot_shader(struct kgsl_device *device,
+			   struct kgsl_snapshot *snapshot)
+{
+	unsigned int i, j;
+	struct a5xx_shader_block_info info;
+
+	/* Shader blocks can only be read by the crash dumper */
+	if (!crash_dump_valid)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+		for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) {
+			info.block = &a5xx_shader_blocks[i];
+			info.bank = j;
+			info.offset = a5xx_shader_blocks[i].offset +
+				(j * a5xx_shader_blocks[i].sz);
+
+			/* Shader working/shadow memory */
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_SHADER,
+				snapshot, a5xx_snapshot_shader_memory, &info);
+		}
+	}
+}
+
+/* Dump registers which get affected by crash dumper trigger */
+static size_t a5xx_snapshot_pre_crashdump_regs(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_registers pre_cdregs = {
+			.regs = a5xx_pre_crashdumper_registers,
+			.count = ARRAY_SIZE(a5xx_pre_crashdumper_registers)/2,
+	};
+
+	return kgsl_snapshot_dump_registers(device, buf, remain, &pre_cdregs);
+}
+
+struct registers {
+	const unsigned int *regs;
+	size_t size;
+};
+
+static size_t a5xx_legacy_snapshot_registers(struct kgsl_device *device,
+		u8 *buf, size_t remain, const unsigned int *regs, size_t size)
+{
+	struct kgsl_snapshot_registers snapshot_regs = {
+		.regs = regs,
+		.count = size / 2,
+	};
+
+	return kgsl_snapshot_dump_registers(device, buf, remain,
+			&snapshot_regs);
+}
+
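+/* Number of registers covered by range pair _i (both endpoints inclusive) */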
+#define REG_PAIR_COUNT(_a, _i) \
+	(((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1)
+
+static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int *src = (unsigned int *) registers->hostptr;
+	struct registers *regs = (struct registers *)priv;
+	unsigned int j, k;
+	unsigned int count = 0;
+
+	if (!crash_dump_valid)
+		return a5xx_legacy_snapshot_registers(device, buf, remain,
+				regs->regs, regs->size);
+
+	if (remain < sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	remain -= sizeof(*header);
+
+	for (j = 0; j < regs->size / 2; j++) {
+		unsigned int start = regs->regs[2 * j];
+		unsigned int end = regs->regs[(2 * j) + 1];
+
+		if (remain < ((end - start) + 1) * 8) {
+			SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+			goto out;
+		}
+
+		remain -= ((end - start) + 1) * 8;
+
+		for (k = start; k <= end; k++, count++) {
+			*data++ = k;
+			*data++ = *src++;
+		}
+	}
+
+out:
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
+
+/* Snapshot a preemption record buffer */
+static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+
+	u8 *ptr = buf + sizeof(*header);
+
+	if (remain < (SZ_64K + sizeof(*header))) {
+		SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD");
+		return 0;
+	}
+
+	header->size = SZ_64K >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, SZ_64K);
+
+	return SZ_64K + sizeof(*header);
+}
+
+
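+/*
+ * Kick the CP crash dumper with the prebuilt capture script and poll for
+ * completion for up to CP_CRASH_DUMPER_TIMEOUT msecs. The dump is marked
+ * valid only if the dumper finishes in time
+ */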
+static void _a5xx_do_crashdump(struct kgsl_device *device)
+{
+	unsigned long wait_time;
+	unsigned int reg = 0;
+
+	crash_dump_valid = false;
+
+	if (!device->snapshot_crashdumper)
+		return;
+
+	if (IS_ERR_OR_NULL(capturescript) || IS_ERR_OR_NULL(registers))
+		return;
+
+	/* If the SMMU is stalled we cannot do a crash dump */
+	if (adreno_smmu_is_stalled(ADRENO_DEVICE(device)))
+		return;
+
+	/* Turn on APRIV so we can access the buffers */
+	kgsl_regwrite(device, A5XX_CP_CNTL, 1);
+
+	kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_LO,
+			lower_32_bits(capturescript->gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_HI,
+			upper_32_bits(capturescript->gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_CRASH_DUMP_CNTL, 1);
+
+	wait_time = jiffies + msecs_to_jiffies(CP_CRASH_DUMPER_TIMEOUT);
+	while (!time_after(jiffies, wait_time)) {
+		kgsl_regread(device, A5XX_CP_CRASH_DUMP_CNTL, &reg);
+		if (reg & 0x4)
+			break;
+		cpu_relax();
+	}
+
+	kgsl_regwrite(device, A5XX_CP_CNTL, 0);
+
+	if (!(reg & 0x4)) {
+		dev_err(device->dev, "Crash dump timed out: 0x%X\n", reg);
+		return;
+	}
+
+	crash_dump_valid = true;
+}
+
+static int get_hlsq_registers(struct kgsl_device *device,
+		const struct a5xx_hlsq_sp_tp_regs *regs, unsigned int *data)
+{
+	unsigned int i;
+	unsigned int *src = registers->hostptr + regs->offset;
+
+	for (i = 0; i < regs->size; i++) {
+		*data++ = regs->ahbaddr + i;
+		*data++ = *(src + i);
+	}
+
+	return (2 * regs->size);
+}
+
+static size_t a5xx_snapshot_dump_hlsq_sp_tp_regs(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int count = 0, i;
+
+	/* Figure out how many registers we are going to dump */
+	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++)
+		count += a5xx_hlsq_sp_tp_registers[i].size;
+
+	if (remain < (count * 8) + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++)
+		data += get_hlsq_registers(device,
+				&a5xx_hlsq_sp_tp_registers[i], data);
+
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
+
+static size_t a5xx_snapshot_cp_merciu(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size;
+
+	if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev) ||
+		adreno_is_a540(adreno_dev) || adreno_is_a512(adreno_dev))
+		size = 1024;
+	else if (adreno_is_a510(adreno_dev))
+		size = 32;
+	else
+		size = 64;
+
+	/* The MERCIU data is two dwords per entry */
+	size = size << 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP MERCIU DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_MERCIU;
+	header->size = size;
+
+	kgsl_regwrite(device, A5XX_CP_MERCIU_DBG_ADDR, 0);
+
+	for (i = 0; i < size; i++) {
+		kgsl_regread(device, A5XX_CP_MERCIU_DBG_DATA_1,
+			&data[(i * 2)]);
+		kgsl_regread(device, A5XX_CP_MERCIU_DBG_DATA_2,
+			&data[(i * 2) + 1]);
+	}
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+static size_t a5xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf;
+	u32 size, *data = (u32 *) (buf + sizeof(*header));
+
+	if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev) ||
+		adreno_is_a510(adreno_dev))
+		size = 256;
+	else
+		size = 512;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_ROQ;
+	header->size = size;
+
+	kgsl_regmap_read_indexed(&device->regmap, A5XX_CP_ROQ_DBG_ADDR,
+		A5XX_CP_ROQ_DBG_DATA, data, size);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+static size_t a5xx_snapshot_cp_meq(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf;
+	u32 size, *data = (u32 *) (buf + sizeof(*header));
+
+	if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev) ||
+		adreno_is_a510(adreno_dev))
+		size = 32;
+	else
+		size = 64;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_MEQ;
+	header->size = size;
+
+	kgsl_regmap_read_indexed(&device->regmap, A5XX_CP_MEQ_DBG_ADDR,
+		A5XX_CP_MEQ_DBG_DATA, data, size);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * a5xx_snapshot() - A5XX GPU snapshot function
+ * @adreno_dev: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the A5XX specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+void a5xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int i;
+	u32 hi, lo;
+	struct adreno_ringbuffer *rb;
+	struct registers regs;
+
+	/* Disable clock gating temporarily for the debug bus to work */
+	a5xx_hwcg_set(adreno_dev, false);
+
+	/* Save some CP information that the generic snapshot uses */
+	kgsl_regread(device, A5XX_CP_IB1_BASE, &lo);
+	kgsl_regread(device, A5XX_CP_IB1_BASE_HI, &hi);
+
+	snapshot->ib1base = (((u64) hi) << 32) | lo;
+
+	kgsl_regread(device, A5XX_CP_IB2_BASE, &lo);
+	kgsl_regread(device, A5XX_CP_IB2_BASE_HI, &hi);
+
+	snapshot->ib2base = (((u64) hi) << 32) | lo;
+
+	kgsl_regread(device, A5XX_CP_IB1_BUFSZ, &snapshot->ib1size);
+	kgsl_regread(device, A5XX_CP_IB2_BUFSZ, &snapshot->ib2size);
+
+	/* Dump the registers which get affected by crash dumper trigger */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+		snapshot, a5xx_snapshot_pre_crashdump_regs, NULL);
+
+	/* Dump vbif registers as well which get affected by crash dumper */
+	SNAPSHOT_REGISTERS(device, snapshot, a5xx_vbif_registers);
+
+	/* Try to run the crash dumper */
+	_a5xx_do_crashdump(device);
+
+	regs.regs = a5xx_registers;
+	regs.size = ARRAY_SIZE(a5xx_registers);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot,
+			a5xx_snapshot_registers, &regs);
+
+	if (a5xx_has_gpmu(adreno_dev)) {
+		regs.regs = a5xx_gpmu_registers;
+		regs.size = ARRAY_SIZE(a5xx_gpmu_registers);
+
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+				snapshot, a5xx_snapshot_registers, &regs);
+	}
+
+
+	/* Dump SP TP HLSQ registers */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot,
+		a5xx_snapshot_dump_hlsq_sp_tp_regs, NULL);
+
+	/* CP_PFP indexed registers */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_PFP_STAT_ADDR, A5XX_CP_PFP_STAT_DATA, 0, 36);
+
+	/* CP_ME indexed registers */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_ME_STAT_ADDR, A5XX_CP_ME_STAT_DATA, 0, 29);
+
+	/* CP_DRAW_STATE */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_DRAW_STATE_ADDR, A5XX_CP_DRAW_STATE_DATA,
+		0, 1 << A5XX_CP_DRAW_STATE_ADDR_WIDTH);
+
+	/* ME_UCODE Cache */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_ME_UCODE_DBG_ADDR, A5XX_CP_ME_UCODE_DBG_DATA,
+		0, 0x53F);
+
+	/* PFP_UCODE Cache */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_PFP_UCODE_DBG_ADDR, A5XX_CP_PFP_UCODE_DBG_DATA,
+		0, 0x53F);
+
+	/* CP MEQ */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a5xx_snapshot_cp_meq, NULL);
+
+	/* CP ROQ */
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a5xx_snapshot_cp_roq, NULL);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a5xx_snapshot_cp_merciu, NULL);
+
+	/* CP PFP and PM4 */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a5xx_snapshot_cp_pfp, NULL);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a5xx_snapshot_cp_pm4, NULL);
+
+	/* Debug bus */
+	a5xx_snapshot_debugbus(device, snapshot);
+
+	/* Shader memory */
+	a5xx_snapshot_shader(device, snapshot);
+
+	/* Preemption record */
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, snapshot_preemption_record,
+				rb->preemption_desc);
+		}
+	}
+
+}
+
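+/*
+ * Add script entries for one shader block: for each bank, one pair of qwords
+ * programs the HLSQ aperture and a second pair reads block->sz dwords into
+ * the data buffer
+ */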
+static int _a5xx_crashdump_init_shader(struct a5xx_shader_block *block,
+		uint64_t *ptr, uint64_t *offset)
+{
+	int qwords = 0;
+	unsigned int j;
+
+	/* Capture each bank in the block */
+	for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) {
+		/* Program the aperture */
+		ptr[qwords++] =
+			(block->statetype << A5XX_SHADER_STATETYPE_SHIFT) | j;
+		ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) |
+			(1 << 21) | 1;
+
+		/* Read all the data in one chunk */
+		ptr[qwords++] = registers->gpuaddr + *offset;
+		ptr[qwords++] =
+			(((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) |
+			block->sz;
+
+		/* Remember the offset of the first bank for easy access */
+		if (j == 0)
+			block->offset = *offset;
+
+		*offset += block->sz * sizeof(unsigned int);
+	}
+
+	return qwords;
+}
+
+static int _a5xx_crashdump_init_hlsq(struct a5xx_hlsq_sp_tp_regs *regs,
+		uint64_t *ptr, uint64_t *offset)
+{
+	int qwords = 0;
+
+	/* Program the aperture */
+	ptr[qwords++] =
+		(regs->statetype << A5XX_SHADER_STATETYPE_SHIFT);
+	ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) |
+		(1 << 21) | 1;
+
+	/* Read all the data in one chunk */
+	ptr[qwords++] = registers->gpuaddr + *offset;
+	ptr[qwords++] =
+		(((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) |
+		regs->size;
+
+	/* Remember the offset of this register set for easy access */
+	regs->offset = *offset;
+
+	*offset += regs->size * sizeof(unsigned int);
+
+	return qwords;
+}
+
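+/*
+ * Size and allocate the capture script and data buffers, then build the
+ * crash dumper script: register range reads first, followed by the shader
+ * blocks and the HLSQ/SP/TP aperture reads, terminated by two zero qwords
+ */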
+void a5xx_crashdump_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int script_size = 0;
+	unsigned int data_size = 0;
+	unsigned int i, j;
+	uint64_t *ptr;
+	uint64_t offset = 0;
+
+	if (!IS_ERR_OR_NULL(capturescript) && !IS_ERR_OR_NULL(registers))
+		return;
+
+	/*
+	 * We need to allocate two buffers:
+	 * 1 - the buffer to hold the capture script
+	 * 2 - the buffer to hold the data
+	 */
+
+	/*
+	 * To save the registers, we need 16 bytes per register pair for the
+	 * script and a dword for each register in the data
+	 */
+
+	/* Each pair needs 16 bytes (2 qwords) */
+	script_size += (ARRAY_SIZE(a5xx_registers) / 2) * 16;
+
+	/* Each register needs a dword in the data */
+	for (j = 0; j < ARRAY_SIZE(a5xx_registers) / 2; j++)
+		data_size += REG_PAIR_COUNT(a5xx_registers, j) *
+			sizeof(unsigned int);
+
+	if (a5xx_has_gpmu(adreno_dev)) {
+		/* Each pair needs 16 bytes (2 qwords) */
+		script_size += (ARRAY_SIZE(a5xx_gpmu_registers) / 2) * 16;
+
+		/* Each register needs a dword in the data */
+		for (j = 0; j < ARRAY_SIZE(a5xx_gpmu_registers) / 2; j++)
+			data_size += REG_PAIR_COUNT(a5xx_gpmu_registers, j) *
+				sizeof(unsigned int);
+	}
+
+	/*
+	 * To save the shader blocks for each block in each type we need 32
+	 * bytes for the script (16 bytes to program the aperture and 16 to
+	 * read the data) and then a block specific number of bytes to hold
+	 * the data
+	 */
+	for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+		script_size += 32 * A5XX_NUM_SHADER_BANKS;
+		data_size += a5xx_shader_blocks[i].sz * sizeof(unsigned int) *
+			A5XX_NUM_SHADER_BANKS;
+	}
+	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) {
+		script_size += 32;
+		data_size +=
+		a5xx_hlsq_sp_tp_registers[i].size * sizeof(unsigned int);
+	}
+
+	/* Now allocate the script and data buffers */
+
+	/* The script buffer needs 2 extra qwords on the end */
+	if (IS_ERR_OR_NULL(capturescript))
+		capturescript = kgsl_allocate_global(device,
+			script_size + 16, 0, KGSL_MEMFLAGS_GPUREADONLY,
+			KGSL_MEMDESC_PRIVILEGED, "capturescript");
+
+	if (IS_ERR(capturescript))
+		return;
+
+	if (IS_ERR_OR_NULL(registers))
+		registers = kgsl_allocate_global(device, data_size, 0, 0,
+			KGSL_MEMDESC_PRIVILEGED, "capturescript_regs");
+
+	if (IS_ERR(registers))
+		return;
+
+	/* Build the crash script */
+
+	ptr = (uint64_t *) capturescript->hostptr;
+
+	/* For the registers, program a read command for each pair */
+
+	for (j = 0; j < ARRAY_SIZE(a5xx_registers) / 2; j++) {
+		unsigned int r = REG_PAIR_COUNT(a5xx_registers, j);
+		*ptr++ = registers->gpuaddr + offset;
+		*ptr++ = (((uint64_t) a5xx_registers[2 * j]) << 44)
+			| r;
+		offset += r * sizeof(unsigned int);
+	}
+
+	if (a5xx_has_gpmu(adreno_dev)) {
+		for (j = 0; j < ARRAY_SIZE(a5xx_gpmu_registers) / 2; j++) {
+			unsigned int r = REG_PAIR_COUNT(a5xx_gpmu_registers, j);
+			*ptr++ = registers->gpuaddr + offset;
+			*ptr++ = (((uint64_t) a5xx_gpmu_registers[2 * j]) << 44)
+				| r;
+			offset += r * sizeof(unsigned int);
+		}
+	}
+
+	/* Program each shader block */
+	for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+		ptr += _a5xx_crashdump_init_shader(&a5xx_shader_blocks[i], ptr,
+			&offset);
+	}
+	/* Program the hlsq sp tp register sets */
+	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++)
+		ptr += _a5xx_crashdump_init_hlsq(&a5xx_hlsq_sp_tp_registers[i],
+			ptr, &offset);
+
+	*ptr++ = 0;
+	*ptr++ = 0;
+}

+ 2486 - 0
qcom/opensource/graphics-kernel/adreno_a6xx.c

@@ -0,0 +1,2486 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk/qcom.h>
+#include <linux/clk-provider.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+#include <soc/qcom/of_common.h>
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_a6xx_hwsched.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+/* IFPC & Preemption static powerup restore list */
+static u32 a6xx_pwrup_reglist[] = {
+	A6XX_VSC_ADDR_MODE_CNTL,
+	A6XX_GRAS_ADDR_MODE_CNTL,
+	A6XX_RB_ADDR_MODE_CNTL,
+	A6XX_PC_ADDR_MODE_CNTL,
+	A6XX_HLSQ_ADDR_MODE_CNTL,
+	A6XX_VFD_ADDR_MODE_CNTL,
+	A6XX_VPC_ADDR_MODE_CNTL,
+	A6XX_UCHE_ADDR_MODE_CNTL,
+	A6XX_SP_ADDR_MODE_CNTL,
+	A6XX_TPL1_ADDR_MODE_CNTL,
+	A6XX_UCHE_WRITE_RANGE_MAX_LO,
+	A6XX_UCHE_WRITE_RANGE_MAX_HI,
+	A6XX_UCHE_TRAP_BASE_LO,
+	A6XX_UCHE_TRAP_BASE_HI,
+	A6XX_UCHE_WRITE_THRU_BASE_LO,
+	A6XX_UCHE_WRITE_THRU_BASE_HI,
+	A6XX_UCHE_GMEM_RANGE_MIN_LO,
+	A6XX_UCHE_GMEM_RANGE_MIN_HI,
+	A6XX_UCHE_GMEM_RANGE_MAX_LO,
+	A6XX_UCHE_GMEM_RANGE_MAX_HI,
+	A6XX_UCHE_FILTER_CNTL,
+	A6XX_UCHE_CACHE_WAYS,
+	A6XX_UCHE_MODE_CNTL,
+	A6XX_RB_NC_MODE_CNTL,
+	A6XX_TPL1_NC_MODE_CNTL,
+	A6XX_SP_NC_MODE_CNTL,
+	A6XX_PC_DBG_ECO_CNTL,
+	A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE,
+	A6XX_UCHE_GBIF_GX_CONFIG,
+	A6XX_UCHE_CLIENT_PF,
+};
+
+/* IFPC only static powerup restore list */
+static u32 a6xx_ifpc_pwrup_reglist[] = {
+	A6XX_CP_CHICKEN_DBG,
+	A6XX_CP_DBG_ECO_CNTL,
+	A6XX_CP_PROTECT_CNTL,
+	A6XX_CP_PROTECT_REG,
+	A6XX_CP_PROTECT_REG+1,
+	A6XX_CP_PROTECT_REG+2,
+	A6XX_CP_PROTECT_REG+3,
+	A6XX_CP_PROTECT_REG+4,
+	A6XX_CP_PROTECT_REG+5,
+	A6XX_CP_PROTECT_REG+6,
+	A6XX_CP_PROTECT_REG+7,
+	A6XX_CP_PROTECT_REG+8,
+	A6XX_CP_PROTECT_REG+9,
+	A6XX_CP_PROTECT_REG+10,
+	A6XX_CP_PROTECT_REG+11,
+	A6XX_CP_PROTECT_REG+12,
+	A6XX_CP_PROTECT_REG+13,
+	A6XX_CP_PROTECT_REG+14,
+	A6XX_CP_PROTECT_REG+15,
+	A6XX_CP_PROTECT_REG+16,
+	A6XX_CP_PROTECT_REG+17,
+	A6XX_CP_PROTECT_REG+18,
+	A6XX_CP_PROTECT_REG+19,
+	A6XX_CP_PROTECT_REG+20,
+	A6XX_CP_PROTECT_REG+21,
+	A6XX_CP_PROTECT_REG+22,
+	A6XX_CP_PROTECT_REG+23,
+	A6XX_CP_PROTECT_REG+24,
+	A6XX_CP_PROTECT_REG+25,
+	A6XX_CP_PROTECT_REG+26,
+	A6XX_CP_PROTECT_REG+27,
+	A6XX_CP_PROTECT_REG+28,
+	A6XX_CP_PROTECT_REG+29,
+	A6XX_CP_PROTECT_REG+30,
+	A6XX_CP_PROTECT_REG+31,
+	A6XX_CP_AHB_CNTL,
+};
+
+/* Applicable to a620, a621, a635, a650 and a660 */
+static u32 a650_ifpc_pwrup_reglist[] = {
+	A6XX_CP_PROTECT_REG+32,
+	A6XX_CP_PROTECT_REG+33,
+	A6XX_CP_PROTECT_REG+34,
+	A6XX_CP_PROTECT_REG+35,
+	A6XX_CP_PROTECT_REG+36,
+	A6XX_CP_PROTECT_REG+37,
+	A6XX_CP_PROTECT_REG+38,
+	A6XX_CP_PROTECT_REG+39,
+	A6XX_CP_PROTECT_REG+40,
+	A6XX_CP_PROTECT_REG+41,
+	A6XX_CP_PROTECT_REG+42,
+	A6XX_CP_PROTECT_REG+43,
+	A6XX_CP_PROTECT_REG+44,
+	A6XX_CP_PROTECT_REG+45,
+	A6XX_CP_PROTECT_REG+46,
+	A6XX_CP_PROTECT_REG+47,
+};
+
+/* Applicable to a620, a621, a635, a650 and a660 */
+static u32 a650_pwrup_reglist[] = {
+	A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0,
+	A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
+	A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
+	A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
+	A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
+	A6XX_UCHE_CMDQ_CONFIG,
+};
+
+static u32 a615_pwrup_reglist[] = {
+	A6XX_UCHE_GBIF_GX_CONFIG,
+};
+
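+/*
+ * Write a fenced register and, when the GMU is enabled, poll the AHB fence
+ * status to confirm the write was not dropped, retrying the write until it
+ * sticks or the long wakeup retry limit is hit
+ */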
+int a6xx_fenced_write(struct adreno_device *adreno_dev, u32 offset,
+		u32 value, u32 mask)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status, i;
+	u64 ts1, ts2;
+
+	kgsl_regwrite(device, offset, value);
+
+	if (!gmu_core_isenabled(device))
+		return 0;
+
+	ts1 = a6xx_read_alwayson(adreno_dev);
+	for (i = 0; i < GMU_CORE_LONG_WAKEUP_RETRY_LIMIT; i++) {
+		/*
+		 * Make sure the previous register write is posted before
+		 * checking the fence status
+		 */
+		mb();
+
+		kgsl_regread(device, A6XX_GMU_AHB_FENCE_STATUS, &status);
+
+		/*
+		 * If !writedropped0/1, then the write to fenced register
+		 * was successful
+		 */
+		if (!(status & mask))
+			break;
+
+		/* Wait a small amount of time before trying again */
+		udelay(GMU_CORE_WAKEUP_DELAY_US);
+
+		/* Try to write the fenced register again */
+		kgsl_regwrite(device, offset, value);
+	}
+
+	if (i < GMU_CORE_SHORT_WAKEUP_RETRY_LIMIT)
+		return 0;
+
+	if (i == GMU_CORE_LONG_WAKEUP_RETRY_LIMIT) {
+		ts2 = a6xx_read_alwayson(adreno_dev);
+		dev_err(adreno_dev->dev.dev,
+			"Timed out waiting %d usecs to write fenced register 0x%x, timestamps: %llx %llx\n",
+			i * GMU_CORE_WAKEUP_DELAY_US, offset, ts1, ts2);
+		return -ETIMEDOUT;
+	}
+
+	dev_err(adreno_dev->dev.dev,
+		"Waited %d usecs to write fenced register 0x%x\n",
+		i * GMU_CORE_WAKEUP_DELAY_US, offset);
+
+	return 0;
+}
+
+int a6xx_init(struct adreno_device *adreno_dev)
+{
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	u64 freq = a6xx_core->gmu_hub_clk_freq;
+
+	adreno_dev->highest_bank_bit = a6xx_core->highest_bank_bit;
+
+	adreno_dev->gmu_hub_clk_freq = freq ? freq : 150000000;
+
+	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
+							ADRENO_COOP_RESET);
+
+	/* If the memory type is DDR 4, override the existing configuration */
+	if (of_fdt_get_ddrtype() == 0x7) {
+		if (adreno_is_a660_shima(adreno_dev) ||
+			adreno_is_a635(adreno_dev) ||
+			adreno_is_a662(adreno_dev))
+			adreno_dev->highest_bank_bit = 14;
+		else if ((adreno_is_a650(adreno_dev) ||
+				adreno_is_a660(adreno_dev)))
+			adreno_dev->highest_bank_bit = 15;
+	}
+
+	a6xx_crashdump_init(adreno_dev);
+
+	return adreno_allocate_global(KGSL_DEVICE(adreno_dev),
+		&adreno_dev->pwrup_reglist,
+		PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED,
+		"powerup_register_list");
+}
+
+static int a6xx_nogmu_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = a6xx_ringbuffer_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* Try to map the GMU wrapper region if applicable */
+	ret = kgsl_regmap_add_region(&device->regmap, device->pdev,
+		"gmu_wrapper", NULL, NULL);
+	if (ret && ret != -ENODEV)
+		dev_err(device->dev, "Couldn't map the GMU wrapper registers\n");
+
+	adreno_create_profile_buffer(adreno_dev);
+
+	return a6xx_init(adreno_dev);
+}
+
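+/* Program the CP register protection ranges from the core definition */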
+static void a6xx_protect_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	const struct adreno_protected_regs *regs = a6xx_core->protected_regs;
+	int i;
+
+	/*
+	 * Enable access protection to privileged registers, fault on an access
+	 * protect violation and select the last span to protect from the start
+	 * address all the way to the end of the register address space
+	 */
+	kgsl_regwrite(device, A6XX_CP_PROTECT_CNTL,
+		(1 << 0) | (1 << 1) | (1 << 3));
+
+	/* Program each register defined by the core definition */
+	for (i = 0; regs[i].reg; i++) {
+		u32 count;
+
+		/*
+		 * This is the offset of the end register as counted from the
+		 * start, i.e. # of registers in the range - 1
+		 */
+		count = regs[i].end - regs[i].start;
+
+		kgsl_regwrite(device, regs[i].reg,
+			(regs[i].start & 0x3ffff) | ((count & 0x1fff) << 18) |
+			(regs[i].noaccess << 31));
+	}
+}
+
+static inline unsigned int
+__get_rbbm_clock_cntl_on(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a630(adreno_dev))
+		return 0x8AA8AA02;
+	else if (adreno_is_a612(adreno_dev) || adreno_is_a610_family(adreno_dev))
+		return 0xAAA8AA82;
+	else if (adreno_is_a702(adreno_dev))
+		return 0xAAAAAA82;
+	else
+		return 0x8AA8AA82;
+}
+
+static inline unsigned int
+__get_gmu_ao_cgc_mode_cntl(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a612(adreno_dev))
+		return 0x00000022;
+	else if (adreno_is_a615_family(adreno_dev))
+		return 0x00000222;
+	/* a662 should be checked before a660 */
+	else if (adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev))
+		return 0x00020200;
+	else if (adreno_is_a660(adreno_dev))
+		return 0x00020000;
+	else
+		return 0x00020202;
+}
+
+static inline unsigned int
+__get_gmu_ao_cgc_delay_cntl(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a612(adreno_dev))
+		return 0x00000011;
+	else if (adreno_is_a615_family(adreno_dev))
+		return 0x00000111;
+	else
+		return 0x00010111;
+}
+
+static inline unsigned int
+__get_gmu_ao_cgc_hyst_cntl(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a612(adreno_dev))
+		return 0x00000055;
+	else if (adreno_is_a615_family(adreno_dev))
+		return 0x00000555;
+	else
+		return 0x00005555;
+}
+
+static unsigned int __get_gmu_wfi_config(struct adreno_device *adreno_dev)
+{
+	unsigned int rev = ADRENO_GPUREV(adreno_dev);
+
+	if ((rev == ADRENO_REV_A620) || adreno_is_a640(adreno_dev) ||
+		adreno_is_a650(adreno_dev))
+		return 0x00000002;
+
+	return 0x00000000;
+}
+
+static void set_holi_sptprac_clock(struct kgsl_device *device, bool enable)
+{
+	u32 val = 0;
+
+	kgsl_regread(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, &val);
+	val &= ~1;
+	kgsl_regwrite(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL,
+			val | (enable ? 1 : 0));
+}
+
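+/*
+ * Toggle hardware clock gating: program the GMU AO CGC controls (when a GMU
+ * is present), the per-block HWCG registers from the core definition and the
+ * top level RBBM clock control
+ */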
+static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	unsigned int value;
+	int i;
+
+	if (!adreno_dev->hwcg_enabled)
+		on = false;
+
+	if (gmu_core_isenabled(device)) {
+		gmu_core_regwrite(device, A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
+			on ? __get_gmu_ao_cgc_mode_cntl(adreno_dev) : 0);
+		gmu_core_regwrite(device, A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
+			on ? __get_gmu_ao_cgc_delay_cntl(adreno_dev) : 0);
+		gmu_core_regwrite(device, A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
+			on ? __get_gmu_ao_cgc_hyst_cntl(adreno_dev) : 0);
+		gmu_core_regwrite(device, A6XX_GMU_CX_GMU_WFI_CONFIG,
+			on ? __get_gmu_wfi_config(adreno_dev) : 0);
+	}
+
+	kgsl_regread(device, A6XX_RBBM_CLOCK_CNTL, &value);
+
+	if (value == __get_rbbm_clock_cntl_on(adreno_dev) && on)
+		return;
+
+	if (value == 0 && !on)
+		return;
+
+	/*
+	 * Disable SP clock before programming HWCG registers.
+	 * The A612 and A610 GPUs do not have the GX power domain,
+	 * so skip the GMU_GX registers for A612 and A610.
+	 */
+
+	if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) &&
+		!adreno_is_a610_family(adreno_dev) && !adreno_is_a702(adreno_dev))
+		gmu_core_regrmw(device,
+			A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
+	else if (adreno_is_a619_holi(adreno_dev))
+		set_holi_sptprac_clock(device, false);
+
+	for (i = 0; i < a6xx_core->hwcg_count; i++)
+		kgsl_regwrite(device, a6xx_core->hwcg[i].offset,
+			on ? a6xx_core->hwcg[i].val : 0);
+
+	/* GBIF L2 CGC control is not part of the UCHE */
+	kgsl_regrmw(device, A6XX_UCHE_GBIF_GX_CONFIG, 0x70000,
+			FIELD_PREP(GENMASK(18, 16), on ? 2 : 0));
+
+	/*
+	 * Enable the SP clock after programming the HWCG registers.
+	 * A612, the A610 family and A702 do not have the GX power domain,
+	 * so skip the GMU_GX register write for those targets.
+	 */
+	if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) &&
+		!adreno_is_a610_family(adreno_dev) && !adreno_is_a702(adreno_dev))
+		gmu_core_regrmw(device,
+			A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
+	else if (adreno_is_a619_holi(adreno_dev))
+		set_holi_sptprac_clock(device, true);
+
+	/* enable top level HWCG */
+	kgsl_regwrite(device, A6XX_RBBM_CLOCK_CNTL,
+		on ? __get_rbbm_clock_cntl_on(adreno_dev) : 0);
+}
+
+struct a6xx_reglist_list {
+	u32 *regs;
+	u32 count;
+};
+
+#define REGLIST(_a) \
+	 ((struct a6xx_reglist_list) { .regs = _a, .count = ARRAY_SIZE(_a), })
+
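+/*
+ * Layout of the pwrup_reglist buffer, as built below: a cpu_gpu_lock header
+ * followed by <register offset, current value> dword pairs, so each list
+ * entry contributes two dwords to lock->list_length.
+ */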
+static void a6xx_patch_pwrup_reglist(struct adreno_device *adreno_dev)
+{
+	struct a6xx_reglist_list reglist[4];
+	void *ptr = adreno_dev->pwrup_reglist->hostptr;
+	struct cpu_gpu_lock *lock = ptr;
+	int items = 0, i, j;
+	u32 *dest = ptr + sizeof(*lock);
+	u16 list_offset = 0;
+
+	/* Static IFPC-only registers */
+	reglist[items] = REGLIST(a6xx_ifpc_pwrup_reglist);
+	list_offset += reglist[items++].count * 2;
+
+	if (adreno_is_a650_family(adreno_dev)) {
+		reglist[items] = REGLIST(a650_ifpc_pwrup_reglist);
+		list_offset += reglist[items++].count * 2;
+	}
+
+	/* Static IFPC + preemption registers */
+	reglist[items++] = REGLIST(a6xx_pwrup_reglist);
+
+	/* Add target specific registers */
+	if (adreno_is_a615_family(adreno_dev))
+		reglist[items++] = REGLIST(a615_pwrup_reglist);
+	else if (adreno_is_a650_family(adreno_dev))
+		reglist[items++] = REGLIST(a650_pwrup_reglist);
+
+	/*
+	 * For each entry in each of the lists, write the offset and the current
+	 * register value into the GPU buffer
+	 */
+	for (i = 0; i < items; i++) {
+		u32 *r = reglist[i].regs;
+
+		for (j = 0; j < reglist[i].count; j++) {
+			*dest++ = r[j];
+			kgsl_regread(KGSL_DEVICE(adreno_dev), r[j], dest++);
+		}
+
+		lock->list_length += reglist[i].count * 2;
+	}
+
+	if (adreno_is_a630(adreno_dev)) {
+		*dest++ = A6XX_RBBM_VBIF_CLIENT_QOS_CNTL;
+		kgsl_regread(KGSL_DEVICE(adreno_dev),
+			A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, dest++);
+	} else {
+		*dest++ = A6XX_RBBM_GBIF_CLIENT_QOS_CNTL;
+		kgsl_regread(KGSL_DEVICE(adreno_dev),
+			A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, dest++);
+	}
+
+	lock->list_length += 2;
+
+	*dest++ = A6XX_RBBM_PERFCTR_CNTL;
+	*dest++ = 1;
+	lock->list_length += 2;
+
+	/*
+	 * The overall register list is composed of
+	 * 1. Static IFPC-only registers
+	 * 2. Static IFPC + preemption registers
+	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
+	 *
+	 * The CP views the second and third entries as one dynamic list
+	 * starting from list_offset. list_length should be the total dwords in
+	 * all the lists and list_offset should be specified as the size in
+	 * dwords of the first entry in the list.
+	 */
+	lock->list_offset = list_offset;
+}
+
+
+static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev);
+static void a6xx_llc_configure_gpuhtw_scid(struct adreno_device *adreno_dev);
+static void a6xx_llc_enable_overrides(struct adreno_device *adreno_dev);
+
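+/*
+ * Program the secure video (SECVID) trusted base and size. On targets with
+ * the ADRENO_QUIRK_SECVID_SET_ONCE quirk the static flag below is latched so
+ * the range is only programmed on the first call.
+ */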
+static void a6xx_set_secvid(struct kgsl_device *device)
+{
+	static bool set;
+
+	if (set || !device->mmu.secured)
+		return;
+
+	kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_CNTL, 0x0);
+	kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
+		lower_32_bits(KGSL_IOMMU_SECURE_BASE32));
+	kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI,
+		upper_32_bits(KGSL_IOMMU_SECURE_BASE32));
+	kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE,
+		FIELD_PREP(GENMASK(31, 12),
+		(KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K)));
+
+	if (ADRENO_QUIRK(ADRENO_DEVICE(device), ADRENO_QUIRK_SECVID_SET_ONCE))
+		set = true;
+}
+
+static void a6xx_deassert_gbif_halt(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0);
+
+	if (adreno_is_a619_holi(adreno_dev))
+		kgsl_regwrite(device, A6XX_RBBM_GPR0_CNTL, 0x0);
+	else
+		kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT, 0x0);
+}
+
+bool a6xx_gx_is_on(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	bool gdsc_on, clk_on;
+
+	clk_on = __clk_is_enabled(pwr->grp_clks[0]);
+
+	gdsc_on = regulator_is_enabled(pwr->gx_gdsc);
+
+	return (gdsc_on && clk_on);
+}
+
+/*
+ * Some targets support marking certain transactions as always privileged which
+ * allows us to mark more memory as privileged without having to explicitly set
+ * the APRIV bit.  For those targets, choose the following transactions to be
+ * privileged by default:
+ * CDWRITE     [6:6] - Crashdumper writes
+ * CDREAD      [5:5] - Crashdumper reads
+ * RBRPWB      [3:3] - RPTR shadow writes
+ * RBPRIVLEVEL [2:2] - Memory accesses from PM4 packets in the ringbuffer
+ * RBFETCH     [1:1] - Ringbuffer reads
+ */
+#define A6XX_APRIV_DEFAULT \
+	((1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1))
+
+void a6xx_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	unsigned int mal, mode, hbb_hi = 0, hbb_lo = 0;
+	unsigned int uavflagprd_inv;
+	unsigned int amsbc = 0;
+	unsigned int rgb565_predicator = 0;
+	unsigned int level2_swizzling_dis = 0;
+
+	/* Enable 64 bit addressing */
+	kgsl_regwrite(device, A6XX_CP_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_VSC_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_RB_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_PC_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_VFD_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_VPC_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_SP_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
+	/* Set up VBIF registers from the GPU core definition */
+	kgsl_regmap_multi_write(&device->regmap, a6xx_core->vbif,
+		a6xx_core->vbif_count);
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW))
+		kgsl_regwrite(device, A6XX_UCHE_GBIF_GX_CONFIG, 0x10200F9);
+
+	/* Make all blocks contribute to the GPU BUSY perf counter */
+	kgsl_regwrite(device, A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
+
+	/*
+	 * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE, effectively
+	 * disabling L2 bypass.
+	 */
+	kgsl_regwrite(device, A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0);
+	kgsl_regwrite(device, A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff);
+	kgsl_regwrite(device, A6XX_UCHE_TRAP_BASE_LO, 0xfffff000);
+	kgsl_regwrite(device, A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff);
+	kgsl_regwrite(device, A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000);
+	kgsl_regwrite(device, A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);
+
+	/*
+	 * Some A6xx targets no longer use a programmed UCHE GMEM base
+	 * address, so only write the registers if this address is
+	 * non-zero.
+	 */
+	if (adreno_dev->uche_gmem_base) {
+		kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MIN_LO,
+				adreno_dev->uche_gmem_base);
+		kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x0);
+		kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MAX_LO,
+				adreno_dev->uche_gmem_base +
+				adreno_dev->gpucore->gmem_size - 1);
+		kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MAX_HI, 0x0);
+	}
+
+	kgsl_regwrite(device, A6XX_UCHE_FILTER_CNTL, 0x804);
+	kgsl_regwrite(device, A6XX_UCHE_CACHE_WAYS, 0x4);
+
+	/* ROQ sizes are twice as big on a640/a680 as on a630 */
+	if ((ADRENO_GPUREV(adreno_dev) >= ADRENO_REV_A640) &&
+		       !adreno_is_a702(adreno_dev)) {
+		kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
+		kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C);
+	} else if (adreno_is_a612(adreno_dev) || adreno_is_a610_family(adreno_dev) ||
+		adreno_is_a702(adreno_dev)) {
+		kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
+		kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
+	} else {
+		kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x010000C0);
+		kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C);
+	}
+
+	if (adreno_is_a660(adreno_dev))
+		kgsl_regwrite(device, A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
+
+	if (adreno_is_a663(adreno_dev)) {
+		kgsl_regwrite(device, A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
+		kgsl_regwrite(device, A6XX_RBBM_LPAC_GBIF_CLIENT_QOS_CNTL, 0x0);
+		kgsl_regwrite(device, A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
+	}
+
+	if (adreno_is_a612(adreno_dev) || adreno_is_a610_family(adreno_dev)) {
+		/* For A612 and A610, the mem pool size is reduced to 48 */
+		kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, 48);
+		kgsl_regwrite(device, A6XX_CP_MEM_POOL_DBG_ADDR, 47);
+	} else if (adreno_is_a702(adreno_dev)) {
+		kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, 64);
+		kgsl_regwrite(device, A6XX_CP_MEM_POOL_DBG_ADDR, 63);
+	} else {
+		kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, 128);
+	}
+
+	/* Set the primFifo threshold values */
+	kgsl_regwrite(device, A6XX_PC_DBG_ECO_CNTL,
+		a6xx_core->prim_fifo_threshold);
+
+	/* Set the AHB default slave response to "ERROR" */
+	kgsl_regwrite(device, A6XX_CP_AHB_CNTL, 0x1);
+
+	/* Turn on performance counters */
+	kgsl_regwrite(device, A6XX_RBBM_PERFCTR_CNTL, 0x1);
+
+	/* Turn on the IFPC counter (countable 4 on XOCLK4) */
+	if (gmu_core_isenabled(device))
+		gmu_core_regrmw(device, A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
+			0xff, 0x4);
+
+	/* Turn on GX_MEM retention */
+	if (gmu_core_isenabled(device) && adreno_is_a612(adreno_dev)) {
+		kgsl_regwrite(device, A6XX_RBBM_BLOCK_GX_RETENTION_CNTL, 0x7FB);
+		/* For CP IPC interrupt */
+		kgsl_regwrite(device, A6XX_RBBM_INT_2_MASK, 0x00000010);
+	}
+
+	if (of_property_read_u32(device->pdev->dev.of_node,
+		"qcom,min-access-length", &mal))
+		mal = 32;
+
+	if (of_property_read_u32(device->pdev->dev.of_node,
+		"qcom,ubwc-mode", &mode))
+		mode = 0;
+
+	switch (mode) {
+	case KGSL_UBWC_1_0:
+		mode = 1;
+		break;
+	case KGSL_UBWC_2_0:
+		mode = 0;
+		break;
+	case KGSL_UBWC_3_0:
+		mode = 0;
+		amsbc = 1; /* Only valid for A640 and A680 */
+		break;
+	case KGSL_UBWC_4_0:
+		mode = 0;
+		rgb565_predicator = 1;
+		amsbc = 1;
+		if (adreno_is_a663(adreno_dev))
+			level2_swizzling_dis = 1;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Macrotiling mode 0: 4 channels (default).
+	 * Overwrite to 1 (8 channels) for A680 and A663.
+	 */
+	if (adreno_is_a680(adreno_dev) ||
+			adreno_is_a663(adreno_dev))
+		kgsl_regwrite(device, A6XX_RBBM_NC_MODE_CNTL, 1);
+
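+	/*
+	 * The highest bank bit is encoded relative to 13 and split across two
+	 * fields: for example HBB 14 gives hbb_lo=1/hbb_hi=0 and HBB 17 gives
+	 * hbb_lo=0/hbb_hi=1.
+	 */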
+	if (!WARN_ON(!adreno_dev->highest_bank_bit)) {
+		hbb_lo = (adreno_dev->highest_bank_bit - 13) & 3;
+		hbb_hi = ((adreno_dev->highest_bank_bit - 13) >> 2) & 1;
+	}
+
+	mal = (mal == 64) ? 1 : 0;
+
+	uavflagprd_inv = (adreno_is_a650_family(adreno_dev)) ? 2 : 0;
+
+	kgsl_regwrite(device, A6XX_RB_NC_MODE_CNTL,
+				(level2_swizzling_dis << 12) | (rgb565_predicator << 11)|
+				(hbb_hi << 10) | (amsbc << 4) | (mal << 3) |
+				(hbb_lo << 1) | mode);
+
+	kgsl_regwrite(device, A6XX_TPL1_NC_MODE_CNTL,
+				(level2_swizzling_dis << 6) | (hbb_hi << 4) |
+				(mal << 3) | (hbb_lo << 1) | mode);
+
+	kgsl_regwrite(device, A6XX_SP_NC_MODE_CNTL,
+				(level2_swizzling_dis << 12) | (hbb_hi << 10) |
+				(mal << 3) | (uavflagprd_inv << 4) |
+				(hbb_lo << 1) | mode);
+
+	kgsl_regwrite(device, A6XX_UCHE_MODE_CNTL, (mal << 23) |
+		(hbb_lo << 21));
+
+	kgsl_regwrite(device, A6XX_RBBM_INTERFACE_HANG_INT_CNTL,
+				(1 << 30) | a6xx_core->hang_detect_cycles);
+
+	kgsl_regwrite(device, A6XX_UCHE_CLIENT_PF, BIT(7) |
+			FIELD_PREP(GENMASK(3, 0), adreno_dev->uche_client_pf));
+
+	/* Set weights for bicubic filtering */
+	if (adreno_is_a650_family(adreno_dev)) {
+		kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
+		kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
+			0x3FE05FF4);
+		kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
+			0x3FA0EBEE);
+		kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
+			0x3F5193ED);
+		kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
+			0x3F0243F0);
+	}
+
+	/* Set TWOPASSUSEWFI in A6XX_PC_DBG_ECO_CNTL if requested */
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI))
+		kgsl_regrmw(device, A6XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
+
+	/* Set the bit vccCacheSkipDis=1 to get rid of TSEskip logic */
+	if (a6xx_core->disable_tseskip)
+		kgsl_regrmw(device, A6XX_PC_DBG_ECO_CNTL, 0, (1 << 9));
+
+	/* Set the bit in HLSQ Cluster for A702 */
+	if (adreno_is_a702(adreno_dev))
+		kgsl_regwrite(device, A6XX_CP_CHICKEN_DBG, (1 << 24));
+
+	/* Enable the GMEM save/restore feature for preemption */
+	if (adreno_is_preemption_enabled(adreno_dev))
+		kgsl_regwrite(device, A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE,
+			0x1);
+
+	/*
+	 * Enable GMU power counter 0 to count GPU busy. This is applicable to
+	 * all a6xx targets
+	 */
+	kgsl_regwrite(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
+	kgsl_regrmw(device, A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, 0x20);
+	kgsl_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0x1);
+
+	a6xx_protect_init(adreno_dev);
+	/*
+	 * We start LM here because we want all of the following to be up:
+	 * 1. GX HS
+	 * 2. SPTPRAC
+	 * 3. HFI
+	 * At this point, all of them are guaranteed to be up.
+	 */
+
+	/* Configure LLCC */
+	a6xx_llc_configure_gpu_scid(adreno_dev);
+	a6xx_llc_configure_gpuhtw_scid(adreno_dev);
+
+	a6xx_llc_enable_overrides(adreno_dev);
+
+	if (adreno_is_a662(adreno_dev))
+		kgsl_regrmw(device, A6XX_GBIF_CX_CONFIG, 0x3c0,
+			FIELD_PREP(GENMASK(7, 6), 0x1) |
+			FIELD_PREP(GENMASK(9, 8), 0x1));
+
+	if (adreno_is_a660(adreno_dev)) {
+		kgsl_regwrite(device, A6XX_CP_CHICKEN_DBG, 0x1);
+		kgsl_regwrite(device, A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
+
+		/* Set dualQ + disable afull for A660 GPU but not for A635 */
+		if (!adreno_is_a635(adreno_dev))
+			kgsl_regwrite(device, A6XX_UCHE_CMDQ_CONFIG, 0x66906);
+	}
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
+		kgsl_regwrite(device, A6XX_CP_APRIV_CNTL, A6XX_APRIV_DEFAULT);
+
+	a6xx_set_secvid(device);
+
+	/*
+	 * Enable hardware clock gating here to prevent any register access
+	 * issue due to internal clock gating.
+	 */
+	a6xx_hwcg_set(adreno_dev, true);
+
+	/*
+	 * All registers must be written before this point so that we don't
+	 * miss any register programming when we patch the power up register
+	 * list.
+	 */
+	if (!adreno_dev->patch_reglist &&
+		(adreno_dev->pwrup_reglist->gpuaddr != 0)) {
+		a6xx_patch_pwrup_reglist(adreno_dev);
+		adreno_dev->patch_reglist = true;
+	}
+
+	/*
+	 * During adreno_stop, GBIF halt is asserted to ensure that
+	 * no further transactions can go through the GPU before the
+	 * GPU headswitch is turned off.
+	 *
+	 * The halt is deasserted once the headswitch goes off, but in
+	 * case the headswitch does not go off, clear the GBIF halt here
+	 * to ensure that GPU wake-up does not fail because of halted
+	 * GPU transactions.
+	 */
+	a6xx_deassert_gbif_halt(adreno_dev);
+}
+
+/* Offsets into the MX/CX mapped register regions */
+#define RDPM_MX_OFFSET 0xf00
+#define RDPM_CX_OFFSET 0xf18
+
+void a6xx_rdpm_mx_freq_update(struct a6xx_gmu_device *gmu,
+		u32 freq)
+{
+	if (gmu->rdpm_mx_virt) {
+		writel_relaxed(freq/1000,
+			(gmu->rdpm_mx_virt + RDPM_MX_OFFSET));
+
+		/*
+		 * ensure previous writes post before this one,
+		 * i.e. act like normal writel()
+		 */
+		wmb();
+	}
+}
+
+void a6xx_rdpm_cx_freq_update(struct a6xx_gmu_device *gmu,
+		u32 freq)
+{
+	if (gmu->rdpm_cx_virt) {
+		writel_relaxed(freq/1000,
+			(gmu->rdpm_cx_virt + RDPM_CX_OFFSET));
+
+		/*
+		 * ensure previous writes post before this one,
+		 * i.e. act like normal writel()
+		 */
+		wmb();
+	}
+}
+
+/* This is the start point for non GMU/RGMU targets */
+static int a6xx_nogmu_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/*
+	 * During adreno_stop() GBIF halt is asserted to ensure that
+	 * no further transactions go through the GPU before the
+	 * GPU headswitch is turned off.
+	 *
+	 * The halt is supposed to be deasserted when the headswitch goes off
+	 * but clear it again during start to be sure
+	 */
+	kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0);
+	kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT, 0x0);
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+	adreno_perfcounter_restore(adreno_dev);
+
+	a6xx_start(adreno_dev);
+	return 0;
+}
+
+/*
+ * The CP_INIT_MAX_CONTEXT bit tells whether multiple hardware contexts can
+ * be used at once or if they should be serialized
+ */
+#define CP_INIT_MAX_CONTEXT BIT(0)
+
+/* Enables register protection mode */
+#define CP_INIT_ERROR_DETECTION_CONTROL BIT(1)
+
+/* Header dump information */
+#define CP_INIT_HEADER_DUMP BIT(2) /* Reserved */
+
+/* Default Reset states enabled for PFP and ME */
+#define CP_INIT_DEFAULT_RESET_STATE BIT(3)
+
+/* Drawcall filter range */
+#define CP_INIT_DRAWCALL_FILTER_RANGE BIT(4)
+
+/* Ucode workaround masks */
+#define CP_INIT_UCODE_WORKAROUND_MASK BIT(5)
+
+/*
+ * Operation mode mask
+ *
+ * This ordinal provides the option to disable the
+ * save/restore of performance counters across preemption.
+ */
+#define CP_INIT_OPERATION_MODE_MASK BIT(6)
+
+/* Register initialization list */
+#define CP_INIT_REGISTER_INIT_LIST BIT(7)
+
+/* Register initialization list with spinlock */
+#define CP_INIT_REGISTER_INIT_LIST_WITH_SPINLOCK BIT(8)
+
+#define CP_INIT_MASK (CP_INIT_MAX_CONTEXT | \
+		CP_INIT_ERROR_DETECTION_CONTROL | \
+		CP_INIT_HEADER_DUMP | \
+		CP_INIT_DEFAULT_RESET_STATE | \
+		CP_INIT_UCODE_WORKAROUND_MASK | \
+		CP_INIT_OPERATION_MODE_MASK | \
+		CP_INIT_REGISTER_INIT_LIST_WITH_SPINLOCK)
+
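+/*
+ * Note: the payload dwords emitted in a6xx_cp_init_cmds() below are expected
+ * to appear in the same order as the ordinal bits above, and the total packet
+ * size (header + mask + payloads) is assumed to match A6XX_CP_INIT_DWORDS.
+ */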
+void a6xx_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds)
+{
+	int i = 0;
+
+	cmds[i++] = cp_type7_packet(CP_ME_INIT, A6XX_CP_INIT_DWORDS - 1);
+
+	/* Enabled ordinal mask */
+	cmds[i++] = CP_INIT_MASK;
+
+	if (CP_INIT_MASK & CP_INIT_MAX_CONTEXT)
+		cmds[i++] = 0x00000003;
+
+	if (CP_INIT_MASK & CP_INIT_ERROR_DETECTION_CONTROL)
+		cmds[i++] = 0x20000000;
+
+	if (CP_INIT_MASK & CP_INIT_HEADER_DUMP) {
+		/* Header dump address */
+		cmds[i++] = 0x00000000;
+		/* Header dump enable and dump size */
+		cmds[i++] = 0x00000000;
+	}
+
+	if (CP_INIT_MASK & CP_INIT_UCODE_WORKAROUND_MASK)
+		cmds[i++] = 0x00000000;
+
+	if (CP_INIT_MASK & CP_INIT_OPERATION_MODE_MASK)
+		cmds[i++] = 0x00000002;
+
+	if (CP_INIT_MASK & CP_INIT_REGISTER_INIT_LIST_WITH_SPINLOCK) {
+		uint64_t gpuaddr = adreno_dev->pwrup_reglist->gpuaddr;
+
+		cmds[i++] = lower_32_bits(gpuaddr);
+		cmds[i++] = upper_32_bits(gpuaddr);
+		cmds[i++] =  0;
+	}
+}
+
+void a6xx_spin_idle_debug(struct adreno_device *adreno_dev,
+				const char *str)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	unsigned int rptr, wptr;
+	unsigned int status, status3, intstatus;
+	unsigned int hwfault;
+
+	dev_err(device->dev, "%s", str);
+
+	kgsl_regread(device, A6XX_CP_RB_RPTR, &rptr);
+	kgsl_regread(device, A6XX_CP_RB_WPTR, &wptr);
+
+	kgsl_regread(device, A6XX_RBBM_STATUS, &status);
+	kgsl_regread(device, A6XX_RBBM_STATUS3, &status3);
+	kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &intstatus);
+	kgsl_regread(device, A6XX_CP_HW_FAULT, &hwfault);
+
+	dev_err(device->dev,
+		"rb=%d pos=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n",
+		adreno_dev->cur_rb ? adreno_dev->cur_rb->id : -1, rptr, wptr,
+		status, status3, intstatus);
+
+	dev_err(device->dev, " hwfault=%8.8X\n", hwfault);
+
+	kgsl_device_snapshot(device, NULL, NULL, false);
+}
+
+/*
+ * a6xx_send_cp_init() - Initialize ringbuffer
+ * @adreno_dev: Pointer to adreno device
+ * @rb: Pointer to the ringbuffer of device
+ *
+ * Submit commands for ME initialization.
+ */
+static int a6xx_send_cp_init(struct adreno_device *adreno_dev,
+			 struct adreno_ringbuffer *rb)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int *cmds;
+	int ret;
+
+	cmds = adreno_ringbuffer_allocspace(rb, A6XX_CP_INIT_DWORDS);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	a6xx_cp_init_cmds(adreno_dev, cmds);
+
+	ret = a6xx_ringbuffer_submit(rb, NULL, true);
+	if (!ret) {
+		ret = adreno_spin_idle(adreno_dev, 2000);
+		if (ret) {
+			a6xx_spin_idle_debug(adreno_dev,
+				"CP initialization failed to idle\n");
+
+			kgsl_sharedmem_writel(device->scratch,
+				SCRATCH_RB_OFFSET(rb->id, rptr), 0);
+			rb->wptr = 0;
+			rb->_wptr = 0;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Follow the ME_INIT sequence with a preemption yield to allow the GPU to move
+ * to a different ringbuffer, if desired
+ */
+static int _preemption_init(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, unsigned int *cmds,
+		struct kgsl_context *context)
+{
+	unsigned int *cmds_orig = cmds;
+
+	/* Turn CP protection OFF on legacy targets */
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
+		cmds += cp_protected_mode(adreno_dev, cmds, 0);
+
+	*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6);
+	*cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			rb->preemption_desc->gpuaddr);
+
+	*cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+		rb->secure_preemption_desc->gpuaddr);
+
+	/* Turn CP protection back ON */
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
+		cmds += cp_protected_mode(adreno_dev, cmds, 1);
+
+	*cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
+	*cmds++ = 0;
+	/* generate interrupt on preemption completion */
+	*cmds++ = 0;
+
+	return cmds - cmds_orig;
+}
+
+static int a6xx_post_start(struct adreno_device *adreno_dev)
+{
+	int ret;
+	unsigned int *cmds, *start;
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 42);
+	if (IS_ERR(cmds)) {
+		dev_err(device->dev,
+			     "error allocating preemption init cmds\n");
+		return PTR_ERR(cmds);
+	}
+	start = cmds;
+
+	cmds += _preemption_init(adreno_dev, rb, cmds, NULL);
+
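+	/*
+	 * _preemption_init() may emit fewer than the 42 dwords reserved above,
+	 * so give the unused space back by rewinding _wptr.
+	 */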
+	rb->_wptr = rb->_wptr - (42 - (cmds - start));
+
+	ret = a6xx_ringbuffer_submit(rb, NULL, false);
+	if (!ret) {
+		ret = adreno_spin_idle(adreno_dev, 2000);
+		if (ret)
+			a6xx_spin_idle_debug(adreno_dev,
+				"hw preemption initialization failed to idle\n");
+	}
+
+	return ret;
+}
+
+int a6xx_rb_start(struct adreno_device *adreno_dev)
+{
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 cp_rb_cntl = A6XX_CP_RB_CNTL_DEFAULT |
+		(ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? 0 : (1 << 27));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+	struct adreno_ringbuffer *rb;
+	uint64_t addr;
+	int ret, i;
+	unsigned int *cmds;
+
+	/* Clear all the ringbuffers */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE);
+		kgsl_sharedmem_writel(device->scratch,
+			SCRATCH_RB_OFFSET(rb->id, rptr), 0);
+
+		rb->wptr = 0;
+		rb->_wptr = 0;
+		rb->wptr_preempt_end = ~0;
+	}
+
+	a6xx_preemption_start(adreno_dev);
+
+	/* Set up the current ringbuffer */
+	rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+	addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr);
+
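+	/*
+	 * Point the CP at the per-RB RPTR shadow in the scratch buffer so the
+	 * rptr can be read from memory rather than from a GPU register.
+	 */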
+	kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr));
+	kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr));
+
+	/*
+	 * The size of the ringbuffer in the hardware is the log2
+	 * representation of the size in quadwords (sizedwords / 2).
+	 */
+	kgsl_regwrite(device, A6XX_CP_RB_CNTL, cp_rb_cntl);
+
+	kgsl_regwrite(device, A6XX_CP_RB_BASE,
+		lower_32_bits(rb->buffer_desc->gpuaddr));
+
+	kgsl_regwrite(device, A6XX_CP_RB_BASE_HI,
+		upper_32_bits(rb->buffer_desc->gpuaddr));
+
+	/* Program the ucode base for CP */
+	kgsl_regwrite(device, A6XX_CP_SQE_INSTR_BASE_LO,
+		lower_32_bits(fw->memdesc->gpuaddr));
+	kgsl_regwrite(device, A6XX_CP_SQE_INSTR_BASE_HI,
+		upper_32_bits(fw->memdesc->gpuaddr));
+
+	/* Clear the SQE_HALT to start the CP engine */
+	kgsl_regwrite(device, A6XX_CP_SQE_CNTL, 1);
+
+	ret = a6xx_send_cp_init(adreno_dev, rb);
+	if (ret)
+		return ret;
+
+	ret = adreno_zap_shader_load(adreno_dev, a6xx_core->zap_name);
+	if (ret)
+		return ret;
+
+	/*
+	 * Take the GPU out of secure mode. Try the zap shader if it is loaded,
+	 * otherwise just try to write directly to the secure control register
+	 */
+	if (!adreno_dev->zap_loaded)
+		kgsl_regwrite(device, A6XX_RBBM_SECVID_TRUST_CNTL, 0);
+	else {
+		cmds = adreno_ringbuffer_allocspace(rb, 2);
+		if (IS_ERR(cmds))
+			return PTR_ERR(cmds);
+
+		*cmds++ = cp_packet(adreno_dev, CP_SET_SECURE_MODE, 1);
+		*cmds++ = 0;
+
+		ret = a6xx_ringbuffer_submit(rb, NULL, true);
+		if (!ret) {
+			ret = adreno_spin_idle(adreno_dev, 2000);
+			if (ret) {
+				a6xx_spin_idle_debug(adreno_dev,
+					"Switch to unsecure failed to idle\n");
+				return ret;
+			}
+		}
+	}
+
+	return a6xx_post_start(adreno_dev);
+}
+
+/*
+ * a6xx_sptprac_enable() - Power on SPTPRAC
+ * @adreno_dev: Pointer to Adreno device
+ */
+static int a6xx_sptprac_enable(struct adreno_device *adreno_dev)
+{
+	return a6xx_gmu_sptprac_enable(adreno_dev);
+}
+
+/*
+ * a6xx_sptprac_disable() - Power off SPTPRAC
+ * @adreno_dev: Pointer to Adreno device
+ */
+static void a6xx_sptprac_disable(struct adreno_device *adreno_dev)
+{
+	a6xx_gmu_sptprac_disable(adreno_dev);
+}
+
+/*
+ * a6xx_prepare_for_regulator_disable() - Prepare for regulator disable
+ * @adreno_dev: Pointer to Adreno device
+ */
+static void a6xx_prepare_for_regulator_disable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_a611(adreno_dev))
+		return;
+
+	/* This sequence is only required for A611 */
+	kgsl_regwrite(device, A6XX_RBBM_SW_RESET_CMD, 0x1);
+	/* Make sure software reset is triggered and completed */
+	wmb();
+	udelay(100);
+}
+
+/*
+ * a6xx_gpu_keepalive() - GMU reg write to request GPU stays on
+ * @adreno_dev: Pointer to the adreno device that has the GMU
+ * @state: State to set: true is ON, false is OFF
+ */
+static void a6xx_gpu_keepalive(struct adreno_device *adreno_dev,
+		bool state)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!gmu_core_isenabled(device))
+		return;
+
+	gmu_core_regwrite(device, A6XX_GMU_GMU_PWR_COL_KEEPALIVE, state);
+}
+
+bool a6xx_irq_pending(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &status);
+
+	/* Return busy if an interrupt is pending */
+	return ((status & adreno_dev->irq_mask) ||
+		atomic_read(&adreno_dev->pending_irq_refcnt));
+}
+
+static bool a619_holi_hw_isidle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	kgsl_regread(device, A6XX_RBBM_STATUS, &reg);
+	if (reg & 0xfffffffe)
+		return false;
+
+	return !a6xx_irq_pending(adreno_dev);
+}
+
+bool a6xx_hw_isidle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	/* Non GMU devices monitor the RBBM status */
+	if (!gmu_core_isenabled(device)) {
+		kgsl_regread(device, A6XX_RBBM_STATUS, &reg);
+		if (reg & 0xfffffffe)
+			return false;
+
+		return !a6xx_irq_pending(adreno_dev);
+	}
+
+	gmu_core_regread(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, &reg);
+
+	/* Bit 23 is GPUBUSYIGNAHB */
+	return !(reg & BIT(23));
+}
+
+int a6xx_microcode_read(struct adreno_device *adreno_dev)
+{
+	struct adreno_firmware *sqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+
+	return adreno_get_firmware(adreno_dev, a6xx_core->sqefw_name, sqe_fw);
+}
+
+static int64_t a6xx_read_throttling_counters(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int64_t adj = -1;
+	u32 a, b, c;
+	struct adreno_busy_data *busy = &adreno_dev->busy_data;
+
+	if (!(adreno_dev->lm_enabled || adreno_dev->bcl_enabled))
+		return 0;
+
+	a = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L,
+		&busy->throttle_cycles[0]);
+
+	b = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L,
+		&busy->throttle_cycles[1]);
+
+	c = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L,
+		&busy->throttle_cycles[2]);
+
+	/*
+	 * Currently there are no a6xx targets with both LM and BCL enabled.
+	 * So if BCL is enabled, we can log bcl counters and return.
+	 */
+	if (adreno_dev->bcl_enabled) {
+		trace_kgsl_bcl_clock_throttling(a, b, c);
+		return 0;
+	}
+
+	/*
+	 * The adjustment is the number of cycles lost to throttling, which
+	 * is calculated as a weighted average of the cycles throttled
+	 * at different levels. The adjustment is negative because in A6XX,
+	 * the busy count includes the throttled cycles. Therefore, we want
+	 * to remove them to prevent appearing to be busier than
+	 * we actually are.
+	 */
+	if (adreno_is_a620(adreno_dev) || adreno_is_a650(adreno_dev))
+		/*
+		 * With the newer generations, CRC throttle from SIDs of 0x14
+		 * and above cannot be observed in the power counters. Since the 90%
+		 * throttle uses SID 0x16, the adjustment calculation needs
+		 * correction. The throttling is in increments of 4.2%, and the
+		 * 91.7% counter does a weighted count by the value of sid used
+		 * which are taken into consideration for the final formula.
+		 */
+		adj *= div_s64((a * 42) + (b * 500) +
+			(div_s64((int64_t)c - a - b * 12, 22) * 917), 1000);
+	else
+		adj *= ((a * 5) + (b * 50) + (c * 90)) / 100;
+
+	trace_kgsl_clock_throttling(0, b, c, a, adj);
+
+	return adj;
+}
+#define GPU_CPR_FSM_CTL_OFFSET	 0x4
+static void a6xx_gx_cpr_toggle(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	static void __iomem *gx_cpr_virt;
+	struct resource *res;
+	u32 val = 0;
+
+	if (!a6xx_core->gx_cpr_toggle)
+		return;
+
+	if (!gx_cpr_virt) {
+		res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+				"gx_cpr");
+		if (res == NULL)
+			return;
+
+		gx_cpr_virt = devm_ioremap_resource(&device->pdev->dev, res);
+		if (!gx_cpr_virt) {
+			dev_err(device->dev, "Failed to map GX CPR\n");
+			return;
+		}
+	}
+
+	/*
+	 * Toggle (disable -> enable) the closed loop functionality to recover
+	 * from a CPR measurement stall that can happen under certain conditions.
+	 */
+
+	val = readl_relaxed(gx_cpr_virt + GPU_CPR_FSM_CTL_OFFSET);
+	/* Make sure memory is updated before access */
+	rmb();
+
+	writel_relaxed(val & 0xfffffff0, gx_cpr_virt + GPU_CPR_FSM_CTL_OFFSET);
+	/* make sure register write committed */
+	wmb();
+
+	/* Wait for a short time before we enable GX CPR */
+	udelay(5);
+
+	writel_relaxed(val | 0x00000001, gx_cpr_virt + GPU_CPR_FSM_CTL_OFFSET);
+	/* make sure register write committed */
+	wmb();
+}
+
+/* This is only defined for non-GMU and non-RGMU targets */
+static int a6xx_clear_pending_transactions(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (adreno_is_a619_holi(adreno_dev)) {
+		kgsl_regwrite(device, A6XX_RBBM_GPR0_CNTL, 0x1e0);
+		ret = adreno_wait_for_halt_ack(device,
+			A6XX_RBBM_VBIF_GX_RESET_STATUS, 0xf0);
+	} else {
+		kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT,
+			A6XX_GBIF_GX_HALT_MASK);
+		ret = adreno_wait_for_halt_ack(device, A6XX_RBBM_GBIF_HALT_ACK,
+			A6XX_GBIF_GX_HALT_MASK);
+	}
+
+	if (ret)
+		return ret;
+
+	return a6xx_halt_gbif(adreno_dev);
+}
+
+/**
+ * a6xx_reset() - Helper function to reset the GPU
+ * @adreno_dev: Pointer to the adreno device structure for the GPU
+ *
+ * Try to reset the GPU to recover from a fault for targets without
+ * a GMU.
+ */
+static int a6xx_reset(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+	unsigned long flags = device->pwrctrl.ctrl_flags;
+
+	ret = a6xx_clear_pending_transactions(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* Clear ctrl_flags to ensure clocks and regulators are turned off */
+	device->pwrctrl.ctrl_flags = 0;
+
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
+
+	/* Since the device is officially off now, clear the start bit */
+	clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+	a6xx_reset_preempt_records(adreno_dev);
+
+	ret = adreno_start(device, 0);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+
+	device->pwrctrl.ctrl_flags = flags;
+
+	/* Toggle GX CPR on demand */
+	a6xx_gx_cpr_toggle(device);
+
+	/*
+	 * If active_cnt is zero, there is no need to keep the GPU active. So,
+	 * we should transition to SLUMBER.
+	 */
+	if (!atomic_read(&device->active_cnt))
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+
+	return 0;
+}
+
+static void a6xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status1, status2;
+
+	kgsl_regread(device, A6XX_CP_INTERRUPT_STATUS, &status1);
+
+	if (status1 & BIT(A6XX_CP_OPCODE_ERROR)) {
+		unsigned int opcode;
+
+		kgsl_regwrite(device, A6XX_CP_SQE_STAT_ADDR, 1);
+		kgsl_regread(device, A6XX_CP_SQE_STAT_DATA, &opcode);
+		dev_crit_ratelimited(device->dev,
+			"CP opcode error interrupt | opcode=0x%8.8x\n", opcode);
+	}
+	if (status1 & BIT(A6XX_CP_UCODE_ERROR))
+		dev_crit_ratelimited(device->dev, "CP ucode error interrupt\n");
+	if (status1 & BIT(A6XX_CP_HW_FAULT_ERROR)) {
+		kgsl_regread(device, A6XX_CP_HW_FAULT, &status2);
+		dev_crit_ratelimited(device->dev,
+			"CP | Ringbuffer HW fault | status=%x\n", status2);
+	}
+	if (status1 & BIT(A6XX_CP_REGISTER_PROTECTION_ERROR)) {
+		kgsl_regread(device, A6XX_CP_PROTECT_STATUS, &status2);
+		dev_crit_ratelimited(device->dev,
+			"CP | Protected mode error | %s | addr=%x | status=%x\n",
+			status2 & (1 << 20) ? "READ" : "WRITE",
+			status2 & 0x3FFFF, status2);
+	}
+	if (status1 & BIT(A6XX_CP_AHB_ERROR))
+		dev_crit_ratelimited(device->dev,
+			"CP AHB error interrupt\n");
+	if (status1 & BIT(A6XX_CP_VSD_PARITY_ERROR))
+		dev_crit_ratelimited(device->dev,
+			"CP VSD decoder parity error\n");
+	if (status1 & BIT(A6XX_CP_ILLEGAL_INSTR_ERROR))
+		dev_crit_ratelimited(device->dev,
+			"CP Illegal instruction error\n");
+}
+
+static void a6xx_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	switch (bit) {
+	case A6XX_INT_CP_AHB_ERROR:
+		dev_crit_ratelimited(device->dev, "CP: AHB bus error\n");
+		break;
+	case A6XX_INT_ATB_ASYNCFIFO_OVERFLOW:
+		dev_crit_ratelimited(device->dev,
+					"RBBM: ATB ASYNC overflow\n");
+		break;
+	case A6XX_INT_RBBM_ATB_BUS_OVERFLOW:
+		dev_crit_ratelimited(device->dev,
+					"RBBM: ATB bus overflow\n");
+		break;
+	case A6XX_INT_UCHE_OOB_ACCESS:
+		dev_crit_ratelimited(device->dev,
+					"UCHE: Out of bounds access\n");
+		break;
+	case A6XX_INT_UCHE_TRAP_INTR:
+		dev_crit_ratelimited(device->dev, "UCHE: Trap interrupt\n");
+		break;
+	case A6XX_INT_TSB_WRITE_ERROR:
+		dev_crit_ratelimited(device->dev, "TSB: Write error interrupt\n");
+		break;
+	default:
+		dev_crit_ratelimited(device->dev, "Unknown interrupt %d\n",
+					bit);
+	}
+}
+
+/*
+ * a6xx_llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks
+ * @adreno_dev: The adreno device pointer
+ */
+static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev)
+{
+	uint32_t gpu_scid;
+	uint32_t gpu_cntl1_val = 0;
+	int i;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_mmu *mmu = &device->mmu;
+
+	if (IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice) ||
+		!adreno_dev->gpu_llc_slice_enable)
+		return;
+
+	if (llcc_slice_activate(adreno_dev->gpu_llc_slice))
+		return;
+
+	gpu_scid = llcc_get_slice_id(adreno_dev->gpu_llc_slice);
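+	/* Replicate the same SCID into each of the GPU SCID fields */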
+	for (i = 0; i < A6XX_LLC_NUM_GPU_SCIDS; i++)
+		gpu_cntl1_val = (gpu_cntl1_val << A6XX_GPU_LLC_SCID_NUM_BITS)
+			| gpu_scid;
+
+	if (mmu->subtype == KGSL_IOMMU_SMMU_V500)
+		kgsl_regrmw(device, A6XX_GBIF_SCACHE_CNTL1,
+			A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val);
+	else
+		adreno_cx_misc_regrmw(adreno_dev,
+				A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
+				A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val);
+
+	/*
+	 * On A660 and A663, the SCID programming for UCHE traffic is done in
+	 * A6XX_GBIF_SCACHE_CNTL0[14:10].
+	 * GFO enable, BIT(8): the LLC uses a 64-byte cache line size;
+	 * enabling GFO allows it to allocate partial cache lines.
+	 */
+	if (adreno_is_a660(adreno_dev) ||
+			adreno_is_a663(adreno_dev))
+		kgsl_regrmw(device, A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
+				BIT(8), (gpu_scid << 10) | BIT(8));
+}
+
+/*
+ * a6xx_llc_configure_gpuhtw_scid() - Program the SCID for GPU pagetables
+ * @adreno_dev: The adreno device pointer
+ */
+static void a6xx_llc_configure_gpuhtw_scid(struct adreno_device *adreno_dev)
+{
+	uint32_t gpuhtw_scid;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_mmu *mmu = &device->mmu;
+
+	if (IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice) ||
+		!adreno_dev->gpuhtw_llc_slice_enable)
+		return;
+
+	if (llcc_slice_activate(adreno_dev->gpuhtw_llc_slice))
+		return;
+
+	/*
+	 * On SMMU-v500, the GPUHTW SCID is configured via a NoC override in
+	 * the XBL image.
+	 */
+	if (mmu->subtype == KGSL_IOMMU_SMMU_V500)
+		return;
+
+	gpuhtw_scid = llcc_get_slice_id(adreno_dev->gpuhtw_llc_slice);
+
+	adreno_cx_misc_regrmw(adreno_dev,
+			A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
+			A6XX_GPUHTW_LLC_SCID_MASK,
+			gpuhtw_scid << A6XX_GPUHTW_LLC_SCID_SHIFT);
+}
+
+/*
+ * a6xx_llc_enable_overrides() - Override the page attributes
+ * @adreno_dev: The adreno device pointer
+ */
+static void a6xx_llc_enable_overrides(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_mmu *mmu = &device->mmu;
+
+	/*
+	 * Attributes override through GBIF is not supported with MMU-500.
+	 * Attributes are used as configured through SMMU pagetable entries.
+	 */
+	if (mmu->subtype == KGSL_IOMMU_SMMU_V500)
+		return;
+
+	/*
+	 * 0x3: readnoallocoverrideen=0
+	 *      read-no-alloc=0 - Allocate lines on read miss
+	 *      writenoallocoverrideen=1
+	 *      write-no-alloc=1 - Do not allocate lines on write miss
+	 */
+	adreno_cx_misc_regwrite(adreno_dev,
+			A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0x3);
+}
+
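+/*
+ * UCHE client table for targets other than A660: the row is selected by the
+ * client id read from A6XX_UCHE_CLIENT_PF and the column by (mid - 1), see
+ * a6xx_fault_block_uche() below.
+ */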
+static const char *uche_client[7][3] = {
+	{"SP | VSC | VPC | HLSQ | PC | LRZ", "TP", "VFD"},
+	{"VSC | VPC | HLSQ | PC | LRZ", "TP | VFD", "SP"},
+	{"SP | VPC | HLSQ | PC | LRZ", "TP | VFD", "VSC"},
+	{"SP | VSC | HLSQ | PC | LRZ", "TP | VFD", "VPC"},
+	{"SP | VSC | VPC | PC | LRZ", "TP | VFD", "HLSQ"},
+	{"SP | VSC | VPC | HLSQ | LRZ", "TP | VFD", "PC"},
+	{"SP | VSC | VPC | HLSQ | PC", "TP | VFD", "LRZ"},
+};
+
+static const char *const uche_client_a660[] = { "VFD", "SP", "VSC", "VPC",
+						"HLSQ", "PC", "LRZ", "TP" };
+
+#define SCOOBYDOO 0x5c00bd00
+
+static const char *a6xx_fault_block_uche(struct kgsl_device *device,
+		unsigned int mid)
+{
+	unsigned int uche_client_id = 0;
+	static char str[40];
+
+	/*
+	 * Smmu driver takes a vote on CX gdsc before calling the kgsl
+	 * pagefault handler. If there is contention for device mutex in this
+	 * path and the dispatcher fault handler is holding this lock, trying
+	 * to turn off CX gdsc will fail during the reset. So to avoid blocking
+	 * here, try to lock device mutex and return if it fails.
+	 */
+	if (!mutex_trylock(&device->mutex))
+		return "UCHE: unknown";
+
+	if (!kgsl_state_is_awake(device)) {
+		mutex_unlock(&device->mutex);
+		return "UCHE: unknown";
+	}
+
+	kgsl_regread(device, A6XX_UCHE_CLIENT_PF, &uche_client_id);
+	mutex_unlock(&device->mutex);
+
+	/* Ignore the value if the gpu is in IFPC */
+	if (uche_client_id == SCOOBYDOO)
+		return "UCHE: unknown";
+
+	if (adreno_is_a660(ADRENO_DEVICE(device))) {
+
+		/* Mask is 7 bits for A660 */
+		uche_client_id &= 0x7F;
+		if (uche_client_id >= ARRAY_SIZE(uche_client_a660) ||
+				(mid == 2))
+			return "UCHE: Unknown";
+
+		if (mid == 1)
+			snprintf(str, sizeof(str), "UCHE: Not %s",
+				uche_client_a660[uche_client_id]);
+		else if (mid == 3)
+			snprintf(str, sizeof(str), "UCHE: %s",
+				uche_client_a660[uche_client_id]);
+	} else {
+		uche_client_id &= A6XX_UCHE_CLIENT_PF_CLIENT_ID_MASK;
+		if (uche_client_id >= ARRAY_SIZE(uche_client))
+			return "UCHE: Unknown";
+
+		snprintf(str, sizeof(str), "UCHE: %s",
+			uche_client[uche_client_id][mid - 1]);
+	}
+
+	return str;
+}
+
+static const char *a6xx_iommu_fault_block(struct kgsl_device *device,
+		unsigned int fsynr1)
+{
+	unsigned int mid = fsynr1 & 0xff;
+
+	switch (mid) {
+	case 0:
+		return "CP";
+	case 1:
+	case 2:
+	case 3:
+		return a6xx_fault_block_uche(device, mid);
+	case 4:
+		return "CCU";
+	case 6:
+		return "CDP Prefetch";
+	case 7:
+		return "GPMU";
+	}
+
+	return "Unknown";
+}
+
+static void a6xx_cp_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (adreno_is_preemption_enabled(adreno_dev))
+		a6xx_preemption_trigger(adreno_dev, true);
+
+	adreno_dispatcher_schedule(device);
+}
+
+/*
+ * a6xx_gpc_err_int_callback() - Isr for GPC error interrupts
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+static void a6xx_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/*
+	 * A GPC error is typically the result of a SW programming mistake.
+	 * Force a GPU fault for this interrupt so that we can debug it
+	 * with the help of a register dump.
+	 */
+
+	dev_crit(device->dev, "RBBM: GPC error\n");
+	adreno_irqctrl(adreno_dev, 0);
+
+	/* Trigger a fault in the dispatcher - this will effect a restart */
+	adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT);
+}
+
+static const struct adreno_irq_funcs a6xx_irq_funcs[32] = {
+	ADRENO_IRQ_CALLBACK(NULL),              /* 0 - RBBM_GPU_IDLE */
+	ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 1 - RBBM_AHB_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 2 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 3 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 4 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 5 - UNUSED */
+	/* 6 - RBBM_ATB_ASYNC_OVERFLOW */
+	ADRENO_IRQ_CALLBACK(a6xx_err_callback),
+	ADRENO_IRQ_CALLBACK(a6xx_gpc_err_int_callback), /* 7 - GPC_ERR */
+	ADRENO_IRQ_CALLBACK(a6xx_preemption_callback),/* 8 - CP_SW */
+	ADRENO_IRQ_CALLBACK(a6xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL),  /* 10 - CP_CCU_FLUSH_DEPTH_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 11 - CP_CCU_FLUSH_COLOR_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_CCU_RESOLVE_TS */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */
+	ADRENO_IRQ_CALLBACK(NULL), /* 16 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNUSED */
+	ADRENO_IRQ_CALLBACK(a6xx_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 21 - UNUSED */
+	ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */
+	/* 23 - MISC_HANG_DETECT */
+	ADRENO_IRQ_CALLBACK(adreno_hang_int_callback),
+	ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 24 - UCHE_OOB_ACCESS */
+	ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 25 - UCHE_TRAP_INTR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */
+	ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */
+	ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 28 - TSBWRITEERROR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 29 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */
+	ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */
+};
+
+/*
+ * If the AHB fence is not in ALLOW mode when we receive an RBBM
+ * interrupt, something went wrong. This means that we cannot proceed
+ * since the IRQ status and clear registers are not accessible.
+ * This is usually harmless because the GMU will abort power collapse
+ * and change the fence back to ALLOW. Poll so that this can happen.
+ */
+static int a6xx_irq_poll_fence(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status, fence, fence_retries = 0;
+	u64 a, b, c;
+
+	if (!gmu_core_isenabled(device))
+		return 0;
+
+	a = a6xx_read_alwayson(adreno_dev);
+
+	kgsl_regread(device, A6XX_GMU_AO_AHB_FENCE_CTRL, &fence);
+
+	while (fence != 0) {
+		b = a6xx_read_alwayson(adreno_dev);
+
+		/* Wait for a short time before trying again */
+		udelay(1);
+		kgsl_regread(device, A6XX_GMU_AO_AHB_FENCE_CTRL, &fence);
+
+		if (fence_retries == 100 && fence != 0) {
+			c = a6xx_read_alwayson(adreno_dev);
+
+			kgsl_regread(device, A6XX_GMU_RBBM_INT_UNMASKED_STATUS,
+				&status);
+
+			dev_crit_ratelimited(device->dev,
+				"status=0x%x Unmasked status=0x%x Mask=0x%x timestamps: %llx %llx %llx\n",
+					status & adreno_dev->irq_mask, status,
+					adreno_dev->irq_mask, a, b, c);
+			return -ETIMEDOUT;
+		}
+
+		fence_retries++;
+	}
+
+	return 0;
+}
+
+static irqreturn_t a6xx_irq_handler(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	irqreturn_t ret = IRQ_NONE;
+	u32 status;
+
+	/*
+	 * On A6xx, the GPU can power down once the INT_0_STATUS is read
+	 * below. But there still might be some register reads required
+	 * so force the GMU/GPU into KEEPALIVE mode until done with the ISR.
+	 */
+	a6xx_gpu_keepalive(adreno_dev, true);
+
+	if (a6xx_irq_poll_fence(adreno_dev)) {
+		adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
+		goto done;
+	}
+
+	kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &status);
+
+	kgsl_regwrite(device, A6XX_RBBM_INT_CLEAR_CMD, status);
+
+	ret = adreno_irq_callbacks(adreno_dev, a6xx_irq_funcs, status);
+
+	trace_kgsl_a5xx_irq_status(adreno_dev, status);
+
+done:
+	/* If hard fault, then let snapshot turn off the keepalive */
+	if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT))
+		a6xx_gpu_keepalive(adreno_dev, false);
+
+	return ret;
+}
+
+int a6xx_probe_common(struct platform_device *pdev,
+	struct	adreno_device *adreno_dev, u32 chipid,
+	const struct adreno_gpu_core *gpucore)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = gpucore->gpudev;
+	int ret;
+
+	adreno_dev->gpucore = gpucore;
+	adreno_dev->chipid = chipid;
+
+	adreno_reg_offset_init(gpudev->reg_offsets);
+
+	if (gmu_core_isenabled(device) && (gpudev != &adreno_a6xx_rgmu_gpudev))
+		device->pwrctrl.cx_cfg_gdsc_offset = (adreno_is_a662(adreno_dev) ||
+			adreno_is_a621(adreno_dev)) ? A662_GPU_CC_CX_CFG_GDSCR :
+			A6XX_GPU_CC_CX_CFG_GDSCR;
+
+	adreno_dev->hwcg_enabled = true;
+	adreno_dev->uche_client_pf = 1;
+
+	adreno_dev->preempt.preempt_level = 1;
+	adreno_dev->preempt.skipsaverestore = true;
+	adreno_dev->preempt.usesgmem = true;
+
+	ret = adreno_device_probe(pdev, adreno_dev);
+
+	if (ret)
+		return ret;
+
+	a6xx_coresight_init(adreno_dev);
+	return 0;
+}
+
+static int a6xx_probe(struct platform_device *pdev,
+		u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	int ret;
+
+	adreno_dev = (struct adreno_device *)
+		of_device_get_match_data(&pdev->dev);
+
+	memset(adreno_dev, 0, sizeof(*adreno_dev));
+
+	adreno_dev->irq_mask = A6XX_INT_MASK;
+
+	ret = a6xx_probe_common(pdev, adreno_dev, chipid, gpucore);
+	if (ret)
+		return ret;
+
+	ret = adreno_dispatcher_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	timer_setup(&device->idle_timer, kgsl_timer, 0);
+
+	INIT_WORK(&device->idle_check_ws, kgsl_idle_check);
+
+	return 0;
+}
+
+/* Register offset defines for A6XX, in order of enum adreno_regs */
+static unsigned int a6xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
+
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A6XX_CP_RB_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A6XX_CP_RB_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+				A6XX_CP_RB_RPTR_ADDR_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI,
+				A6XX_CP_RB_RPTR_ADDR_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A6XX_CP_RB_RPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A6XX_CP_RB_WPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A6XX_CP_RB_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A6XX_CP_SQE_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A6XX_CP_IB1_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A6XX_CP_IB1_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A6XX_CP_IB1_REM_SIZE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A6XX_CP_IB2_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A6XX_CP_IB2_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A6XX_CP_IB2_REM_SIZE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A6XX_CP_CONTEXT_SWITCH_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+			A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+			A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI),
+	ADRENO_REG_DEFINE(
+		ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO,
+			A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO),
+	ADRENO_REG_DEFINE(
+		ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI,
+			A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI),
+	ADRENO_REG_DEFINE(
+		ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO,
+			A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO),
+	ADRENO_REG_DEFINE(
+		ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI,
+			A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO,
+			A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI,
+			A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_LEVEL_STATUS,
+			A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A6XX_RBBM_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A6XX_RBBM_STATUS3),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A6XX_RBBM_INT_0_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A6XX_RBBM_CLOCK_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A6XX_RBBM_SW_RESET_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK,
+				A6XX_GMU_AO_HOST_INTERRUPT_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_GMU_AHB_FENCE_STATUS,
+				A6XX_GMU_AHB_FENCE_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_GMU_GMU2HOST_INTR_MASK,
+				A6XX_GMU_GMU2HOST_INTR_MASK),
+};
+
+int a6xx_perfcounter_update(struct adreno_device *adreno_dev,
+	struct adreno_perfcount_register *reg, bool update_reg)
+{
+	void *ptr = adreno_dev->pwrup_reglist->hostptr;
+	struct cpu_gpu_lock *lock = ptr;
+	u32 *data = ptr + sizeof(*lock);
+	int i, offset = 0;
+	bool select_reg_present = false;
+
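+	/*
+	 * Walk the dynamic part of the list in <select register, value> pairs
+	 * looking for an existing entry for this select register; stop if the
+	 * trailing A6XX_RBBM_PERFCTR_CNTL entry is reached first.
+	 */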
+	for (i = 0; i < lock->list_length >> 1; i++) {
+		if (data[offset] == reg->select) {
+			select_reg_present = true;
+			break;
+		}
+
+		if (data[offset] == A6XX_RBBM_PERFCTR_CNTL)
+			break;
+
+		offset += 2;
+	}
+
+	if (kgsl_hwlock(lock)) {
+		kgsl_hwunlock(lock);
+		return -EBUSY;
+	}
+
+	/*
+	 * If the perfcounter select register is already present in reglist
+	 * update it, otherwise append the <select register, value> pair to
+	 * the end of the list.
+	 */
+	if (select_reg_present) {
+		data[offset + 1] = reg->countable;
+		goto update;
+	}
+
+	/*
+	 * For all targets A6XX_RBBM_PERFCTR_CNTL needs to be the last entry,
+	 * so overwrite the existing A6XX_RBBM_PERFCTR_CNTL entry and add it
+	 * back to the end.
+	 */
+	data[offset] = reg->select;
+	data[offset + 1] = reg->countable;
+	data[offset + 2] = A6XX_RBBM_PERFCTR_CNTL;
+	data[offset + 3] = 1;
+
+	lock->list_length += 2;
+
+update:
+	if (update_reg)
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg->select,
+			reg->countable);
+
+	kgsl_hwunlock(lock);
+	return 0;
+}
+
+u64 a6xx_read_alwayson(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 lo = 0, hi = 0, tmp = 0;
+
+	if (!gmu_core_isenabled(device)) {
+		kgsl_regread(device, A6XX_CP_ALWAYS_ON_COUNTER_LO, &lo);
+		kgsl_regread(device, A6XX_CP_ALWAYS_ON_COUNTER_HI, &hi);
+	} else {
+		/* Always use the GMU AO counter when doing a AHB read */
+		gmu_core_regread(device, A6XX_GMU_ALWAYS_ON_COUNTER_H, &hi);
+		gmu_core_regread(device, A6XX_GMU_ALWAYS_ON_COUNTER_L, &lo);
+
+		/* Check for overflow */
+		gmu_core_regread(device, A6XX_GMU_ALWAYS_ON_COUNTER_H, &tmp);
+
+		if (hi != tmp) {
+			gmu_core_regread(device, A6XX_GMU_ALWAYS_ON_COUNTER_L,
+				&lo);
+			hi = tmp;
+		}
+	}
+
+	return (((u64) hi) << 32) | lo;
+}
+
+static void a6xx_remove(struct adreno_device *adreno_dev)
+{
+	if (adreno_preemption_feature_set(adreno_dev))
+		del_timer(&adreno_dev->preempt.timer);
+}
+
+static void a6xx_read_bus_stats(struct kgsl_device *device,
+		struct kgsl_power_stats *stats,
+		struct adreno_busy_data *busy)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u64 ram_cycles, starved_ram;
+
+	ram_cycles = counter_delta(device, adreno_dev->ram_cycles_lo,
+		&busy->bif_ram_cycles);
+
+	starved_ram = counter_delta(device, adreno_dev->starved_ram_lo,
+		&busy->bif_starved_ram);
+
+	if (!adreno_is_a630(adreno_dev)) {
+		ram_cycles += counter_delta(device,
+			adreno_dev->ram_cycles_lo_ch1_read,
+			&busy->bif_ram_cycles_read_ch1);
+
+		ram_cycles += counter_delta(device,
+			adreno_dev->ram_cycles_lo_ch0_write,
+			&busy->bif_ram_cycles_write_ch0);
+
+		ram_cycles += counter_delta(device,
+			adreno_dev->ram_cycles_lo_ch1_write,
+			&busy->bif_ram_cycles_write_ch1);
+
+		starved_ram += counter_delta(device,
+			adreno_dev->starved_ram_lo_ch1,
+			&busy->bif_starved_ram_ch1);
+	}
+
+	stats->ram_time = ram_cycles;
+	stats->ram_wait = starved_ram;
+}
+
+static void a6xx_power_stats(struct adreno_device *adreno_dev,
+		struct kgsl_power_stats *stats)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_busy_data *busy = &adreno_dev->busy_data;
+	s64 gpu_busy;
+
+	/* Set the GPU busy counter for frequency scaling */
+	gpu_busy = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
+		&busy->gpu_busy);
+
+	gpu_busy += a6xx_read_throttling_counters(adreno_dev);
+	/* If adjustment cycles are more than busy cycles make gpu_busy zero */
+	if (gpu_busy < 0)
+		gpu_busy = 0;
+
+	stats->busy_time = gpu_busy * 10;
+	do_div(stats->busy_time, 192);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) {
+		u32 ifpc = counter_delta(device,
+			A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L,
+			&busy->num_ifpc);
+
+		adreno_dev->ifpc_count += ifpc;
+		if (ifpc > 0)
+			trace_adreno_ifpc_count(adreno_dev->ifpc_count);
+	}
+
+	if (device->pwrctrl.bus_control)
+		a6xx_read_bus_stats(device, stats, busy);
+}
+
+static int a6xx_setproperty(struct kgsl_device_private *dev_priv,
+		u32 type, void __user *value, u32 sizebytes)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 enable;
+
+	if (type != KGSL_PROP_PWRCTRL)
+		return -ENODEV;
+
+	if (sizebytes != sizeof(enable))
+		return -EINVAL;
+
+	if (copy_from_user(&enable, value, sizeof(enable)))
+		return -EFAULT;
+
+	mutex_lock(&device->mutex);
+
+	if (enable) {
+		if (gmu_core_isenabled(device))
+			clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags);
+		else
+			device->pwrctrl.ctrl_flags = 0;
+
+		kgsl_pwrscale_enable(device);
+	} else {
+		if (gmu_core_isenabled(device)) {
+			set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags);
+
+			if (!adreno_active_count_get(adreno_dev))
+				adreno_active_count_put(adreno_dev);
+		} else {
+			kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+			device->pwrctrl.ctrl_flags = KGSL_PWR_ON;
+		}
+		kgsl_pwrscale_disable(device, true);
+	}
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static int a6xx_dev_add_to_minidump(struct adreno_device *adreno_dev)
+{
+	return kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_ADRENO_DEVICE,
+				(void *)(adreno_dev), sizeof(struct adreno_device));
+}
+
+static void a6xx_set_isdb_breakpoint_registers(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct clk *clk;
+	int ret;
+
+	if (!device->set_isdb_breakpoint || device->ftbl->is_hwcg_on(device)
+			|| device->qdss_gfx_virt == NULL || !device->force_panic)
+		return;
+
+	clk = clk_get(&device->pdev->dev, "apb_pclk");
+
+	if (IS_ERR(clk)) {
+		dev_err(device->dev, "Unable to get QDSS clock\n");
+		goto err;
+	}
+
+	ret = clk_prepare_enable(clk);
+
+	if (ret) {
+		dev_err(device->dev, "QDSS Clock enable error: %d\n", ret);
+		clk_put(clk);
+		goto err;
+	}
+
+	/* Issue break command for all eight SPs */
+	isdb_write(device->qdss_gfx_virt, 0x0000);
+	isdb_write(device->qdss_gfx_virt, 0x1000);
+	isdb_write(device->qdss_gfx_virt, 0x2000);
+	isdb_write(device->qdss_gfx_virt, 0x3000);
+	isdb_write(device->qdss_gfx_virt, 0x4000);
+	isdb_write(device->qdss_gfx_virt, 0x5000);
+	isdb_write(device->qdss_gfx_virt, 0x6000);
+	isdb_write(device->qdss_gfx_virt, 0x7000);
+
+	clk_disable_unprepare(clk);
+	clk_put(clk);
+
+	return;
+
+err:
+	/* Do not force kernel panic if isdb writes did not go through */
+	device->force_panic = false;
+}
+
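+/*
+ * A619 "holi" is a non GMU part, so the kernel drives the SPTPRAC power rail
+ * directly: write the power control register and poll the status bits.
+ */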
+static int a619_holi_sptprac_enable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	void __iomem *addr = kgsl_regmap_virt(&device->regmap,
+		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
+	u32 val;
+
+	if (test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv))
+		return 0;
+
+	kgsl_regwrite(device, A6XX_GMU_GX_SPTPRAC_POWER_CONTROL,
+		SPTPRAC_POWERON_CTRL_MASK);
+
+	if (readl_poll_timeout(addr, val,
+		(val & SPTPRAC_POWERON_STATUS_MASK) ==
+		SPTPRAC_POWERON_STATUS_MASK, 10, 10 * 1000)) {
+		dev_err(device->dev, "power on SPTPRAC fail\n");
+		return -EINVAL;
+	}
+
+	set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv);
+	return 0;
+}
+
+static void a619_holi_sptprac_disable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	void __iomem *addr = kgsl_regmap_virt(&device->regmap,
+		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
+	u32 val;
+
+	if (!test_and_clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED,
+		&adreno_dev->priv))
+		return;
+
+	/* Ensure that retention is on */
+	kgsl_regrmw(device, A6XX_GPU_CC_GX_GDSCR, 0,
+		A6XX_RETAIN_FF_ENABLE_ENABLE_MASK);
+
+	kgsl_regwrite(device, A6XX_GMU_GX_SPTPRAC_POWER_CONTROL,
+		SPTPRAC_POWEROFF_CTRL_MASK);
+	if (readl_poll_timeout(addr, val,
+		(val & SPTPRAC_POWEROFF_STATUS_MASK) ==
+		SPTPRAC_POWEROFF_STATUS_MASK, 10, 10 * 1000))
+		dev_err(device->dev, "power off SPTPRAC fail\n");
+}
+
+/* This is a non GMU/RGMU part */
+const struct adreno_gpudev adreno_a6xx_gpudev = {
+	.reg_offsets = a6xx_register_offsets,
+	.probe = a6xx_probe,
+	.start = a6xx_nogmu_start,
+	.snapshot = a6xx_snapshot,
+	.init = a6xx_nogmu_init,
+	.irq_handler = a6xx_irq_handler,
+	.rb_start = a6xx_rb_start,
+	.regulator_disable = a6xx_prepare_for_regulator_disable,
+	.gpu_keepalive = a6xx_gpu_keepalive,
+	.hw_isidle = a6xx_hw_isidle,
+	.iommu_fault_block = a6xx_iommu_fault_block,
+	.reset = a6xx_reset,
+	.preemption_schedule = a6xx_preemption_schedule,
+	.preemption_context_init = a6xx_preemption_context_init,
+	.read_alwayson = a6xx_read_alwayson,
+	.power_ops = &adreno_power_operations,
+	.clear_pending_transactions = a6xx_clear_pending_transactions,
+	.deassert_gbif_halt = a6xx_deassert_gbif_halt,
+	.remove = a6xx_remove,
+	.ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd,
+	.is_hw_collapsible = adreno_isidle,
+	.power_stats = a6xx_power_stats,
+	.setproperty = a6xx_setproperty,
+	.add_to_va_minidump = a6xx_dev_add_to_minidump,
+	.gx_is_on = a6xx_gx_is_on,
+};
+
+const struct a6xx_gpudev adreno_a6xx_hwsched_gpudev = {
+	.base = {
+		.reg_offsets = a6xx_register_offsets,
+		.probe = a6xx_hwsched_probe,
+		.snapshot = a6xx_hwsched_snapshot,
+		.irq_handler = a6xx_irq_handler,
+		.iommu_fault_block = a6xx_iommu_fault_block,
+		.preemption_context_init = a6xx_preemption_context_init,
+		.context_detach = a6xx_hwsched_context_detach,
+		.read_alwayson = a6xx_read_alwayson,
+		.reset = a6xx_hwsched_reset_replay,
+		.power_ops = &a6xx_hwsched_power_ops,
+		.power_stats = a6xx_power_stats,
+		.setproperty = a6xx_setproperty,
+		.hw_isidle = a6xx_hw_isidle,
+		.add_to_va_minidump = a6xx_hwsched_add_to_minidump,
+		.gx_is_on = a6xx_gmu_gx_is_on,
+		.send_recurring_cmdobj = a6xx_hwsched_send_recurring_cmdobj,
+		.set_isdb_breakpoint_registers = a6xx_set_isdb_breakpoint_registers,
+	},
+	.hfi_probe = a6xx_hwsched_hfi_probe,
+	.hfi_remove = a6xx_hwsched_hfi_remove,
+	.handle_watchdog = a6xx_hwsched_handle_watchdog,
+};
+
+const struct a6xx_gpudev adreno_a6xx_gmu_gpudev = {
+	.base = {
+		.reg_offsets = a6xx_register_offsets,
+		.probe = a6xx_gmu_device_probe,
+		.snapshot = a6xx_gmu_snapshot,
+		.irq_handler = a6xx_irq_handler,
+		.rb_start = a6xx_rb_start,
+		.regulator_enable = a6xx_sptprac_enable,
+		.regulator_disable = a6xx_sptprac_disable,
+		.gpu_keepalive = a6xx_gpu_keepalive,
+		.hw_isidle = a6xx_hw_isidle,
+		.iommu_fault_block = a6xx_iommu_fault_block,
+		.reset = a6xx_gmu_reset,
+		.preemption_schedule = a6xx_preemption_schedule,
+		.preemption_context_init = a6xx_preemption_context_init,
+		.read_alwayson = a6xx_read_alwayson,
+		.power_ops = &a6xx_gmu_power_ops,
+		.remove = a6xx_remove,
+		.ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd,
+		.power_stats = a6xx_power_stats,
+		.setproperty = a6xx_setproperty,
+		.add_to_va_minidump = a6xx_gmu_add_to_minidump,
+		.gx_is_on = a6xx_gmu_gx_is_on,
+		.set_isdb_breakpoint_registers = a6xx_set_isdb_breakpoint_registers,
+	},
+	.hfi_probe = a6xx_gmu_hfi_probe,
+	.handle_watchdog = a6xx_gmu_handle_watchdog,
+};
+
+const struct adreno_gpudev adreno_a6xx_rgmu_gpudev = {
+	.reg_offsets = a6xx_register_offsets,
+	.probe = a6xx_rgmu_device_probe,
+	.snapshot = a6xx_rgmu_snapshot,
+	.irq_handler = a6xx_irq_handler,
+	.rb_start = a6xx_rb_start,
+	.regulator_enable = a6xx_sptprac_enable,
+	.regulator_disable = a6xx_sptprac_disable,
+	.gpu_keepalive = a6xx_gpu_keepalive,
+	.hw_isidle = a6xx_hw_isidle,
+	.iommu_fault_block = a6xx_iommu_fault_block,
+	.reset = a6xx_rgmu_reset,
+	.preemption_schedule = a6xx_preemption_schedule,
+	.preemption_context_init = a6xx_preemption_context_init,
+	.read_alwayson = a6xx_read_alwayson,
+	.power_ops = &a6xx_rgmu_power_ops,
+	.remove = a6xx_remove,
+	.ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd,
+	.power_stats = a6xx_power_stats,
+	.setproperty = a6xx_setproperty,
+	.add_to_va_minidump = a6xx_rgmu_add_to_minidump,
+	.gx_is_on = a6xx_rgmu_gx_is_on,
+};
+
+/* This is a non GMU/RGMU part */
+const struct adreno_gpudev adreno_a619_holi_gpudev = {
+	.reg_offsets = a6xx_register_offsets,
+	.probe = a6xx_probe,
+	.start = a6xx_nogmu_start,
+	.snapshot = a6xx_snapshot,
+	.init = a6xx_nogmu_init,
+	.irq_handler = a6xx_irq_handler,
+	.rb_start = a6xx_rb_start,
+	.regulator_enable = a619_holi_sptprac_enable,
+	.regulator_disable = a619_holi_sptprac_disable,
+	.gpu_keepalive = a6xx_gpu_keepalive,
+	.hw_isidle = a619_holi_hw_isidle,
+	.iommu_fault_block = a6xx_iommu_fault_block,
+	.reset = a6xx_reset,
+	.preemption_schedule = a6xx_preemption_schedule,
+	.preemption_context_init = a6xx_preemption_context_init,
+	.read_alwayson = a6xx_read_alwayson,
+	.power_ops = &adreno_power_operations,
+	.clear_pending_transactions = a6xx_clear_pending_transactions,
+	.deassert_gbif_halt = a6xx_deassert_gbif_halt,
+	.remove = a6xx_remove,
+	.ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd,
+	.is_hw_collapsible = adreno_isidle,
+	.power_stats = a6xx_power_stats,
+	.setproperty = a6xx_setproperty,
+	.add_to_va_minidump = a6xx_dev_add_to_minidump,
+	.gx_is_on = a619_holi_gx_is_on,
+};
+
+const struct a6xx_gpudev adreno_a630_gpudev = {
+	.base = {
+		.reg_offsets = a6xx_register_offsets,
+		.probe = a6xx_gmu_device_probe,
+		.snapshot = a6xx_gmu_snapshot,
+		.irq_handler = a6xx_irq_handler,
+		.rb_start = a6xx_rb_start,
+		.regulator_enable = a6xx_sptprac_enable,
+		.regulator_disable = a6xx_sptprac_disable,
+		.gpu_keepalive = a6xx_gpu_keepalive,
+		.hw_isidle = a6xx_hw_isidle,
+		.iommu_fault_block = a6xx_iommu_fault_block,
+		.reset = a6xx_gmu_reset,
+		.preemption_schedule = a6xx_preemption_schedule,
+		.preemption_context_init = a6xx_preemption_context_init,
+		.read_alwayson = a6xx_read_alwayson,
+		.power_ops = &a630_gmu_power_ops,
+		.remove = a6xx_remove,
+		.ringbuffer_submitcmd = a6xx_ringbuffer_submitcmd,
+		.power_stats = a6xx_power_stats,
+		.setproperty = a6xx_setproperty,
+		.add_to_va_minidump = a6xx_gmu_add_to_minidump,
+		.gx_is_on = a6xx_gmu_gx_is_on,
+	},
+	.hfi_probe = a6xx_gmu_hfi_probe,
+	.handle_watchdog = a6xx_gmu_handle_watchdog,
+};

+ 450 - 0
qcom/opensource/graphics-kernel/adreno_a6xx.h

@@ -0,0 +1,450 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_A6XX_H_
+#define _ADRENO_A6XX_H_
+
+#include <linux/delay.h>
+
+#include "a6xx_reg.h"
+#include "adreno_a6xx_gmu.h"
+#include "adreno_a6xx_rgmu.h"
+
+extern const struct adreno_power_ops a6xx_gmu_power_ops;
+extern const struct adreno_power_ops a6xx_rgmu_power_ops;
+extern const struct adreno_power_ops a630_gmu_power_ops;
+extern const struct adreno_power_ops a6xx_hwsched_power_ops;
+
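+/**
+ * struct a6xx_gpudev - Container for the a6xx specific GPU device ops
+ * @base: Generic adreno gpudev operations
+ * @hfi_probe: Target specific routine to probe the HFI
+ * @hfi_remove: Target specific routine to remove HFI (not set for all targets)
+ * @handle_watchdog: Target specific handler for GMU watchdog expiry
+ */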
+struct a6xx_gpudev {
+	struct adreno_gpudev base;
+	int (*hfi_probe)(struct adreno_device *adreno_dev);
+	void (*hfi_remove)(struct adreno_device *adreno_dev);
+	void (*handle_watchdog)(struct adreno_device *adreno_dev);
+};
+
+extern const struct a6xx_gpudev adreno_a630_gpudev;
+extern const struct a6xx_gpudev adreno_a6xx_gmu_gpudev;
+extern const struct a6xx_gpudev adreno_a6xx_hwsched_gpudev;
+
+/**
+ * struct a6xx_device - Container for the a6xx specific device
+ */
+struct a6xx_device {
+	/** @gmu: Container for the a6xx GMU device */
+	struct a6xx_gmu_device gmu;
+	/** @rgmu: Container for the a6xx rGMU device */
+	struct a6xx_rgmu_device rgmu;
+	/** @adreno_dev: Container for the generic adreno device */
+	struct adreno_device adreno_dev;
+};
+
+/**
+ * struct adreno_a6xx_core - a6xx specific GPU core definitions
+ */
+struct adreno_a6xx_core {
+	/** @base: Container for the generic GPU definitions */
+	struct adreno_gpu_core base;
+	/** @gmu_major: The GMU major version supported by the core */
+	u32 gmu_major;
+	/** @gmu_minor: The minimum GMU minor version supported by the core */
+	u32 gmu_minor;
+	/** @prim_fifo_threshold: target specific value for PC_DBG_ECO_CNTL */
+	unsigned int prim_fifo_threshold;
+	/** @sqefw_name: Name of the SQE microcode file */
+	const char *sqefw_name;
+	/** @gmufw_name: Name of the GMU firmware file */
+	const char *gmufw_name;
+	/** @zap_name: Name of the CPZ zap file */
+	const char *zap_name;
+	/** @hwcg: List of registers and values to write for HWCG */
+	const struct kgsl_regmap_list *hwcg;
+	/** @hwcg_count: Number of registers in @hwcg */
+	u32 hwcg_count;
+	/** @vbif: List of registers and values to write for VBIF */
+	const struct kgsl_regmap_list *vbif;
+	/** @vbif_count: Number of registers in @vbif */
+	u32 vbif_count;
+	/** @veto_fal10: veto status for fal10 feature */
+	bool veto_fal10;
+	/** @pdc_in_aop: True if PDC programmed in AOP */
+	bool pdc_in_aop;
+	/** @hang_detect_cycles: Hang detect counter timeout value */
+	u32 hang_detect_cycles;
+	/** @protected_regs: Array of protected registers for the target */
+	const struct adreno_protected_regs *protected_regs;
+	/** @disable_tseskip: True if TSESkip logic is disabled */
+	bool disable_tseskip;
+	/** @gx_cpr_toggle: True to toggle GX CPR FSM to avoid CPR stalls */
+	bool gx_cpr_toggle;
+	/** @highest_bank_bit: The bit of the highest DDR bank */
+	u32 highest_bank_bit;
+	/** @ctxt_record_size: Size of the preemption record in bytes */
+	u64 ctxt_record_size;
+	/** @gmu_hub_clk_freq: Gmu hub interface clock frequency */
+	u64 gmu_hub_clk_freq;
+};
+
+#define SPTPRAC_POWERON_CTRL_MASK	0x00778000
+#define SPTPRAC_POWEROFF_CTRL_MASK	0x00778001
+#define SPTPRAC_POWEROFF_STATUS_MASK	BIT(2)
+#define SPTPRAC_POWERON_STATUS_MASK	BIT(3)
+#define A6XX_RETAIN_FF_ENABLE_ENABLE_MASK BIT(11)
+
+#define CP_CLUSTER_FE		0x0
+#define CP_CLUSTER_SP_VS	0x1
+#define CP_CLUSTER_PC_VS	0x2
+#define CP_CLUSTER_GRAS		0x3
+#define CP_CLUSTER_SP_PS	0x4
+#define CP_CLUSTER_PS		0x5
+#define CP_CLUSTER_VPC_PS	0x6
+
+/**
+ * struct a6xx_cp_preemption_record - CP context record for
+ * preemption.
+ * @magic: (00) Value at this offset must be equal to
+ * A6XX_CP_CTXRECORD_MAGIC_REF.
+ * @info: (04) Type of record. Written non-zero (usually) by CP;
+ * we must set it to zero for all ringbuffers.
+ * @errno: (08) Error code. Initialize this to A6XX_CP_CTXRECORD_ERROR_NONE.
+ * CP will update to another value if a preemption error occurs.
+ * @data: (12) DATA field in YIELD and SET_MARKER packets.
+ * Written by CP when switching out. Not used on switch-in. Initialized to 0.
+ * @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this.
+ * @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this.
+ * @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this.
+ * @_pad28: (28) Reserved/padding.
+ * @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
+ * @rbase: (40) RB_BASE_LO|HI saved and restored.
+ * @counter: (48) Pointer to preemption counter.
+ */
+struct a6xx_cp_preemption_record {
+	uint32_t  magic;
+	uint32_t  info;
+	uint32_t  errno;
+	uint32_t  data;
+	uint32_t  cntl;
+	uint32_t  rptr;
+	uint32_t  wptr;
+	uint32_t  _pad28;
+	uint64_t  rptr_addr;
+	uint64_t  rbase;
+	uint64_t  counter;
+};
+
+/**
+ * struct a6xx_cp_smmu_info - CP preemption SMMU info.
+ * @magic: (00) The value at this offset must be equal to
+ * A6XX_CP_SMMU_INFO_MAGIC_REF.
+ * @_pad4: (04) Reserved/padding
+ * @ttbr0: (08) Base address of the page table for the
+ * incoming context.
+ * @asid: (16) Address Space Identifier for the incoming context.
+ * @context_idr: (20) Context Identification Register value.
+ */
+struct a6xx_cp_smmu_info {
+	uint32_t  magic;
+	uint32_t  _pad4;
+	uint64_t  ttbr0;
+	uint32_t  asid;
+	uint32_t  context_idr;
+};
+
+#define A6XX_CP_SMMU_INFO_MAGIC_REF     0x241350D5UL
+
+#define A6XX_CP_CTXRECORD_MAGIC_REF     0xAE399D6EUL
+/* Size of each CP preemption record */
+#define A6XX_CP_CTXRECORD_SIZE_IN_BYTES     (2112 * 1024)
+/* Size of the user context record block (in bytes) */
+#define A6XX_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024)
+/* Size of the performance counter save/restore block (in bytes) */
+#define A6XX_CP_PERFCOUNTER_SAVE_RESTORE_SIZE   (4 * 1024)
+
+#define A6XX_CP_RB_CNTL_DEFAULT (((ilog2(4) << 8) & 0x1F00) | \
+		(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F))
+
+/* Size of the CP_INIT pm4 stream in dwords */
+#define A6XX_CP_INIT_DWORDS 11
+
+#define A6XX_INT_MASK \
+	((1 << A6XX_INT_CP_AHB_ERROR) |			\
+	 (1 << A6XX_INT_ATB_ASYNCFIFO_OVERFLOW) |	\
+	 (1 << A6XX_INT_RBBM_GPC_ERROR) |		\
+	 (1 << A6XX_INT_CP_SW) |			\
+	 (1 << A6XX_INT_CP_HW_ERROR) |			\
+	 (1 << A6XX_INT_CP_IB2) |			\
+	 (1 << A6XX_INT_CP_IB1) |			\
+	 (1 << A6XX_INT_CP_RB) |			\
+	 (1 << A6XX_INT_CP_CACHE_FLUSH_TS) |		\
+	 (1 << A6XX_INT_RBBM_ATB_BUS_OVERFLOW) |	\
+	 (1 << A6XX_INT_RBBM_HANG_DETECT) |		\
+	 (1 << A6XX_INT_UCHE_OOB_ACCESS) |		\
+	 (1 << A6XX_INT_UCHE_TRAP_INTR) |		\
+	 (1 << A6XX_INT_TSB_WRITE_ERROR))
+
+#define A6XX_HWSCHED_INT_MASK \
+	((1 << A6XX_INT_CP_AHB_ERROR) |			\
+	 (1 << A6XX_INT_ATB_ASYNCFIFO_OVERFLOW) |	\
+	 (1 << A6XX_INT_RBBM_ATB_BUS_OVERFLOW) |	\
+	 (1 << A6XX_INT_UCHE_OOB_ACCESS) |		\
+	 (1 << A6XX_INT_UCHE_TRAP_INTR) |		\
+	 (1 << A6XX_INT_TSB_WRITE_ERROR))
+
+/**
+ * to_a6xx_core - return the a6xx specific GPU core struct
+ * @adreno_dev: An Adreno GPU device handle
+ *
+ * Returns:
+ * A pointer to the a6xx specific GPU core struct
+ */
+static inline const struct adreno_a6xx_core *
+to_a6xx_core(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpu_core *core = adreno_dev->gpucore;
+
+	return container_of(core, struct adreno_a6xx_core, base);
+}
+
+/* Preemption functions */
+void a6xx_preemption_trigger(struct adreno_device *adreno_dev, bool atomic);
+void a6xx_preemption_schedule(struct adreno_device *adreno_dev);
+void a6xx_preemption_start(struct adreno_device *adreno_dev);
+int a6xx_preemption_init(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_preemption_post_ibsubmit - Insert commands following a submission
+ * @adreno_dev: Adreno GPU handle
+ * @cmds: Pointer to the ringbuffer to insert opcodes
+ *
+ * Return: The number of dwords written to @cmds
+ */
+u32 a6xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds);
+
+/**
+ * a6xx_preemption_pre_ibsubmit - Insert opcodes before a submission
+ * @adreno_dev: Adreno GPU handle
+ * @rb: The ringbuffer being written
+ * @drawctxt: The draw context being written
+ * @cmds: Pointer to the ringbuffer to insert opcodes
+ *
+ * Return: The number of dwords written to @cmds
+ */
+u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 *cmds);
+
+unsigned int a6xx_set_marker(unsigned int *cmds,
+		enum adreno_cp_marker_type type);
+
+void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit);
+
+int a6xx_preemption_context_init(struct kgsl_context *context);
+
+void a6xx_preemption_context_destroy(struct kgsl_context *context);
+
+void a6xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+void a6xx_crashdump_init(struct adreno_device *adreno_dev);
+int a6xx_gmu_sptprac_enable(struct adreno_device *adreno_dev);
+void a6xx_gmu_sptprac_disable(struct adreno_device *adreno_dev);
+bool a6xx_gmu_sptprac_is_on(struct adreno_device *adreno_dev);
+bool a619_holi_gx_is_on(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_read_alwayson - Read the current always on clock value
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Return: The current value of the GMU always on counter
+ */
+u64 a6xx_read_alwayson(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_start - Program a6xx registers
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function does all a6xx register programming every
+ * time we boot the gpu
+ */
+void a6xx_start(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_init - Initialize a6xx resources
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function does a6xx specific one time initialization
+ * and is invoked when the very first client opens a
+ * kgsl instance
+ *
+ * Return: Zero on success and negative error on failure
+ */
+int a6xx_init(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_rb_start - A6xx specific ringbuffer setup
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function does a6xx specific ringbuffer setup and
+ * attempts to submit CP INIT and bring GPU out of secure mode
+ *
+ * Return: Zero on success and negative error on failure
+ */
+int a6xx_rb_start(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_microcode_read - Get the cp microcode from the filesystem
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function gets the firmware from the filesystem and sets up
+ * the microcode global buffer
+ *
+ * Return: Zero on success and negative error on failure
+ */
+int a6xx_microcode_read(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_probe_common - Probe common a6xx resources
+ * @pdev: Pointer to the platform device
+ * @adreno_dev: Pointer to the adreno device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore structure
+ *
+ * This function sets up the a6xx resources common across all
+ * a6xx targets
+ */
+int a6xx_probe_common(struct platform_device *pdev,
+	struct adreno_device *adreno_dev, u32 chipid,
+	const struct adreno_gpu_core *gpucore);
+
+/**
+ * a6xx_hw_isidle - Check whether a6xx gpu is idle or not
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Return: True if gpu is idle, otherwise false
+ */
+bool a6xx_hw_isidle(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_spin_idle_debug - Debug logging used when gpu fails to idle
+ * @adreno_dev: An Adreno GPU handle
+ * @str: Message describing the idle failure
+ *
+ * This function logs interesting registers and triggers a snapshot
+ */
+void a6xx_spin_idle_debug(struct adreno_device *adreno_dev,
+	const char *str);
+
+/**
+ * a6xx_perfcounter_update - Update the IFPC perfcounter list
+ * @adreno_dev: An Adreno GPU handle
+ * @reg: Perfcounter reg struct to add/remove to the list
+ * @update_reg: true if the perfcounter needs to be programmed by the CPU
+ *
+ * Return: 0 on success or -EBUSY if the lock couldn't be taken
+ */
+int a6xx_perfcounter_update(struct adreno_device *adreno_dev,
+	struct adreno_perfcount_register *reg, bool update_reg);
+
+/**
+ * a6xx_ringbuffer_init - Initialize the ringbuffers
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Initialize the ringbuffer(s) for a6xx.
+ * Return: 0 on success or negative on failure
+ */
+int a6xx_ringbuffer_init(struct adreno_device *adreno_dev);
+
+extern const struct adreno_perfcounters adreno_a630_perfcounters;
+extern const struct adreno_perfcounters adreno_a6xx_perfcounters;
+extern const struct adreno_perfcounters adreno_a6xx_legacy_perfcounters;
+extern const struct adreno_perfcounters adreno_a6xx_hwsched_perfcounters;
+
+/**
+ * a6xx_rdpm_mx_freq_update - Update the mx frequency
+ * @gmu: An Adreno GMU handle
+ * @freq: Frequency in KHz
+ *
+ * This function communicates GPU mx frequency (in MHz) changes to rdpm.
+ */
+void a6xx_rdpm_mx_freq_update(struct a6xx_gmu_device *gmu, u32 freq);
+
+/**
+ * a6xx_rdpm_cx_freq_update - Update the cx frequency
+ * @gmu: An Adreno GMU handle
+ * @freq: Frequency in KHz
+ *
+ * This function communicates GPU cx frequency (in MHz) changes to rdpm.
+ */
+void a6xx_rdpm_cx_freq_update(struct a6xx_gmu_device *gmu, u32 freq);
+
+/**
+ * a6xx_ringbuffer_addcmds - Submit a command to the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ * @rb: Pointer to the ringbuffer to submit on
+ * @drawctxt: Pointer to the draw context for the submission, or NULL for
+ * internal submissions
+ * @flags: Flags for the submission
+ * @in: Commands to write to the ringbuffer
+ * @dwords: Size of @in (in dwords)
+ * @timestamp: Timestamp for the submission
+ * @time: Optional pointer to a submit time structure
+ *
+ * Submit a command to the ringbuffer.
+ * Return: 0 on success or negative on failure
+ */
+int a6xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 flags, u32 *in, u32 dwords, u32 timestamp,
+		struct adreno_submit_time *time);
+/**
+ * a6xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ * @cmdobj: Pointer to a user command object
+ * @flags: Internal submit flags
+ * @time: Optional pointer to an adreno_submit_time container
+ *
+ * Return: 0 on success or negative on failure
+ */
+int a6xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time);
+
+
+int a6xx_fenced_write(struct adreno_device *adreno_dev, u32 offset,
+		u32 value, u32 mask);
+
+int a6xx_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time, bool sync);
+
+void a6xx_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds);
+
+int a6xx_gmu_hfi_probe(struct adreno_device *adreno_dev);
+
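+/**
+ * to_a6xx_gpudev - Get the wrapping a6xx_gpudev from a generic gpudev
+ * @gpudev: Pointer to the embedded generic adreno gpudev
+ *
+ * Return: The containing a6xx_gpudev structure
+ */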
+static inline const struct a6xx_gpudev *
+to_a6xx_gpudev(const struct adreno_gpudev *gpudev)
+{
+	return container_of(gpudev, struct a6xx_gpudev, base);
+}
+
+/**
+ * a6xx_reset_preempt_records - Reset the preemption buffers
+ * @adreno_dev: Handle to the adreno device
+ *
+ * Reset the preemption records at the time of hard reset
+ */
+void a6xx_reset_preempt_records(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_irq_pending - Check if there is any gpu irq pending
+ * @adreno_dev: Handle to the adreno device
+ *
+ * Return: true if there is any gpu irq pending
+ */
+bool a6xx_irq_pending(struct adreno_device *adreno_dev);
+
+#ifdef CONFIG_QCOM_KGSL_CORESIGHT
+void a6xx_coresight_init(struct adreno_device *device);
+#else
+static inline void a6xx_coresight_init(struct adreno_device *device) { }
+#endif
+
+#endif

+ 432 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_coresight.c

@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_coresight.h"
+
+static struct adreno_coresight_register a6xx_coresight_regs[] = {
+	{ A6XX_DBGC_CFG_DBGBUS_SEL_A },
+	{ A6XX_DBGC_CFG_DBGBUS_SEL_B },
+	{ A6XX_DBGC_CFG_DBGBUS_SEL_C },
+	{ A6XX_DBGC_CFG_DBGBUS_SEL_D },
+	{ A6XX_DBGC_CFG_DBGBUS_CNTLT },
+	{ A6XX_DBGC_CFG_DBGBUS_CNTLM },
+	{ A6XX_DBGC_CFG_DBGBUS_OPL },
+	{ A6XX_DBGC_CFG_DBGBUS_OPE },
+	{ A6XX_DBGC_CFG_DBGBUS_IVTL_0 },
+	{ A6XX_DBGC_CFG_DBGBUS_IVTL_1 },
+	{ A6XX_DBGC_CFG_DBGBUS_IVTL_2 },
+	{ A6XX_DBGC_CFG_DBGBUS_IVTL_3 },
+	{ A6XX_DBGC_CFG_DBGBUS_MASKL_0 },
+	{ A6XX_DBGC_CFG_DBGBUS_MASKL_1 },
+	{ A6XX_DBGC_CFG_DBGBUS_MASKL_2 },
+	{ A6XX_DBGC_CFG_DBGBUS_MASKL_3 },
+	{ A6XX_DBGC_CFG_DBGBUS_BYTEL_0 },
+	{ A6XX_DBGC_CFG_DBGBUS_BYTEL_1 },
+	{ A6XX_DBGC_CFG_DBGBUS_IVTE_0 },
+	{ A6XX_DBGC_CFG_DBGBUS_IVTE_1 },
+	{ A6XX_DBGC_CFG_DBGBUS_IVTE_2 },
+	{ A6XX_DBGC_CFG_DBGBUS_IVTE_3 },
+	{ A6XX_DBGC_CFG_DBGBUS_MASKE_0 },
+	{ A6XX_DBGC_CFG_DBGBUS_MASKE_1 },
+	{ A6XX_DBGC_CFG_DBGBUS_MASKE_2 },
+	{ A6XX_DBGC_CFG_DBGBUS_MASKE_3 },
+	{ A6XX_DBGC_CFG_DBGBUS_NIBBLEE },
+	{ A6XX_DBGC_CFG_DBGBUS_PTRC0 },
+	{ A6XX_DBGC_CFG_DBGBUS_PTRC1 },
+	{ A6XX_DBGC_CFG_DBGBUS_LOADREG },
+	{ A6XX_DBGC_CFG_DBGBUS_IDX },
+	{ A6XX_DBGC_CFG_DBGBUS_CLRC },
+	{ A6XX_DBGC_CFG_DBGBUS_LOADIVT },
+	{ A6XX_DBGC_VBIF_DBG_CNTL },
+	{ A6XX_DBGC_DBG_LO_HI_GPIO },
+	{ A6XX_DBGC_EXT_TRACE_BUS_CNTL },
+	{ A6XX_DBGC_READ_AHB_THROUGH_DBG },
+	{ A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 },
+	{ A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 },
+	{ A6XX_DBGC_EVT_CFG },
+	{ A6XX_DBGC_EVT_INTF_SEL_0 },
+	{ A6XX_DBGC_EVT_INTF_SEL_1 },
+	{ A6XX_DBGC_PERF_ATB_CFG },
+	{ A6XX_DBGC_PERF_ATB_COUNTER_SEL_0 },
+	{ A6XX_DBGC_PERF_ATB_COUNTER_SEL_1 },
+	{ A6XX_DBGC_PERF_ATB_COUNTER_SEL_2 },
+	{ A6XX_DBGC_PERF_ATB_COUNTER_SEL_3 },
+	{ A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 },
+	{ A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 },
+	{ A6XX_DBGC_PERF_ATB_DRAIN_CMD },
+	{ A6XX_DBGC_ECO_CNTL },
+	{ A6XX_DBGC_AHB_DBG_CNTL },
+};
+
+static struct adreno_coresight_register a6xx_coresight_regs_cx[] = {
+	{ A6XX_CX_DBGC_CFG_DBGBUS_SEL_A },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_SEL_B },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_SEL_C },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_SEL_D },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_CNTLT },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_CNTLM },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_OPL },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_OPE },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_IVTE_0 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_IVTE_1 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_IVTE_2 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_IVTE_3 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_MASKE_0 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_MASKE_1 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_MASKE_2 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_MASKE_3 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_NIBBLEE },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_PTRC0 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_PTRC1 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_LOADREG },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_IDX },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_CLRC },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_LOADIVT },
+	{ A6XX_CX_DBGC_VBIF_DBG_CNTL },
+	{ A6XX_CX_DBGC_DBG_LO_HI_GPIO },
+	{ A6XX_CX_DBGC_EXT_TRACE_BUS_CNTL },
+	{ A6XX_CX_DBGC_READ_AHB_THROUGH_DBG },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 },
+	{ A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 },
+	{ A6XX_CX_DBGC_EVT_CFG },
+	{ A6XX_CX_DBGC_EVT_INTF_SEL_0 },
+	{ A6XX_CX_DBGC_EVT_INTF_SEL_1 },
+	{ A6XX_CX_DBGC_PERF_ATB_CFG },
+	{ A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_0 },
+	{ A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_1 },
+	{ A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_2 },
+	{ A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_3 },
+	{ A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 },
+	{ A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 },
+	{ A6XX_CX_DBGC_PERF_ATB_DRAIN_CMD },
+	{ A6XX_CX_DBGC_ECO_CNTL },
+	{ A6XX_CX_DBGC_AHB_DBG_CNTL },
+};
+
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a6xx_coresight_regs[0]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a6xx_coresight_regs[1]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a6xx_coresight_regs[2]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a6xx_coresight_regs[3]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a6xx_coresight_regs[4]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a6xx_coresight_regs[5]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a6xx_coresight_regs[6]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a6xx_coresight_regs[7]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a6xx_coresight_regs[8]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a6xx_coresight_regs[9]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a6xx_coresight_regs[10]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a6xx_coresight_regs[11]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a6xx_coresight_regs[12]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a6xx_coresight_regs[13]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a6xx_coresight_regs[14]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a6xx_coresight_regs[15]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a6xx_coresight_regs[16]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a6xx_coresight_regs[17]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a6xx_coresight_regs[18]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a6xx_coresight_regs[19]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a6xx_coresight_regs[20]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a6xx_coresight_regs[21]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a6xx_coresight_regs[22]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a6xx_coresight_regs[23]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a6xx_coresight_regs[24]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a6xx_coresight_regs[25]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a6xx_coresight_regs[26]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a6xx_coresight_regs[27]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a6xx_coresight_regs[28]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a6xx_coresight_regs[29]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a6xx_coresight_regs[30]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a6xx_coresight_regs[31]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a6xx_coresight_regs[32]);
+static ADRENO_CORESIGHT_ATTR(vbif_dbg_cntl, &a6xx_coresight_regs[33]);
+static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a6xx_coresight_regs[34]);
+static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a6xx_coresight_regs[35]);
+static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, &a6xx_coresight_regs[36]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, &a6xx_coresight_regs[37]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, &a6xx_coresight_regs[38]);
+static ADRENO_CORESIGHT_ATTR(evt_cfg, &a6xx_coresight_regs[39]);
+static ADRENO_CORESIGHT_ATTR(evt_intf_sel_0, &a6xx_coresight_regs[40]);
+static ADRENO_CORESIGHT_ATTR(evt_intf_sel_1, &a6xx_coresight_regs[41]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_cfg, &a6xx_coresight_regs[42]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_0, &a6xx_coresight_regs[43]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_1, &a6xx_coresight_regs[44]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_2, &a6xx_coresight_regs[45]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_3, &a6xx_coresight_regs[46]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_0,
+				&a6xx_coresight_regs[47]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_1,
+				&a6xx_coresight_regs[48]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_drain_cmd, &a6xx_coresight_regs[49]);
+static ADRENO_CORESIGHT_ATTR(eco_cntl, &a6xx_coresight_regs[50]);
+static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a6xx_coresight_regs[51]);
+
+/*CX debug registers*/
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_a,
+				&a6xx_coresight_regs_cx[0]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_b,
+				&a6xx_coresight_regs_cx[1]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_c,
+				&a6xx_coresight_regs_cx[2]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_d,
+				&a6xx_coresight_regs_cx[3]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlt,
+				&a6xx_coresight_regs_cx[4]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlm,
+				&a6xx_coresight_regs_cx[5]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_opl,
+				&a6xx_coresight_regs_cx[6]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ope,
+				&a6xx_coresight_regs_cx[7]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_0,
+				&a6xx_coresight_regs_cx[8]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_1,
+				&a6xx_coresight_regs_cx[9]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_2,
+				&a6xx_coresight_regs_cx[10]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_3,
+				&a6xx_coresight_regs_cx[11]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_0,
+				&a6xx_coresight_regs_cx[12]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_1,
+				&a6xx_coresight_regs_cx[13]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_2,
+				&a6xx_coresight_regs_cx[14]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_3,
+				&a6xx_coresight_regs_cx[15]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_0,
+				&a6xx_coresight_regs_cx[16]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_1,
+				&a6xx_coresight_regs_cx[17]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_0,
+				&a6xx_coresight_regs_cx[18]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_1,
+				&a6xx_coresight_regs_cx[19]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_2,
+				&a6xx_coresight_regs_cx[20]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_3,
+				&a6xx_coresight_regs_cx[21]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_0,
+				&a6xx_coresight_regs_cx[22]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_1,
+				&a6xx_coresight_regs_cx[23]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_2,
+				&a6xx_coresight_regs_cx[24]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_3,
+				&a6xx_coresight_regs_cx[25]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_nibblee,
+				&a6xx_coresight_regs_cx[26]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc0,
+				&a6xx_coresight_regs_cx[27]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc1,
+				&a6xx_coresight_regs_cx[28]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadreg,
+				&a6xx_coresight_regs_cx[29]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_idx,
+				&a6xx_coresight_regs_cx[30]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_clrc,
+				&a6xx_coresight_regs_cx[31]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadivt,
+				&a6xx_coresight_regs_cx[32]);
+static ADRENO_CORESIGHT_ATTR(cx_vbif_dbg_cntl,
+				&a6xx_coresight_regs_cx[33]);
+static ADRENO_CORESIGHT_ATTR(cx_dbg_lo_hi_gpio,
+				&a6xx_coresight_regs_cx[34]);
+static ADRENO_CORESIGHT_ATTR(cx_ext_trace_bus_cntl,
+				&a6xx_coresight_regs_cx[35]);
+static ADRENO_CORESIGHT_ATTR(cx_read_ahb_through_dbg,
+				&a6xx_coresight_regs_cx[36]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf1,
+				&a6xx_coresight_regs_cx[37]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf2,
+				&a6xx_coresight_regs_cx[38]);
+static ADRENO_CORESIGHT_ATTR(cx_evt_cfg,
+				&a6xx_coresight_regs_cx[39]);
+static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_0,
+				&a6xx_coresight_regs_cx[40]);
+static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_1,
+				&a6xx_coresight_regs_cx[41]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_cfg,
+				&a6xx_coresight_regs_cx[42]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_0,
+				&a6xx_coresight_regs_cx[43]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_1,
+				&a6xx_coresight_regs_cx[44]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_2,
+				&a6xx_coresight_regs_cx[45]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_3,
+				&a6xx_coresight_regs_cx[46]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_0,
+				&a6xx_coresight_regs_cx[47]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_1,
+				&a6xx_coresight_regs_cx[48]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_drain_cmd,
+				&a6xx_coresight_regs_cx[49]);
+static ADRENO_CORESIGHT_ATTR(cx_eco_cntl,
+				&a6xx_coresight_regs_cx[50]);
+static ADRENO_CORESIGHT_ATTR(cx_ahb_dbg_cntl,
+				&a6xx_coresight_regs_cx[51]);
+
+static struct attribute *a6xx_coresight_attrs[] = {
+	&coresight_attr_cfg_dbgbus_sel_a.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_b.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_c.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_d.attr.attr,
+	&coresight_attr_cfg_dbgbus_cntlt.attr.attr,
+	&coresight_attr_cfg_dbgbus_cntlm.attr.attr,
+	&coresight_attr_cfg_dbgbus_opl.attr.attr,
+	&coresight_attr_cfg_dbgbus_ope.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_bytel_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_bytel_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_nibblee.attr.attr,
+	&coresight_attr_cfg_dbgbus_ptrc0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ptrc1.attr.attr,
+	&coresight_attr_cfg_dbgbus_loadreg.attr.attr,
+	&coresight_attr_cfg_dbgbus_idx.attr.attr,
+	&coresight_attr_cfg_dbgbus_clrc.attr.attr,
+	&coresight_attr_cfg_dbgbus_loadivt.attr.attr,
+	&coresight_attr_vbif_dbg_cntl.attr.attr,
+	&coresight_attr_dbg_lo_hi_gpio.attr.attr,
+	&coresight_attr_ext_trace_bus_cntl.attr.attr,
+	&coresight_attr_read_ahb_through_dbg.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf1.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf2.attr.attr,
+	&coresight_attr_evt_cfg.attr.attr,
+	&coresight_attr_evt_intf_sel_0.attr.attr,
+	&coresight_attr_evt_intf_sel_1.attr.attr,
+	&coresight_attr_perf_atb_cfg.attr.attr,
+	&coresight_attr_perf_atb_counter_sel_0.attr.attr,
+	&coresight_attr_perf_atb_counter_sel_1.attr.attr,
+	&coresight_attr_perf_atb_counter_sel_2.attr.attr,
+	&coresight_attr_perf_atb_counter_sel_3.attr.attr,
+	&coresight_attr_perf_atb_trig_intf_sel_0.attr.attr,
+	&coresight_attr_perf_atb_trig_intf_sel_1.attr.attr,
+	&coresight_attr_perf_atb_drain_cmd.attr.attr,
+	&coresight_attr_eco_cntl.attr.attr,
+	&coresight_attr_ahb_dbg_cntl.attr.attr,
+	NULL,
+};
+
+/*cx*/
+static struct attribute *a6xx_coresight_attrs_cx[] = {
+	&coresight_attr_cx_cfg_dbgbus_sel_a.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_sel_b.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_sel_c.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_sel_d.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_cntlt.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_cntlm.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_opl.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ope.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivtl_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivtl_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivtl_2.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivtl_3.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maskl_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maskl_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maskl_2.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maskl_3.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_bytel_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_bytel_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivte_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivte_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivte_2.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivte_3.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maske_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maske_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maske_2.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maske_3.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_nibblee.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ptrc0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ptrc1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_loadreg.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_idx.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_clrc.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_loadivt.attr.attr,
+	&coresight_attr_cx_vbif_dbg_cntl.attr.attr,
+	&coresight_attr_cx_dbg_lo_hi_gpio.attr.attr,
+	&coresight_attr_cx_ext_trace_bus_cntl.attr.attr,
+	&coresight_attr_cx_read_ahb_through_dbg.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_trace_buf1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_trace_buf2.attr.attr,
+	&coresight_attr_cx_evt_cfg.attr.attr,
+	&coresight_attr_cx_evt_intf_sel_0.attr.attr,
+	&coresight_attr_cx_evt_intf_sel_1.attr.attr,
+	&coresight_attr_cx_perf_atb_cfg.attr.attr,
+	&coresight_attr_cx_perf_atb_counter_sel_0.attr.attr,
+	&coresight_attr_cx_perf_atb_counter_sel_1.attr.attr,
+	&coresight_attr_cx_perf_atb_counter_sel_2.attr.attr,
+	&coresight_attr_cx_perf_atb_counter_sel_3.attr.attr,
+	&coresight_attr_cx_perf_atb_trig_intf_sel_0.attr.attr,
+	&coresight_attr_cx_perf_atb_trig_intf_sel_1.attr.attr,
+	&coresight_attr_cx_perf_atb_drain_cmd.attr.attr,
+	&coresight_attr_cx_eco_cntl.attr.attr,
+	&coresight_attr_cx_ahb_dbg_cntl.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group a6xx_coresight_group = {
+	.attrs = a6xx_coresight_attrs,
+};
+
+static const struct attribute_group *a6xx_coresight_groups[] = {
+	&a6xx_coresight_group,
+	NULL,
+};
+
+static const struct attribute_group a6xx_coresight_group_cx = {
+	.attrs = a6xx_coresight_attrs_cx,
+};
+
+static const struct attribute_group *a6xx_coresight_groups_cx[] = {
+	&a6xx_coresight_group_cx,
+	NULL,
+};
+
+static const struct adreno_coresight a6xx_coresight = {
+	.registers = a6xx_coresight_regs,
+	.count = ARRAY_SIZE(a6xx_coresight_regs),
+	.groups = a6xx_coresight_groups,
+};
+
+static const struct adreno_coresight a6xx_coresight_cx = {
+	.registers = a6xx_coresight_regs_cx,
+	.count = ARRAY_SIZE(a6xx_coresight_regs_cx),
+	.groups = a6xx_coresight_groups_cx,
+};
+
+void a6xx_coresight_init(struct adreno_device *adreno_dev)
+{
+	adreno_coresight_add_device(adreno_dev, "coresight-gfx",
+		&a6xx_coresight, &adreno_dev->gx_coresight);
+
+	adreno_coresight_add_device(adreno_dev, "coresight-gfx-cx",
+		&a6xx_coresight_cx, &adreno_dev->cx_coresight);
+}

+ 3863 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_gmu.c

@@ -0,0 +1,3863 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <dt-bindings/regulator/qcom,rpmh-regulator-levels.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/dma-map-ops.h>
+#include <linux/firmware.h>
+#include <linux/interconnect.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/of_platform.h>
+#include <linux/qcom-iommu-util.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/mailbox/qmp.h>
+#include <soc/qcom/cmd-db.h>
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_trace.h"
+#include "kgsl_bus.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+#define ARC_VOTE_GET_PRI(_v) ((_v) & 0xFF)
+#define ARC_VOTE_GET_SEC(_v) (((_v) >> 8) & 0xFF)
+#define ARC_VOTE_GET_VLVL(_v) (((_v) >> 16) & 0xFFFF)
+
+#define ARC_VOTE_SET(pri, sec, vlvl) \
+	((((vlvl) & 0xFFFF) << 16) | (((sec) & 0xFF) << 8) | ((pri) & 0xFF))
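+/*
+ * An ARC vote word packs the primary ARC index in bits 7:0, the secondary
+ * ARC index in bits 15:8 and the voltage level in bits 31:16, as the
+ * GET/SET helpers above show.
+ */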
+
+static struct gmu_vma_entry a6xx_gmu_vma_legacy[] = {
+	[GMU_ITCM] = {
+			.start = 0x00000,
+			.size = SZ_16K
+		},
+	[GMU_ICACHE] = {
+			.start = 0x04000,
+			.size = (SZ_256K - SZ_16K),
+			.next_va = 0x4000
+		},
+	[GMU_DTCM] = {
+			.start = 0x40000,
+			.size = SZ_16K
+		},
+	[GMU_DCACHE] = {
+			.start = 0x44000,
+			.size = (SZ_256K - SZ_16K),
+			.next_va = 0x44000
+		},
+	[GMU_NONCACHED_KERNEL] = {
+			.start = 0x60000000,
+			.size = SZ_512M,
+			.next_va = 0x60000000
+		},
+};
+
+static struct gmu_vma_entry a6xx_gmu_vma[] = {
+	[GMU_ITCM] = {
+			.start = 0x00000000,
+			.size = SZ_16K
+		},
+	[GMU_CACHE] = {
+			.start = SZ_16K,
+			.size = (SZ_16M - SZ_16K),
+			.next_va = SZ_16K
+		},
+	[GMU_DTCM] = {
+			.start = SZ_256M + SZ_16K,
+			.size = SZ_16K
+		},
+	[GMU_DCACHE] = {
+			.start = 0x0,
+			.size = 0x0
+		},
+	[GMU_NONCACHED_KERNEL] = {
+			.start = 0x60000000,
+			.size = SZ_512M,
+			.next_va = 0x60000000
+		},
+};
+
+static void _regwrite(void __iomem *regbase, u32 offsetwords, u32 value)
+{
+	void __iomem *reg;
+
+	reg = regbase + (offsetwords << 2);
+	__raw_writel(value, reg);
+}
+
+static void _regrmw(void __iomem *regbase, u32 offsetwords, u32 mask, u32 or)
+{
+	void __iomem *reg;
+	u32 val;
+
+	reg = regbase + (offsetwords << 2);
+	val = __raw_readl(reg);
+	/* Make sure the read has posted and all pending writes are done */
+	mb();
+	__raw_writel((val & ~mask) | or, reg);
+}
+
+static ssize_t log_stream_enable_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, log_kobj);
+	bool val;
+	int ret;
+
+	ret = kstrtobool(buf, &val);
+	if (ret)
+		return ret;
+
+	gmu->log_stream_enable = val;
+	return count;
+}
+
+static ssize_t log_stream_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, log_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->log_stream_enable);
+}
+
+static ssize_t log_group_mask_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, log_kobj);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	gmu->log_group_mask = val;
+	return count;
+}
+
+static ssize_t log_group_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, log_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->log_group_mask);
+}
+
+static struct kobj_attribute log_stream_enable_attr =
+	__ATTR(log_stream_enable, 0644, log_stream_enable_show, log_stream_enable_store);
+
+static struct kobj_attribute log_group_mask_attr =
+	__ATTR(log_group_mask, 0644, log_group_mask_show, log_group_mask_store);
+
+static struct attribute *log_attrs[] = {
+	&log_stream_enable_attr.attr,
+	&log_group_mask_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(log);
+
+static struct kobj_type log_kobj_type = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = log_groups,
+};
+
+static ssize_t stats_enable_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, stats_kobj);
+	bool val;
+	int ret;
+
+	ret = kstrtobool(buf, &val);
+	if (ret)
+		return ret;
+
+	gmu->stats_enable = val;
+	return count;
+}
+
+static ssize_t stats_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, stats_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->stats_enable);
+}
+
+static ssize_t stats_mask_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, stats_kobj);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	gmu->stats_mask = val;
+	return count;
+}
+
+static ssize_t stats_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, stats_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->stats_mask);
+}
+
+static ssize_t stats_interval_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, stats_kobj);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	gmu->stats_interval = val;
+	return count;
+}
+
+static ssize_t stats_interval_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct a6xx_gmu_device *gmu = container_of(kobj, struct a6xx_gmu_device, stats_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->stats_interval);
+}
+
+static struct kobj_attribute stats_enable_attr =
+	__ATTR(stats_enable, 0644, stats_enable_show, stats_enable_store);
+
+static struct kobj_attribute stats_mask_attr =
+	__ATTR(stats_mask, 0644, stats_mask_show, stats_mask_store);
+
+static struct kobj_attribute stats_interval_attr =
+	__ATTR(stats_interval, 0644, stats_interval_show, stats_interval_store);
+
+static struct attribute *stats_attrs[] = {
+	&stats_enable_attr.attr,
+	&stats_mask_attr.attr,
+	&stats_interval_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(stats);
+
+static struct kobj_type stats_kobj_type = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = stats_groups,
+};
+
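+/*
+ * Poll an RSCC register until it holds the expected (masked) value. On
+ * a650-family parts the RSCC has its own register block (rscc_virt); older
+ * parts access it at a fixed legacy offset within the GMU register space.
+ */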
+static int timed_poll_check_rscc(struct kgsl_device *device,
+		unsigned int offset, unsigned int expected_ret,
+		unsigned int timeout, unsigned int mask)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u32 value;
+
+	if (!adreno_is_a650_family(adreno_dev))
+		return gmu_core_timed_poll_check(device,
+				offset + RSCC_OFFSET_LEGACY,
+				expected_ret, timeout, mask);
+
+	return readl_poll_timeout(gmu->rscc_virt + (offset << 2), value,
+		(value & mask) == expected_ret, 100, timeout * 1000);
+}
+
+struct a6xx_gmu_device *to_a6xx_gmu(struct adreno_device *adreno_dev)
+{
+	struct a6xx_device *a6xx_dev = container_of(adreno_dev,
+					struct a6xx_device, adreno_dev);
+
+	return &a6xx_dev->gmu;
+}
+
+struct adreno_device *a6xx_gmu_to_adreno(struct a6xx_gmu_device *gmu)
+{
+	struct a6xx_device *a6xx_dev =
+			container_of(gmu, struct a6xx_device, gmu);
+
+	return &a6xx_dev->adreno_dev;
+}
+
+#define RSC_CMD_OFFSET 2
+#define PDC_CMD_OFFSET 4
+#define PDC_ENABLE_REG_VALUE 0x80000001
+
+void a6xx_load_rsc_ucode(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	void __iomem *rscc;
+
+	if (adreno_is_a650_family(adreno_dev))
+		rscc = gmu->rscc_virt;
+	else
+		rscc = kgsl_regmap_virt(&device->regmap, RSCC_OFFSET_LEGACY);
+
+	/* Disable SDE clock gating */
+	_regwrite(rscc, A6XX_GPU_RSCC_RSC_STATUS0_DRV0, BIT(24));
+
+	/* Setup RSC PDC handshake for sleep and wakeup */
+	_regwrite(rscc, A6XX_RSCC_PDC_SLAVE_ID_DRV0, 1);
+	_regwrite(rscc, A6XX_RSCC_HIDDEN_TCS_CMD0_DATA, 0);
+	_regwrite(rscc, A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR, 0);
+	_regwrite(rscc, A6XX_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET, 0);
+	_regwrite(rscc, A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET, 0);
+	_regwrite(rscc, A6XX_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET * 2,
+			0x80000000);
+	_regwrite(rscc, A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET * 2,
+			0);
+	_regwrite(rscc, A6XX_RSCC_OVERRIDE_START_ADDR, 0);
+	_regwrite(rscc, A6XX_RSCC_PDC_SEQ_START_ADDR, 0x4520);
+	_regwrite(rscc, A6XX_RSCC_PDC_MATCH_VALUE_LO, 0x4510);
+	_regwrite(rscc, A6XX_RSCC_PDC_MATCH_VALUE_HI, 0x4514);
+
+	/* Load RSC sequencer uCode for sleep and wakeup */
+	if (adreno_is_a650_family(adreno_dev)) {
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0, 0xEAAAE5A0);
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0 + 1, 0xE1A1EBAB);
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0 + 2, 0xA2E0A581);
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0 + 3, 0xECAC82E2);
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0 + 4, 0x0020EDAD);
+	} else {
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0, 0xA7A506A0);
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0 + 1, 0xA1E6A6E7);
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0 + 2, 0xA2E081E1);
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0 + 3, 0xE9A982E2);
+		_regwrite(rscc, A6XX_RSCC_SEQ_MEM_0_DRV0 + 4, 0x0020E8A8);
+	}
+}
+
+int a6xx_load_pdc_ucode(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct resource *res_pdc, *res_cfg, *res_seq;
+	unsigned int cfg_offset, seq_offset;
+	void __iomem *cfg = NULL, *seq = NULL;
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	u32 vrm_resource_addr = cmd_db_read_addr("vrm.soc");
+	u32 xo_resource_addr = cmd_db_read_addr("xo.lvl");
+	u32 cx_res_addr = cmd_db_read_addr("cx.lvl");
+	u32 mx_res_addr = cmd_db_read_addr("mx.lvl");
+
+	if (!xo_resource_addr) {
+		dev_err(&gmu->pdev->dev,
+				"Failed to get 'xo.lvl' addr from cmd_db\n");
+		return -ENOENT;
+	}
+
+	if (!cx_res_addr) {
+		dev_err(&gmu->pdev->dev,
+				"Failed to get 'cx.lvl' addr from cmd_db\n");
+		return -ENOENT;
+	}
+
+	if (!mx_res_addr) {
+		dev_err(&gmu->pdev->dev,
+				"Failed to get 'mx.lvl' addr from cmd_db\n");
+		return -ENOENT;
+	}
+	/*
+	 * Older A6x platforms specified PDC registers in the DT using a
+	 * single base pointer that encompassed the entire PDC range. Current
+	 * targets specify the individual GPU-owned PDC register blocks
+	 * (sequence and config).
+	 *
+	 * This code handles both possibilities and generates individual
+	 * pointers to the GPU PDC blocks, either as offsets from the single
+	 * base, or as directly specified ranges.
+	 *
+	 * PDC programming has moved to AOP for newer A6x platforms.
+	 * However, registers to enable the GPU PDC and set the sequence start
+	 * address still need to be programmed.
+	 */
+
+	/* Offsets from the base PDC (if no PDC subsections in the DTSI) */
+	if (adreno_is_a640v2(adreno_dev)) {
+		cfg_offset = 0x90000;
+		seq_offset = 0x290000;
+	} else {
+		cfg_offset = 0x80000;
+		seq_offset = 0x280000;
+	}
+
+	/* Get pointers to each of the possible PDC resources */
+	res_pdc = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM,
+			"kgsl_gmu_pdc_reg");
+	res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM,
+			"kgsl_gmu_pdc_cfg");
+
+	/*
+	 * Map the starting address for pdc_cfg programming. If the pdc_cfg
+	 * resource is not available use an offset from the base PDC resource.
+	 */
+	if (gmu->pdc_cfg_base == NULL) {
+		if (res_cfg)
+			gmu->pdc_cfg_base = devm_ioremap(&gmu->pdev->dev,
+				res_cfg->start, resource_size(res_cfg));
+		else if (res_pdc)
+			gmu->pdc_cfg_base = devm_ioremap(&gmu->pdev->dev,
+				res_pdc->start + cfg_offset, 0x10000);
+
+		if (!gmu->pdc_cfg_base) {
+			dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n");
+			return -ENODEV;
+		}
+	}
+
+	cfg = gmu->pdc_cfg_base;
+
+	/* PDC is programmed in AOP for newer platforms */
+	if (a6xx_core->pdc_in_aop)
+		goto done;
+
+	/*
+	 * Map the starting address for pdc_seq programming. If the pdc_seq
+	 * resource is not available use an offset from the base PDC resource.
+	 */
+	if (gmu->pdc_seq_base == NULL) {
+		res_seq = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM,
+				"kgsl_gmu_pdc_seq");
+
+		if (res_seq)
+			gmu->pdc_seq_base = devm_ioremap(&gmu->pdev->dev,
+				res_seq->start, resource_size(res_seq));
+		else if (res_pdc)
+			gmu->pdc_seq_base = devm_ioremap(&gmu->pdev->dev,
+				res_pdc->start + seq_offset, 0x10000);
+
+		if (!gmu->pdc_seq_base) {
+			dev_err(&gmu->pdev->dev, "Failed to map PDC SEQ\n");
+			return -ENODEV;
+		}
+	}
+
+	seq = gmu->pdc_seq_base;
+
+	/* Load PDC sequencer uCode for power up and power down sequence */
+	_regwrite(seq, PDC_GPU_SEQ_MEM_0, 0xFEBEA1E1);
+	_regwrite(seq, PDC_GPU_SEQ_MEM_0 + 1, 0xA5A4A3A2);
+	_regwrite(seq, PDC_GPU_SEQ_MEM_0 + 2, 0x8382A6E0);
+	_regwrite(seq, PDC_GPU_SEQ_MEM_0 + 3, 0xBCE3E284);
+	_regwrite(seq, PDC_GPU_SEQ_MEM_0 + 4, 0x002081FC);
+
+	/* Set TCS commands used by PDC sequence for low power modes */
+	_regwrite(cfg, PDC_GPU_TCS1_CMD_ENABLE_BANK, 7);
+	_regwrite(cfg, PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK, 0);
+	_regwrite(cfg, PDC_GPU_TCS1_CONTROL, 0);
+	_regwrite(cfg, PDC_GPU_TCS1_CMD0_MSGID, 0x10108);
+	_regwrite(cfg, PDC_GPU_TCS1_CMD0_ADDR, mx_res_addr);
+	_regwrite(cfg, PDC_GPU_TCS1_CMD0_DATA, 1);
+	_regwrite(cfg, PDC_GPU_TCS1_CMD0_MSGID + PDC_CMD_OFFSET, 0x10108);
+	_regwrite(cfg, PDC_GPU_TCS1_CMD0_ADDR + PDC_CMD_OFFSET, cx_res_addr);
+	_regwrite(cfg, PDC_GPU_TCS1_CMD0_DATA + PDC_CMD_OFFSET, 0x0);
+	_regwrite(cfg, PDC_GPU_TCS1_CMD0_MSGID + PDC_CMD_OFFSET * 2, 0x10108);
+
+	_regwrite(cfg, PDC_GPU_TCS1_CMD0_ADDR + PDC_CMD_OFFSET * 2,
+			xo_resource_addr);
+
+	_regwrite(cfg, PDC_GPU_TCS1_CMD0_DATA + PDC_CMD_OFFSET * 2, 0x0);
+
+	if (vrm_resource_addr && adreno_is_a620(adreno_dev)) {
+		_regwrite(cfg, PDC_GPU_TCS1_CMD0_MSGID + PDC_CMD_OFFSET * 3,
+				0x10108);
+		_regwrite(cfg, PDC_GPU_TCS1_CMD0_ADDR + PDC_CMD_OFFSET * 3,
+				vrm_resource_addr + 0x4);
+		_regwrite(cfg, PDC_GPU_TCS1_CMD0_DATA + PDC_CMD_OFFSET * 3,
+				0x0);
+	}
+
+	_regwrite(cfg, PDC_GPU_TCS3_CMD_ENABLE_BANK, 7);
+	_regwrite(cfg, PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK, 0);
+	_regwrite(cfg, PDC_GPU_TCS3_CONTROL, 0);
+	_regwrite(cfg, PDC_GPU_TCS3_CMD0_MSGID, 0x10108);
+	_regwrite(cfg, PDC_GPU_TCS3_CMD0_ADDR, mx_res_addr);
+	_regwrite(cfg, PDC_GPU_TCS3_CMD0_DATA, 2);
+	_regwrite(cfg, PDC_GPU_TCS3_CMD0_MSGID + PDC_CMD_OFFSET, 0x10108);
+	_regwrite(cfg, PDC_GPU_TCS3_CMD0_ADDR + PDC_CMD_OFFSET, cx_res_addr);
+
+	if (adreno_is_a618(adreno_dev) || adreno_is_a619(adreno_dev) ||
+			adreno_is_a650_family(adreno_dev))
+		_regwrite(cfg, PDC_GPU_TCS3_CMD0_DATA + PDC_CMD_OFFSET, 0x2);
+	else
+		_regwrite(cfg, PDC_GPU_TCS3_CMD0_DATA + PDC_CMD_OFFSET, 0x3);
+
+	_regwrite(cfg, PDC_GPU_TCS3_CMD0_MSGID + PDC_CMD_OFFSET * 2, 0x10108);
+
+	_regwrite(cfg, PDC_GPU_TCS3_CMD0_ADDR + PDC_CMD_OFFSET * 2,
+			xo_resource_addr);
+
+	_regwrite(cfg, PDC_GPU_TCS3_CMD0_DATA + PDC_CMD_OFFSET * 2, 0x3);
+
+	if (vrm_resource_addr && adreno_is_a620(adreno_dev)) {
+		_regwrite(cfg, PDC_GPU_TCS3_CMD0_MSGID + PDC_CMD_OFFSET * 3,
+				0x10108);
+		_regwrite(cfg, PDC_GPU_TCS3_CMD0_ADDR + PDC_CMD_OFFSET * 3,
+				vrm_resource_addr + 0x4);
+		_regwrite(cfg, PDC_GPU_TCS3_CMD0_DATA + PDC_CMD_OFFSET * 3,
+				0x1);
+	}
+
+done:
+	/* Setup GPU PDC */
+	_regwrite(cfg, PDC_GPU_SEQ_START_ADDR, 0);
+	_regwrite(cfg, PDC_GPU_ENABLE_PDC, PDC_ENABLE_REG_VALUE);
+
+	/* Make sure the sequencer uCode and PDC writes are posted before continuing */
+	wmb();
+	return 0;
+}
+
+/* GMU timeouts */
+#define GMU_IDLE_TIMEOUT	100	/* ms */
+#define GMU_START_TIMEOUT	100	/* ms */
+#define GPU_START_TIMEOUT	100	/* ms */
+#define GPU_RESET_TIMEOUT	1	/* ms */
+#define GPU_RESET_TIMEOUT_US	10	/* us */
+
+/*
+ * The lowest 16 bits of this value are the number of XO clock cycles
+ * for main hysteresis. This is the first hysteresis. Here we set it
+ * to 0x1680 cycles, or 300 us. The highest 16 bits of this value are
+ * the number of XO clock cycles for short hysteresis. This happens
+ * after main hysteresis. Here we set it to 0xA cycles, or 0.5 us.
+ */
+#define A6X_GMU_LONG_IFPC_HYST	FIELD_PREP(GENMASK(15, 0), 0x1680)
+#define A6X_GMU_SHORT_IFPC_HYST	FIELD_PREP(GENMASK(31, 16), 0xA)
+
+/* Minimum IFPC timer (200usec) allowed to override default value */
+#define A6X_GMU_LONG_IFPC_HYST_FLOOR	FIELD_PREP(GENMASK(15, 0), 0x0F00)
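+
+/*
+ * Sanity check of the encodings above, assuming the standard 19.2 MHz XO
+ * clock: 0x1680 = 5760 cycles ~= 300 us, 0x0F00 = 3840 cycles ~= 200 us and
+ * 0xA = 10 cycles ~= 0.5 us.
+ */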
+
+/*
+ * a6xx_gmu_power_config() - Configure and enable GMU's low power mode
+ * setting based on ADRENO feature flags.
+ * @adreno_dev: Pointer to adreno device
+ */
+static void a6xx_gmu_power_config(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	/* Configure registers for idle setting. The setting is cumulative */
+
+	/* Disable GMU WB/RB buffer and caches at boot */
+	gmu_core_regwrite(device, A6XX_GMU_SYS_BUS_CONFIG, 0x1);
+	gmu_core_regwrite(device, A6XX_GMU_ICACHE_CONFIG, 0x1);
+	gmu_core_regwrite(device, A6XX_GMU_DCACHE_CONFIG, 0x1);
+
+	gmu_core_regwrite(device,
+		A6XX_GMU_PWR_COL_INTER_FRAME_CTRL,  0x9C40400);
+
+	if (gmu->idle_level == GPU_HW_IFPC) {
+		gmu_core_regwrite(device, A6XX_GMU_PWR_COL_INTER_FRAME_HYST,
+				A6X_GMU_SHORT_IFPC_HYST | adreno_dev->ifpc_hyst);
+		gmu_core_regrmw(device, A6XX_GMU_PWR_COL_INTER_FRAME_CTRL,
+				IFPC_ENABLE_MASK, IFPC_ENABLE_MASK);
+
+		gmu_core_regwrite(device, A6XX_GMU_PWR_COL_SPTPRAC_HYST,
+				A6X_GMU_SHORT_IFPC_HYST | adreno_dev->ifpc_hyst);
+		gmu_core_regrmw(device, A6XX_GMU_PWR_COL_INTER_FRAME_CTRL,
+				SPTP_ENABLE_MASK, SPTP_ENABLE_MASK);
+	}
+
+	/* Enable RPMh GPU client */
+	gmu_core_regrmw(device, A6XX_GMU_RPMH_CTRL, RPMH_ENABLE_MASK,
+		RPMH_ENABLE_MASK);
+}
+
+static void gmu_ao_sync_event(struct adreno_device *adreno_dev)
+{
+	unsigned long flags;
+	u64 ticks;
+
+	local_irq_save(flags);
+
+	/* Read GMU always on register */
+	ticks = a6xx_read_alwayson(adreno_dev);
+
+	/* Trace the GMU time to create a mapping to ftrace time */
+	trace_gmu_ao_sync(ticks);
+
+	local_irq_restore(flags);
+}
+
+void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC))
+		regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_IDLE);
+
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC))
+		regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_NORMAL);
+}
+
+int a6xx_gmu_device_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u32 val = 0x00000100;
+	u32 mask = 0x000001FF;
+
+	gmu_core_reset_trace_header(&gmu->trace);
+	gmu_ao_sync_event(adreno_dev);
+
+	/* Check for 0xBABEFACE on legacy targets */
+	if (gmu->ver.core <= 0x20010004) {
+		val = 0xBABEFACE;
+		mask = 0xFFFFFFFF;
+	}
+
+	/* Bring GMU out of reset */
+	gmu_core_regwrite(device, A6XX_GMU_CM3_SYSRESET, 0);
+
+	/* Make sure the write is posted before moving ahead */
+	wmb();
+
+	if (gmu_core_timed_poll_check(device,
+			A6XX_GMU_CM3_FW_INIT_RESULT,
+			val, GMU_START_TIMEOUT, mask)) {
+		dev_err(&gmu->pdev->dev, "GMU failed to boot\n");
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/*
+ * a6xx_gmu_hfi_start() - Write registers and start HFI.
+ * @adreno_dev: Pointer to adreno device
+ */
+int a6xx_gmu_hfi_start(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	gmu_core_regwrite(device, A6XX_GMU_HFI_CTRL_INIT, 1);
+
+	if (gmu_core_timed_poll_check(device,
+			A6XX_GMU_HFI_CTRL_STATUS,
+			BIT(0),
+			GMU_START_TIMEOUT,
+			BIT(0))) {
+		dev_err(&gmu->pdev->dev, "GMU HFI init failed\n");
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+int a6xx_rscc_wakeup_sequence(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct device *dev = &gmu->pdev->dev;
+	int val;
+
+	/* Skip wakeup sequence if we didn't do the sleep sequence */
+	if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags))
+		return 0;
+	/* A660 family targets and their variants use replacement registers */
+	if (adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev))
+		gmu_core_regread(device, A662_GPU_CC_GX_DOMAIN_MISC3, &val);
+	else if (adreno_is_a660(ADRENO_DEVICE(device)) ||
+			adreno_is_a663(adreno_dev))
+		gmu_core_regread(device, A6XX_GPU_CC_GX_DOMAIN_MISC3, &val);
+	else
+		gmu_core_regread(device, A6XX_GPU_CC_GX_DOMAIN_MISC, &val);
+
+	if (!(val & 0x1))
+		dev_info_ratelimited(&gmu->pdev->dev,
+			"GMEM CLAMP IO not set while GFX rail off\n");
+
+	/* RSC wake sequence */
+	gmu_core_regwrite(device, A6XX_GMU_RSCC_CONTROL_REQ, BIT(1));
+
+	/* Write request before polling */
+	wmb();
+
+	if (gmu_core_timed_poll_check(device,
+			A6XX_GMU_RSCC_CONTROL_ACK,
+			BIT(1),
+			GPU_START_TIMEOUT,
+			BIT(1))) {
+		dev_err(dev, "Failed to do GPU RSC power on\n");
+		return -ETIMEDOUT;
+	}
+
+	if (timed_poll_check_rscc(device,
+			A6XX_RSCC_SEQ_BUSY_DRV0,
+			0,
+			GPU_START_TIMEOUT,
+			0xFFFFFFFF))
+		goto error_rsc;
+
+	gmu_core_regwrite(device, A6XX_GMU_RSCC_CONTROL_REQ, 0);
+
+	clear_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags);
+
+	return 0;
+
+error_rsc:
+	dev_err(dev, "GPU RSC sequence stuck in waking up GPU\n");
+	return -ETIMEDOUT;
+}
+
+int a6xx_rscc_sleep_sequence(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return 0;
+
+	if (test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags))
+		return 0;
+
+	gmu_core_regwrite(device, A6XX_GMU_CM3_SYSRESET, 1);
+	/* Make sure M3 is in reset before going on */
+	wmb();
+
+	gmu_core_regread(device, A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP,
+			&gmu->log_wptr_retention);
+
+	gmu_core_regwrite(device, A6XX_GMU_RSCC_CONTROL_REQ, 1);
+	/* Make sure the request completes before continuing */
+	wmb();
+
+	ret = timed_poll_check_rscc(device,
+			A6XX_GPU_RSCC_RSC_STATUS0_DRV0,
+			BIT(16),
+			GPU_START_TIMEOUT,
+			BIT(16));
+
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "GPU RSC power off fail\n");
+		return -ETIMEDOUT;
+	}
+
+	gmu_core_regwrite(device, A6XX_GMU_RSCC_CONTROL_REQ, 0);
+
+	if (adreno_dev->lm_enabled)
+		gmu_core_regwrite(device, A6XX_GMU_AO_SPARE_CNTL, 0);
+
+	set_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags);
+
+	return 0;
+}
+
+static struct kgsl_memdesc *find_gmu_memdesc(struct a6xx_gmu_device *gmu,
+	u32 addr, u32 size)
+{
+	int i;
+
+	for (i = 0; i < gmu->global_entries; i++) {
+		struct kgsl_memdesc *md = &gmu->gmu_globals[i];
+
+		if ((addr >= md->gmuaddr) &&
+		(((addr + size) <= (md->gmuaddr + md->size))))
+			return md;
+	}
+
+	return NULL;
+}
+
+static int find_vma_block(struct a6xx_gmu_device *gmu, u32 addr, u32 size)
+{
+	int i;
+
+	for (i = 0; i < GMU_MEM_TYPE_MAX; i++) {
+		struct gmu_vma_entry *vma = &gmu->vma[i];
+
+		if ((addr >= vma->start) &&
+			((addr + size) <= (vma->start + vma->size)))
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+#define MAX_GMUFW_SIZE	0x8000	/* in bytes */
+
+static int _load_legacy_gmu_fw(struct kgsl_device *device,
+	struct a6xx_gmu_device *gmu)
+{
+	const struct firmware *fw = gmu->fw_image;
+
+	if (fw->size > MAX_GMUFW_SIZE)
+		return -EINVAL;
+
+	gmu_core_blkwrite(device, A6XX_GMU_CM3_ITCM_START, fw->data,
+			fw->size);
+
+	/* Proceed only after the FW is written */
+	wmb();
+	return 0;
+}
+
+static void load_tcm(struct adreno_device *adreno_dev, const u8 *src,
+	u32 tcm_start, u32 base, const struct gmu_block_header *blk)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
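+	/* blk->addr is a byte address while TCM register offsets are dword indexed, hence the division */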
+	u32 tcm_offset = tcm_start + ((blk->addr - base)/sizeof(u32));
+	void __iomem *addr = kgsl_regmap_virt(&device->regmap, tcm_offset);
+
+	memcpy_toio(addr, src, blk->size);
+}
+
+int a6xx_gmu_load_fw(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	const u8 *fw = (const u8 *)gmu->fw_image->data;
+
+	if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev))
+		return _load_legacy_gmu_fw(KGSL_DEVICE(adreno_dev), gmu);
+
+	while (fw < gmu->fw_image->data + gmu->fw_image->size) {
+		const struct gmu_block_header *blk  =
+					(const struct gmu_block_header *)fw;
+		int id;
+
+		fw += sizeof(*blk);
+
+		/* Don't deal with zero size blocks */
+		if (blk->size == 0)
+			continue;
+
+		id = find_vma_block(gmu, blk->addr, blk->size);
+
+		if (id < 0) {
+			dev_err(&gmu->pdev->dev,
+				"Unknown block in GMU FW addr:0x%x size:0x%x\n",
+				blk->addr, blk->size);
+			return -EINVAL;
+		}
+
+		if (id == GMU_ITCM) {
+			load_tcm(adreno_dev, fw,
+				A6XX_GMU_CM3_ITCM_START,
+				gmu->vma[GMU_ITCM].start, blk);
+		} else if (id == GMU_DTCM) {
+			load_tcm(adreno_dev, fw,
+				A6XX_GMU_CM3_DTCM_START,
+				gmu->vma[GMU_DTCM].start, blk);
+		} else {
+			struct kgsl_memdesc *md =
+				find_gmu_memdesc(gmu, blk->addr, blk->size);
+
+			if (!md) {
+				dev_err(&gmu->pdev->dev,
+					"No backing memory for GMU FW block addr:0x%x size:0x%x\n",
+					blk->addr, blk->size);
+				return -EINVAL;
+			}
+
+			memcpy(md->hostptr + (blk->addr - md->gmuaddr), fw,
+				blk->size);
+		}
+
+		fw += blk->size;
+	}
+
+	/* Proceed only after the FW is written */
+	wmb();
+	return 0;
+}
+
+static const char *oob_to_str(enum oob_request req)
+{
+	if (req == oob_gpu)
+		return "oob_gpu";
+	else if (req == oob_perfcntr)
+		return "oob_perfcntr";
+	else if (req == oob_boot_slumber)
+		return "oob_boot_slumber";
+	else if (req == oob_dcvs)
+		return "oob_dcvs";
+	return "unknown";
+}
+
+static void trigger_reset_recovery(struct adreno_device *adreno_dev,
+	enum oob_request req)
+{
+	/*
+	 * Trigger recovery for perfcounter oob only since only
+	 * perfcounter oob can happen alongside an actively rendering gpu.
+	 */
+	if (req != oob_perfcntr)
+		return;
+
+	if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault)
+		adreno_dev->dispatch_ops->fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+}
+
+int a6xx_gmu_oob_set(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret = 0;
+	int set, check;
+
+	if (req == oob_perfcntr && gmu->num_oob_perfcntr++)
+		return 0;
+
+	if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) {
+		set = BIT(req + 16);
+		check = BIT(req + 24);
+	} else {
+		/*
+		 * The legacy targets have special bits that aren't supported on
+		 * newer implementations
+		 */
+		if (req >= oob_boot_slumber) {
+			dev_err(&gmu->pdev->dev,
+				"Unsupported OOB request %s\n",
+				oob_to_str(req));
+			return -EINVAL;
+		}
+
+		set = BIT(30 - req * 2);
+		check = BIT(31 - req);
+	}
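+
+	/*
+	 * For illustration: req == 0 maps to set = BIT(16) / check = BIT(24)
+	 * on the legacy targets and set = BIT(30) / check = BIT(31) elsewhere.
+	 */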
+
+	gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_SET, set);
+
+	if (gmu_core_timed_poll_check(device, A6XX_GMU_GMU2HOST_INTR_INFO,
+				check, GPU_START_TIMEOUT, check)) {
+		if (req == oob_perfcntr)
+			gmu->num_oob_perfcntr--;
+		gmu_core_fault_snapshot(device);
+		ret = -ETIMEDOUT;
+		WARN(1, "OOB request %s timed out\n", oob_to_str(req));
+		trigger_reset_recovery(adreno_dev, req);
+	}
+
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, check);
+
+	trace_kgsl_gmu_oob_set(set);
+	return ret;
+}
+
+void a6xx_gmu_oob_clear(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int clear;
+
+	if (req == oob_perfcntr && --gmu->num_oob_perfcntr)
+		return;
+
+	if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) {
+		clear = BIT(req + 24);
+	} else {
+		clear = BIT(31 - req * 2);
+		if (req >= oob_boot_slumber) {
+			dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n",
+				oob_to_str(req));
+			return;
+		}
+	}
+
+	gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_SET, clear);
+	trace_kgsl_gmu_oob_clear(clear);
+}
+
+void a6xx_gmu_irq_enable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hfi *hfi = &gmu->hfi;
+
+	/* Clear pending IRQs and Unmask needed IRQs */
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, 0xffffffff);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_CLR, 0xffffffff);
+
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK,
+		(unsigned int)~HFI_IRQ_MASK);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK,
+		(unsigned int)~GMU_AO_INT_MASK);
+
+	/* Enable all IRQs on host */
+	enable_irq(hfi->irq);
+	enable_irq(gmu->irq);
+}
+
+void a6xx_gmu_irq_disable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hfi *hfi = &gmu->hfi;
+
+	/* Disable all IRQs on host */
+	disable_irq(gmu->irq);
+	disable_irq(hfi->irq);
+
+	/* Mask all IRQs and clear pending IRQs */
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK, 0xffffffff);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK, 0xffffffff);
+
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, 0xffffffff);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_CLR, 0xffffffff);
+}
+
+static int a6xx_gmu_hfi_start_msg(struct adreno_device *adreno_dev)
+{
+	struct hfi_start_cmd req;
+
+	/*
+	 * This HFI was not supported in legacy firmware and this quirk
+	 * serves as a better means to identify targets that depend on
+	 * legacy firmware.
+	 */
+	if (!ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) {
+		int ret;
+
+		ret = CMD_MSG_HDR(req, H2F_MSG_START);
+		if (ret)
+			return ret;
+
+		return a6xx_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+	}
+
+	return 0;
+}
+
+#define FREQ_VOTE(idx, ack) (((idx) & 0xFF) | (((ack) & 0xF) << 28))
+#define BW_VOTE(idx) ((((idx) & 0xFFF) << 12) | ((idx) & 0xFFF))
+
+#define CLKSET_OPTION_ATLEAST 3
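+
+/*
+ * For example, derived from the macros above: FREQ_VOTE(3, CLKSET_OPTION_ATLEAST)
+ * evaluates to 0x30000003 and BW_VOTE(2) evaluates to 0x00002002.
+ */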
+
+/*
+ * a6xx_gmu_dcvs_nohfi() - request GMU to do DCVS without using HFI
+ * @device: Pointer to KGSL device
+ * @perf_idx: Index into GPU performance level table defined in
+ *	HFI DCVS table message
+ * @bw_idx: Index into GPU b/w table defined in HFI b/w table message
+ *
+ */
+static int a6xx_gmu_dcvs_nohfi(struct kgsl_device *device,
+		unsigned int perf_idx, unsigned int bw_idx)
+{
+	int ret;
+
+	gmu_core_regwrite(device, A6XX_GMU_DCVS_ACK_OPTION, DCVS_ACK_NONBLOCK);
+
+	gmu_core_regwrite(device, A6XX_GMU_DCVS_PERF_SETTING,
+			FREQ_VOTE(perf_idx, CLKSET_OPTION_ATLEAST));
+
+	gmu_core_regwrite(device, A6XX_GMU_DCVS_BW_SETTING, BW_VOTE(bw_idx));
+
+	ret = a6xx_gmu_oob_set(device, oob_dcvs);
+	if (ret == 0)
+		gmu_core_regread(device, A6XX_GMU_DCVS_RETURN, &ret);
+
+	a6xx_gmu_oob_clear(device, oob_dcvs);
+
+	return ret;
+}
+
+static u32 a6xx_rscc_tcsm_drv0_status_reglist[] = {
+	A6XX_RSCC_TCS0_DRV0_STATUS,
+	A6XX_RSCC_TCS1_DRV0_STATUS,
+	A6XX_RSCC_TCS2_DRV0_STATUS,
+	A6XX_RSCC_TCS3_DRV0_STATUS,
+	A6XX_RSCC_TCS4_DRV0_STATUS,
+	A6XX_RSCC_TCS5_DRV0_STATUS,
+	A6XX_RSCC_TCS6_DRV0_STATUS,
+	A6XX_RSCC_TCS7_DRV0_STATUS,
+	A6XX_RSCC_TCS8_DRV0_STATUS,
+	A6XX_RSCC_TCS9_DRV0_STATUS,
+};
+
+static int a6xx_complete_rpmh_votes(struct adreno_device *adreno_dev,
+		unsigned int timeout)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	/* The number of TCS commands is increased to 10 from the A650 family onwards */
+	int count = adreno_is_a650_family(adreno_dev) ?
+				ARRAY_SIZE(a6xx_rscc_tcsm_drv0_status_reglist) : 4;
+	int i, ret = 0;
+
+	for (i = 0; i < count; i++)
+		ret |= timed_poll_check_rscc(device, a6xx_rscc_tcsm_drv0_status_reglist[i],
+				BIT(0), timeout, BIT(0));
+
+	if (ret)
+		dev_err(device->dev, "RPMH votes timed out: %d\n", ret);
+
+	return ret;
+}
+
+#define SPTPRAC_CTRL_TIMEOUT		10 /* ms */
+
+/*
+ * a6xx_gmu_sptprac_enable() - Power on SPTPRAC
+ * @adreno_dev: Pointer to Adreno device
+ */
+int a6xx_gmu_sptprac_enable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	/* Only certain targets have sptprac */
+	if (!adreno_is_a630(adreno_dev) && !adreno_is_a615_family(adreno_dev))
+		return 0;
+
+	if (test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv))
+		return 0;
+
+	/* GMU enabled a630 and a615 targets */
+	gmu_core_regwrite(device, A6XX_GMU_GX_SPTPRAC_POWER_CONTROL,
+			SPTPRAC_POWERON_CTRL_MASK);
+
+	if (gmu_core_timed_poll_check(device,
+			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS,
+			SPTPRAC_POWERON_STATUS_MASK,
+			SPTPRAC_CTRL_TIMEOUT,
+			SPTPRAC_POWERON_STATUS_MASK)) {
+		dev_err(&gmu->pdev->dev, "power on SPTPRAC fail\n");
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv);
+	return 0;
+}
+
+/*
+ * a6xx_gmu_sptprac_disable() - Power off SPTPRAC
+ * @adreno_dev: Pointer to Adreno device
+ */
+void a6xx_gmu_sptprac_disable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	/* Only certain targets have sptprac */
+	if (!adreno_is_a630(adreno_dev) && !adreno_is_a615_family(adreno_dev))
+		return;
+
+	if (!test_and_clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED,
+		&adreno_dev->priv))
+		return;
+
+	/* GMU enabled a630 and a615 targets */
+
+	/* Ensure that retention is on */
+	gmu_core_regrmw(device, A6XX_GPU_CC_GX_GDSCR, 0,
+			A6XX_RETAIN_FF_ENABLE_ENABLE_MASK);
+
+	gmu_core_regwrite(device, A6XX_GMU_GX_SPTPRAC_POWER_CONTROL,
+			SPTPRAC_POWEROFF_CTRL_MASK);
+
+	if (gmu_core_timed_poll_check(device,
+			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS,
+			SPTPRAC_POWEROFF_STATUS_MASK,
+			SPTPRAC_CTRL_TIMEOUT,
+			SPTPRAC_POWEROFF_STATUS_MASK))
+		dev_err(&gmu->pdev->dev, "power off SPTPRAC fail\n");
+}
+
+#define SPTPRAC_POWER_OFF	BIT(2)
+#define SP_CLK_OFF		BIT(4)
+#define GX_GDSC_POWER_OFF	BIT(6)
+#define GX_CLK_OFF		BIT(7)
+#define is_on(val)		(!(val & (GX_GDSC_POWER_OFF | GX_CLK_OFF)))
+
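+/*
+ * GX is considered on only when neither the GX GDSC nor the GX clock is
+ * reported off in the SPTPRAC power/clock status register.
+ */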
+bool a6xx_gmu_gx_is_on(struct adreno_device *adreno_dev)
+{
+	unsigned int val;
+
+	gmu_core_regread(KGSL_DEVICE(adreno_dev),
+			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &val);
+	return is_on(val);
+}
+
+bool a619_holi_gx_is_on(struct adreno_device *adreno_dev)
+{
+	unsigned int val;
+
+	gmu_core_regread(KGSL_DEVICE(adreno_dev),
+			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &val);
+	return is_on(val);
+}
+
+/*
+ * a6xx_gmu_sptprac_is_on() - Check if SPTP is on using pwr status register
+ * @adreno_dev: Pointer to adreno device
+ *
+ * This check should only be performed if the keepalive bit is set or it
+ * can be guaranteed that the power state of the GPU will remain unchanged.
+ */
+bool a6xx_gmu_sptprac_is_on(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int val;
+
+	if (!adreno_is_a630(adreno_dev) && !adreno_is_a615_family(adreno_dev))
+		return true;
+
+	if (adreno_is_a619_holi(adreno_dev))
+		kgsl_regread(device,
+			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &val);
+	else
+		gmu_core_regread(device, A6XX_GMU_SPTPRAC_PWR_CLK_STATUS,
+			&val);
+
+	return !(val & (SPTPRAC_POWER_OFF | SP_CLK_OFF));
+}
+
+/*
+ * a6xx_gmu_gfx_rail_on() - request GMU to power GPU at given OPP.
+ * @adreno_dev: Pointer to adreno device
+ */
+static int a6xx_gmu_gfx_rail_on(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
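+	/*
+	 * The DCVS table is assumed to be indexed from the lowest (XO) to the
+	 * highest clock, the inverse of the KGSL pwrlevel ordering, hence the
+	 * index conversion below.
+	 */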
+	u32 perf_idx = gmu->hfi.dcvs_table.gpu_level_num -
+		pwr->default_pwrlevel - 1;
+	u32 default_opp = gmu->hfi.dcvs_table.gx_votes[perf_idx].vote;
+
+	gmu_core_regwrite(device, A6XX_GMU_BOOT_SLUMBER_OPTION,
+			OOB_BOOT_OPTION);
+	gmu_core_regwrite(device, A6XX_GMU_GX_VOTE_IDX,
+			ARC_VOTE_GET_PRI(default_opp));
+	gmu_core_regwrite(device, A6XX_GMU_MX_VOTE_IDX,
+			ARC_VOTE_GET_SEC(default_opp));
+
+	a6xx_rdpm_mx_freq_update(gmu,
+			gmu->hfi.dcvs_table.gx_votes[perf_idx].freq);
+
+	return a6xx_gmu_oob_set(device, oob_boot_slumber);
+}
+
+static bool idle_transition_complete(unsigned int idle_level,
+	unsigned int gmu_power_reg,
+	unsigned int sptprac_clk_reg)
+{
+	if (idle_level != gmu_power_reg)
+		return false;
+
+	if (idle_level == GPU_HW_IFPC && is_on(sptprac_clk_reg))
+		return false;
+
+	return true;
+}
+
+static const char *idle_level_name(int level)
+{
+	if (level == GPU_HW_ACTIVE)
+		return "GPU_HW_ACTIVE";
+	else if (level == GPU_HW_IFPC)
+		return "GPU_HW_IFPC";
+
+	return "";
+}
+
+int a6xx_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	unsigned int reg, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8;
+	unsigned long t;
+	uint64_t ts1, ts2, ts3;
+
+	ts1 = a6xx_read_alwayson(adreno_dev);
+
+	t = jiffies + msecs_to_jiffies(GMU_IDLE_TIMEOUT);
+	do {
+		gmu_core_regread(device,
+			A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &reg);
+		gmu_core_regread(device,
+			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &reg1);
+
+		if (idle_transition_complete(gmu->idle_level, reg, reg1))
+			return 0;
+		/* Sleep 10 to 100 us between polls to reduce unnecessary AHB bus traffic */
+		usleep_range(10, 100);
+	} while (!time_after(jiffies, t));
+
+	ts2 = a6xx_read_alwayson(adreno_dev);
+	/* Check one last time */
+
+	gmu_core_regread(device, A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &reg);
+	gmu_core_regread(device, A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &reg1);
+
+	if (idle_transition_complete(gmu->idle_level, reg, reg1))
+		return 0;
+
+	ts3 = a6xx_read_alwayson(adreno_dev);
+
+	/* Collect abort data to help with debugging */
+	gmu_core_regread(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, &reg2);
+	gmu_core_regread(device, A6XX_GMU_RBBM_INT_UNMASKED_STATUS, &reg3);
+	gmu_core_regread(device, A6XX_GMU_GMU_PWR_COL_KEEPALIVE, &reg4);
+	gmu_core_regread(device, A6XX_GMU_AO_SPARE_CNTL, &reg5);
+
+	dev_err(&gmu->pdev->dev,
+		"----------------------[ GMU error ]----------------------\n");
+	dev_err(&gmu->pdev->dev,
+		"Timeout waiting for lowest idle level %s\n",
+		idle_level_name(gmu->idle_level));
+	dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1);
+	dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n",
+		ts2-ts1);
+	dev_err(&gmu->pdev->dev, "Retry: %llx (ticks relative to poll)\n",
+		ts3-ts2);
+	dev_err(&gmu->pdev->dev,
+		"RPMH_POWER_STATE=%x SPTPRAC_PWR_CLK_STATUS=%x\n", reg, reg1);
+	dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2);
+	dev_err(&gmu->pdev->dev,
+		"RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n",
+		reg3, reg4);
+	dev_err(&gmu->pdev->dev, "A6XX_GMU_AO_SPARE_CNTL=%x\n", reg5);
+
+	if (adreno_is_a660(adreno_dev)) {
+		u32 val;
+
+		gmu_core_regread(device, A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, &val);
+		dev_err(&gmu->pdev->dev, "PWR_COL_PREEMPT_KEEPALIVE=%x\n", val);
+	}
+
+	/* Access GX registers only when GX is ON */
+	if (is_on(reg1)) {
+		kgsl_regread(device, A6XX_CP_STATUS_1, &reg6);
+		kgsl_regread(device, A6XX_CP_CP2GMU_STATUS, &reg7);
+		kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, &reg8);
+
+		dev_err(&gmu->pdev->dev, "A6XX_CP_STATUS_1=%x\n", reg6);
+		dev_err(&gmu->pdev->dev,
+			"CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n",
+			reg7, reg8);
+	}
+
+	WARN_ON(1);
+	gmu_core_fault_snapshot(device);
+	return -ETIMEDOUT;
+}
+
+/* Bitmask for GPU idle status check */
+#define CXGXCPUBUSYIGNAHB	BIT(30)
+int a6xx_gmu_wait_for_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	unsigned int status2;
+	uint64_t ts1;
+
+	ts1 = a6xx_read_alwayson(adreno_dev);
+	if (gmu_core_timed_poll_check(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS,
+			0, GMU_START_TIMEOUT, CXGXCPUBUSYIGNAHB)) {
+		gmu_core_regread(device,
+				A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2, &status2);
+		dev_err(&gmu->pdev->dev,
+				"GMU not idling: status2=0x%x %llx %llx\n",
+				status2, ts1,
+				a6xx_read_alwayson(ADRENO_DEVICE(device)));
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/* A6xx GMU FENCE RANGE MASK */
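+/* The composite mask below evaluates to 0x80A008A0 */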
+#define GMU_FENCE_RANGE_MASK	((0x1 << 31) | ((0xA << 2) << 18) | (0x8A0))
+
+void a6xx_gmu_version_info(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	/* GMU version info is at a fixed offset in the DTCM */
+	gmu_core_regread(device, A6XX_GMU_CM3_DTCM_START + 0xFF8,
+				&gmu->ver.core);
+	gmu_core_regread(device, A6XX_GMU_CM3_DTCM_START + 0xFF9,
+				&gmu->ver.core_dev);
+	gmu_core_regread(device, A6XX_GMU_CM3_DTCM_START + 0xFFA,
+				&gmu->ver.pwr);
+	gmu_core_regread(device, A6XX_GMU_CM3_DTCM_START + 0xFFB,
+				&gmu->ver.pwr_dev);
+	gmu_core_regread(device, A6XX_GMU_CM3_DTCM_START + 0xFFC,
+				&gmu->ver.hfi);
+}
+
+int a6xx_gmu_itcm_shadow(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u32 i, *dest;
+
+	if (gmu->itcm_shadow)
+		return 0;
+
+	gmu->itcm_shadow = vzalloc(gmu->vma[GMU_ITCM].size);
+	if (!gmu->itcm_shadow)
+		return -ENOMEM;
+
+	dest = (u32 *)gmu->itcm_shadow;
+
+	/* FIXME: use bulk read? */
+	for (i = 0; i < (gmu->vma[GMU_ITCM].size >> 2); i++)
+		gmu_core_regread(KGSL_DEVICE(adreno_dev),
+			A6XX_GMU_CM3_ITCM_START + i, dest++);
+
+	return 0;
+}
+
+static void a6xx_gmu_enable_throttle_counters(
+	struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 val;
+
+	if (!(adreno_dev->lm_enabled || adreno_dev->bcl_enabled))
+		return;
+
+	if (adreno_dev->lm_enabled) {
+		/*
+		 * For LM throttling -
+		 * XOCLK1: countable: 0x10
+		 * XOCLK2: countable: 0x16 for newer hardware / 0x15 for others
+		 * XOCLK3: countable: 0xf for newer hardware / 0x19 for others
+		 *
+		 * POWER_CONTROL_SELECT_0 controls counters 0 - 3, each selector
+		 * is 8 bits wide.
+		 */
+
+		if (adreno_is_a620(adreno_dev) || adreno_is_a650(adreno_dev))
+			val = (0x10 << 8) | (0x16 << 16) | (0x0f << 24);
+		else
+			val = (0x10 << 8) | (0x15 << 16) | (0x19 << 24);
+	} else {
+		/*
+		 * When LM is not enabled, we can enable BCL throttling -
+		 * XOCLK1: countable: 0x13 (25% throttle)
+		 * XOCLK2: countable: 0x17 (58% throttle)
+		 * XOCLK3: countable: 0x19 (75% throttle)
+		 *
+		 * POWER_CONTROL_SELECT_0 controls counters 0 - 3, each selector
+		 * is 8 bits wide.
+		 */
+		val = (0x13 << 8) | (0x17 << 16) | (0x19 << 24);
+	}
+	/* Make sure not to write over XOCLK0 */
+	gmu_core_regrmw(device, A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0,
+		0xffffff00, val);
+
+	gmu_core_regwrite(device, A6XX_GMU_AO_SPARE_CNTL, 1);
+}
+
+void a6xx_gmu_register_config(struct adreno_device *adreno_dev)
+{
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 gmu_log_info, chipid = 0;
+
+	/* Clear any previously set cm3 fault */
+	atomic_set(&gmu->cm3_fault, 0);
+
+	/* Vote veto for FAL10 feature if supported */
+	if (a6xx_core->veto_fal10) {
+		gmu_core_regwrite(device,
+			A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1);
+		gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_CX_FAL_INTF, 0x1);
+	}
+
+	/* Turn on TCM retention */
+	gmu_core_regwrite(device, A6XX_GMU_GENERAL_7, 1);
+
+	/* Clear init result to make sure we are getting fresh value */
+	gmu_core_regwrite(device, A6XX_GMU_CM3_FW_INIT_RESULT, 0);
+	gmu_core_regwrite(device, A6XX_GMU_CM3_BOOT_CONFIG, 0x2);
+
+	gmu_core_regwrite(device, A6XX_GMU_HFI_QTBL_ADDR,
+			gmu->hfi.hfi_mem->gmuaddr);
+	gmu_core_regwrite(device, A6XX_GMU_HFI_QTBL_INFO, 1);
+
+	/*
+	 * On A6xx, GMU AO interrupt line BIT[1] is shared between IPCC and
+	 * the doorbell. Enable the dbdWakeupEn interrupt so the GMU can
+	 * receive IPC interrupts.
+	 */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR))
+		gmu_core_regwrite(device, A6XX_GMU_AO_INTERRUPT_EN, BIT(1));
+
+	gmu_core_regwrite(device, A6XX_GMU_AHB_FENCE_RANGE_0,
+			GMU_FENCE_RANGE_MASK);
+
+	/*
+	 * Make sure that CM3 state is at reset value. Snapshot is changing
+	 * NMI bit and if we boot up GMU with NMI bit set GMU will boot
+	 * straight in to NMI handler without executing __main code
+	 */
+	gmu_core_regwrite(device, A6XX_GMU_CM3_CFG, 0x4052);
+
+	/*
+	 * We may have asserted gbif halt as part of reset sequence which may
+	 * not get cleared if the gdsc was not reset. So clear it before
+	 * attempting GMU boot.
+	 */
+	if (!adreno_is_a630(adreno_dev))
+		kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0);
+
+	/* Set vrb address before starting GMU */
+	if (!IS_ERR_OR_NULL(gmu->vrb))
+		gmu_core_regwrite(device, A6XX_GMU_GENERAL_11, gmu->vrb->gmuaddr);
+
+	/* Set the log wptr index */
+	gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP,
+			gmu->log_wptr_retention);
+
+	/* Pass chipid to GMU FW, must happen before starting GMU */
+	chipid = ADRENO_GMU_CHIPID(adreno_dev->chipid);
+
+	/*
+	 * For A660 GPU variant, GMU firmware expects chipid as per below
+	 * format to differentiate between A660 and A660 variant. In device
+	 * tree, target version is specified as high nibble of patch to align
+	 * with usermode driver expectation. Format the chipid according to
+	 * firmware requirement.
+	 *
+	 * Bit 11-8: patch version
+	 * Bit 15-12: minor version
+	 * Bit 23-16: major version
+	 * Bit 27-24: core version
+	 * Bit 31-28: target version
+	 */
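+	/*
+	 * For example, a DT patch value of 0x10 places target version 1 in
+	 * bits 31:28 below.
+	 */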
+	if (adreno_is_a660_shima(adreno_dev))
+		chipid |= ((ADRENO_CHIPID_PATCH(adreno_dev->chipid) >> 4) << 28);
+
+	gmu_core_regwrite(device, A6XX_GMU_HFI_SFR_ADDR, chipid);
+
+	/* Log size is encoded in (number of 4K units - 1) */
+	gmu_log_info = (gmu->gmu_log->gmuaddr & 0xFFFFF000) |
+		((GMU_LOG_SIZE/SZ_4K - 1) & 0xFF);
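+	/* For instance, a 16 KB log encodes (4 - 1) = 3 in the lowest byte */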
+	gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_MSG,
+			gmu_log_info);
+
+	/* Configure power control and bring the GMU out of reset */
+	a6xx_gmu_power_config(adreno_dev);
+
+	a6xx_gmu_enable_throttle_counters(adreno_dev);
+}
+
+struct kgsl_memdesc *reserve_gmu_kernel_block(struct a6xx_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, u32 align)
+{
+	int ret;
+	struct kgsl_memdesc *md;
+	struct gmu_vma_entry *vma = &gmu->vma[vma_id];
+	struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu));
+	u32 aligned_size = ALIGN(size, hfi_get_gmu_sz_alignment(align));
+
+	if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals))
+		return ERR_PTR(-ENOMEM);
+
+	md = &gmu->gmu_globals[gmu->global_entries];
+
+	ret = kgsl_allocate_kernel(device, md, size, 0, KGSL_MEMDESC_SYSMEM);
+	if (ret) {
+		memset(md, 0x0, sizeof(*md));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (!addr)
+		addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align));
+
+	ret = gmu_core_map_memdesc(gmu->domain, md, addr,
+		IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n",
+			addr, md->size, ret);
+		kgsl_sharedmem_free(md);
+		memset(md, 0, sizeof(*md));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	md->gmuaddr = addr;
+
+	/* Take into account the size alignment when reserving the GMU VA */
+	vma->next_va = md->gmuaddr + aligned_size;
+
+	gmu->global_entries++;
+
+	return md;
+}
+
+struct kgsl_memdesc *reserve_gmu_kernel_block_fixed(struct a6xx_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 align)
+{
+	int ret;
+	struct kgsl_memdesc *md;
+	struct gmu_vma_entry *vma = &gmu->vma[vma_id];
+	struct kgsl_device *device = KGSL_DEVICE(a6xx_gmu_to_adreno(gmu));
+	u32 aligned_size = ALIGN(size, hfi_get_gmu_sz_alignment(align));
+
+	if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals))
+		return ERR_PTR(-ENOMEM);
+
+	md = &gmu->gmu_globals[gmu->global_entries];
+
+	ret = kgsl_memdesc_init_fixed(device, gmu->pdev, resource, md);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (!addr)
+		addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align));
+
+	if ((vma->next_va + aligned_size) > (vma->start + vma->size)) {
+		dev_err(&gmu->pdev->dev,
+			"GMU mapping too big. available: %d required: %d\n",
+			vma->start + vma->size - vma->next_va, aligned_size);
+		md = ERR_PTR(-ENOMEM);
+		goto done;
+	}
+
+	ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n",
+			addr, md->size, ret);
+		md = ERR_PTR(-ENOMEM);
+		goto done;
+	}
+
+	md->gmuaddr = addr;
+	/* Take into account the size alignment when reserving the GMU VA */
+	vma->next_va = md->gmuaddr + aligned_size;
+	gmu->global_entries++;
+done:
+	sg_free_table(md->sgt);
+	kfree(md->sgt);
+	md->sgt = NULL;
+	return md;
+}
+
+static int reserve_entire_vma(struct a6xx_gmu_device *gmu, u32 vma_id)
+{
+	struct kgsl_memdesc *md;
+	u32 start = gmu->vma[vma_id].start, size = gmu->vma[vma_id].size;
+
+	md = find_gmu_memdesc(gmu, start, size);
+	if (md)
+		return 0;
+
+	md = reserve_gmu_kernel_block(gmu, start, size, vma_id, 0);
+
+	return PTR_ERR_OR_ZERO(md);
+}
+
+static int a6xx_gmu_cache_finalize(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_memdesc *md;
+	int ret;
+
+	/* Preallocations were made so no need to request all this memory */
+	if (gmu->preallocations)
+		return 0;
+
+	ret = reserve_entire_vma(gmu, GMU_ICACHE);
+	if (ret)
+		return ret;
+
+	if (!adreno_is_a650_family(adreno_dev)) {
+		ret = reserve_entire_vma(gmu, GMU_DCACHE);
+		if (ret)
+			return ret;
+	}
+
+	md = reserve_gmu_kernel_block(gmu, 0, SZ_4K, GMU_NONCACHED_KERNEL, 0);
+	if (IS_ERR(md))
+		return PTR_ERR(md);
+
+	gmu->preallocations = true;
+
+	return 0;
+}
+
+static int a6xx_gmu_process_prealloc(struct a6xx_gmu_device *gmu,
+	struct gmu_block_header *blk)
+{
+	struct kgsl_memdesc *md;
+
+	int id = find_vma_block(gmu, blk->addr, blk->value);
+
+	if (id < 0) {
+		dev_err(&gmu->pdev->dev,
+			"Invalid prealloc block addr: 0x%x value:%d\n",
+			blk->addr, blk->value);
+		return id;
+	}
+
+	/* Nothing to do for TCM blocks or user uncached */
+	if (id == GMU_ITCM || id == GMU_DTCM || id == GMU_NONCACHED_USER)
+		return 0;
+
+	/* Check if the block is already allocated */
+	md = find_gmu_memdesc(gmu, blk->addr, blk->value);
+	if (md != NULL)
+		return 0;
+
+	md = reserve_gmu_kernel_block(gmu, blk->addr, blk->value, id, 0);
+	if (IS_ERR(md))
+		return PTR_ERR(md);
+
+	gmu->preallocations = true;
+
+	return 0;
+}
+
+int a6xx_gmu_parse_fw(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	struct gmu_block_header *blk;
+	int ret, offset = 0;
+
+	/* GMU fw already saved and verified so do nothing new */
+	if (!gmu->fw_image) {
+
+		if (a6xx_core->gmufw_name == NULL)
+			return -EINVAL;
+
+		ret = request_firmware(&gmu->fw_image, a6xx_core->gmufw_name,
+				&gmu->pdev->dev);
+		if (ret) {
+			dev_err(&gmu->pdev->dev, "request_firmware (%s) failed: %d\n",
+					a6xx_core->gmufw_name, ret);
+			return ret;
+		}
+	}
+
+	/*
+	 * Zero payload fw blocks contain metadata and are
+	 * guaranteed to precede fw load data. Parse the
+	 * metadata blocks.
+	 */
+	while (offset < gmu->fw_image->size) {
+		blk = (struct gmu_block_header *)&gmu->fw_image->data[offset];
+
+		if (offset + sizeof(*blk) > gmu->fw_image->size) {
+			dev_err(&gmu->pdev->dev, "Invalid FW Block\n");
+			return -EINVAL;
+		}
+
+		/* Done with the zero length metadata blocks, so stop parsing */
+		if (blk->size)
+			break;
+
+		offset += sizeof(*blk);
+
+		if (blk->type == GMU_BLK_TYPE_PREALLOC_REQ ||
+				blk->type == GMU_BLK_TYPE_PREALLOC_PERSIST_REQ) {
+			ret = a6xx_gmu_process_prealloc(gmu, blk);
+
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+int a6xx_gmu_memory_init(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	/* Allocates & maps GMU crash dump memory */
+	if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) {
+		if (IS_ERR_OR_NULL(gmu->dump_mem))
+			gmu->dump_mem = reserve_gmu_kernel_block(gmu, 0, SZ_16K,
+					GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(gmu->dump_mem))
+			return PTR_ERR(gmu->dump_mem);
+	}
+
+	/* GMU master log */
+	if (IS_ERR_OR_NULL(gmu->gmu_log))
+		gmu->gmu_log = reserve_gmu_kernel_block(gmu, 0, GMU_LOG_SIZE,
+				GMU_NONCACHED_KERNEL, 0);
+
+	return PTR_ERR_OR_ZERO(gmu->gmu_log);
+}
+
+static int a6xx_gmu_init(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = a6xx_gmu_parse_fw(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* Request any other cache ranges that might be required */
+	ret = a6xx_gmu_cache_finalize(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gmu_memory_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	return a6xx_hfi_init(adreno_dev);
+}
+
+#define A6XX_VBIF_XIN_HALT_CTRL1_ACKS   (BIT(0) | BIT(1) | BIT(2) | BIT(3))
+
+static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev)
+{
+	int ret = 0;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	/* If SPTP_RAC is on, turn off SPTP_RAC HS */
+	a6xx_gmu_sptprac_disable(adreno_dev);
+
+	/* Disconnecting the GPU from the bus is not needed if the CX GDSC goes off later */
+
+	/*
+	 * GEMNOC can enter power collapse state during GPU power down sequence.
+	 * This could abort CX GDSC collapse. Assert Qactive to avoid this.
+	 */
+	if ((adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev) ||
+			adreno_is_a635(adreno_dev)))
+		gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1);
+
+	/* Check no outstanding RPMh voting */
+	a6xx_complete_rpmh_votes(adreno_dev, GPU_RESET_TIMEOUT);
+
+	/* Clear the WRITEDROPPED fields and set fence to allow mode */
+	gmu_core_regwrite(device, A6XX_GMU_AHB_FENCE_STATUS_CLR, 0x7);
+	gmu_core_regwrite(device, A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
+
+	/* Make sure above writes are committed before we proceed to recovery */
+	wmb();
+
+	gmu_core_regwrite(device, A6XX_GMU_CM3_SYSRESET, 1);
+
+	if (!adreno_is_a630(adreno_dev)) {
+		/* Halt GX traffic */
+		if (a6xx_gmu_gx_is_on(adreno_dev)) {
+			kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT,
+				A6XX_GBIF_GX_HALT_MASK);
+			adreno_wait_for_halt_ack(device,
+					A6XX_RBBM_GBIF_HALT_ACK,
+					A6XX_GBIF_GX_HALT_MASK);
+		}
+		/* Halt CX traffic */
+		a6xx_halt_gbif(adreno_dev);
+		/* De-assert the halts */
+		kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0);
+	}
+
+	if (a6xx_gmu_gx_is_on(adreno_dev))
+		kgsl_regwrite(device, A6XX_RBBM_SW_RESET_CMD, 0x1);
+
+	/* Make sure above writes are posted before turning off power resources */
+	wmb();
+
+	/* Allow the software reset to complete */
+	udelay(100);
+
+	/*
+	 * This is based on the assumption that GMU is the only one controlling
+	 * the GX HS. This code path is the only client voting for GX through
+	 * the regulator interface.
+	 */
+	if (pwr->gx_gdsc) {
+		if (a6xx_gmu_gx_is_on(adreno_dev)) {
+			/*
+			 * Switch gx gdsc control from GMU to CPU. Force a
+			 * non-zero reference count in the clock driver so
+			 * the next disable call will turn off the GDSC.
+			 */
+			ret = regulator_enable(pwr->gx_gdsc);
+			if (ret)
+				dev_err(&gmu->pdev->dev,
+					"suspend fail: gx enable %d\n", ret);
+
+			/*
+			 * Toggle the loop_en bit, across disabling the gx gdsc,
+			 * with a delay of 10 XO cycles before disabling gx
+			 * gdsc. This is to prevent CPR measurements from
+			 * failing.
+			 */
+			if (adreno_is_a660(adreno_dev)) {
+				gmu_core_regrmw(device, A6XX_GPU_CPR_FSM_CTL,
+					1, 0);
+				ndelay(520);
+			}
+
+			ret = regulator_disable(pwr->gx_gdsc);
+			if (ret)
+				dev_err(&gmu->pdev->dev,
+					"suspend fail: gx disable %d\n", ret);
+
+			if (adreno_is_a660(adreno_dev))
+				gmu_core_regrmw(device, A6XX_GPU_CPR_FSM_CTL,
+					1, 1);
+
+			if (a6xx_gmu_gx_is_on(adreno_dev))
+				dev_err(&gmu->pdev->dev,
+					"gx is stuck on\n");
+		}
+	}
+}
+
+/*
+ * a6xx_gmu_notify_slumber() - initiate request to GMU to prepare to slumber
+ * @adreno_dev: Pointer to adreno device
+ */
+static int a6xx_gmu_notify_slumber(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int bus_level = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq;
+	int perf_idx = gmu->hfi.dcvs_table.gpu_level_num -
+			pwr->default_pwrlevel - 1;
+	int ret, state;
+
+	/* Disable the power counter so that the GMU is not busy */
+	gmu_core_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0);
+
+	/* Turn off SPTPRAC if we own it */
+	if (gmu->idle_level == GPU_HW_ACTIVE)
+		a6xx_gmu_sptprac_disable(adreno_dev);
+
+	if (!ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) {
+		struct hfi_prep_slumber_cmd req = {
+			.freq = perf_idx,
+			.bw = bus_level,
+		};
+
+		ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER);
+		if (!ret)
+			ret = a6xx_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+
+		goto out;
+	}
+
+	gmu_core_regwrite(device, A6XX_GMU_BOOT_SLUMBER_OPTION,
+			OOB_SLUMBER_OPTION);
+	gmu_core_regwrite(device, A6XX_GMU_GX_VOTE_IDX, perf_idx);
+	gmu_core_regwrite(device, A6XX_GMU_MX_VOTE_IDX, bus_level);
+
+	ret = a6xx_gmu_oob_set(device, oob_boot_slumber);
+	a6xx_gmu_oob_clear(device, oob_boot_slumber);
+
+	if (!ret) {
+		gmu_core_regread(device,
+			A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &state);
+		if (state != GPU_HW_SLUMBER) {
+			dev_err(&gmu->pdev->dev,
+					"Failed to prepare for slumber: 0x%x\n",
+					state);
+			ret = -ETIMEDOUT;
+		}
+	}
+
+out:
+	/* Make sure the fence is in ALLOW mode */
+	gmu_core_regwrite(device, A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
+
+	/*
+	 * GEMNOC can enter power collapse state during GPU power down sequence.
+	 * This could abort CX GDSC collapse. Assert Qactive to avoid this.
+	 */
+	if ((adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev) ||
+			adreno_is_a635(adreno_dev)))
+		gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1);
+
+	return ret;
+}
+
+void a6xx_gmu_suspend(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	a6xx_gmu_pwrctrl_suspend(adreno_dev);
+
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+	a6xx_gmu_disable_gdsc(adreno_dev);
+
+	a6xx_rdpm_cx_freq_update(gmu, 0);
+
+	dev_err(&gmu->pdev->dev, "Suspended GMU\n");
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+}
+
+static int a6xx_gmu_dcvs_set(struct adreno_device *adreno_dev,
+		int gpu_pwrlevel, int bus_level)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table;
+	struct hfi_gx_bw_perf_vote_cmd req = {
+		.ack_type = DCVS_ACK_BLOCK,
+		.freq = INVALID_DCVS_IDX,
+		.bw = INVALID_DCVS_IDX,
+	};
+	int ret = 0;
+
+	if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags))
+		return 0;
+
+	/* Do not allow GPU clock votes at or below the XO level from the GMU */
+	if ((gpu_pwrlevel != INVALID_DCVS_IDX) &&
+			(gpu_pwrlevel >= table->gpu_level_num - 1))
+		return -EINVAL;
+
+	if (gpu_pwrlevel < table->gpu_level_num - 1)
+		req.freq = table->gpu_level_num - gpu_pwrlevel - 1;
+
+	if (bus_level < pwr->ddr_table_count && bus_level > 0)
+		req.bw = bus_level;
+
+	/* GMU will vote for slumber levels through the sleep sequence */
+	if ((req.freq == INVALID_DCVS_IDX) &&
+		(req.bw == INVALID_DCVS_IDX)) {
+		return 0;
+	}
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE);
+	if (ret)
+		return ret;
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG))
+		ret = a6xx_gmu_dcvs_nohfi(device, req.freq, req.bw);
+	else
+		ret = a6xx_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+
+	if (ret) {
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Failed to set GPU perf idx %u, bw idx %u\n",
+			req.freq, req.bw);
+
+		/*
+		 * If this was a DCVS request alongside an active GPU, request
+		 * dispatcher based reset and recovery.
+		 */
+		if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+			adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT |
+				ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+	}
+
+	if (req.freq != INVALID_DCVS_IDX)
+		a6xx_rdpm_mx_freq_update(gmu,
+			gmu->hfi.dcvs_table.gx_votes[req.freq].freq);
+
+	return ret;
+}
+
+static int a6xx_gmu_clock_set(struct adreno_device *adreno_dev, u32 pwrlevel)
+{
+	return a6xx_gmu_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX);
+}
+
+static int a6xx_gmu_ifpc_store(struct kgsl_device *device,
+		unsigned int val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	unsigned int requested_idle_level;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_IFPC))
+		return -EINVAL;
+
+	if (val)
+		requested_idle_level = GPU_HW_IFPC;
+	else
+		requested_idle_level = GPU_HW_ACTIVE;
+
+	if (gmu->idle_level == requested_idle_level)
+		return 0;
+
+	/* Power down the GPU before changing the idle level */
+	return adreno_power_cycle_u32(adreno_dev, &gmu->idle_level,
+		requested_idle_level);
+}
+
+static unsigned int a6xx_gmu_ifpc_isenabled(struct kgsl_device *device)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(ADRENO_DEVICE(device));
+
+	return gmu->idle_level == GPU_HW_IFPC;
+}
+
+/* Send an NMI to the GMU */
+void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u32 val;
+
+	/*
+	 * Do not send NMI if the SMMU is stalled because GMU will not be able
+	 * to save cm3 state to DDR.
+	 */
+	if (a6xx_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) {
+		dev_err(&gmu->pdev->dev,
+			"Skipping NMI because SMMU is stalled\n");
+		return;
+	}
+
+	if (force)
+		goto nmi;
+
+	/*
+	 * We should not send NMI if there was a CM3 fault reported because we
+	 * don't want to overwrite the critical CM3 state captured by gmu before
+	 * it sent the CM3 fault interrupt. Also don't send NMI if GMU reset is
+	 * already active. We could have hit a GMU assert and NMI might have
+	 * already been triggered.
+	 */
+
+	/* make sure we're reading the latest cm3_fault */
+	smp_rmb();
+
+	if (atomic_read(&gmu->cm3_fault))
+		return;
+
+	gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &val);
+
+	if (val & 0xE00)
+		return;
+
+nmi:
+	/* Mask so there's no interrupt caused by NMI */
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK, 0xFFFFFFFF);
+
+	/* Make sure the interrupt is masked before causing it */
+	wmb();
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG))
+		gmu_core_regwrite(device,
+			A6XX_GMU_NMI_CONTROL_STATUS, 0);
+
+	/* This will cause the GMU to save its internal state to DDR */
+	gmu_core_regread(device, A6XX_GMU_CM3_CFG, &val);
+	val |=  BIT(9);
+	gmu_core_regwrite(device, A6XX_GMU_CM3_CFG, val);
+
+	/* Make sure the NMI is invoked before we proceed */
+	wmb();
+
+	/* Wait for the NMI to be handled */
+	udelay(200);
+}
+
+static void a6xx_gmu_cooperative_reset(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	unsigned int result;
+
+	gmu_core_regwrite(device, A6XX_GMU_CX_GMU_WDOG_CTRL, 0);
+	gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_SET, BIT(17));
+
+	/*
+	 * After triggering graceful death wait for snapshot ready
+	 * indication from GMU.
+	 */
+	if (!gmu_core_timed_poll_check(device, A6XX_GMU_CM3_FW_INIT_RESULT,
+				0x800, 2, 0x800))
+		return;
+
+	gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &result);
+	dev_err(&gmu->pdev->dev,
+		"GMU cooperative reset timed out 0x%x\n", result);
+	/*
+	 * If we don't get a snapshot ready indication from the GMU, trigger
+	 * an NMI; if that also times out, just continue with the reset.
+	 */
+	a6xx_gmu_send_nmi(device, true);
+
+	gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &result);
+	if ((result & 0x800) != 0x800)
+		dev_err(&gmu->pdev->dev,
+			"GMU cooperative reset NMI timed out 0x%x\n", result);
+}
+
+static int a6xx_gmu_wait_for_active_transition(
+	struct kgsl_device *device)
+{
+	unsigned int reg;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(ADRENO_DEVICE(device));
+
+	if (!gmu_core_isenabled(device))
+		return 0;
+
+	if (gmu_core_timed_poll_check(device, A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE,
+			GPU_HW_ACTIVE, 100, GENMASK(3, 0))) {
+		gmu_core_regread(device, A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &reg);
+		dev_err(&gmu->pdev->dev,
+			"GMU failed to move to ACTIVE state, Current state: 0x%x\n",
+			reg);
+
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static bool a6xx_gmu_scales_bandwidth(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	return (ADRENO_GPUREV(adreno_dev) >= ADRENO_REV_A640);
+}
+
+void a6xx_gmu_handle_watchdog(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 mask;
+
+	/* Temporarily mask the watchdog interrupt to prevent a storm */
+	gmu_core_regread(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK,
+		&mask);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK,
+			(mask | GMU_INT_WDOG_BITE));
+
+	a6xx_gmu_send_nmi(device, false);
+
+	dev_err_ratelimited(&gmu->pdev->dev,
+			"GMU watchdog expired interrupt received\n");
+}
+
+static irqreturn_t a6xx_gmu_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	const struct a6xx_gpudev *a6xx_gpudev =
+		to_a6xx_gpudev(ADRENO_GPU_DEVICE(adreno_dev));
+	unsigned int status = 0;
+
+	gmu_core_regread(device, A6XX_GMU_AO_HOST_INTERRUPT_STATUS, &status);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_CLR, status);
+
+	/* Ignore GMU_INT_RSCC_COMP and GMU_INT_DBD WAKEUP interrupts */
+	if (status & GMU_INT_WDOG_BITE)
+		a6xx_gpudev->handle_watchdog(adreno_dev);
+	if (status & GMU_INT_HOST_AHB_BUS_ERR)
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"AHB bus error interrupt received\n");
+	if (status & GMU_INT_FENCE_ERR) {
+		unsigned int fence_status;
+
+		gmu_core_regread(device, A6XX_GMU_AHB_FENCE_STATUS,
+			&fence_status);
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"FENCE error interrupt received %x\n", fence_status);
+	}
+
+	if (status & ~GMU_AO_INT_MASK)
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"Unhandled GMU interrupts 0x%lx\n",
+				status & ~GMU_AO_INT_MASK);
+
+	return IRQ_HANDLED;
+}
+
+void a6xx_gmu_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	a6xx_gmu_device_snapshot(device, snapshot);
+
+	a6xx_snapshot(adreno_dev, snapshot);
+
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR,
+		0xffffffff);
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK,
+		HFI_IRQ_MASK);
+}
+
+void a6xx_gmu_aop_send_acd_state(struct a6xx_gmu_device *gmu, bool flag)
+{
+	struct qmp_pkt msg;
+	char msg_buf[36];
+	u32 size;
+	int ret;
+
+	if (IS_ERR_OR_NULL(gmu->mailbox.channel))
+		return;
+
+	size = scnprintf(msg_buf, sizeof(msg_buf),
+			"{class: gpu, res: acd, val: %d}", flag);
+
+	/* mailbox controller expects 4-byte aligned buffer */
+	msg.size = ALIGN((size + 1), SZ_4);
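+	/*
+	 * For instance, the 30 character message for val == 0 becomes 31 bytes
+	 * with the NUL terminator and is rounded up to 32.
+	 */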
+	msg.data = msg_buf;
+
+	ret = mbox_send_message(gmu->mailbox.channel, &msg);
+
+	if (ret < 0)
+		dev_err(&gmu->pdev->dev,
+			"AOP mbox send message failed: %d\n", ret);
+}
+
+int a6xx_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	a6xx_rdpm_cx_freq_update(gmu, gmu->freqs[level] / 1000);
+
+	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk",
+			gmu->freqs[level]);
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n",
+			gmu->freqs[level], ret);
+		return ret;
+	}
+
+	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk",
+			adreno_dev->gmu_hub_clk_freq);
+	if (ret && ret != -ENODEV) {
+		dev_err(&gmu->pdev->dev, "Unable to set the HUB clock\n");
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(gmu->num_clks, gmu->clks);
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "Cannot enable GMU clocks\n");
+		return ret;
+	}
+
+	device->state = KGSL_STATE_AWARE;
+
+	return 0;
+}
+
+static void a6xx_gmu_force_first_boot(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u32 val = 0;
+
+	if (gmu->pdc_cfg_base) {
+		kgsl_pwrctrl_enable_cx_gdsc(device);
+		a6xx_gmu_enable_clks(adreno_dev, 0);
+
+		val = __raw_readl(gmu->pdc_cfg_base + (PDC_GPU_ENABLE_PDC << 2));
+
+		/* ensure this read operation is done before the next one */
+		rmb();
+
+		clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+		a6xx_gmu_disable_gdsc(adreno_dev);
+		a6xx_rdpm_cx_freq_update(gmu, 0);
+	}
+
+	if (val != PDC_ENABLE_REG_VALUE) {
+		clear_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags);
+		clear_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags);
+	}
+}
+
+static int a6xx_gmu_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int level, ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	a6xx_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gmu_enable_clks(adreno_dev, 0);
+	if (ret)
+		goto gdsc_off;
+
+	ret = a6xx_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = a6xx_gmu_itcm_shadow(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	a6xx_gmu_register_config(adreno_dev);
+
+	a6xx_gmu_version_info(adreno_dev);
+
+	a6xx_gmu_irq_enable(adreno_dev);
+
+	/* Vote for minimal DDR BW for GMU to init */
+	level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min;
+	icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level]));
+
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
+	ret = a6xx_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) {
+		ret = a6xx_gmu_gfx_rail_on(adreno_dev);
+		if (ret) {
+			a6xx_gmu_oob_clear(device, oob_boot_slumber);
+			goto err;
+		}
+	}
+
+	if (gmu->idle_level == GPU_HW_ACTIVE) {
+		ret = a6xx_gmu_sptprac_enable(adreno_dev);
+		if (ret)
+			goto err;
+	}
+
+	if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) {
+		ret = a6xx_load_pdc_ucode(adreno_dev);
+		if (ret)
+			goto err;
+
+		a6xx_load_rsc_ucode(adreno_dev);
+		set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags);
+	}
+
+	ret = a6xx_gmu_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = a6xx_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
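+	/* GMU init is complete; drop the temporary minimum DDR vote taken above */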
+	icc_set_bw(pwr->icc_path, 0, 0);
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	a6xx_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		a6xx_gmu_suspend(adreno_dev);
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	a6xx_gmu_disable_gdsc(adreno_dev);
+
+	a6xx_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+static int a6xx_gmu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret = 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gmu_enable_clks(adreno_dev, 0);
+	if (ret)
+		goto gdsc_off;
+
+	ret = a6xx_rscc_wakeup_sequence(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = a6xx_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	a6xx_gmu_register_config(adreno_dev);
+
+	a6xx_gmu_irq_enable(adreno_dev);
+
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
+	ret = a6xx_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) {
+		ret = a6xx_gmu_gfx_rail_on(adreno_dev);
+		if (ret) {
+			a6xx_gmu_oob_clear(device, oob_boot_slumber);
+			goto err;
+		}
+	}
+
+	if (gmu->idle_level == GPU_HW_ACTIVE) {
+		ret = a6xx_gmu_sptprac_enable(adreno_dev);
+		if (ret)
+			goto err;
+	}
+
+	ret = a6xx_gmu_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = a6xx_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	a6xx_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		a6xx_gmu_suspend(adreno_dev);
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	a6xx_gmu_disable_gdsc(adreno_dev);
+
+	a6xx_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+static void set_acd(struct adreno_device *adreno_dev, void *priv)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	adreno_dev->acd_enabled = *((bool *)priv);
+	a6xx_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);
+}
+
+static int a6xx_gmu_acd_set(struct kgsl_device *device, bool val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	if (IS_ERR_OR_NULL(gmu->mailbox.channel))
+		return -EINVAL;
+
+	/* Don't do any unneeded work if ACD is already in the correct state */
+	if (adreno_dev->acd_enabled == val)
+		return 0;
+
+	/* Power cycle the GPU for changes to take effect */
+	return adreno_power_cycle(adreno_dev, set_acd, &val);
+}
+
+static void a6xx_send_tlb_hint(struct kgsl_device *device, bool val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	if (!gmu->domain)
+		return;
+
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+	qcom_skip_tlb_management(&gmu->pdev->dev, val);
+#endif
+	if (!val)
+		iommu_flush_iotlb_all(gmu->domain);
+}
+
+static const struct gmu_dev_ops a6xx_gmudev = {
+	.oob_set = a6xx_gmu_oob_set,
+	.oob_clear = a6xx_gmu_oob_clear,
+	.ifpc_store = a6xx_gmu_ifpc_store,
+	.ifpc_isenabled = a6xx_gmu_ifpc_isenabled,
+	.cooperative_reset = a6xx_gmu_cooperative_reset,
+	.wait_for_active_transition = a6xx_gmu_wait_for_active_transition,
+	.scales_bandwidth = a6xx_gmu_scales_bandwidth,
+	.acd_set = a6xx_gmu_acd_set,
+	.force_first_boot = a6xx_gmu_force_first_boot,
+	.send_nmi = a6xx_gmu_send_nmi,
+	.send_tlb_hint = a6xx_send_tlb_hint,
+};
+
+static int a6xx_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel,
+	u32 ab)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int ret = 0;
+
+	kgsl_icc_set_tag(pwr, buslevel);
+
+	if (buslevel != pwr->cur_buslevel) {
+		ret = a6xx_gmu_dcvs_set(adreno_dev, INVALID_DCVS_IDX, buslevel);
+		if (ret)
+			return ret;
+
+		pwr->cur_buslevel = buslevel;
+	}
+
+	if (ab != pwr->cur_ab) {
+		icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0);
+		pwr->cur_ab = ab;
+	}
+
+	trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab);
+	return ret;
+}
+
+static void a6xx_free_gmu_globals(struct a6xx_gmu_device *gmu)
+{
+	int i;
+
+	for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) {
+		struct kgsl_memdesc *md = &gmu->gmu_globals[i];
+
+		if (!md->gmuaddr)
+			continue;
+
+		iommu_unmap(gmu->domain, md->gmuaddr, md->size);
+
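+		/* Only SYSMEM-backed entries own pages; fixed mappings just need the unmap above */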
+		if (md->priv & KGSL_MEMDESC_SYSMEM)
+			kgsl_sharedmem_free(md);
+
+		memset(md, 0, sizeof(*md));
+	}
+
+	if (gmu->domain) {
+		iommu_detach_device(gmu->domain, &gmu->pdev->dev);
+		iommu_domain_free(gmu->domain);
+		gmu->domain = NULL;
+	}
+
+	gmu->global_entries = 0;
+}
+
+static int a6xx_gmu_aop_mailbox_init(struct adreno_device *adreno_dev,
+		struct a6xx_gmu_device *gmu)
+{
+	struct kgsl_mailbox *mailbox = &gmu->mailbox;
+
+	mailbox->client.dev = &gmu->pdev->dev;
+	mailbox->client.tx_block = true;
+	mailbox->client.tx_tout = 1000;
+	mailbox->client.knows_txdone = false;
+
+	mailbox->channel = mbox_request_channel(&mailbox->client, 0);
+	if (IS_ERR(mailbox->channel))
+		return PTR_ERR(mailbox->channel);
+
+	adreno_dev->acd_enabled = true;
+	return 0;
+}
+
+static void a6xx_gmu_acd_probe(struct kgsl_device *device,
+		struct a6xx_gmu_device *gmu, struct device_node *node)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct kgsl_pwrlevel *pwrlevel =
+			&pwr->pwrlevels[pwr->num_pwrlevels - 1];
+	struct hfi_acd_table_cmd *cmd = &gmu->hfi.acd_table;
+	int ret, i, cmd_idx = 0;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_ACD))
+		return;
+
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ACD_TBL, HFI_MSG_CMD);
+
+	cmd->version = 1;
+	cmd->stride = 1;
+	cmd->enable_by_level = 0;
+
+	/*
+	 * Iterate through each GPU power level, generate a mask of the
+	 * ACD-enabled levels for the GMU firmware, and store the corresponding
+	 * control register configurations in the acd_table structure.
+	 */
+	for (i = 0; i < pwr->num_pwrlevels; i++) {
+		if (pwrlevel->acd_level) {
+			cmd->enable_by_level |= (1 << (i + 1));
+			cmd->data[cmd_idx++] = pwrlevel->acd_level;
+		}
+		pwrlevel--;
+	}
+
+	if (!cmd->enable_by_level)
+		return;
+
+	cmd->num_levels = cmd_idx;
+
+	ret = a6xx_gmu_aop_mailbox_init(adreno_dev, gmu);
+	if (ret)
+		dev_err(&gmu->pdev->dev,
+			"AOP mailbox init failed: %d\n", ret);
+}
+
+static int a6xx_gmu_reg_probe(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	ret = kgsl_regmap_add_region(&device->regmap, gmu->pdev,
+		"kgsl_gmu_reg", NULL, NULL);
+	if (ret)
+		dev_err(&gmu->pdev->dev, "Unable to map the GMU registers\n");
+
+	return ret;
+}
+
+static int a6xx_gmu_clk_probe(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret, i;
+	int tbl_size;
+	int num_freqs;
+	int offset;
+
+	ret = devm_clk_bulk_get_all(&gmu->pdev->dev, &gmu->clks);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Voting for apb_pclk enables the power and clocks required for the
+	 * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not
+	 * enabled, QDSS is essentially unusable, so don't vote for this clock.
+	 */
+	if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) {
+		for (i = 0; i < ret; i++) {
+			if (!strcmp(gmu->clks[i].id, "apb_pclk")) {
+				gmu->clks[i].clk = NULL;
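+				/* The clk_bulk_* helpers treat a NULL clk as a no-op, so apb_pclk is never enabled */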
+				break;
+			}
+		}
+	}
+
+	gmu->num_clks = ret;
+
+	/* Read the optional list of GMU frequencies */
+	if (of_get_property(gmu->pdev->dev.of_node,
+		"qcom,gmu-freq-table", &tbl_size) == NULL)
+		goto default_gmu_freq;
+
+	num_freqs = (tbl_size / sizeof(u32)) / 2;
+	if (num_freqs != ARRAY_SIZE(gmu->freqs))
+		goto default_gmu_freq;
+
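+	/* Each table entry is a <frequency, voltage level> pair read into freqs[i] and vlvls[i] */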
+	for (i = 0; i < num_freqs; i++) {
+		offset = i * 2;
+		ret = of_property_read_u32_index(gmu->pdev->dev.of_node,
+			"qcom,gmu-freq-table", offset, &gmu->freqs[i]);
+		if (ret)
+			goto default_gmu_freq;
+		ret = of_property_read_u32_index(gmu->pdev->dev.of_node,
+			"qcom,gmu-freq-table", offset + 1, &gmu->vlvls[i]);
+		if (ret)
+			goto default_gmu_freq;
+	}
+	return 0;
+
+default_gmu_freq:
+	/* The GMU frequency table is missing or invalid. Go with a default */
+	gmu->freqs[0] = GMU_FREQ_MIN;
+	gmu->vlvls[0] = RPMH_REGULATOR_LEVEL_MIN_SVS;
+	gmu->freqs[1] = GMU_FREQ_MAX;
+	gmu->vlvls[1] = RPMH_REGULATOR_LEVEL_SVS;
+
+	if (adreno_is_a660(adreno_dev))
+		gmu->vlvls[0] = RPMH_REGULATOR_LEVEL_LOW_SVS;
+
+	return 0;
+}
+
+static void a6xx_gmu_rdpm_probe(struct a6xx_gmu_device *gmu,
+		struct kgsl_device *device)
+{
+	struct resource *res;
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+			"rdpm_cx");
+	if (res)
+		gmu->rdpm_cx_virt = devm_ioremap(&device->pdev->dev,
+				res->start, resource_size(res));
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+			"rdpm_mx");
+	if (res)
+		gmu->rdpm_mx_virt = devm_ioremap(&device->pdev->dev,
+				res->start, resource_size(res));
+}
+
+void a6xx_gmu_remove(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	if (!IS_ERR_OR_NULL(gmu->mailbox.channel))
+		mbox_free_channel(gmu->mailbox.channel);
+
+	adreno_dev->acd_enabled = false;
+
+	if (gmu->fw_image)
+		release_firmware(gmu->fw_image);
+
+	a6xx_free_gmu_globals(gmu);
+
+	vfree(gmu->itcm_shadow);
+
+	kobject_put(&gmu->log_kobj);
+	kobject_put(&gmu->stats_kobj);
+}
+
+static int a6xx_gmu_iommu_fault_handler(struct iommu_domain *domain,
+		struct device *dev, unsigned long addr, int flags, void *token)
+{
+	char *fault_type = "unknown";
+
+	if (flags & IOMMU_FAULT_TRANSLATION)
+		fault_type = "translation";
+	else if (flags & IOMMU_FAULT_PERMISSION)
+		fault_type = "permission";
+	else if (flags & IOMMU_FAULT_EXTERNAL)
+		fault_type = "external";
+	else if (flags & IOMMU_FAULT_TRANSACTION_STALLED)
+		fault_type = "transaction stalled";
+
+	dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n",
+			addr,
+			(flags & IOMMU_FAULT_WRITE) ? "write" : "read",
+			fault_type);
+
+	return 0;
+}
+
+static int a6xx_gmu_iommu_init(struct a6xx_gmu_device *gmu)
+{
+	int ret;
+
+	gmu->domain = iommu_domain_alloc(&platform_bus_type);
+	if (gmu->domain == NULL) {
+		dev_err(&gmu->pdev->dev, "Unable to allocate GMU IOMMU domain\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Disable stall on fault for the GMU context bank.
+	 * This sets SCTLR.CFCFG = 0.
+	 * Also note that the SMMU driver sets SCTLR.HUPCF = 0 by default.
+	 */
+	qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL);
+
+	ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev);
+	if (!ret) {
+		iommu_set_fault_handler(gmu->domain,
+			a6xx_gmu_iommu_fault_handler, gmu);
+		return 0;
+	}
+
+	dev_err(&gmu->pdev->dev,
+		"Unable to attach GMU IOMMU domain: %d\n", ret);
+	iommu_domain_free(gmu->domain);
+	gmu->domain = NULL;
+
+	return ret;
+}
+
+int a6xx_gmu_probe(struct kgsl_device *device,
+		struct platform_device *pdev)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	int ret;
+
+	gmu->pdev = pdev;
+
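+	/*
+	 * GMU buffers are mapped through the GMU's own IOMMU domain; use a
+	 * 64-bit coherent mask and reset any DMA ops inherited from the bus.
+	 */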
+	dma_set_coherent_mask(&gmu->pdev->dev, DMA_BIT_MASK(64));
+	gmu->pdev->dev.dma_mask = &gmu->pdev->dev.coherent_dma_mask;
+	set_dma_ops(&gmu->pdev->dev, NULL);
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+						"rscc");
+	if (res) {
+		gmu->rscc_virt = devm_ioremap(&device->pdev->dev, res->start,
+						resource_size(res));
+		if (gmu->rscc_virt == NULL) {
+			dev_err(&gmu->pdev->dev, "rscc ioremap failed\n");
+			return -ENOMEM;
+		}
+	}
+
+	/* Setup any rdpm register ranges */
+	a6xx_gmu_rdpm_probe(gmu, device);
+
+	/* Set up GMU regulators */
+	ret = kgsl_pwrctrl_probe_regulators(device, pdev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gmu_clk_probe(adreno_dev);
+	if (ret < 0)
+		return ret;
+
+	/* Set up GMU IOMMU and shared memory with GMU */
+	ret = a6xx_gmu_iommu_init(gmu);
+	if (ret)
+		goto error;
+
+	if (adreno_is_a650_family(adreno_dev))
+		gmu->vma = a6xx_gmu_vma;
+	else
+		gmu->vma = a6xx_gmu_vma_legacy;
+
+	/* Map and reserve the GMU CSR registers */
+	ret = a6xx_gmu_reg_probe(adreno_dev);
+	if (ret)
+		goto error;
+
+	/* Populates RPMh configurations */
+	ret = a6xx_build_rpmh_tables(adreno_dev);
+	if (ret)
+		goto error;
+
+	/* Set up GMU idle state */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) {
+		gmu->idle_level = GPU_HW_IFPC;
+		adreno_dev->ifpc_hyst = A6X_GMU_LONG_IFPC_HYST;
+		adreno_dev->ifpc_hyst_floor = A6X_GMU_LONG_IFPC_HYST_FLOOR;
+	} else {
+		gmu->idle_level = GPU_HW_ACTIVE;
+	}
+
+	a6xx_gmu_acd_probe(device, gmu, pdev->dev.of_node);
+
+	set_bit(GMU_ENABLED, &device->gmu_core.flags);
+
+	/* Initialize to zero to detect trace packet loss */
+	gmu->trace.seq_num = 0;
+
+	device->gmu_core.dev_ops = &a6xx_gmudev;
+
+	/* Set default GMU attributes */
+	gmu->log_stream_enable = false;
+	gmu->log_group_mask = 0x3;
+
+	/* Disabled by default */
+	gmu->stats_enable = false;
+	/* Set default to CM3 busy cycles countable */
+	gmu->stats_mask = BIT(A6XX_GMU_CM3_BUSY_CYCLES);
+	/* The interval is in units of 50 us. Set the default sampling interval to 4 x 50 us */
+	gmu->stats_interval = HFI_FEATURE_GMU_STATS_INTERVAL;
+
+	/* GMU sysfs nodes setup */
+	(void) kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, &dev->kobj, "log");
+	(void) kobject_init_and_add(&gmu->stats_kobj, &stats_kobj_type, &dev->kobj, "stats");
+
+	of_property_read_u32(gmu->pdev->dev.of_node, "qcom,gmu-perf-ddr-bw",
+		&gmu->perf_ddr_bw);
+
+	gmu->irq = kgsl_request_irq(gmu->pdev, "kgsl_gmu_irq",
+		a6xx_gmu_irq_handler, device);
+
+	if (gmu->irq >= 0)
+		return 0;
+
+	ret = gmu->irq;
+
+error:
+	a6xx_gmu_remove(device);
+	return ret;
+}
+
+static void a6xx_gmu_active_count_put(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (WARN(atomic_read(&device->active_cnt) == 0,
+		"Unbalanced get/put calls to KGSL active count\n"))
+		return;
+
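+	/*
+	 * On the last put, refresh the power stats and re-arm the idle timer so
+	 * the idle check can eventually move the GPU toward SLUMBER.
+	 */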
+	if (atomic_dec_and_test(&device->active_cnt)) {
+		kgsl_pwrscale_update_stats(device);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	wake_up(&device->active_cnt_wq);
+}
+
+int a6xx_halt_gbif(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/* Halt new client requests */
+	kgsl_regwrite(device, A6XX_GBIF_HALT, A6XX_GBIF_CLIENT_HALT_MASK);
+	ret = adreno_wait_for_halt_ack(device,
+		A6XX_GBIF_HALT_ACK, A6XX_GBIF_CLIENT_HALT_MASK);
+
+	/* Halt all AXI requests */
+	kgsl_regwrite(device, A6XX_GBIF_HALT, A6XX_GBIF_ARB_HALT_MASK);
+	ret = adreno_wait_for_halt_ack(device,
+		A6XX_GBIF_HALT_ACK, A6XX_GBIF_ARB_HALT_MASK);
+
+	return ret;
+}
+
+#define RPMH_VOTE_TIMEOUT		2 /* ms */
+
+static int a6xx_gmu_power_off(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+
+	if (device->gmu_fault)
+		goto error;
+
+	/* Wait for the lowest idle level we requested */
+	ret = a6xx_gmu_wait_for_lowest_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = a6xx_complete_rpmh_votes(adreno_dev, RPMH_VOTE_TIMEOUT);
+	if (ret)
+		goto error;
+
+	ret = a6xx_gmu_notify_slumber(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = a6xx_gmu_wait_for_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = a6xx_rscc_sleep_sequence(adreno_dev);
+
+	a6xx_rdpm_mx_freq_update(gmu, 0);
+
+	/* Now that we are done with the GMU and GPU, clear the GBIF */
+	if (!adreno_is_a630(adreno_dev)) {
+		ret = a6xx_halt_gbif(adreno_dev);
+		/* De-assert the halts */
+		kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0);
+	}
+
+	a6xx_gmu_irq_disable(adreno_dev);
+
+	a6xx_hfi_stop(adreno_dev);
+
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+	a6xx_gmu_disable_gdsc(adreno_dev);
+
+	a6xx_rdpm_cx_freq_update(gmu, 0);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+
+	return ret;
+
+error:
+	a6xx_gmu_irq_disable(adreno_dev);
+	a6xx_hfi_stop(adreno_dev);
+	a6xx_gmu_suspend(adreno_dev);
+
+	return ret;
+}
+
+void a6xx_enable_gpu_irq(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_pwrctrl_irq(device, true);
+
+	adreno_irqctrl(adreno_dev, 1);
+}
+
+void a6xx_disable_gpu_irq(struct adreno_device *adreno_dev)
+{
+	kgsl_pwrctrl_irq(KGSL_DEVICE(adreno_dev), false);
+
+	if (a6xx_gmu_gx_is_on(adreno_dev))
+		adreno_irqctrl(adreno_dev, 0);
+}
+
+static void a6xx_fusa_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	void __iomem *fusa_virt = NULL;
+	struct resource *res;
+
+	if (!adreno_is_a663(adreno_dev))
+		return;
+
+	res = platform_get_resource_byname(device->pdev,
+			IORESOURCE_MEM, "fusa");
+	if (res)
+		fusa_virt = ioremap(res->start, resource_size(res));
+
+	if (!fusa_virt) {
+		dev_err(device->dev, "Failed to map fusa\n");
+		return;
+	}
+
+	/* Disable fusa mode in boot stage */
+	_regrmw(fusa_virt, A6XX_GPU_FUSA_REG_ECC_CTRL - A6XX_GPU_FUSA_REG_BASE,
+			A6XX_GPU_FUSA_DISABLE_MASK, A6XX_GPU_FUSA_DISABLE_BITS);
+	_regrmw(fusa_virt, A6XX_GPU_FUSA_REG_CSR_PRIY - A6XX_GPU_FUSA_REG_BASE,
+			A6XX_GPU_FUSA_DISABLE_MASK, A6XX_GPU_FUSA_DISABLE_BITS);
+
+	iounmap(fusa_virt);
+}
+
+static int a6xx_gpu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		goto err;
+
+	ret = a6xx_gmu_oob_set(device, oob_gpu);
+	if (ret)
+		goto oob_clear;
+
+	ret = a6xx_gmu_hfi_start_msg(adreno_dev);
+	if (ret)
+		goto oob_clear;
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data));
+
+	/* Restore performance counter registers with saved values */
+	adreno_perfcounter_restore(adreno_dev);
+
+	a6xx_start(adreno_dev);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	adreno_perfcounter_start(adreno_dev);
+
+	/* Clear FSR here in case it is set from a previous pagefault */
+	kgsl_mmu_clear_fsr(&device->mmu);
+
+	a6xx_enable_gpu_irq(adreno_dev);
+
+	ret = a6xx_rb_start(adreno_dev);
+	if (ret) {
+		a6xx_disable_gpu_irq(adreno_dev);
+		goto oob_clear;
+	}
+
+	/*
+	 * At this point it is safe to assume that we recovered. Setting
+	 * this field allows us to take a new snapshot for the next failure
+	 * if we are prioritizing the first unrecoverable snapshot.
+	 */
+	if (device->snapshot)
+		device->snapshot->recovered = true;
+
+	/* Start the dispatcher */
+	adreno_dispatcher_start(device);
+
+	device->reset_counter++;
+
+	a6xx_gmu_oob_clear(device, oob_gpu);
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG))
+		gmu_core_dev_oob_clear(device, oob_boot_slumber);
+
+	return 0;
+
+oob_clear:
+	a6xx_gmu_oob_clear(device, oob_gpu);
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG))
+		gmu_core_dev_oob_clear(device, oob_boot_slumber);
+
+err:
+	a6xx_gmu_power_off(adreno_dev);
+
+	return ret;
+}
+
+static void gmu_idle_timer(struct timer_list *t)
+{
+	struct kgsl_device *device = container_of(t, struct kgsl_device,
+					idle_timer);
+
+	kgsl_schedule_work(&device->idle_check_ws);
+}
+
+static int a6xx_boot(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (WARN_ON(test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
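+	/*
+	 * If hibernation support is enabled and the PDC/RSC ucode is not marked
+	 * as loaded (for example it was lost across a hibernate cycle), take
+	 * the full first-boot path to reload it.
+	 */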
+	if (IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION) &&
+		!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags))
+		ret = a6xx_gmu_first_boot(adreno_dev);
+	else
+		ret = a6xx_gmu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_start_idle_timer(device);
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return ret;
+}
+
+static int a6xx_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) {
+		if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+			return a6xx_boot(adreno_dev);
+
+		return 0;
+	}
+
+	KGSL_BOOT_MARKER("ADRENO Init");
+
+	ret = a6xx_ringbuffer_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gmu_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = a6xx_gmu_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	a6xx_fusa_init(adreno_dev);
+
+	ret = a6xx_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+
+	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
+						 ADRENO_COOP_RESET);
+
+	adreno_create_profile_buffer(adreno_dev);
+
+	set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags);
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	/*
+	 * BCL needs its Central Broadcast register to be programmed from TZ.
+	 * This programming happens only when the zap shader firmware load is
+	 * successful. Since the zap firmware load can fail in the boot-up
+	 * path, enable BCL only after the first boot completes successfully,
+	 * which guarantees the Central Broadcast register was programmed
+	 * before BCL is enabled.
+	 */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		adreno_dev->bcl_enabled = true;
+
+	/*
+	 * There is a possible deadlock between kgsl firmware reading
+	 * (request_firmware) and devfreq update calls. During first boot, the
+	 * kgsl device mutex is held while request_firmware is called to read
+	 * firmware, and request_firmware internally takes the dev_pm_qos_mtx
+	 * lock. Devfreq update calls triggered by thermal/bcl or devfreq
+	 * sysfs, however, take the same dev_pm_qos_mtx lock first and then try
+	 * to take the kgsl device mutex as part of the get_dev_status/target
+	 * calls. This deadlocks when neither thread can acquire the mutex held
+	 * by the other. Enable devfreq updates now that we are done reading
+	 * all firmware files.
+	 */
+	device->pwrscale.devfreq_enabled = true;
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	KGSL_BOOT_MARKER("ADRENO Ready");
+
+	return 0;
+}
+
+static int a630_vbif_halt(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	kgsl_regwrite(device, A6XX_VBIF_XIN_HALT_CTRL0,
+		A6XX_VBIF_XIN_HALT_CTRL0_MASK);
+	ret = adreno_wait_for_halt_ack(device,
+			A6XX_VBIF_XIN_HALT_CTRL1,
+			A6XX_VBIF_XIN_HALT_CTRL0_MASK);
+	kgsl_regwrite(device, A6XX_VBIF_XIN_HALT_CTRL0, 0);
+
+	return ret;
+}
+
+static int a6xx_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags));
+
+	adreno_suspend_context(device);
+
+	/*
+	 * adreno_suspend_context() unlocks the device mutex, which
+	 * could allow a concurrent thread to attempt SLUMBER sequence.
+	 * Hence, check the flags again before proceeding with SLUMBER.
+	 */
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER);
+
+	ret = a6xx_gmu_oob_set(device, oob_gpu);
+	if (ret) {
+		a6xx_gmu_oob_clear(device, oob_gpu);
+		goto no_gx_power;
+	}
+
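+	/*
+	 * A pending GPU interrupt means there is still work outstanding; back
+	 * out with -EBUSY so the idle check can retry the power-off later.
+	 */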
+	if (a6xx_irq_pending(adreno_dev)) {
+		a6xx_gmu_oob_clear(device, oob_gpu);
+		return -EBUSY;
+	}
+
+	kgsl_pwrscale_update_stats(device);
+
+	/* Save active coresight registers if applicable */
+	adreno_coresight_stop(adreno_dev);
+
+	/* Save physical performance counter values before GPU power down */
+	adreno_perfcounter_save(adreno_dev);
+
+	/*
+	 * Clear GX halt on non-gbif targets. For targets with GBIF,
+	 * GX halt is handled by the GMU FW.
+	 */
+	if (adreno_is_a630(adreno_dev))
+		a630_vbif_halt(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+	a6xx_gmu_oob_clear(device, oob_gpu);
+
+no_gx_power:
+	kgsl_pwrctrl_irq(device, false);
+
+	a6xx_gmu_power_off(adreno_dev);
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	adreno_dispatcher_stop(adreno_dev);
+
+	adreno_ringbuffer_stop(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	del_timer_sync(&device->idle_timer);
+
+	kgsl_pwrscale_sleep(device);
+
+	kgsl_pwrctrl_clear_l3_vote(device);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+
+	return ret;
+}
+
+static void gmu_idle_check(struct work_struct *work)
+{
+	struct kgsl_device *device = container_of(work,
+					struct kgsl_device, idle_check_ws);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	mutex_lock(&device->mutex);
+
+	if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags))
+		goto done;
+
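+	/* Stay up if there are active users or the idle deadline has not yet expired */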
+	if (atomic_read(&device->active_cnt) || time_is_after_jiffies(device->idle_jiffies)) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		goto done;
+
+	spin_lock(&device->submit_lock);
+
+	if (device->submit_now) {
+		spin_unlock(&device->submit_lock);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	device->skip_inline_submit = true;
+	spin_unlock(&device->submit_lock);
+
+	ret = a6xx_power_off(adreno_dev);
+	if (ret == -EBUSY) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+done:
+	mutex_unlock(&device->mutex);
+}
+
+static int a6xx_gmu_first_open(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/*
+	 * Perform the one-time setup that needs to happen when we attempt to
+	 * boot the gpu for the very first time
+	 */
+	ret = a6xx_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * A client that does a first_open but never closes the device
+	 * may prevent us from going back to SLUMBER. So trigger the idle
+	 * check by incrementing the active count and immediately releasing it.
+	 */
+	atomic_inc(&device->active_cnt);
+	a6xx_gmu_active_count_put(adreno_dev);
+
+	return 0;
+}
+
+static int a6xx_gmu_last_close(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return a6xx_power_off(adreno_dev);
+
+	return 0;
+}
+
+static int a6xx_gmu_active_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret = 0;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return -EINVAL;
+
+	if ((atomic_read(&device->active_cnt) == 0) &&
+		!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		ret = a6xx_boot(adreno_dev);
+
+	if (ret == 0)
+		atomic_inc(&device->active_cnt);
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	return ret;
+}
+
+static int a6xx_gmu_pm_suspend(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND);
+
+	/* Halt any new submissions */
+	reinit_completion(&device->halt_gate);
+
+	/* wait for active count so device can be put in slumber */
+	ret = kgsl_active_count_wait(device, 0, HZ);
+	if (ret) {
+		dev_err(device->dev,
+			"Timed out waiting for the active count\n");
+		goto err;
+	}
+
+	ret = adreno_idle(device);
+	if (ret)
+		goto err;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		a6xx_power_off(adreno_dev);
+
+	set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+
+	adreno_get_gpu_halt(adreno_dev);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
+
+	return 0;
+err:
+	adreno_dispatcher_start(device);
+	return ret;
+}
+
+static void a6xx_gmu_pm_resume(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags),
+		"resume invoked without a suspend\n"))
+		return;
+
+	adreno_put_gpu_halt(adreno_dev);
+
+	adreno_dispatcher_start(device);
+
+	clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+}
+
+static void a6xx_gmu_touch_wakeup(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	/*
+	 * Do not wake up a suspended device, or a device whose first boot
+	 * sequence has not yet completed.
+	 */
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) ||
+		!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		goto done;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = a6xx_gmu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	ret = a6xx_gpu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+done:
+	/*
+	 * When waking up from a touch event we want to stay active long enough
+	 * for the user to send a draw command. The default idle timer timeout
+	 * is shorter than we want, so push the idle timer out further for this
+	 * special case.
+	 */
+	mod_timer(&device->idle_timer, jiffies +
+			msecs_to_jiffies(adreno_wake_timeout));
+}
+
+const struct adreno_power_ops a6xx_gmu_power_ops = {
+	.first_open = a6xx_gmu_first_open,
+	.last_close = a6xx_gmu_last_close,
+	.active_count_get = a6xx_gmu_active_count_get,
+	.active_count_put = a6xx_gmu_active_count_put,
+	.pm_suspend = a6xx_gmu_pm_suspend,
+	.pm_resume = a6xx_gmu_pm_resume,
+	.touch_wakeup = a6xx_gmu_touch_wakeup,
+	.gpu_clock_set = a6xx_gmu_clock_set,
+	.gpu_bus_set = a6xx_gmu_bus_set,
+};
+
+const struct adreno_power_ops a630_gmu_power_ops = {
+	.first_open = a6xx_gmu_first_open,
+	.last_close = a6xx_gmu_last_close,
+	.active_count_get = a6xx_gmu_active_count_get,
+	.active_count_put = a6xx_gmu_active_count_put,
+	.pm_suspend = a6xx_gmu_pm_suspend,
+	.pm_resume = a6xx_gmu_pm_resume,
+	.touch_wakeup = a6xx_gmu_touch_wakeup,
+	.gpu_clock_set = a6xx_gmu_clock_set,
+};
+
+int a6xx_gmu_device_probe(struct platform_device *pdev,
+	u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	struct a6xx_device *a6xx_dev;
+	int ret;
+
+	a6xx_dev = devm_kzalloc(&pdev->dev, sizeof(*a6xx_dev),
+			GFP_KERNEL);
+	if (!a6xx_dev)
+		return -ENOMEM;
+
+	adreno_dev = &a6xx_dev->adreno_dev;
+
+	adreno_dev->irq_mask = A6XX_INT_MASK;
+
+	ret = a6xx_probe_common(pdev, adreno_dev, chipid, gpucore);
+	if (ret)
+		return ret;
+
+	ret = adreno_dispatcher_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	INIT_WORK(&device->idle_check_ws, gmu_idle_check);
+
+	timer_setup(&device->idle_timer, gmu_idle_timer, 0);
+
+	return 0;
+}
+
+int a6xx_gmu_reset(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	a6xx_disable_gpu_irq(adreno_dev);
+
+	a6xx_gmu_irq_disable(adreno_dev);
+
+	a6xx_hfi_stop(adreno_dev);
+
+	/* Hard reset the gmu and gpu */
+	a6xx_gmu_suspend(adreno_dev);
+
+	a6xx_reset_preempt_records(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	/* Attempt to reboot the gmu and gpu */
+	return a6xx_boot(adreno_dev);
+}
+
+int a6xx_gmu_hfi_probe(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hfi *hfi = &gmu->hfi;
+
+	hfi->irq = kgsl_request_irq(gmu->pdev, "kgsl_hfi_irq",
+		a6xx_hfi_irq_handler, KGSL_DEVICE(adreno_dev));
+
+	return hfi->irq < 0 ? hfi->irq : 0;
+}
+
+int a6xx_gmu_add_to_minidump(struct adreno_device *adreno_dev)
+{
+	struct a6xx_device *a6xx_dev = container_of(adreno_dev,
+					struct a6xx_device, adreno_dev);
+	int ret;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_A6XX_DEVICE,
+			(void *)(a6xx_dev), sizeof(struct a6xx_device));
+	if (ret)
+		return ret;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GMU_LOG_ENTRY,
+			a6xx_dev->gmu.gmu_log->hostptr, a6xx_dev->gmu.gmu_log->size);
+	if (ret)
+		return ret;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HFIMEM_ENTRY,
+			a6xx_dev->gmu.hfi.hfi_mem->hostptr, a6xx_dev->gmu.hfi.hfi_mem->size);
+	if (ret)
+		return ret;
+
+	if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev))
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GMU_DUMPMEM_ENTRY,
+				a6xx_dev->gmu.dump_mem->hostptr, a6xx_dev->gmu.dump_mem->size);
+
+	return ret;
+}
+
+static int a6xx_gmu_bind(struct device *dev, struct device *master, void *data)
+{
+	struct kgsl_device *device = dev_get_drvdata(master);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	const struct a6xx_gpudev *a6xx_gpudev = to_a6xx_gpudev(gpudev);
+	int ret;
+
+	ret = a6xx_gmu_probe(device, to_platform_device(dev));
+	if (ret)
+		return ret;
+
+	if (a6xx_gpudev->hfi_probe) {
+		ret = a6xx_gpudev->hfi_probe(adreno_dev);
+
+		if (ret) {
+			a6xx_gmu_remove(device);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void a6xx_gmu_unbind(struct device *dev, struct device *master,
+		void *data)
+{
+	struct kgsl_device *device = dev_get_drvdata(master);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	const struct a6xx_gpudev *a6xx_gpudev = to_a6xx_gpudev(gpudev);
+
+	if (a6xx_gpudev->hfi_remove)
+		a6xx_gpudev->hfi_remove(adreno_dev);
+
+	a6xx_gmu_remove(device);
+}
+
+static const struct component_ops a6xx_gmu_component_ops = {
+	.bind = a6xx_gmu_bind,
+	.unbind = a6xx_gmu_unbind,
+};
+
+static int a6xx_gmu_probe_dev(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &a6xx_gmu_component_ops);
+}
+
+static int a6xx_gmu_remove_dev(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &a6xx_gmu_component_ops);
+	return 0;
+}
+
+static const struct of_device_id a6xx_gmu_match_table[] = {
+	{ .compatible = "qcom,gpu-gmu" },
+	{ },
+};
+
+struct platform_driver a6xx_gmu_driver = {
+	.probe = a6xx_gmu_probe_dev,
+	.remove = a6xx_gmu_remove_dev,
+	.driver = {
+		.name = "adreno-a6xx-gmu",
+		.of_match_table = a6xx_gmu_match_table,
+	},
+};

+ 451 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_gmu.h

@@ -0,0 +1,451 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_A6XX_GMU_H
+#define __ADRENO_A6XX_GMU_H
+
+#include <linux/mailbox_client.h>
+
+#include "adreno_a6xx_hfi.h"
+#include "kgsl_gmu_core.h"
+
+/**
+ * struct a6xx_gmu_device - GMU device structure
+ * @ver: GMU version information
+ * @pdev: GMU platform device
+ * @irq: GMU interrupt number
+ * @fw_image: GMU FW image
+ * @dump_mem: pointer to GMU debug dump memory
+ * @gmu_log: gmu event log memory
+ * @hfi: HFI controller
+ * @clks: GPU subsystem clocks required for GMU functionality
+ * @idle_level: Minimal GPU idle power level
+ * @mailbox: Messages to AOP for ACD enable/disable go through this
+ * @log_wptr_retention: Store the log wptr offset on slumber
+ */
+struct a6xx_gmu_device {
+	struct {
+		u32 core;
+		u32 core_dev;
+		u32 pwr;
+		u32 pwr_dev;
+		u32 hfi;
+	} ver;
+	struct platform_device *pdev;
+	int irq;
+	const struct firmware *fw_image;
+	struct kgsl_memdesc *dump_mem;
+	struct kgsl_memdesc *gmu_log;
+	/** @vrb: GMU virtual register bank memory */
+	struct kgsl_memdesc *vrb;
+	/** @trace: gmu trace container */
+	struct kgsl_gmu_trace trace;
+	struct a6xx_hfi hfi;
+	struct clk_bulk_data *clks;
+	/** @num_clks: Number of entries in the @clks array */
+	int num_clks;
+	unsigned int idle_level;
+	/** @freqs: Array of GMU frequencies */
+	u32 freqs[GMU_MAX_PWRLEVELS];
+	/** @vlvls: Array of GMU voltage levels */
+	u32 vlvls[GMU_MAX_PWRLEVELS];
+	struct kgsl_mailbox mailbox;
+	bool preallocations;
+	/** @gmu_globals: Array to store gmu global buffers */
+	struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES];
+	/** @global_entries: To keep track of number of gmu buffers */
+	u32 global_entries;
+	struct gmu_vma_entry *vma;
+	unsigned int log_wptr_retention;
+	/** @cm3_fault: whether gmu received a cm3 fault interrupt */
+	atomic_t cm3_fault;
+	/**
+	 * @itcm_shadow: Copy of the itcm block in firmware binary used for
+	 * snapshot
+	 */
+	void *itcm_shadow;
+	/** @flags: Internal gmu flags */
+	unsigned long flags;
+	/** @rscc_virt: Pointer where RSCC block is mapped */
+	void __iomem *rscc_virt;
+	/** @domain: IOMMU domain for the kernel context */
+	struct iommu_domain *domain;
+	/** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */
+	void __iomem *rdpm_cx_virt;
+	/** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */
+	void __iomem *rdpm_mx_virt;
+	/** @log_stream_enable: GMU log streaming enable. Disabled by default */
+	bool log_stream_enable;
+	/** @log_group_mask: Allows overriding default GMU log group mask */
+	u32 log_group_mask;
+	struct kobject log_kobj;
+	/**
+	 * @perf_ddr_bw: The lowest ddr bandwidth that puts CX at a corner at
+	 * which GMU can run at higher frequency.
+	 */
+	u32 perf_ddr_bw;
+	/** @num_oob_perfcntr: Number of active oob_perfcntr requests */
+	u32 num_oob_perfcntr;
+	/** @pdc_cfg_base: Base address of PDC cfg registers */
+	void __iomem *pdc_cfg_base;
+	/** @pdc_seq_base: Base address of PDC seq registers */
+	void __iomem *pdc_seq_base;
+	/** @stats_enable: GMU stats feature enable */
+	bool stats_enable;
+	/** @stats_mask: GMU performance countables to enable */
+	u32 stats_mask;
+	/** @stats_interval: GMU performance counters sampling interval */
+	u32 stats_interval;
+	/** @stats_kobj: kernel object for GMU stats directory in sysfs */
+	struct kobject stats_kobj;
+};
+
+/* Helper function to get to a6xx gmu device from adreno device */
+struct a6xx_gmu_device *to_a6xx_gmu(struct adreno_device *adreno_dev);
+
+/* Helper function to get to adreno device from a6xx gmu device */
+struct adreno_device *a6xx_gmu_to_adreno(struct a6xx_gmu_device *gmu);
+
+/**
+ * reserve_gmu_kernel_block() - Allocate a gmu buffer
+ * @gmu: Pointer to the a6xx gmu device
+ * @addr: Desired gmu virtual address
+ * @size: Size of the buffer in bytes
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @va_align: Alignment as a power of two (2^n) bytes for the GMU VA
+ *
+ * This function allocates a buffer and maps it in
+ * the desired gmu vma
+ *
+ * Return: Pointer to the memory descriptor or error pointer on failure
+ */
+struct kgsl_memdesc *reserve_gmu_kernel_block(struct a6xx_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, u32 va_align);
+
+/**
+ * reserve_gmu_kernel_block_fixed() - Map a physical resource address to gmu
+ * @gmu: Pointer to the a6xx gmu device
+ * @addr: Desired gmu virtual address
+ * @size: Size of the buffer in bytes
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @resource: Name of the resource to get the size and address to allocate
+ * @attrs: Attributes for the mapping
+ * @va_align: Alignment as a power of two (2^n) bytes for the GMU VA
+ *
+ * This function maps the physical resource address to the desired gmu vma
+ *
+ * Return: Pointer to the memory descriptor or error pointer on failure
+ */
+struct kgsl_memdesc *reserve_gmu_kernel_block_fixed(struct a6xx_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 va_align);
+
+/**
+ * a6xx_build_rpmh_tables - Build the rpmh tables
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function creates the gpu dcvs and bw tables
+ *
+ * Return: 0 on success and negative error on failure
+ */
+int a6xx_build_rpmh_tables(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_gx_is_on - Check if GX is on
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function reads pwr status registers to check if GX
+ * is on or off
+ */
+bool a6xx_gmu_gx_is_on(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_device_snapshot - A6XX GMU snapshot function
+ * @device: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the A6XX GMU specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+void a6xx_gmu_device_snapshot(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot);
+
+/**
+ * a6xx_gmu_device_probe - Probe a gmu based a6xx target
+ * @pdev: Pointer to the platform device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore
+ *
+ * The target specific probe function for gmu based a6xx targets.
+ */
+int a6xx_gmu_device_probe(struct platform_device *pdev,
+	u32 chipid, const struct adreno_gpu_core *gpucore);
+
+/**
+ * a6xx_gmu_reset - Reset and restart the gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_reset(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_enable_gpu_irq - Enable gpu interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_enable_gpu_irq(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_disable_gpu_irq - Disable gpu interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_disable_gpu_irq(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_snapshot - Take snapshot for gmu targets
+ * @adreno_dev: Pointer to the adreno device
+ * @snapshot: Pointer to the snapshot structure
+ *
+ * Send an NMI to gmu if we hit a gmu fault. Then take gmu
+ * snapshot and carry on with rest of the a6xx snapshot
+ */
+void a6xx_gmu_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot);
+
+/**
+ * a6xx_gmu_probe - Probe a6xx gmu resources
+ * @device: Pointer to the kgsl device
+ * @pdev: Pointer to the gmu platform device
+ *
+ * Probe the gmu and hfi resources
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_probe(struct kgsl_device *device,
+	struct platform_device *pdev);
+
+/**
+ * a6xx_gmu_parse_fw - Parse the gmu fw binary
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_parse_fw(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_memory_init - Allocate gmu memory
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Allocates the gmu log buffer and others if needed.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_memory_init(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_aop_send_acd_state - Enable or disable acd feature in aop
+ * @gmu: Pointer to the a6xx gmu device
+ * @flag: Boolean to enable or disable acd in aop
+ *
+ * This function enables or disables gpu acd feature using mailbox
+ */
+void a6xx_gmu_aop_send_acd_state(struct a6xx_gmu_device *gmu, bool flag);
+
+/**
+ * a6xx_gmu_disable_gdsc - Disable gmu gdsc
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_load_fw - Load gmu firmware
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Loads the gmu firmware binary into TCMs and memory
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_load_fw(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_device_start - Bring gmu out of reset
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_device_start(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_hfi_start - Indicate hfi start to gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_itcm_shadow - Create itcm shadow copy for snapshot
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_itcm_shadow(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_register_config - gmu register configuration
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Program gmu registers based on features
+ */
+void a6xx_gmu_register_config(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_version_info - Get gmu firmware version
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_gmu_version_info(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_irq_enable - Enable gmu interrupts
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_gmu_irq_enable(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_irq_disable - Disable gmu interrupts
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_gmu_irq_disable(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_suspend - Hard reset the gpu and gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * In case we hit a gmu fault, hard reset the gpu and gmu
+ * to recover from the fault
+ */
+void a6xx_gmu_suspend(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_oob_set - send gmu oob request
+ * @device: Pointer to the kgsl device
+ * @oob: Type of oob request as defined in enum oob_request
+ *
+ * Request gmu to keep gpu powered up till the oob is cleared
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_oob_set(struct kgsl_device *device, enum oob_request oob);
+
+/**
+ * a6xx_gmu_oob_clear - clear an asserted oob request
+ * @device: Pointer to the kgsl device
+ * @oob: Type of oob request as defined in enum oob_request
+ *
+ * Clear a previously requested oob so that gmu can power
+ * collapse the gpu
+ */
+void a6xx_gmu_oob_clear(struct kgsl_device *device, enum oob_request oob);
+
+/**
+ * a6xx_gmu_wait_for_lowest_idle - wait for gmu to complete ifpc
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * If ifpc is enabled, wait for gmu to put gpu into ifpc.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_wait_for_idle - Wait for gmu to become idle
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_wait_for_idle(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_rscc_sleep_sequence - Trigger rscc sleep sequence
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_rscc_sleep_sequence(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_rscc_wakeup_sequence - Trigger rscc wakeup sequence
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_rscc_wakeup_sequence(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_halt_gbif - Halt CX and GX requests in GBIF
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Halt new client and AXI requests in GBIF and wait for the pending
+ * transactions to drain; the caller is responsible for de-asserting the halt
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_halt_gbif(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_load_pdc_ucode - Load and enable pdc sequence
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_load_pdc_ucode(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_load_rsc_ucode - Load rscc sequence
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_load_rsc_ucode(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_remove - Clean up gmu probed resources
+ * @device: Pointer to the kgsl device
+ */
+void a6xx_gmu_remove(struct kgsl_device *device);
+
+/**
+ * a6xx_gmu_enable_clks - Enable gmu clocks
+ * @adreno_dev: Pointer to the adreno device
+ * @level: GMU frequency level
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level);
+
+/**
+ * a6xx_gmu_handle_watchdog - Handle watchdog interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_gmu_handle_watchdog(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_gmu_send_nmi - Send NMI to GMU
+ * @device: Pointer to the kgsl device
+ * @force: Boolean to forcefully send NMI irrespective of GMU state
+ */
+void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force);
+
+/**
+ * a6xx_gmu_add_to_minidump - Register a6xx_device with va minidump
+ * @adreno_dev: Pointer to the adreno device
+ */
+int a6xx_gmu_add_to_minidump(struct adreno_device *adreno_dev);
+
+#endif

+ 469 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_gmu_snapshot.c

@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "a6xx_reg.h"
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_a6xx_gmu.h"
+#include "adreno_snapshot.h"
+#include "kgsl_device.h"
+
+static const unsigned int a6xx_gmu_gx_registers[] = {
+	/* GMU GX */
+	0x1A800, 0x1A800, 0x1A810, 0x1A813, 0x1A816, 0x1A816, 0x1A818, 0x1A81B,
+	0x1A81E, 0x1A81E, 0x1A820, 0x1A823, 0x1A826, 0x1A826, 0x1A828, 0x1A82B,
+	0x1A82E, 0x1A82E, 0x1A830, 0x1A833, 0x1A836, 0x1A836, 0x1A838, 0x1A83B,
+	0x1A83E, 0x1A83E, 0x1A840, 0x1A843, 0x1A846, 0x1A846, 0x1A880, 0x1A884,
+	0x1A900, 0x1A92B, 0x1A940, 0x1A940,
+};
+
+static const unsigned int a6xx_gmu_tcm_registers[] = {
+	/* ITCM */
+	0x1B400, 0x1C3FF,
+	/* DTCM */
+	0x1C400, 0x1D3FF,
+};
+
+static const unsigned int a6xx_gmu_registers[] = {
+	/* GMU CX */
+	0x1F400, 0x1F407, 0x1F410, 0x1F412, 0x1F500, 0x1F500, 0x1F507, 0x1F50A,
+	0x1F800, 0x1F804, 0x1F807, 0x1F808, 0x1F80B, 0x1F80C, 0x1F80F, 0x1F81C,
+	0x1F824, 0x1F82A, 0x1F82D, 0x1F830, 0x1F840, 0x1F853, 0x1F887, 0x1F889,
+	0x1F8A0, 0x1F8A2, 0x1F8A4, 0x1F8AF, 0x1F8C0, 0x1F8C3, 0x1F8D0, 0x1F8D0,
+	0x1F8E4, 0x1F8E4, 0x1F8E8, 0x1F8EC, 0x1F900, 0x1F903, 0x1F940, 0x1F940,
+	0x1F942, 0x1F944, 0x1F94C, 0x1F94D, 0x1F94F, 0x1F951, 0x1F954, 0x1F954,
+	0x1F957, 0x1F958, 0x1F95D, 0x1F95D, 0x1F962, 0x1F962, 0x1F964, 0x1F965,
+	0x1F980, 0x1F986, 0x1F990, 0x1F99E, 0x1F9C0, 0x1F9C0, 0x1F9C5, 0x1F9CC,
+	0x1F9E0, 0x1F9E2, 0x1F9F0, 0x1F9F0, 0x1FA00, 0x1FA01,
+	/* GMU AO */
+	0x23B00, 0x23B16,
+};
+
+static const unsigned int a660_gmu_registers[] = {
+	/* GMU CX */
+	0x1F408, 0x1F40D, 0x1F40F, 0x1F40F, 0x1F50B, 0x1F50B, 0x1F860, 0x1F860,
+	0x1F870, 0x1F877, 0x1F8C4, 0x1F8C4, 0x1F8F0, 0x1F8F1, 0x1F948, 0x1F94A,
+	0x1F966, 0x1F96B, 0x1F970, 0x1F970, 0x1F972, 0x1F979, 0x1F9CD, 0x1F9D4,
+	0x1FA02, 0x1FA03, 0x20000, 0x20001, 0x20004, 0x20004, 0x20008, 0x20012,
+	0x20018, 0x20018,
+	/* GMU AO LPAC */
+	0x23B30, 0x23B30,
+};
+
+static const unsigned int a6xx_gmu_gpucc_registers[] = {
+	/* GPU CC */
+	0x24000, 0x24012, 0x24040, 0x24052, 0x24400, 0x24404, 0x24407, 0x2440B,
+	0x24415, 0x2441C, 0x2441E, 0x2442D, 0x2443C, 0x2443D, 0x2443F, 0x24440,
+	0x24442, 0x24449, 0x24458, 0x2445A, 0x24540, 0x2455E, 0x24800, 0x24802,
+	0x24C00, 0x24C02, 0x25400, 0x25402, 0x25800, 0x25802, 0x25C00, 0x25C02,
+	0x26000, 0x26002,
+	/* GPU CC ACD */
+	0x26400, 0x26416, 0x26420, 0x26427,
+};
+
+static const unsigned int a662_gmu_gpucc_registers[] = {
+	/* GPU CC */
+	0x24000, 0x2400e, 0x24400, 0x2440e, 0x24800, 0x24805, 0x24c00, 0x24cff,
+	0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, 0x26400, 0x26405,
+	0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26432, 0x26441, 0x26455,
+	0x26466, 0x26468, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e,
+	0x264a0, 0x264a3, 0x264b3, 0x264b5, 0x264c5, 0x264c7, 0x264d6, 0x264d8,
+	0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2650b, 0x2650c, 0x2651c, 0x2651e,
+	0x26540, 0x26570, 0x26600, 0x26616, 0x26620, 0x2662d,
+};
+
+static const unsigned int a663_gmu_gpucc_registers[] = {
+	/* GPU CC */
+	0x24000, 0x2400e, 0x24400, 0x2440e, 0x25800, 0x25804, 0x25c00, 0x25c04,
+	0x26000, 0x26004, 0x26400, 0x26405, 0x26414, 0x2641d, 0x2642a, 0x26430,
+	0x26432, 0x26432, 0x26441, 0x26455, 0x26466, 0x26468, 0x26478, 0x2647a,
+	0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a3, 0x264b3, 0x264b5,
+	0x264c5, 0x264c7, 0x264d6, 0x264d8, 0x264e8, 0x264e9, 0x264f9, 0x264fc,
+	0x2650b, 0x2650c, 0x2651c, 0x2651e, 0x26540, 0x26570, 0x26600, 0x26616,
+	0x26620, 0x2662d,
+};
+
+static const unsigned int a630_rscc_snapshot_registers[] = {
+	0x23400, 0x23434, 0x23436, 0x23436, 0x23480, 0x23484, 0x23489, 0x2348C,
+	0x23491, 0x23494, 0x23499, 0x2349C, 0x234A1, 0x234A4, 0x234A9, 0x234AC,
+	0x23500, 0x23502, 0x23504, 0x23507, 0x23514, 0x23519, 0x23524, 0x2352B,
+	0x23580, 0x23597, 0x23740, 0x23741, 0x23744, 0x23747, 0x2374C, 0x23787,
+	0x237EC, 0x237EF, 0x237F4, 0x2382F, 0x23894, 0x23897, 0x2389C, 0x238D7,
+	0x2393C, 0x2393F, 0x23944, 0x2397F,
+};
+
+static const unsigned int a6xx_rscc_snapshot_registers[] = {
+	0x23400, 0x23434, 0x23436, 0x23436, 0x23440, 0x23440, 0x23480, 0x23484,
+	0x23489, 0x2348C, 0x23491, 0x23494, 0x23499, 0x2349C, 0x234A1, 0x234A4,
+	0x234A9, 0x234AC, 0x23500, 0x23502, 0x23504, 0x23507, 0x23514, 0x23519,
+	0x23524, 0x2352B, 0x23580, 0x23597, 0x23740, 0x23741, 0x23744, 0x23747,
+	0x2374C, 0x23787, 0x237EC, 0x237EF, 0x237F4, 0x2382F, 0x23894, 0x23897,
+	0x2389C, 0x238D7, 0x2393C, 0x2393F, 0x23944, 0x2397F,
+};
+
+static const unsigned int a650_rscc_registers[] = {
+	0x38000, 0x38034, 0x38036, 0x38036, 0x38040, 0x38042, 0x38080, 0x38084,
+	0x38089, 0x3808C, 0x38091, 0x38094, 0x38099, 0x3809C, 0x380A1, 0x380A4,
+	0x380A9, 0x380AC, 0x38100, 0x38102, 0x38104, 0x38107, 0x38114, 0x38119,
+	0x38124, 0x3812E, 0x38180, 0x38197, 0x38340, 0x38341, 0x38344, 0x38347,
+	0x3834C, 0x3834F, 0x38351, 0x38354, 0x38356, 0x38359, 0x3835B, 0x3835E,
+	0x38360, 0x38363, 0x38365, 0x38368, 0x3836A, 0x3836D, 0x3836F, 0x38372,
+	0x383EC, 0x383EF, 0x383F4, 0x383F7, 0x383F9, 0x383FC, 0x383FE, 0x38401,
+	0x38403, 0x38406, 0x38408, 0x3840B, 0x3840D, 0x38410, 0x38412, 0x38415,
+	0x38417, 0x3841A, 0x38494, 0x38497, 0x3849C, 0x3849F, 0x384A1, 0x384A4,
+	0x384A6, 0x384A9, 0x384AB, 0x384AE, 0x384B0, 0x384B3, 0x384B5, 0x384B8,
+	0x384BA, 0x384BD, 0x384BF, 0x384C2, 0x3853C, 0x3853F, 0x38544, 0x38547,
+	0x38549, 0x3854C, 0x3854E, 0x38551, 0x38553, 0x38556, 0x38558, 0x3855B,
+	0x3855D, 0x38560, 0x38562, 0x38565, 0x38567, 0x3856A, 0x385E4, 0x385E7,
+	0x385EC, 0x385EF, 0x385F1, 0x385F4, 0x385F6, 0x385F9, 0x385FB, 0x385FE,
+	0x38600, 0x38603, 0x38605, 0x38608, 0x3860A, 0x3860D, 0x3860F, 0x38612,
+	0x3868C, 0x3868F, 0x38694, 0x38697, 0x38699, 0x3869C, 0x3869E, 0x386A1,
+	0x386A3, 0x386A6, 0x386A8, 0x386AB, 0x386AD, 0x386B0, 0x386B2, 0x386B5,
+	0x386B7, 0x386BA, 0x38734, 0x38737, 0x3873C, 0x3873F, 0x38741, 0x38744,
+	0x38746, 0x38749, 0x3874B, 0x3874E, 0x38750, 0x38753, 0x38755, 0x38758,
+	0x3875A, 0x3875D, 0x3875F, 0x38762, 0x387DC, 0x387DF, 0x387E4, 0x387E7,
+	0x387E9, 0x387EC, 0x387EE, 0x387F1, 0x387F3, 0x387F6, 0x387F8, 0x387FB,
+	0x387FD, 0x38800, 0x38802, 0x38805, 0x38807, 0x3880A, 0x38884, 0x38887,
+	0x3888C, 0x3888F, 0x38891, 0x38894, 0x38896, 0x38899, 0x3889B, 0x3889E,
+	0x388A0, 0x388A3, 0x388A5, 0x388A8, 0x388AA, 0x388AD, 0x388AF, 0x388B2,
+	0x3892C, 0x3892F, 0x38934, 0x38937, 0x38939, 0x3893C, 0x3893E, 0x38941,
+	0x38943, 0x38946, 0x38948, 0x3894B, 0x3894D, 0x38950, 0x38952, 0x38955,
+	0x38957, 0x3895A, 0x38B50, 0x38B51, 0x38B53, 0x38B55, 0x38B5A, 0x38B5A,
+	0x38B5F, 0x38B5F, 0x38B64, 0x38B64, 0x38B69, 0x38B69, 0x38B6E, 0x38B6E,
+	0x38B73, 0x38B73, 0x38BF8, 0x38BF8, 0x38BFD, 0x38BFD, 0x38C02, 0x38C02,
+	0x38C07, 0x38C07, 0x38C0C, 0x38C0C, 0x38C11, 0x38C11, 0x38C16, 0x38C16,
+	0x38C1B, 0x38C1B, 0x38CA0, 0x38CA0, 0x38CA5, 0x38CA5, 0x38CAA, 0x38CAA,
+	0x38CAF, 0x38CAF, 0x38CB4, 0x38CB4, 0x38CB9, 0x38CB9, 0x38CBE, 0x38CBE,
+	0x38CC3, 0x38CC3, 0x38D48, 0x38D48, 0x38D4D, 0x38D4D, 0x38D52, 0x38D52,
+	0x38D57, 0x38D57, 0x38D5C, 0x38D5C, 0x38D61, 0x38D61, 0x38D66, 0x38D66,
+	0x38D6B, 0x38D6B, 0x38DF0, 0x38DF0, 0x38DF5, 0x38DF5, 0x38DFA, 0x38DFA,
+	0x38DFF, 0x38DFF, 0x38E04, 0x38E04, 0x38E09, 0x38E09, 0x38E0E, 0x38E0E,
+	0x38E13, 0x38E13, 0x38E98, 0x38E98, 0x38E9D, 0x38E9D, 0x38EA2, 0x38EA2,
+	0x38EA7, 0x38EA7, 0x38EAC, 0x38EAC, 0x38EB1, 0x38EB1, 0x38EB6, 0x38EB6,
+	0x38EBB, 0x38EBB, 0x38F40, 0x38F40, 0x38F45, 0x38F45, 0x38F4A, 0x38F4A,
+	0x38F4F, 0x38F4F, 0x38F54, 0x38F54, 0x38F59, 0x38F59, 0x38F5E, 0x38F5E,
+	0x38F63, 0x38F63, 0x38FE8, 0x38FE8, 0x38FED, 0x38FED, 0x38FF2, 0x38FF2,
+	0x38FF7, 0x38FF7, 0x38FFC, 0x38FFC, 0x39001, 0x39001, 0x39006, 0x39006,
+	0x3900B, 0x3900B, 0x39090, 0x39090, 0x39095, 0x39095, 0x3909A, 0x3909A,
+	0x3909F, 0x3909F, 0x390A4, 0x390A4, 0x390A9, 0x390A9, 0x390AE, 0x390AE,
+	0x390B3, 0x390B3, 0x39138, 0x39138, 0x3913D, 0x3913D, 0x39142, 0x39142,
+	0x39147, 0x39147, 0x3914C, 0x3914C, 0x39151, 0x39151, 0x39156, 0x39156,
+	0x3915B, 0x3915B,
+};
+
+static size_t a6xx_snapshot_gmu_mem(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_gmu_mem *mem_hdr =
+		(struct kgsl_snapshot_gmu_mem *)buf;
+	unsigned int *data = (unsigned int *)
+		(buf + sizeof(*mem_hdr));
+	struct gmu_mem_type_desc *desc = priv;
+
+	if (priv == NULL || desc->memdesc->hostptr == NULL)
+		return 0;
+
+	if (remain < desc->memdesc->size + sizeof(*mem_hdr)) {
+		dev_err(device->dev,
+			"snapshot: Not enough memory for the gmu section %d\n",
+			desc->type);
+		return 0;
+	}
+
+	memset(mem_hdr, 0, sizeof(*mem_hdr));
+	mem_hdr->type = desc->type;
+	mem_hdr->hostaddr = (uintptr_t)desc->memdesc->hostptr;
+	mem_hdr->gmuaddr = desc->memdesc->gmuaddr;
+	mem_hdr->gpuaddr = 0;
+
+	/* Copy the GMU memory contents into the snapshot */
+	memcpy(data, desc->memdesc->hostptr, desc->memdesc->size);
+
+	return desc->memdesc->size + sizeof(*mem_hdr);
+}
+
+static size_t a6xx_gmu_snapshot_dtcm(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_gmu_mem *mem_hdr =
+		(struct kgsl_snapshot_gmu_mem *)buf;
+	struct a6xx_gmu_device *gmu = (struct a6xx_gmu_device *)priv;
+	u32 *data = (u32 *)(buf + sizeof(*mem_hdr));
+	u32 i;
+
+	if (remain < gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr)) {
+		SNAPSHOT_ERR_NOMEM(device, "GMU DTCM Memory");
+		return 0;
+	}
+
+	mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
+	mem_hdr->hostaddr = 0;
+	mem_hdr->gmuaddr = gmu->vma[GMU_DTCM].start;
+	mem_hdr->gpuaddr = 0;
+
+	/* FIXME: use a bulk read? */
+	for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++)
+		gmu_core_regread(device, A6XX_GMU_CM3_DTCM_START + i, data++);
+
+	return gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr);
+}
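+
+/*
+ * A minimal sketch of the bulk read suggested by the FIXME above, assuming
+ * the DTCM range were exposed through an ioremapped pointer (no such mapping
+ * exists in this patch); illustrative only and therefore not built.
+ */
+#if 0
+static void a6xx_gmu_copy_dtcm_bulk(struct a6xx_gmu_device *gmu,
+		void __iomem *dtcm_virt, u32 *data)
+{
+	/* Copy the whole DTCM in one shot instead of one dword at a time */
+	memcpy_fromio(data, dtcm_virt, gmu->vma[GMU_DTCM].size);
+}
+#endif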
+
+static size_t a6xx_gmu_snapshot_itcm(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_gmu_mem *mem_hdr =
+			(struct kgsl_snapshot_gmu_mem *)buf;
+	void *dest = buf + sizeof(*mem_hdr);
+	struct a6xx_gmu_device *gmu = (struct a6xx_gmu_device *)priv;
+
+	if (!gmu->itcm_shadow) {
+		dev_err(&gmu->pdev->dev, "ITCM not captured\n");
+		return 0;
+	}
+
+	if (remain < gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr)) {
+		SNAPSHOT_ERR_NOMEM(device, "GMU ITCM Memory");
+		return 0;
+	}
+
+	mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
+	mem_hdr->hostaddr = 0;
+	mem_hdr->gmuaddr = gmu->vma[GMU_ITCM].start;
+	mem_hdr->gpuaddr = 0;
+
+	memcpy(dest, gmu->itcm_shadow, gmu->vma[GMU_ITCM].size);
+
+	return gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr);
+}
+
+static void a6xx_gmu_snapshot_memories(struct kgsl_device *device,
+	struct a6xx_gmu_device *gmu, struct kgsl_snapshot *snapshot)
+{
+	struct gmu_mem_type_desc desc;
+	struct kgsl_memdesc *md;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(gmu->gmu_globals); i++) {
+
+		md = &gmu->gmu_globals[i];
+		if (!md->size)
+			continue;
+
+		desc.memdesc = md;
+		if (md == gmu->hfi.hfi_mem)
+			desc.type = SNAPSHOT_GMU_MEM_HFI;
+		else if (md == gmu->gmu_log)
+			desc.type = SNAPSHOT_GMU_MEM_LOG;
+		else if (md == gmu->dump_mem)
+			desc.type = SNAPSHOT_GMU_MEM_DEBUG;
+		else if (md == gmu->vrb)
+			desc.type = SNAPSHOT_GMU_MEM_VRB;
+		else if (md == gmu->trace.md)
+			desc.type = SNAPSHOT_GMU_MEM_TRACE;
+		else
+			desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
+
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+			snapshot, a6xx_snapshot_gmu_mem, &desc);
+	}
+}
+
+struct kgsl_snapshot_gmu_version {
+	uint32_t type;
+	uint32_t value;
+};
+
+static size_t a6xx_snapshot_gmu_version(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	uint32_t *data = (uint32_t *) (buf + sizeof(*header));
+	struct kgsl_snapshot_gmu_version *ver = priv;
+
+	if (remain < DEBUG_SECTION_SZ(1)) {
+		SNAPSHOT_ERR_NOMEM(device, "GMU Version");
+		return 0;
+	}
+
+	header->type = ver->type;
+	header->size = 1;
+
+	*data = ver->value;
+
+	return DEBUG_SECTION_SZ(1);
+}
+
+static void a6xx_gmu_snapshot_versions(struct kgsl_device *device,
+		struct a6xx_gmu_device *gmu,
+		struct kgsl_snapshot *snapshot)
+{
+	int i;
+
+	struct kgsl_snapshot_gmu_version gmu_vers[] = {
+		{ .type = SNAPSHOT_DEBUG_GMU_CORE_VERSION,
+			.value = gmu->ver.core, },
+		{ .type = SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION,
+			.value = gmu->ver.core_dev, },
+		{ .type = SNAPSHOT_DEBUG_GMU_PWR_VERSION,
+			.value = gmu->ver.pwr, },
+		{ .type = SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION,
+			.value = gmu->ver.pwr_dev, },
+		{ .type = SNAPSHOT_DEBUG_GMU_HFI_VERSION,
+			.value = gmu->ver.hfi, },
+	};
+
+	for (i = 0; i < ARRAY_SIZE(gmu_vers); i++)
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+				snapshot, a6xx_snapshot_gmu_version,
+				&gmu_vers[i]);
+}
+
+#define RSCC_OFFSET_DWORDS 0x38000
+
+static size_t a6xx_snapshot_rscc_registers(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
+	struct kgsl_snapshot_registers *regs = priv;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int count = 0, j, k;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	/* Figure out how many registers we are going to dump */
+	for (j = 0; j < regs->count; j++) {
+		int start = regs->regs[j * 2];
+		int end = regs->regs[j * 2 + 1];
+
+		count += (end - start + 1);
+	}
+
+	if (remain < (count * 8) + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "RSCC REGISTERS");
+		return 0;
+	}
+
+	for (j = 0; j < regs->count; j++) {
+		unsigned int start = regs->regs[j * 2];
+		unsigned int end = regs->regs[j * 2 + 1];
+
+		for (k = start; k <= end; k++) {
+			unsigned int val;
+
+			val = __raw_readl(gmu->rscc_virt +
+				((k - RSCC_OFFSET_DWORDS) << 2));
+			*data++ = k;
+			*data++ = val;
+		}
+	}
+
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
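+
+/*
+ * A minimal sketch of the two conventions used above, assuming the register
+ * tables and the gmu->rscc_virt mapping in this file: every dumped register
+ * costs 8 bytes (one dword for the address, one for the value), and RSCC
+ * register numbers are dword offsets rebased against RSCC_OFFSET_DWORDS
+ * before being turned into a byte offset. Illustrative only, not built.
+ */
+#if 0
+static void __iomem *rscc_reg_addr(struct a6xx_gmu_device *gmu, u32 reg)
+{
+	/* Dword register number -> byte offset into the RSCC mapping */
+	return gmu->rscc_virt + ((reg - RSCC_OFFSET_DWORDS) << 2);
+}
+#endif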
+
+static void snapshot_rscc_registers(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* RSCC registers are on cx */
+	if (adreno_is_a650_family(adreno_dev)) {
+		struct kgsl_snapshot_registers r;
+
+		r.regs = a650_rscc_registers;
+		r.count = ARRAY_SIZE(a650_rscc_registers) / 2;
+
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+			snapshot, a6xx_snapshot_rscc_registers, &r);
+	} else if (adreno_is_a615_family(adreno_dev) ||
+			adreno_is_a630(adreno_dev)) {
+		adreno_snapshot_registers(device, snapshot,
+			a630_rscc_snapshot_registers,
+			ARRAY_SIZE(a630_rscc_snapshot_registers) / 2);
+	} else if (adreno_is_a640(adreno_dev) || adreno_is_a680(adreno_dev)) {
+		adreno_snapshot_registers(device, snapshot,
+			a6xx_rscc_snapshot_registers,
+			ARRAY_SIZE(a6xx_rscc_snapshot_registers) / 2);
+	}
+}
+
+/*
+ * a6xx_gmu_device_snapshot() - A6XX GMU snapshot function
+ * @device: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the A6XX GMU specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+void a6xx_gmu_device_snapshot(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+		snapshot, a6xx_gmu_snapshot_itcm, gmu);
+
+	a6xx_gmu_snapshot_versions(device, gmu, snapshot);
+
+	a6xx_gmu_snapshot_memories(device, gmu, snapshot);
+
+	/* Snapshot tcms as registers for legacy targets */
+	if (adreno_is_a630(adreno_dev) ||
+			adreno_is_a615_family(adreno_dev))
+		adreno_snapshot_registers(device, snapshot,
+				a6xx_gmu_tcm_registers,
+				ARRAY_SIZE(a6xx_gmu_tcm_registers) / 2);
+
+	adreno_snapshot_registers(device, snapshot, a6xx_gmu_registers,
+					ARRAY_SIZE(a6xx_gmu_registers) / 2);
+
+	if (adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev))
+		adreno_snapshot_registers(device, snapshot,
+			a662_gmu_gpucc_registers,
+			ARRAY_SIZE(a662_gmu_gpucc_registers) / 2);
+	else if (adreno_is_a663(adreno_dev))
+		adreno_snapshot_registers(device, snapshot,
+			a663_gmu_gpucc_registers,
+			ARRAY_SIZE(a663_gmu_gpucc_registers) / 2);
+	else
+		adreno_snapshot_registers(device, snapshot,
+			a6xx_gmu_gpucc_registers,
+			ARRAY_SIZE(a6xx_gmu_gpucc_registers) / 2);
+
+	/* Snapshot A660 specific GMU registers */
+	if (adreno_is_a660(adreno_dev))
+		adreno_snapshot_registers(device, snapshot, a660_gmu_registers,
+					ARRAY_SIZE(a660_gmu_registers) / 2);
+
+	snapshot_rscc_registers(adreno_dev, snapshot);
+
+	if (!a6xx_gmu_gx_is_on(adreno_dev))
+		goto dtcm;
+
+	/* Set fence to ALLOW mode so registers can be read */
+	kgsl_regwrite(device, A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
+	/* Make sure the previous write posted before reading */
+	wmb();
+
+	adreno_snapshot_registers(device, snapshot,
+			a6xx_gmu_gx_registers,
+			ARRAY_SIZE(a6xx_gmu_gx_registers) / 2);
+
+	/* A stalled SMMU can lead to NoC timeouts when host accesses DTCM */
+	if (adreno_smmu_is_stalled(adreno_dev)) {
+		dev_err(&gmu->pdev->dev,
+			"Not dumping dtcm because SMMU is stalled\n");
+		return;
+	}
+
+dtcm:
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+		snapshot, a6xx_gmu_snapshot_dtcm, gmu);
+}

+ 852 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_hfi.c

@@ -0,0 +1,852 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/nvmem-consumer.h>
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_a6xx_hfi.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+
+/* Below section is for all structures related to HFI queues */
+#define HFI_QUEUE_MAX HFI_QUEUE_DEFAULT_CNT
+
+/* Total header sizes + queue sizes + 16 for alignment */
+#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
+		(HFI_QUEUE_SIZE * HFI_QUEUE_MAX))
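+
+/*
+ * The single HFI allocation therefore holds the queue table (table header
+ * plus one hfi_queue_header per queue), up to 16 bytes of alignment padding,
+ * and HFI_QUEUE_MAX queues of HFI_QUEUE_SIZE bytes each.
+ */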
+
+struct a6xx_hfi *to_a6xx_hfi(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	return &gmu->hfi;
+}
+
+/* Sizes in the functions below are in units of dwords */
+int a6xx_hfi_queue_read(struct a6xx_gmu_device *gmu, uint32_t queue_idx,
+		unsigned int *output, unsigned int max_size)
+{
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	uint32_t *queue;
+	uint32_t msg_hdr;
+	uint32_t i, read;
+	uint32_t size;
+	int result = 0;
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+		return -EINVAL;
+
+	if (hdr->read_index == hdr->write_index)
+		return -ENODATA;
+
+	/* Clear the output data before populating */
+	memset(output, 0, max_size);
+
+	queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
+	msg_hdr = queue[hdr->read_index];
+	size = MSG_HDR_GET_SIZE(msg_hdr);
+
+	if (size > (max_size >> 2)) {
+		dev_err(&gmu->pdev->dev,
+			"HFI message too big: hdr:0x%x rd idx=%d\n",
+			msg_hdr, hdr->read_index);
+		result = -EMSGSIZE;
+		goto done;
+	}
+
+	read = hdr->read_index;
+
+	if (read < hdr->queue_size) {
+		for (i = 0; i < size && i < (max_size >> 2); i++) {
+			output[i] = queue[read];
+			read = (read + 1) % hdr->queue_size;
+		}
+		result = size;
+	} else {
+		/* In case FW messed up */
+		dev_err(&gmu->pdev->dev,
+			"Read index %d greater than queue size %d\n",
+			hdr->read_index, hdr->queue_size);
+		result = -ENODATA;
+	}
+
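+	/*
+	 * HFI v2 pads each message to a 4-dword boundary on the write side,
+	 * so advance the read index to the next aligned slot as well.
+	 */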
+	if (GMU_VER_MAJOR(gmu->ver.hfi) >= 2)
+		read = ALIGN(read, SZ_4) % hdr->queue_size;
+
+	/* For acks, trace the packet for which this ack was sent */
+	if (MSG_HDR_GET_TYPE(msg_hdr) == HFI_MSG_ACK)
+		trace_kgsl_hfi_receive(MSG_HDR_GET_ID(output[1]),
+			MSG_HDR_GET_SIZE(output[1]),
+			MSG_HDR_GET_SEQNUM(output[1]));
+	else
+		trace_kgsl_hfi_receive(MSG_HDR_GET_ID(msg_hdr),
+			MSG_HDR_GET_SIZE(msg_hdr), MSG_HDR_GET_SEQNUM(msg_hdr));
+
+	hfi_update_read_idx(hdr, read);
+
+done:
+	return result;
+}
+
+/* Sizes in the functions below are in units of dwords */
+int a6xx_hfi_queue_write(struct adreno_device *adreno_dev, uint32_t queue_idx,
+		uint32_t *msg, u32 size_bytes)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	uint32_t *queue;
+	uint32_t i, write_idx, read_idx, empty_space;
+	uint32_t size_dwords = size_bytes >> 2;
+	u32 align_size = ALIGN(size_dwords, SZ_4);
+	uint32_t id = MSG_HDR_GET_ID(*msg);
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
+		return -EINVAL;
+
+	queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);
+
+	write_idx = hdr->write_index;
+	read_idx = hdr->read_index;
+
+	empty_space = (write_idx >= read_idx) ?
+			(hdr->queue_size - (write_idx - read_idx))
+			: (read_idx - write_idx);
+
+	if (empty_space <= align_size)
+		return -ENOSPC;
+
+	for (i = 0; i < size_dwords; i++) {
+		queue[write_idx] = msg[i];
+		write_idx = (write_idx + 1) % hdr->queue_size;
+	}
+
+	/* Pad any unused space up to the aligned size with a cookie value */
+	if (GMU_VER_MAJOR(gmu->ver.hfi) >= 2) {
+		for (; i < align_size; i++) {
+			queue[write_idx] = 0xFAFAFAFA;
+			write_idx = (write_idx + 1) % hdr->queue_size;
+		}
+	}
+
+	trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
+
+	hfi_update_write_idx(&hdr->write_index, write_idx);
+
+	return 0;
+}
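+
+/*
+ * A minimal model of the free-space rule used above, under the same
+ * single-reader/single-writer assumptions. Requiring strictly more free
+ * dwords than the aligned message keeps at least one slot unused, so a full
+ * queue is never mistaken for an empty one (read == write). Illustrative
+ * only, not built.
+ */
+#if 0
+static u32 hfi_queue_free_dwords(const struct hfi_queue_header *hdr)
+{
+	u32 write_idx = hdr->write_index;
+	u32 read_idx = hdr->read_index;
+
+	return (write_idx >= read_idx) ?
+		hdr->queue_size - (write_idx - read_idx) :
+		read_idx - write_idx;
+}
+#endif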
+
+int a6xx_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes)
+{
+	int ret;
+
+	ret = a6xx_hfi_queue_write(adreno_dev, HFI_CMD_ID, msg, size_bytes);
+
+	/*
+	 * Memory barrier to make sure packet and write index are written before
+	 * an interrupt is raised
+	 */
+	wmb();
+
+	/* Send interrupt to GMU to receive the message */
+	if (!ret)
+		gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
+			A6XX_GMU_HOST2GMU_INTR_SET,
+			0x1);
+
+	return ret;
+}
+
+/* Sizes of the queue and message are in units of dwords */
+static void init_queues(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	int i;
+	struct hfi_queue_table *tbl;
+	struct hfi_queue_header *hdr;
+	struct {
+		unsigned int idx;
+		unsigned int pri;
+		unsigned int status;
+	} queue[HFI_QUEUE_MAX] = {
+		{ HFI_CMD_IDX, HFI_CMD_PRI, HFI_QUEUE_STATUS_ENABLED },
+		{ HFI_MSG_IDX, HFI_MSG_PRI, HFI_QUEUE_STATUS_ENABLED },
+		{ HFI_DBG_IDX, HFI_DBG_PRI, HFI_QUEUE_STATUS_ENABLED },
+	};
+
+	/*
+	 * Overwrite the queue IDs for A630, A615 and A616 as they use
+	 * legacy firmware. Legacy firmware has different queue IDs for
+	 * message, debug and dispatch queues (dispatch queues aren't used
+	 * on these targets so the queue idx value update is not needed).
+	 */
+	if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) {
+		queue[HFI_MSG_ID].idx = HFI_MSG_IDX_LEGACY;
+		queue[HFI_DBG_ID].idx = HFI_DBG_IDX_LEGACY;
+	}
+
+	/* Fill Table Header */
+	tbl = mem_addr->hostptr;
+	tbl->qtbl_hdr.version = 0;
+	tbl->qtbl_hdr.size = sizeof(struct hfi_queue_table) >> 2;
+	tbl->qtbl_hdr.qhdr0_offset = sizeof(struct hfi_queue_table_header) >> 2;
+	tbl->qtbl_hdr.qhdr_size = sizeof(struct hfi_queue_header) >> 2;
+	tbl->qtbl_hdr.num_q = HFI_QUEUE_MAX;
+	tbl->qtbl_hdr.num_active_q = HFI_QUEUE_MAX;
+
+	memset(&tbl->qhdr[0], 0, sizeof(tbl->qhdr));
+
+	/* Fill Individual Queue Headers */
+	for (i = 0; i < HFI_QUEUE_MAX; i++) {
+		hdr = &tbl->qhdr[i];
+		hdr->start_addr = GMU_QUEUE_START_ADDR(mem_addr->gmuaddr, i);
+		hdr->type = QUEUE_HDR_TYPE(queue[i].idx, queue[i].pri, 0, 0);
+		hdr->status = queue[i].status;
+		hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* convert to dwords */
+	}
+}
+
+int a6xx_hfi_init(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hfi *hfi = &gmu->hfi;
+
+	/* Allocate and map memory for HFI, if not already done */
+	if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
+		hfi->hfi_mem = reserve_gmu_kernel_block(gmu, 0, HFIMEM_SIZE,
+			GMU_NONCACHED_KERNEL, 0);
+		if (!IS_ERR(hfi->hfi_mem))
+			init_queues(adreno_dev);
+	}
+
+	return PTR_ERR_OR_ZERO(hfi->hfi_mem);
+}
+
+int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd,
+	struct pending_cmd *ret_cmd)
+{
+	struct adreno_device *adreno_dev = a6xx_gmu_to_adreno(gmu);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	uint32_t *ack = rcvd;
+	uint32_t hdr = ack[0];
+	uint32_t req_hdr = ack[1];
+
+	if (ret_cmd == NULL)
+		return -EINVAL;
+
+	if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) {
+		memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2);
+		return 0;
+	}
+
+	/* The ack doesn't match the command we are waiting on; log both */
+	dev_err_ratelimited(&gmu->pdev->dev,
+		"HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n",
+		req_hdr, ret_cmd->sent_hdr);
+
+	gmu_core_fault_snapshot(device);
+
+	return -ENODEV;
+}
+
+static int poll_gmu_reg(struct adreno_device *adreno_dev,
+	u32 offsetdwords, unsigned int expected_val,
+	unsigned int mask, unsigned int timeout_ms)
+{
+	unsigned int val;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+	u64 ao_pre_poll, ao_post_poll;
+	bool nmi = false;
+
+	ao_pre_poll = a6xx_read_alwayson(adreno_dev);
+
+	/* FIXME: readl_poll_timeout? */
+	while (time_is_after_jiffies(timeout)) {
+		gmu_core_regread(device, offsetdwords, &val);
+		if ((val & mask) == expected_val)
+			return 0;
+
+		/*
+		 * If GMU firmware fails any assertion, error message is sent
+		 * to KMD and NMI is triggered. So check if GMU is in NMI and
+		 * timeout early. Bits [11:9] of A6XX_GMU_CM3_FW_INIT_RESULT
+		 * contain GMU reset status. Non zero value here indicates that
+		 * GMU reset is active, NMI handler would eventually complete
+		 * and GMU would wait for recovery.
+		 */
+		gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &val);
+		if (val & 0xE00) {
+			nmi = true;
+			break;
+		}
+
+		usleep_range(10, 100);
+	}
+
+	ao_post_poll = a6xx_read_alwayson(adreno_dev);
+
+	/* Check one last time */
+	gmu_core_regread(device, offsetdwords, &val);
+	if ((val & mask) == expected_val)
+		return 0;
+
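+	/*
+	 * Each always-on tick is roughly 52 ns (a ~19.2 MHz counter), so
+	 * ticks * 52 gives nanoseconds and dividing by USEC_PER_SEC yields
+	 * the elapsed poll time in milliseconds.
+	 */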
+	dev_err(&gmu->pdev->dev, "kgsl hfi poll %s: always on: %lld ms\n",
+		nmi ? "abort" : "timeout",
+		div_u64((ao_post_poll - ao_pre_poll) * 52, USEC_PER_SEC));
+
+	return -ETIMEDOUT;
+}
+
+static int a6xx_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev,
+	void *data, u32 size_bytes, struct pending_cmd *ret_cmd)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int rc;
+	uint32_t *cmd = data;
+	struct a6xx_hfi *hfi = &gmu->hfi;
+	unsigned int seqnum = atomic_inc_return(&hfi->seqnum);
+
+	*cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2);
+	if (ret_cmd == NULL)
+		return a6xx_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
+
+	ret_cmd->sent_hdr = cmd[0];
+
+	rc = a6xx_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
+	if (rc)
+		return rc;
+
+	rc = poll_gmu_reg(adreno_dev, A6XX_GMU_GMU2HOST_INTR_INFO,
+		HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT);
+
+	if (rc) {
+		gmu_core_fault_snapshot(device);
+		dev_err(&gmu->pdev->dev,
+		"Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n",
+		cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd));
+		return rc;
+	}
+
+	/* Clear the interrupt */
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR,
+		HFI_IRQ_MSGQ_MASK);
+
+	rc = a6xx_hfi_process_queue(gmu, HFI_MSG_ID, ret_cmd);
+
+	return rc;
+}
+
+int a6xx_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes)
+{
+	struct pending_cmd ret_cmd;
+	int rc;
+
+	memset(&ret_cmd, 0, sizeof(ret_cmd));
+
+	rc = a6xx_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, &ret_cmd);
+	if (rc)
+		return rc;
+
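+	/*
+	 * On success the ack payload is copied into ret_cmd.results:
+	 * results[1] echoes the request header and results[2] carries the
+	 * GMU return code for that request.
+	 */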
+	if (ret_cmd.results[2]) {
+		struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		gmu_core_fault_snapshot(device);
+		dev_err(&gmu->pdev->dev,
+				"HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n",
+				ret_cmd.results[1],
+				ret_cmd.results[2]);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int a6xx_hfi_send_gmu_init(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_gmu_init_cmd cmd = {
+		.seg_id = 0,
+		.dbg_buffer_addr = (unsigned int) gmu->dump_mem->gmuaddr,
+		.dbg_buffer_size = (unsigned int) gmu->dump_mem->size,
+		.boot_state = 0x1,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_INIT);
+	if (ret)
+		return ret;
+
+	return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static int a6xx_hfi_get_fw_version(struct adreno_device *adreno_dev,
+		uint32_t expected_ver, uint32_t *ver)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_fw_version_cmd cmd = {
+		.supported_ver = expected_ver,
+	};
+	int rc;
+	struct pending_cmd ret_cmd;
+
+	rc = CMD_MSG_HDR(cmd, H2F_MSG_FW_VER);
+	if (rc)
+		return rc;
+
+	memset(&ret_cmd, 0, sizeof(ret_cmd));
+
+	rc = a6xx_hfi_send_cmd_wait_inline(adreno_dev, &cmd, sizeof(cmd), &ret_cmd);
+	if (rc)
+		return rc;
+
+	rc = ret_cmd.results[2];
+	if (!rc)
+		*ver = ret_cmd.results[3];
+	else
+		dev_err(&gmu->pdev->dev,
+			"gmu get fw ver failed with error=%d\n", rc);
+
+	return rc;
+}
+
+int a6xx_hfi_send_core_fw_start(struct adreno_device *adreno_dev)
+{
+	struct hfi_core_fw_start_cmd cmd = {
+		.handle = 0x0,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START);
+	if (ret)
+		return ret;
+
+	return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static const char *feature_to_string(uint32_t feature)
+{
+	if (feature == HFI_FEATURE_ACD)
+		return "ACD";
+	else if (feature == HFI_FEATURE_LM)
+		return "LM";
+
+	return "unknown";
+}
+
+int a6xx_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
+	uint32_t feature, uint32_t enable, uint32_t data)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_feature_ctrl_cmd cmd = {
+		.feature = feature,
+		.enable = enable,
+		.data = data,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_FEATURE_CTRL);
+	if (ret)
+		return ret;
+
+	ret = a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+	if (ret)
+		dev_err(&gmu->pdev->dev,
+				"Unable to %s feature %s (%d)\n",
+				enable ? "enable" : "disable",
+				feature_to_string(feature),
+				feature);
+	return ret;
+}
+
+int a6xx_hfi_send_set_value(struct adreno_device *adreno_dev,
+		u32 type, u32 subtype, u32 data)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_set_value_cmd cmd = {
+		.type = type,
+		.subtype = subtype,
+		.data = data,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_SET_VALUE);
+	if (ret)
+		return ret;
+
+	ret = a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+	if (ret)
+		dev_err(&gmu->pdev->dev,
+			"Unable to set HFI Value %d, %d to %d, error = %d\n",
+			type, subtype, data, ret);
+	return ret;
+}
+
+static int a6xx_hfi_send_dcvstbl_v1(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table;
+	struct hfi_dcvstable_v1_cmd cmd = {
+		.gpu_level_num = table->gpu_level_num,
+		.gmu_level_num = table->gmu_level_num,
+	};
+	int i, ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_PERF_TBL);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < table->gpu_level_num; i++) {
+		cmd.gx_votes[i].vote = table->gx_votes[i].vote;
+		cmd.gx_votes[i].freq = table->gx_votes[i].freq;
+	}
+
+	cmd.cx_votes[0].vote = table->cx_votes[0].vote;
+	cmd.cx_votes[0].freq = table->cx_votes[0].freq;
+	cmd.cx_votes[1].vote = table->cx_votes[1].vote;
+	cmd.cx_votes[1].freq = table->cx_votes[1].freq;
+
+	return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static int a6xx_hfi_send_test(struct adreno_device *adreno_dev)
+{
+	struct hfi_test_cmd cmd;
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_TEST);
+	if (ret)
+		return ret;
+
+	cmd.data = 0;
+
+	return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+void adreno_a6xx_receive_err_req(struct a6xx_gmu_device *gmu, void *rcvd)
+{
+	struct hfi_err_cmd *cmd = rcvd;
+
+	dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n",
+			((cmd->error_code >> 16) & 0xFFFF),
+			(cmd->error_code & 0xFFFF),
+			(char *) cmd->data);
+}
+
+void adreno_a6xx_receive_debug_req(struct a6xx_gmu_device *gmu, void *rcvd)
+{
+	struct hfi_debug_cmd *cmd = rcvd;
+
+	dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n",
+			cmd->type, cmd->timestamp, cmd->data);
+}
+
+static void a6xx_hfi_v1_receiver(struct a6xx_gmu_device *gmu, uint32_t *rcvd,
+	struct pending_cmd *ret_cmd)
+{
+	/* V1 ACK Handler */
+	if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_V1_MSG_ACK) {
+		a6xx_receive_ack_cmd(gmu, rcvd, ret_cmd);
+		return;
+	}
+
+	/* V1 Request Handler */
+	switch (MSG_HDR_GET_ID(rcvd[0])) {
+	case F2H_MSG_ERR: /* No Reply */
+		adreno_a6xx_receive_err_req(gmu, rcvd);
+		break;
+	case F2H_MSG_DEBUG: /* No Reply */
+		adreno_a6xx_receive_debug_req(gmu, rcvd);
+		break;
+	default: /* No Reply */
+		dev_err(&gmu->pdev->dev,
+				"HFI V1 request %d not supported\n",
+				MSG_HDR_GET_ID(rcvd[0]));
+		break;
+	}
+}
+
+int a6xx_hfi_process_queue(struct a6xx_gmu_device *gmu,
+		uint32_t queue_idx, struct pending_cmd *ret_cmd)
+{
+	uint32_t rcvd[MAX_RCVD_SIZE];
+
+	while (a6xx_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) {
+		/* Special case if we're v1 */
+		if (GMU_VER_MAJOR(gmu->ver.hfi) < 2) {
+			a6xx_hfi_v1_receiver(gmu, rcvd, ret_cmd);
+			continue;
+		}
+
+		/* V2 ACK Handler */
+		if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
+			int ret = a6xx_receive_ack_cmd(gmu, rcvd, ret_cmd);
+
+			if (ret)
+				return ret;
+			continue;
+		}
+
+		/* V2 Request Handler */
+		switch (MSG_HDR_GET_ID(rcvd[0])) {
+		case F2H_MSG_ERR: /* No Reply */
+			adreno_a6xx_receive_err_req(gmu, rcvd);
+			break;
+		case F2H_MSG_DEBUG: /* No Reply */
+			adreno_a6xx_receive_debug_req(gmu, rcvd);
+			break;
+		default: /* No Reply */
+			dev_err(&gmu->pdev->dev,
+				"HFI request %d not supported\n",
+				MSG_HDR_GET_ID(rcvd[0]));
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int a6xx_hfi_verify_fw_version(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	int result;
+	unsigned int ver, major, minor;
+
+	/* GMU version is already known, so don't waste time querying it again */
+	if (gmu->ver.core != 0)
+		return 0;
+
+	major = a6xx_core->gmu_major;
+	minor = a6xx_core->gmu_minor;
+
+	result = a6xx_hfi_get_fw_version(adreno_dev, GMU_VERSION(major, minor, 0),
+			&ver);
+	if (result) {
+		dev_err_once(&gmu->pdev->dev,
+				"Failed to get FW version via HFI\n");
+		return result;
+	}
+
+	/* For now, warn once. Could return error later if needed */
+	if (major != GMU_VER_MAJOR(ver))
+		dev_err_once(&gmu->pdev->dev,
+				"FW Major Error: Wanted %d, got %d\n",
+				major, GMU_VER_MAJOR(ver));
+
+	if (minor > GMU_VER_MINOR(ver))
+		dev_err_once(&gmu->pdev->dev,
+				"FW Minor Error: Wanted >= %d, got %d\n",
+				minor, GMU_VER_MINOR(ver));
+
+	/* Save the gmu version information */
+	gmu->ver.core = ver;
+
+	return 0;
+}
+
+int a6xx_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	if (!adreno_dev->bcl_enabled)
+		return 0;
+
+	ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, 0);
+
+	return ret;
+}
+
+int a6xx_hfi_send_lm_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct hfi_set_value_cmd req;
+	u32 slope = 0;
+	int ret;
+
+	if (!adreno_dev->lm_enabled)
+		return 0;
+
+	memset(&req, 0, sizeof(req));
+
+	nvmem_cell_read_u32(&device->pdev->dev, "isense_slope", &slope);
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_SET_VALUE);
+	if (ret)
+		return ret;
+
+	req.type = HFI_VALUE_LM_CS0;
+	req.subtype = 0;
+	req.data = slope;
+
+	ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_LM, 1,
+			device->pwrctrl.throttle_mask);
+
+	if (!ret)
+		ret = a6xx_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+
+	return ret;
+}
+
+int a6xx_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret = 0;
+
+	if (adreno_dev->acd_enabled) {
+		ret = a6xx_hfi_send_generic_req(adreno_dev,
+			&gmu->hfi.acd_table, sizeof(gmu->hfi.acd_table));
+		if (!ret)
+			ret = a6xx_hfi_send_feature_ctrl(adreno_dev,
+				HFI_FEATURE_ACD, 1, 0);
+	}
+
+	return ret;
+}
+
+static void reset_hfi_queues(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr;
+	unsigned int i;
+
+	/* Flush HFI queues */
+	for (i = 0; i < HFI_QUEUE_MAX; i++) {
+		hdr = &tbl->qhdr[i];
+
+		if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+			continue;
+
+		hdr->read_index = hdr->write_index;
+	}
+}
+
+int a6xx_hfi_start(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int result;
+
+	reset_hfi_queues(adreno_dev);
+
+	/* This is legacy HFI message for A630 and A615 family firmware */
+	if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) {
+		result = a6xx_hfi_send_gmu_init(adreno_dev);
+		if (result)
+			goto err;
+	}
+
+	result = a6xx_hfi_verify_fw_version(adreno_dev);
+	if (result)
+		goto err;
+
+	if (GMU_VER_MAJOR(gmu->ver.hfi) < 2)
+		result = a6xx_hfi_send_dcvstbl_v1(adreno_dev);
+	else
+		result = a6xx_hfi_send_generic_req(adreno_dev,
+			&gmu->hfi.dcvs_table, sizeof(gmu->hfi.dcvs_table));
+	if (result)
+		goto err;
+
+	result = a6xx_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table,
+			sizeof(gmu->hfi.bw_table));
+	if (result)
+		goto err;
+
+	/*
+	 * For HFI v2 and later firmware, send the feature control packets
+	 * followed by H2F_MSG_CORE_FW_START. For legacy firmware, if the
+	 * HFI_USE_REG quirk is enabled, send H2F_MSG_TEST to tell the GMU
+	 * that no more HFIs will be sent until the next boot.
+	 */
+	if (GMU_VER_MAJOR(gmu->ver.hfi) >= 2) {
+		result = a6xx_hfi_send_acd_feature_ctrl(adreno_dev);
+		if (result)
+			goto err;
+
+		result = a6xx_hfi_send_lm_feature_ctrl(adreno_dev);
+		if (result)
+			goto err;
+
+		result = a6xx_hfi_send_bcl_feature_ctrl(adreno_dev);
+		if (result)
+			goto err;
+
+		result = a6xx_hfi_send_core_fw_start(adreno_dev);
+		if (result)
+			goto err;
+	} else {
+		if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) {
+			result = a6xx_hfi_send_test(adreno_dev);
+			if (result)
+				goto err;
+		}
+	}
+
+	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	/* Request default DCVS level */
+	result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+	if (result)
+		goto err;
+
+	/* Request default BW vote */
+	result = kgsl_pwrctrl_axi(device, true);
+
+err:
+	if (result)
+		a6xx_hfi_stop(adreno_dev);
+
+	return result;
+}
+
+void a6xx_hfi_stop(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_pwrctrl_axi(device, false);
+
+	clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+}
+
+/* HFI interrupt handler */
+irqreturn_t a6xx_hfi_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(ADRENO_DEVICE(device));
+	unsigned int status = 0;
+
+	gmu_core_regread(device, A6XX_GMU_GMU2HOST_INTR_INFO, &status);
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MASK);
+
+	if (status & HFI_IRQ_DBGQ_MASK)
+		a6xx_hfi_process_queue(gmu, HFI_DBG_ID, NULL);
+	if (status & HFI_IRQ_CM3_FAULT_MASK) {
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"GMU CM3 fault interrupt received\n");
+		atomic_set(&gmu->cm3_fault, 1);
+
+		/* make sure other CPUs see the update */
+		smp_wmb();
+	}
+	if (status & ~HFI_IRQ_MASK)
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"Unhandled HFI interrupts 0x%lx\n",
+				status & ~HFI_IRQ_MASK);
+
+	return IRQ_HANDLED;
+}

+ 188 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_hfi.h

@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_A6XX_HFI_H
+#define __ADRENO_A6XX_HFI_H
+
+#include "adreno_hfi.h"
+
+/**
+ * struct a6xx_hfi - HFI control structure
+ * @seqnum: atomic counter that is incremented for each message sent. The
+ *	value of the counter is used as sequence number for HFI message
+ * @bw_table: HFI BW table buffer
+ * @acd_table: HFI table for ACD data
+ */
+struct a6xx_hfi {
+	/** @irq: HFI interrupt line */
+	int irq;
+	atomic_t seqnum;
+	/** @hfi_mem: Memory descriptor for the hfi memory */
+	struct kgsl_memdesc *hfi_mem;
+	struct hfi_bwtable_cmd bw_table;
+	struct hfi_acd_table_cmd acd_table;
+	/** @dcvs_table: HFI table for gpu dcvs levels */
+	struct hfi_dcvstable_cmd dcvs_table;
+};
+
+struct a6xx_gmu_device;
+
+/* a6xx_hfi_irq_handler - IRQ handler for HFI interrupts */
+irqreturn_t a6xx_hfi_irq_handler(int irq, void *data);
+
+/**
+ * a6xx_hfi_start - Send the various HFIs during device boot up
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hfi_stop - Stop HFI communication with the GMU
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_hfi_stop(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hfi_init - Initialize hfi resources
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function allocates and sets up hfi queues
+ * when a process creates the very first kgsl instance
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_init(struct adreno_device *adreno_dev);
+
+/* Helper function to get to a6xx hfi struct from adreno device */
+struct a6xx_hfi *to_a6xx_hfi(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hfi_queue_write - Write a command to hfi queue
+ * @adreno_dev: Pointer to the adreno device
+ * @queue_idx: destination queue id
+ * @msg: Data to be written to the queue
+ * @size_bytes: Size of the command in bytes
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
+	u32 *msg, u32 size_bytes);
+
+/**
+ * a6xx_hfi_queue_read - Read data from hfi queue
+ * @gmu: Pointer to the a6xx gmu device
+ * @queue_idx: queue id to read from
+ * @output: Pointer to read the data into
+ * @max_size: Size of the output buffer in bytes
+ *
+ * Return: Size of the read message in dwords on success or negative error
+ * on failure
+ */
+int a6xx_hfi_queue_read(struct a6xx_gmu_device *gmu, u32 queue_idx,
+	u32 *output, u32 max_size);
+
+/**
+ * a6xx_receive_ack_cmd - Process ack type packets
+ * @gmu: Pointer to the a6xx gmu device
+ * @rcvd: Pointer to the data read from hfi queue
+ * @ret_cmd: Container for the hfi packet for which this ack is received
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd,
+	struct pending_cmd *ret_cmd);
+
+/**
+ * a6xx_hfi_send_feature_ctrl - Enable gmu feature via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @feature: feature to be enabled or disabled
+ * @enable: Set 1 to enable or 0 to disable a feature
+ * @data: payload for the send feature hfi packet
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
+	u32 feature, u32 enable, u32 data);
+
+/**
+ * a6xx_hfi_send_set_value - Send gmu set_values via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @type: GMU set_value type
+ * @subtype: GMU set_value subtype
+ * @data: Value to set
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_send_set_value(struct adreno_device *adreno_dev,
+		u32 type, u32 subtype, u32 data);
+
+/**
+ * a6xx_hfi_send_core_fw_start - Send the core fw start hfi
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_send_core_fw_start(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hfi_send_acd_feature_ctrl - Send the acd table and acd feature
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hfi_send_lm_feature_ctrl -  Send the lm feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_send_lm_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hfi_send_generic_req -  Send a generic hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ * @cmd: Pointer to the hfi packet header and data
+ * @size_bytes: Size of the command in bytes
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes);
+
+/**
+ * a6xx_hfi_send_bcl_feature_ctrl -  Send the bcl feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hfi_process_queue - Check hfi queue for messages from gmu
+ * @gmu: Pointer to the a6xx gmu device
+ * @queue_idx: queue id to be processed
+ * @ret_cmd: Container for data needed for waiting for the ack
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_process_queue(struct a6xx_gmu_device *gmu,
+	u32 queue_idx, struct pending_cmd *ret_cmd);
+
+/**
+ * a6xx_hfi_cmdq_write - Write a command to command queue
+ * @adreno_dev: Pointer to the adreno device
+ * @msg: Data to be written to the queue
+ * @size_bytes: Size of the command in bytes
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes);
+void adreno_a6xx_receive_err_req(struct a6xx_gmu_device *gmu, void *rcvd);
+void adreno_a6xx_receive_debug_req(struct a6xx_gmu_device *gmu, void *rcvd);
+#endif

+ 1407 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_hwsched.c

@@ -0,0 +1,1407 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/interconnect.h>
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_a6xx_hwsched.h"
+#include "adreno_hfi.h"
+#include "adreno_snapshot.h"
+#include "kgsl_bus.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+
+static size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv;
+
+	if (remain < rb->size + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "RB");
+		return 0;
+	}
+
+	header->start = 0;
+	header->end = rb->size >> 2;
+	header->rptr = 0;
+	header->rbsize = rb->size >> 2;
+	header->count = rb->size >> 2;
+	header->timestamp_queued = 0;
+	header->timestamp_retired = 0;
+	header->gpuaddr = rb->gpuaddr;
+	header->id = 0;
+
+	memcpy(data, rb->hostptr, rb->size);
+
+	return rb->size + sizeof(*header);
+}
+
+static void a6xx_hwsched_snapshot_preemption_record(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset)
+{
+	struct kgsl_snapshot_section_header *section_header =
+		(struct kgsl_snapshot_section_header *)snapshot->ptr;
+	u8 *dest = snapshot->ptr + sizeof(*section_header);
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)dest;
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(ADRENO_DEVICE(device));
+	u64 ctxt_record_size = A6XX_CP_CTXRECORD_SIZE_IN_BYTES;
+	size_t section_size;
+
+	if (a6xx_core->ctxt_record_size)
+		ctxt_record_size = a6xx_core->ctxt_record_size;
+
+	ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size);
+
+	section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size;
+
+	if (snapshot->remain < section_size) {
+		SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD");
+		return;
+	}
+
+	section_header->magic = SNAPSHOT_SECTION_MAGIC;
+	section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2;
+	section_header->size = section_size;
+
+	header->size = ctxt_record_size >> 2;
+	header->gpuaddr = md->gpuaddr + offset;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	dest += sizeof(*header);
+
+	memcpy(dest, md->hostptr + offset, ctxt_record_size);
+
+	snapshot->ptr += section_header->size;
+	snapshot->remain -= section_header->size;
+	snapshot->size += section_header->size;
+}
+
+static void snapshot_preemption_records(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md)
+{
+	const struct adreno_a6xx_core *a6xx_core =
+		to_a6xx_core(ADRENO_DEVICE(device));
+	u64 ctxt_record_size = A6XX_CP_CTXRECORD_SIZE_IN_BYTES;
+	u64 offset;
+
+	if (a6xx_core->ctxt_record_size)
+		ctxt_record_size = a6xx_core->ctxt_record_size;
+
+	/* All preemption records exist as a single mem alloc entry */
+	for (offset = 0; offset < md->size; offset += ctxt_record_size)
+		a6xx_hwsched_snapshot_preemption_record(device, snapshot, md,
+			offset);
+}
+
+static void *get_rb_hostptr(struct adreno_device *adreno_dev,
+	u64 gpuaddr, u32 size)
+{
+	struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	u64 offset;
+	u32 i;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md;
+
+		if (md && (gpuaddr >= md->gpuaddr) &&
+			((gpuaddr + size) <= (md->gpuaddr + md->size))) {
+			offset = gpuaddr - md->gpuaddr;
+			return md->hostptr + offset;
+		}
+	}
+	return NULL;
+}
+
+static u32 a6xx_copy_gpu_global(void *out, void *in, u32 size)
+{
+	if (out && in) {
+		memcpy(out, in, size);
+		return size;
+	}
+
+	return 0;
+}
+
+static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot, struct payload_section *payload)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_snapshot_section_header *section_header =
+		(struct kgsl_snapshot_section_header *)snapshot->ptr;
+	u8 *buf = snapshot->ptr + sizeof(*section_header);
+	struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	u32 size = adreno_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2;
+	u64 lo, hi, gpuaddr;
+	void *rb_hostptr;
+	char str[16];
+
+	lo = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO);
+	hi = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI);
+	gpuaddr = hi << 32 | lo;
+
+	/* Sanity check to make sure there is enough for the header */
+	if (snapshot->remain < sizeof(*section_header))
+		goto err;
+
+	rb_hostptr = get_rb_hostptr(adreno_dev, gpuaddr, size);
+
+	/* If the GPU address and size don't match any allocation, then abort */
+	if (((snapshot->remain - sizeof(*section_header)) <
+	    (size + sizeof(*header))) ||
+	    !a6xx_copy_gpu_global(data, rb_hostptr, size))
+		goto err;
+
+	if (device->dump_all_ibs) {
+		u64 rbaddr;
+
+		kgsl_regread64(device, A6XX_CP_RB_BASE,
+			       A6XX_CP_RB_BASE_HI, &rbaddr);
+
+		/* Parse all IBs from current RB */
+		if (rbaddr == gpuaddr)
+			adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot);
+	}
+
+	header->start = 0;
+	header->end = size >> 2;
+	header->rptr = adreno_hwsched_parse_payload(payload, KEY_RB_RPTR);
+	header->wptr = adreno_hwsched_parse_payload(payload, KEY_RB_WPTR);
+	header->rbsize = size >> 2;
+	header->count = size >> 2;
+	header->timestamp_queued = adreno_hwsched_parse_payload(payload,
+			KEY_RB_QUEUED_TS);
+	header->timestamp_retired = adreno_hwsched_parse_payload(payload,
+			KEY_RB_RETIRED_TS);
+	header->gpuaddr = gpuaddr;
+	header->id = adreno_hwsched_parse_payload(payload, KEY_RB_ID);
+
+	section_header->magic = SNAPSHOT_SECTION_MAGIC;
+	section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2;
+	section_header->size = size + sizeof(*header) + sizeof(*section_header);
+
+	snapshot->ptr += section_header->size;
+	snapshot->remain -= section_header->size;
+	snapshot->size += section_header->size;
+
+	return;
+err:
+	snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr);
+	SNAPSHOT_ERR_NOMEM(device, str);
+}
+
+static bool parse_payload_rb_legacy(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+	bool ret = false;
+
+	/* Skip if we didn't receive a context bad HFI */
+	if (!cmd->hdr)
+		return false;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd_legacy, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			adreno_hwsched_snapshot_rb_payload(adreno_dev, snapshot, payload);
+			ret = true;
+		}
+
+		i += sizeof(*payload) + (payload->dwords << 2);
+	}
+
+	return ret;
+}
+
+static bool parse_payload_rb(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+	bool ret = false;
+
+	/* Skip if we didn't receive a context bad HFI */
+	if (!cmd->hdr)
+		return false;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			adreno_hwsched_snapshot_rb_payload(adreno_dev,
+							   snapshot, payload);
+			ret = true;
+		}
+
+		i += sizeof(*payload) + (payload->dwords << 2);
+	}
+
+	return ret;
+}
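+
+/*
+ * The legacy and current parsers above differ only in the context-bad header
+ * layout; a shared walker along these lines could serve both once the caller
+ * has computed the payload size from its own header. A hypothetical
+ * refactoring sketch, illustrative only and not built.
+ */
+#if 0
+static bool parse_payload_rb_common(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot, void *start, u32 payload_bytes)
+{
+	u32 i = 0;
+	bool ret = false;
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			adreno_hwsched_snapshot_rb_payload(adreno_dev,
+							   snapshot, payload);
+			ret = true;
+		}
+
+		i += sizeof(*payload) + (payload->dwords << 2);
+	}
+
+	return ret;
+}
+#endif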
+
+void a6xx_hwsched_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	u32 i;
+	bool skip_memkind_rb = false;
+	bool parse_payload;
+
+	a6xx_gmu_snapshot(adreno_dev, snapshot);
+
+	adreno_hwsched_parse_fault_cmdobj(adreno_dev, snapshot);
+
+	/*
+	 * First try to dump ringbuffers using context bad HFI payloads
+	 * because they have all the ringbuffer parameters. If ringbuffer
+	 * payloads are not present, fall back to dumping ringbuffers
+	 * based on MEMKIND_RB
+	 */
+	if (GMU_VER_MINOR(gmu->ver.hfi) < 2)
+		parse_payload = parse_payload_rb_legacy(adreno_dev, snapshot);
+	else
+		parse_payload = parse_payload_rb(adreno_dev, snapshot);
+
+	if (parse_payload)
+		skip_memkind_rb = true;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i];
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_RB && !skip_memkind_rb)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_RB_V2,
+				snapshot, adreno_hwsched_snapshot_rb,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_SCRATCH)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_PROFILE)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_CSW_SMMU_INFO)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE)
+			snapshot_preemption_records(device, snapshot,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+	}
+}
+
+static int a6xx_hwsched_gmu_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int level, ret = 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	a6xx_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gmu_enable_clks(adreno_dev, GMU_MAX_PWRLEVELS - 1);
+	if (ret)
+		goto gdsc_off;
+
+	ret = a6xx_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = a6xx_gmu_itcm_shadow(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) {
+		ret = a6xx_load_pdc_ucode(adreno_dev);
+		if (ret)
+			goto clks_gdsc_off;
+
+		a6xx_load_rsc_ucode(adreno_dev);
+		set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags);
+	}
+
+	a6xx_gmu_register_config(adreno_dev);
+
+	a6xx_gmu_version_info(adreno_dev);
+
+	if (GMU_VER_MINOR(gmu->ver.hfi) < 2)
+		set_bit(ADRENO_HWSCHED_CTX_BAD_LEGACY, &adreno_dev->hwsched.flags);
+
+	a6xx_gmu_irq_enable(adreno_dev);
+
+	/* Vote for minimal DDR BW for GMU to init */
+	level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min;
+
+	icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level]));
+
+	/* Clear any hwsched faults that might have been left over */
+	adreno_hwsched_clear_fault(adreno_dev);
+
+	ret = a6xx_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = a6xx_hwsched_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	icc_set_bw(pwr->icc_path, 0, 0);
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	a6xx_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		a6xx_gmu_suspend(adreno_dev);
+
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	a6xx_gmu_disable_gdsc(adreno_dev);
+
+	a6xx_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret = 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gmu_enable_clks(adreno_dev, GMU_MAX_PWRLEVELS - 1);
+	if (ret)
+		goto gdsc_off;
+
+	ret = a6xx_rscc_wakeup_sequence(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = a6xx_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	a6xx_gmu_register_config(adreno_dev);
+
+	a6xx_gmu_irq_enable(adreno_dev);
+
+	/* Clear any hwsched faults that might have been left over */
+	adreno_hwsched_clear_fault(adreno_dev);
+
+	ret = a6xx_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = a6xx_hwsched_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+err:
+	a6xx_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		a6xx_gmu_suspend(adreno_dev);
+
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	a6xx_gmu_disable_gdsc(adreno_dev);
+
+	a6xx_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+void a6xx_hwsched_active_count_put(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (WARN(atomic_read(&device->active_cnt) == 0,
+		"Unbalanced get/put calls to KGSL active count\n"))
+		return;
+
+	if (atomic_dec_and_test(&device->active_cnt)) {
+		kgsl_pwrscale_update_stats(device);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	wake_up(&device->active_cnt_wq);
+}
+
+static int a6xx_hwsched_notify_slumber(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_prep_slumber_cmd req;
+	int ret;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER);
+	if (ret)
+		return ret;
+
+	req.freq = gmu->hfi.dcvs_table.gpu_level_num -
+			pwr->default_pwrlevel - 1;
+	req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq;
+
+	/* Disable the power counter so that the GMU is not busy */
+	gmu_core_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0);
+
+	ret = a6xx_hfi_send_cmd_async(adreno_dev, &req, sizeof(req));
+
+	/*
+	 * GEMNOC can enter power collapse state during GPU power down sequence.
+	 * This could abort CX GDSC collapse. Assert Qactive to avoid this.
+	 */
+	if ((adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev) ||
+			adreno_is_a635(adreno_dev)))
+		gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1);
+
+	return ret;
+}
+
+static int a6xx_hwsched_gmu_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret = 0;
+
+	if (device->gmu_fault)
+		goto error;
+
+	/* Wait for the lowest idle level we requested */
+	ret = a6xx_gmu_wait_for_lowest_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = a6xx_hwsched_notify_slumber(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = a6xx_gmu_wait_for_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = a6xx_rscc_sleep_sequence(adreno_dev);
+
+	a6xx_rdpm_mx_freq_update(gmu, 0);
+
+	/* Now that we are done with GMU and GPU, Clear the GBIF */
+	ret = a6xx_halt_gbif(adreno_dev);
+	/* De-assert the halts */
+	kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0);
+
+	a6xx_gmu_irq_disable(adreno_dev);
+
+	a6xx_hwsched_hfi_stop(adreno_dev);
+
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+	a6xx_gmu_disable_gdsc(adreno_dev);
+
+	a6xx_rdpm_cx_freq_update(gmu, 0);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+
+	return ret;
+
+error:
+	a6xx_gmu_irq_disable(adreno_dev);
+	a6xx_hwsched_hfi_stop(adreno_dev);
+	a6xx_gmu_suspend(adreno_dev);
+
+	return ret;
+}
+
+static int a6xx_hwsched_gpu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		goto err;
+
+	ret = a6xx_gmu_oob_set(device, oob_gpu);
+	if (ret)
+		goto err;
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data));
+
+	/* Restore performance counter registers with saved values */
+	adreno_perfcounter_restore(adreno_dev);
+
+	a6xx_start(adreno_dev);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	adreno_perfcounter_start(adreno_dev);
+
+	/* Clear FSR here in case it is set from a previous pagefault */
+	kgsl_mmu_clear_fsr(&device->mmu);
+
+	a6xx_enable_gpu_irq(adreno_dev);
+
+	ret = a6xx_hwsched_cp_init(adreno_dev);
+	if (ret) {
+		a6xx_disable_gpu_irq(adreno_dev);
+		goto err;
+	}
+
+	/*
+	 * At this point it is safe to assume that we recovered. Setting
+	 * this field allows us to take a new snapshot for the next failure
+	 * if we are prioritizing the first unrecoverable snapshot.
+	 */
+	if (device->snapshot)
+		device->snapshot->recovered = true;
+
+	device->reset_counter++;
+err:
+	a6xx_gmu_oob_clear(device, oob_gpu);
+
+	if (ret)
+		a6xx_hwsched_gmu_power_off(adreno_dev);
+
+	return ret;
+}
+
+static void hwsched_idle_timer(struct timer_list *t)
+{
+	struct kgsl_device *device = container_of(t, struct kgsl_device,
+					idle_timer);
+
+	kgsl_schedule_work(&device->idle_check_ws);
+}
+
+static int a6xx_hwsched_gmu_memory_init(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	/* GMU Virtual register bank */
+	if (IS_ERR_OR_NULL(gmu->vrb)) {
+		gmu->vrb = reserve_gmu_kernel_block(gmu, 0, GMU_VRB_SIZE,
+				GMU_NONCACHED_KERNEL, 0);
+
+		if (IS_ERR(gmu->vrb))
+			return PTR_ERR(gmu->vrb);
+
+		/* Populate the size of the virtual register bank, in dwords */
+		gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX,
+					gmu->vrb->size >> 2);
+	}
+
+	/* GMU trace log */
+	if (IS_ERR_OR_NULL(gmu->trace.md)) {
+		gmu->trace.md = reserve_gmu_kernel_block(gmu, 0,
+					GMU_TRACE_SIZE, GMU_NONCACHED_KERNEL, 0);
+
+		if (IS_ERR(gmu->trace.md))
+			return PTR_ERR(gmu->trace.md);
+
+		/* Pass trace buffer address to GMU through the VRB */
+		gmu_core_set_vrb_register(gmu->vrb->hostptr,
+					VRB_TRACE_BUFFER_ADDR_IDX,
+					gmu->trace.md->gmuaddr);
+
+		/* Initialize the GMU trace buffer header */
+		gmu_core_trace_header_init(&gmu->trace);
+	}
+
+	return 0;
+}
+
+static int a6xx_hwsched_gmu_init(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = a6xx_gmu_parse_fw(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gmu_memory_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_hwsched_gmu_memory_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	return a6xx_hwsched_hfi_init(adreno_dev);
+}
+
+static void a6xx_hwsched_touch_wakeup(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	/*
+	 * Do not wake up a suspended device, or a device whose first boot
+	 * sequence has not yet completed.
+	 */
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) ||
+		!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		goto done;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = a6xx_hwsched_gmu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	ret = a6xx_hwsched_gpu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+done:
+	/*
+	 * When waking up from a touch event we want to stay active long enough
+	 * for the user to send a draw command. The default idle timer timeout
+	 * is shorter than we want, so push the idle timer out further for
+	 * this special case.
+	 */
+	mod_timer(&device->idle_timer, jiffies +
+		msecs_to_jiffies(adreno_wake_timeout));
+}
+
+static int a6xx_hwsched_boot(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	adreno_hwsched_start(adreno_dev);
+
+	if (IS_ENABLED(CONFIG_QCOM_KGSL_HIBERNATION) &&
+		!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags))
+		ret = a6xx_hwsched_gmu_first_boot(adreno_dev);
+	else
+		ret = a6xx_hwsched_gmu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_hwsched_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_start_idle_timer(device);
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return ret;
+}
+
+static int a6xx_hwsched_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return a6xx_hwsched_boot(adreno_dev);
+
+	if (adreno_preemption_feature_set(adreno_dev))
+		set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+
+	adreno_hwsched_start(adreno_dev);
+
+	ret = a6xx_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_hwsched_gmu_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = a6xx_hwsched_gmu_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_hwsched_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+
+	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
+						 ADRENO_COOP_RESET);
+
+	set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags);
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	/*
+	 * There is a possible deadlock scenario during kgsl firmware reading
+	 * (request_firmware) and devfreq update calls. During first boot, kgsl
+	 * device mutex is held and then request_firmware is called for reading
+	 * firmware. request_firmware internally takes dev_pm_qos_mtx lock.
+	 * Whereas in case of devfreq update calls triggered by thermal/bcl or
+	 * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then
+	 * tries to take kgsl device mutex as part of get_dev_status/target
+	 * calls. This results in deadlock when both thread are unable to acquire
+	 * the mutex held by other thread. Enable devfreq updates now as we are
+	 * done reading all firmware files.
+	 */
+	device->pwrscale.devfreq_enabled = true;
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return 0;
+}
+
+static int a6xx_hwsched_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER);
+
+	ret = a6xx_gmu_oob_set(device, oob_gpu);
+	if (ret) {
+		a6xx_gmu_oob_clear(device, oob_gpu);
+		goto no_gx_power;
+	}
+
+	kgsl_pwrscale_update_stats(device);
+
+	/* Save active coresight registers if applicable */
+	adreno_coresight_stop(adreno_dev);
+
+	/* Save physical performance counter values before GPU power down */
+	adreno_perfcounter_save(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+	a6xx_gmu_oob_clear(device, oob_gpu);
+
+no_gx_power:
+	kgsl_pwrctrl_irq(device, false);
+
+	a6xx_hwsched_gmu_power_off(adreno_dev);
+
+	adreno_hwsched_unregister_contexts(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	del_timer_sync(&device->idle_timer);
+
+	kgsl_pwrscale_sleep(device);
+
+	kgsl_pwrctrl_clear_l3_vote(device);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+
+	return ret;
+}
+
+static void hwsched_idle_check(struct work_struct *work)
+{
+	struct kgsl_device *device = container_of(work,
+					struct kgsl_device, idle_check_ws);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	mutex_lock(&device->mutex);
+
+	if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags))
+		goto done;
+
+	if (atomic_read(&device->active_cnt) || time_is_after_jiffies(device->idle_jiffies)) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	spin_lock(&device->submit_lock);
+	if (device->submit_now) {
+		spin_unlock(&device->submit_lock);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	device->skip_inline_submit = true;
+	spin_unlock(&device->submit_lock);
+
+	if (!a6xx_hw_isidle(adreno_dev)) {
+		dev_err(device->dev, "GPU isn't idle before SLUMBER\n");
+		gmu_core_fault_snapshot(device);
+	}
+
+	a6xx_hwsched_power_off(adreno_dev);
+
+done:
+	mutex_unlock(&device->mutex);
+}
+
+static int a6xx_hwsched_first_open(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/*
+	 * Do the one-time setup that needs to happen when we
+	 * attempt to boot the GPU for the very first time.
+	 */
+	ret = a6xx_hwsched_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * A client that does a first_open but never closes the device
+	 * may prevent us from going back to SLUMBER. So trigger the idle
+	 * check by incrementing the active count and immediately releasing it.
+	 */
+	atomic_inc(&device->active_cnt);
+	a6xx_hwsched_active_count_put(adreno_dev);
+
+	return 0;
+}
+
+int a6xx_hwsched_active_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret = 0;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return -EINVAL;
+
+	if ((atomic_read(&device->active_cnt) == 0))
+		ret = a6xx_hwsched_boot(adreno_dev);
+
+	if (ret == 0)
+		atomic_inc(&device->active_cnt);
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	return ret;
+}
+
+static int a6xx_hwsched_dcvs_set(struct adreno_device *adreno_dev,
+		int gpu_pwrlevel, int bus_level)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table;
+	struct hfi_gx_bw_perf_vote_cmd req = {
+		.ack_type = DCVS_ACK_BLOCK,
+		.freq = INVALID_DCVS_IDX,
+		.bw = INVALID_DCVS_IDX,
+	};
+	int ret;
+
+	if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags))
+		return 0;
+
+	/* Do not set the GPU clock vote from the GMU to XO or lower */
+	if ((gpu_pwrlevel != INVALID_DCVS_IDX) &&
+			(gpu_pwrlevel >= table->gpu_level_num - 1)) {
+		dev_err(&gmu->pdev->dev, "Invalid gpu dcvs request: %d\n",
+			gpu_pwrlevel);
+		return -EINVAL;
+	}
+
+	if (gpu_pwrlevel < table->gpu_level_num - 1)
+		req.freq = table->gpu_level_num - gpu_pwrlevel - 1;
+
+	if (bus_level < pwr->ddr_table_count && bus_level > 0)
+		req.bw = bus_level;
+
+	/* GMU will vote for slumber levels through the sleep sequence */
+	if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_DCVS_IDX))
+		return 0;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE);
+	if (ret)
+		return ret;
+
+	ret = a6xx_hfi_send_cmd_async(adreno_dev, &req, sizeof(req));
+
+	if (ret) {
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Failed to set GPU perf idx %u, bw idx %u\n",
+			req.freq, req.bw);
+
+		/*
+		 * If this was a DCVS request alongside an active GPU, request
+		 * dispatcher-based reset and recovery.
+		 */
+		if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+			adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+
+	if (req.freq != INVALID_DCVS_IDX)
+		a6xx_rdpm_mx_freq_update(gmu,
+			gmu->hfi.dcvs_table.gx_votes[req.freq].freq);
+
+	return ret;
+}
+
+static int a6xx_hwsched_clock_set(struct adreno_device *adreno_dev,
+	u32 pwrlevel)
+{
+	return a6xx_hwsched_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX);
+}
+
+static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	static unsigned long prev_freq;
+	unsigned long freq = gmu->freqs[0];
+
+	if (!gmu->perf_ddr_bw)
+		return;
+
+	/*
+	 * Scale the GMU if DDR is at a CX corner at which GMU can run at
+	 * a higher frequency
+	 */
+	if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw)
+		freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1];
+
+	if (prev_freq == freq)
+		return;
+
+	if (kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", freq)) {
+		dev_err(&gmu->pdev->dev, "Unable to set the GMU clock to %ld\n",
+			freq);
+		return;
+	}
+
+	a6xx_rdpm_cx_freq_update(gmu, freq / 1000);
+
+	trace_kgsl_gmu_pwrlevel(freq, prev_freq);
+
+	prev_freq = freq;
+}
+
+static int a6xx_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel,
+	u32 ab)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int ret = 0;
+
+	kgsl_icc_set_tag(pwr, buslevel);
+
+	if (buslevel != pwr->cur_buslevel) {
+		ret = a6xx_hwsched_dcvs_set(adreno_dev, INVALID_DCVS_IDX,
+				buslevel);
+		if (ret)
+			return ret;
+
+		scale_gmu_frequency(adreno_dev, buslevel);
+
+		pwr->cur_buslevel = buslevel;
+	}
+
+	if (ab != pwr->cur_ab) {
+		icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0);
+		pwr->cur_ab = ab;
+	}
+
+	trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab);
+	return ret;
+}
+
+static int a6xx_hwsched_pm_suspend(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND);
+
+	/* Halt any new submissions */
+	reinit_completion(&device->halt_gate);
+
+	/*
+	 * Wait for the dispatcher to retire everything by waiting
+	 * for the active count to go to zero.
+	 */
+	ret = kgsl_active_count_wait(device, 0, msecs_to_jiffies(100));
+	if (ret) {
+		dev_err(device->dev, "Timed out waiting for the active count\n");
+		goto err;
+	}
+
+	ret = adreno_hwsched_idle(adreno_dev);
+	if (ret)
+		goto err;
+
+	a6xx_hwsched_power_off(adreno_dev);
+
+	adreno_get_gpu_halt(adreno_dev);
+
+	set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
+
+	return 0;
+
+err:
+	adreno_hwsched_start(adreno_dev);
+
+	return ret;
+}
+
+void a6xx_hwsched_handle_watchdog(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 mask;
+
+	/* Temporarily mask the watchdog interrupt to prevent a storm */
+	gmu_core_regread(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK,
+		&mask);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK,
+			(mask | GMU_INT_WDOG_BITE));
+
+	a6xx_gmu_send_nmi(device, false);
+
+	dev_err_ratelimited(&gmu->pdev->dev,
+			"GMU watchdog expired interrupt received\n");
+
+	adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+static void a6xx_hwsched_pm_resume(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags),
+		"resume invoked without a suspend\n"))
+		return;
+
+	adreno_put_gpu_halt(adreno_dev);
+
+	adreno_hwsched_start(adreno_dev);
+
+	clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+}
+
+static void a6xx_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev)
+{
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct pending_cmd *cmd = NULL;
+
+	read_lock(&hfi->msglock);
+
+	list_for_each_entry(cmd, &hfi->msglist, node) {
+		if (MSG_HDR_GET_ID(cmd->sent_hdr) == H2F_MSG_UNREGISTER_CONTEXT)
+			complete(&cmd->complete);
+	}
+
+	read_unlock(&hfi->msglock);
+}
+
+int a6xx_hwsched_reset_replay(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	int ret;
+
+	/*
+	 * Any pending context unregister packets will be lost
+	 * since we hard reset the GMU. This means any threads waiting
+	 * for a context unregister HFI ack will time out. Wake them
+	 * to avoid false positive ack timeout messages later.
+	 */
+	a6xx_hwsched_drain_ctxt_unregister(adreno_dev);
+
+	adreno_hwsched_unregister_contexts(adreno_dev);
+
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	a6xx_disable_gpu_irq(adreno_dev);
+
+	a6xx_gmu_irq_disable(adreno_dev);
+
+	a6xx_hwsched_hfi_stop(adreno_dev);
+
+	a6xx_gmu_suspend(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	ret = a6xx_hwsched_boot(adreno_dev);
+
+	if (!ret)
+		adreno_hwsched_replay(adreno_dev);
+
+	BUG_ON(ret);
+
+	return ret;
+}
+
+const struct adreno_power_ops a6xx_hwsched_power_ops = {
+	.first_open = a6xx_hwsched_first_open,
+	.last_close = a6xx_hwsched_power_off,
+	.active_count_get = a6xx_hwsched_active_count_get,
+	.active_count_put = a6xx_hwsched_active_count_put,
+	.touch_wakeup = a6xx_hwsched_touch_wakeup,
+	.pm_suspend = a6xx_hwsched_pm_suspend,
+	.pm_resume = a6xx_hwsched_pm_resume,
+	.gpu_clock_set = a6xx_hwsched_clock_set,
+	.gpu_bus_set = a6xx_hwsched_bus_set,
+};
+
+const struct adreno_hwsched_ops a6xx_hwsched_ops = {
+	.submit_drawobj = a6xx_hwsched_submit_drawobj,
+	.preempt_count = a6xx_hwsched_preempt_count_get,
+};
+
+int a6xx_hwsched_probe(struct platform_device *pdev,
+	u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	struct a6xx_hwsched_device *a6xx_hwsched_dev;
+	int ret;
+
+	a6xx_hwsched_dev = devm_kzalloc(&pdev->dev, sizeof(*a6xx_hwsched_dev),
+				GFP_KERNEL);
+	if (!a6xx_hwsched_dev)
+		return -ENOMEM;
+
+	adreno_dev = &a6xx_hwsched_dev->a6xx_dev.adreno_dev;
+
+	adreno_dev->hwsched_enabled = true;
+
+	adreno_dev->irq_mask = A6XX_HWSCHED_INT_MASK;
+
+	ret = a6xx_probe_common(pdev, adreno_dev, chipid, gpucore);
+	if (ret)
+		return ret;
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	INIT_WORK(&device->idle_check_ws, hwsched_idle_check);
+
+	timer_setup(&device->idle_timer, hwsched_idle_timer, 0);
+
+	return adreno_hwsched_init(adreno_dev, &a6xx_hwsched_ops);
+}
+
+int a6xx_hwsched_add_to_minidump(struct adreno_device *adreno_dev)
+{
+	struct a6xx_device *a6xx_dev = container_of(adreno_dev,
+					struct a6xx_device, adreno_dev);
+	struct a6xx_hwsched_device *a6xx_hwsched = container_of(a6xx_dev,
+					struct a6xx_hwsched_device, a6xx_dev);
+	struct a6xx_hwsched_hfi *hw_hfi = &a6xx_hwsched->hwsched_hfi;
+	int ret, i;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HWSCHED_DEVICE,
+			(void *)(a6xx_hwsched), sizeof(struct a6xx_hwsched_device));
+	if (ret)
+		return ret;
+
+	if (!IS_ERR_OR_NULL(a6xx_dev->gmu.gmu_log)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_LOG_ENTRY,
+					a6xx_dev->gmu.gmu_log->hostptr,
+					a6xx_dev->gmu.gmu_log->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(a6xx_dev->gmu.hfi.hfi_mem)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_HFIMEM_ENTRY,
+					a6xx_dev->gmu.hfi.hfi_mem->hostptr,
+					a6xx_dev->gmu.hfi.hfi_mem->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(a6xx_dev->gmu.vrb)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_VRB_ENTRY,
+					a6xx_dev->gmu.vrb->hostptr,
+					a6xx_dev->gmu.vrb->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(a6xx_dev->gmu.dump_mem)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_DUMPMEM_ENTRY,
+					a6xx_dev->gmu.dump_mem->hostptr,
+					a6xx_dev->gmu.dump_mem->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(a6xx_dev->gmu.trace.md)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_TRACE_ENTRY,
+					a6xx_dev->gmu.trace.md->hostptr,
+					a6xx_dev->gmu.trace.md->size);
+		if (ret)
+			return ret;
+	}
+
+	/* Dump HFI hwsched global mem alloc entries */
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i];
+		char hfi_minidump_str[MAX_VA_MINIDUMP_STR_LEN] = {0};
+		u32 rb_id = 0;
+
+		if (!hfi_get_minidump_string(entry->desc.mem_kind,
+					     &hfi_minidump_str[0],
+					     sizeof(hfi_minidump_str), &rb_id)) {
+			ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+						      hfi_minidump_str,
+						      entry->md->hostptr,
+						      entry->md->size);
+			if (ret)
+				return ret;
+		}
+	}
+
+	if (!IS_ERR_OR_NULL(hw_hfi->big_ib)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					      KGSL_HFI_BIG_IB_ENTRY,
+					      hw_hfi->big_ib->hostptr,
+					      hw_hfi->big_ib->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(hw_hfi->big_ib_recurring))
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					      KGSL_HFI_BIG_IB_REC_ENTRY,
+					      hw_hfi->big_ib_recurring->hostptr,
+					      hw_hfi->big_ib_recurring->size);
+
+	return ret;
+}

+ 100 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_hwsched.h

@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_A6XX_HWSCHED_H_
+#define _ADRENO_A6XX_HWSCHED_H_
+
+#include "adreno_a6xx_hwsched_hfi.h"
+
+/**
+ * struct a6xx_hwsched_device - Container for the a6xx hwscheduling device
+ */
+struct a6xx_hwsched_device {
+	/** @a6xx_dev: Container for the a6xx device */
+	struct a6xx_device a6xx_dev;
+	/** @hwsched_hfi: Container for hwscheduling specific hfi resources */
+	struct a6xx_hwsched_hfi hwsched_hfi;
+};
+
+/**
+ * a6xx_hwsched_probe - Target specific probe for hwsched
+ * @pdev: Pointer to the platform device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore
+ *
+ * The target-specific probe function for hwsched-enabled GMU targets.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hwsched_probe(struct platform_device *pdev,
+	u32 chipid, const struct adreno_gpu_core *gpucore);
+
+/**
+ * a6xx_hwsched_reset_replay - Restart the gmu and gpu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Restart the GMU and GPU and replay the inflight commands.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hwsched_reset_replay(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_snapshot - take a6xx hwsched snapshot
+ * @adreno_dev: Pointer to the adreno device
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * Snapshot the faulty IB and then the rest of the a6xx GMU state
+ */
+void a6xx_hwsched_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot);
+
+/**
+ * a6xx_hwsched_handle_watchdog - Handle watchdog interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void a6xx_hwsched_handle_watchdog(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_active_count_get - Increment the active count
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function increments the active count. If active count
+ * is 0, this function also powers up the device.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_hwsched_active_count_get(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_active_count_put - Put back the active count
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function decrements the active count and sets the idle
+ * timer if the active count becomes zero.
+ */
+void a6xx_hwsched_active_count_put(struct adreno_device *adreno_dev);
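+
+/*
+ * Usage sketch (illustrative, not part of the original header). Callers are
+ * expected to bracket GPU work with a get/put pair while holding the device
+ * mutex; submit_work() below is a hypothetical placeholder:
+ *
+ *	mutex_lock(&device->mutex);
+ *	ret = a6xx_hwsched_active_count_get(adreno_dev);
+ *	if (!ret) {
+ *		submit_work(adreno_dev);
+ *		a6xx_hwsched_active_count_put(adreno_dev);
+ *	}
+ *	mutex_unlock(&device->mutex);
+ */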
+
+/**
+ * a6xx_hwsched_add_to_minidump - Register hwsched_device with va minidump
+ * @adreno_dev: Pointer to the adreno device
+ */
+int a6xx_hwsched_add_to_minidump(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_send_recurring_cmdobj - Dispatch IBs to GMU
+ * @adreno_dev: Pointer to adreno device structure
+ * @cmdobj: The command object which needs to be submitted
+ *
+ * This function registers the context if needed and submits the
+ * recurring IBs to the GMU. Upon receiving an IPC interrupt, the GMU
+ * submits the recurring IBs to the GPU.
+ *
+ * Return: 0 on success and negative error on failure
+ */
+int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj);
+#endif

+ 2162 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_hwsched_hfi.c

@@ -0,0 +1,2162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/iommu.h>
+#include <linux/sched/clock.h>
+#include <soc/qcom/msm_performance.h>
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_a6xx_hwsched.h"
+#include "adreno_hfi.h"
+#include "adreno_perfcounter.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+#include "kgsl_device.h"
+#include "kgsl_eventlog.h"
+#include "kgsl_pwrctrl.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+#define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT)
+
+#define DEFINE_QHDR(gmuaddr, id, prio) \
+	{\
+		.status = 1, \
+		.start_addr = GMU_QUEUE_START_ADDR(gmuaddr, id), \
+		.type = QUEUE_HDR_TYPE(id, prio, 0, 0), \
+		.queue_size = SZ_4K >> 2, \
+		.msg_size = 0, \
+		.unused0 = 0, \
+		.unused1 = 0, \
+		.unused2 = 0, \
+		.unused3 = 0, \
+		.unused4 = 0, \
+		.read_index = 0, \
+		.write_index = 0, \
+}
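+
+/*
+ * Note (illustrative): queue_size above is apparently expressed in dwords,
+ * so SZ_4K >> 2 gives each queue 1024 dwords, i.e. 4 KiB of storage.
+ */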
+
+static struct dq_info {
+	/** @max_dq: Maximum number of dispatch queues per RB level */
+	u32 max_dq;
+	/** @base_dq_id: Base dqid for level */
+	u32 base_dq_id;
+	/** @offset: Next dqid to use for roundrobin context assignment */
+	u32 offset;
+} a6xx_hfi_dqs[KGSL_PRIORITY_MAX_RB_LEVELS] = {
+	{ 4, 0, }, /* RB0 */
+	{ 4, 4, }, /* RB1 */
+	{ 3, 8, }, /* RB2 */
+	{ 3, 11, }, /* RB3 */
+};
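+
+/*
+ * Illustrative example (an assumption based on the field descriptions above,
+ * not taken from the source): a context assigned to RB priority level 1 gets
+ * a dispatch queue id in the range base_dq_id..base_dq_id + max_dq - 1,
+ * i.e. 4..7, with @offset advancing round-robin across contexts.
+ */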
+
+static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint32_t queue_idx,
+	uint32_t *msg, u32 size_bytes, struct kgsl_drawobj_cmd *cmdobj,
+	struct adreno_submit_time *time);
+
+struct a6xx_hwsched_hfi *to_a6xx_hwsched_hfi(
+	struct adreno_device *adreno_dev)
+{
+	struct a6xx_device *a6xx_dev = container_of(adreno_dev,
+					struct a6xx_device, adreno_dev);
+	struct a6xx_hwsched_device *a6xx_hwsched = container_of(a6xx_dev,
+					struct a6xx_hwsched_device, a6xx_dev);
+
+	return &a6xx_hwsched->hwsched_hfi;
+}
+
+static void add_waiter(struct a6xx_hwsched_hfi *hfi, u32 hdr,
+	struct pending_cmd *ack)
+{
+	memset(ack, 0x0, sizeof(*ack));
+
+	init_completion(&ack->complete);
+	write_lock_irq(&hfi->msglock);
+	list_add_tail(&ack->node, &hfi->msglist);
+	write_unlock_irq(&hfi->msglock);
+
+	ack->sent_hdr = hdr;
+}
+
+static void del_waiter(struct a6xx_hwsched_hfi *hfi, struct pending_cmd *ack)
+{
+	write_lock_irq(&hfi->msglock);
+	list_del(&ack->node);
+	write_unlock_irq(&hfi->msglock);
+}
+
+static void a6xx_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct pending_cmd *cmd = NULL;
+	u32 waiters[64], num_waiters = 0, i;
+	u32 *ack = rcvd;
+	u32 hdr = ack[0];
+	u32 req_hdr = ack[1];
+	u32 size_bytes = MSG_HDR_GET_SIZE(hdr) << 2;
+
+	if (size_bytes > sizeof(cmd->results))
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Ack result too big: %d Truncating to: %ld\n",
+			size_bytes, sizeof(cmd->results));
+
+	read_lock(&hfi->msglock);
+
+	list_for_each_entry(cmd, &hfi->msglist, node) {
+		if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) {
+			memcpy(cmd->results, ack,
+				min_t(u32, size_bytes,
+					sizeof(cmd->results)));
+			complete(&cmd->complete);
+			read_unlock(&hfi->msglock);
+			return;
+		}
+
+		if (num_waiters < ARRAY_SIZE(waiters))
+			waiters[num_waiters++] = cmd->sent_hdr;
+	}
+
+	read_unlock(&hfi->msglock);
+
+	/* Didn't find the sender, list the waiters */
+	dev_err_ratelimited(&gmu->pdev->dev,
+		"Unexpectedly got id %d seqnum %d. Total waiters: %d Top %d Waiters:\n",
+		MSG_HDR_GET_ID(req_hdr), MSG_HDR_GET_SEQNUM(req_hdr),
+		num_waiters, min_t(u32, num_waiters, 5));
+
+	for (i = 0; i < num_waiters && i < 5; i++)
+		dev_err_ratelimited(&gmu->pdev->dev,
+			" id %d seqnum %d\n",
+			MSG_HDR_GET_ID(waiters[i]),
+			MSG_HDR_GET_SEQNUM(waiters[i]));
+}
+
+static void log_profiling_info(struct adreno_device *adreno_dev, u32 *rcvd)
+{
+	struct hfi_ts_retire_cmd *cmd = (struct hfi_ts_retire_cmd *)rcvd;
+	struct kgsl_context *context;
+	struct retire_info info = {0};
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	context = kgsl_context_get(device, cmd->ctxt_id);
+	if (context == NULL)
+		return;
+
+	/* protected GPU work must not be reported */
+	if (!(context->flags & KGSL_CONTEXT_SECURE))
+		kgsl_work_period_update(device, context->proc_priv->period,
+					     cmd->active);
+
+	info.timestamp = cmd->ts;
+	info.rb_id = adreno_get_level(context);
+	info.gmu_dispatch_queue = context->gmu_dispatch_queue;
+	info.submitted_to_rb = cmd->submitted_to_rb;
+	info.sop = cmd->sop;
+	info.eop = cmd->eop;
+	if (GMU_VER_MINOR(gmu->ver.hfi) < 4)
+		info.active = cmd->eop - cmd->sop;
+	else
+		info.active = cmd->active;
+	info.retired_on_gmu = cmd->retired_on_gmu;
+
+	trace_adreno_cmdbatch_retired(context, &info, 0, 0, 0);
+
+	log_kgsl_cmdbatch_retired_event(context->id, cmd->ts, context->priority,
+		0, cmd->sop, cmd->eop);
+
+	kgsl_context_put(context);
+}
+
+/* Look up a particular key's value for a given type of payload */
+static u32 a6xx_hwsched_lookup_key_value_legacy(struct adreno_device *adreno_dev,
+	u32 type, u32 key)
+{
+	struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd_legacy, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == type)
+			return adreno_hwsched_parse_payload(payload, key);
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+static u32 get_payload_rb_key_legacy(struct adreno_device *adreno_dev,
+	u32 rb_id, u32 key)
+{
+	struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd_legacy, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			u32 id = adreno_hwsched_parse_payload(payload, KEY_RB_ID);
+
+			if (id == rb_id)
+				return adreno_hwsched_parse_payload(payload, key);
+		}
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+static void log_gpu_fault_legacy(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct device *dev = &gmu->pdev->dev;
+	struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad;
+
+	switch (cmd->error) {
+	case GMU_GPU_HW_HANG:
+		dev_crit_ratelimited(dev, "MISC: GPU hang detected\n");
+		break;
+	case GMU_GPU_SW_HANG:
+		dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %u\n",
+			cmd->ctxt_id, cmd->ts);
+		break;
+	case GMU_CP_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP opcode error interrupt | opcode=0x%8.8x\n",
+			a6xx_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_OPCODE_ERROR));
+		break;
+	case GMU_CP_PROTECTED_ERROR: {
+		u32 status = a6xx_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP Illegal instruction error\n");
+		break;
+	case GMU_CP_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP ucode error interrupt\n");
+		break;
+	case GMU_CP_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP | Ringbuffer HW fault | status=0x%8.8x\n",
+			a6xx_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_HW_FAULT));
+		break;
+	case GMU_GPU_PREEMPT_TIMEOUT: {
+		u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr;
+
+		cur = a6xx_hwsched_lookup_key_value_legacy(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID);
+		next = a6xx_hwsched_lookup_key_value_legacy(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT,
+			KEY_PREEMPT_TIMEOUT_NEXT_RB_ID);
+		cur_rptr = get_payload_rb_key_legacy(adreno_dev, cur, KEY_RB_RPTR);
+		cur_wptr = get_payload_rb_key_legacy(adreno_dev, cur, KEY_RB_WPTR);
+		next_rptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_RPTR);
+		next_wptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_WPTR);
+
+		dev_crit_ratelimited(dev,
+			"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
+			cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr);
+		}
+		break;
+	case GMU_CP_GPC_ERROR:
+		dev_crit_ratelimited(dev, "RBBM: GPC error\n");
+		break;
+	default:
+		dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n",
+			cmd->error);
+		break;
+	}
+}
+
+/* Look up a particular key's value for a given type of payload */
+static u32 a6xx_hwsched_lookup_key_value(struct adreno_device *adreno_dev,
+	u32 type, u32 key)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == type)
+			return adreno_hwsched_parse_payload(payload, key);
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+static u32 get_payload_rb_key(struct adreno_device *adreno_dev,
+	u32 rb_id, u32 key)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			u32 id = adreno_hwsched_parse_payload(payload, KEY_RB_ID);
+
+			if (id == rb_id)
+				return adreno_hwsched_parse_payload(payload, key);
+		}
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+static void log_gpu_fault(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct device *dev = &gmu->pdev->dev;
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+
+	switch (cmd->error) {
+	case GMU_GPU_HW_HANG:
+		dev_crit_ratelimited(dev, "MISC: GPU hang detected\n");
+		break;
+	case GMU_GPU_SW_HANG:
+		dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %d\n",
+			cmd->gc.ctxt_id, cmd->gc.ts);
+		break;
+	case GMU_CP_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP opcode error interrupt | opcode=0x%8.8x\n",
+			a6xx_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_OPCODE_ERROR));
+		break;
+	case GMU_CP_PROTECTED_ERROR: {
+		u32 status = a6xx_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP Illegal instruction error\n");
+		break;
+	case GMU_CP_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP ucode error interrupt\n");
+		break;
+	case GMU_CP_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP | Ringbuffer HW fault | status=0x%8.8x\n",
+			a6xx_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_HW_FAULT));
+		break;
+	case GMU_GPU_PREEMPT_TIMEOUT: {
+		u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr;
+
+		cur = a6xx_hwsched_lookup_key_value(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID);
+		next = a6xx_hwsched_lookup_key_value(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT,
+			KEY_PREEMPT_TIMEOUT_NEXT_RB_ID);
+		cur_rptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_RPTR);
+		cur_wptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_WPTR);
+		next_rptr = get_payload_rb_key(adreno_dev, next, KEY_RB_RPTR);
+		next_wptr = get_payload_rb_key(adreno_dev, next, KEY_RB_WPTR);
+
+		dev_crit_ratelimited(dev,
+			"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
+			cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr);
+		}
+		break;
+	case GMU_CP_GPC_ERROR:
+		dev_crit_ratelimited(dev, "RBBM: GPC error\n");
+		break;
+	default:
+		dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n",
+			cmd->error);
+		break;
+	}
+}
+
+static void process_ctx_bad(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	if (GMU_VER_MINOR(gmu->ver.hfi) < 2)
+		log_gpu_fault_legacy(adreno_dev);
+	else
+		log_gpu_fault(adreno_dev);
+
+	adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT);
+}
+
+static u32 peek_next_header(struct a6xx_gmu_device *gmu, uint32_t queue_idx)
+{
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	u32 *queue;
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+		return 0;
+
+	if (hdr->read_index == hdr->write_index)
+		return 0;
+
+	queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
+
+	return queue[hdr->read_index];
+}
+
+static void a6xx_hwsched_process_msgq(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	u32 rcvd[MAX_RCVD_SIZE], next_hdr;
+
+	mutex_lock(&hw_hfi->msgq_mutex);
+
+	for (;;) {
+		next_hdr = peek_next_header(gmu, HFI_MSG_ID);
+
+		if (!next_hdr)
+			break;
+
+		if (MSG_HDR_GET_ID(next_hdr) == F2H_MSG_CONTEXT_BAD) {
+			a6xx_hfi_queue_read(gmu, HFI_MSG_ID,
+				(u32 *)adreno_dev->hwsched.ctxt_bad,
+				HFI_MAX_MSG_SIZE);
+			process_ctx_bad(adreno_dev);
+			continue;
+		}
+
+		a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd));
+
+		/*
+		 * We are assuming that there is only one outstanding ack
+		 * because the hfi sending thread waits for completion while
+		 * holding the device mutex.
+		 */
+		if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
+			a6xx_receive_ack_async(adreno_dev, rcvd);
+		} else if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_TS_RETIRE) {
+			adreno_hwsched_trigger(adreno_dev);
+			log_profiling_info(adreno_dev, rcvd);
+		} else if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_RELEASE) {
+			struct hfi_gmu_cntr_release_cmd *cmd =
+				(struct hfi_gmu_cntr_release_cmd *) rcvd;
+
+			adreno_perfcounter_put(adreno_dev,
+				cmd->group_id, cmd->countable, PERFCOUNTER_FLAG_KERNEL);
+		}
+	}
+	mutex_unlock(&hw_hfi->msgq_mutex);
+}
+
+static void process_log_block(struct adreno_device *adreno_dev, void *data)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_log_block *cmd = data;
+	u32 *log_event = gmu->gmu_log->hostptr;
+	u32 start, end;
+
+	start = cmd->start_index;
+	end = cmd->stop_index;
+
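+	/* Each log event record appears to span four dwords, hence the stride */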
+	log_event += start * 4;
+	while (start != end) {
+		trace_gmu_event(log_event);
+		log_event += 4;
+		start++;
+	}
+}
+
+static void a6xx_hwsched_process_dbgq(struct adreno_device *adreno_dev, bool limited)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u32 rcvd[MAX_RCVD_SIZE];
+	bool recovery = false;
+
+	while (a6xx_hfi_queue_read(gmu, HFI_DBG_ID, rcvd, sizeof(rcvd)) > 0) {
+
+		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_ERR) {
+			adreno_a6xx_receive_err_req(gmu, rcvd);
+			recovery = true;
+			break;
+		}
+
+		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_DEBUG)
+			adreno_a6xx_receive_debug_req(gmu, rcvd);
+
+		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_LOG_BLOCK)
+			process_log_block(adreno_dev, rcvd);
+
+		/* Process only one debug queue message per call so msgq processing is not delayed */
+		if (limited)
+			break;
+	}
+
+	if (!recovery)
+		return;
+
+	adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+/* HFI interrupt handler */
+static irqreturn_t a6xx_hwsched_hfi_handler(int irq, void *data)
+{
+	struct adreno_device *adreno_dev = data;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status = 0;
+
+	/*
+	 * A6XX_GMU_GMU2HOST_INTR_INFO may have bits set that are not specified in hfi->irq_mask.
+	 * Read and clear only those irq bits that we are processing here.
+	 */
+	gmu_core_regread(device, A6XX_GMU_GMU2HOST_INTR_INFO, &status);
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, status & hfi->irq_mask);
+
+	/*
+	 * If interrupts are not enabled on the HFI message queue,
+	 * the inline message processing loop will process it;
+	 * otherwise, process it here.
+	 */
+	if (!(hfi->irq_mask & HFI_IRQ_MSGQ_MASK))
+		status &= ~HFI_IRQ_MSGQ_MASK;
+
+	if (status & (HFI_IRQ_MSGQ_MASK | HFI_IRQ_DBGQ_MASK)) {
+		wake_up_interruptible(&hfi->f2h_wq);
+		adreno_hwsched_trigger(adreno_dev);
+	}
+	if (status & HFI_IRQ_CM3_FAULT_MASK) {
+		atomic_set(&gmu->cm3_fault, 1);
+
+		/* make sure other CPUs see the update */
+		smp_wmb();
+
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"GMU CM3 fault interrupt received\n");
+
+		adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+
+	/* Ignore OOB bits */
+	status &= GENMASK(31 - (oob_max - 1), 0);
+
+	if (status & ~hfi->irq_mask)
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Unhandled HFI interrupts 0x%x\n",
+			status & ~hfi->irq_mask);
+
+	return IRQ_HANDLED;
+}
+
+#define HFI_IRQ_MSGQ_MASK BIT(0)
+
+static int check_ack_failure(struct adreno_device *adreno_dev,
+	struct pending_cmd *ack)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	if (ack->results[2] != 0xffffffff)
+		return 0;
+
+	dev_err(&gmu->pdev->dev,
+		"ACK error: sender id %d seqnum %d\n",
+		MSG_HDR_GET_ID(ack->sent_hdr),
+		MSG_HDR_GET_SEQNUM(ack->sent_hdr));
+
+	return -EINVAL;
+}
+
+int a6xx_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	u32 *cmd = data;
+	u32 seqnum;
+	int rc;
+	struct pending_cmd pending_ack;
+
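+	/* The HFI header carries the payload size in dwords, hence size_bytes >> 2 */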
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	*cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2);
+
+	add_waiter(hfi, *cmd, &pending_ack);
+
+	rc = a6xx_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
+	if (rc)
+		goto done;
+
+	rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack,
+		a6xx_hwsched_process_msgq);
+	if (rc)
+		goto done;
+
+	rc = check_ack_failure(adreno_dev, &pending_ack);
+
+done:
+	del_waiter(hfi, &pending_ack);
+
+	return rc;
+}
+
+static void init_queues(struct a6xx_hfi *hfi)
+{
+	u32 gmuaddr = hfi->hfi_mem->gmuaddr;
+	struct hfi_queue_table hfi_table = {
+		.qtbl_hdr = {
+			.version = 0,
+			.size = sizeof(struct hfi_queue_table) >> 2,
+			.qhdr0_offset =
+				sizeof(struct hfi_queue_table_header) >> 2,
+			.qhdr_size = sizeof(struct hfi_queue_header) >> 2,
+			.num_q = HFI_QUEUE_MAX,
+			.num_active_q = HFI_QUEUE_MAX,
+		},
+		.qhdr = {
+			DEFINE_QHDR(gmuaddr, HFI_CMD_ID, 0),
+			DEFINE_QHDR(gmuaddr, HFI_MSG_ID, 0),
+			DEFINE_QHDR(gmuaddr, HFI_DBG_ID, 0),
+			/* 4 DQs for RB priority 0 */
+			DEFINE_QHDR(gmuaddr, 3, 0),
+			DEFINE_QHDR(gmuaddr, 4, 0),
+			DEFINE_QHDR(gmuaddr, 5, 0),
+			DEFINE_QHDR(gmuaddr, 6, 0),
+			/* 4 DQs for RB priority 1 */
+			DEFINE_QHDR(gmuaddr, 7, 1),
+			DEFINE_QHDR(gmuaddr, 8, 1),
+			DEFINE_QHDR(gmuaddr, 9, 1),
+			DEFINE_QHDR(gmuaddr, 10, 1),
+			/* 3 DQs for RB priority 2 */
+			DEFINE_QHDR(gmuaddr, 11, 2),
+			DEFINE_QHDR(gmuaddr, 12, 2),
+			DEFINE_QHDR(gmuaddr, 13, 2),
+			/* 3 DQs for RB priority 3 */
+			DEFINE_QHDR(gmuaddr, 14, 3),
+			DEFINE_QHDR(gmuaddr, 15, 3),
+			DEFINE_QHDR(gmuaddr, 16, 3),
+		},
+	};
+
+	memcpy(hfi->hfi_mem->hostptr, &hfi_table, sizeof(hfi_table));
+}
+
+/* Total header sizes + queue sizes + 16 for alignment */
+#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
+	(SZ_4K * HFI_QUEUE_MAX))
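+
+/*
+ * Illustrative arithmetic (assuming HFI_QUEUE_MAX works out to the 3 default
+ * queues plus the 14 dispatch queues defined in init_queues()): 17 queues of
+ * 4 KiB each, on top of the queue table header and alignment padding.
+ */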
+
+static int hfi_f2h_main(void *arg);
+
+int a6xx_hwsched_hfi_init(struct adreno_device *adreno_dev)
+{
+	struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct a6xx_hfi *hfi = to_a6xx_hfi(adreno_dev);
+
+	if (IS_ERR_OR_NULL(hw_hfi->big_ib)) {
+		hw_hfi->big_ib = reserve_gmu_kernel_block(to_a6xx_gmu(adreno_dev),
+				0,
+				HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib),
+				GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(hw_hfi->big_ib))
+			return PTR_ERR(hw_hfi->big_ib);
+	}
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR) &&
+			IS_ERR_OR_NULL(hw_hfi->big_ib_recurring)) {
+		hw_hfi->big_ib_recurring = reserve_gmu_kernel_block(
+				to_a6xx_gmu(adreno_dev), 0,
+				HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib),
+				GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(hw_hfi->big_ib_recurring))
+			return PTR_ERR(hw_hfi->big_ib_recurring);
+	}
+
+	if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
+		hfi->hfi_mem = reserve_gmu_kernel_block(to_a6xx_gmu(adreno_dev),
+				0, HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(hfi->hfi_mem))
+			return PTR_ERR(hfi->hfi_mem);
+		init_queues(hfi);
+	}
+
+	if (IS_ERR_OR_NULL(hw_hfi->f2h_task))
+		hw_hfi->f2h_task = kthread_run(hfi_f2h_main, adreno_dev, "gmu_f2h");
+
+	return PTR_ERR_OR_ZERO(hw_hfi->f2h_task);
+}
+
+static int get_attrs(u32 flags)
+{
+	int attrs = IOMMU_READ;
+
+	if (flags & HFI_MEMFLAG_GMU_PRIV)
+		attrs |= IOMMU_PRIV;
+
+	if (flags & HFI_MEMFLAG_GMU_WRITEABLE)
+		attrs |= IOMMU_WRITE;
+
+	return attrs;
+}
+
+static int gmu_import_buffer(struct adreno_device *adreno_dev,
+	struct hfi_mem_alloc_entry *entry)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_mem_alloc_desc *desc = &entry->desc;
+	int attrs = get_attrs(desc->flags);
+	struct gmu_vma_entry *vma = &gmu->vma[GMU_NONCACHED_KERNEL];
+	int ret;
+
+	if (desc->flags & HFI_MEMFLAG_GMU_CACHEABLE)
+		vma = &gmu->vma[GMU_CACHE];
+
+	if ((vma->next_va + desc->size) > (vma->start + vma->size)) {
+		dev_err(&gmu->pdev->dev,
+			"GMU mapping too big. available: %d required: %d\n",
+			vma->next_va - vma->start, desc->size);
+		return -ENOMEM;
+	}
+
+
+	ret = gmu_core_map_memdesc(gmu->domain, entry->md, vma->next_va, attrs);
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "gmu map err: 0x%08x, %x\n",
+			vma->next_va, attrs);
+		return ret;
+	}
+
+	entry->md->gmuaddr = vma->next_va;
+	vma->next_va += desc->size;
+	return 0;
+}
+
+static struct hfi_mem_alloc_entry *lookup_mem_alloc_table(
+	struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc)
+{
+	struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	int i;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i];
+
+		if ((entry->desc.mem_kind == desc->mem_kind) &&
+			(entry->desc.gmu_mem_handle == desc->gmu_mem_handle))
+			return entry;
+	}
+
+	return NULL;
+}
+
+static struct hfi_mem_alloc_entry *get_mem_alloc_entry(
+	struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct hfi_mem_alloc_entry *entry =
+		lookup_mem_alloc_table(adreno_dev, desc);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u64 flags = 0;
+	u32 priv = 0;
+	int ret;
+	const char *memkind_string = desc->mem_kind < HFI_MEMKIND_MAX ?
+			hfi_memkind_strings[desc->mem_kind] : "UNKNOWN";
+
+	if (entry)
+		return entry;
+
+	if (desc->mem_kind >= HFI_MEMKIND_MAX) {
+		dev_err(&gmu->pdev->dev, "Invalid mem kind: %d\n",
+			desc->mem_kind);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (hfi->mem_alloc_entries == ARRAY_SIZE(hfi->mem_alloc_table)) {
+		dev_err(&gmu->pdev->dev,
+			"Reached max mem alloc entries\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	entry = &hfi->mem_alloc_table[hfi->mem_alloc_entries];
+
+	memcpy(&entry->desc, desc, sizeof(*desc));
+
+	entry->desc.host_mem_handle = desc->gmu_mem_handle;
+
+	if (desc->flags & HFI_MEMFLAG_GFX_PRIV)
+		priv |= KGSL_MEMDESC_PRIVILEGED;
+
+	if (!(desc->flags & HFI_MEMFLAG_GFX_WRITEABLE))
+		flags |= KGSL_MEMFLAGS_GPUREADONLY;
+
+	if (desc->flags & HFI_MEMFLAG_GFX_SECURE)
+		flags |= KGSL_MEMFLAGS_SECURE;
+
+	if (!(desc->flags & HFI_MEMFLAG_GFX_ACC)) {
+		if (desc->mem_kind == HFI_MEMKIND_MMIO_IPC_CORE)
+			entry->md = reserve_gmu_kernel_block_fixed(gmu, 0, desc->size,
+					(desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ?
+					GMU_CACHE : GMU_NONCACHED_KERNEL,
+					"qcom,ipc-core", get_attrs(desc->flags), desc->align);
+		else
+			entry->md = reserve_gmu_kernel_block(gmu, 0, desc->size,
+					(desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ?
+					GMU_CACHE : GMU_NONCACHED_KERNEL, desc->align);
+
+		if (IS_ERR(entry->md)) {
+			int ret = PTR_ERR(entry->md);
+
+			memset(entry, 0, sizeof(*entry));
+			return ERR_PTR(ret);
+		}
+		entry->desc.size = entry->md->size;
+		entry->desc.gmu_addr = entry->md->gmuaddr;
+
+		goto done;
+	}
+
+	entry->md = kgsl_allocate_global(device, desc->size, 0, flags, priv,
+		memkind_string);
+	if (IS_ERR(entry->md)) {
+		int ret = PTR_ERR(entry->md);
+
+		memset(entry, 0, sizeof(*entry));
+		return ERR_PTR(ret);
+	}
+
+	entry->desc.size = entry->md->size;
+	entry->desc.gpu_addr = entry->md->gpuaddr;
+
+	if (!(desc->flags & HFI_MEMFLAG_GMU_ACC))
+		goto done;
+
+	 /*
+	  * If gmu mapping fails, then we have to live with
+	  * leaking the gpu global buffer allocated above.
+	  */
+	ret = gmu_import_buffer(adreno_dev, entry);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"gpuaddr: 0x%llx size: %lld bytes lost\n",
+			entry->md->gpuaddr, entry->md->size);
+		memset(entry, 0, sizeof(*entry));
+		return ERR_PTR(ret);
+	}
+
+	entry->desc.gmu_addr = entry->md->gmuaddr;
+done:
+	hfi->mem_alloc_entries++;
+
+	return entry;
+}
+
+static int process_mem_alloc(struct adreno_device *adreno_dev,
+	struct hfi_mem_alloc_desc *mad)
+{
+	struct hfi_mem_alloc_entry *entry;
+
+	entry = get_mem_alloc_entry(adreno_dev, mad);
+	if (IS_ERR(entry))
+		return PTR_ERR(entry);
+
+	if (entry->md) {
+		mad->gpu_addr = entry->md->gpuaddr;
+		mad->gmu_addr = entry->md->gmuaddr;
+	}
+
+	/*
+	 * GMU uses the host_mem_handle to check if this memalloc was
+	 * successful
+	 */
+	mad->host_mem_handle = mad->gmu_mem_handle;
+
+	return 0;
+}
+
+static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd)
+{
+	struct hfi_mem_alloc_desc desc = {0};
+	struct hfi_mem_alloc_reply_cmd out = {0};
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u32 seqnum;
+	int ret;
+
+	hfi_get_mem_alloc_desc(rcvd, &desc);
+
+	ret = process_mem_alloc(adreno_dev, &desc);
+	if (ret)
+		return ret;
+
+	memcpy(&out.desc, &desc, sizeof(out.desc));
+
+	out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC);
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2);
+	out.req_hdr = *(u32 *)rcvd;
+
+	return a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out));
+}
+
+static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd)
+{
+	struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd;
+	struct hfi_gmu_cntr_register_reply_cmd out = {0};
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u32 lo = 0, hi = 0, seqnum;
+
+	/*
+	 * Failure to allocate counter is not fatal. Sending lo = 0, hi = 0
+	 * indicates to GMU that counter allocation failed.
+	 */
+	adreno_perfcounter_get(adreno_dev,
+		in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL);
+
+	out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER);
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2);
+	out.req_hdr = in->hdr;
+	out.group_id = in->group_id;
+	out.countable = in->countable;
+	/* Fill in byte offset of counter */
+	out.cntr_lo = lo << 2;
+	out.cntr_hi = hi << 2;
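+	/*
+	 * Illustrative: assuming lo/hi are dword register offsets, a dword
+	 * offset of 0x500 is reported to the GMU as byte offset 0x1400.
+	 */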
+
+	return a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out));
+}
+
+static int send_start_msg(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 seqnum;
+	int rc;
+	struct hfi_start_cmd cmd;
+	u32 rcvd[MAX_RCVD_SIZE];
+	struct pending_cmd pending_ack = {0};
+
+	rc = CMD_MSG_HDR(cmd, H2F_MSG_START);
+	if (rc)
+		return rc;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+
+	pending_ack.sent_hdr = cmd.hdr;
+
+	rc = a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (rc)
+		return rc;
+
+poll:
+	rc = gmu_core_timed_poll_check(device, A6XX_GMU_GMU2HOST_INTR_INFO,
+		HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK);
+
+	if (rc) {
+		dev_err(&gmu->pdev->dev,
+			"Timed out processing MSG_START seqnum: %d\n",
+			seqnum);
+		gmu_core_fault_snapshot(device);
+		return rc;
+	}
+
+	/* Clear the interrupt */
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR,
+		HFI_IRQ_MSGQ_MASK);
+
+	if (a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) {
+		dev_err(&gmu->pdev->dev, "MSG_START: no payload\n");
+		gmu_core_fault_snapshot(device);
+		return -EINVAL;
+	}
+
+	if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
+		rc = a6xx_receive_ack_cmd(gmu, rcvd, &pending_ack);
+		if (rc)
+			return rc;
+
+		return check_ack_failure(adreno_dev, &pending_ack);
+	}
+
+	if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) {
+		rc = mem_alloc_reply(adreno_dev, rcvd);
+		if (rc)
+			return rc;
+
+		goto poll;
+	}
+
+	if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_REGISTER) {
+		rc = gmu_cntr_register_reply(adreno_dev, rcvd);
+		if (rc)
+			return rc;
+		goto poll;
+	}
+
+	dev_err(&gmu->pdev->dev,
+		"MSG_START: unexpected response id:%d, type:%d\n",
+		MSG_HDR_GET_ID(rcvd[0]),
+		MSG_HDR_GET_TYPE(rcvd[0]));
+
+	gmu_core_fault_snapshot(device);
+
+	return rc;
+}
+
+static void reset_hfi_mem_records(struct adreno_device *adreno_dev)
+{
+	struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct kgsl_memdesc *md = NULL;
+	u32 i;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_desc *desc = &hw_hfi->mem_alloc_table[i].desc;
+
+		if (desc->flags & HFI_MEMFLAG_HOST_INIT) {
+			md = hw_hfi->mem_alloc_table[i].md;
+			memset(md->hostptr, 0x0, md->size);
+		}
+	}
+}
+
+static void reset_hfi_queues(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
+	u32 i;
+
+	/* Flush HFI queues */
+	for (i = 0; i < HFI_QUEUE_MAX; i++) {
+		struct hfi_queue_header *hdr = &tbl->qhdr[i];
+
+		if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+			continue;
+
+		hdr->read_index = hdr->write_index;
+	}
+}
+
+void a6xx_hwsched_hfi_stop(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+
+	hfi->irq_mask &= ~HFI_IRQ_MSGQ_MASK;
+
+	/*
+	 * In some corner cases, it is possible that the GMU put a TS_RETIRE
+	 * on the msgq after we turned off GMU interrupts. Hence,
+	 * drain the queue one last time before we reset HFI queues.
+	 */
+	a6xx_hwsched_process_msgq(adreno_dev);
+
+	/* Drain the debug queue before we reset HFI queues */
+	a6xx_hwsched_process_dbgq(adreno_dev, false);
+
+	kgsl_pwrctrl_axi(KGSL_DEVICE(adreno_dev), false);
+
+	clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	/*
+	 * Reset the host-initialized HFI memory records, as the GMU expects
+	 * these records to be clear at bootup.
+	 */
+	reset_hfi_mem_records(adreno_dev);
+}
+
+static void enable_async_hfi(struct adreno_device *adreno_dev)
+{
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+
+	hfi->irq_mask |= HFI_IRQ_MSGQ_MASK;
+
+	gmu_core_regwrite(KGSL_DEVICE(adreno_dev), A6XX_GMU_GMU2HOST_INTR_MASK,
+		(u32)~hfi->irq_mask);
+}
+
+static int enable_preemption(struct adreno_device *adreno_dev)
+{
+	u32 data;
+	int ret;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	/*
+	 * Bits [0:1] contains the preemption level
+	 * Bit 2 is to enable/disable gmem save/restore
+	 * Bit 3 is to enable/disable skipsaverestore
+	 */
+	data = FIELD_PREP(GENMASK(1, 0), adreno_dev->preempt.preempt_level) |
+			FIELD_PREP(BIT(2), adreno_dev->preempt.usesgmem) |
+			FIELD_PREP(BIT(3), adreno_dev->preempt.skipsaverestore);
+
+	ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_PREEMPTION, 1,
+			data);
+	if (ret)
+		return ret;
+
+	/*
+	 * Bits[3:0] contain the preemption timeout enable bit per ringbuffer
+	 * Bits[31:4] contain the timeout in ms
+	 */
+	return a6xx_hfi_send_set_value(adreno_dev, HFI_VALUE_BIN_TIME, 1,
+			FIELD_PREP(GENMASK(31, 4), ADRENO_PREEMPT_TIMEOUT) |
+			FIELD_PREP(GENMASK(3, 0), 0xf));
+}
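+
+/*
+ * Worked example of the packing above (illustrative values only): with
+ * preempt_level = 2, usesgmem = 1 and skipsaverestore = 0, the feature
+ * data is FIELD_PREP(GENMASK(1, 0), 2) | FIELD_PREP(BIT(2), 1) = 0x6.
+ * Similarly, a preemption timeout of T ms with all four per-ringbuffer
+ * enable bits set packs to (T << 4) | 0xf for HFI_VALUE_BIN_TIME.
+ */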
+
+static int enable_gmu_stats(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	u32 data;
+
+	if (!gmu->stats_enable)
+		return 0;
+
+	/*
+	 * Bits [23:0] contains the countables mask
+	 * Bits [31:24] is the sampling interval
+	 */
+	data = FIELD_PREP(GENMASK(23, 0), gmu->stats_mask) |
+		FIELD_PREP(GENMASK(31, 24), gmu->stats_interval);
+
+	return a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_GMU_STATS, 1, data);
+}
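+
+/*
+ * Example encoding for the GMU stats feature above (sample values assumed):
+ * a countables mask of 0x3f with a sampling interval of 0x32 packs to
+ * FIELD_PREP(GENMASK(23, 0), 0x3f) | FIELD_PREP(GENMASK(31, 24), 0x32),
+ * i.e. 0x3200003f.
+ */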
+
+static int a6xx_hfi_send_perfcounter_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	/*
+	 * Perfcounter retention is disabled by default in GMU firmware.
+	 * In case the perfcounter retention behaviour is overridden
+	 * dynamically via sysfs, send this HFI feature with 'enable = 0'
+	 * to disable it in GMU firmware.
+	 */
+	if (adreno_dev->perfcounter)
+		return a6xx_hfi_send_feature_ctrl(adreno_dev,
+				HFI_FEATURE_PERF_NORETAIN, 0, 0);
+
+	return 0;
+}
+
+int a6xx_hwsched_hfi_start(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	reset_hfi_queues(adreno_dev);
+
+	ret = a6xx_gmu_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = a6xx_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table,
+		sizeof(gmu->hfi.dcvs_table));
+	if (ret)
+		goto err;
+
+	ret = a6xx_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table, sizeof(gmu->hfi.bw_table));
+	if (ret)
+		goto err;
+
+	ret = a6xx_hfi_send_acd_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = a6xx_hfi_send_lm_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = a6xx_hfi_send_bcl_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HWSCHED, 1, 0);
+	if (ret)
+		goto err;
+
+	ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_A6XX_KPROF,
+			1, 0);
+	if (ret)
+		goto err;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR)) {
+		ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_LSR,
+				1, 0);
+		if (ret)
+			goto err;
+	}
+
+	ret = a6xx_hfi_send_perfcounter_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	/* Enable the long ib timeout detection */
+	if (adreno_long_ib_detect(adreno_dev)) {
+		ret = a6xx_hfi_send_feature_ctrl(adreno_dev,
+			HFI_FEATURE_BAIL_OUT_TIMER, 1, 0);
+		if (ret)
+			goto err;
+	}
+
+	enable_gmu_stats(adreno_dev);
+
+	if (gmu->log_stream_enable)
+		a6xx_hfi_send_set_value(adreno_dev,
+			HFI_VALUE_LOG_STREAM_ENABLE, 0, 1);
+
+	if (gmu->log_group_mask)
+		a6xx_hfi_send_set_value(adreno_dev, HFI_VALUE_LOG_GROUP, 0, gmu->log_group_mask);
+
+	ret = a6xx_hfi_send_core_fw_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = enable_preemption(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = send_start_msg(adreno_dev);
+	if (ret)
+		goto err;
+
+	enable_async_hfi(adreno_dev);
+
+	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	/* Request default DCVS level */
+	ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+	if (ret)
+		goto err;
+
+	/* Request default BW vote */
+	ret = kgsl_pwrctrl_axi(device, true);
+	if (ret)
+		goto err;
+
+	/* Switch to min GMU clock */
+	a6xx_rdpm_cx_freq_update(gmu, gmu->freqs[0] / 1000);
+
+	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk",
+			gmu->freqs[0]);
+	if (ret)
+		dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n",
+			gmu->freqs[0], ret);
+
+err:
+	if (ret)
+		a6xx_hwsched_hfi_stop(adreno_dev);
+
+	return ret;
+}
+
+static int submit_raw_cmds(struct adreno_device *adreno_dev, void *cmds, u32 size_bytes,
+	const char *str)
+{
+	int ret;
+
+	ret = a6xx_hfi_send_cmd_async(adreno_dev, cmds, size_bytes);
+	if (ret)
+		return ret;
+
+	ret = gmu_core_timed_poll_check(KGSL_DEVICE(adreno_dev),
+			A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, 0, 200, BIT(23));
+	if (ret)
+		a6xx_spin_idle_debug(adreno_dev, str);
+
+	return ret;
+}
+
+static int cp_init(struct adreno_device *adreno_dev)
+{
+	u32 cmds[A6XX_CP_INIT_DWORDS + 1];
+
+	cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD);
+
+	a6xx_cp_init_cmds(adreno_dev, &cmds[1]);
+
+	return submit_raw_cmds(adreno_dev, cmds, sizeof(cmds),
+			"CP initialization failed to idle\n");
+}
+
+static int send_switch_to_unsecure(struct adreno_device *adreno_dev)
+{
+	u32 cmds[3];
+
+	cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD);
+
+	cmds[1] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+	cmds[2] = 0;
+
+	return submit_raw_cmds(adreno_dev, cmds, sizeof(cmds),
+			"Switch to unsecure failed to idle\n");
+}
+
+int a6xx_hwsched_cp_init(struct adreno_device *adreno_dev)
+{
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/* Program the ucode base for CP */
+	kgsl_regwrite(device, A6XX_CP_SQE_INSTR_BASE_LO,
+		lower_32_bits(fw->memdesc->gpuaddr));
+	kgsl_regwrite(device, A6XX_CP_SQE_INSTR_BASE_HI,
+		upper_32_bits(fw->memdesc->gpuaddr));
+
+	ret = cp_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = adreno_zap_shader_load(adreno_dev, a6xx_core->zap_name);
+	if (ret)
+		return ret;
+
+	if (!adreno_dev->zap_loaded)
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev),
+			A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+	else
+		ret = send_switch_to_unsecure(adreno_dev);
+
+	return ret;
+}
+
+static int register_global_ctxt(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct hfi_register_ctxt_cmd rcmd = {0};
+	struct hfi_context_pointers_cmd pcmd = {0};
+	int ret;
+
+	if (hwsched->global_ctxt_gmu_registered)
+		return 0;
+
+	ret = CMD_MSG_HDR(rcmd, H2F_MSG_REGISTER_CONTEXT);
+	if (ret)
+		return ret;
+
+	rcmd.ctxt_id = KGSL_GLOBAL_CTXT_ID;
+	rcmd.flags = (KGSL_CONTEXT_PRIORITY_HIGH << KGSL_CONTEXT_PRIORITY_SHIFT);
+
+	ret = a6xx_hfi_send_cmd_async(adreno_dev, &rcmd, sizeof(rcmd));
+	if (ret)
+		return ret;
+
+	ret = CMD_MSG_HDR(pcmd, H2F_MSG_CONTEXT_POINTERS);
+	if (ret)
+		return ret;
+
+	pcmd.ctxt_id = KGSL_GLOBAL_CTXT_ID;
+	pcmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, KGSL_GLOBAL_CTXT_ID, soptimestamp);
+	pcmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, KGSL_GLOBAL_CTXT_ID, eoptimestamp);
+
+	ret = a6xx_hfi_send_cmd_async(adreno_dev, &pcmd, sizeof(pcmd));
+	if (!ret)
+		hwsched->global_ctxt_gmu_registered = true;
+
+	return ret;
+}
+
+#define HFI_DSP_IRQ_BASE 2
+
+#define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE)
+
+static int submit_global_ctxt_cmd(struct adreno_device *adreno_dev, u64 gpuaddr, u32 size)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct {
+		struct hfi_submit_cmd submit_cmd;
+		struct hfi_issue_ib issue_ib;
+	} cmd = {0};
+	u32 seqnum, cmd_size = sizeof(cmd);
+	static u32 ts;
+	int ret = 0;
+
+	cmd.submit_cmd.ctxt_id = KGSL_GLOBAL_CTXT_ID;
+	cmd.submit_cmd.ts = ++ts;
+	cmd.submit_cmd.numibs = 1;
+
+	cmd.issue_ib.addr = gpuaddr;
+	cmd.issue_ib.size = size;
+
+	seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum);
+	cmd.submit_cmd.hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD);
+	cmd.submit_cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.submit_cmd.hdr, seqnum, cmd_size >> 2);
+
+	ret = a6xx_hfi_dispatch_queue_write(adreno_dev, HFI_DSP_ID_0,
+			(u32 *)&cmd, cmd_size, NULL, NULL);
+	/* Send interrupt to GMU to receive the message */
+	if (!ret)
+		gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_SET, DISPQ_IRQ_BIT(0));
+
+	return ret;
+}
+
+int a6xx_hwsched_counter_inline_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 val, *cmds, count = 0;
+	int ret;
+
+	ret = register_global_ctxt(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = adreno_allocate_global(device, &hfi->perfctr_scratch,
+		PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "perfctr_scratch");
+	if (ret)
+		goto err;
+
+	if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)
+		a6xx_perfcounter_update(adreno_dev, reg, false);
+
+	cmds = hfi->perfctr_scratch->hostptr;
+
+	cmds[count++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+	cmds[count++] = cp_type4_packet(reg->select, 1);
+	cmds[count++] = countable;
+
+	ret = submit_global_ctxt_cmd(adreno_dev, hfi->perfctr_scratch->gpuaddr, count << 2);
+	if (ret)
+		goto err;
+
+	/* Wait till the register is programmed with the countable */
+	ret = kgsl_regmap_read_poll_timeout(&device->regmap, reg->select, val,
+				val == countable, 100, ADRENO_IDLE_TIMEOUT);
+	if (!ret) {
+		reg->value = 0;
+		return ret;
+	}
+
+err:
+	dev_err(device->dev, "Perfcounter %s/%u/%u start via commands failed\n",
+			group->name, counter, countable);
+	return ret;
+}
+
+static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+		return true;
+
+	if (hdr->read_index == hdr->write_index)
+		return true;
+
+	return false;
+}
+
+static int hfi_f2h_main(void *arg)
+{
+	struct adreno_device *adreno_dev = arg;
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() ||
+			/* If msgq irq is enabled and msgq has messages to process */
+			(((hfi->irq_mask & HFI_IRQ_MSGQ_MASK) &&
+			!is_queue_empty(adreno_dev, HFI_MSG_ID)) ||
+			/* Trace buffer has messages to process */
+			!gmu_core_is_trace_empty(gmu->trace.md->hostptr) ||
+			/* Dbgq has messages to process */
+			!is_queue_empty(adreno_dev, HFI_DBG_ID)));
+
+		if (kthread_should_stop())
+			break;
+
+		a6xx_hwsched_process_msgq(adreno_dev);
+		gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev),
+					&gmu->pdev->dev, &gmu->trace);
+		a6xx_hwsched_process_dbgq(adreno_dev, true);
+	}
+
+	return 0;
+}
+
+int a6xx_hwsched_hfi_probe(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev);
+
+	gmu->hfi.irq = kgsl_request_irq(gmu->pdev, "kgsl_hfi_irq",
+		a6xx_hwsched_hfi_handler, adreno_dev);
+
+	if (gmu->hfi.irq < 0)
+		return gmu->hfi.irq;
+
+	hw_hfi->irq_mask = HFI_IRQ_MASK;
+
+	rwlock_init(&hw_hfi->msglock);
+
+	INIT_LIST_HEAD(&hw_hfi->msglist);
+
+	init_waitqueue_head(&hw_hfi->f2h_wq);
+
+	mutex_init(&hw_hfi->msgq_mutex);
+
+	return 0;
+}
+
+void a6xx_hwsched_hfi_remove(struct adreno_device *adreno_dev)
+{
+	struct a6xx_hwsched_hfi *hw_hfi = to_a6xx_hwsched_hfi(adreno_dev);
+
+	if (hw_hfi->f2h_task)
+		kthread_stop(hw_hfi->f2h_task);
+}
+
+static void a6xx_add_profile_events(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time)
+{
+	unsigned long flags;
+	u64 time_in_s;
+	unsigned long time_in_ns;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct kgsl_context *context = drawobj->context;
+	struct submission_info info = {0};
+
+	if (!time)
+		return;
+
+	/*
+	 * Here we are attempting to create a mapping between the
+	 * GPU time domain (alwayson counter) and the CPU time domain
+	 * (local_clock) by sampling both values as close together as
+	 * possible. This is useful for many types of debugging and
+	 * profiling. In order to make this mapping as accurate as
+	 * possible, we must turn off interrupts to avoid running
+	 * interrupt handlers between the two samples.
+	 */
+
+	local_irq_save(flags);
+
+	/* Read always on registers */
+	time->ticks = a6xx_read_alwayson(adreno_dev);
+
+	/* Trace the GPU time to create a mapping to ftrace time */
+	trace_adreno_cmdbatch_sync(context->id, context->priority,
+		drawobj->timestamp, time->ticks);
+
+	/* Get the kernel clock for time since boot */
+	time->ktime = local_clock();
+
+	/* Get the timeofday for the wall time (for the user) */
+	ktime_get_real_ts64(&time->utime);
+
+	local_irq_restore(flags);
+
+	/* Return kernel clock time to the client if requested */
+	time_in_s = time->ktime;
+	time_in_ns = do_div(time_in_s, 1000000000);
+
+	info.inflight = -1;
+	info.rb_id = adreno_get_level(context);
+	info.gmu_dispatch_queue = context->gmu_dispatch_queue;
+
+	cmdobj->submit_ticks = time->ticks;
+
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_SUBMIT,
+		pid_nr(context->proc_priv->pid),
+		context->id, drawobj->timestamp,
+		!!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+	trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks,
+		(unsigned long) time_in_s, time_in_ns / 1000, 0);
+
+	log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp,
+		context->priority, drawobj->flags);
+}
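+
+/*
+ * Note on the ktime split above (example value assumed): do_div() divides
+ * in place and returns the remainder, so for time->ktime = 8123456789 ns,
+ * time_in_s becomes 8 and time_in_ns becomes 123456789, and the submitted
+ * trace reports 123456 us (time_in_ns / 1000).
+ */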
+
+static u32 get_next_dq(u32 priority)
+{
+	struct dq_info *info = &a6xx_hfi_dqs[priority];
+	u32 next = info->base_dq_id + info->offset;
+
+	info->offset = (info->offset + 1) % info->max_dq;
+
+	return next;
+}
+
+static u32 get_dq_id(struct kgsl_context *context)
+{
+	u32 level = adreno_get_level(context);
+
+	return get_next_dq(level);
+}
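+
+/*
+ * get_next_dq() round-robins within a priority level. For example, with
+ * base_dq_id = 1 and max_dq = 3 (assumed values), successive calls return
+ * dispatch queues 1, 2, 3, 1, ... so submissions at the same priority are
+ * spread across that level's queues.
+ */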
+
+static int send_context_register(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct hfi_register_ctxt_cmd cmd;
+	struct kgsl_pagetable *pt = context->proc_priv->pagetable;
+	int ret, asid = kgsl_mmu_pagetable_get_asid(pt, context);
+
+	if (asid < 0)
+		return asid;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_REGISTER_CONTEXT);
+	if (ret)
+		return ret;
+
+	cmd.ctxt_id = context->id;
+	cmd.flags = HFI_CTXT_FLAG_NOTIFY | context->flags;
+
+	/*
+	 * HLOS SMMU driver programs context bank to look up ASID from TTBR0 during a page
+	 * table walk. So the TLB entries are tagged with the ASID from TTBR0. TLBIASID
+	 * invalidates TLB entries whose ASID matches the value that was written to the
+	 * CBn_TLBIASID register. Set ASID along with PT address.
+	 */
+	cmd.pt_addr = kgsl_mmu_pagetable_get_ttbr0(pt) |
+		FIELD_PREP(GENMASK_ULL(63, KGSL_IOMMU_ASID_START_BIT), asid);
+	cmd.ctxt_idr = pid_nr(context->proc_priv->pid);
+	cmd.ctxt_bank = kgsl_mmu_pagetable_get_context_bank(pt, context);
+
+	return a6xx_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
+}
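+
+/*
+ * Example of the pt_addr packing above (all values assumed): with a TTBR0
+ * of 0x81c65000, an ASID of 5 and KGSL_IOMMU_ASID_START_BIT of 48, pt_addr
+ * becomes 0x81c65000 | (5ULL << 48) = 0x0005000081c65000.
+ */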
+
+static int send_context_pointers(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct hfi_context_pointers_cmd cmd = {0};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_CONTEXT_POINTERS);
+	if (ret)
+		return ret;
+
+	cmd.ctxt_id = context->id;
+	cmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, soptimestamp);
+	cmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, eoptimestamp);
+	if (context->user_ctxt_record)
+		cmd.user_ctxt_record_addr =
+			context->user_ctxt_record->memdesc.gpuaddr;
+
+	return a6xx_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static int hfi_context_register(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (context->gmu_registered)
+		return 0;
+
+	ret = send_context_register(adreno_dev, context);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to register context %u: %d\n",
+			context->id, ret);
+
+		if (device->gmu_fault)
+			adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+
+		return ret;
+	}
+
+	ret = send_context_pointers(adreno_dev, context);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to register context %u pointers: %d\n",
+			context->id, ret);
+
+		if (device->gmu_fault)
+			adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+
+		return ret;
+	}
+
+	context->gmu_registered = true;
+	context->gmu_dispatch_queue = get_dq_id(context);
+
+	return 0;
+}
+
+static void populate_ibs(struct adreno_device *adreno_dev,
+	struct hfi_submit_cmd *cmd, struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct hfi_issue_ib *issue_ib;
+	struct kgsl_memobj_node *ib;
+
+	if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) {
+		struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+		struct kgsl_memdesc *big_ib;
+
+		if (test_bit(CMDOBJ_RECURRING_START, &cmdobj->priv))
+			big_ib = hfi->big_ib_recurring;
+		else
+			big_ib = hfi->big_ib;
+		/*
+		 * The dispatcher ensures that there is only one big IB inflight
+		 */
+		cmd->big_ib_gmu_va = big_ib->gmuaddr;
+		cmd->flags |= CMDBATCH_INDIRECT;
+		issue_ib = big_ib->hostptr;
+	} else {
+		issue_ib = (struct hfi_issue_ib *)&cmd[1];
+	}
+
+	list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+		issue_ib->addr = ib->gpuaddr;
+		issue_ib->size = ib->size;
+		issue_ib++;
+	}
+
+	cmd->numibs = cmdobj->numibs;
+}
+
+/* Sizes in the functions below are in units of dwords */
+static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint32_t queue_idx,
+	uint32_t *msg, u32 size_bytes, struct kgsl_drawobj_cmd *cmdobj,
+	struct adreno_submit_time *time)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	uint32_t *queue;
+	uint32_t i, write, empty_space;
+	uint32_t size_dwords = size_bytes >> 2;
+	u32 align_size = ALIGN(size_dwords, SZ_4);
+	uint32_t id = MSG_HDR_GET_ID(*msg);
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
+		return -EINVAL;
+
+	queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);
+
+	empty_space = (hdr->write_index >= hdr->read_index) ?
+			(hdr->queue_size - (hdr->write_index - hdr->read_index))
+			: (hdr->read_index - hdr->write_index);
+
+	if (empty_space <= align_size)
+		return -ENOSPC;
+
+	write = hdr->write_index;
+
+	for (i = 0; i < size_dwords; i++) {
+		queue[write] = msg[i];
+		write = (write + 1) % hdr->queue_size;
+	}
+
+	/* Cookify any unused space at the end of the write buffer */
+	if (GMU_VER_MAJOR(gmu->ver.hfi) >= 2) {
+		for (; i < align_size; i++) {
+			queue[write] = 0xFAFAFAFA;
+			write = (write + 1) % hdr->queue_size;
+		}
+	}
+
+	/* Ensure packet is written out before proceeding */
+	wmb();
+
+	if (!cmdobj)
+		goto done;
+
+	a6xx_add_profile_events(adreno_dev, cmdobj, time);
+
+	/*
+	 * Put the profiling information in the user profiling buffer.
+	 * The hfi_update_write_idx below has a wmb() before the actual
+	 * write index update to ensure that the GMU does not see the
+	 * packet before the profile data is written out.
+	 */
+	adreno_profile_submit_time(time);
+
+done:
+	trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
+
+	hfi_update_write_idx(&hdr->write_index, write);
+
+	return 0;
+}
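+
+/*
+ * Example of the queue-space check above (queue state assumed): with
+ * queue_size = 128 dwords, read_index = 10 and write_index = 120,
+ * empty_space is 128 - (120 - 10) = 18 dwords, so a 12-dword packet
+ * (already a multiple of SZ_4) fits and its tail wraps from index 127
+ * back to index 0.
+ */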
+
+int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj)
+{
+	int ret = 0;
+	u32 cmd_sizebytes, seqnum;
+	struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj);
+	struct hfi_submit_cmd *cmd;
+	struct adreno_submit_time time = {0};
+	static void *cmdbuf;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+
+	if (cmdbuf == NULL) {
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		cmdbuf = devm_kzalloc(&device->pdev->dev, HFI_MAX_MSG_SIZE,
+			GFP_KERNEL);
+		if (!cmdbuf)
+			return -ENOMEM;
+	}
+
+	ret = hfi_context_register(adreno_dev, drawobj->context);
+	if (ret)
+		return ret;
+
+	/* Add an issue_ib struct for each IB */
+	if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS ||
+		test_bit(CMDOBJ_SKIP, &cmdobj->priv))
+		cmd_sizebytes = sizeof(*cmd);
+	else
+		cmd_sizebytes = sizeof(*cmd) +
+			(sizeof(struct hfi_issue_ib) * cmdobj->numibs);
+
+	if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE))
+		return -EMSGSIZE;
+
+	memset(cmdbuf, 0x0, cmd_sizebytes);
+
+	cmd = cmdbuf;
+
+	cmd->ctxt_id = drawobj->context->id;
+	cmd->flags = HFI_CTXT_FLAG_NOTIFY;
+	if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME)
+		cmd->flags |= CMDBATCH_EOF;
+
+	cmd->ts = drawobj->timestamp;
+
+	if (test_bit(CMDOBJ_SKIP, &cmdobj->priv))
+		goto skipib;
+
+	populate_ibs(adreno_dev, cmd, cmdobj);
+
+	if ((drawobj->flags & KGSL_DRAWOBJ_PROFILING) &&
+		cmdobj->profiling_buf_entry) {
+
+		time.drawobj = drawobj;
+
+		cmd->profile_gpuaddr_lo =
+			lower_32_bits(cmdobj->profiling_buffer_gpuaddr);
+		cmd->profile_gpuaddr_hi =
+			upper_32_bits(cmdobj->profiling_buffer_gpuaddr);
+
+		/* Indicate to GMU to do user profiling for this submission */
+		cmd->flags |= CMDBATCH_PROFILING;
+	}
+
+skipib:
+	adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj);
+
+	seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum);
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD);
+	cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2);
+
+	ret = a6xx_hfi_dispatch_queue_write(adreno_dev,
+		HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue,
+		(u32 *)cmd, cmd_sizebytes, cmdobj, &time);
+	if (ret)
+		return ret;
+
+	/* Send interrupt to GMU to receive the message */
+	gmu_core_regwrite(KGSL_DEVICE(adreno_dev), A6XX_GMU_HOST2GMU_INTR_SET,
+		DISPQ_IRQ_BIT(drawobj->context->gmu_dispatch_queue));
+
+	/*
+	 * We don't need the drawctxt spinlock here because hardware fences
+	 * are not enabled for a6xx.
+	 */
+	drawctxt->internal_timestamp = drawobj->timestamp;
+
+	return ret;
+}
+
+int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct hfi_submit_cmd *cmd;
+	struct kgsl_memobj_node *ib;
+	u32 cmd_sizebytes;
+	int ret;
+	static bool active;
+
+	if (adreno_gpu_halt(adreno_dev) || adreno_hwsched_gpu_fault(adreno_dev))
+		return -EBUSY;
+
+	if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) {
+		cmdobj->numibs = 0;
+	} else {
+		list_for_each_entry(ib, &cmdobj->cmdlist, node)
+			cmdobj->numibs++;
+	}
+
+	if (cmdobj->numibs > HWSCHED_MAX_IBS)
+		return -EINVAL;
+
+	if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS)
+		cmd_sizebytes = sizeof(*cmd);
+	else
+		cmd_sizebytes = sizeof(*cmd) +
+			(sizeof(struct hfi_issue_ib) * cmdobj->numibs);
+
+	if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE))
+		return -EMSGSIZE;
+
+	cmd = kzalloc(cmd_sizebytes, GFP_KERNEL);
+	if (cmd == NULL)
+		return -ENOMEM;
+
+	if (test_bit(CMDOBJ_RECURRING_START, &cmdobj->priv)) {
+		if (!active) {
+			ret = adreno_active_count_get(adreno_dev);
+			if (ret) {
+				kfree(cmd);
+				return ret;
+			}
+			active = true;
+		}
+		cmd->flags |= CMDBATCH_RECURRING_START;
+		populate_ibs(adreno_dev, cmd, cmdobj);
+	} else
+		cmd->flags |= CMDBATCH_RECURRING_STOP;
+
+	cmd->ctxt_id = drawobj->context->id;
+
+	ret = hfi_context_register(adreno_dev, drawobj->context);
+	if (ret) {
+		adreno_active_count_put(adreno_dev);
+		active = false;
+		kfree(cmd);
+		return ret;
+	}
+
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD);
+
+	ret = a6xx_hfi_send_cmd_async(adreno_dev, cmd, cmd_sizebytes);
+
+	kfree(cmd);
+
+	if (ret) {
+		adreno_active_count_put(adreno_dev);
+		active = false;
+		return ret;
+	}
+
+	if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) {
+		adreno_hwsched_retire_cmdobj(hwsched, hwsched->recurring_cmdobj);
+		hwsched->recurring_cmdobj = NULL;
+		del_timer_sync(&hwsched->lsr_timer);
+		if (active)
+			adreno_active_count_put(adreno_dev);
+		active = false;
+		return ret;
+	}
+
+	hwsched->recurring_cmdobj = cmdobj;
+	/* Start the LSR timer for power stats collection */
+	mod_timer(&hwsched->lsr_timer, jiffies + msecs_to_jiffies(10));
+	return ret;
+}
+
+static void trigger_context_unregister_fault(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	gmu_core_fault_snapshot(device);
+
+	/*
+	 * Trigger dispatcher based reset and recovery. Invalidate the
+	 * context so that any un-finished inflight submissions are not
+	 * replayed after recovery.
+	 */
+	adreno_drawctxt_set_guilty(device, context);
+	adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+static int send_context_unregister_hfi(struct adreno_device *adreno_dev,
+	struct kgsl_context *context, u32 ts)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct pending_cmd pending_ack;
+	struct hfi_unregister_ctxt_cmd cmd;
+	u32 seqnum;
+	int ret;
+
+	/* Only send HFI if device is not in SLUMBER */
+	if (!context->gmu_registered ||
+		!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_UNREGISTER_CONTEXT);
+	if (ret)
+		return ret;
+
+	cmd.ctxt_id = context->id;
+	cmd.ts = ts;
+
+	/*
+	 * Although we know device is powered on, we can still enter SLUMBER
+	 * because the wait for ack below is done without holding the mutex. So
+	 * take an active count before releasing the mutex so as to avoid a
+	 * concurrent SLUMBER sequence while GMU is un-registering this context.
+	 */
+	ret = a6xx_hwsched_active_count_get(adreno_dev);
+	if (ret) {
+		trigger_context_unregister_fault(adreno_dev, context);
+		return ret;
+	}
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+	add_waiter(hfi, cmd.hdr, &pending_ack);
+
+	ret = a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (ret) {
+		trigger_context_unregister_fault(adreno_dev, context);
+		goto done;
+	}
+
+	ret = adreno_hwsched_ctxt_unregister_wait_completion(adreno_dev,
+		&gmu->pdev->dev, &pending_ack, a6xx_hwsched_process_msgq, &cmd);
+	if (ret) {
+		trigger_context_unregister_fault(adreno_dev, context);
+		goto done;
+	}
+
+	ret = check_ack_failure(adreno_dev, &pending_ack);
+
+done:
+	a6xx_hwsched_active_count_put(adreno_dev);
+	del_waiter(hfi, &pending_ack);
+
+	return ret;
+}
+
+void a6xx_hwsched_context_detach(struct adreno_context *drawctxt)
+{
+	struct kgsl_context *context = &drawctxt->base;
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret = 0;
+
+	mutex_lock(&device->mutex);
+
+	ret = send_context_unregister_hfi(adreno_dev, context,
+		drawctxt->internal_timestamp);
+
+	if (!ret) {
+		kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+			drawctxt->timestamp);
+
+		kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+			drawctxt->timestamp);
+
+		adreno_profile_process_results(adreno_dev);
+	}
+
+	context->gmu_registered = false;
+
+	mutex_unlock(&device->mutex);
+}
+
+u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct hfi_get_value_cmd cmd;
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
+	struct pending_cmd pending_ack;
+	int rc;
+	u32 seqnum;
+
+	if (device->state != KGSL_STATE_ACTIVE)
+		return 0;
+
+	rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE);
+	if (rc)
+		return 0;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+	cmd.type = HFI_VALUE_PREEMPT_COUNT;
+	cmd.subtype = 0;
+
+	add_waiter(hfi, cmd.hdr, &pending_ack);
+
+	rc = a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (rc)
+		goto done;
+
+	rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack,
+		a6xx_hwsched_process_msgq);
+	if (rc)
+		goto done;
+
+	rc = check_ack_failure(adreno_dev, &pending_ack);
+
+done:
+	del_waiter(hfi, &pending_ack);
+
+	return rc ? 0 : pending_ack.results[2];
+}

+ 152 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_hwsched_hfi.h

@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_A6XX_HWSCHED_HFI_H_
+#define _ADRENO_A6XX_HWSCHED_HFI_H_
+
+struct a6xx_hwsched_hfi {
+	struct hfi_mem_alloc_entry mem_alloc_table[32];
+	u32 mem_alloc_entries;
+	/** @irq_mask: Store the hfi interrupt mask */
+	u32 irq_mask;
+	/** @msglock: To protect the list of un-ACKed hfi packets */
+	rwlock_t msglock;
+	/** @msglist: List of un-ACKed hfi packets */
+	struct list_head msglist;
+	/** @f2h_task: Task for processing gmu fw to host packets */
+	struct task_struct *f2h_task;
+	/** @f2h_wq: Waitqueue for the f2h_task */
+	wait_queue_head_t f2h_wq;
+	/** @big_ib: GMU buffer to hold big IBs */
+	struct kgsl_memdesc *big_ib;
+	/** @big_ib_recurring: GMU buffer to hold big recurring IBs */
+	struct kgsl_memdesc *big_ib_recurring;
+	/** @perfctr_scratch: Buffer to hold perfcounter PM4 commands */
+	struct kgsl_memdesc *perfctr_scratch;
+	/** @msgq_mutex: Mutex for accessing the msgq */
+	struct mutex msgq_mutex;
+};
+
+struct kgsl_drawobj_cmd;
+
+/**
+ * a6xx_hwsched_hfi_probe - Probe hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int a6xx_hwsched_hfi_probe(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_hfi_remove - Release hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ */
+void a6xx_hwsched_hfi_remove(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_hfi_init - Initialize hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to initialize hfi resources
+ * once before the very first gmu boot
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int a6xx_hwsched_hfi_init(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_hfi_start - Start hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Send the various hfi packets before booting the gpu
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int a6xx_hwsched_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_hfi_stop - Stop the hfi resources
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function does the hfi cleanup when powering down the gmu
+ */
+void a6xx_hwsched_hfi_stop(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_cp_init - Send CP_INIT via HFI
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to send CP INIT packet and bring
+ * GPU out of secure mode using hfi raw packets.
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int a6xx_hwsched_cp_init(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_counter_inline_enable - Configure a performance counter for a countable
+ * @adreno_dev: Adreno device to configure
+ * @group: Desired performance counter group
+ * @counter: Desired performance counter in the group
+ * @countable: Desired countable
+ *
+ * Physically set up a counter within a group with the desired countable.
+ *
+ * Return: 0 on success or negative error on failure.
+ */
+int a6xx_hwsched_counter_inline_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable);
+
+/**
+ * a6xx_hfi_send_cmd_async - Send an hfi packet
+ * @adreno_dev: Pointer to adreno device structure
+ * @data: Data to be sent in the hfi packet
+ * @size_bytes: Size of the packet in bytes
+ *
+ * Send data in the form of an HFI packet to the GMU and wait for
+ * its ack asynchronously.
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int a6xx_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes);
+
+/**
+ * a6xx_hwsched_submit_drawobj - Dispatch IBs to dispatch queues
+ * @adreno_dev: Pointer to adreno device structure
+ * @drawobj: The command draw object which needs to be submitted
+ *
+ * This function is used to register the context if needed and submit
+ * IBs to the hfi dispatch queues.
+ *
+ * Return: 0 on success and negative error on failure
+ */
+int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj);
+
+/**
+ * a6xx_hwsched_context_detach - Unregister a context with GMU
+ * @drawctxt: Pointer to the adreno context
+ *
+ * This function sends context unregister HFI and waits for the ack
+ * to ensure all submissions from this context have retired
+ */
+void a6xx_hwsched_context_detach(struct adreno_context *drawctxt);
+
+/* Helper function to get to a6xx hwsched hfi device from adreno device */
+struct a6xx_hwsched_hfi *to_a6xx_hwsched_hfi(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_hwsched_preempt_count_get - Get preemption count from GMU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * This function sends a GET_VALUE HFI packet to get the number of
+ * preemptions completed since last SLUMBER exit.
+ *
+ * Return: Preemption count
+ */
+u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev);
+
+#endif

+ 1010 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_perfcounter.c

@@ -0,0 +1,1010 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_a6xx_hwsched_hfi.h"
+#include "adreno_perfcounter.h"
+#include "adreno_pm4types.h"
+#include "kgsl_device.h"
+
+#define VBIF2_PERF_CNT_SEL_MASK 0x7F
+/* offset of clear register from select register */
+#define VBIF2_PERF_CLR_REG_SEL_OFF 8
+/* offset of enable register from select register */
+#define VBIF2_PERF_EN_REG_SEL_OFF 16
+/* offset of clear register from the enable register */
+#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8
+
+/* offset of clear register from select register for GBIF */
+#define GBIF_PERF_CLR_REG_SEL_OFF 1
+/* offset of enable register from select register for GBIF */
+#define GBIF_PERF_EN_REG_SEL_OFF  2
+/* offset of clear register from the power enable register for GBIF */
+#define GBIF_PWR_CLR_REG_EN_OFF    1
+/* offset of select register from the power enable register for GBIF */
+#define GBIF_PWR_SEL_REG_EN_OFF  3
+
+#define GBIF_PERF_SEL_RMW_MASK   0xFF
+#define GBIF_PWR_SEL_RMW_MASK    0xFF
+#define GBIF_PWR_EN_CLR_RMW_MASK 0x10000
+
+static void a6xx_counter_load(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_register *reg)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int index = reg->load_bit / 32;
+	u32 enable = BIT(reg->load_bit & 31);
+
+	/*
+	 * a650 and a660 currently have the perfcounter values saved via
+	 * retention in the GMU.
+	 */
+	if (adreno_is_a650(adreno_dev) || adreno_is_a660(adreno_dev))
+		return;
+
+	kgsl_regwrite(device, A6XX_RBBM_PERFCTR_LOAD_VALUE_LO,
+		lower_32_bits(reg->value));
+
+	kgsl_regwrite(device, A6XX_RBBM_PERFCTR_LOAD_VALUE_HI,
+		upper_32_bits(reg->value));
+
+	kgsl_regwrite(device, A6XX_RBBM_PERFCTR_LOAD_CMD0 + index, enable);
+}
+
+/*
+ * For registers that do not get restored on power cycle, read the value and add
+ * the stored shadow value
+ */
+static u64 a6xx_counter_read_norestore(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 hi, lo;
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	return ((((u64) hi) << 32) | lo) + reg->value;
+}
+
+static int a6xx_counter_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	int ret = 0;
+
+	if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)
+		ret = a6xx_perfcounter_update(adreno_dev, reg, true);
+	else
+		kgsl_regwrite(device, reg->select, countable);
+
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
+
+static int a6xx_hwsched_counter_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	if (!(KGSL_DEVICE(adreno_dev)->state == KGSL_STATE_ACTIVE))
+		return a6xx_counter_enable(adreno_dev, group, counter, countable);
+
+	return a6xx_hwsched_counter_inline_enable(adreno_dev, group, counter, countable);
+}
+
+static int a6xx_counter_inline_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0];
+	u32 cmds[3];
+	int ret;
+
+
+	if (!(device->state == KGSL_STATE_ACTIVE))
+		return a6xx_counter_enable(adreno_dev, group, counter,
+			countable);
+
+	if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)
+		a6xx_perfcounter_update(adreno_dev, reg, false);
+
+	cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+	cmds[1] = cp_type4_packet(reg->select, 1);
+	cmds[2] = countable;
+
+	/* submit to highest priority RB always */
+	ret = a6xx_ringbuffer_addcmds(adreno_dev, rb, NULL,
+		F_NOTPROTECTED, cmds, 3, 0, NULL);
+	if (ret)
+		return ret;
+
+	/*
+	 * schedule dispatcher to make sure rb[0] is run, because
+	 * if the current RB is not rb[0] and gpu is idle then
+	 * rb[0] will not get scheduled to run
+	 */
+	if (adreno_dev->cur_rb != rb)
+		adreno_dispatcher_schedule(device);
+
+	/* wait for the above commands submitted to complete */
+	ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp,
+		ADRENO_IDLE_TIMEOUT);
+
+	if (ret) {
+		/*
+		 * If we were woken up because of cancelling rb events
+		 * either due to soft reset or adreno_stop, ignore the
+		 * error and return 0 here. The perfcounter is already
+		 * set up in software and it will be programmed in
+		 * hardware when we wake up or come up after soft reset,
+		 * by adreno_perfcounter_restore.
+		 */
+		if (ret == -EAGAIN)
+			ret = 0;
+		else
+			dev_err(device->dev,
+				     "Perfcounter %s/%u/%u start via commands failed %d\n",
+				     group->name, counter, countable, ret);
+	}
+
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
+
+static u64 a6xx_counter_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32  hi, lo;
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	/* These registers are restored on power resume */
+	return (((u64) hi) << 32) | lo;
+}
+
+static int a6xx_counter_gbif_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	unsigned int shift = counter << 3;
+	unsigned int perfctr_mask = 1 << counter;
+
+	if (countable > VBIF2_PERF_CNT_SEL_MASK)
+		return -EINVAL;
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regrmw(device, reg->select - GBIF_PERF_CLR_REG_SEL_OFF,
+		perfctr_mask, perfctr_mask);
+	kgsl_regrmw(device, reg->select - GBIF_PERF_CLR_REG_SEL_OFF,
+		perfctr_mask, 0);
+	/* select the desired countable */
+	kgsl_regrmw(device, reg->select,
+		GBIF_PERF_SEL_RMW_MASK << shift, countable << shift);
+	/* enable counter */
+	kgsl_regrmw(device, reg->select - GBIF_PERF_EN_REG_SEL_OFF,
+		perfctr_mask, perfctr_mask);
+
+	reg->value = 0;
+	return 0;
+}
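+
+/*
+ * Example of the GBIF programming above (counter index assumed): for
+ * counter 2, shift = 16 and perfctr_mask = 0x4, so the countable is
+ * written to bits [23:16] of the select register and bit 2 is toggled
+ * in the clear and enable registers.
+ */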
+
+static int a630_counter_vbif_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	if (countable > VBIF2_PERF_CNT_SEL_MASK)
+		return -EINVAL;
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regwrite(device,
+		reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1);
+	kgsl_regwrite(device,
+		reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0);
+	kgsl_regwrite(device,
+		reg->select, countable & VBIF2_PERF_CNT_SEL_MASK);
+	/* enable reg is VBIF2_PERF_EN_REG_SEL_OFF dwords before select reg */
+	kgsl_regwrite(device,
+		reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1);
+
+	reg->value = 0;
+
+	return 0;
+}
+
+static int a630_counter_vbif_pwr_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regwrite(device, reg->select +
+		VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1);
+	kgsl_regwrite(device, reg->select +
+		VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0);
+	kgsl_regwrite(device, reg->select, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static int a6xx_counter_gbif_pwr_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	unsigned int shift = counter << 3;
+	unsigned int perfctr_mask = GBIF_PWR_EN_CLR_RMW_MASK << counter;
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regrmw(device, reg->select + GBIF_PWR_CLR_REG_EN_OFF,
+		perfctr_mask, perfctr_mask);
+	kgsl_regrmw(device, reg->select + GBIF_PWR_CLR_REG_EN_OFF,
+		perfctr_mask, 0);
+	/* select the desired countable */
+	kgsl_regrmw(device, reg->select + GBIF_PWR_SEL_REG_EN_OFF,
+		GBIF_PWR_SEL_RMW_MASK << shift, countable << shift);
+	/* Enable the counter */
+	kgsl_regrmw(device, reg->select, perfctr_mask, perfctr_mask);
+
+	reg->value = 0;
+	return 0;
+}
+
+static int a6xx_counter_alwayson_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	return 0;
+}
+
+static u64 a6xx_counter_alwayson_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	return a6xx_read_alwayson(adreno_dev) + reg->value;
+}
+
+static void a6xx_write_gmu_counter_enable(struct kgsl_device *device,
+		struct adreno_perfcount_register *reg, u32 bit, u32 countable)
+{
+	u32 val;
+
+	kgsl_regread(device, reg->select, &val);
+	val &= ~(0xff << bit);
+	val |= countable << bit;
+	kgsl_regwrite(device, reg->select, val);
+}
+
+static int a6xx_counter_gmu_xoclk_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	if (counter >= 6 && !adreno_is_a660(adreno_dev))
+		return -EINVAL;
+
+	/*
+	 * Counters [0:3] are in select 1 bit offsets 0, 8, 16 and 24
+	 * Counters [4:5] are in select 2 bit offset 0, 8
+	 * Counters [6:9] are in select 3 bit offset 0, 8, 16 and 24
+	 */
+
+	if (counter == 4 || counter == 5)
+		counter -= 4;
+	else if (counter >= 6)
+		counter -= 6;
+
+	a6xx_write_gmu_counter_enable(device, reg, counter * 8, countable);
+
+	reg->value = 0;
+
+	kgsl_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
+
+	return 0;
+}
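+
+/*
+ * Example of the counter-to-select mapping above (illustrative): XOCLK
+ * counter 5 lives in the second select register, so the code rebases it
+ * to counter 1 and programs the countable at bit offset 8 of reg->select.
+ */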
+
+static int a6xx_counter_gmu_gmuclk_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	/*
+	 * The two counters live in GMU_CX_GMU_POWER_COUNTER_SELECT_1
+	 * at bit offsets 16 and 24.
+	 */
+	a6xx_write_gmu_counter_enable(device, reg,
+		16 + (counter * 8), countable);
+
+	kgsl_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static int a6xx_counter_gmu_perf_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	/*
+	 * Counters [0:3] are in select 1 bit offsets 0, 8, 16 and 24
+	 * Counters [4:5] are in select 2 bit offset 0, 8
+	 */
+
+	if (counter >= 4)
+		counter -= 4;
+
+	a6xx_write_gmu_counter_enable(device, reg, counter * 8, countable);
+
+	kgsl_regwrite(device, A6XX_GMU_CX_GMU_PERF_COUNTER_ENABLE, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static struct adreno_perfcount_register a6xx_perfcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_0_LO,
+		A6XX_RBBM_PERFCTR_CP_0_HI, 0, A6XX_CP_PERFCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_1_LO,
+		A6XX_RBBM_PERFCTR_CP_1_HI, 1, A6XX_CP_PERFCTR_CP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_2_LO,
+		A6XX_RBBM_PERFCTR_CP_2_HI, 2, A6XX_CP_PERFCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_3_LO,
+		A6XX_RBBM_PERFCTR_CP_3_HI, 3, A6XX_CP_PERFCTR_CP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_4_LO,
+		A6XX_RBBM_PERFCTR_CP_4_HI, 4, A6XX_CP_PERFCTR_CP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_5_LO,
+		A6XX_RBBM_PERFCTR_CP_5_HI, 5, A6XX_CP_PERFCTR_CP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_6_LO,
+		A6XX_RBBM_PERFCTR_CP_6_HI, 6, A6XX_CP_PERFCTR_CP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_7_LO,
+		A6XX_RBBM_PERFCTR_CP_7_HI, 7, A6XX_CP_PERFCTR_CP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_8_LO,
+		A6XX_RBBM_PERFCTR_CP_8_HI, 8, A6XX_CP_PERFCTR_CP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_9_LO,
+		A6XX_RBBM_PERFCTR_CP_9_HI, 9, A6XX_CP_PERFCTR_CP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_10_LO,
+		A6XX_RBBM_PERFCTR_CP_10_HI, 10, A6XX_CP_PERFCTR_CP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_11_LO,
+		A6XX_RBBM_PERFCTR_CP_11_HI, 11, A6XX_CP_PERFCTR_CP_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_12_LO,
+		A6XX_RBBM_PERFCTR_CP_12_HI, 12, A6XX_CP_PERFCTR_CP_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CP_13_LO,
+		A6XX_RBBM_PERFCTR_CP_13_HI, 13, A6XX_CP_PERFCTR_CP_SEL_13 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_rbbm[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RBBM_0_LO,
+		A6XX_RBBM_PERFCTR_RBBM_0_HI, 14, A6XX_RBBM_PERFCTR_RBBM_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RBBM_1_LO,
+		A6XX_RBBM_PERFCTR_RBBM_1_HI, 15, A6XX_RBBM_PERFCTR_RBBM_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RBBM_2_LO,
+		A6XX_RBBM_PERFCTR_RBBM_2_HI, 16, A6XX_RBBM_PERFCTR_RBBM_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RBBM_3_LO,
+		A6XX_RBBM_PERFCTR_RBBM_3_HI, 17, A6XX_RBBM_PERFCTR_RBBM_SEL_3 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_PC_0_LO,
+		A6XX_RBBM_PERFCTR_PC_0_HI, 18, A6XX_PC_PERFCTR_PC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_PC_1_LO,
+		A6XX_RBBM_PERFCTR_PC_1_HI, 19, A6XX_PC_PERFCTR_PC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_PC_2_LO,
+		A6XX_RBBM_PERFCTR_PC_2_HI, 20, A6XX_PC_PERFCTR_PC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_PC_3_LO,
+		A6XX_RBBM_PERFCTR_PC_3_HI, 21, A6XX_PC_PERFCTR_PC_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_PC_4_LO,
+		A6XX_RBBM_PERFCTR_PC_4_HI, 22, A6XX_PC_PERFCTR_PC_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_PC_5_LO,
+		A6XX_RBBM_PERFCTR_PC_5_HI, 23, A6XX_PC_PERFCTR_PC_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_PC_6_LO,
+		A6XX_RBBM_PERFCTR_PC_6_HI, 24, A6XX_PC_PERFCTR_PC_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_PC_7_LO,
+		A6XX_RBBM_PERFCTR_PC_7_HI, 25, A6XX_PC_PERFCTR_PC_SEL_7 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VFD_0_LO,
+		A6XX_RBBM_PERFCTR_VFD_0_HI, 26, A6XX_VFD_PERFCTR_VFD_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VFD_1_LO,
+		A6XX_RBBM_PERFCTR_VFD_1_HI, 27, A6XX_VFD_PERFCTR_VFD_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VFD_2_LO,
+		A6XX_RBBM_PERFCTR_VFD_2_HI, 28, A6XX_VFD_PERFCTR_VFD_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VFD_3_LO,
+		A6XX_RBBM_PERFCTR_VFD_3_HI, 29, A6XX_VFD_PERFCTR_VFD_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VFD_4_LO,
+		A6XX_RBBM_PERFCTR_VFD_4_HI, 30, A6XX_VFD_PERFCTR_VFD_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VFD_5_LO,
+		A6XX_RBBM_PERFCTR_VFD_5_HI, 31, A6XX_VFD_PERFCTR_VFD_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VFD_6_LO,
+		A6XX_RBBM_PERFCTR_VFD_6_HI, 32, A6XX_VFD_PERFCTR_VFD_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VFD_7_LO,
+		A6XX_RBBM_PERFCTR_VFD_7_HI, 33, A6XX_VFD_PERFCTR_VFD_SEL_7 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_HLSQ_0_LO,
+		A6XX_RBBM_PERFCTR_HLSQ_0_HI, 34, A6XX_HLSQ_PERFCTR_HLSQ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_HLSQ_1_LO,
+		A6XX_RBBM_PERFCTR_HLSQ_1_HI, 35, A6XX_HLSQ_PERFCTR_HLSQ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_HLSQ_2_LO,
+		A6XX_RBBM_PERFCTR_HLSQ_2_HI, 36, A6XX_HLSQ_PERFCTR_HLSQ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_HLSQ_3_LO,
+		A6XX_RBBM_PERFCTR_HLSQ_3_HI, 37, A6XX_HLSQ_PERFCTR_HLSQ_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_HLSQ_4_LO,
+		A6XX_RBBM_PERFCTR_HLSQ_4_HI, 38, A6XX_HLSQ_PERFCTR_HLSQ_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_HLSQ_5_LO,
+		A6XX_RBBM_PERFCTR_HLSQ_5_HI, 39, A6XX_HLSQ_PERFCTR_HLSQ_SEL_5 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VPC_0_LO,
+		A6XX_RBBM_PERFCTR_VPC_0_HI, 40, A6XX_VPC_PERFCTR_VPC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VPC_1_LO,
+		A6XX_RBBM_PERFCTR_VPC_1_HI, 41, A6XX_VPC_PERFCTR_VPC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VPC_2_LO,
+		A6XX_RBBM_PERFCTR_VPC_2_HI, 42, A6XX_VPC_PERFCTR_VPC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VPC_3_LO,
+		A6XX_RBBM_PERFCTR_VPC_3_HI, 43, A6XX_VPC_PERFCTR_VPC_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VPC_4_LO,
+		A6XX_RBBM_PERFCTR_VPC_4_HI, 44, A6XX_VPC_PERFCTR_VPC_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VPC_5_LO,
+		A6XX_RBBM_PERFCTR_VPC_5_HI, 45, A6XX_VPC_PERFCTR_VPC_SEL_5 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_ccu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CCU_0_LO,
+		A6XX_RBBM_PERFCTR_CCU_0_HI, 46, A6XX_RB_PERFCTR_CCU_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CCU_1_LO,
+		A6XX_RBBM_PERFCTR_CCU_1_HI, 47, A6XX_RB_PERFCTR_CCU_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CCU_2_LO,
+		A6XX_RBBM_PERFCTR_CCU_2_HI, 48, A6XX_RB_PERFCTR_CCU_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CCU_3_LO,
+		A6XX_RBBM_PERFCTR_CCU_3_HI, 49, A6XX_RB_PERFCTR_CCU_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CCU_4_LO,
+		A6XX_RBBM_PERFCTR_CCU_4_HI, 50, A6XX_RB_PERFCTR_CCU_SEL_4 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TSE_0_LO,
+		A6XX_RBBM_PERFCTR_TSE_0_HI, 51, A6XX_GRAS_PERFCTR_TSE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TSE_1_LO,
+		A6XX_RBBM_PERFCTR_TSE_1_HI, 52, A6XX_GRAS_PERFCTR_TSE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TSE_2_LO,
+		A6XX_RBBM_PERFCTR_TSE_2_HI, 53, A6XX_GRAS_PERFCTR_TSE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TSE_3_LO,
+		A6XX_RBBM_PERFCTR_TSE_3_HI, 54, A6XX_GRAS_PERFCTR_TSE_SEL_3 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RAS_0_LO,
+		A6XX_RBBM_PERFCTR_RAS_0_HI, 55, A6XX_GRAS_PERFCTR_RAS_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RAS_1_LO,
+		A6XX_RBBM_PERFCTR_RAS_1_HI, 56, A6XX_GRAS_PERFCTR_RAS_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RAS_2_LO,
+		A6XX_RBBM_PERFCTR_RAS_2_HI, 57, A6XX_GRAS_PERFCTR_RAS_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RAS_3_LO,
+		A6XX_RBBM_PERFCTR_RAS_3_HI, 58, A6XX_GRAS_PERFCTR_RAS_SEL_3 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_0_LO,
+		A6XX_RBBM_PERFCTR_UCHE_0_HI, 59, A6XX_UCHE_PERFCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_1_LO,
+		A6XX_RBBM_PERFCTR_UCHE_1_HI, 60, A6XX_UCHE_PERFCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_2_LO,
+		A6XX_RBBM_PERFCTR_UCHE_2_HI, 61, A6XX_UCHE_PERFCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_3_LO,
+		A6XX_RBBM_PERFCTR_UCHE_3_HI, 62, A6XX_UCHE_PERFCTR_UCHE_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_4_LO,
+		A6XX_RBBM_PERFCTR_UCHE_4_HI, 63, A6XX_UCHE_PERFCTR_UCHE_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_5_LO,
+		A6XX_RBBM_PERFCTR_UCHE_5_HI, 64, A6XX_UCHE_PERFCTR_UCHE_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_6_LO,
+		A6XX_RBBM_PERFCTR_UCHE_6_HI, 65, A6XX_UCHE_PERFCTR_UCHE_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_7_LO,
+		A6XX_RBBM_PERFCTR_UCHE_7_HI, 66, A6XX_UCHE_PERFCTR_UCHE_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_8_LO,
+		A6XX_RBBM_PERFCTR_UCHE_8_HI, 67, A6XX_UCHE_PERFCTR_UCHE_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_9_LO,
+		A6XX_RBBM_PERFCTR_UCHE_9_HI, 68, A6XX_UCHE_PERFCTR_UCHE_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_10_LO,
+		A6XX_RBBM_PERFCTR_UCHE_10_HI, 69,
+					A6XX_UCHE_PERFCTR_UCHE_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_UCHE_11_LO,
+		A6XX_RBBM_PERFCTR_UCHE_11_HI, 70,
+					A6XX_UCHE_PERFCTR_UCHE_SEL_11 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_0_LO,
+		A6XX_RBBM_PERFCTR_TP_0_HI, 71, A6XX_TPL1_PERFCTR_TP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_1_LO,
+		A6XX_RBBM_PERFCTR_TP_1_HI, 72, A6XX_TPL1_PERFCTR_TP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_2_LO,
+		A6XX_RBBM_PERFCTR_TP_2_HI, 73, A6XX_TPL1_PERFCTR_TP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_3_LO,
+		A6XX_RBBM_PERFCTR_TP_3_HI, 74, A6XX_TPL1_PERFCTR_TP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_4_LO,
+		A6XX_RBBM_PERFCTR_TP_4_HI, 75, A6XX_TPL1_PERFCTR_TP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_5_LO,
+		A6XX_RBBM_PERFCTR_TP_5_HI, 76, A6XX_TPL1_PERFCTR_TP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_6_LO,
+		A6XX_RBBM_PERFCTR_TP_6_HI, 77, A6XX_TPL1_PERFCTR_TP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_7_LO,
+		A6XX_RBBM_PERFCTR_TP_7_HI, 78, A6XX_TPL1_PERFCTR_TP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_8_LO,
+		A6XX_RBBM_PERFCTR_TP_8_HI, 79, A6XX_TPL1_PERFCTR_TP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_9_LO,
+		A6XX_RBBM_PERFCTR_TP_9_HI, 80, A6XX_TPL1_PERFCTR_TP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_10_LO,
+		A6XX_RBBM_PERFCTR_TP_10_HI, 81, A6XX_TPL1_PERFCTR_TP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_TP_11_LO,
+		A6XX_RBBM_PERFCTR_TP_11_HI, 82, A6XX_TPL1_PERFCTR_TP_SEL_11 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_0_LO,
+		A6XX_RBBM_PERFCTR_SP_0_HI, 83, A6XX_SP_PERFCTR_SP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_1_LO,
+		A6XX_RBBM_PERFCTR_SP_1_HI, 84, A6XX_SP_PERFCTR_SP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_2_LO,
+		A6XX_RBBM_PERFCTR_SP_2_HI, 85, A6XX_SP_PERFCTR_SP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_3_LO,
+		A6XX_RBBM_PERFCTR_SP_3_HI, 86, A6XX_SP_PERFCTR_SP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_4_LO,
+		A6XX_RBBM_PERFCTR_SP_4_HI, 87, A6XX_SP_PERFCTR_SP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_5_LO,
+		A6XX_RBBM_PERFCTR_SP_5_HI, 88, A6XX_SP_PERFCTR_SP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_6_LO,
+		A6XX_RBBM_PERFCTR_SP_6_HI, 89, A6XX_SP_PERFCTR_SP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_7_LO,
+		A6XX_RBBM_PERFCTR_SP_7_HI, 90, A6XX_SP_PERFCTR_SP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_8_LO,
+		A6XX_RBBM_PERFCTR_SP_8_HI, 91, A6XX_SP_PERFCTR_SP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_9_LO,
+		A6XX_RBBM_PERFCTR_SP_9_HI, 92, A6XX_SP_PERFCTR_SP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_10_LO,
+		A6XX_RBBM_PERFCTR_SP_10_HI, 93, A6XX_SP_PERFCTR_SP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_11_LO,
+		A6XX_RBBM_PERFCTR_SP_11_HI, 94, A6XX_SP_PERFCTR_SP_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_12_LO,
+		A6XX_RBBM_PERFCTR_SP_12_HI, 95, A6XX_SP_PERFCTR_SP_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_13_LO,
+		A6XX_RBBM_PERFCTR_SP_13_HI, 96, A6XX_SP_PERFCTR_SP_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_14_LO,
+		A6XX_RBBM_PERFCTR_SP_14_HI, 97, A6XX_SP_PERFCTR_SP_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_15_LO,
+		A6XX_RBBM_PERFCTR_SP_15_HI, 98, A6XX_SP_PERFCTR_SP_SEL_15 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_16_LO,
+		A6XX_RBBM_PERFCTR_SP_16_HI, 99, A6XX_SP_PERFCTR_SP_SEL_16 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_17_LO,
+		A6XX_RBBM_PERFCTR_SP_17_HI, 100, A6XX_SP_PERFCTR_SP_SEL_17 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_18_LO,
+		A6XX_RBBM_PERFCTR_SP_18_HI, 101, A6XX_SP_PERFCTR_SP_SEL_18 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_19_LO,
+		A6XX_RBBM_PERFCTR_SP_19_HI, 102, A6XX_SP_PERFCTR_SP_SEL_19 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_20_LO,
+		A6XX_RBBM_PERFCTR_SP_20_HI, 103, A6XX_SP_PERFCTR_SP_SEL_20 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_21_LO,
+		A6XX_RBBM_PERFCTR_SP_21_HI, 104, A6XX_SP_PERFCTR_SP_SEL_21 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_22_LO,
+		A6XX_RBBM_PERFCTR_SP_22_HI, 105, A6XX_SP_PERFCTR_SP_SEL_22 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_SP_23_LO,
+		A6XX_RBBM_PERFCTR_SP_23_HI, 106, A6XX_SP_PERFCTR_SP_SEL_23 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RB_0_LO,
+		A6XX_RBBM_PERFCTR_RB_0_HI, 107, A6XX_RB_PERFCTR_RB_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RB_1_LO,
+		A6XX_RBBM_PERFCTR_RB_1_HI, 108, A6XX_RB_PERFCTR_RB_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RB_2_LO,
+		A6XX_RBBM_PERFCTR_RB_2_HI, 109, A6XX_RB_PERFCTR_RB_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RB_3_LO,
+		A6XX_RBBM_PERFCTR_RB_3_HI, 110, A6XX_RB_PERFCTR_RB_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RB_4_LO,
+		A6XX_RBBM_PERFCTR_RB_4_HI, 111, A6XX_RB_PERFCTR_RB_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RB_5_LO,
+		A6XX_RBBM_PERFCTR_RB_5_HI, 112, A6XX_RB_PERFCTR_RB_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RB_6_LO,
+		A6XX_RBBM_PERFCTR_RB_6_HI, 113, A6XX_RB_PERFCTR_RB_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_RB_7_LO,
+		A6XX_RBBM_PERFCTR_RB_7_HI, 114, A6XX_RB_PERFCTR_RB_SEL_7 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_vsc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VSC_0_LO,
+		A6XX_RBBM_PERFCTR_VSC_0_HI, 115, A6XX_VSC_PERFCTR_VSC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_VSC_1_LO,
+		A6XX_RBBM_PERFCTR_VSC_1_HI, 116, A6XX_VSC_PERFCTR_VSC_SEL_1 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_lrz[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_LRZ_0_LO,
+		A6XX_RBBM_PERFCTR_LRZ_0_HI, 117, A6XX_GRAS_PERFCTR_LRZ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_LRZ_1_LO,
+		A6XX_RBBM_PERFCTR_LRZ_1_HI, 118, A6XX_GRAS_PERFCTR_LRZ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_LRZ_2_LO,
+		A6XX_RBBM_PERFCTR_LRZ_2_HI, 119, A6XX_GRAS_PERFCTR_LRZ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_LRZ_3_LO,
+		A6XX_RBBM_PERFCTR_LRZ_3_HI, 120, A6XX_GRAS_PERFCTR_LRZ_SEL_3 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_cmp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CMP_0_LO,
+		A6XX_RBBM_PERFCTR_CMP_0_HI, 121, A6XX_RB_PERFCTR_CMP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CMP_1_LO,
+		A6XX_RBBM_PERFCTR_CMP_1_HI, 122, A6XX_RB_PERFCTR_CMP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CMP_2_LO,
+		A6XX_RBBM_PERFCTR_CMP_2_HI, 123, A6XX_RB_PERFCTR_CMP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_RBBM_PERFCTR_CMP_3_LO,
+		A6XX_RBBM_PERFCTR_CMP_3_HI, 124, A6XX_RB_PERFCTR_CMP_SEL_3 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_vbif[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_VBIF_PERF_CNT_LOW0,
+		A6XX_VBIF_PERF_CNT_HIGH0, -1, A6XX_VBIF_PERF_CNT_SEL0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_VBIF_PERF_CNT_LOW1,
+		A6XX_VBIF_PERF_CNT_HIGH1, -1, A6XX_VBIF_PERF_CNT_SEL1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_VBIF_PERF_CNT_LOW2,
+		A6XX_VBIF_PERF_CNT_HIGH2, -1, A6XX_VBIF_PERF_CNT_SEL2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_VBIF_PERF_CNT_LOW3,
+		A6XX_VBIF_PERF_CNT_HIGH3, -1, A6XX_VBIF_PERF_CNT_SEL3 },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_vbif_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_VBIF_PERF_PWR_CNT_LOW0,
+		A6XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A6XX_VBIF_PERF_PWR_CNT_EN0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_VBIF_PERF_PWR_CNT_LOW1,
+		A6XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A6XX_VBIF_PERF_PWR_CNT_EN1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_VBIF_PERF_PWR_CNT_LOW2,
+		A6XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A6XX_VBIF_PERF_PWR_CNT_EN2 },
+};
+
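+/*
+ * All four GBIF counters are programmed through the single
+ * GBIF_PERF_CNT_SEL register.
+ */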
+static struct adreno_perfcount_register a6xx_perfcounters_gbif[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_GBIF_PERF_CNT_LOW0,
+		A6XX_GBIF_PERF_CNT_HIGH0, -1, A6XX_GBIF_PERF_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_GBIF_PERF_CNT_LOW1,
+		A6XX_GBIF_PERF_CNT_HIGH1, -1, A6XX_GBIF_PERF_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_GBIF_PERF_CNT_LOW2,
+		A6XX_GBIF_PERF_CNT_HIGH2, -1, A6XX_GBIF_PERF_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_GBIF_PERF_CNT_LOW3,
+		A6XX_GBIF_PERF_CNT_HIGH3, -1, A6XX_GBIF_PERF_CNT_SEL },
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_gbif_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_GBIF_PWR_CNT_LOW0,
+		A6XX_GBIF_PWR_CNT_HIGH0, -1, A6XX_GBIF_PERF_PWR_CNT_EN },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_GBIF_PWR_CNT_LOW1,
+		A6XX_GBIF_PWR_CNT_HIGH1, -1, A6XX_GBIF_PERF_PWR_CNT_EN },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_GBIF_PWR_CNT_LOW2,
+		A6XX_GBIF_PWR_CNT_HIGH2, -1, A6XX_GBIF_PERF_PWR_CNT_EN },
+};
+
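+/*
+ * Designated initializers for the GMU counters: only the countable state,
+ * the LO/HI data registers and the select register are filled in.
+ * GMU_COUNTER_RESERVED marks an entry as KGSL_PERFCOUNTER_BROKEN so that
+ * the counter is never assigned.
+ */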
+#define GMU_COUNTER(lo, hi, sel) \
+	{ .countable = KGSL_PERFCOUNTER_NOT_USED, \
+	  .offset = lo, .offset_hi = hi, .select = sel }
+
+#define GMU_COUNTER_RESERVED(lo, hi, sel) \
+	{ .countable = KGSL_PERFCOUNTER_BROKEN, \
+	  .offset = lo, .offset_hi = hi, .select = sel }
+
+static struct adreno_perfcount_register a6xx_perfcounters_gmu_xoclk[] = {
+	/*
+	 * COUNTER_XOCLK_0 and COUNTER_XOCLK_4 are used for the GPU
+	 * busy and ifpc count. Mark them as reserved to ensure they
+	 * are not re-used.
+	 */
+	GMU_COUNTER_RESERVED(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0),
+	GMU_COUNTER_RESERVED(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_2),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_2),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_2),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_2),
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_gmu_gmuclk[] = {
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_L,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_H,
+		A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1),
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_gmu_perf[] = {
+	GMU_COUNTER(A6XX_GMU_CX_GMU_PERF_COUNTER_0_L,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_0_H,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_PERF_COUNTER_1_L,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_1_H,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_PERF_COUNTER_2_L,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_2_H,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_PERF_COUNTER_3_L,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_3_H,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_PERF_COUNTER_4_L,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_4_H,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_1),
+	GMU_COUNTER(A6XX_GMU_CX_GMU_PERF_COUNTER_5_L,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_5_H,
+		A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_1),
+};
+
+static struct adreno_perfcount_register a6xx_perfcounters_alwayson[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_CP_ALWAYS_ON_COUNTER_LO,
+		A6XX_CP_ALWAYS_ON_COUNTER_HI, -1 },
+};
+
+/*
+ * The ADRENO_PERFCOUNTER_GROUP_RESTORE flag is set by default because most
+ * of the perfcounter groups need to be restored as part of preemption and
+ * IFPC. Perfcounter groups that are not restored across preemption and IFPC
+ * should be defined with the A6XX_PERFCOUNTER_GROUP_FLAGS macro instead.
+ */
+#define A6XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \
+	ADRENO_PERFCOUNTER_GROUP_FLAGS(a6xx, offset, name, \
+	ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read, load)
+
+#define A6XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, enable, read, load) \
+	ADRENO_PERFCOUNTER_GROUP_FLAGS(a6xx, offset, name, flags, enable, \
+			read, load)
+
+#define A6XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \
+	A6XX_PERFCOUNTER_GROUP(offset, name, \
+		a6xx_counter_enable, a6xx_counter_read, a6xx_counter_load)
+
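+/*
+ * This table wires up the VBIF bus counters and their enable callbacks;
+ * the group tables below use the GBIF counters instead.
+ */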
+static const struct adreno_perfcount_group a630_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(RBBM, rbbm, 0,
+		a6xx_counter_enable, a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(PC, pc),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
+	A6XX_PERFCOUNTER_GROUP(HLSQ, hlsq, a6xx_counter_inline_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
+	A6XX_PERFCOUNTER_GROUP(TP, tp, a6xx_counter_inline_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_PERFCOUNTER_GROUP(SP, sp, a6xx_counter_inline_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(RB, rb),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF, vbif, 0,
+		a630_counter_vbif_enable, a6xx_counter_read_norestore, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED, a630_counter_vbif_pwr_enable,
+		a6xx_counter_read_norestore, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		a6xx_counter_alwayson_enable, a6xx_counter_alwayson_read, NULL),
+};
+
+static const struct adreno_perfcount_group a6xx_legacy_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(RBBM, rbbm, 0,
+		a6xx_counter_enable, a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(PC, pc),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
+	A6XX_PERFCOUNTER_GROUP(HLSQ, hlsq, a6xx_counter_inline_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
+	A6XX_PERFCOUNTER_GROUP(TP, tp, a6xx_counter_inline_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_PERFCOUNTER_GROUP(SP, sp, a6xx_counter_inline_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(RB, rb),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF, gbif, 0,
+		a6xx_counter_gbif_enable, a6xx_counter_read_norestore, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, gbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED, a6xx_counter_gbif_pwr_enable,
+		a6xx_counter_read_norestore, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		a6xx_counter_alwayson_enable, a6xx_counter_alwayson_read, NULL),
+};
+
+static const struct adreno_perfcount_group a6xx_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(RBBM, rbbm, 0,
+		a6xx_counter_enable, a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(PC, pc),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
+	A6XX_PERFCOUNTER_GROUP(HLSQ, hlsq, a6xx_counter_inline_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
+	A6XX_PERFCOUNTER_GROUP(TP, tp, a6xx_counter_inline_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_PERFCOUNTER_GROUP(SP, sp, a6xx_counter_inline_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(RB, rb),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF, gbif, 0,
+		a6xx_counter_gbif_enable, a6xx_counter_read_norestore, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, gbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED, a6xx_counter_gbif_pwr_enable,
+		a6xx_counter_read_norestore, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		a6xx_counter_alwayson_enable, a6xx_counter_alwayson_read, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(GMU_XOCLK, gmu_xoclk, 0,
+		a6xx_counter_gmu_xoclk_enable, a6xx_counter_read_norestore,
+		NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(GMU_GMUCLK, gmu_gmuclk, 0,
+		a6xx_counter_gmu_gmuclk_enable, a6xx_counter_read_norestore,
+		NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(GMU_PERF, gmu_perf, 0,
+		a6xx_counter_gmu_perf_enable, a6xx_counter_read_norestore,
+		NULL),
+};
+
+static const struct adreno_perfcount_group a6xx_hwsched_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(RBBM, rbbm, 0,
+		a6xx_counter_enable, a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(PC, pc),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
+	A6XX_PERFCOUNTER_GROUP(HLSQ, hlsq, a6xx_hwsched_counter_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
+	A6XX_PERFCOUNTER_GROUP(TP, tp, a6xx_hwsched_counter_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_PERFCOUNTER_GROUP(SP, sp, a6xx_hwsched_counter_enable,
+			a6xx_counter_read, a6xx_counter_load),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(RB, rb),
+	A6XX_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF, gbif, 0,
+		a6xx_counter_gbif_enable, a6xx_counter_read_norestore, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, gbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED, a6xx_counter_gbif_pwr_enable,
+		a6xx_counter_read_norestore, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		a6xx_counter_alwayson_enable, a6xx_counter_alwayson_read, NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(GMU_XOCLK, gmu_xoclk, 0,
+		a6xx_counter_gmu_xoclk_enable, a6xx_counter_read_norestore,
+		NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(GMU_GMUCLK, gmu_gmuclk, 0,
+		a6xx_counter_gmu_gmuclk_enable, a6xx_counter_read_norestore,
+		NULL),
+	A6XX_PERFCOUNTER_GROUP_FLAGS(GMU_PERF, gmu_perf, 0,
+		a6xx_counter_gmu_perf_enable, a6xx_counter_read_norestore,
+		NULL),
+};
+
+/*
+ * a610, a612, a616, a618 and a619 do not have the GMU registers.
+ * a605, a608, a615, a630, a640 and a680 do not have enough room in the
+ * CP_PROTECT registers, so the GMU counters are not accessible.
+ */
+const struct adreno_perfcounters adreno_a6xx_legacy_perfcounters = {
+	a6xx_legacy_perfcounter_groups,
+	ARRAY_SIZE(a6xx_legacy_perfcounter_groups),
+};
+
+const struct adreno_perfcounters adreno_a630_perfcounters = {
+	a630_perfcounter_groups,
+	ARRAY_SIZE(a630_perfcounter_groups),
+};
+
+const struct adreno_perfcounters adreno_a6xx_perfcounters = {
+	a6xx_perfcounter_groups,
+	ARRAY_SIZE(a6xx_perfcounter_groups),
+};
+
+const struct adreno_perfcounters adreno_a6xx_hwsched_perfcounters = {
+	a6xx_hwsched_perfcounter_groups,
+	ARRAY_SIZE(a6xx_hwsched_perfcounter_groups),
+};

+ 793 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_preempt.c

@@ -0,0 +1,793 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+
+#define PREEMPT_RECORD(_field) \
+		offsetof(struct a6xx_cp_preemption_record, _field)
+
+#define PREEMPT_SMMU_RECORD(_field) \
+		offsetof(struct a6xx_cp_smmu_info, _field)
+
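+/*
+ * Propagate the current ringbuffer's wptr to the hardware. In dispatcher
+ * (non-atomic) context, a wptr update that was skipped earlier is issued
+ * with a fenced write so the GPU wakes up if it was power collapsed; in
+ * atomic context the register is written directly whenever the value has
+ * changed.
+ */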
+static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer,
+	bool atomic)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+
+	if (!atomic) {
+		/*
+		 * We might have skipped updating the wptr in case we are in
+		 * dispatcher context. Do it now.
+		 */
+		if (rb->skip_inline_wptr) {
+			ret = a6xx_fenced_write(adreno_dev,
+				A6XX_CP_RB_WPTR, rb->wptr,
+				FENCE_STATUS_WRITEDROPPED0_MASK);
+
+			reset_timer = true;
+			rb->skip_inline_wptr = false;
+		}
+	} else {
+		unsigned int wptr;
+
+		kgsl_regread(device, A6XX_CP_RB_WPTR, &wptr);
+		if (wptr != rb->wptr) {
+			kgsl_regwrite(device, A6XX_CP_RB_WPTR, rb->wptr);
+			reset_timer = true;
+		}
+	}
+
+	if (reset_timer)
+		rb->dispatch_q.expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+	if (!atomic) {
+		/* If WPTR update fails, set the fault and trigger recovery */
+		if (ret) {
+			gmu_core_fault_snapshot(device);
+			adreno_dispatcher_fault(adreno_dev,
+				ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+		}
+	}
+}
+
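+/*
+ * Set or clear the preemption keepalive so that the GPU is not power
+ * collapsed in the middle of a preemption sequence. a660 and a663 use the
+ * dedicated PWR_COL_PREEMPT_KEEPALIVE register; other targets toggle bit 1
+ * of GMU_AO_SPARE_CNTL.
+ */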
+static void _power_collapse_set(struct adreno_device *adreno_dev, bool val)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!gmu_core_isenabled(device))
+		return;
+
+	if (val) {
+		if (adreno_is_a660(adreno_dev) ||
+				adreno_is_a663(adreno_dev))
+			gmu_core_regwrite(device,
+				 A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, 0x1);
+		else
+			gmu_core_regrmw(device,
+				 A6XX_GMU_AO_SPARE_CNTL, 0x0, 0x2);
+	} else {
+		if (adreno_is_a660(adreno_dev) ||
+				adreno_is_a663(adreno_dev))
+			gmu_core_regwrite(device,
+				 A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, 0x0);
+		else
+			gmu_core_regrmw(device,
+				 A6XX_GMU_AO_SPARE_CNTL, 0x2, 0x0);
+	}
+}
+
+static void _a6xx_preemption_done(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * In the very unlikely case that the power is off, do nothing - the
+	 * state will be reset on power up and everybody will be happy
+	 */
+
+	if (!kgsl_state_is_awake(device))
+		return;
+
+	kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, &status);
+
+	if (status & 0x1) {
+		dev_err(device->dev,
+			     "Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
+			     status, adreno_dev->cur_rb->id,
+			     adreno_get_rptr(adreno_dev->cur_rb),
+			     adreno_dev->cur_rb->wptr,
+			     adreno_dev->next_rb->id,
+			     adreno_get_rptr(adreno_dev->next_rb),
+			     adreno_dev->next_rb->wptr);
+
+		/* Set a fault and restart */
+		adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+
+		return;
+	}
+
+	adreno_dev->preempt.count++;
+
+	/*
+	 * In normal scenarios the preemption keepalive bit is cleared in the
+	 * CP interrupt callback. However, if preemption completes right after
+	 * the preemption timer expires, or a preemption interrupt arrives with
+	 * a non-zero status, the state is moved to the complete state and the
+	 * dispatcher ends up calling this function. At this point we can
+	 * safely clear the preemption keepalive bit, allowing power collapse
+	 * to resume its regular activity.
+	 */
+	_power_collapse_set(adreno_dev, false);
+
+	del_timer_sync(&adreno_dev->preempt.timer);
+
+	kgsl_regread(device,  A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		status, 0);
+
+	/* Clean up all the bits */
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr for the new command queue */
+	_update_wptr(adreno_dev, true, false);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	/* Clear the preempt state */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+}
+
+static void _a6xx_preemption_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * If the power is on check the preemption status one more time - if it
+	 * was successful then just transition to the complete state
+	 */
+	if (kgsl_state_is_awake(device)) {
+		kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, &status);
+
+		if (!(status & 0x1)) {
+			adreno_set_preempt_state(adreno_dev,
+				ADRENO_PREEMPT_COMPLETE);
+
+			adreno_dispatcher_schedule(device);
+			return;
+		}
+	}
+
+	dev_err(device->dev,
+		     "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
+		     adreno_dev->cur_rb->id,
+		     adreno_get_rptr(adreno_dev->cur_rb),
+		     adreno_dev->cur_rb->wptr,
+		     adreno_dev->next_rb->id,
+		     adreno_get_rptr(adreno_dev->next_rb),
+		     adreno_dev->next_rb->wptr);
+
+	adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+}
+
+static void _a6xx_preemption_worker(struct work_struct *work)
+{
+	struct adreno_preemption *preempt = container_of(work,
+		struct adreno_preemption, work);
+	struct adreno_device *adreno_dev = container_of(preempt,
+		struct adreno_device, preempt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Need to take the mutex to make sure that the power stays on */
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
+		_a6xx_preemption_fault(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+/* Find the highest priority active ringbuffer */
+static struct adreno_ringbuffer *a6xx_next_ringbuffer(
+		struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb;
+	unsigned long flags;
+	unsigned int i;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		bool empty;
+
+		spin_lock_irqsave(&rb->preempt_lock, flags);
+		empty = adreno_rb_empty(rb);
+		spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+		if (!empty)
+			return rb;
+	}
+
+	return NULL;
+}
+
+void a6xx_preemption_trigger(struct adreno_device *adreno_dev, bool atomic)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_ringbuffer *next;
+	uint64_t ttbr0, gpuaddr;
+	unsigned int contextidr, cntl;
+	unsigned long flags;
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+
+	/* Put ourselves into a possible trigger state */
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
+		return;
+
+	/* Get the next ringbuffer to preempt in */
+	next = a6xx_next_ringbuffer(adreno_dev);
+
+	/*
+	 * Nothing to do if every ringbuffer is empty or if the current
+	 * ringbuffer is the only active one
+	 */
+	if (next == NULL || next == adreno_dev->cur_rb) {
+		/*
+		 * Update any critical things that might have been skipped while
+		 * we were looking for a new ringbuffer
+		 */
+
+		if (next != NULL) {
+			_update_wptr(adreno_dev, false, atomic);
+
+			mod_timer(&adreno_dev->dispatcher.timer,
+				adreno_dev->cur_rb->dispatch_q.expires);
+		}
+
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+		return;
+	}
+
+	/* Turn off the dispatcher timer */
+	del_timer(&adreno_dev->dispatcher.timer);
+
+	/*
+	 * This is the most critical section - we need to take care not to race
+	 * until we have programmed the CP for the switch
+	 */
+
+	spin_lock_irqsave(&next->preempt_lock, flags);
+
+	/* Get the pagetable from the pagetable info. */
+	kgsl_sharedmem_readq(device->scratch, &ttbr0,
+		SCRATCH_RB_OFFSET(next->id, ttbr0));
+	kgsl_sharedmem_readl(device->scratch, &contextidr,
+		SCRATCH_RB_OFFSET(next->id, contextidr));
+
+	kgsl_sharedmem_writel(next->preemption_desc,
+		PREEMPT_RECORD(wptr), next->wptr);
+
+	spin_unlock_irqrestore(&next->preempt_lock, flags);
+
+	/* And write it to the smmu info */
+
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		kgsl_sharedmem_writeq(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(context_idr), contextidr);
+	}
+
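+	/*
+	 * Fetch the user context record address that the pre-ibsubmit
+	 * commands stashed in this ringbuffer's preempt scratch slot.
+	 */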
+	kgsl_sharedmem_readq(preempt->scratch, &gpuaddr,
+		next->id * sizeof(u64));
+
+	/*
+	 * Set a keepalive bit before the first preemption register write.
+	 * This is required since while each individual write to the context
+	 * switch registers will wake the GPU from collapse, it will not in
+	 * itself cause GPU activity. Thus, the GPU could technically be
+	 * re-collapsed between subsequent register writes leading to a
+	 * prolonged preemption sequence. The keepalive bit prevents any
+	 * further power collapse while it is set.
+	 * It is more efficient to use a keepalive+wake-on-fence approach here
+	 * rather than an OOB. Both keepalive and the fence are effectively
+	 * free when the GPU is already powered on, whereas an OOB requires an
+	 * unconditional handshake with the GMU.
+	 */
+	_power_collapse_set(adreno_dev, true);
+
+	/*
+	 * Fenced writes on this path will make sure the GPU is woken up
+	 * in case it was power collapsed by the GMU.
+	 */
+	if (a6xx_fenced_write(adreno_dev,
+		A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO,
+		lower_32_bits(next->preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	/*
+	 * The fence writes above make sure the GMU comes out of IFPC if it
+	 * was in that state, but they do not guarantee that the GMU firmware
+	 * has actually moved to the ACTIVE state, i.e. that the wake-up from
+	 * IFPC is complete. Wait for the GMU to move to ACTIVE before
+	 * triggering preemption. This is required to make sure the CP does
+	 * not interrupt the GMU during the wake-up from IFPC.
+	 */
+	if (!atomic && gmu_core_dev_wait_for_active_transition(device))
+		goto err;
+
+	if (a6xx_fenced_write(adreno_dev,
+		A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI,
+		upper_32_bits(next->preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (a6xx_fenced_write(adreno_dev,
+		A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO,
+		lower_32_bits(next->secure_preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (a6xx_fenced_write(adreno_dev,
+		A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI,
+		upper_32_bits(next->secure_preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (a6xx_fenced_write(adreno_dev,
+		A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO,
+		lower_32_bits(gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (a6xx_fenced_write(adreno_dev,
+		A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI,
+		upper_32_bits(gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	adreno_dev->next_rb = next;
+
+	/* Start the timer to detect a stuck preemption */
+	mod_timer(&adreno_dev->preempt.timer,
+		jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
+
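+	/*
+	 * Build the CP_CONTEXT_SWITCH_CNTL value: bit 0 requests the switch
+	 * and the configured preemption level is placed at bit 6.
+	 */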
+	cntl = (preempt->preempt_level << 6) | 0x01;
+
+	/* Skip save/restore during L1 preemption */
+	if (preempt->skipsaverestore)
+		cntl |= (1 << 9);
+
+	/* Enable GMEM save/restore across preemption */
+	if (preempt->usesgmem)
+		cntl |= (1 << 8);
+
+	trace_adreno_preempt_trigger(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		cntl, 0);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
+
+	/* Trigger the preemption */
+	if (a6xx_fenced_write(adreno_dev, A6XX_CP_CONTEXT_SWITCH_CNTL, cntl,
+					FENCE_STATUS_WRITEDROPPED1_MASK)) {
+		adreno_dev->next_rb = NULL;
+		del_timer(&adreno_dev->preempt.timer);
+		goto err;
+	}
+
+	return;
+err:
+	/* If fenced write fails, take inline snapshot and trigger recovery */
+	if (!atomic) {
+		gmu_core_fault_snapshot(device);
+		adreno_dispatcher_fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+	} else {
+		adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+	/* Clear the keepalive */
+	_power_collapse_set(adreno_dev, false);
+}
+
+void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
+		return;
+
+	kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, &status);
+
+	if (status & 0x1) {
+		dev_err(KGSL_DEVICE(adreno_dev)->dev,
+			     "preempt interrupt with non-zero status: %X\n",
+			     status);
+
+		/*
+		 * Under the assumption that this is a race between the
+		 * interrupt and the register, schedule the worker to clean up.
+		 * If the status still hasn't resolved itself by the time we get
+		 * there then we have to assume something bad happened
+		 */
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
+		adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+		return;
+	}
+
+	adreno_dev->preempt.count++;
+
+	/*
+	 * We can now safely clear the preemption keepalive bit, allowing
+	 * power collapse to resume its regular activity.
+	 */
+	_power_collapse_set(adreno_dev, false);
+
+	del_timer(&adreno_dev->preempt.timer);
+
+	kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		status, 0);
+
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr if it changed while preemption was ongoing */
+	_update_wptr(adreno_dev, true, true);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	a6xx_preemption_trigger(adreno_dev, true);
+}
+
+void a6xx_preemption_schedule(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
+		_a6xx_preemption_done(adreno_dev);
+
+	a6xx_preemption_trigger(adreno_dev, false);
+
+	mutex_unlock(&device->mutex);
+}
+
+u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 *cmds)
+{
+	unsigned int *cmds_orig = cmds;
+	uint64_t gpuaddr = 0;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
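+	/*
+	 * Each pseudo register entry below takes three dwords (register id
+	 * plus a 64 bit address): five entries (15 dwords) when a user
+	 * context record is attached, four entries (12 dwords) otherwise.
+	 */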
+	if (drawctxt) {
+		gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
+		*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15);
+	} else {
+		*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12);
+	}
+
+	/* NULL SMMU_INFO buffer - we track in KMD */
+	*cmds++ = SET_PSEUDO_SMMU_INFO;
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
+
+	*cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr);
+
+	*cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			rb->secure_preemption_desc->gpuaddr);
+
+	if (drawctxt) {
+		*cmds++ = SET_PSEUDO_NON_PRIV_SAVE_ADDR;
+		cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr);
+	}
+
+	/*
+	 * This address does not need to be specified again when preemption is
+	 * actually triggered: the CP stores the address given here in the
+	 * CP_SET_PSEUDO_REGISTER payload into the context record, and thus
+	 * knows from where to restore the saved perfcounters for the new
+	 * ringbuffer.
+	 */
+	*cmds++ = SET_PSEUDO_COUNTER;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			rb->perfcounter_save_restore_desc->gpuaddr);
+
+	if (drawctxt) {
+		struct adreno_ringbuffer *rb = drawctxt->rb;
+		uint64_t dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id);
+
+		*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
+		cmds += cp_gpuaddr(adreno_dev, cmds, dest);
+		*cmds++ = lower_32_bits(gpuaddr);
+		*cmds++ = upper_32_bits(gpuaddr);
+
+		/* Add a KMD postamble to clear the perf counters during preemption */
+		if (!adreno_dev->perfcounter) {
+			u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev));
+
+			*cmds++ = cp_type7_packet(CP_SET_AMBLE, 3);
+			*cmds++ = lower_32_bits(kmd_postamble_addr);
+			*cmds++ = upper_32_bits(kmd_postamble_addr);
+			*cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE)
+				| (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len));
+		}
+	}
+
+	return (unsigned int) (cmds - cmds_orig);
+}
+
+u32 a6xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+		u32 *cmds)
+{
+	u32 index = 0;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
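+	/*
+	 * Zero out this ringbuffer's slot in the preempt scratch buffer so
+	 * that a stale user context record address is not picked up by a
+	 * later preemption trigger.
+	 */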
+	if (adreno_dev->cur_rb) {
+		u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, adreno_dev->cur_rb->id);
+
+		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 4);
+		cmds[index++] = lower_32_bits(dest);
+		cmds[index++] = upper_32_bits(dest);
+		cmds[index++] = 0;
+		cmds[index++] = 0;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	cmds[index++] = 0;
+	cmds[index++] = 0;
+	cmds[index++] = 1;
+	cmds[index++] = 0;
+
+	return index;
+}
+
+void a6xx_preemption_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_ringbuffer *rb;
+	unsigned int i;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	/* Force the state to be clear */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		/* smmu_info is allocated and mapped in a6xx_preemption_iommu_init */
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(magic), A6XX_CP_SMMU_INFO_MAGIC_REF);
+		kgsl_sharedmem_writeq(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
+
+		/* The CP doesn't use the asid record, so poison it */
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(asid), 0xDECAFBAD);
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(context_idr), 0);
+
+		kgsl_regwrite(device, A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+			lower_32_bits(iommu->smmu_info->gpuaddr));
+
+		kgsl_regwrite(device, A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+			upper_32_bits(iommu->smmu_info->gpuaddr));
+	}
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		kgsl_sharedmem_writel(rb->preemption_desc,
+			PREEMPT_RECORD(rptr), 0);
+		kgsl_sharedmem_writel(rb->preemption_desc,
+			PREEMPT_RECORD(wptr), 0);
+
+		adreno_ringbuffer_set_pagetable(device, rb,
+			device->mmu.defaultpagetable);
+	}
+}
+
+static void reset_rb_preempt_record(struct adreno_device *adreno_dev,
+	struct adreno_ringbuffer *rb)
+{
+	u32 cp_rb_cntl = A6XX_CP_RB_CNTL_DEFAULT |
+		(ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? 0 : (1 << 27));
+
+	memset(rb->preemption_desc->hostptr, 0x0, rb->preemption_desc->size);
+
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(magic), A6XX_CP_CTXRECORD_MAGIC_REF);
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(cntl), cp_rb_cntl);
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(
+		KGSL_DEVICE(adreno_dev), rb->id, rptr));
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr);
+}
+
+void a6xx_reset_preempt_records(struct adreno_device *adreno_dev)
+{
+	int i;
+	struct adreno_ringbuffer *rb;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		reset_rb_preempt_record(adreno_dev, rb);
+	}
+}
+
+static int a6xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
+	struct adreno_ringbuffer *rb)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	u64 ctxt_record_size = A6XX_CP_CTXRECORD_SIZE_IN_BYTES;
+	int ret;
+
+	if (a6xx_core->ctxt_record_size)
+		ctxt_record_size = a6xx_core->ctxt_record_size;
+
+	ret = adreno_allocate_global(device, &rb->preemption_desc,
+		ctxt_record_size, SZ_16K, 0, KGSL_MEMDESC_PRIVILEGED,
+		"preemption_desc");
+	if (ret)
+		return ret;
+
+	ret = adreno_allocate_global(device, &rb->secure_preemption_desc,
+		ctxt_record_size, 0, KGSL_MEMFLAGS_SECURE,
+		KGSL_MEMDESC_PRIVILEGED, "preemption_desc");
+	if (ret)
+		return ret;
+
+	ret = adreno_allocate_global(device, &rb->perfcounter_save_restore_desc,
+			A6XX_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0, 0,
+			KGSL_MEMDESC_PRIVILEGED,
+			"perfcounter_save_restore_desc");
+	if (ret)
+		return ret;
+
+	reset_rb_preempt_record(adreno_dev, rb);
+
+	return 0;
+}
+
+int a6xx_preemption_init(struct adreno_device *adreno_dev)
+{
+	u32 flags = ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? KGSL_MEMDESC_PRIVILEGED : 0;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	struct adreno_ringbuffer *rb;
+	int ret;
+	unsigned int i;
+
+	/* Preemption on the CP side depends on having an IOMMU */
+	if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
+		return -ENODEV;
+
+	INIT_WORK(&preempt->work, _a6xx_preemption_worker);
+
+	/* Allocate mem for storing preemption switch record */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		ret = a6xx_preemption_ringbuffer_init(adreno_dev, rb);
+		if (ret)
+			return ret;
+	}
+
+	ret = adreno_allocate_global(device, &preempt->scratch,
+		PAGE_SIZE, 0, 0, flags, "preempt_scratch");
+	if (ret)
+		return ret;
+
+	/* Allocate mem for storing preemption smmu record */
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		ret = adreno_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, 0,
+			KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
+			"smmu_info");
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * The first 28 dwords of the device scratch buffer are used to store
+	 * shadow RB data. Reserve 11 dwords in the device scratch buffer from
+	 * SCRATCH_POSTAMBLE_OFFSET for the KMD postamble PM4 packets. These
+	 * live in device->scratch so that userspace cannot access them.
+	 */
+	if (!adreno_dev->perfcounter) {
+		u32 *postamble = device->scratch->hostptr + SCRATCH_POSTAMBLE_OFFSET;
+		u32 count = 0;
+
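+		/*
+		 * CP_REG_RMW writes RBBM_PERFCTR_SRAM_INIT_CMD with an AND
+		 * mask of 0x0 and an OR value of 0x1 to kick off the
+		 * perfcounter SRAM init.
+		 */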
+		postamble[count++] = cp_type7_packet(CP_REG_RMW, 3);
+		postamble[count++] = A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
+		postamble[count++] = 0x0;
+		postamble[count++] = 0x1;
+
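+		/*
+		 * CP_WAIT_REG_MEM polls RBBM_PERFCTR_SRAM_INIT_STATUS with
+		 * mask 0x1 until bit 0 reports that the SRAM init is done.
+		 */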
+		postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
+		postamble[count++] = 0x3;
+		postamble[count++] = A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
+		postamble[count++] = 0x0;
+		postamble[count++] = 0x1;
+		postamble[count++] = 0x1;
+		postamble[count++] = 0x0;
+
+		preempt->postamble_len = count;
+	}
+
+	set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+	return 0;
+}
+
+int a6xx_preemption_context_init(struct kgsl_context *context)
+{
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	uint64_t flags = 0;
+
+	if (!adreno_preemption_feature_set(adreno_dev))
+		return 0;
+
+	if (context->flags & KGSL_CONTEXT_SECURE)
+		flags |= KGSL_MEMFLAGS_SECURE;
+
+	if (is_compat_task())
+		flags |= KGSL_MEMFLAGS_FORCE_32BIT;
+
+	/*
+	 * gpumem_alloc_entry takes an extra refcount. Put it only when
+	 * destroying the context to keep the context record valid
+	 */
+	context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv,
+			A6XX_CP_CTXRECORD_USER_RESTORE_SIZE, flags);
+	if (IS_ERR(context->user_ctxt_record)) {
+		int ret = PTR_ERR(context->user_ctxt_record);
+
+		context->user_ctxt_record = NULL;
+		return ret;
+	}
+
+	return 0;
+}

+ 1425 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_rgmu.c

@@ -0,0 +1,1425 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/regulator/consumer.h>
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_a6xx_rgmu.h"
+#include "adreno_snapshot.h"
+#include "kgsl_bus.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+#define RGMU_CLK_FREQ 200000000
+
+/* RGMU timeouts */
+#define RGMU_IDLE_TIMEOUT		100	/* ms */
+#define RGMU_START_TIMEOUT		100	/* ms */
+#define GPU_START_TIMEOUT		100	/* ms */
+#define GLM_SLEEP_TIMEOUT		10	/* ms */
+
+static const unsigned int a6xx_rgmu_registers[] = {
+	/* GMU CX */
+	0x1F80F, 0x1F83D, 0x1F840, 0x1F8D8, 0x1F990, 0x1F99E, 0x1F9C0, 0x1F9CC,
+	/* GMU AO */
+	0x23B03, 0x23B16, 0x23B80, 0x23B82,
+	/* GPU CC */
+	0x24000, 0x24012, 0x24040, 0x24052, 0x24400, 0x24404, 0x24407, 0x2440B,
+	0x24415, 0x2441C, 0x2441E, 0x2442D, 0x2443C, 0x2443D, 0x2443F, 0x24440,
+	0x24442, 0x24449, 0x24458, 0x2445A, 0x24540, 0x2455E, 0x24800, 0x24802,
+	0x24C00, 0x24C02, 0x25400, 0x25402, 0x25800, 0x25802, 0x25C00, 0x25C02,
+	0x26000, 0x26002,
+};
+
+static struct a6xx_rgmu_device *to_a6xx_rgmu(struct adreno_device *adreno_dev)
+{
+	struct a6xx_device *a6xx_dev = container_of(adreno_dev,
+					struct a6xx_device, adreno_dev);
+
+	return &a6xx_dev->rgmu;
+}
+
+static void a6xx_rgmu_active_count_put(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (WARN(atomic_read(&device->active_cnt) == 0,
+		"Unbalanced get/put calls to KGSL active count\n"))
+		return;
+
+	if (atomic_dec_and_test(&device->active_cnt)) {
+		kgsl_pwrscale_update_stats(device);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	wake_up(&device->active_cnt_wq);
+}
+
+static irqreturn_t a6xx_rgmu_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device));
+	unsigned int status = 0;
+
+	gmu_core_regread(device, A6XX_GMU_AO_HOST_INTERRUPT_STATUS, &status);
+
+	if (status & RGMU_AO_IRQ_FENCE_ERR) {
+		unsigned int fence_status;
+
+		gmu_core_regread(device, A6XX_GMU_AHB_FENCE_STATUS,
+			&fence_status);
+		gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_CLR,
+			status);
+
+		dev_err_ratelimited(&rgmu->pdev->dev,
+			"FENCE error interrupt received %x\n", fence_status);
+	}
+
+	if (status & ~RGMU_AO_IRQ_MASK)
+		dev_err_ratelimited(&rgmu->pdev->dev,
+				"Unhandled RGMU interrupts 0x%lx\n",
+				status & ~RGMU_AO_IRQ_MASK);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t a6xx_oob_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	unsigned int status = 0;
+
+	gmu_core_regread(device, A6XX_GMU_GMU2HOST_INTR_INFO, &status);
+
+	if (status & RGMU_OOB_IRQ_ERR_MSG) {
+		gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, status);
+
+		dev_err_ratelimited(&rgmu->pdev->dev,
+				"RGMU oob irq error\n");
+		adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+	if (status & ~RGMU_OOB_IRQ_MASK)
+		dev_err_ratelimited(&rgmu->pdev->dev,
+				"Unhandled OOB interrupts 0x%lx\n",
+				status & ~RGMU_OOB_IRQ_MASK);
+
+	return IRQ_HANDLED;
+}
+
+static const char *oob_to_str(enum oob_request req)
+{
+	if (req == oob_gpu)
+		return "oob_gpu";
+	else if (req == oob_perfcntr)
+		return "oob_perfcntr";
+	return "unknown";
+}
+
+/*
+ * a6xx_rgmu_oob_set() - Set an OOB request to the RGMU
+ * @device: Pointer to the KGSL device
+ * @req: Which of the OOB bits to request
+ */
+static int a6xx_rgmu_oob_set(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device));
+	int ret, set, check;
+
+	if (req == oob_perfcntr && rgmu->num_oob_perfcntr++)
+		return 0;
+
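+	/*
+	 * The OOB request bits live in the upper half of HOST2GMU_INTR_SET
+	 * and the RGMU acks on the same bit position in GMU2HOST_INTR_INFO;
+	 * the corresponding clear bits sit another eight bits up (see
+	 * a6xx_rgmu_oob_clear()).
+	 */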
+	set = BIT(req + 16);
+	check = BIT(req + 16);
+
+	gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_SET, set);
+
+	ret = gmu_core_timed_poll_check(device,
+			A6XX_GMU_GMU2HOST_INTR_INFO,
+			check,
+			GPU_START_TIMEOUT,
+			check);
+
+	if (ret) {
+		unsigned int status;
+
+		if (req == oob_perfcntr)
+			rgmu->num_oob_perfcntr--;
+		gmu_core_regread(device, A6XX_RGMU_CX_PCC_DEBUG, &status);
+		dev_err(&rgmu->pdev->dev,
+				"Timed out while setting OOB req:%s status:0x%x\n",
+				oob_to_str(req), status);
+		gmu_core_fault_snapshot(device);
+		return ret;
+	}
+
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, check);
+	trace_kgsl_gmu_oob_set(set);
+	return 0;
+}
+
+/*
+ * a6xx_rgmu_oob_clear() - Clear a previously set OOB request
+ * @device: Pointer to the KGSL device
+ * @req: Which of the OOB bits to clear
+ */
+static void a6xx_rgmu_oob_clear(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device));
+
+	if (req == oob_perfcntr && --rgmu->num_oob_perfcntr)
+		return;
+
+	gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_SET, BIT(req + 24));
+	trace_kgsl_gmu_oob_clear(BIT(req + 24));
+}
+
+static void a6xx_rgmu_bcl_config(struct kgsl_device *device, bool on)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device));
+
+	if (on) {
+		/* Enable BCL CRC HW i/f */
+		gmu_core_regwrite(device,
+				A6XX_GMU_AO_RGMU_GLM_HW_CRC_DISABLE, 0);
+	} else {
+		/* Disable CRC HW i/f */
+		gmu_core_regwrite(device,
+				A6XX_GMU_AO_RGMU_GLM_HW_CRC_DISABLE, 1);
+
+		/* Wait for HW CRC disable ACK */
+		if (gmu_core_timed_poll_check(device,
+				A6XX_GMU_AO_RGMU_GLM_SLEEP_STATUS,
+				BIT(1), GLM_SLEEP_TIMEOUT, BIT(1)))
+			dev_err_ratelimited(&rgmu->pdev->dev,
+				"Timed out waiting for HW CRC disable acknowledgment\n");
+
+		/* Pull the valid bit RGMU_GLM_SLEEP_CTRL[7] down to 0 */
+		gmu_core_regrmw(device, A6XX_GMU_AO_RGMU_GLM_SLEEP_CTRL,
+				BIT(7), 0);
+	}
+}
+
+static void a6xx_rgmu_irq_enable(struct adreno_device *adreno_dev)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Clear pending IRQs and Unmask needed IRQs */
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, 0xffffffff);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_CLR, 0xffffffff);
+
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK,
+		~((unsigned int)RGMU_OOB_IRQ_MASK));
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK,
+		(unsigned int)~RGMU_AO_IRQ_MASK);
+
+	/* Enable all IRQs on host */
+	enable_irq(rgmu->oob_interrupt_num);
+	enable_irq(rgmu->rgmu_interrupt_num);
+}
+
+static void a6xx_rgmu_irq_disable(struct adreno_device *adreno_dev)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Disable all IRQs on host */
+	disable_irq(rgmu->rgmu_interrupt_num);
+	disable_irq(rgmu->oob_interrupt_num);
+
+	/* Mask all IRQs and clear pending IRQs */
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK, 0xffffffff);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK, 0xffffffff);
+
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, 0xffffffff);
+	gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_CLR, 0xffffffff);
+}
+
+static int a6xx_rgmu_ifpc_store(struct kgsl_device *device,
+		unsigned int val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	unsigned int requested_idle_level;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_IFPC))
+		return -EINVAL;
+
+	if (val)
+		requested_idle_level = GPU_HW_IFPC;
+	else
+		requested_idle_level = GPU_HW_ACTIVE;
+
+	if (requested_idle_level == rgmu->idle_level)
+		return 0;
+
+	/* Power cycle the GPU for changes to take effect */
+	return adreno_power_cycle_u32(adreno_dev, &rgmu->idle_level,
+		requested_idle_level);
+}
+
+static unsigned int a6xx_rgmu_ifpc_isenabled(struct kgsl_device *device)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device));
+
+	return rgmu->idle_level == GPU_HW_IFPC;
+}
+
+static void a6xx_rgmu_prepare_stop(struct kgsl_device *device)
+{
+	/* Turn off GX_MEM retention */
+	kgsl_regwrite(device, A6XX_RBBM_BLOCK_GX_RETENTION_CNTL, 0);
+}
+
+#define GX_GDSC_POWER_OFF	BIT(6)
+bool a6xx_rgmu_gx_is_on(struct adreno_device *adreno_dev)
+{
+	unsigned int val;
+
+	gmu_core_regread(KGSL_DEVICE(adreno_dev),
+			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &val);
+	return !(val & GX_GDSC_POWER_OFF);
+}
+
+static int a6xx_rgmu_wait_for_lowest_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	unsigned int reg[10] = {0};
+	unsigned long t;
+	uint64_t ts1, ts2, ts3;
+
+	if (rgmu->idle_level != GPU_HW_IFPC)
+		return 0;
+
+	ts1 = a6xx_read_alwayson(adreno_dev);
+
+	/* FIXME: readl_poll_timeout? */
+	t = jiffies + msecs_to_jiffies(RGMU_IDLE_TIMEOUT);
+	do {
+		gmu_core_regread(device,
+			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &reg[0]);
+
+		if (reg[0] & GX_GDSC_POWER_OFF)
+			return 0;
+
+		/* Wait 10us to reduce unnecessary AHB bus traffic */
+		usleep_range(10, 100);
+	} while (!time_after(jiffies, t));
+
+	ts2 = a6xx_read_alwayson(adreno_dev);
+
+	/* Do one last read in case it succeeds */
+	gmu_core_regread(device,
+		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &reg[0]);
+
+	if (reg[0] & GX_GDSC_POWER_OFF)
+		return 0;
+
+	ts3 = a6xx_read_alwayson(adreno_dev);
+
+	/* Collect abort data to help with debugging */
+	gmu_core_regread(device, A6XX_RGMU_CX_PCC_DEBUG, &reg[1]);
+	gmu_core_regread(device, A6XX_RGMU_CX_PCC_STATUS, &reg[2]);
+	gmu_core_regread(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, &reg[3]);
+	kgsl_regread(device, A6XX_CP_STATUS_1, &reg[4]);
+	gmu_core_regread(device, A6XX_GMU_RBBM_INT_UNMASKED_STATUS, &reg[5]);
+	gmu_core_regread(device, A6XX_GMU_GMU_PWR_COL_KEEPALIVE, &reg[6]);
+	kgsl_regread(device, A6XX_CP_CP2GMU_STATUS, &reg[7]);
+	kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, &reg[8]);
+	gmu_core_regread(device, A6XX_GMU_AO_SPARE_CNTL, &reg[9]);
+
+	dev_err(&rgmu->pdev->dev,
+		"----------------------[ RGMU error ]----------------------\n");
+	dev_err(&rgmu->pdev->dev, "Timeout waiting for lowest idle level\n");
+	dev_err(&rgmu->pdev->dev,
+			"Timestamps: %llx %llx %llx\n", ts1, ts2, ts3);
+	dev_err(&rgmu->pdev->dev,
+			"SPTPRAC_PWR_CLK_STATUS=%x PCC_DEBUG=%x PCC_STATUS=%x\n",
+			reg[0], reg[1], reg[2]);
+	dev_err(&rgmu->pdev->dev,
+			"CX_BUSY_STATUS=%x CP_STATUS_1=%x\n", reg[3], reg[4]);
+	dev_err(&rgmu->pdev->dev,
+			"RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n",
+			reg[5], reg[6]);
+	dev_err(&rgmu->pdev->dev,
+			"CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x AO_SPARE_CNTL=%x\n",
+			reg[7], reg[8], reg[9]);
+
+	WARN_ON(1);
+	gmu_core_fault_snapshot(device);
+	return -ETIMEDOUT;
+}
+
+/*
+ * The lowest 16 bits of this value are the number of XO clock cycles
+ * for main hysteresis. This is the first hysteresis. Here we set it
+ * to 0x1680 cycles, or 300 us. The highest 16 bits of this value are
+ * the number of XO clock cycles for short hysteresis. This happens
+ * after main hysteresis. Here we set it to 0xA cycles, or 0.5 us.
+ */
+#define A6X_RGMU_LONG_IFPC_HYST	FIELD_PREP(GENMASK(15, 0), 0x1680)
+#define A6X_RGMU_SHORT_IFPC_HYST	FIELD_PREP(GENMASK(31, 16), 0xA)
+
+/* Minimum IFPC timer (200usec) allowed to override default value */
+#define A6X_RGMU_LONG_IFPC_HYST_FLOOR	FIELD_PREP(GENMASK(15, 0), 0x0F00)
+
+/* HOSTTOGMU and TIMER0/1 interrupt mask: 0x20060 */
+#define RGMU_INTR_EN_MASK  (BIT(5) | BIT(6) | BIT(17))
+
+/* RGMU FENCE RANGE MASK */
+#define RGMU_FENCE_RANGE_MASK	((0x1 << 31) | ((0xA << 2) << 18) | (0x8A0))
+
+static int a6xx_rgmu_fw_start(struct adreno_device *adreno_dev,
+		unsigned int boot_state)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	unsigned int status;
+	int i;
+
+	switch (boot_state) {
+	case GMU_COLD_BOOT:
+	case GMU_WARM_BOOT:
+		/* Turn on TCM retention */
+		gmu_core_regwrite(device, A6XX_GMU_GENERAL_7, 1);
+
+		/* Load RGMU FW image via AHB bus */
+		for (i = 0; i < rgmu->fw_size; i++)
+			gmu_core_regwrite(device, A6XX_GMU_CM3_ITCM_START + i,
+					rgmu->fw_hostptr[i]);
+		break;
+	}
+
+	/* IFPC Feature Enable */
+	if (rgmu->idle_level == GPU_HW_IFPC) {
+		gmu_core_regwrite(device, A6XX_GMU_PWR_COL_INTER_FRAME_HYST,
+				A6X_RGMU_SHORT_IFPC_HYST | adreno_dev->ifpc_hyst);
+		gmu_core_regwrite(device, A6XX_GMU_PWR_COL_INTER_FRAME_CTRL,
+				BIT(0));
+	}
+
+	/* For RGMU CX interrupt */
+	gmu_core_regwrite(device, A6XX_RGMU_CX_INTR_GEN_EN, RGMU_INTR_EN_MASK);
+
+	/* Enable GMU AO to host interrupt */
+	gmu_core_regwrite(device, A6XX_GMU_AO_INTERRUPT_EN, RGMU_AO_IRQ_MASK);
+
+	/* For OOB */
+	gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_EN_2, 0x00FF0000);
+	gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_EN_3, 0xFF000000);
+
+	/* Fence Address range configuration */
+	gmu_core_regwrite(device, A6XX_GMU_AHB_FENCE_RANGE_0,
+			RGMU_FENCE_RANGE_MASK);
+
+	/*
+	 * During IFPC the RGMU puts the fence in drop mode, so we need to
+	 * put the fence back in allow mode during the slumber exit sequence.
+	 */
+	gmu_core_regwrite(device, A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
+
+	/* BCL ON Sequence */
+	a6xx_rgmu_bcl_config(device, true);
+
+	/* Write 0 first to make sure that rgmu is reset */
+	gmu_core_regwrite(device, A6XX_RGMU_CX_PCC_CTRL, 0);
+
+	/* Make sure the write putting RGMU in reset lands before we write 1 */
+	wmb();
+
+	/* Bring rgmu out of reset */
+	gmu_core_regwrite(device, A6XX_RGMU_CX_PCC_CTRL, 1);
+
+	if (gmu_core_timed_poll_check(device, A6XX_RGMU_CX_PCC_INIT_RESULT,
+			BIT(0), RGMU_START_TIMEOUT, BIT(0))) {
+		gmu_core_regread(device, A6XX_RGMU_CX_PCC_DEBUG, &status);
+		dev_err(&rgmu->pdev->dev,
+				"rgmu boot Failed. status:%08x\n", status);
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	/* Read the RGMU firmware version from registers */
+	gmu_core_regread(device, A6XX_GMU_GENERAL_0, &rgmu->ver);
+
+	return 0;
+}
+
+static void a6xx_rgmu_notify_slumber(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Disable the power counter so that the RGMU is not busy */
+	gmu_core_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0);
+
+	/* BCL OFF Sequence */
+	a6xx_rgmu_bcl_config(device, false);
+}
+
+static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int  ret;
+
+	/* Check the GX GDSC status */
+	if (a6xx_rgmu_gx_is_on(adreno_dev)) {
+		if (IS_ERR_OR_NULL(pwr->gx_gdsc))
+			return;
+
+		/*
+		 * Switch gx gdsc control from RGMU to CPU. Force non-zero
+		 * reference count in clk driver so next disable call will
+		 * turn off the GDSC.
+		 */
+		ret = regulator_enable(pwr->gx_gdsc);
+		if (ret)
+			dev_err(&rgmu->pdev->dev,
+					"Fail to enable gx gdsc:%d\n", ret);
+
+		ret = regulator_disable(pwr->gx_gdsc);
+		if (ret)
+			dev_err(&rgmu->pdev->dev,
+					"Fail to disable gx gdsc:%d\n", ret);
+
+		if (a6xx_rgmu_gx_is_on(adreno_dev))
+			dev_err(&rgmu->pdev->dev, "gx is stuck on\n");
+	}
+
+	clk_bulk_disable_unprepare(rgmu->num_clks, rgmu->clks);
+}
+
+void a6xx_rgmu_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+
+	adreno_snapshot_registers(device, snapshot, a6xx_rgmu_registers,
+			ARRAY_SIZE(a6xx_rgmu_registers) / 2);
+
+	a6xx_snapshot(adreno_dev, snapshot);
+
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, 0xffffffff);
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK,
+		RGMU_OOB_IRQ_MASK);
+
+	if (device->gmu_fault)
+		rgmu->fault_count++;
+}
+
+static void a6xx_rgmu_suspend(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	a6xx_rgmu_irq_disable(adreno_dev);
+	a6xx_rgmu_disable_clks(adreno_dev);
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	kgsl_pwrctrl_set_state(KGSL_DEVICE(adreno_dev), KGSL_STATE_NONE);
+}
+
+static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev)
+{
+	int ret;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	ret = clk_set_rate(rgmu->rgmu_clk, RGMU_CLK_FREQ);
+	if (ret) {
+		dev_err(&rgmu->pdev->dev, "Couldn't set the RGMU clock\n");
+		return ret;
+	}
+
+	ret = clk_set_rate(rgmu->gpu_clk,
+		pwr->pwrlevels[pwr->default_pwrlevel].gpu_freq);
+	if (ret) {
+		dev_err(&rgmu->pdev->dev, "Couldn't set the GPU clock\n");
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(rgmu->num_clks, rgmu->clks);
+	if (ret) {
+		dev_err(&rgmu->pdev->dev, "Failed to enable RGMU clocks\n");
+		return ret;
+	}
+
+	device->state = KGSL_STATE_AWARE;
+
+	return 0;
+}
+
+/*
+ * a6xx_rgmu_load_firmware() - Read the RGMU firmware into host memory
+ * @adreno_dev: Pointer to adreno device
+ */
+static int a6xx_rgmu_load_firmware(struct adreno_device *adreno_dev)
+{
+	const struct firmware *fw = NULL;
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
+	int ret;
+
+	/* RGMU fw already saved and verified, so there is nothing more to do */
+	if (rgmu->fw_hostptr)
+		return 0;
+
+	ret = request_firmware(&fw, a6xx_core->gmufw_name, &rgmu->pdev->dev);
+	if (ret < 0) {
+		dev_err(&rgmu->pdev->dev, "request_firmware (%s) failed: %d\n",
+				a6xx_core->gmufw_name, ret);
+		return ret;
+	}
+
+	rgmu->fw_hostptr = devm_kmemdup(&rgmu->pdev->dev, fw->data,
+					fw->size, GFP_KERNEL);
+
+	if (rgmu->fw_hostptr)
+		rgmu->fw_size = (fw->size / sizeof(u32));
+
+	release_firmware(fw);
+	return rgmu->fw_hostptr ? 0 : -ENOMEM;
+}
+
+/* Halt RGMU execution */
+static void a6xx_rgmu_halt_execution(struct kgsl_device *device, bool force)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device));
+	unsigned int index, status, fence;
+
+	if (!device->gmu_fault)
+		return;
+
+	/* Mask so there's no interrupt caused by NMI */
+	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK, 0xFFFFFFFF);
+
+	/* Make sure the interrupt is masked */
+	wmb();
+
+	gmu_core_regread(device, A6XX_RGMU_CX_PCC_DEBUG, &index);
+	gmu_core_regread(device, A6XX_RGMU_CX_PCC_STATUS, &status);
+	gmu_core_regread(device, A6XX_GMU_AO_AHB_FENCE_CTRL, &fence);
+
+	dev_err(&rgmu->pdev->dev,
+			"RGMU Fault PCC_DEBUG:0x%x PCC_STATUS:0x%x FENCE_CTRL:0x%x\n",
+			index, status, fence);
+
+	/*
+	 * Write 0 to halt RGMU execution. We halt it on a GMU/GPU fault and
+	 * restart PCC execution in the recovery path.
+	 */
+	gmu_core_regwrite(device, A6XX_RGMU_CX_PCC_CTRL, 0);
+
+	/*
+	 * Ensure that fence is in allow mode after halting RGMU.
+	 * After halting RGMU we dump snapshot.
+	 */
+	gmu_core_regwrite(device, A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
+}
+
+static void halt_gbif_arb(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Halt all AXI requests */
+	kgsl_regwrite(device, A6XX_GBIF_HALT, A6XX_GBIF_ARB_HALT_MASK);
+	adreno_wait_for_halt_ack(device, A6XX_GBIF_HALT_ACK,
+		A6XX_GBIF_ARB_HALT_MASK);
+
+	/* De-assert the halts */
+	kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0);
+}
+
+/* Caller shall ensure GPU is ready for SLUMBER */
+static void a6xx_rgmu_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	kgsl_pwrctrl_axi(device, false);
+
+	if (device->gmu_fault)
+		return a6xx_rgmu_suspend(adreno_dev);
+
+	/* Wait for the lowest idle level we requested */
+	ret = a6xx_rgmu_wait_for_lowest_idle(adreno_dev);
+	if (ret)
+		return a6xx_rgmu_suspend(adreno_dev);
+
+	a6xx_rgmu_notify_slumber(adreno_dev);
+
+	/* Halt CX traffic and de-assert halt */
+	halt_gbif_arb(adreno_dev);
+
+	a6xx_rgmu_irq_disable(adreno_dev);
+	a6xx_rgmu_disable_clks(adreno_dev);
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	kgsl_pwrctrl_clear_l3_vote(device);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+}
+
+static int a6xx_rgmu_clock_set(struct adreno_device *adreno_dev,
+		u32 pwrlevel)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	int ret;
+	unsigned long rate;
+
+	if (pwrlevel == INVALID_DCVS_IDX)
+		return -EINVAL;
+
+	rate = device->pwrctrl.pwrlevels[pwrlevel].gpu_freq;
+
+	ret = clk_set_rate(rgmu->gpu_clk, rate);
+	if (ret)
+		dev_err(&rgmu->pdev->dev, "Couldn't set the GPU clock\n");
+
+	return ret;
+}
+
+static int a6xx_gpu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		goto err;
+
+	ret = a6xx_rgmu_oob_set(device, oob_gpu);
+	if (ret)
+		goto err_oob_clear;
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data));
+
+	/* Restore performance counter registers with saved values */
+	adreno_perfcounter_restore(adreno_dev);
+
+	a6xx_start(adreno_dev);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	adreno_perfcounter_start(adreno_dev);
+
+	/* Clear FSR here in case it is set from a previous pagefault */
+	kgsl_mmu_clear_fsr(&device->mmu);
+
+	a6xx_enable_gpu_irq(adreno_dev);
+
+	ret = a6xx_rb_start(adreno_dev);
+	if (ret) {
+		a6xx_disable_gpu_irq(adreno_dev);
+		goto err_oob_clear;
+	}
+
+	/*
+	 * At this point it is safe to assume that we recovered. Setting
+	 * this field allows us to take a new snapshot for the next failure
+	 * if we are prioritizing the first unrecoverable snapshot.
+	 */
+	if (device->snapshot)
+		device->snapshot->recovered = true;
+
+	/* Start the dispatcher */
+	adreno_dispatcher_start(device);
+
+	device->reset_counter++;
+
+	a6xx_rgmu_oob_clear(device, oob_gpu);
+
+	return 0;
+
+err_oob_clear:
+	a6xx_rgmu_oob_clear(device, oob_gpu);
+
+err:
+	a6xx_rgmu_power_off(adreno_dev);
+
+	return ret;
+}
+
+static int a6xx_rgmu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = a6xx_rgmu_enable_clks(adreno_dev);
+	if (ret) {
+		kgsl_pwrctrl_disable_cx_gdsc(device);
+		return ret;
+	}
+
+	a6xx_rgmu_irq_enable(adreno_dev);
+
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
+	ret = a6xx_rgmu_fw_start(adreno_dev, GMU_COLD_BOOT);
+	if (ret)
+		goto err;
+
+	/* Request default DCVS level */
+	ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+	if (ret)
+		goto err;
+
+	ret = kgsl_pwrctrl_axi(device, true);
+	if (ret)
+		goto err;
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	a6xx_rgmu_power_off(adreno_dev);
+
+	return ret;
+}
+
+static int a6xx_power_off(struct adreno_device *adreno_dev);
+
+static void rgmu_idle_check(struct work_struct *work)
+{
+	struct kgsl_device *device = container_of(work,
+					struct kgsl_device, idle_check_ws);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret;
+
+	mutex_lock(&device->mutex);
+
+	if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags))
+		goto done;
+
+	if (atomic_read(&device->active_cnt) || time_is_after_jiffies(device->idle_jiffies)) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	spin_lock(&device->submit_lock);
+
+	if (device->submit_now) {
+		spin_unlock(&device->submit_lock);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	device->skip_inline_submit = true;
+	spin_unlock(&device->submit_lock);
+
+	ret = a6xx_power_off(adreno_dev);
+	if (ret == -EBUSY) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+done:
+	mutex_unlock(&device->mutex);
+}
+
+static void rgmu_idle_timer(struct timer_list *t)
+{
+	struct kgsl_device *device = container_of(t, struct kgsl_device,
+					idle_timer);
+
+	kgsl_schedule_work(&device->idle_check_ws);
+}
+
+static int a6xx_boot(struct adreno_device *adreno_dev)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (test_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = a6xx_rgmu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_start_idle_timer(device);
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return 0;
+}
+
+static void a6xx_rgmu_touch_wakeup(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	int ret;
+
+	/*
+	 * Do not wake up a suspended device or until the first boot sequence
+	 * has been completed.
+	 */
+	if (test_bit(RGMU_PRIV_PM_SUSPEND, &rgmu->flags) ||
+		!test_bit(RGMU_PRIV_FIRST_BOOT_DONE, &rgmu->flags))
+		return;
+
+	if (test_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags))
+		goto done;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = a6xx_rgmu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	ret = a6xx_gpu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+done:
+	/*
+	 * When waking up from a touch event we want to stay active long enough
+	 * for the user to send a draw command.  The default idle timer timeout
+	 * is shorter than we want so go ahead and push the idle timer out
+	 * further for this special case
+	 */
+	mod_timer(&device->idle_timer, jiffies +
+			msecs_to_jiffies(adreno_wake_timeout));
+}
+
+static int a6xx_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	int ret;
+
+	if (test_bit(RGMU_PRIV_FIRST_BOOT_DONE, &rgmu->flags))
+		return a6xx_boot(adreno_dev);
+
+	ret = a6xx_ringbuffer_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_rgmu_load_firmware(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = a6xx_rgmu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = a6xx_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+
+	adreno_create_profile_buffer(adreno_dev);
+
+	set_bit(RGMU_PRIV_FIRST_BOOT_DONE, &rgmu->flags);
+	set_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags);
+
+	/*
+	 * There is a possible deadlock scenario during kgsl firmware reading
+	 * (request_firmware) and devfreq update calls. During first boot, kgsl
+	 * device mutex is held and then request_firmware is called for reading
+	 * firmware. request_firmware internally takes dev_pm_qos_mtx lock.
+	 * Whereas in case of devfreq update calls triggered by thermal/bcl or
+	 * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then
+	 * tries to take kgsl device mutex as part of get_dev_status/target
+	 * calls. This results in a deadlock when both threads are unable to
+	 * acquire the mutex held by the other thread. Enable devfreq updates
+	 * now as we are done reading all firmware files.
+	 */
+	device->pwrscale.devfreq_enabled = true;
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return 0;
+}
+
+static int a6xx_rgmu_first_open(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = a6xx_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * A client that does a first_open but never closes the device
+	 * may prevent us from going back to SLUMBER. So trigger the idle
+	 * check by incrementing the active count and immediately releasing it.
+	 */
+	atomic_inc(&device->active_cnt);
+	a6xx_rgmu_active_count_put(adreno_dev);
+
+	return 0;
+}
+
+static int a6xx_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	int ret;
+
+	adreno_suspend_context(device);
+
+	/*
+	 * adreno_suspend_context() unlocks the device mutex, which
+	 * could allow a concurrent thread to attempt SLUMBER sequence.
+	 * Hence, check the flags before proceeding with SLUMBER.
+	 */
+	if (!test_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER);
+
+	ret = a6xx_rgmu_oob_set(device, oob_gpu);
+	if (ret) {
+		a6xx_rgmu_oob_clear(device, oob_gpu);
+		goto no_gx_power;
+	}
+
+	if (a6xx_irq_pending(adreno_dev)) {
+		a6xx_rgmu_oob_clear(device, oob_gpu);
+		return -EBUSY;
+	}
+
+	kgsl_pwrscale_update_stats(device);
+
+	/* Save active coresight registers if applicable */
+	adreno_coresight_stop(adreno_dev);
+
+	/* Save physical performance counter values before GPU power down*/
+	adreno_perfcounter_save(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+	a6xx_rgmu_prepare_stop(device);
+
+	a6xx_rgmu_oob_clear(device, oob_gpu);
+
+no_gx_power:
+	/* Halt all gx traffic */
+	kgsl_regwrite(device, A6XX_GBIF_HALT, A6XX_GBIF_CLIENT_HALT_MASK);
+
+	adreno_wait_for_halt_ack(device, A6XX_GBIF_HALT_ACK,
+		A6XX_GBIF_CLIENT_HALT_MASK);
+
+	kgsl_pwrctrl_irq(device, false);
+
+	a6xx_rgmu_power_off(adreno_dev);
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	adreno_dispatcher_stop(adreno_dev);
+
+	adreno_ringbuffer_stop(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags);
+
+	del_timer_sync(&device->idle_timer);
+
+	kgsl_pwrscale_sleep(device);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+
+	return ret;
+}
+
+int a6xx_rgmu_reset(struct adreno_device *adreno_dev)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+
+	a6xx_disable_gpu_irq(adreno_dev);
+
+	/* Hard reset the rgmu and gpu */
+	a6xx_rgmu_suspend(adreno_dev);
+
+	a6xx_reset_preempt_records(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags);
+
+	/* Attempt rebooting the rgmu and gpu */
+	return a6xx_boot(adreno_dev);
+}
+
+static int a6xx_rgmu_active_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	int ret = 0;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	if (test_bit(RGMU_PRIV_PM_SUSPEND, &rgmu->flags))
+		return -EINVAL;
+
+	if (atomic_read(&device->active_cnt) == 0)
+		ret = a6xx_boot(adreno_dev);
+
+	if (ret == 0) {
+		atomic_inc(&device->active_cnt);
+		trace_kgsl_active_count(device,
+			(unsigned long) __builtin_return_address(0));
+	}
+
+	return ret;
+}
+
+static int a6xx_rgmu_pm_suspend(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	int ret;
+
+	if (test_bit(RGMU_PRIV_PM_SUSPEND, &rgmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND);
+
+	/* Halt any new submissions */
+	reinit_completion(&device->halt_gate);
+
+	/* wait for active count so device can be put in slumber */
+	ret = kgsl_active_count_wait(device, 0, HZ);
+	if (ret) {
+		dev_err(device->dev,
+			"Timed out waiting for the active count\n");
+		goto err;
+	}
+
+	ret = adreno_idle(device);
+	if (ret)
+		goto err;
+
+	a6xx_power_off(adreno_dev);
+
+	set_bit(RGMU_PRIV_PM_SUSPEND, &rgmu->flags);
+
+	adreno_get_gpu_halt(adreno_dev);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
+
+	return 0;
+err:
+	adreno_dispatcher_start(device);
+	return ret;
+}
+
+static void a6xx_rgmu_pm_resume(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+
+	if (WARN(!test_bit(RGMU_PRIV_PM_SUSPEND, &rgmu->flags),
+		"resume invoked without a suspend\n"))
+		return;
+
+	adreno_put_gpu_halt(adreno_dev);
+
+	clear_bit(RGMU_PRIV_PM_SUSPEND, &rgmu->flags);
+
+	adreno_dispatcher_start(device);
+}
+
+static const struct gmu_dev_ops a6xx_rgmudev = {
+	.oob_set = a6xx_rgmu_oob_set,
+	.oob_clear = a6xx_rgmu_oob_clear,
+	.ifpc_store = a6xx_rgmu_ifpc_store,
+	.ifpc_isenabled = a6xx_rgmu_ifpc_isenabled,
+	.send_nmi = a6xx_rgmu_halt_execution,
+};
+
+static int a6xx_rgmu_irq_probe(struct kgsl_device *device)
+{
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device));
+	int ret;
+
+	ret = kgsl_request_irq(rgmu->pdev, "kgsl_oob",
+			a6xx_oob_irq_handler, device);
+	if (ret < 0)
+		return ret;
+
+	rgmu->oob_interrupt_num  = ret;
+
+	ret = kgsl_request_irq(rgmu->pdev,
+		"kgsl_rgmu", a6xx_rgmu_irq_handler, device);
+	if (ret < 0)
+		return ret;
+
+	rgmu->rgmu_interrupt_num = ret;
+	return 0;
+}
+
+static int a6xx_rgmu_clocks_probe(struct a6xx_rgmu_device *rgmu,
+		struct device_node *node)
+{
+	int ret, i;
+
+	ret = devm_clk_bulk_get_all(&rgmu->pdev->dev, &rgmu->clks);
+	if (ret < 0)
+		return ret;
+	/*
+	 * Voting for apb_pclk will enable power and clocks required for
+	 * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled,
+	 * QDSS is essentially unusable. Hence, if QDSS cannot be used,
+	 * don't vote for this clock.
+	 */
+	if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) {
+		for (i = 0; i < ret; i++) {
+			if (!strcmp(rgmu->clks[i].id, "apb_pclk")) {
+				rgmu->clks[i].clk = NULL;
+				break;
+			}
+		}
+	}
+	rgmu->num_clks = ret;
+
+	rgmu->gpu_clk = kgsl_of_clk_by_name(rgmu->clks, ret, "core");
+	if (!rgmu->gpu_clk) {
+		dev_err(&rgmu->pdev->dev, "The GPU clock isn't defined\n");
+		return -ENODEV;
+	}
+
+	rgmu->rgmu_clk = kgsl_of_clk_by_name(rgmu->clks, ret, "gmu");
+	if (!rgmu->rgmu_clk) {
+		dev_err(&rgmu->pdev->dev, "The RGMU clock isn't defined\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+const struct adreno_power_ops a6xx_rgmu_power_ops = {
+	.first_open = a6xx_rgmu_first_open,
+	.last_close = a6xx_power_off,
+	.active_count_get = a6xx_rgmu_active_count_get,
+	.active_count_put = a6xx_rgmu_active_count_put,
+	.pm_suspend = a6xx_rgmu_pm_suspend,
+	.pm_resume = a6xx_rgmu_pm_resume,
+	.touch_wakeup = a6xx_rgmu_touch_wakeup,
+	.gpu_clock_set = a6xx_rgmu_clock_set,
+};
+
+int a6xx_rgmu_device_probe(struct platform_device *pdev,
+	u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	struct a6xx_device *a6xx_dev;
+	int ret;
+
+	a6xx_dev = devm_kzalloc(&pdev->dev, sizeof(*a6xx_dev),
+			GFP_KERNEL);
+	if (!a6xx_dev)
+		return -ENOMEM;
+
+	adreno_dev = &a6xx_dev->adreno_dev;
+
+	adreno_dev->irq_mask = A6XX_INT_MASK;
+
+	ret = a6xx_probe_common(pdev, adreno_dev, chipid, gpucore);
+	if (ret)
+		return ret;
+
+	ret = adreno_dispatcher_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	INIT_WORK(&device->idle_check_ws, rgmu_idle_check);
+
+	timer_setup(&device->idle_timer, rgmu_idle_timer, 0);
+
+	return 0;
+}
+
+int a6xx_rgmu_add_to_minidump(struct adreno_device *adreno_dev)
+{
+	struct a6xx_device *a6xx_dev = container_of(adreno_dev,
+					struct a6xx_device, adreno_dev);
+
+	return kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_A6XX_DEVICE,
+				(void *)(a6xx_dev), sizeof(struct a6xx_device));
+}
+
+/* Do not access any RGMU registers in RGMU probe function */
+static int a6xx_rgmu_probe(struct kgsl_device *device,
+		struct platform_device *pdev)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev);
+	int ret;
+
+	rgmu->pdev = pdev;
+
+	/* Set up RGMU regulators */
+	ret = kgsl_pwrctrl_probe_regulators(device, pdev);
+	if (ret)
+		return ret;
+
+	/* Set up RGMU clocks */
+	ret = a6xx_rgmu_clocks_probe(rgmu, pdev->dev.of_node);
+	if (ret)
+		return ret;
+
+	ret = kgsl_regmap_add_region(&device->regmap, pdev,
+		"kgsl_rgmu", NULL, NULL);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to map the RGMU registers\n");
+		return ret;
+	}
+
+	/* Initialize OOB and RGMU interrupts */
+	ret = a6xx_rgmu_irq_probe(device);
+	if (ret)
+		return ret;
+
+	/* Set up RGMU idle states */
+	if (ADRENO_FEATURE(ADRENO_DEVICE(device), ADRENO_IFPC)) {
+		rgmu->idle_level = GPU_HW_IFPC;
+		adreno_dev->ifpc_hyst = A6X_RGMU_LONG_IFPC_HYST;
+		adreno_dev->ifpc_hyst_floor = A6X_RGMU_LONG_IFPC_HYST_FLOOR;
+	} else {
+		rgmu->idle_level = GPU_HW_ACTIVE;
+	}
+
+	set_bit(GMU_ENABLED, &device->gmu_core.flags);
+	device->gmu_core.dev_ops = &a6xx_rgmudev;
+
+	return 0;
+}
+
+static void a6xx_rgmu_remove(struct kgsl_device *device)
+{
+	memset(&device->gmu_core, 0, sizeof(device->gmu_core));
+}
+
+static int a6xx_rgmu_bind(struct device *dev, struct device *master, void *data)
+{
+	struct kgsl_device *device = dev_get_drvdata(master);
+
+	return a6xx_rgmu_probe(device, to_platform_device(dev));
+}
+
+static void a6xx_rgmu_unbind(struct device *dev, struct device *master,
+		void *data)
+{
+	struct kgsl_device *device = dev_get_drvdata(master);
+
+	a6xx_rgmu_remove(device);
+}
+
+static const struct component_ops a6xx_rgmu_component_ops = {
+	.bind = a6xx_rgmu_bind,
+	.unbind = a6xx_rgmu_unbind,
+};
+
+static int a6xx_rgmu_probe_dev(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &a6xx_rgmu_component_ops);
+}
+
+static int a6xx_rgmu_remove_dev(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &a6xx_rgmu_component_ops);
+	return 0;
+}
+
+static const struct of_device_id a6xx_rgmu_match_table[] = {
+	{ .compatible = "qcom,gpu-rgmu" },
+	{ },
+};
+
+struct platform_driver a6xx_rgmu_driver = {
+	.probe = a6xx_rgmu_probe_dev,
+	.remove = a6xx_rgmu_remove_dev,
+	.driver = {
+		.name = "kgsl-rgmu",
+		.of_match_table = a6xx_rgmu_match_table,
+	},
+};

+ 106 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_rgmu.h

@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_A6XX_RGMU_H
+#define __ADRENO_A6XX_RGMU_H
+
+#define RGMU_AO_IRQ_FENCE_ERR		BIT(3)
+#define RGMU_AO_IRQ_MASK			RGMU_AO_IRQ_FENCE_ERR
+
+#define RGMU_OOB_IRQ_ERR_MSG		BIT(24)
+#define RGMU_OOB_IRQ_ACK_MASK		GENMASK(23, 16)
+#define RGMU_OOB_IRQ_ERR_MSG_MASK	GENMASK(31, 24)
+#define RGMU_OOB_IRQ_MASK		RGMU_OOB_IRQ_ERR_MSG_MASK
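+
+/*
+ * Note: these field values (0x00FF0000 for the ACK mask and 0xFF000000 for
+ * the error-message mask) match the A6XX_GMU_HOST2GMU_INTR_EN_2/3 enables
+ * programmed in a6xx_rgmu_fw_start().
+ */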
+
+#define MAX_RGMU_CLKS  8
+
+enum {
+	/* @RGMU_PRIV_FIRST_BOOT_DONE: The very first gpu boot is done */
+	RGMU_PRIV_FIRST_BOOT_DONE,
+	/* @RGMU_PRIV_GPU_STARTED: GPU has been started */
+	RGMU_PRIV_GPU_STARTED,
+	/* @RGMU_PRIV_PM_SUSPEND: The rgmu driver is suspended */
+	RGMU_PRIV_PM_SUSPEND,
+};
+
+/**
+ * struct a6xx_rgmu_device - rGMU device structure
+ * @ver: RGMU firmware version
+ * @pdev: Pointer to the RGMU platform device
+ * @rgmu_interrupt_num: RGMU interrupt number
+ * @oob_interrupt_num: Interrupt number for the RGMU-asserted OOB interrupt
+ * @fw_hostptr: Buffer which holds the RGMU firmware
+ * @fw_size: Size of RGMU firmware buffer
+ * @clks: RGMU clocks including the GPU
+ * @gpu_clk: Pointer to GPU core clock
+ * @rgmu_clk: Pointer to rgmu clock
+ * @flags: RGMU flags
+ * @idle_level: Minimal GPU idle power level
+ * @fault_count: RGMU fault count
+ */
+struct a6xx_rgmu_device {
+	u32 ver;
+	struct platform_device *pdev;
+	unsigned int rgmu_interrupt_num;
+	unsigned int oob_interrupt_num;
+	unsigned int *fw_hostptr;
+	uint32_t fw_size;
+	struct clk_bulk_data *clks;
+	/** @num_clks: Number of clocks in @clks */
+	int num_clks;
+	struct clk *gpu_clk;
+	struct clk *rgmu_clk;
+	unsigned int idle_level;
+	unsigned int fault_count;
+	/** @flags: rgmu internal flags */
+	unsigned long flags;
+	/** @num_oob_perfcntr: Number of active oob_perfcntr requests */
+	u32 num_oob_perfcntr;
+};
+
+/**
+ * a6xx_rgmu_device_probe - Probe a6xx rgmu resources
+ * @pdev: Pointer to the platform device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore
+ *
+ * The target specific probe function for rgmu based a6xx targets.
+ */
+int a6xx_rgmu_device_probe(struct platform_device *pdev,
+	u32 chipid, const struct adreno_gpu_core *gpucore);
+
+/**
+ * a6xx_rgmu_reset - Reset and restart the rgmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int a6xx_rgmu_reset(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_rgmu_snapshot - Take snapshot for rgmu based targets
+ * @adreno_dev: Pointer to the adreno device
+ * @snapshot: Pointer to the snapshot
+ *
+ * This function halts rgmu execution if we hit an rgmu
+ * fault and then takes an rgmu and gpu snapshot.
+ */
+void a6xx_rgmu_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot);
+
+/**
+ * a6xx_rgmu_add_to_minidump - Register a6xx_device with va minidump
+ * @adreno_dev: Pointer to the adreno device
+ */
+int a6xx_rgmu_add_to_minidump(struct adreno_device *adreno_dev);
+
+/**
+ * a6xx_rgmu_gx_is_on() - Check if GX is on using pwr status register
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This check should only be performed if the keepalive bit is set or it
+ * can be guaranteed that the power state of the GPU will remain unchanged
+ */
+bool a6xx_rgmu_gx_is_on(struct adreno_device *adreno_dev);
+#endif

+ 578 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_ringbuffer.c

@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_pm4types.h"
+#include "adreno_ringbuffer.h"
+#include "adreno_trace.h"
+#include "kgsl_trace.h"
+
+static int a6xx_rb_pagetable_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		struct kgsl_pagetable *pagetable, u32 *cmds)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
+	int count = 0;
+	u32 id = drawctxt ? drawctxt->base.id : 0;
+
+	if (pagetable == device->mmu.defaultpagetable)
+		return 0;
+
+	cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
+	cmds[count++] = lower_32_bits(ttbr0);
+	cmds[count++] = upper_32_bits(ttbr0);
+	cmds[count++] = id;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) {
+		cmds[count++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+		cmds[count++] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
+		cmds[count++] = cp_type4_packet(A6XX_CP_MISC_CNTL, 1);
+		cmds[count++] = 1;
+	}
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5);
+	cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
+				rb->id, ttbr0));
+	cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
+				rb->id, ttbr0));
+	cmds[count++] = lower_32_bits(ttbr0);
+	cmds[count++] = upper_32_bits(ttbr0);
+	cmds[count++] = id;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) {
+		cmds[count++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+		cmds[count++] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
+		cmds[count++] = cp_type4_packet(A6XX_CP_MISC_CNTL, 1);
+		cmds[count++] = 0;
+	}
+
+	return count;
+}
+
+static int a6xx_rb_context_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_pagetable *pagetable =
+		adreno_drawctxt_get_pagetable(drawctxt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int count = 0;
+	u32 cmds[41];
+
+	if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) {
+		/* Clear performance counters during context switches */
+		if (!adreno_dev->perfcounter) {
+			cmds[count++] = cp_type4_packet(A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
+			cmds[count++] = 0x1;
+		}
+
+		count += a6xx_rb_pagetable_switch(adreno_dev, rb, drawctxt,
+			pagetable, &cmds[count]);
+
+		/* Wait for performance counter clear to finish */
+		if (!adreno_dev->perfcounter) {
+			cmds[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
+			cmds[count++] = 0x3;
+			cmds[count++] = A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
+			cmds[count++] = 0x0;
+			cmds[count++] = 0x1;
+			cmds[count++] = 0x1;
+			cmds[count++] = 0x0;
+		}
+	}
+
+	cmds[count++] = cp_type7_packet(CP_NOP, 1);
+	cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
+				current_context));
+	cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
+				current_context));
+	cmds[count++] = drawctxt->base.id;
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+		KGSL_MEMSTORE_GLOBAL, current_context));
+	cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+		KGSL_MEMSTORE_GLOBAL, current_context));
+	cmds[count++] = drawctxt->base.id;
+
+	cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1);
+	cmds[count++] = 0x31;
+
+	return a6xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
+			cmds, count, 0, NULL);
+}
+
+#define RB_SOPTIMESTAMP(device, rb) \
+	       MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
+#define CTXT_SOPTIMESTAMP(device, drawctxt) \
+	       MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
+
+#define RB_EOPTIMESTAMP(device, rb) \
+	       MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
+#define CTXT_EOPTIMESTAMP(device, drawctxt) \
+	       MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
+
+int a6xx_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time, bool sync)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+	unsigned long flags;
+
+	adreno_get_submit_time(adreno_dev, rb, time);
+	adreno_profile_submit_time(time);
+
+	if (sync && !ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) {
+		u32 *cmds = adreno_ringbuffer_allocspace(rb, 3);
+
+		if (IS_ERR(cmds))
+			return PTR_ERR(cmds);
+
+		cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2);
+		cmds[1] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id,
+				rptr));
+		cmds[2] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id,
+				rptr));
+	}
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+		if (adreno_dev->cur_rb == rb) {
+			kgsl_pwrscale_busy(device);
+			ret = a6xx_fenced_write(adreno_dev,
+				A6XX_CP_RB_WPTR, rb->_wptr,
+				FENCE_STATUS_WRITEDROPPED0_MASK);
+			rb->skip_inline_wptr = false;
+		}
+	} else {
+		if (adreno_dev->cur_rb == rb)
+			rb->skip_inline_wptr = true;
+	}
+
+	rb->wptr = rb->_wptr;
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+	if (ret) {
+		/*
+		 * If WPTR update fails, take inline snapshot and trigger
+		 * recovery.
+		 */
+		gmu_core_fault_snapshot(device);
+		adreno_dispatcher_fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+	}
+
+	return ret;
+}
+
+int a6xx_ringbuffer_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int i, ret;
+
+	ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE,
+		0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
+		"scratch");
+	if (ret)
+		return ret;
+
+	adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
+
+	if (!adreno_preemption_feature_set(adreno_dev)) {
+		adreno_dev->num_ringbuffers = 1;
+		return adreno_ringbuffer_setup(adreno_dev,
+			&adreno_dev->ringbuffers[0], 0);
+	}
+
+	adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);
+
+	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
+		int ret;
+
+		ret = adreno_ringbuffer_setup(adreno_dev,
+			&adreno_dev->ringbuffers[i], i);
+		if (ret)
+			return ret;
+	}
+
+	timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);
+	a6xx_preemption_init(adreno_dev);
+	return 0;
+}
+
+#define A6XX_SUBMIT_MAX 79
+
+int a6xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 flags, u32 *in, u32 dwords, u32 timestamp,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 size = A6XX_SUBMIT_MAX + dwords;
+	u32 *cmds, index = 0;
+	u64 profile_gpuaddr;
+	u32 profile_dwords;
+
+	if (adreno_drawctxt_detached(drawctxt))
+		return -ENOENT;
+
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return -EPROTO;
+
+	rb->timestamp++;
+
+	if (drawctxt)
+		drawctxt->internal_timestamp = rb->timestamp;
+
+	/*
+	 * if APRIV is enabled we assume all submissions are run with protected
+	 * mode off
+	 */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
+		flags &= ~F_NOTPROTECTED;
+
+	cmds = adreno_ringbuffer_allocspace(rb, size);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	/* Identify the start of a command */
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
+
+	/* This is 25 dwords when drawctxt is not NULL and perfcounter needs to be zapped */
+	index += a6xx_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
+		&cmds[index]);
+
+	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+	cmds[index++] = 0x101; /* IFPC disable */
+
+	profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
+		drawctxt, &profile_dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = upper_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	if (drawctxt) {
+		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
+		cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
+	cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
+	cmds[index++] = rb->timestamp;
+
+	if (IS_SECURE(flags)) {
+		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		cmds[index++] = 1;
+	}
+
+	if (IS_NOTPROTECTED(flags)) {
+		cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+		cmds[index++] = 0;
+	}
+
+	memcpy(&cmds[index], in, dwords << 2);
+	index += dwords;
+
+	if (IS_NOTPROTECTED(flags)) {
+		cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+		cmds[index++] = 1;
+	}
+
+	profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
+		drawctxt, &dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = upper_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy))
+		cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);
+
+	/*
+	 * If this is an internal command, just write the ringbuffer timestamp,
+	 * otherwise, write both
+	 */
+	if (!drawctxt) {
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	} else {
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
+		cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_FLUSH_TS;
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+	cmds[index++] = 0x100; /* IFPC enable */
+
+	if (IS_WFI(flags))
+		cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+
+	if (IS_SECURE(flags)) {
+		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		cmds[index++] = 0;
+	}
+
+	/* 10 dwords */
+	index += a6xx_preemption_post_ibsubmit(adreno_dev, &cmds[index]);
+
+	/* Adjust the wptr for the number of dwords we actually wrote */
+	rb->_wptr -= (size - index);
+
+	return a6xx_ringbuffer_submit(rb, time,
+			!adreno_is_preemption_enabled(adreno_dev));
+}
+
+static u32 a6xx_get_alwayson_counter(u32 *cmds, u64 gpuaddr)
+{
+	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
+	cmds[1] = A6XX_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18);
+	cmds[2] = lower_32_bits(gpuaddr);
+	cmds[3] = upper_32_bits(gpuaddr);
+
+	return 4;
+}
+
+static u32 a6xx_get_alwayson_context(u32 *cmds, u64 gpuaddr)
+{
+	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
+	cmds[1] = A6XX_CP_ALWAYS_ON_CONTEXT_LO | (1 << 30) | (2 << 18);
+	cmds[2] = lower_32_bits(gpuaddr);
+	cmds[3] = upper_32_bits(gpuaddr);
+
+	return 4;
+}
+
+#define PROFILE_IB_DWORDS 4
+#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
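+
+/*
+ * Each profiling slot is PROFILE_IB_DWORDS << 2 = 16 bytes, so with a 4 KB
+ * PAGE_SIZE the per-ringbuffer profile buffer holds 256 slots before
+ * profile_index wraps.
+ */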
+
+static u64 a6xx_get_user_profiling_ib(struct adreno_ringbuffer *rb,
+		struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds)
+{
+	u32 offset, *ib, dwords;
+	u64 gpuaddr;
+
+	if (IS_ERR(rb->profile_desc))
+		return 0;
+
+	offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
+	ib = rb->profile_desc->hostptr + offset;
+	gpuaddr = rb->profile_desc->gpuaddr + offset;
+	dwords = a6xx_get_alwayson_counter(ib,
+		cmdobj->profiling_buffer_gpuaddr + target_offset);
+
+	cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+	cmds[1] = lower_32_bits(gpuaddr);
+	cmds[2] = upper_32_bits(gpuaddr);
+	cmds[3] = dwords;
+
+	rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
+
+	return 4;
+}
+
+static int a6xx_drawctxt_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (rb->drawctxt_active == drawctxt)
+		return 0;
+
+	if (kgsl_context_detached(&drawctxt->base))
+		return -ENOENT;
+
+	if (!_kgsl_context_get(&drawctxt->base))
+		return -ENOENT;
+
+	ret = a6xx_rb_context_switch(adreno_dev, rb, drawctxt);
+	if (ret) {
+		kgsl_context_put(&drawctxt->base);
+		return ret;
+	}
+
+	trace_adreno_drawctxt_switch(rb, drawctxt);
+
+	/* Release the current drawctxt as soon as the new one is switched */
+	adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
+		rb, rb->timestamp);
+
+	rb->drawctxt_active = drawctxt;
+	return 0;
+}
+
+#define A6XX_USER_PROFILE_IB(rb, cmdobj, cmds, field) \
+	a6xx_get_user_profiling_ib((rb), (cmdobj), \
+		offsetof(struct kgsl_drawobj_profiling_buffer, field), \
+		(cmds))
+
+#define A6XX_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
+	a6xx_get_alwayson_counter((cmds), \
+		(dev)->profile_buffer->gpuaddr + \
+			ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
+				field))
+
+#define A6XX_KERNEL_PROFILE_CONTEXT(dev, cmdobj, cmds, field) \
+	a6xx_get_alwayson_context((cmds), \
+		(dev)->profile_buffer->gpuaddr + \
+			ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
+				field))
+
+#define A6XX_COMMAND_DWORDS 40
+
+int a6xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	struct adreno_ringbuffer *rb = drawctxt->rb;
+	int ret = 0, numibs = 0, index = 0;
+	u32 *cmds;
+
+	/* Count the number of IBs (if we are not skipping) */
+	if (!IS_SKIP(flags)) {
+		struct list_head *tmp;
+
+		list_for_each(tmp, &cmdobj->cmdlist)
+			numibs++;
+	}
+
+	cmds = kvmalloc((A6XX_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
+	if (!cmds) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = START_IB_IDENTIFIER;
+
+	/* Kernel profiling: 8 dwords */
+	if (IS_KERNEL_PROFILE(flags)) {
+		index += A6XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
+			started);
+		index += A6XX_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
+			ctx_start);
+	}
+
+	/* User profiling: 4 dwords */
+	if (IS_USER_PROFILE(flags))
+		index += A6XX_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
+			gpu_ticks_submitted);
+
+	if (numibs) {
+		struct kgsl_memobj_node *ib;
+
+		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+		cmds[index++] = 0x00d; /* IB1LIST start */
+
+		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+			if (ib->priv & MEMOBJ_SKIP ||
+			    (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE
+			     && !IS_PREAMBLE(flags)))
+				cmds[index++] = cp_type7_packet(CP_NOP, 4);
+
+			cmds[index++] =
+				cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+			cmds[index++] = lower_32_bits(ib->gpuaddr);
+			cmds[index++] = upper_32_bits(ib->gpuaddr);
+
+			/* Double check that IB_PRIV is never set */
+			cmds[index++] = (ib->size >> 2) & 0xfffff;
+		}
+
+		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+		cmds[index++] = 0x00e; /* IB1LIST end */
+	}
+
+	/* CCU invalidate */
+	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
+	cmds[index++] = 24;
+
+	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
+	cmds[index++] = 25;
+
+	/* 8 dwords */
+	if (IS_KERNEL_PROFILE(flags)) {
+		index += A6XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
+			retired);
+		index += A6XX_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
+			ctx_end);
+	}
+
+	/* 4 dwords */
+	if (IS_USER_PROFILE(flags))
+		index += A6XX_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
+			gpu_ticks_retired);
+
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = END_IB_IDENTIFIER;
+
+	ret = a6xx_drawctxt_switch(adreno_dev, rb, drawctxt);
+
+	/*
+	 * In the unlikely event of an error in the drawctxt switch,
+	 * treat it like a hang
+	 */
+	if (ret) {
+		/*
+		 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
+		 * the upper layers know how to handle it
+		 */
+		if (ret != -ENOSPC && ret != -ENOENT)
+			dev_err(device->dev,
+				     "Unable to switch draw context: %d\n",
+				     ret);
+		goto done;
+	}
+
+	adreno_drawobj_set_constraint(device, drawobj);
+
+	ret = a6xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
+		flags, cmds, index, drawobj->timestamp, time);
+
+done:
+	trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
+		drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
+
+	kvfree(cmds);
+	return ret;
+}

+ 515 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_rpmh.c

@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <soc/qcom/cmd-db.h>
+#include <soc/qcom/tcs.h>
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_hfi.h"
+#include "kgsl_bus.h"
+#include "kgsl_device.h"
+
+struct rpmh_arc_vals {
+	u32 num;
+	const u16 *val;
+};
+
+struct bcm {
+	const char *name;
+	u32 buswidth;
+	u32 channels;
+	u32 unit;
+	u16 width;
+	u8 vcd;
+	bool fixed;
+};
+
+struct bcm_data {
+	__le32 unit;
+	__le16 width;
+	u8 vcd;
+	u8 reserved;
+};
+
+struct rpmh_bw_votes {
+	u32 wait_bitmask;
+	u32 num_cmds;
+	u32 *addrs;
+	u32 num_levels;
+	u32 **cmds;
+};
+
+#define ARC_VOTE_SET(pri, sec, vlvl) \
+	((((vlvl) & 0xFFFF) << 16) | (((sec) & 0xFF) << 8) | ((pri) & 0xFF))
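+
+/*
+ * For example, with hypothetical rail indices, ARC_VOTE_SET(2, 1, 0x80)
+ * packs the primary rail index 2 into bits [7:0], the secondary rail
+ * index 1 into bits [15:8] and the 0x80 voltage level into bits [31:16],
+ * giving 0x00800102.
+ */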
+
+static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id)
+{
+	size_t len = 0;
+
+	arc->val = cmd_db_read_aux_data(res_id, &len);
+
+	/*
+	 * cmd_db_read_aux_data() gives us a zero-padded table of
+	 * size len that contains the arc values. To determine the
+	 * number of arc values, we loop through the table and count
+	 * them until we get to the end of the buffer or hit the
+	 * zero padding.
+	 */
+	for (arc->num = 1; arc->num < (len >> 1); arc->num++) {
+		if (arc->val[arc->num - 1] != 0 &&  arc->val[arc->num] == 0)
+			break;
+	}
+
+	return 0;
+}
+
+static int setup_volt_dependency_tbl(uint32_t *votes,
+		struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
+		u16 *vlvl, unsigned int num_entries)
+{
+	int i, j, k;
+	uint16_t cur_vlvl;
+	bool found_match;
+
+	/*
+	 * i tracks current KGSL GPU frequency table entry
+	 * j tracks secondary rail voltage table entry
+	 * k tracks primary rail voltage table entry
+	 */
+	for (i = 0; i < num_entries; i++) {
+		found_match = false;
+
+		/* Look for a primary rail voltage that matches a VLVL level */
+		for (k = 0; k < pri_rail->num; k++) {
+			if (pri_rail->val[k] >= vlvl[i]) {
+				cur_vlvl = pri_rail->val[k];
+				found_match = true;
+				break;
+			}
+		}
+
+		/* If we did not find a matching VLVL level then abort */
+		if (!found_match)
+			return -EINVAL;
+
+		/*
+		 * Look for a secondary rail index whose VLVL value
+		 * is greater than or equal to the VLVL value of the
+		 * corresponding index of the primary rail
+		 */
+		for (j = 0; j < sec_rail->num; j++) {
+			if (sec_rail->val[j] >= cur_vlvl ||
+					j + 1 == sec_rail->num)
+				break;
+		}
+
+		if (j == sec_rail->num)
+			j = 0;
+
+		votes[i] = ARC_VOTE_SET(k, j, cur_vlvl);
+	}
+
+	return 0;
+}
+
+/* Generate a set of bandwidth votes for the list of BCMs */
+static void tcs_cmd_data(struct bcm *bcms, int count, u32 ab, u32 ib,
+		u32 *data)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		bool valid = true;
+		bool commit = false;
+		u64 avg, peak, x, y;
+
+		if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd)
+			commit = true;
+
+		/*
+		 * On a660, the "ACV" y vote should be 0x08 if there is a valid
+		 * vote and 0x00 if not. This is kind of hacky and a660 specific
+		 * but we can clean it up when we add a new target
+		 */
+		if (bcms[i].fixed) {
+			if (!ab && !ib)
+				data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0);
+			else
+				data[i] = BCM_TCS_CMD(commit, true, 0x0, 0x8);
+			continue;
+		}
+
+		/* Multiply the bandwidth by the width of the connection */
+		avg = ((u64) ab) * bcms[i].width;
+
+		/* And then divide by the total width across channels */
+		do_div(avg, bcms[i].buswidth * bcms[i].channels);
+
+		peak = ((u64) ib) * bcms[i].width;
+		do_div(peak, bcms[i].buswidth);
+
+		/* Input bandwidth value is in KBps */
+		x = avg * 1000ULL;
+		do_div(x, bcms[i].unit);
+
+		/* Input bandwidth value is in KBps */
+		y = peak * 1000ULL;
+		do_div(y, bcms[i].unit);
+
+		/*
+		 * If a bandwidth value was specified but the calculation ends
+		 * rounding down to zero, set a minimum level
+		 */
+		if (ab && x == 0)
+			x = 1;
+
+		if (ib && y == 0)
+			y = 1;
+
+		x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK);
+		y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK);
+
+		if (!x && !y)
+			valid = false;
+
+		data[i] = BCM_TCS_CMD(commit, valid, x, y);
+	}
+}
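+
+/*
+ * Worked example for the peak (ib) path above, using hypothetical cmd-db
+ * values: with ib = 2188000 KBps on a BCM where width = 4, buswidth = 4 and
+ * unit = 0x30000,
+ *   peak = 2188000 * 4 / 4          = 2188000
+ *   y    = 2188000 * 1000 / 0x30000 = 11128
+ * y is then clamped to BCM_TCS_CMD_VOTE_MASK and packed via BCM_TCS_CMD().
+ */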
+
+static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes)
+{
+	int i;
+
+	if (!votes)
+		return;
+
+	for (i = 0; votes->cmds && i < votes->num_levels; i++)
+		kfree(votes->cmds[i]);
+
+	kfree(votes->cmds);
+	kfree(votes->addrs);
+	kfree(votes);
+}
+
+/* Build the votes table from the specified bandwidth levels */
+static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms,
+		int bcm_count, u32 *levels, int levels_count)
+{
+	struct rpmh_bw_votes *votes;
+	int i;
+
+	votes = kzalloc(sizeof(*votes), GFP_KERNEL);
+	if (!votes)
+		return ERR_PTR(-ENOMEM);
+
+	votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL);
+	if (!votes->addrs) {
+		free_rpmh_bw_votes(votes);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL);
+	if (!votes->cmds) {
+		free_rpmh_bw_votes(votes);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	votes->num_cmds = bcm_count;
+	votes->num_levels = levels_count;
+
+	/* Get the cmd-db information for each BCM */
+	for (i = 0; i < bcm_count; i++) {
+		size_t l;
+		const struct bcm_data *data;
+
+		data = cmd_db_read_aux_data(bcms[i].name, &l);
+
+		votes->addrs[i] = cmd_db_read_addr(bcms[i].name);
+
+		bcms[i].unit = le32_to_cpu(data->unit);
+		bcms[i].width = le16_to_cpu(data->width);
+		bcms[i].vcd = data->vcd;
+	}
+
+	for (i = 0; i < bcm_count; i++) {
+		if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd)
+			votes->wait_bitmask |= (1 << i);
+	}
+
+	for (i = 0; i < levels_count; i++) {
+		votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL);
+		if (!votes->cmds[i]) {
+			free_rpmh_bw_votes(votes);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		tcs_cmd_data(bcms, bcm_count, 0, levels[i], votes->cmds[i]);
+	}
+
+	return votes;
+}
+
+/*
+ * setup_cx_arc_votes - Build the gmu cx voting table
+ * @adreno_dev: Pointer to adreno device
+ * @pri_rail: Pointer to primary power rail vlvl table
+ * @sec_rail: Pointer to second/dependent power rail vlvl table
+ * @freqs: List of GMU frequencies
+ * @vlvls: List of GMU voltage levels
+ *
+ * This function initializes the cx votes for all gmu frequencies
+ * for gmu dcvs
+ */
+static int setup_cx_arc_votes(struct adreno_device *adreno_dev,
+	struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
+	u32 *freqs, u32 *vlvls)
+{
+	/* Hardcoded values of GMU CX voltage levels */
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hfi *hfi = &gmu->hfi;
+	u16 gmu_cx_vlvl[MAX_CX_LEVELS];
+	u32 cx_votes[MAX_CX_LEVELS];
+	struct hfi_dcvstable_cmd *table = &hfi->dcvs_table;
+	int ret, i;
+
+	gmu_cx_vlvl[0] = 0;
+	gmu_cx_vlvl[1] = vlvls[0];
+	gmu_cx_vlvl[2] = vlvls[1];
+
+	table->gmu_level_num = 3;
+
+	table->cx_votes[0].freq = 0;
+	table->cx_votes[1].freq = freqs[0] / 1000;
+	table->cx_votes[2].freq = freqs[1] / 1000;
+
+	ret = setup_volt_dependency_tbl(cx_votes, pri_rail,
+			sec_rail, gmu_cx_vlvl, table->gmu_level_num);
+	if (!ret) {
+		for (i = 0; i < table->gmu_level_num; i++)
+			table->cx_votes[i].vote = cx_votes[i];
+	}
+
+	return ret;
+}
+
+static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl)
+{
+	u32 i;
+
+	/*
+	 * This means that the Gx level doesn't have a dependency on Cx level.
+	 * Return the same value to disable cx voting at GMU.
+	 */
+	if (vlvl == 0xffffffff) {
+		*hlvl = vlvl;
+		return 0;
+	}
+
+	for (i = 0; i < cx_rail->num; i++) {
+		if (cx_rail->val[i] >= vlvl) {
+			*hlvl = i;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * setup_gx_arc_votes - Build the gpu dcvs voting table
+ * @adreno_dev: Pointer to adreno device
+ * @pri_rail: Pointer to primary power rail vlvl table
+ * @sec_rail: Pointer to second/dependent power rail vlvl table
+ * @cx_rail: Pointer to cx power rail vlvl table
+ *
+ * This function initializes the gx votes for all gpu frequencies
+ * for gpu dcvs
+ */
+static int setup_gx_arc_votes(struct adreno_device *adreno_dev,
+	struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
+	struct rpmh_arc_vals *cx_rail)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table;
+	u32 index;
+	u16 vlvl_tbl[MAX_GX_LEVELS];
+	u32 gx_votes[MAX_GX_LEVELS];
+	int ret, i;
+
+	/* Add the zero powerlevel for the perf table */
+	table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1;
+
+	if (table->gpu_level_num > pri_rail->num ||
+		table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) {
+		dev_err(&gmu->pdev->dev,
+			"Defined more GPU DCVS levels than RPMh can support\n");
+		return -ERANGE;
+	}
+
+	memset(vlvl_tbl, 0, sizeof(vlvl_tbl));
+
+	table->gx_votes[0].freq = 0;
+	table->gx_votes[0].cx_vote = 0;
+	/* Disable cx vote in gmu dcvs table if it is not supported in DT */
+	if (pwr->pwrlevels[0].cx_level == 0xffffffff)
+		table->gx_votes[0].cx_vote = 0xffffffff;
+
+	/* GMU power levels are in ascending order */
+	for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) {
+		u32 cx_vlvl = pwr->pwrlevels[i].cx_level;
+
+		vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level;
+		table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000;
+
+		ret = to_cx_hlvl(cx_rail, cx_vlvl,
+				&table->gx_votes[index].cx_vote);
+		if (ret) {
+			dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n",
+					cx_vlvl);
+			return ret;
+		}
+	}
+
+	ret = setup_volt_dependency_tbl(gx_votes, pri_rail,
+			sec_rail, vlvl_tbl, table->gpu_level_num);
+	if (!ret) {
+		for (i = 0; i < table->gpu_level_num; i++)
+			table->gx_votes[i].vote = gx_votes[i];
+	}
+
+	return ret;
+}
+
+static int build_dcvs_table(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct a6xx_hfi *hfi = &gmu->hfi;
+	struct rpmh_arc_vals gx_arc, cx_arc, mx_arc;
+	int ret;
+
+	ret = CMD_MSG_HDR(hfi->dcvs_table, H2F_MSG_PERF_TBL);
+	if (ret)
+		return ret;
+
+	ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl");
+	if (ret)
+		return ret;
+
+	ret = rpmh_arc_cmds(&cx_arc, "cx.lvl");
+	if (ret)
+		return ret;
+
+	ret = rpmh_arc_cmds(&mx_arc, "mx.lvl");
+	if (ret)
+		return ret;
+
+	ret = setup_cx_arc_votes(adreno_dev, &cx_arc, &mx_arc,
+					gmu->freqs, gmu->vlvls);
+	if (ret)
+		return ret;
+
+	return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc);
+}
+
+/*
+ * List of Bus Control Modules (BCMs) that need to be configured for the GPU
+ * to access DDR. For each bus level we will generate one vote for each BCM.
+ */
+static struct bcm a660_ddr_bcms[] = {
+	{ .name = "SH0", .buswidth = 16 },
+	{ .name = "MC0", .buswidth = 4 },
+	{ .name = "ACV", .fixed = true },
+};
+
+/* Same as above, but for the CNOC BCMs */
+static struct bcm a660_cnoc_bcms[] = {
+	{ .name = "CN0", .buswidth = 4 },
+};
+
+static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd,
+		struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc)
+{
+	u32 i, j;
+
+	cmd->bw_level_num = ddr->num_levels;
+	cmd->ddr_cmds_num = ddr->num_cmds;
+	cmd->ddr_wait_bitmask = ddr->wait_bitmask;
+
+	for (i = 0; i < ddr->num_cmds; i++)
+		cmd->ddr_cmd_addrs[i] = ddr->addrs[i];
+
+	for (i = 0; i < ddr->num_levels; i++)
+		for (j = 0; j < ddr->num_cmds; j++)
+			cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j];
+
+	if (!cnoc)
+		return;
+
+	cmd->cnoc_cmds_num = cnoc->num_cmds;
+	cmd->cnoc_wait_bitmask = cnoc->wait_bitmask;
+
+	for (i = 0; i < cnoc->num_cmds; i++)
+		cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i];
+
+	for (i = 0; i < cnoc->num_levels; i++)
+		for (j = 0; j < cnoc->num_cmds; j++)
+			cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j];
+}
+
+static int build_bw_table(struct adreno_device *adreno_dev)
+{
+	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct rpmh_bw_votes *ddr, *cnoc = NULL;
+	u32 *cnoc_table;
+	u32 count;
+	int ret;
+
+	ddr = build_rpmh_bw_votes(a660_ddr_bcms, ARRAY_SIZE(a660_ddr_bcms),
+		pwr->ddr_table, pwr->ddr_table_count);
+	if (IS_ERR(ddr))
+		return PTR_ERR(ddr);
+
+	cnoc_table = kgsl_bus_get_table(device->pdev, "qcom,bus-table-cnoc",
+		&count);
+
+	if (count > 0)
+		cnoc = build_rpmh_bw_votes(a660_cnoc_bcms,
+			ARRAY_SIZE(a660_cnoc_bcms), cnoc_table, count);
+
+	kfree(cnoc_table);
+
+	if (IS_ERR(cnoc)) {
+		free_rpmh_bw_votes(ddr);
+		return PTR_ERR(cnoc);
+	}
+
+	ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL);
+	if (ret)
+		return ret;
+
+	build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc);
+
+	free_rpmh_bw_votes(ddr);
+	free_rpmh_bw_votes(cnoc);
+
+	return 0;
+}
+
+int a6xx_build_rpmh_tables(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = build_dcvs_table(adreno_dev);
+	if (ret)
+		return ret;
+
+	return build_bw_table(adreno_dev);
+}

+ 2306 - 0
qcom/opensource/graphics-kernel/adreno_a6xx_snapshot.c

@@ -0,0 +1,2306 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "adreno_snapshot.h"
+
+#define A6XX_NUM_CTXTS 2
+#define A6XX_NUM_AXI_ARB_BLOCKS 2
+#define A6XX_NUM_XIN_AXI_BLOCKS 5
+#define A6XX_NUM_XIN_CORE_BLOCKS 4
+
+static const unsigned int a6xx_gras_cluster[] = {
+	0x8000, 0x8006, 0x8010, 0x8092, 0x8094, 0x809D, 0x80A0, 0x80A6,
+	0x80AF, 0x80F1, 0x8100, 0x8107, 0x8109, 0x8109, 0x8110, 0x8110,
+	0x8400, 0x840B,
+};
+
+static const unsigned int a6xx_ps_cluster_rac[] = {
+	0x8800, 0x8806, 0x8809, 0x8811, 0x8818, 0x881E, 0x8820, 0x8865,
+	0x8870, 0x8879, 0x8880, 0x8889, 0x8890, 0x8891, 0x8898, 0x8898,
+	0x88C0, 0x88C1, 0x88D0, 0x88E3, 0x8900, 0x890C, 0x890F, 0x891A,
+	0x8C00, 0x8C01, 0x8C08, 0x8C10, 0x8C17, 0x8C1F, 0x8C26, 0x8C33,
+};
+
+static const unsigned int a6xx_ps_cluster_rbp[] = {
+	0x88F0, 0x88F3, 0x890D, 0x890E, 0x8927, 0x8928, 0x8BF0, 0x8BF1,
+	0x8C02, 0x8C07, 0x8C11, 0x8C16, 0x8C20, 0x8C25,
+};
+
+static const unsigned int a6xx_vpc_ps_cluster[] = {
+	0x9200, 0x9216, 0x9218, 0x9236, 0x9300, 0x9306,
+};
+
+static const unsigned int a6xx_fe_cluster[] = {
+	0x9300, 0x9306, 0x9800, 0x9806, 0x9B00, 0x9B07, 0xA000, 0xA009,
+	0xA00E, 0xA0EF, 0xA0F8, 0xA0F8,
+};
+
+static const unsigned int a660_fe_cluster[] = {
+	0x9807, 0x9807,
+};
+
+static const unsigned int a6xx_pc_vs_cluster[] = {
+	0x9100, 0x9108, 0x9300, 0x9306, 0x9980, 0x9981, 0x9B00, 0x9B07,
+};
+
+static const unsigned int a650_isense_registers[] = {
+	0x22C00, 0x22C19, 0x22C26, 0x22C2D, 0x22C2F, 0x22C36, 0x22C40, 0x22C44,
+	0x22C50, 0x22C57, 0x22C60, 0x22C67, 0x22C80, 0x22C87, 0x22D25, 0x22D2A,
+	0x22D2C, 0x22D32, 0x22D3E, 0x22D3F, 0x22D4E, 0x22D55, 0x22D58, 0x22D60,
+	0x22D64, 0x22D64, 0x22D66, 0x22D66, 0x22D68, 0x22D6B, 0x22D6E, 0x22D76,
+	0x22D78, 0x22D78, 0x22D80, 0x22D87, 0x22D90, 0x22D97, 0x22DA0, 0x22DA0,
+	0x22DB0, 0x22DB7, 0x22DC0, 0x22DC2, 0x22DC4, 0x22DE3, 0x2301A, 0x2301A,
+	0x2301D, 0x2302A, 0x23120, 0x23121, 0x23133, 0x23133, 0x23156, 0x23157,
+	0x23165, 0x23165, 0x2316D, 0x2316D, 0x23180, 0x23191,
+};
+
+static const struct sel_reg {
+	unsigned int host_reg;
+	unsigned int cd_reg;
+	unsigned int val;
+} _a6xx_rb_rac_aperture = {
+	.host_reg = A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0x0,
+},
+_a6xx_rb_rbp_aperture = {
+	.host_reg = A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0x9,
+};
+
+static struct a6xx_cluster_registers {
+	unsigned int id;
+	const unsigned int *regs;
+	unsigned int num_sets;
+	const struct sel_reg *sel;
+	unsigned int offset0;
+	unsigned int offset1;
+} a6xx_clusters[] = {
+	{ CP_CLUSTER_GRAS, a6xx_gras_cluster, ARRAY_SIZE(a6xx_gras_cluster)/2,
+		NULL },
+	{ CP_CLUSTER_PS, a6xx_ps_cluster_rac, ARRAY_SIZE(a6xx_ps_cluster_rac)/2,
+		&_a6xx_rb_rac_aperture },
+	{ CP_CLUSTER_PS, a6xx_ps_cluster_rbp, ARRAY_SIZE(a6xx_ps_cluster_rbp)/2,
+		&_a6xx_rb_rbp_aperture },
+	{ CP_CLUSTER_PS, a6xx_vpc_ps_cluster, ARRAY_SIZE(a6xx_vpc_ps_cluster)/2,
+		NULL },
+	{ CP_CLUSTER_FE, a6xx_fe_cluster, ARRAY_SIZE(a6xx_fe_cluster)/2,
+		NULL },
+	{ CP_CLUSTER_PC_VS, a6xx_pc_vs_cluster,
+		ARRAY_SIZE(a6xx_pc_vs_cluster)/2, NULL },
+	{ CP_CLUSTER_FE, a660_fe_cluster, ARRAY_SIZE(a660_fe_cluster)/2,
+		NULL },
+};
+
+struct a6xx_cluster_regs_info {
+	struct a6xx_cluster_registers *cluster;
+	unsigned int ctxt_id;
+};
+
+static const unsigned int a6xx_sp_vs_hlsq_cluster[] = {
+	0xB800, 0xB803, 0xB820, 0xB822,
+};
+
+static const unsigned int a6xx_sp_vs_sp_cluster[] = {
+	0xA800, 0xA824, 0xA830, 0xA83C, 0xA840, 0xA864, 0xA870, 0xA895,
+	0xA8A0, 0xA8AF, 0xA8C0, 0xA8C3,
+};
+
+static const unsigned int a6xx_hlsq_duplicate_cluster[] = {
+	0xBB10, 0xBB11, 0xBB20, 0xBB29,
+};
+
+static const unsigned int a6xx_sp_duplicate_cluster[] = {
+	0xAB00, 0xAB00, 0xAB04, 0xAB05, 0xAB10, 0xAB1B, 0xAB20, 0xAB20,
+};
+
+static const unsigned int a6xx_tp_duplicate_cluster[] = {
+	0xB300, 0xB307, 0xB309, 0xB309, 0xB380, 0xB382,
+};
+
+static const unsigned int a6xx_sp_ps_hlsq_cluster[] = {
+	0xB980, 0xB980, 0xB982, 0xB987, 0xB990, 0xB99B, 0xB9A0, 0xB9A2,
+	0xB9C0, 0xB9C9,
+};
+
+static const unsigned int a6xx_sp_ps_sp_cluster[] = {
+	0xA980, 0xA9A8, 0xA9B0, 0xA9BC, 0xA9D0, 0xA9D3, 0xA9E0, 0xA9F3,
+	0xAA00, 0xAA00, 0xAA30, 0xAA31, 0xAAF2, 0xAAF2,
+};
+
+static const unsigned int a6xx_sp_ps_sp_2d_cluster[] = {
+	0xACC0, 0xACC0,
+};
+
+static const unsigned int a6xx_sp_ps_tp_cluster[] = {
+	0xB180, 0xB183, 0xB190, 0xB191,
+};
+
+static const unsigned int a6xx_sp_ps_tp_2d_cluster[] = {
+	0xB4C0, 0xB4D1,
+};
+
+static struct a6xx_cluster_dbgahb_registers {
+	unsigned int id;
+	unsigned int regbase;
+	unsigned int statetype;
+	const unsigned int *regs;
+	unsigned int num_sets;
+	unsigned int offset0;
+	unsigned int offset1;
+} a6xx_dbgahb_ctx_clusters[] = {
+	{ CP_CLUSTER_SP_VS, 0x0002E000, 0x41, a6xx_sp_vs_hlsq_cluster,
+		ARRAY_SIZE(a6xx_sp_vs_hlsq_cluster) / 2 },
+	{ CP_CLUSTER_SP_VS, 0x0002A000, 0x21, a6xx_sp_vs_sp_cluster,
+		ARRAY_SIZE(a6xx_sp_vs_sp_cluster) / 2 },
+	{ CP_CLUSTER_SP_VS, 0x0002E000, 0x41, a6xx_hlsq_duplicate_cluster,
+		ARRAY_SIZE(a6xx_hlsq_duplicate_cluster) / 2 },
+	{ CP_CLUSTER_SP_VS, 0x0002A000, 0x21, a6xx_sp_duplicate_cluster,
+		ARRAY_SIZE(a6xx_sp_duplicate_cluster) / 2 },
+	{ CP_CLUSTER_SP_VS, 0x0002C000, 0x1, a6xx_tp_duplicate_cluster,
+		ARRAY_SIZE(a6xx_tp_duplicate_cluster) / 2 },
+	{ CP_CLUSTER_SP_PS, 0x0002E000, 0x42, a6xx_sp_ps_hlsq_cluster,
+		ARRAY_SIZE(a6xx_sp_ps_hlsq_cluster) / 2 },
+	{ CP_CLUSTER_SP_PS, 0x0002A000, 0x22, a6xx_sp_ps_sp_cluster,
+		ARRAY_SIZE(a6xx_sp_ps_sp_cluster) / 2 },
+	{ CP_CLUSTER_SP_PS, 0x0002B000, 0x26, a6xx_sp_ps_sp_2d_cluster,
+		ARRAY_SIZE(a6xx_sp_ps_sp_2d_cluster) / 2 },
+	{ CP_CLUSTER_SP_PS, 0x0002C000, 0x2, a6xx_sp_ps_tp_cluster,
+		ARRAY_SIZE(a6xx_sp_ps_tp_cluster) / 2 },
+	{ CP_CLUSTER_SP_PS, 0x0002D000, 0x6, a6xx_sp_ps_tp_2d_cluster,
+		ARRAY_SIZE(a6xx_sp_ps_tp_2d_cluster) / 2 },
+	{ CP_CLUSTER_SP_PS, 0x0002E000, 0x42, a6xx_hlsq_duplicate_cluster,
+		ARRAY_SIZE(a6xx_hlsq_duplicate_cluster) / 2 },
+	{ CP_CLUSTER_SP_PS, 0x0002A000, 0x22, a6xx_sp_duplicate_cluster,
+		ARRAY_SIZE(a6xx_sp_duplicate_cluster) / 2 },
+	{ CP_CLUSTER_SP_PS, 0x0002C000, 0x2, a6xx_tp_duplicate_cluster,
+		ARRAY_SIZE(a6xx_tp_duplicate_cluster) / 2 },
+};
+
+struct a6xx_cluster_dbgahb_regs_info {
+	struct a6xx_cluster_dbgahb_registers *cluster;
+	unsigned int ctxt_id;
+};
+
+static const unsigned int a6xx_hlsq_non_ctx_registers[] = {
+	0xBE00, 0xBE01, 0xBE04, 0xBE05, 0xBE08, 0xBE09, 0xBE10, 0xBE15,
+	0xBE20, 0xBE23,
+};
+
+static const unsigned int a6xx_sp_non_ctx_registers[] = {
+	0xAE00, 0xAE04, 0xAE0C, 0xAE0C, 0xAE0F, 0xAE2B, 0xAE30, 0xAE32,
+	0xAE35, 0xAE35, 0xAE3A, 0xAE3F, 0xAE50, 0xAE52,
+};
+
+static const unsigned int a6xx_tp_non_ctx_registers[] = {
+	0xB600, 0xB601, 0xB604, 0xB605, 0xB610, 0xB61B, 0xB620, 0xB623,
+};
+
+static struct a6xx_non_ctx_dbgahb_registers {
+	unsigned int regbase;
+	unsigned int statetype;
+	const unsigned int *regs;
+	unsigned int num_sets;
+	unsigned int offset;
+} a6xx_non_ctx_dbgahb[] = {
+	{ 0x0002F800, 0x40, a6xx_hlsq_non_ctx_registers,
+		ARRAY_SIZE(a6xx_hlsq_non_ctx_registers) / 2 },
+	{ 0x0002B800, 0x20, a6xx_sp_non_ctx_registers,
+		ARRAY_SIZE(a6xx_sp_non_ctx_registers) / 2 },
+	{ 0x0002D800, 0x0, a6xx_tp_non_ctx_registers,
+		ARRAY_SIZE(a6xx_tp_non_ctx_registers) / 2 },
+};
+
+static const unsigned int a6xx_vbif_registers[] = {
+	/* VBIF */
+	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x302D, 0x3030, 0x3031,
+	0x3034, 0x3036, 0x303C, 0x303D, 0x3040, 0x3040, 0x3042, 0x3042,
+	0x3049, 0x3049, 0x3058, 0x3058, 0x305A, 0x3061, 0x3064, 0x3068,
+	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
+	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
+	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
+	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
+	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x3154, 0x3154,
+	0x3156, 0x3156, 0x3158, 0x3158, 0x315A, 0x315A, 0x315C, 0x315C,
+	0x315E, 0x315E, 0x3160, 0x3160, 0x3162, 0x3162, 0x340C, 0x340C,
+	0x3410, 0x3410, 0x3800, 0x3801,
+};
+
+static const unsigned int a6xx_gbif_registers[] = {
+	/* GBIF */
+	0x3C00, 0X3C0B, 0X3C40, 0X3C47, 0X3CC0, 0X3CD1, 0xE3A, 0xE3A,
+};
+
+static const unsigned int a6xx_gbif_reinit_registers[] = {
+	/* GBIF with REINIT */
+	0x3C00, 0X3C0B, 0X3C40, 0X3C47, 0X3C49, 0X3C4A, 0X3CC0, 0X3CD1,
+	0xE3A, 0xE3A, 0x0016, 0x0017,
+};
+
+static const unsigned int a6xx_rb_rac_registers[] = {
+	0x8E04, 0x8E05, 0x8E07, 0x8E08, 0x8E10, 0x8E1C, 0x8E20, 0x8E25,
+	0x8E28, 0x8E28, 0x8E2C, 0x8E2F, 0x8E50, 0x8E52,
+};
+
+static const unsigned int a6xx_rb_rbp_registers[] = {
+	0x8E01, 0x8E01, 0x8E0C, 0x8E0C, 0x8E3B, 0x8E3E, 0x8E40, 0x8E43,
+	0x8E53, 0x8E5F, 0x8E70, 0x8E77,
+};
+
+/*
+ * Set of registers to dump for A6XX on snapshot.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+
+static const unsigned int a6xx_registers[] = {
+	/* RBBM */
+	0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001B,
+	0x001e, 0x0032, 0x0038, 0x003C, 0x0042, 0x0042, 0x0044, 0x0044,
+	0x0047, 0x0047, 0x0056, 0x0056, 0x00AD, 0x00AE, 0x00B0, 0x00FB,
+	0x0100, 0x011D, 0x0200, 0x020D, 0x0218, 0x023D, 0x0400, 0x04F9,
+	0x0500, 0x0500, 0x0505, 0x050B, 0x050E, 0x0511, 0x0533, 0x0533,
+	0x0540, 0x0555,
+	/* CP */
+	0x0800, 0x0803, 0x0806, 0x0808, 0x0810, 0x0813, 0x0820, 0x0821,
+	0x0823, 0x0824, 0x0826, 0x0827, 0x0830, 0x0833, 0x0840, 0x0845,
+	0x084F, 0x086F, 0x0880, 0x088A, 0x08A0, 0x08AB, 0x08C0, 0x08C4,
+	0x08D0, 0x08DD, 0x08F0, 0x08F3, 0x0900, 0x0903, 0x0908, 0x0911,
+	0x0928, 0x093E, 0x0942, 0x094D, 0x0980, 0x0984, 0x098D, 0x0996,
+	0x0998, 0x099E, 0x09A0, 0x09A6, 0x09A8, 0x09AE, 0x09B0, 0x09B1,
+	0x09C2, 0x09C8, 0x0A00, 0x0A03,
+	/* VSC */
+	0x0C00, 0x0C04, 0x0C06, 0x0C06, 0x0C10, 0x0CD9, 0x0E00, 0x0E0E,
+	/* UCHE */
+	0x0E10, 0x0E13, 0x0E17, 0x0E19, 0x0E1C, 0x0E2B, 0x0E30, 0x0E32,
+	0x0E38, 0x0E39,
+	/* GRAS */
+	0x8600, 0x8601, 0x8610, 0x861B, 0x8620, 0x8620, 0x8628, 0x862B,
+	0x8630, 0x8637,
+	/* VPC */
+	0x9600, 0x9604, 0x9624, 0x9637,
+	/* PC */
+	0x9E00, 0x9E01, 0x9E03, 0x9E0E, 0x9E11, 0x9E16, 0x9E19, 0x9E19,
+	0x9E1C, 0x9E1C, 0x9E20, 0x9E23, 0x9E30, 0x9E31, 0x9E34, 0x9E34,
+	0x9E70, 0x9E72, 0x9E78, 0x9E79, 0x9E80, 0x9FFF,
+	/* VFD */
+	0xA600, 0xA601, 0xA603, 0xA603, 0xA60A, 0xA60A, 0xA610, 0xA617,
+	0xA630, 0xA630,
+	/* HLSQ */
+	0xD002, 0xD003,
+};
+
+static const unsigned int a660_registers[] = {
+	/* UCHE */
+	0x0E3C, 0x0E3C,
+	/* LPAC RBBM */
+	0x05FC, 0x05FF,
+	/* LPAC CP */
+	0x0B00, 0x0B40, 0x0B80, 0x0B83,
+};
+
+/*
+ * Set of registers to dump for A6XX before actually triggering crash dumper.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+static const unsigned int a6xx_pre_crashdumper_registers[] = {
+	/* RBBM: RBBM_STATUS - RBBM_STATUS3 */
+	0x210, 0x213,
+	/* CP: CP_STATUS_1 */
+	0x825, 0x825,
+};
+
+static const unsigned int a6xx_gmu_wrapper_registers[] = {
+	/* GMU CX */
+	0x1f840, 0x1f840, 0x1f844, 0x1f845, 0x1f887, 0x1f889, 0x1f8d0, 0x1f8d0,
+	/* GMU AO */
+	0x23b0C, 0x23b0E, 0x23b15, 0x23b15,
+};
+
+static const unsigned int a6xx_holi_gmu_wrapper_registers[] = {
+	/* GMU SPTPRAC */
+	0x1a880, 0x1a881,
+	/* GMU CX */
+	0x1f840, 0x1f840, 0x1f844, 0x1f845, 0x1f887, 0x1f889, 0x1f8d0, 0x1f8d0,
+	/* GMU AO */
+	0x23b0c, 0x23b0e, 0x23b15, 0x23b15,
+};
+
+enum a6xx_debugbus_id {
+	A6XX_DBGBUS_CP           = 0x1,
+	A6XX_DBGBUS_RBBM         = 0x2,
+	A6XX_DBGBUS_VBIF         = 0x3,
+	A6XX_DBGBUS_HLSQ         = 0x4,
+	A6XX_DBGBUS_UCHE         = 0x5,
+	A6XX_DBGBUS_DPM          = 0x6,
+	A6XX_DBGBUS_TESS         = 0x7,
+	A6XX_DBGBUS_PC           = 0x8,
+	A6XX_DBGBUS_VFDP         = 0x9,
+	A6XX_DBGBUS_VPC          = 0xa,
+	A6XX_DBGBUS_TSE          = 0xb,
+	A6XX_DBGBUS_RAS          = 0xc,
+	A6XX_DBGBUS_VSC          = 0xd,
+	A6XX_DBGBUS_COM          = 0xe,
+	A6XX_DBGBUS_LRZ          = 0x10,
+	A6XX_DBGBUS_A2D          = 0x11,
+	A6XX_DBGBUS_CCUFCHE      = 0x12,
+	A6XX_DBGBUS_GMU_CX       = 0x13,
+	A6XX_DBGBUS_RBP          = 0x14,
+	A6XX_DBGBUS_DCS          = 0x15,
+	A6XX_DBGBUS_RBBM_CFG     = 0x16,
+	A6XX_DBGBUS_CX           = 0x17,
+	A6XX_DBGBUS_GMU_GX       = 0x18,
+	A6XX_DBGBUS_TPFCHE       = 0x19,
+	A6XX_DBGBUS_GBIF_GX      = 0x1a,
+	A6XX_DBGBUS_GPC          = 0x1d,
+	A6XX_DBGBUS_LARC         = 0x1e,
+	A6XX_DBGBUS_HLSQ_SPTP    = 0x1f,
+	A6XX_DBGBUS_RB_0         = 0x20,
+	A6XX_DBGBUS_RB_1         = 0x21,
+	A6XX_DBGBUS_RB_2         = 0x22,
+	A6XX_DBGBUS_UCHE_WRAPPER = 0x24,
+	A6XX_DBGBUS_CCU_0        = 0x28,
+	A6XX_DBGBUS_CCU_1        = 0x29,
+	A6XX_DBGBUS_CCU_2        = 0x2a,
+	A6XX_DBGBUS_VFD_0        = 0x38,
+	A6XX_DBGBUS_VFD_1        = 0x39,
+	A6XX_DBGBUS_VFD_2        = 0x3a,
+	A6XX_DBGBUS_VFD_3        = 0x3b,
+	A6XX_DBGBUS_VFD_4        = 0x3c,
+	A6XX_DBGBUS_VFD_5        = 0x3d,
+	A6XX_DBGBUS_SP_0         = 0x40,
+	A6XX_DBGBUS_SP_1         = 0x41,
+	A6XX_DBGBUS_SP_2         = 0x42,
+	A6XX_DBGBUS_TPL1_0       = 0x48,
+	A6XX_DBGBUS_TPL1_1       = 0x49,
+	A6XX_DBGBUS_TPL1_2       = 0x4a,
+	A6XX_DBGBUS_TPL1_3       = 0x4b,
+	A6XX_DBGBUS_TPL1_4       = 0x4c,
+	A6XX_DBGBUS_TPL1_5       = 0x4d,
+	A6XX_DBGBUS_SPTP_0       = 0x58,
+	A6XX_DBGBUS_SPTP_1       = 0x59,
+	A6XX_DBGBUS_SPTP_2       = 0x5a,
+	A6XX_DBGBUS_SPTP_3       = 0x5b,
+	A6XX_DBGBUS_SPTP_4       = 0x5c,
+	A6XX_DBGBUS_SPTP_5       = 0x5d,
+};
+
+static const struct adreno_debugbus_block a6xx_dbgc_debugbus_blocks[] = {
+	{ A6XX_DBGBUS_CP, 0x100, },
+	{ A6XX_DBGBUS_RBBM, 0x100, },
+	{ A6XX_DBGBUS_HLSQ, 0x100, },
+	{ A6XX_DBGBUS_UCHE, 0x100, },
+	{ A6XX_DBGBUS_DPM, 0x100, },
+	{ A6XX_DBGBUS_TESS, 0x100, },
+	{ A6XX_DBGBUS_PC, 0x100, },
+	{ A6XX_DBGBUS_VFDP, 0x100, },
+	{ A6XX_DBGBUS_VPC, 0x100, },
+	{ A6XX_DBGBUS_TSE, 0x100, },
+	{ A6XX_DBGBUS_RAS, 0x100, },
+	{ A6XX_DBGBUS_VSC, 0x100, },
+	{ A6XX_DBGBUS_COM, 0x100, },
+	{ A6XX_DBGBUS_LRZ, 0x100, },
+	{ A6XX_DBGBUS_A2D, 0x100, },
+	{ A6XX_DBGBUS_CCUFCHE, 0x100, },
+	{ A6XX_DBGBUS_RBP, 0x100, },
+	{ A6XX_DBGBUS_DCS, 0x100, },
+	{ A6XX_DBGBUS_RBBM_CFG, 0x100, },
+	{ A6XX_DBGBUS_GMU_GX, 0x100, },
+	{ A6XX_DBGBUS_TPFCHE, 0x100, },
+	{ A6XX_DBGBUS_GPC, 0x100, },
+	{ A6XX_DBGBUS_LARC, 0x100, },
+	{ A6XX_DBGBUS_HLSQ_SPTP, 0x100, },
+	{ A6XX_DBGBUS_RB_0, 0x100, },
+	{ A6XX_DBGBUS_RB_1, 0x100, },
+	{ A6XX_DBGBUS_UCHE_WRAPPER, 0x100, },
+	{ A6XX_DBGBUS_CCU_0, 0x100, },
+	{ A6XX_DBGBUS_CCU_1, 0x100, },
+	{ A6XX_DBGBUS_VFD_0, 0x100, },
+	{ A6XX_DBGBUS_VFD_1, 0x100, },
+	{ A6XX_DBGBUS_VFD_2, 0x100, },
+	{ A6XX_DBGBUS_VFD_3, 0x100, },
+	{ A6XX_DBGBUS_SP_0, 0x100, },
+	{ A6XX_DBGBUS_SP_1, 0x100, },
+	{ A6XX_DBGBUS_TPL1_0, 0x100, },
+	{ A6XX_DBGBUS_TPL1_1, 0x100, },
+	{ A6XX_DBGBUS_TPL1_2, 0x100, },
+	{ A6XX_DBGBUS_TPL1_3, 0x100, },
+};
+
+static const struct adreno_debugbus_block a6xx_vbif_debugbus_blocks = {
+	A6XX_DBGBUS_VBIF, 0x100,
+};
+
+static const struct adreno_debugbus_block a6xx_cx_dbgc_debugbus_blocks[] = {
+	{ A6XX_DBGBUS_GMU_CX, 0x100, },
+	{ A6XX_DBGBUS_CX, 0x100, },
+};
+
+static const struct adreno_debugbus_block a650_dbgc_debugbus_blocks[] = {
+	{ A6XX_DBGBUS_RB_2, 0x100, },
+	{ A6XX_DBGBUS_CCU_2, 0x100, },
+	{ A6XX_DBGBUS_VFD_4, 0x100, },
+	{ A6XX_DBGBUS_VFD_5, 0x100, },
+	{ A6XX_DBGBUS_SP_2, 0x100, },
+	{ A6XX_DBGBUS_TPL1_4, 0x100, },
+	{ A6XX_DBGBUS_TPL1_5, 0x100, },
+	{ A6XX_DBGBUS_SPTP_0, 0x100, },
+	{ A6XX_DBGBUS_SPTP_1, 0x100, },
+	{ A6XX_DBGBUS_SPTP_2, 0x100, },
+	{ A6XX_DBGBUS_SPTP_3, 0x100, },
+	{ A6XX_DBGBUS_SPTP_4, 0x100, },
+	{ A6XX_DBGBUS_SPTP_5, 0x100, },
+};
+
+#define A6XX_NUM_SHADER_BANKS 3
+#define A6XX_SHADER_STATETYPE_SHIFT 8
+
+enum a6xx_shader_obj {
+	A6XX_TP0_TMO_DATA               = 0x9,
+	A6XX_TP0_SMO_DATA               = 0xa,
+	A6XX_TP0_MIPMAP_BASE_DATA       = 0xb,
+	A6XX_TP1_TMO_DATA               = 0x19,
+	A6XX_TP1_SMO_DATA               = 0x1a,
+	A6XX_TP1_MIPMAP_BASE_DATA       = 0x1b,
+	A6XX_SP_INST_DATA               = 0x29,
+	A6XX_SP_LB_0_DATA               = 0x2a,
+	A6XX_SP_LB_1_DATA               = 0x2b,
+	A6XX_SP_LB_2_DATA               = 0x2c,
+	A6XX_SP_LB_3_DATA               = 0x2d,
+	A6XX_SP_LB_4_DATA               = 0x2e,
+	A6XX_SP_LB_5_DATA               = 0x2f,
+	A6XX_SP_CB_BINDLESS_DATA        = 0x30,
+	A6XX_SP_CB_LEGACY_DATA          = 0x31,
+	A6XX_SP_UAV_DATA                = 0x32,
+	A6XX_SP_INST_TAG                = 0x33,
+	A6XX_SP_CB_BINDLESS_TAG         = 0x34,
+	A6XX_SP_TMO_UMO_TAG             = 0x35,
+	A6XX_SP_SMO_TAG                 = 0x36,
+	A6XX_SP_STATE_DATA              = 0x37,
+	A6XX_HLSQ_CHUNK_CVS_RAM         = 0x49,
+	A6XX_HLSQ_CHUNK_CPS_RAM         = 0x4a,
+	A6XX_HLSQ_CHUNK_CVS_RAM_TAG     = 0x4b,
+	A6XX_HLSQ_CHUNK_CPS_RAM_TAG     = 0x4c,
+	A6XX_HLSQ_ICB_CVS_CB_BASE_TAG   = 0x4d,
+	A6XX_HLSQ_ICB_CPS_CB_BASE_TAG   = 0x4e,
+	A6XX_HLSQ_CVS_MISC_RAM          = 0x50,
+	A6XX_HLSQ_CPS_MISC_RAM          = 0x51,
+	A6XX_HLSQ_INST_RAM              = 0x52,
+	A6XX_HLSQ_GFX_CVS_CONST_RAM     = 0x53,
+	A6XX_HLSQ_GFX_CPS_CONST_RAM     = 0x54,
+	A6XX_HLSQ_CVS_MISC_RAM_TAG      = 0x55,
+	A6XX_HLSQ_CPS_MISC_RAM_TAG      = 0x56,
+	A6XX_HLSQ_INST_RAM_TAG          = 0x57,
+	A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG = 0x58,
+	A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG = 0x59,
+	A6XX_HLSQ_PWR_REST_RAM          = 0x5a,
+	A6XX_HLSQ_PWR_REST_TAG          = 0x5b,
+	A6XX_HLSQ_DATAPATH_META         = 0x60,
+	A6XX_HLSQ_FRONTEND_META         = 0x61,
+	A6XX_HLSQ_INDIRECT_META         = 0x62,
+	A6XX_HLSQ_BACKEND_META          = 0x63,
+	A6XX_SP_LB_6_DATA               = 0x70,
+	A6XX_SP_LB_7_DATA               = 0x71,
+	A6XX_HLSQ_INST_RAM_1            = 0x73,
+};
+
+struct a6xx_shader_block {
+	unsigned int statetype;
+	unsigned int sz;
+	uint64_t offset;
+};
+
+struct a6xx_shader_block_info {
+	struct a6xx_shader_block *block;
+	unsigned int bank;
+	uint64_t offset;
+};
+
+static struct a6xx_shader_block a6xx_shader_blocks[] = {
+	{A6XX_TP0_TMO_DATA,               0x200},
+	{A6XX_TP0_SMO_DATA,               0x80,},
+	{A6XX_TP0_MIPMAP_BASE_DATA,       0x3C0},
+	{A6XX_TP1_TMO_DATA,               0x200},
+	{A6XX_TP1_SMO_DATA,               0x80,},
+	{A6XX_TP1_MIPMAP_BASE_DATA,       0x3C0},
+	{A6XX_SP_INST_DATA,               0x800},
+	{A6XX_SP_LB_0_DATA,               0x800},
+	{A6XX_SP_LB_1_DATA,               0x800},
+	{A6XX_SP_LB_2_DATA,               0x800},
+	{A6XX_SP_LB_3_DATA,               0x800},
+	{A6XX_SP_LB_4_DATA,               0x800},
+	{A6XX_SP_LB_5_DATA,               0x200},
+	{A6XX_SP_CB_BINDLESS_DATA,        0x800},
+	{A6XX_SP_CB_LEGACY_DATA,          0x280,},
+	{A6XX_SP_UAV_DATA,                0x80,},
+	{A6XX_SP_INST_TAG,                0x80,},
+	{A6XX_SP_CB_BINDLESS_TAG,         0x80,},
+	{A6XX_SP_TMO_UMO_TAG,             0x80,},
+	{A6XX_SP_SMO_TAG,                 0x80},
+	{A6XX_SP_STATE_DATA,              0x3F},
+	{A6XX_HLSQ_CHUNK_CVS_RAM,         0x1C0},
+	{A6XX_HLSQ_CHUNK_CPS_RAM,         0x280},
+	{A6XX_HLSQ_CHUNK_CVS_RAM_TAG,     0x40,},
+	{A6XX_HLSQ_CHUNK_CPS_RAM_TAG,     0x40,},
+	{A6XX_HLSQ_ICB_CVS_CB_BASE_TAG,   0x4,},
+	{A6XX_HLSQ_ICB_CPS_CB_BASE_TAG,   0x4,},
+	{A6XX_HLSQ_CVS_MISC_RAM,          0x1C0},
+	{A6XX_HLSQ_CPS_MISC_RAM,          0x580},
+	{A6XX_HLSQ_INST_RAM,              0x800},
+	{A6XX_HLSQ_GFX_CVS_CONST_RAM,     0x800},
+	{A6XX_HLSQ_GFX_CPS_CONST_RAM,     0x800},
+	{A6XX_HLSQ_CVS_MISC_RAM_TAG,      0x8,},
+	{A6XX_HLSQ_CPS_MISC_RAM_TAG,      0x4,},
+	{A6XX_HLSQ_INST_RAM_TAG,          0x80,},
+	{A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG, 0xC,},
+	{A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG, 0x10},
+	{A6XX_HLSQ_PWR_REST_RAM,          0x28},
+	{A6XX_HLSQ_PWR_REST_TAG,          0x14},
+	{A6XX_HLSQ_DATAPATH_META,         0x40,},
+	{A6XX_HLSQ_FRONTEND_META,         0x40},
+	{A6XX_HLSQ_INDIRECT_META,         0x40,},
+	{A6XX_SP_LB_6_DATA,               0x200},
+	{A6XX_SP_LB_7_DATA,               0x200},
+	{A6XX_HLSQ_INST_RAM_1,            0x200},
+};
+
+static struct kgsl_memdesc *a6xx_capturescript;
+static struct kgsl_memdesc *a6xx_crashdump_registers;
+static bool crash_dump_valid;
+static u32 *a6xx_cd_reg_end;
+
+static struct reg_list {
+	const unsigned int *regs;
+	unsigned int count;
+	const struct sel_reg *sel;
+	uint64_t offset;
+} a6xx_reg_list[] = {
+	{ a6xx_registers, ARRAY_SIZE(a6xx_registers) / 2, NULL },
+	{ a660_registers, ARRAY_SIZE(a660_registers) / 2, NULL },
+	{ a6xx_rb_rac_registers, ARRAY_SIZE(a6xx_rb_rac_registers) / 2,
+		&_a6xx_rb_rac_aperture },
+	{ a6xx_rb_rbp_registers, ARRAY_SIZE(a6xx_rb_rbp_registers) / 2,
+		&_a6xx_rb_rbp_aperture },
+};
+
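+/*
+ * Number of registers covered by pair _i of a start/end register table,
+ * where the end offset of each pair is inclusive.
+ */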
+#define REG_PAIR_COUNT(_a, _i) \
+	(((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1)
+
+static size_t a6xx_legacy_snapshot_registers(struct kgsl_device *device,
+		u8 *buf, size_t remain, struct reg_list *regs)
+{
+	struct kgsl_snapshot_registers snapshot_regs = {
+		.regs = regs->regs,
+		.count = regs->count,
+	};
+
+	if (regs->sel)
+		kgsl_regwrite(device, regs->sel->host_reg, regs->sel->val);
+
+	return kgsl_snapshot_dump_registers(device, buf, remain,
+		&snapshot_regs);
+}
+
+static size_t a6xx_snapshot_registers(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
+	struct reg_list *regs = (struct reg_list *)priv;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int *src;
+	unsigned int j, k;
+	unsigned int count = 0;
+
+	if (!crash_dump_valid)
+		return a6xx_legacy_snapshot_registers(device, buf, remain,
+			regs);
+
+	if (remain < sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	src = a6xx_crashdump_registers->hostptr + regs->offset;
+	remain -= sizeof(*header);
+
+	for (j = 0; j < regs->count; j++) {
+		unsigned int start = regs->regs[2 * j];
+		unsigned int end = regs->regs[(2 * j) + 1];
+
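+		/* Each register takes 8 bytes: 4 for the offset, 4 for the value */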
+		if (remain < ((end - start) + 1) * 8) {
+			SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+			goto out;
+		}
+
+		remain -= ((end - start) + 1) * 8;
+
+		for (k = start; k <= end; k++, count++) {
+			*data++ = k;
+			*data++ = *src++;
+		}
+	}
+
+out:
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
+
+static size_t a6xx_snapshot_pre_crashdump_regs(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_registers pre_cdregs = {
+			.regs = a6xx_pre_crashdumper_registers,
+			.count = ARRAY_SIZE(a6xx_pre_crashdumper_registers)/2,
+	};
+
+	return kgsl_snapshot_dump_registers(device, buf, remain, &pre_cdregs);
+}
+
+static size_t a6xx_legacy_snapshot_shader(struct kgsl_device *device,
+				u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_shader *header =
+		(struct kgsl_snapshot_shader *) buf;
+	struct a6xx_shader_block_info *info =
+		(struct a6xx_shader_block_info *) priv;
+	struct a6xx_shader_block *block = info->block;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int read_sel, val;
+	int i;
+
+	if (!device->snapshot_legacy)
+		return 0;
+
+	if (remain < SHADER_SECTION_SZ(block->sz)) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	/*
+	 * If crashdumper times out, accessing some readback states from
+	 * AHB path might fail. Hence, skip SP_INST_TAG and SP_INST_DATA
+	 * state types during snapshot dump in legacy flow.
+	 */
+	if (adreno_is_a660(ADRENO_DEVICE(device)) &&
+		(block->statetype == A6XX_SP_INST_TAG ||
+		 block->statetype == A6XX_SP_INST_DATA))
+		return 0;
+
+	header->type = block->statetype;
+	header->index = info->bank;
+	header->size = block->sz;
+
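+	/* Select the shader bank and state type to read through the AHB aperture */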
+	read_sel = (block->statetype << A6XX_SHADER_STATETYPE_SHIFT) |
+		info->bank;
+	kgsl_regwrite(device, A6XX_HLSQ_DBG_READ_SEL, read_sel);
+
+	/*
+	 * An explicit barrier is needed so that reads do not happen before
+	 * the register write.
+	 */
+	mb();
+
+	for (i = 0; i < block->sz; i++) {
+		kgsl_regread(device, (A6XX_HLSQ_DBG_AHB_READ_APERTURE + i),
+			&val);
+		*data++ = val;
+	}
+
+	return SHADER_SECTION_SZ(block->sz);
+}
+
+static size_t a6xx_snapshot_shader_memory(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_shader *header =
+		(struct kgsl_snapshot_shader *) buf;
+	struct a6xx_shader_block_info *info =
+		(struct a6xx_shader_block_info *) priv;
+	struct a6xx_shader_block *block = info->block;
+	unsigned int *data = (unsigned int *) (buf + sizeof(*header));
+
+	if (!crash_dump_valid)
+		return a6xx_legacy_snapshot_shader(device, buf, remain, priv);
+
+	if (remain < SHADER_SECTION_SZ(block->sz)) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	header->type = block->statetype;
+	header->index = info->bank;
+	header->size = block->sz;
+
+	memcpy(data, a6xx_crashdump_registers->hostptr + info->offset,
+		block->sz * sizeof(unsigned int));
+
+	return SHADER_SECTION_SZ(block->sz);
+}
+
+static void a6xx_snapshot_shader(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	unsigned int i, j;
+	struct a6xx_shader_block_info info;
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) {
+		for (j = 0; j < A6XX_NUM_SHADER_BANKS; j++) {
+			info.block = &a6xx_shader_blocks[i];
+			info.bank = j;
+			info.offset = a6xx_shader_blocks[i].offset +
+				(j * a6xx_shader_blocks[i].sz);
+
+			/* Shader working/shadow memory */
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_SHADER,
+				snapshot, a6xx_snapshot_shader_memory, &info);
+		}
+	}
+}
+
+static void a650_snapshot_mempool(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	u32 val;
+
+	/* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */
+	kgsl_regread(device, A6XX_CP_CHICKEN_DBG, &val);
+	kgsl_regwrite(device, A6XX_CP_CHICKEN_DBG, val | BIT(2));
+
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A6XX_CP_MEM_POOL_DBG_ADDR, A6XX_CP_MEM_POOL_DBG_DATA,
+		0, 0x2100);
+
+	kgsl_regwrite(device, A6XX_CP_CHICKEN_DBG, val);
+}
+
+static void a6xx_snapshot_mempool(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	unsigned int pool_size;
+	u8 *buf = snapshot->ptr;
+
+	/* Set the mempool size to 0 to stabilize it while dumping */
+	kgsl_regread(device, A6XX_CP_MEM_POOL_SIZE, &pool_size);
+	kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, 0);
+
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A6XX_CP_MEM_POOL_DBG_ADDR, A6XX_CP_MEM_POOL_DBG_DATA,
+		0, 0x2100);
+
+	/*
+	 * Data at offset 0x2000 in the mempool section is the mempool size.
+	 * Since we set it to 0, patch in the original size so that the data
+	 * is consistent.
+	 */
+	if (buf < snapshot->ptr) {
+		unsigned int *data;
+
+		/* Skip over the headers */
+		buf += sizeof(struct kgsl_snapshot_section_header) +
+				sizeof(struct kgsl_snapshot_indexed_regs);
+
+		data = (unsigned int *)buf + 0x2000;
+		*data = pool_size;
+	}
+
+	/* Restore the saved mempool size */
+	kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, pool_size);
+}
+
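+/*
+ * Read one register through the HLSQ AHB read aperture. The regbase values
+ * in the dbgahb tables above look like byte offsets, so regbase / 4 converts
+ * the block base to a dword offset before computing the aperture index
+ * (assumption; not spelled out in the original code).
+ */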
+static inline unsigned int a6xx_read_dbgahb(struct kgsl_device *device,
+				unsigned int regbase, unsigned int reg)
+{
+	unsigned int read_reg = A6XX_HLSQ_DBG_AHB_READ_APERTURE +
+				reg - regbase / 4;
+	unsigned int val;
+
+	kgsl_regread(device, read_reg, &val);
+	return val;
+}
+
+static size_t a6xx_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device,
+				u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs *header =
+				(struct kgsl_snapshot_mvc_regs *)buf;
+	struct a6xx_cluster_dbgahb_regs_info *info =
+				(struct a6xx_cluster_dbgahb_regs_info *)priv;
+	struct a6xx_cluster_dbgahb_registers *cur_cluster = info->cluster;
+	unsigned int read_sel;
+	unsigned int data_size = 0;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, j;
+
+	if (!device->snapshot_legacy)
+		return 0;
+
+	if (remain < sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	remain -= sizeof(*header);
+
+	header->ctxt_id = info->ctxt_id;
+	header->cluster_id = cur_cluster->id;
+
+	read_sel = ((cur_cluster->statetype + info->ctxt_id * 2) & 0xff) << 8;
+	kgsl_regwrite(device, A6XX_HLSQ_DBG_READ_SEL, read_sel);
+
+	/*
+	 * An explicit barrier is needed so that reads do not happen before
+	 * the register write.
+	 */
+	mb();
+
+	for (i = 0; i < cur_cluster->num_sets; i++) {
+		unsigned int start = cur_cluster->regs[2 * i];
+		unsigned int end = cur_cluster->regs[2 * i + 1];
+
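+		/*
+		 * Each range emits two header dwords (start/end markers) plus
+		 * one dword per register, hence (end - start + 3) * 4 bytes.
+		 */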
+		if (remain < (end - start + 3) * 4) {
+			SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+			goto out;
+		}
+
+		remain -= (end - start + 3) * 4;
+		data_size += (end - start + 3) * 4;
+
+		*data++ = start | (1 << 31);
+		*data++ = end;
+
+		for (j = start; j <= end; j++) {
+			unsigned int val;
+
+			val = a6xx_read_dbgahb(device, cur_cluster->regbase, j);
+			*data++ = val;
+		}
+	}
+
+out:
+	return data_size + sizeof(*header);
+}
+
+static size_t a6xx_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs *header =
+				(struct kgsl_snapshot_mvc_regs *)buf;
+	struct a6xx_cluster_dbgahb_regs_info *info =
+				(struct a6xx_cluster_dbgahb_regs_info *)priv;
+	struct a6xx_cluster_dbgahb_registers *cluster = info->cluster;
+	unsigned int data_size = 0;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, j;
+	unsigned int *src;
+
+
+		return a6xx_legacy_snapshot_cluster_dbgahb(device, buf, remain,
+				info);
+
+	if (remain < sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	remain -= sizeof(*header);
+
+	header->ctxt_id = info->ctxt_id;
+	header->cluster_id = cluster->id;
+
+	src = a6xx_crashdump_registers->hostptr +
+		(header->ctxt_id ? cluster->offset1 : cluster->offset0);
+
+	for (i = 0; i < cluster->num_sets; i++) {
+		unsigned int start;
+		unsigned int end;
+
+		start = cluster->regs[2 * i];
+		end = cluster->regs[2 * i + 1];
+
+		if (remain < (end - start + 3) * 4) {
+			SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+			goto out;
+		}
+
+		remain -= (end - start + 3) * 4;
+		data_size += (end - start + 3) * 4;
+
+		*data++ = start | (1 << 31);
+		*data++ = end;
+		for (j = start; j <= end; j++)
+			*data++ = *src++;
+	}
+out:
+	return data_size + sizeof(*header);
+}
+
+static size_t a6xx_legacy_snapshot_non_ctx_dbgahb(struct kgsl_device *device,
+				u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header =
+				(struct kgsl_snapshot_regs *)buf;
+	struct a6xx_non_ctx_dbgahb_registers *regs =
+				(struct a6xx_non_ctx_dbgahb_registers *)priv;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int count = 0;
+	unsigned int read_sel;
+	int i, j;
+
+	if (!device->snapshot_legacy)
+		return 0;
+
+	/* Figure out how many registers we are going to dump */
+	for (i = 0; i < regs->num_sets; i++) {
+		int start = regs->regs[i * 2];
+		int end = regs->regs[i * 2 + 1];
+
+		count += (end - start + 1);
+	}
+
+	if (remain < (count * 8) + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	header->count = count;
+
+	read_sel = (regs->statetype & 0xff) << 8;
+	kgsl_regwrite(device, A6XX_HLSQ_DBG_READ_SEL, read_sel);
+
+	for (i = 0; i < regs->num_sets; i++) {
+		unsigned int start = regs->regs[2 * i];
+		unsigned int end = regs->regs[2 * i + 1];
+
+		for (j = start; j <= end; j++) {
+			unsigned int val;
+
+			val = a6xx_read_dbgahb(device, regs->regbase, j);
+			*data++ = j;
+			*data++ = val;
+		}
+	}
+	return (count * 8) + sizeof(*header);
+}
+
+static size_t a6xx_snapshot_non_ctx_dbgahb(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header =
+				(struct kgsl_snapshot_regs *)buf;
+	struct a6xx_non_ctx_dbgahb_registers *regs =
+				(struct a6xx_non_ctx_dbgahb_registers *)priv;
+	unsigned int count = 0;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int i, k;
+	unsigned int *src;
+
+	if (!crash_dump_valid)
+		return a6xx_legacy_snapshot_non_ctx_dbgahb(device, buf, remain,
+				regs);
+
+	if (remain < sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	remain -= sizeof(*header);
+
+	src = a6xx_crashdump_registers->hostptr + regs->offset;
+
+	for (i = 0; i < regs->num_sets; i++) {
+		unsigned int start;
+		unsigned int end;
+
+		start = regs->regs[2 * i];
+		end = regs->regs[(2 * i) + 1];
+
+		if (remain < (end - start + 1) * 8) {
+			SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+			goto out;
+		}
+
+		remain -= ((end - start) + 1) * 8;
+
+		for (k = start; k <= end; k++, count++) {
+			*data++ = k;
+			*data++ = *src++;
+		}
+	}
+out:
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
+
+static void a6xx_snapshot_dbgahb_regs(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_ctx_clusters); i++) {
+		struct a6xx_cluster_dbgahb_registers *cluster =
+				&a6xx_dbgahb_ctx_clusters[i];
+		struct a6xx_cluster_dbgahb_regs_info info;
+
+		info.cluster = cluster;
+		for (j = 0; j < A6XX_NUM_CTXTS; j++) {
+			info.ctxt_id = j;
+
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_MVC, snapshot,
+				a6xx_snapshot_cluster_dbgahb, &info);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_non_ctx_dbgahb); i++) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_REGS, snapshot,
+			a6xx_snapshot_non_ctx_dbgahb, &a6xx_non_ctx_dbgahb[i]);
+	}
+}
+
+static size_t a6xx_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs *header =
+					(struct kgsl_snapshot_mvc_regs *)buf;
+	struct a6xx_cluster_regs_info *info =
+					(struct a6xx_cluster_regs_info *)priv;
+	struct a6xx_cluster_registers *cur_cluster = info->cluster;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int ctxt = info->ctxt_id;
+	unsigned int start, end, i, j, aperture_cntl = 0;
+	unsigned int data_size = 0;
+
+	if (remain < sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+		return 0;
+	}
+
+	remain -= sizeof(*header);
+
+	header->ctxt_id = info->ctxt_id;
+	header->cluster_id = cur_cluster->id;
+
+	/*
+	 * Set the AHB control for the Host to read from the
+	 * cluster/context for this iteration.
+	 */
+	aperture_cntl = ((cur_cluster->id & 0x7) << 8) | (ctxt << 4) | ctxt;
+	kgsl_regwrite(device, A6XX_CP_APERTURE_CNTL_HOST, aperture_cntl);
+
+	if (cur_cluster->sel)
+		kgsl_regwrite(device, cur_cluster->sel->host_reg,
+			cur_cluster->sel->val);
+
+	for (i = 0; i < cur_cluster->num_sets; i++) {
+		start = cur_cluster->regs[2 * i];
+		end = cur_cluster->regs[2 * i + 1];
+
+		if (remain < (end - start + 3) * 4) {
+			SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+			goto out;
+		}
+
+		remain -= (end - start + 3) * 4;
+		data_size += (end - start + 3) * 4;
+
+		*data++ = start | (1 << 31);
+		*data++ = end;
+		for (j = start; j <= end; j++) {
+			unsigned int val;
+
+			kgsl_regread(device, j, &val);
+			*data++ = val;
+		}
+	}
+out:
+	return data_size + sizeof(*header);
+}
+
+static size_t a6xx_snapshot_mvc(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs *header =
+				(struct kgsl_snapshot_mvc_regs *)buf;
+	struct a6xx_cluster_regs_info *info =
+				(struct a6xx_cluster_regs_info *)priv;
+	struct a6xx_cluster_registers *cluster = info->cluster;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int *src;
+	int i, j;
+	unsigned int start, end;
+	size_t data_size = 0;
+
+	if (!crash_dump_valid)
+		return a6xx_legacy_snapshot_mvc(device, buf, remain, info);
+
+	if (remain < sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+		return 0;
+	}
+
+	remain -= sizeof(*header);
+
+	header->ctxt_id = info->ctxt_id;
+	header->cluster_id = cluster->id;
+
+	src = a6xx_crashdump_registers->hostptr +
+		(header->ctxt_id ? cluster->offset1 : cluster->offset0);
+
+	for (i = 0; i < cluster->num_sets; i++) {
+		start = cluster->regs[2 * i];
+		end = cluster->regs[2 * i + 1];
+
+		if (remain < (end - start + 3) * 4) {
+			SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+			goto out;
+		}
+
+		remain -= (end - start + 3) * 4;
+		data_size += (end - start + 3) * 4;
+
+		*data++ = start | (1 << 31);
+		*data++ = end;
+		for (j = start; j <= end; j++)
+			*data++ = *src++;
+	}
+
+out:
+	return data_size + sizeof(*header);
+}
+
+static void a6xx_snapshot_mvc_regs(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	int i, j;
+	struct a6xx_cluster_regs_info info;
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) {
+		struct a6xx_cluster_registers *cluster = &a6xx_clusters[i];
+
+		/* Skip registers that don't exist on targets other than A660 */
+		if (!adreno_is_a660(ADRENO_DEVICE(device)) &&
+				(cluster->regs == a660_fe_cluster))
+			continue;
+
+		info.cluster = cluster;
+		for (j = 0; j < A6XX_NUM_CTXTS; j++) {
+			info.ctxt_id = j;
+
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_MVC, snapshot,
+				a6xx_snapshot_mvc, &info);
+		}
+	}
+}
+
+/* a6xx_dbgc_debug_bus_read() - Read data from trace bus */
+static void a6xx_dbgc_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg;
+
+	reg = (block_id << A6XX_DBGC_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT) |
+			(index << A6XX_DBGC_CFG_DBGBUS_SEL_PING_INDEX_SHIFT);
+
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to give the hardware enough time to
+	 * funnel the correct data into the trace buffer.
+	 */
+	udelay(1);
+
+	kgsl_regread(device, A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2, val);
+	val++;
+	kgsl_regread(device, A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1, val);
+}
+
+/* a6xx_snapshot_dbgc_debugbus_block() - Capture debug data for a gpu block */
+static size_t a6xx_snapshot_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int dwords;
+	unsigned int block_id;
+	size_t size;
+
+	dwords = block->dwords;
+
+	/* For a6xx each debug bus data unit is 2 DWORDS */
+	size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = block->block_id;
+	if ((block->block_id == A6XX_DBGBUS_VBIF) && !adreno_is_a630(adreno_dev))
+		header->id = A6XX_DBGBUS_GBIF_GX;
+	header->count = dwords * 2;
+
+	block_id = block->block_id;
+	/* GMU_GX data is read using the GMU_CX block id on A630 and A615 family */
+	if ((adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) &&
+		(block_id == A6XX_DBGBUS_GMU_GX))
+		block_id = A6XX_DBGBUS_GMU_CX;
+
+	for (i = 0; i < dwords; i++)
+		a6xx_dbgc_debug_bus_read(device, block_id, i, &data[i*2]);
+
+	return size;
+}
+
+/* a6xx_snapshot_vbif_debugbus_block() - Capture debug data for VBIF block */
+static size_t a6xx_snapshot_vbif_debugbus_block(struct kgsl_device *device,
+			u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i, j;
+	/*
+	 * Total number of VBIF data words considering 3 sections:
+	 * 2 arbiter blocks of 16 words
+	 * 5 AXI XIN blocks of 18 dwords each
+	 * 4 core clock side XIN blocks of 12 dwords each
+	 */
+	unsigned int dwords = (16 * A6XX_NUM_AXI_ARB_BLOCKS) +
+			(18 * A6XX_NUM_XIN_AXI_BLOCKS) +
+			(12 * A6XX_NUM_XIN_CORE_BLOCKS);
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size;
+	unsigned int reg_clk;
+
+	size = (dwords * sizeof(unsigned int)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+	header->id = block->block_id;
+	header->count = dwords;
+
+	kgsl_regread(device, A6XX_VBIF_CLKON, &reg_clk);
+	kgsl_regwrite(device, A6XX_VBIF_CLKON, reg_clk |
+			(A6XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK <<
+			A6XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT));
+	kgsl_regwrite(device, A6XX_VBIF_TEST_BUS1_CTRL0, 0);
+	kgsl_regwrite(device, A6XX_VBIF_TEST_BUS_OUT_CTRL,
+			(A6XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK <<
+			A6XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT));
+
+	for (i = 0; i < A6XX_NUM_AXI_ARB_BLOCKS; i++) {
+		kgsl_regwrite(device, A6XX_VBIF_TEST_BUS2_CTRL0,
+			(1 << (i + 16)));
+		for (j = 0; j < 16; j++) {
+			kgsl_regwrite(device, A6XX_VBIF_TEST_BUS2_CTRL1,
+				((j & A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK)
+				<< A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A6XX_VBIF_TEST_BUS_OUT,
+					data);
+			data++;
+		}
+	}
+
+	/* XIN blocks AXI side */
+	for (i = 0; i < A6XX_NUM_XIN_AXI_BLOCKS; i++) {
+		kgsl_regwrite(device, A6XX_VBIF_TEST_BUS2_CTRL0, 1 << i);
+		for (j = 0; j < 18; j++) {
+			kgsl_regwrite(device, A6XX_VBIF_TEST_BUS2_CTRL1,
+				((j & A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK)
+				<< A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A6XX_VBIF_TEST_BUS_OUT,
+				data);
+			data++;
+		}
+	}
+	kgsl_regwrite(device, A6XX_VBIF_TEST_BUS2_CTRL0, 0);
+
+	/* XIN blocks core clock side */
+	for (i = 0; i < A6XX_NUM_XIN_CORE_BLOCKS; i++) {
+		kgsl_regwrite(device, A6XX_VBIF_TEST_BUS1_CTRL0, 1 << i);
+		for (j = 0; j < 12; j++) {
+			kgsl_regwrite(device, A6XX_VBIF_TEST_BUS1_CTRL1,
+				((j & A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK)
+				<< A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A6XX_VBIF_TEST_BUS_OUT,
+				data);
+			data++;
+		}
+	}
+	/* restore the clock of VBIF */
+	kgsl_regwrite(device, A6XX_VBIF_CLKON, reg_clk);
+	return size;
+}
+
+/* a6xx_cx_debug_bus_read() - Read data from the CX DBGC trace bus */
+static void a6xx_cx_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg;
+
+	reg = (block_id << A6XX_CX_DBGC_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT) |
+			(index << A6XX_CX_DBGC_CFG_DBGBUS_SEL_PING_INDEX_SHIFT);
+
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to give the hardware enough time to
+	 * funnel the correct data into the trace buffer.
+	 */
+	udelay(1);
+
+	kgsl_regread(device, A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2, val);
+	val++;
+	kgsl_regread(device, A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1, val);
+}
+
+/*
+ * a6xx_snapshot_cx_dbgc_debugbus_block() - Capture debug data for a gpu
+ * block from the CX DBGC block
+ */
+static size_t a6xx_snapshot_cx_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int dwords;
+	size_t size;
+
+	dwords = block->dwords;
+
+	/* For a6xx each debug bus data unit is 2 DWORDS */
+	size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = block->block_id;
+	header->count = dwords * 2;
+
+	for (i = 0; i < dwords; i++)
+		a6xx_cx_debug_bus_read(device, block->block_id, i,
+					&data[i*2]);
+
+	return size;
+}
+
+/* a6xx_snapshot_debugbus() - Capture debug bus data */
+static void a6xx_snapshot_debugbus(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	int i;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_CNTLT,
+		(0xf << A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT_SHIFT) |
+		(0x0 << A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU_SHIFT) |
+		(0x0 << A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN_SHIFT));
+
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_CNTLM,
+		0xf << A6XX_DBGC_CFG_DBGBUS_CTLTM_ENABLE_SHIFT);
+
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_BYTEL_0,
+		(0 << A6XX_DBGC_CFG_DBGBUS_BYTEL0_SHIFT) |
+		(1 << A6XX_DBGC_CFG_DBGBUS_BYTEL1_SHIFT) |
+		(2 << A6XX_DBGC_CFG_DBGBUS_BYTEL2_SHIFT) |
+		(3 << A6XX_DBGC_CFG_DBGBUS_BYTEL3_SHIFT) |
+		(4 << A6XX_DBGC_CFG_DBGBUS_BYTEL4_SHIFT) |
+		(5 << A6XX_DBGC_CFG_DBGBUS_BYTEL5_SHIFT) |
+		(6 << A6XX_DBGC_CFG_DBGBUS_BYTEL6_SHIFT) |
+		(7 << A6XX_DBGC_CFG_DBGBUS_BYTEL7_SHIFT));
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_BYTEL_1,
+		(8 << A6XX_DBGC_CFG_DBGBUS_BYTEL8_SHIFT) |
+		(9 << A6XX_DBGC_CFG_DBGBUS_BYTEL9_SHIFT) |
+		(10 << A6XX_DBGC_CFG_DBGBUS_BYTEL10_SHIFT) |
+		(11 << A6XX_DBGC_CFG_DBGBUS_BYTEL11_SHIFT) |
+		(12 << A6XX_DBGC_CFG_DBGBUS_BYTEL12_SHIFT) |
+		(13 << A6XX_DBGC_CFG_DBGBUS_BYTEL13_SHIFT) |
+		(14 << A6XX_DBGC_CFG_DBGBUS_BYTEL14_SHIFT) |
+		(15 << A6XX_DBGC_CFG_DBGBUS_BYTEL15_SHIFT));
+
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
+	kgsl_regwrite(device, A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
+
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
+		(0xf << A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT_SHIFT) |
+		(0x0 << A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU_SHIFT) |
+		(0x0 << A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN_SHIFT));
+
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
+		0xf << A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE_SHIFT);
+
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
+		(0 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL0_SHIFT) |
+		(1 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL1_SHIFT) |
+		(2 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL2_SHIFT) |
+		(3 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL3_SHIFT) |
+		(4 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL4_SHIFT) |
+		(5 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL5_SHIFT) |
+		(6 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL6_SHIFT) |
+		(7 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL7_SHIFT));
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
+		(8 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL8_SHIFT) |
+		(9 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL9_SHIFT) |
+		(10 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL10_SHIFT) |
+		(11 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL11_SHIFT) |
+		(12 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL12_SHIFT) |
+		(13 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL13_SHIFT) |
+		(14 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL14_SHIFT) |
+		(15 << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL15_SHIFT));
+
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
+	kgsl_regwrite(device, A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_dbgc_debugbus_blocks); i++) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+			snapshot, a6xx_snapshot_dbgc_debugbus_block,
+			(void *) &a6xx_dbgc_debugbus_blocks[i]);
+	}
+
+	if (adreno_is_a650_family(adreno_dev)) {
+		for (i = 0; i < ARRAY_SIZE(a650_dbgc_debugbus_blocks); i++) {
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, a6xx_snapshot_dbgc_debugbus_block,
+				(void *) &a650_dbgc_debugbus_blocks[i]);
+		}
+	}
+
+	/*
+	 * GBIF shares the same debugbus interface as the other GPU blocks,
+	 * so fall back to the default path if the GPU uses GBIF.
+	 * GBIF uses exactly the same block ID as VBIF, so reuse it as is.
+	 */
+	if (!adreno_is_a630(adreno_dev))
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+			snapshot, a6xx_snapshot_dbgc_debugbus_block,
+			(void *) &a6xx_vbif_debugbus_blocks);
+	else
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+			snapshot, a6xx_snapshot_vbif_debugbus_block,
+			(void *) &a6xx_vbif_debugbus_blocks);
+
+	/* Dump the CX debugbus data if the block exists */
+	if (kgsl_regmap_valid_offset(&device->regmap, A6XX_CX_DBGC_CFG_DBGBUS_SEL_A)) {
+		for (i = 0; i < ARRAY_SIZE(a6xx_cx_dbgc_debugbus_blocks); i++) {
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, a6xx_snapshot_cx_dbgc_debugbus_block,
+				(void *) &a6xx_cx_dbgc_debugbus_blocks[i]);
+		}
+		/*
+		 * Get the debugbus for the GBIF CX part if the GPU has a GBIF
+		 * block. GBIF uses exactly the same block ID as VBIF, so reuse
+		 * it as is.
+		 */
+		if (!adreno_is_a630(adreno_dev))
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot,
+				a6xx_snapshot_cx_dbgc_debugbus_block,
+				(void *) &a6xx_vbif_debugbus_blocks);
+	}
+}
+
+/* a6xx_snapshot_sqe() - Dump SQE data in snapshot */
+static size_t a6xx_snapshot_sqe(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+
+	if (remain < DEBUG_SECTION_SZ(1)) {
+		SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG");
+		return 0;
+	}
+
+	/* Dump the SQE firmware version */
+	header->type = SNAPSHOT_DEBUG_SQE_VERSION;
+	header->size = 1;
+	*data = fw->version;
+
+	return DEBUG_SECTION_SZ(1);
+}
+
+static void _a6xx_do_crashdump(struct kgsl_device *device)
+{
+	u32 val = 0;
+	ktime_t timeout;
+
+	crash_dump_valid = false;
+
+	if (!device->snapshot_crashdumper)
+		return;
+
+	if (IS_ERR_OR_NULL(a6xx_capturescript) ||
+		IS_ERR_OR_NULL(a6xx_crashdump_registers))
+		return;
+
+	/* If the SMMU is stalled we cannot do a crash dump */
+	if (adreno_smmu_is_stalled(ADRENO_DEVICE(device)))
+		return;
+
+	/* Turn on APRIV for legacy targets so we can access the buffers */
+	if (!ADRENO_FEATURE(ADRENO_DEVICE(device), ADRENO_APRIV))
+		kgsl_regwrite(device, A6XX_CP_MISC_CNTL, 1);
+
+	kgsl_regwrite(device, A6XX_CP_CRASH_SCRIPT_BASE_LO,
+			lower_32_bits(a6xx_capturescript->gpuaddr));
+	kgsl_regwrite(device, A6XX_CP_CRASH_SCRIPT_BASE_HI,
+			upper_32_bits(a6xx_capturescript->gpuaddr));
+	kgsl_regwrite(device, A6XX_CP_CRASH_DUMP_CNTL, 1);
+
+	timeout = ktime_add_ms(ktime_get(), CP_CRASH_DUMPER_TIMEOUT);
+
+	if (!device->snapshot_atomic)
+		might_sleep();
+
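+	/*
+	 * The dump buffer is expected to be pre-filled with the 0xaaaaaaaa
+	 * marker at a6xx_cd_reg_end; the crash dumper overwrites it when it
+	 * finishes, so poll that location until it changes or we time out.
+	 */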
+	for (;;) {
+		/* make sure we're reading the latest value */
+		rmb();
+		if ((*a6xx_cd_reg_end) != 0xaaaaaaaa)
+			break;
+
+		if (ktime_compare(ktime_get(), timeout) > 0)
+			break;
+
+		/* Sleep for up to 1 msec to avoid unnecessary busy looping */
+		if (!device->snapshot_atomic)
+			usleep_range(100, 1000);
+	}
+
+	kgsl_regread(device, A6XX_CP_CRASH_DUMP_STATUS, &val);
+
+	if (!ADRENO_FEATURE(ADRENO_DEVICE(device), ADRENO_APRIV))
+		kgsl_regwrite(device, A6XX_CP_MISC_CNTL, 0);
+
+	if (!(val & 0x2)) {
+		dev_err(device->dev, "Crash dump timed out: 0x%X\n", val);
+		return;
+	}
+
+	crash_dump_valid = true;
+}
+
+static size_t a6xx_snapshot_isense_registers(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
+	struct kgsl_snapshot_registers *regs = priv;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int count = 0, j, k;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/* Figure out how many registers we are going to dump */
+
+	for (j = 0; j < regs->count; j++) {
+		int start = regs->regs[j * 2];
+		int end = regs->regs[j * 2 + 1];
+
+		count += (end - start + 1);
+	}
+
+	if (remain < (count * 8) + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "ISENSE REGISTERS");
+		return 0;
+	}
+
+	for (j = 0; j < regs->count; j++) {
+		unsigned int start = regs->regs[j * 2];
+		unsigned int end = regs->regs[j * 2 + 1];
+
+		for (k = start; k <= end; k++) {
+			unsigned int val;
+
+			adreno_isense_regread(adreno_dev,
+				k - (adreno_dev->isense_base >> 2), &val);
+			*data++ = k;
+			*data++ = val;
+		}
+	}
+
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
+
+/* Snapshot the preemption related buffers */
+static size_t snapshot_preemption_record(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+	u8 *ptr = buf + sizeof(*header);
+	const struct adreno_a6xx_core *gpucore = to_a6xx_core(ADRENO_DEVICE(device));
+	u64 ctxt_record_size = A6XX_CP_CTXRECORD_SIZE_IN_BYTES;
+
+	if (gpucore->ctxt_record_size)
+		ctxt_record_size = gpucore->ctxt_record_size;
+
+	ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size);
+
+	if (remain < (ctxt_record_size + sizeof(*header))) {
+		SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD");
+		return 0;
+	}
+
+	header->size = ctxt_record_size >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, ctxt_record_size);
+
+	return ctxt_record_size + sizeof(*header);
+}
+
+static size_t a6xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf;
+	u32 size, *data = (u32 *) (buf + sizeof(*header));
+	int i;
+
+	kgsl_regread(device, A6XX_CP_ROQ_THRESHOLDS_2, &size);
+	size >>= 14;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_ROQ;
+	header->size = size;
+
+	kgsl_regwrite(device, A6XX_CP_ROQ_DBG_ADDR, 0x0);
+	for (i = 0; i < size; i++)
+		kgsl_regread(device, A6XX_CP_ROQ_DBG_DATA, &data[i]);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+static inline bool a6xx_has_gbif_reinit(struct adreno_device *adreno_dev)
+{
+	/*
+	 * Some a6xx targets do not have GBIF reinit support in hardware.
+	 * This check is only for hardware capability, not for whether the
+	 * software GBIF reinit sequence is enabled.
+	 */
+	return !(adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev) ||
+		 adreno_is_a640_family(adreno_dev));
+}
+
+/*
+ * a6xx_snapshot() - A6XX GPU snapshot function
+ * @adreno_dev: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the A6XX specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+void a6xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb;
+	bool sptprac_on;
+	unsigned int i;
+	u32 hi, lo;
+
+	/*
+	 * Dump debugbus data here to capture it for both
+	 * GMU and GPU snapshot. Debugbus data can be accessed
+	 * even if the gx headswitch or sptprac is off. If gx
+	 * headswitch is off, data for gx blocks will show as
+	 * 0x5c00bd00.
+	 */
+	a6xx_snapshot_debugbus(adreno_dev, snapshot);
+
+	/* RSCC registers are on cx */
+	if (adreno_is_a650_family(adreno_dev)) {
+		struct kgsl_snapshot_registers r;
+
+		r.regs = a650_isense_registers;
+		r.count = ARRAY_SIZE(a650_isense_registers) / 2;
+
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+			snapshot, a6xx_snapshot_isense_registers, &r);
+	}
+
+	if (!gmu_core_isenabled(device)) {
+		if (adreno_is_a619_holi(adreno_dev))
+			adreno_snapshot_registers(device, snapshot,
+					a6xx_holi_gmu_wrapper_registers,
+					ARRAY_SIZE(a6xx_holi_gmu_wrapper_registers) / 2);
+		else
+			adreno_snapshot_registers(device, snapshot,
+					a6xx_gmu_wrapper_registers,
+					ARRAY_SIZE(a6xx_gmu_wrapper_registers) / 2);
+	}
+
+	sptprac_on = a6xx_gmu_sptprac_is_on(adreno_dev);
+
+	/* SQE Firmware */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a6xx_snapshot_sqe, NULL);
+
+	if (!adreno_gx_is_on(adreno_dev))
+		return;
+
+	kgsl_regread(device, A6XX_CP_IB1_BASE, &lo);
+	kgsl_regread(device, A6XX_CP_IB1_BASE_HI, &hi);
+
+	snapshot->ib1base = (((u64) hi) << 32) | lo;
+
+	kgsl_regread(device, A6XX_CP_IB2_BASE, &lo);
+	kgsl_regread(device, A6XX_CP_IB2_BASE_HI, &hi);
+
+	snapshot->ib2base = (((u64) hi) << 32) | lo;
+
+	kgsl_regread(device, A6XX_CP_IB1_REM_SIZE, &snapshot->ib1size);
+	kgsl_regread(device, A6XX_CP_IB2_REM_SIZE, &snapshot->ib2size);
+
+	/* Assert the isStatic bit before triggering snapshot */
+	if (adreno_is_a660(adreno_dev))
+		kgsl_regwrite(device, A6XX_RBBM_SNAPSHOT_STATUS, 0x1);
+
+	/* Dump the registers which get affected by crash dumper trigger */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+		snapshot, a6xx_snapshot_pre_crashdump_regs, NULL);
+
+	/* Dump vbif registers as well which get affected by crash dumper */
+	if (adreno_is_a630(adreno_dev))
+		SNAPSHOT_REGISTERS(device, snapshot, a6xx_vbif_registers);
+	else if (a6xx_has_gbif_reinit(adreno_dev))
+		adreno_snapshot_registers(device, snapshot,
+					  a6xx_gbif_reinit_registers,
+					  ARRAY_SIZE(a6xx_gbif_reinit_registers) / 2);
+	else
+		adreno_snapshot_registers(device, snapshot,
+			a6xx_gbif_registers,
+			ARRAY_SIZE(a6xx_gbif_registers) / 2);
+
+	/* Try to run the crash dumper */
+	if (sptprac_on)
+		_a6xx_do_crashdump(device);
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_reg_list); i++) {
+		/* Skip registers that don't exist on targets other than A660 */
+		if (!adreno_is_a660(adreno_dev) &&
+				(a6xx_reg_list[i].regs == a660_registers))
+			continue;
+
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+			snapshot, a6xx_snapshot_registers, &a6xx_reg_list[i]);
+	}
+
+	/* CP_SQE indexed registers */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A6XX_CP_SQE_STAT_ADDR, A6XX_CP_SQE_STAT_DATA, 0, 0x33);
+
+	/* CP_DRAW_STATE */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A6XX_CP_DRAW_STATE_ADDR, A6XX_CP_DRAW_STATE_DATA,
+		0, 0x100);
+
+	 /* SQE_UCODE Cache */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A6XX_CP_SQE_UCODE_DBG_ADDR, A6XX_CP_SQE_UCODE_DBG_DATA,
+		0, 0x8000);
+
+	/* CP LPAC indexed registers */
+	if (adreno_is_a660(adreno_dev)) {
+		u32 roq_size;
+
+		kgsl_snapshot_indexed_registers(device, snapshot,
+			 A6XX_CP_SQE_AC_STAT_ADDR, A6XX_CP_SQE_AC_STAT_DATA,
+				0, 0x33);
+		kgsl_snapshot_indexed_registers(device, snapshot,
+			A6XX_CP_LPAC_DRAW_STATE_ADDR,
+				A6XX_CP_LPAC_DRAW_STATE_DATA, 0, 0x100);
+		kgsl_snapshot_indexed_registers(device, snapshot,
+			A6XX_CP_SQE_AC_UCODE_DBG_ADDR,
+				A6XX_CP_SQE_AC_UCODE_DBG_DATA, 0, 0x8000);
+
+		kgsl_regread(device, A6XX_CP_LPAC_ROQ_THRESHOLDS_2, &roq_size);
+		roq_size = roq_size >> 14;
+		kgsl_snapshot_indexed_registers(device, snapshot,
+			A6XX_CP_LPAC_ROQ_DBG_ADDR,
+				A6XX_CP_LPAC_ROQ_DBG_DATA, 0, roq_size);
+
+		kgsl_snapshot_indexed_registers(device, snapshot,
+			A6XX_CP_LPAC_FIFO_DBG_ADDR, A6XX_CP_LPAC_FIFO_DBG_DATA,
+			0, 0x40);
+	}
+	/*
+	 * Each CP ROQ dump unit is 4 dwords. The number of units is stored
+	 * in CP_ROQ_THRESHOLDS_2[31:16]. Read the value and convert it to
+	 * dwords.
+	 */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a6xx_snapshot_cp_roq, NULL);
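
The shift in a6xx_snapshot_cp_roq() folds the unit-to-dword conversion into one step: dwords = (reg >> 16) * 4 = reg >> 14. A standalone sketch of that arithmetic, using a made-up register value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical CP_ROQ_THRESHOLDS_2 value: 0x80 four-dword units */
	uint32_t reg = 0x00800000;

	uint32_t units = reg >> 16;	/* 128 units */
	uint32_t dwords = reg >> 14;	/* 128 * 4 = 512 dwords, in one shift */

	printf("units=%u dwords=%u (check: %u)\n", units, dwords, units * 4);
	return 0;
}
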
+
+	/* Mempool debug data */
+	if (adreno_is_a650_family(adreno_dev))
+		a650_snapshot_mempool(device, snapshot);
+	else
+		a6xx_snapshot_mempool(device, snapshot);
+
+	if (sptprac_on) {
+		/* MVC register section */
+		a6xx_snapshot_mvc_regs(device, snapshot);
+
+		/* registers dumped through DBG AHB */
+		a6xx_snapshot_dbgahb_regs(device, snapshot);
+
+		/* Shader memory */
+		a6xx_snapshot_shader(device, snapshot);
+
+		if (!adreno_smmu_is_stalled(adreno_dev))
+			memset(a6xx_crashdump_registers->hostptr, 0xaa,
+					a6xx_crashdump_registers->size);
+	}
+
+	if (adreno_is_a660(adreno_dev)) {
+		u32 val;
+
+		kgsl_regread(device, A6XX_RBBM_SNAPSHOT_STATUS, &val);
+
+		if (!val)
+			dev_err(device->dev,
+				"Interface signals may have changed during snapshot\n");
+
+		kgsl_regwrite(device, A6XX_RBBM_SNAPSHOT_STATUS, 0x0);
+	}
+
+	/* Preemption record */
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, snapshot_preemption_record,
+				rb->preemption_desc);
+		}
+	}
+}
+
+static int _a6xx_crashdump_init_mvc(struct adreno_device *adreno_dev,
+	uint64_t *ptr, uint64_t *offset)
+{
+	int qwords = 0;
+	unsigned int i, j, k;
+	unsigned int count;
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) {
+		struct a6xx_cluster_registers *cluster = &a6xx_clusters[i];
+
+		/* Skip registers that don't exist on targets other than A660 */
+		if (!adreno_is_a660(adreno_dev) &&
+				(cluster->regs == a660_fe_cluster))
+			continue;
+
+		/* The VPC registers are driven by VPC_PS cluster on a650 */
+		if (adreno_is_a650_family(adreno_dev) &&
+				(cluster->regs == a6xx_vpc_ps_cluster))
+			cluster->id = CP_CLUSTER_VPC_PS;
+
+		if (cluster->sel) {
+			ptr[qwords++] = cluster->sel->val;
+			ptr[qwords++] = ((uint64_t)cluster->sel->cd_reg << 44) |
+				(1 << 21) | 1;
+		}
+
+		cluster->offset0 = *offset;
+		for (j = 0; j < A6XX_NUM_CTXTS; j++) {
+
+			if (j == 1)
+				cluster->offset1 = *offset;
+
+			ptr[qwords++] = (cluster->id << 8) | (j << 4) | j;
+			ptr[qwords++] =
+				((uint64_t)A6XX_CP_APERTURE_CNTL_CD << 44) |
+				(1 << 21) | 1;
+
+			for (k = 0; k < cluster->num_sets; k++) {
+				count = REG_PAIR_COUNT(cluster->regs, k);
+				ptr[qwords++] =
+				a6xx_crashdump_registers->gpuaddr + *offset;
+				ptr[qwords++] =
+				(((uint64_t)cluster->regs[2 * k]) << 44) |
+						count;
+
+				*offset += count * sizeof(unsigned int);
+			}
+		}
+	}
+
+	return qwords;
+}
+
+static int _a6xx_crashdump_init_shader(struct a6xx_shader_block *block,
+		uint64_t *ptr, uint64_t *offset)
+{
+	int qwords = 0;
+	unsigned int j;
+
+	/* Capture each bank in the block */
+	for (j = 0; j < A6XX_NUM_SHADER_BANKS; j++) {
+		/* Program the aperture */
+		ptr[qwords++] =
+			(block->statetype << A6XX_SHADER_STATETYPE_SHIFT) | j;
+		ptr[qwords++] = (((uint64_t) A6XX_HLSQ_DBG_READ_SEL << 44)) |
+			(1 << 21) | 1;
+
+		/* Read all the data in one chunk */
+		ptr[qwords++] = a6xx_crashdump_registers->gpuaddr + *offset;
+		ptr[qwords++] =
+			(((uint64_t) A6XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) |
+			block->sz;
+
+		/* Remember the offset of the first bank for easy access */
+		if (j == 0)
+			block->offset = *offset;
+
+		*offset += block->sz * sizeof(unsigned int);
+	}
+
+	return qwords;
+}
+
+static int _a6xx_crashdump_init_ctx_dbgahb(uint64_t *ptr, uint64_t *offset)
+{
+	int qwords = 0;
+	unsigned int i, j, k;
+	unsigned int count;
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_ctx_clusters); i++) {
+		struct a6xx_cluster_dbgahb_registers *cluster =
+				&a6xx_dbgahb_ctx_clusters[i];
+
+		cluster->offset0 = *offset;
+
+		for (j = 0; j < A6XX_NUM_CTXTS; j++) {
+			if (j == 1)
+				cluster->offset1 = *offset;
+
+			/* Program the aperture */
+			ptr[qwords++] =
+				((cluster->statetype + j * 2) & 0xff) << 8;
+			ptr[qwords++] =
+				(((uint64_t)A6XX_HLSQ_DBG_READ_SEL << 44)) |
+					(1 << 21) | 1;
+
+			for (k = 0; k < cluster->num_sets; k++) {
+				unsigned int start = cluster->regs[2 * k];
+
+				count = REG_PAIR_COUNT(cluster->regs, k);
+				ptr[qwords++] =
+				a6xx_crashdump_registers->gpuaddr + *offset;
+				ptr[qwords++] =
+				(((uint64_t)(A6XX_HLSQ_DBG_AHB_READ_APERTURE +
+					start - cluster->regbase / 4) << 44)) |
+							count;
+
+				*offset += count * sizeof(unsigned int);
+			}
+		}
+	}
+	return qwords;
+}
+
+static int _a6xx_crashdump_init_non_ctx_dbgahb(uint64_t *ptr, uint64_t *offset)
+{
+	int qwords = 0;
+	unsigned int i, k;
+	unsigned int count;
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_non_ctx_dbgahb); i++) {
+		struct a6xx_non_ctx_dbgahb_registers *regs =
+				&a6xx_non_ctx_dbgahb[i];
+
+		regs->offset = *offset;
+
+		/* Program the aperture */
+		ptr[qwords++] = (regs->statetype & 0xff) << 8;
+		ptr[qwords++] =	(((uint64_t)A6XX_HLSQ_DBG_READ_SEL << 44)) |
+					(1 << 21) | 1;
+
+		for (k = 0; k < regs->num_sets; k++) {
+			unsigned int start = regs->regs[2 * k];
+
+			count = REG_PAIR_COUNT(regs->regs, k);
+			ptr[qwords++] =
+				a6xx_crashdump_registers->gpuaddr + *offset;
+			ptr[qwords++] =
+				(((uint64_t)(A6XX_HLSQ_DBG_AHB_READ_APERTURE +
+					start - regs->regbase / 4) << 44)) |
+							count;
+
+			*offset += count * sizeof(unsigned int);
+		}
+	}
+	return qwords;
+}
+
+void a6xx_crashdump_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int script_size = 0;
+	unsigned int data_size = 0;
+	unsigned int i, j, k, ret;
+	uint64_t *ptr;
+	uint64_t offset = 0;
+
+	if (!IS_ERR_OR_NULL(a6xx_capturescript) &&
+		!IS_ERR_OR_NULL(a6xx_crashdump_registers))
+		return;
+
+	/*
+	 * We need to allocate two buffers:
+	 * 1 - the buffer to hold the capture script
+	 * 2 - the buffer to hold the data
+	 */
+
+	/*
+	 * To save the registers, we need 16 bytes per register pair for the
+	 * script and a dword for each register in the data
+	 */
+	for (i = 0; i < ARRAY_SIZE(a6xx_reg_list); i++) {
+		struct reg_list *regs = &a6xx_reg_list[i];
+
+		/* Skip registers that don't exist on targets other than A660 */
+		if (!adreno_is_a660(adreno_dev) &&
+			(regs->regs == a660_registers))
+			continue;
+
+		/* 16 bytes for programming the aperture */
+		if (regs->sel)
+			script_size += 16;
+
+		/* Each pair needs 16 bytes (2 qwords) */
+		script_size += regs->count * 16;
+
+		/* Each register needs a dword in the data */
+		for (j = 0; j < regs->count; j++)
+			data_size += REG_PAIR_COUNT(regs->regs, j) *
+				sizeof(unsigned int);
+
+	}
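
The sizing rule above (16 bytes of script per start/end pair, one dword of data per register in the range) can be checked with a small standalone sketch; the register pairs below are invented, and REG_PAIR_COUNT is assumed to expand to end - start + 1 for a pair.

#include <stdio.h>

int main(void)
{
	/* two invented pairs: 0x100-0x103 (4 regs) and 0x200-0x200 (1 reg) */
	unsigned int regs[] = { 0x100, 0x103, 0x200, 0x200 };
	unsigned int count = 2;	/* number of start/end pairs */
	unsigned int script_size = 0, data_size = 0, j;

	/* 16 bytes (2 qwords) of script per pair */
	script_size += count * 16;

	/* one dword of data per register covered by each pair */
	for (j = 0; j < count; j++)
		data_size += (regs[2 * j + 1] - regs[2 * j] + 1) *
			sizeof(unsigned int);

	/* prints: script=32 bytes, data=20 bytes */
	printf("script=%u bytes, data=%u bytes\n", script_size, data_size);
	return 0;
}
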
+
+	/*
+	 * To save the shader blocks for each block in each type we need 32
+	 * bytes for the script (16 bytes to program the aperture and 16 to
+	 * read the data) and then a block specific number of bytes to hold
+	 * the data
+	 */
+	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) {
+		script_size += 32 * A6XX_NUM_SHADER_BANKS;
+		data_size += a6xx_shader_blocks[i].sz * sizeof(unsigned int) *
+			A6XX_NUM_SHADER_BANKS;
+	}
+
+	/* Calculate the script and data size for MVC registers */
+	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) {
+		struct a6xx_cluster_registers *cluster = &a6xx_clusters[i];
+
+		/* Skip registers that don't exist on targets other than A660 */
+		if (!adreno_is_a660(adreno_dev) &&
+				(cluster->regs == a660_fe_cluster))
+			continue;
+
+		/* 16 bytes if cluster sel exists */
+		if (cluster->sel)
+			script_size += 16;
+
+		for (j = 0; j < A6XX_NUM_CTXTS; j++) {
+
+			/* 16 bytes for programming the aperture */
+			script_size += 16;
+
+			/* Reading each pair of registers takes 16 bytes */
+			script_size += 16 * cluster->num_sets;
+
+			/* A dword per register read from the cluster list */
+			for (k = 0; k < cluster->num_sets; k++)
+				data_size += REG_PAIR_COUNT(cluster->regs, k) *
+						sizeof(unsigned int);
+		}
+	}
+
+	/* Calculate the script and data size for debug AHB registers */
+	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_ctx_clusters); i++) {
+		struct a6xx_cluster_dbgahb_registers *cluster =
+				&a6xx_dbgahb_ctx_clusters[i];
+
+		for (j = 0; j < A6XX_NUM_CTXTS; j++) {
+
+			/* 16 bytes for programming the aperture */
+			script_size += 16;
+
+			/* Reading each pair of registers takes 16 bytes */
+			script_size += 16 * cluster->num_sets;
+
+			/* A dword per register read from the cluster list */
+			for (k = 0; k < cluster->num_sets; k++)
+				data_size += REG_PAIR_COUNT(cluster->regs, k) *
+						sizeof(unsigned int);
+		}
+	}
+
+	/*
+	 * Calculate the script and data size for non context debug
+	 * AHB registers
+	 */
+	for (i = 0; i < ARRAY_SIZE(a6xx_non_ctx_dbgahb); i++) {
+		struct a6xx_non_ctx_dbgahb_registers *regs =
+				&a6xx_non_ctx_dbgahb[i];
+
+		/* 16 bytes for programming the aperture */
+		script_size += 16;
+
+		/* Reading each pair of registers takes 16 bytes */
+		script_size += 16 * regs->num_sets;
+
+		/* A dword per register read from the cluster list */
+		for (k = 0; k < regs->num_sets; k++)
+			data_size += REG_PAIR_COUNT(regs->regs, k) *
+				sizeof(unsigned int);
+	}
+
+	/* 16 bytes (2 qwords) for last entry in CD script */
+	script_size += 16;
+	/* Increment data size to store last entry in CD */
+	data_size += sizeof(unsigned int);
+
+	/* Now allocate the script and data buffers */
+
+	/* The script buffer needs 2 extra qwords at the end */
+	ret = adreno_allocate_global(device, &a6xx_capturescript,
+		script_size + 16, 0, KGSL_MEMFLAGS_GPUREADONLY,
+		KGSL_MEMDESC_PRIVILEGED, "capturescript");
+	if (ret)
+		return;
+
+	ret = adreno_allocate_global(device, &a6xx_crashdump_registers,
+		data_size, 0, 0, KGSL_MEMDESC_PRIVILEGED,
+		"capturescript_regs");
+	if (ret)
+		return;
+
+	/* Build the crash script */
+
+	ptr = (uint64_t *)a6xx_capturescript->hostptr;
+
+	/* For the registers, program a read command for each pair */
+	for (i = 0; i < ARRAY_SIZE(a6xx_reg_list); i++) {
+		struct reg_list *regs = &a6xx_reg_list[i];
+
+		/* Skip registers that don't exist on targets other than A660 */
+		if (!adreno_is_a660(adreno_dev) &&
+			(regs->regs == a660_registers))
+			continue;
+
+		regs->offset = offset;
+
+		/* Program the SEL_CNTL_CD register appropriately */
+		if (regs->sel) {
+			*ptr++ = regs->sel->val;
+			*ptr++ = (((uint64_t)regs->sel->cd_reg << 44)) |
+					(1 << 21) | 1;
+		}
+
+		for (j = 0; j < regs->count; j++) {
+			unsigned int r = REG_PAIR_COUNT(regs->regs, j);
+			*ptr++ = a6xx_crashdump_registers->gpuaddr + offset;
+			*ptr++ = (((uint64_t) regs->regs[2 * j]) << 44) | r;
+			offset += r * sizeof(unsigned int);
+		}
+	}
+
+	/* Program each shader block */
+	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) {
+		ptr += _a6xx_crashdump_init_shader(&a6xx_shader_blocks[i], ptr,
+							&offset);
+	}
+
+	/* Program the capturescript for the MVC registers */
+	ptr += _a6xx_crashdump_init_mvc(adreno_dev, ptr, &offset);
+
+	if (!adreno_is_a663(adreno_dev)) {
+		ptr += _a6xx_crashdump_init_ctx_dbgahb(ptr, &offset);
+
+		ptr += _a6xx_crashdump_init_non_ctx_dbgahb(ptr, &offset);
+	}
+
+	/* Save CD register end pointer to check CD status completion */
+	a6xx_cd_reg_end = a6xx_crashdump_registers->hostptr + offset;
+
+	memset(a6xx_crashdump_registers->hostptr, 0xaa,
+			a6xx_crashdump_registers->size);
+
+	/* Program the capturescript to read the last register entry */
+	*ptr++ = a6xx_crashdump_registers->gpuaddr + offset;
+	*ptr++ = (((uint64_t) A6XX_CP_CRASH_DUMP_STATUS) << 44) | (uint64_t) 1;
+
+	*ptr++ = 0;
+	*ptr++ = 0;
+}
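
For reference, each read command the script builder emits above is two qwords: the destination GPU address, followed by the source register offset packed into the upper bits with the dword count in the lower bits. A standalone sketch of that encoding, with an invented register offset and destination address:

#include <stdint.h>
#include <stdio.h>

/* Encode one crash-dump read command: destination address, then source. */
static void encode_read(uint64_t *cmd, uint64_t dest_gpuaddr,
		uint32_t src_reg_offset, uint32_t dword_count)
{
	cmd[0] = dest_gpuaddr;				/* where the data lands */
	cmd[1] = ((uint64_t)src_reg_offset << 44) | dword_count;
}

int main(void)
{
	uint64_t cmd[2];

	/* invented: read 4 dwords starting at register offset 0x800 */
	encode_read(cmd, 0x100000ULL, 0x800, 4);

	printf("%#llx %#llx\n", (unsigned long long)cmd[0],
			(unsigned long long)cmd[1]);
	return 0;
}
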

+ 193 - 0
qcom/opensource/graphics-kernel/adreno_compat.c

@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_compat.h"
+#include "kgsl_compat.h"
+
+int adreno_getproperty_compat(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	int status = -EINVAL;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	switch (param->type) {
+	case KGSL_PROP_DEVICE_INFO:
+		{
+			struct kgsl_devinfo_compat devinfo;
+
+			if (param->sizebytes != sizeof(devinfo)) {
+				status = -EINVAL;
+				break;
+			}
+
+			memset(&devinfo, 0, sizeof(devinfo));
+			devinfo.device_id = device->id + 1;
+			devinfo.chip_id = adreno_dev->chipid;
+			devinfo.mmu_enabled =
+				kgsl_mmu_has_feature(device, KGSL_MMU_PAGED);
+			devinfo.gmem_gpubaseaddr = 0;
+			devinfo.gmem_sizebytes =
+					adreno_dev->gpucore->gmem_size;
+
+			if (copy_to_user(param->value, &devinfo,
+				sizeof(devinfo))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_DEVICE_SHADOW:
+		{
+			struct kgsl_shadowprop_compat shadowprop;
+
+			if (param->sizebytes != sizeof(shadowprop)) {
+				status = -EINVAL;
+				break;
+			}
+			memset(&shadowprop, 0, sizeof(shadowprop));
+			if (device->memstore->hostptr) {
+				/* Give a token address to identify memstore */
+				shadowprop.gpuaddr = (unsigned int)
+					KGSL_MEMSTORE_TOKEN_ADDRESS;
+				shadowprop.size =
+					(unsigned int) device->memstore->size;
+				/*
+				 * GSL needs this to be set, even if it
+				 * appears to be meaningless
+				 */
+				shadowprop.flags = KGSL_FLAGS_INITIALIZED |
+					KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS;
+			}
+			if (copy_to_user(param->value, &shadowprop,
+				sizeof(shadowprop))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	default:
+		status = device->ftbl->getproperty(device, param);
+	}
+
+	return status;
+}
+
+int adreno_setproperty_compat(struct kgsl_device_private *dev_priv,
+				unsigned int type,
+				void __user *value,
+				unsigned int sizebytes)
+{
+	int status = -EINVAL;
+	struct kgsl_device *device = dev_priv->device;
+
+	switch (type) {
+	case KGSL_PROP_PWR_CONSTRAINT:
+	case KGSL_PROP_L3_PWR_CONSTRAINT: {
+			struct kgsl_device_constraint_compat constraint32;
+			struct kgsl_device_constraint constraint;
+			struct kgsl_context *context;
+
+			if (sizebytes != sizeof(constraint32))
+				break;
+
+			if (copy_from_user(&constraint32, value,
+				sizeof(constraint32))) {
+				status = -EFAULT;
+				break;
+			}
+
+			/* Populate the real constraint type from the compat */
+			constraint.type = constraint32.type;
+			constraint.context_id = constraint32.context_id;
+			constraint.data = compat_ptr(constraint32.data);
+			constraint.size = (size_t)constraint32.size;
+
+			context = kgsl_context_get_owner(dev_priv,
+							constraint.context_id);
+			if (context == NULL)
+				break;
+			status = adreno_set_constraint(device, context,
+								&constraint);
+			kgsl_context_put(context);
+		}
+		break;
+	default:
+		/*
+		 * Call adreno_setproperty in case the property type was
+		 * KGSL_PROP_PWRCTRL
+		 */
+		status = device->ftbl->setproperty(dev_priv, type, value,
+						sizebytes);
+	}
+
+	return status;
+}
+
+static long adreno_ioctl_perfcounter_query_compat(
+		struct kgsl_device_private *dev_priv, unsigned int cmd,
+		void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_perfcounter_query_compat *query32 = data;
+	struct kgsl_perfcounter_query query;
+	long result;
+
+	query.groupid = query32->groupid;
+	query.countables = compat_ptr(query32->countables);
+	query.count = query32->count;
+	query.max_counters = query32->max_counters;
+
+	result = adreno_perfcounter_query_group(adreno_dev,
+		query.groupid, query.countables,
+		query.count, &query.max_counters);
+	query32->max_counters = query.max_counters;
+
+	return result;
+}
+
+static long adreno_ioctl_perfcounter_read_compat(
+		struct kgsl_device_private *dev_priv, unsigned int cmd,
+		void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_perfcounter_read_compat *read32 = data;
+	struct kgsl_perfcounter_read read;
+
+	/*
+	 * When performance counter zapping is enabled, the counters are cleared
+	 * across context switches. Reading the counters when they are zapped is
+	 * not permitted.
+	 */
+	if (!adreno_dev->perfcounter)
+		return -EPERM;
+
+	read.reads = (struct kgsl_perfcounter_read_group __user *)
+		(uintptr_t)read32->reads;
+	read.count = read32->count;
+
+	return adreno_perfcounter_read_group(adreno_dev, read.reads,
+		read.count);
+}
+
+static struct kgsl_ioctl adreno_compat_ioctl_funcs[] = {
+	{ IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get },
+	{ IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put },
+	{ IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT,
+		adreno_ioctl_perfcounter_query_compat },
+	{ IOCTL_KGSL_PERFCOUNTER_READ_COMPAT,
+		adreno_ioctl_perfcounter_read_compat },
+};
+
+long adreno_compat_ioctl(struct kgsl_device_private *dev_priv,
+			      unsigned int cmd, unsigned long arg)
+{
+	return adreno_ioctl_helper(dev_priv, cmd, arg,
+		adreno_compat_ioctl_funcs,
+		ARRAY_SIZE(adreno_compat_ioctl_funcs));
+}

+ 46 - 0
qcom/opensource/graphics-kernel/adreno_compat.h

@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013-2015, 2017, 2019 The Linux Foundation. All rights reserved.
+ */
+#ifndef __ADRENO_COMPAT_H
+#define __ADRENO_COMPAT_H
+
+#ifdef CONFIG_COMPAT
+
+struct kgsl_device;
+struct kgsl_device_private;
+
+int adreno_getproperty_compat(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param);
+
+int adreno_setproperty_compat(struct kgsl_device_private *dev_priv,
+				unsigned int type,
+				void __user *value,
+				unsigned int sizebytes);
+
+long adreno_compat_ioctl(struct kgsl_device_private *dev_priv,
+			unsigned int cmd, unsigned long arg);
+
+#else
+
+static inline int adreno_getproperty_compat(struct kgsl_device *device,
+		struct kgsl_device_getproperty *param)
+{
+	return -EINVAL;
+}
+
+static inline int adreno_setproperty_compat(struct kgsl_device_private
+				*dev_priv, unsigned int type,
+				void __user *value, unsigned int sizebytes)
+{
+	return -EINVAL;
+}
+
+static inline long adreno_compat_ioctl(struct kgsl_device_private *dev_priv,
+				unsigned int cmd, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+#endif /* CONFIG_COMPAT */
+#endif /* __ADRENO_COMPAT_H */

+ 333 - 0
qcom/opensource/graphics-kernel/adreno_coresight.c

@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#include "adreno.h"
+
+#define TO_ADRENO_CORESIGHT_ATTR(_attr) \
+	container_of(_attr, struct adreno_coresight_attr, attr)
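
The macro above is the usual container_of() idiom: sysfs hands the callback a pointer to the embedded device_attribute and the driver steps back to the enclosing adreno_coresight_attr. A simplified user-space sketch of the same idea; the struct bodies below are stand-ins, not the real kernel types.

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel structures used by the driver. */
struct device_attribute { const char *name; };

struct adreno_coresight_attr {
	struct device_attribute attr;
	int *reg;
};

/* Same idea as container_of(): step back from the embedded member. */
#define to_coresight_attr(_attr) \
	((struct adreno_coresight_attr *)((char *)(_attr) - \
		offsetof(struct adreno_coresight_attr, attr)))

int main(void)
{
	int reg = 42;
	struct adreno_coresight_attr cattr = { { "example" }, &reg };
	struct device_attribute *attr = &cattr.attr;

	/* sysfs hands the callback 'attr'; the wrapper is recovered from it */
	printf("reg value = %d\n", *to_coresight_attr(attr)->reg);
	return 0;
}
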
+
+ssize_t adreno_coresight_show_register(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(dev);
+	struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr);
+	struct kgsl_device *device = adreno_csdev->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int val = 0;
+
+	mutex_lock(&device->mutex);
+	/*
+	 * Return the current value of the register if coresight is enabled,
+	 * otherwise report 0
+	 */
+
+	if (!adreno_csdev->enabled)
+		goto out;
+
+	if (!adreno_active_count_get(adreno_dev)) {
+		kgsl_regread(device, cattr->reg->offset, &cattr->reg->value);
+		adreno_active_count_put(adreno_dev);
+	}
+	val = cattr->reg->value;
+
+out:
+	mutex_unlock(&device->mutex);
+	return scnprintf(buf, PAGE_SIZE, "0x%X\n", val);
+}
+
+ssize_t adreno_coresight_store_register(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+
+	struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr);
+	struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(dev);
+	struct kgsl_device *device = adreno_csdev->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+
+	/* Ignore writes while coresight is off */
+	if (!adreno_csdev->enabled)
+		goto out;
+
+	cattr->reg->value = val;
+	if (!adreno_active_count_get(adreno_dev)) {
+		kgsl_regwrite(device, cattr->reg->offset, cattr->reg->value);
+		adreno_active_count_put(adreno_dev);
+	}
+
+out:
+	mutex_unlock(&device->mutex);
+	return size;
+}
+
+/*
+ * This is a generic function to disable coresight debug bus on Adreno
+ * devices. This function in turn calls the device specific function
+ * through the gpudev hook.
+ */
+static void adreno_coresight_disable(struct coresight_device *csdev,
+					struct perf_event *event)
+{
+	struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(&csdev->dev);
+	struct kgsl_device *device = adreno_csdev->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_coresight *coresight = adreno_csdev->coresight;
+	int i;
+
+	mutex_lock(&device->mutex);
+
+	if (!adreno_csdev->enabled) {
+		mutex_unlock(&device->mutex);
+		return;
+	}
+
+	if (!adreno_active_count_get(adreno_dev)) {
+		for (i = 0; i < coresight->count; i++)
+			kgsl_regwrite(device,
+				coresight->registers[i].offset, 0);
+		adreno_active_count_put(adreno_dev);
+	}
+
+	adreno_csdev->enabled = false;
+
+	mutex_unlock(&device->mutex);
+}
+
+static void _adreno_coresight_get_and_clear(struct adreno_device *adreno_dev,
+		struct adreno_coresight_device *adreno_csdev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_coresight *coresight = adreno_csdev->coresight;
+	int i;
+
+	if (IS_ERR_OR_NULL(adreno_csdev->dev) || !adreno_csdev->enabled)
+		return;
+
+	kgsl_pre_hwaccess(device);
+	/*
+	 * Save the current value of each coresight register
+	 * and then clear each register
+	 */
+	for (i = 0; i < coresight->count; i++) {
+		kgsl_regread(device, coresight->registers[i].offset,
+			&coresight->registers[i].value);
+		kgsl_regwrite(device, coresight->registers[i].offset, 0);
+	}
+}
+
+static void _adreno_coresight_set(struct adreno_device *adreno_dev,
+		struct adreno_coresight_device *adreno_csdev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_coresight *coresight = adreno_csdev->coresight;
+	int i;
+
+	if (IS_ERR_OR_NULL(adreno_csdev->dev) || !adreno_csdev->enabled)
+		return;
+
+	for (i = 0; i < coresight->count; i++)
+		kgsl_regwrite(device, coresight->registers[i].offset,
+			coresight->registers[i].value);
+}
+
+/* Generic function to enable coresight debug bus on adreno devices */
+static int adreno_coresight_enable(struct coresight_device *csdev,
+				struct perf_event *event, u32 mode)
+{
+	struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(&csdev->dev);
+	const struct adreno_coresight *coresight = adreno_csdev->coresight;
+	struct kgsl_device *device = adreno_csdev->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret = 0;
+
+	mutex_lock(&device->mutex);
+	if (!adreno_csdev->enabled) {
+		int i;
+
+		adreno_csdev->enabled = true;
+
+		/* Reset all the debug registers to their default values */
+		for (i = 0; i < coresight->count; i++)
+			coresight->registers[i].value =
+				coresight->registers[i].initial;
+
+		ret = adreno_active_count_get(adreno_dev);
+		if (!ret) {
+			_adreno_coresight_set(adreno_dev, adreno_csdev);
+			adreno_active_count_put(adreno_dev);
+		}
+
+	}
+	mutex_unlock(&device->mutex);
+	return ret;
+}
+
+void adreno_coresight_stop(struct adreno_device *adreno_dev)
+{
+	_adreno_coresight_get_and_clear(adreno_dev, &adreno_dev->gx_coresight);
+	_adreno_coresight_get_and_clear(adreno_dev, &adreno_dev->cx_coresight);
+}
+
+void adreno_coresight_start(struct adreno_device *adreno_dev)
+{
+	_adreno_coresight_set(adreno_dev, &adreno_dev->gx_coresight);
+	_adreno_coresight_set(adreno_dev, &adreno_dev->cx_coresight);
+}
+
+static int adreno_coresight_trace_id(struct coresight_device *csdev)
+{
+	struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(&csdev->dev);
+
+	return adreno_csdev->atid;
+}
+
+static const struct coresight_ops_source adreno_coresight_source_ops = {
+	.trace_id = adreno_coresight_trace_id,
+	.enable = adreno_coresight_enable,
+	.disable = adreno_coresight_disable,
+};
+
+static const struct coresight_ops adreno_coresight_ops = {
+	.source_ops = &adreno_coresight_source_ops,
+};
+
+void adreno_coresight_remove(struct adreno_device *adreno_dev)
+{
+	if (!IS_ERR_OR_NULL(adreno_dev->gx_coresight.dev))
+		coresight_unregister(adreno_dev->gx_coresight.dev);
+
+	if (!IS_ERR_OR_NULL(adreno_dev->cx_coresight.dev))
+		coresight_unregister(adreno_dev->cx_coresight.dev);
+}
+
+static int funnel_gfx_enable(struct coresight_device *csdev, int inport,
+			 int outport)
+{
+	struct kgsl_device *device = kgsl_get_device(0);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret;
+
+	if (!device)
+		return -ENODEV;
+
+	mutex_lock(&device->mutex);
+
+	ret = adreno_active_count_get(adreno_dev);
+	if (ret)
+		goto err;
+
+	/* Now that the GPU is up, call into the coresight driver to enable the funnel */
+	ret = adreno_dev->funnel_gfx.funnel_ops->link_ops->enable(csdev, inport, outport);
+
+	adreno_active_count_put(adreno_dev);
+err:
+	mutex_unlock(&device->mutex);
+	return ret;
+}
+
+static void funnel_gfx_disable(struct coresight_device *csdev, int inport,
+			   int outport)
+{
+	struct kgsl_device *device = kgsl_get_device(0);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret;
+
+	if (!device)
+		return;
+
+	mutex_lock(&device->mutex);
+
+	ret = adreno_active_count_get(adreno_dev);
+	if (ret)
+		goto err;
+
+	/* Now that the GPU is up, call into the coresight driver to disable the funnel */
+	adreno_dev->funnel_gfx.funnel_ops->link_ops->disable(csdev, inport, outport);
+
+	adreno_active_count_put(adreno_dev);
+err:
+	mutex_unlock(&device->mutex);
+}
+
+struct coresight_ops_link funnel_link_gfx_ops = {
+	.enable = funnel_gfx_enable,
+	.disable = funnel_gfx_disable,
+};
+
+struct coresight_ops funnel_gfx_ops = {
+	.link_ops = &funnel_link_gfx_ops,
+};
+
+static void adreno_coresight_dev_probe(struct kgsl_device *device,
+		const struct adreno_coresight *coresight,
+		struct adreno_coresight_device *adreno_csdev,
+		struct device_node *node)
+{
+	struct platform_device *pdev = of_find_device_by_node(node);
+	struct coresight_desc desc;
+	u32 atid;
+
+	if (!pdev)
+		return;
+
+	if (of_property_read_u32(node, "coresight-atid", &atid))
+		return;
+
+	if (of_property_read_string(node, "coresight-name", &desc.name))
+		return;
+
+	desc.pdata = coresight_get_platform_data(&pdev->dev);
+	platform_device_put(pdev);
+
+	if (IS_ERR(desc.pdata))
+		return;
+
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
+	desc.ops = &adreno_coresight_ops;
+	desc.dev = &device->pdev->dev;
+	desc.groups = coresight->groups;
+
+	adreno_csdev->atid = atid;
+	adreno_csdev->device = device;
+	adreno_csdev->dev = coresight_register(&desc);
+
+	adreno_csdev->coresight = coresight;
+
+	if (!IS_ERR_OR_NULL(adreno_csdev->dev))
+		dev_set_drvdata(&adreno_csdev->dev->dev, adreno_csdev);
+}
+
+void adreno_coresight_add_device(struct adreno_device *adreno_dev, const char *name,
+		const struct adreno_coresight *coresight,
+		struct adreno_coresight_device *adreno_csdev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct device_node *node = of_find_compatible_node(device->pdev->dev.of_node, NULL, name);
+	struct adreno_funnel_device *funnel_gfx = &adreno_dev->funnel_gfx;
+
+	if (!node)
+		return;
+
+	/* Set the funnel ops to the graphics ops so the GPU is brought up before the funnel is enabled */
+	if ((funnel_gfx != NULL) && (funnel_gfx->funnel_csdev != NULL)
+						&& (funnel_gfx->funnel_csdev->ops == NULL))
+		funnel_gfx->funnel_csdev->ops = &funnel_gfx_ops;
+
+	adreno_coresight_dev_probe(device, coresight, adreno_csdev, node);
+
+	of_node_put(node);
+}

+ 164 - 0
qcom/opensource/graphics-kernel/adreno_coresight.h

@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_CORESIGHT_H_
+#define _ADRENO_CORESIGHT_H_
+
+#include <linux/device.h>
+#include <linux/coresight.h>
+
+struct adreno_device;
+
+/**
+ * struct adreno_coresight_register - Definition for a coresight (tracebus)
+ * debug register
+ */
+struct adreno_coresight_register {
+	/** @offset: Offset of the debug register in the KGSL register space */
+	unsigned int offset;
+	/** @initial: Default value to write when coresight is enabled */
+	unsigned int initial;
+	/**
+	 * @value: Current shadow value of the register (to be reprogrammed
+	 * after power collapse)
+	 */
+	unsigned int value;
+};
+
+/**
+ * struct adreno_coresight_attr - Local attribute struct for coresight sysfs
+ * files
+ */
+struct adreno_coresight_attr {
+	/** @attr: Base device attribute */
+	struct device_attribute attr;
+	/**
+	 * @reg: Pointer to the &struct adreno_coresight_register definition
+	 * for this register
+	 */
+	struct adreno_coresight_register *reg;
+};
+
+/**
+ * adreno_coresight_show_register - Callback function for sysfs show
+ * @device: Pointer to a device handle
+ * @attr: Pointer to the device attribute
+ * @buf: Contains the output buffer for sysfs
+ *
+ * Callback function to write the value of the register into the sysfs node.
+ * Return: The size of the data written to the buffer or negative on error.
+ */
+ssize_t adreno_coresight_show_register(struct device *device,
+		struct device_attribute *attr, char *buf);
+
+/**
+ * adreno_coresight_store_register - Callback function for sysfs store
+ * @dev: Pointer to a device handle
+ * @attr: Pointer to the device attribute
+ * @buf: Contains the input buffer for sysfs
+ * @size: Size of the data stored in buf
+ *
+ * Callback function to write a value from the sysfs node into the register.
+ * Return: The size of the data consumed or negative on error.
+ */
+ssize_t adreno_coresight_store_register(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size);
+
+#define ADRENO_CORESIGHT_ATTR(_attrname, _reg) \
+	struct adreno_coresight_attr coresight_attr_##_attrname  = { \
+		__ATTR(_attrname, 0644, \
+		adreno_coresight_show_register, \
+		adreno_coresight_store_register), \
+		(_reg), }
+
+/**
+ * struct adreno_coresight - GPU specific coresight definition
+ */
+struct adreno_coresight {
+	/**
+	 * @registers: Array of GPU specific registers to configure trace
+	 * bus output
+	 */
+	struct adreno_coresight_register *registers;
+	/** @count: Number of registers in the array */
+	unsigned int count;
+	/** @groups: Pointer to an attribute list of control files */
+	const struct attribute_group **groups;
+};
+
+/**
+ * struct adreno_coresight_device - Container for a coresight instance
+ */
+struct adreno_coresight_device {
+	/** @dev: Pointer to the coresight device */
+	struct coresight_device *dev;
+	/** @coresight: Pointer to the GPU specific coresight definition */
+	const struct adreno_coresight *coresight;
+	/** @device: Pointer to a GPU device handle */
+	struct kgsl_device *device;
+	/** @enabled: True if the coresight instance is enabled */
+	bool enabled;
+	/** @atid: The unique ATID value of the coresight device */
+	unsigned int atid;
+};
+
+/**
+ * struct adreno_funnel_device - Container for a coresight gfx funnel
+ */
+struct adreno_funnel_device {
+	/** @funnel_dev: Pointer to the gfx funnel device */
+	struct device *funnel_dev;
+	/** @funnel_csdev: Pointer to the gfx funnel coresight definition */
+	struct coresight_device *funnel_csdev;
+	/** @funnel_ops: Function pointers to enable/disable the coresight funnel */
+	const struct coresight_ops *funnel_ops;
+};
+
+#ifdef CONFIG_QCOM_KGSL_CORESIGHT
+
+void adreno_coresight_add_device(struct adreno_device *adreno_dev,
+		const char *name,
+		const struct adreno_coresight *coresight,
+		struct adreno_coresight_device *adreno_csdev);
+
+/**
+ * adreno_coresight_start - Reprogram coresight registers after power collapse
+ * @adreno_dev: An Adreno GPU device handle
+ *
+ * Reprogram the cached values to the coresight registers on power up.
+ */
+void adreno_coresight_start(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_coresight_stop - Reprogram coresight registers after power collapse
+ * @adreno_dev: An Adreno GPU device handle
+ *
+ * Cache the current coresight register values so they can be restored after
+ * power collapse.
+ */
+void adreno_coresight_stop(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_coresight_remove - Destroy active coresight devices
+ * @adreno_dev: An Adreno GPU device handle
+ *
+ * Destroy any active coresight devices.
+ */
+void adreno_coresight_remove(struct adreno_device *adreno_dev);
+#else
+static inline void adreno_coresight_add_device(struct adreno_device *adreno_dev,
+		const char *name,
+		const struct adreno_coresight *coresight,
+		struct adreno_coresight_device *adreno_csdev)
+{
+}
+
+static inline void adreno_coresight_start(struct adreno_device *adreno_dev) { }
+static inline void adreno_coresight_stop(struct adreno_device *adreno_dev) { }
+static inline void adreno_coresight_remove(struct adreno_device *adreno_dev) { }
+#endif
+#endif

+ 1047 - 0
qcom/opensource/graphics-kernel/adreno_cp_parser.c

@@ -0,0 +1,1047 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/slab.h>
+
+#include "adreno.h"
+#include "adreno_cp_parser.h"
+#include "adreno_pm4types.h"
+#include "adreno_snapshot.h"
+
+#define MAX_IB_OBJS 1000
+#define NUM_SET_DRAW_GROUPS 32
+
+struct set_draw_state {
+	uint64_t cmd_stream_addr;
+	uint64_t cmd_stream_dwords;
+};
+
+/* List of variables used when parsing an IB */
+struct ib_parser_variables {
+	/* List of registers containing addresses and their sizes */
+	unsigned int cp_addr_regs[ADRENO_CP_ADDR_MAX];
+	/* 32 groups of command streams in set draw state packets */
+	struct set_draw_state set_draw_groups[NUM_SET_DRAW_GROUPS];
+};
+
+/*
+ * Used for locating shader objects. This array holds the unit size of shader
+ * objects based on the type and block of the shader. The type can be 0 or 1,
+ * hence there are 2 columns, and the block can be 0-6, hence 7 rows.
+ */
+static int load_state_unit_sizes[7][2] = {
+	{ 2, 4 },
+	{ 0, 1 },
+	{ 2, 4 },
+	{ 0, 1 },
+	{ 8, 2 },
+	{ 8, 2 },
+	{ 8, 2 },
+};
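
ib_save_mip_addresses() below uses this table to size the object as (num_levels * unitsize) << 2 bytes. A small standalone sketch of that lookup, with hypothetical packet fields:

#include <stdio.h>

/* Same shape as load_state_unit_sizes[block][type] above. */
static const int unit_sizes[7][2] = {
	{ 2, 4 }, { 0, 1 }, { 2, 4 }, { 0, 1 }, { 8, 2 }, { 8, 2 }, { 8, 2 },
};

int main(void)
{
	/* hypothetical packet fields: block 3, type 1, 6 mip levels */
	int block = 3, type = 1, num_levels = 6;

	int unitsize = unit_sizes[block][type];
	int size = (num_levels * unitsize) << 2;	/* size in bytes */

	printf("unitsize=%d dwords, size=%d bytes\n", unitsize, size);
	return 0;
}
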
+
+static int adreno_ib_find_objs(struct kgsl_device *device,
+				struct kgsl_process_private *process,
+				uint64_t gpuaddr, uint64_t dwords,
+				uint64_t ib2base, int obj_type,
+				struct adreno_ib_object_list *ib_obj_list,
+				int ib_level);
+
+static int ib_parse_set_draw_state(struct kgsl_device *device,
+	unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars);
+
+static int ib_parse_type7_set_draw_state(struct kgsl_device *device,
+	unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list);
+
+/*
+ * adreno_ib_merge_range() - Increases the address range tracked by an ib
+ * object
+ * @ib_obj: The ib object
+ * @gpuaddr: The start address which is to be merged
+ * @size: Size of the range being merged
+ */
+static void adreno_ib_merge_range(struct adreno_ib_object *ib_obj,
+		uint64_t gpuaddr, uint64_t size)
+{
+	uint64_t addr_end1 = ib_obj->gpuaddr + ib_obj->size;
+	uint64_t addr_end2 = gpuaddr + size;
+
+	if (gpuaddr < ib_obj->gpuaddr)
+		ib_obj->gpuaddr = gpuaddr;
+	if (addr_end2 > addr_end1)
+		ib_obj->size = addr_end2 - ib_obj->gpuaddr;
+	else
+		ib_obj->size = addr_end1 - ib_obj->gpuaddr;
+}
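
A quick standalone check of the merge rule above, using made-up address ranges:

#include <stdint.h>
#include <stdio.h>

struct range { uint64_t gpuaddr, size; };

/* Same merge rule as above: grow the object to cover both ranges. */
static void merge_range(struct range *obj, uint64_t gpuaddr, uint64_t size)
{
	uint64_t end1 = obj->gpuaddr + obj->size;
	uint64_t end2 = gpuaddr + size;

	if (gpuaddr < obj->gpuaddr)
		obj->gpuaddr = gpuaddr;
	obj->size = (end2 > end1 ? end2 : end1) - obj->gpuaddr;
}

int main(void)
{
	/* invented ranges: [0x1000, 0x1400) merged with [0x800, 0x1200) */
	struct range obj = { 0x1000, 0x400 };

	merge_range(&obj, 0x800, 0xa00);

	/* prints: base=0x800 size=0xc00, covering both input ranges */
	printf("base=%#llx size=%#llx\n",
		(unsigned long long)obj.gpuaddr,
		(unsigned long long)obj.size);
	return 0;
}
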
+
+/*
+ * adreno_ib_check_overlap() - Checks if an address range overlaps
+ * @gpuaddr: The start address range to check for overlap
+ * @size: Size of the address range
+ * @type: The type of address range
+ * @ib_obj_list: The list of address ranges to check for overlap
+ *
+ * Checks if an address range overlaps with a list of address ranges
+ * Returns the entry from list which overlaps else NULL
+ */
+static struct adreno_ib_object *adreno_ib_check_overlap(uint64_t gpuaddr,
+		uint64_t size, int type,
+		struct adreno_ib_object_list *ib_obj_list)
+{
+	struct adreno_ib_object *ib_obj;
+	int i;
+
+	for (i = 0; i < ib_obj_list->num_objs; i++) {
+		ib_obj = &(ib_obj_list->obj_list[i]);
+		if ((type == ib_obj->snapshot_obj_type) &&
+			kgsl_addr_range_overlap(ib_obj->gpuaddr, ib_obj->size,
+			gpuaddr, size))
+			/* regions overlap */
+			return ib_obj;
+	}
+	return NULL;
+}
+
+/*
+ * adreno_ib_add() - Add a gpuaddress range to list
+ * @process: Process in which the gpuaddress is mapped
+ * @gpuaddr: The GPU address to be added to the list
+ * @type: The type of address range
+ * @ib_obj_list: List of the address ranges in which the given range is to be
+ * added
+ *
+ * Add a gpuaddress range as an ib object to a given list after checking if it
+ * overlaps with another entry on the list. If it conflicts then change the
+ * existing entry to incorporate this range
+ *
+ * Returns 0 on success else error code
+ */
+static int adreno_ib_add(struct kgsl_process_private *process,
+				uint64_t gpuaddr, int type,
+				struct adreno_ib_object_list *ib_obj_list)
+{
+	uint64_t size;
+	struct adreno_ib_object *ib_obj;
+	struct kgsl_mem_entry *entry;
+
+	if (ib_obj_list->num_objs >= MAX_IB_OBJS)
+		return -E2BIG;
+
+	entry = kgsl_sharedmem_find(process, gpuaddr);
+	if (!entry)
+		/*
+		 * Do not fail if gpuaddr not found, we can continue
+		 * to search for other objects even if few objects are
+		 * not found
+		 */
+		return 0;
+
+	size = entry->memdesc.size;
+	gpuaddr = entry->memdesc.gpuaddr;
+
+	ib_obj = adreno_ib_check_overlap(gpuaddr, size, type, ib_obj_list);
+	if (ib_obj) {
+		adreno_ib_merge_range(ib_obj, gpuaddr, size);
+		kgsl_mem_entry_put(entry);
+	} else {
+		adreno_ib_init_ib_obj(gpuaddr, size, type, entry,
+			&(ib_obj_list->obj_list[ib_obj_list->num_objs]));
+		ib_obj_list->num_objs++;
+		/* Skip reclaim for the memdesc until it is dumped */
+		entry->memdesc.priv |= KGSL_MEMDESC_SKIP_RECLAIM;
+	}
+	return 0;
+}
+
+/*
+ * ib_save_mip_addresses() - Find mip addresses
+ * @pkt: Pointer to the packet in IB
+ * @process: The process in which IB is mapped
+ * @ib_obj_list: List in which any objects found are added
+ *
+ * Returns 0 on success else error code
+ */
+static int ib_save_mip_addresses(unsigned int *pkt,
+		struct kgsl_process_private *process,
+		struct adreno_ib_object_list *ib_obj_list)
+{
+	int ret = 0;
+	int num_levels = (pkt[1] >> 22) & 0x03FF;
+	int i;
+	unsigned int *hostptr;
+	struct kgsl_mem_entry *ent;
+	unsigned int block, type;
+	int unitsize = 0;
+
+	block = (pkt[1] >> 19) & 0x07;
+	type = pkt[2] & 0x03;
+
+	if (type == 0)
+		unitsize = load_state_unit_sizes[block][0];
+	else
+		unitsize = load_state_unit_sizes[block][1];
+
+	if (3 == block && 1 == type) {
+		uint64_t gpuaddr = pkt[2] & 0xFFFFFFFC;
+		uint64_t size = (num_levels * unitsize) << 2;
+
+		ent = kgsl_sharedmem_find(process, gpuaddr);
+		if (ent == NULL)
+			return 0;
+
+		if (!kgsl_gpuaddr_in_memdesc(&ent->memdesc,
+			gpuaddr, size)) {
+			kgsl_mem_entry_put(ent);
+			return 0;
+		}
+
+		hostptr = kgsl_gpuaddr_to_vaddr(&ent->memdesc, gpuaddr);
+		if (hostptr != NULL) {
+			for (i = 0; i < num_levels; i++) {
+				ret = adreno_ib_add(process, hostptr[i],
+					SNAPSHOT_GPU_OBJECT_GENERIC,
+					ib_obj_list);
+				if (ret)
+					break;
+			}
+		}
+
+		kgsl_memdesc_unmap(&ent->memdesc);
+		kgsl_mem_entry_put(ent);
+	}
+	return ret;
+}
+
+/*
+ * ib_parse_load_state() - Parse load state packet
+ * @pkt: Pointer to the packet in IB
+ * @process: The process in which the IB is mapped
+ * @ib_obj_list: List in which any objects found are added
+ * @ib_parse_vars: Variable list that stores temporary addresses
+ *
+ * Parse load state packet found in an IB and add any memory object found to
+ * a list
+ * Returns 0 on success else error code
+ */
+static int ib_parse_load_state(unsigned int *pkt,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int ret = 0;
+	int i;
+
+	/*
+	 * The objective here is to find indirect shaders, i.e. shaders loaded from
+	 * GPU memory instead of directly in the command.  These should be added
+	 * to the list of memory objects to dump. So look at the load state
+	 * if the block is indirect (source = 4). If so then add the memory
+	 * address to the list.  The size of the object differs depending on the
+	 * type per the load_state_unit_sizes array above.
+	 */
+
+	if (type3_pkt_size(pkt[0]) < 2)
+		return 0;
+
+	/*
+	 * Anything from 3rd ordinal onwards of packet can be a memory object,
+	 * no need to be fancy about parsing it, just save it if it looks
+	 * like memory
+	 */
+	for (i = 0; i <= (type3_pkt_size(pkt[0]) - 2); i++) {
+		ret |= adreno_ib_add(process, pkt[2 + i] & 0xFFFFFFFC,
+				SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		if (ret)
+			break;
+	}
+	/* get the mip addresses */
+	if (!ret)
+		ret = ib_save_mip_addresses(pkt, process, ib_obj_list);
+	return ret;
+}
+
+/*
+ * This opcode sets the base addresses for the visibility stream buffer and the
+ * visibility stream size buffer.
+ */
+
+static int ib_parse_set_bin_data(unsigned int *pkt,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int ret = 0;
+
+	if (type3_pkt_size(pkt[0]) < 2)
+		return 0;
+
+	/* Visibility stream buffer */
+	ret = adreno_ib_add(process, pkt[1],
+		SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+	if (ret)
+		return ret;
+
+	/* Visibility stream size buffer (fixed size 8 dwords) */
+	ret = adreno_ib_add(process, pkt[2],
+		SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+
+	return ret;
+}
+
+/*
+ * This opcode writes to GPU memory - if the buffer is written to, there is a
+ * good chance that it would be valuable to capture in the snapshot, so mark all
+ * buffers that are written to as frozen
+ */
+
+static int ib_parse_mem_write(unsigned int *pkt,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	if (type3_pkt_size(pkt[0]) < 1)
+		return 0;
+
+	/*
+	 * The address is where the data in the rest of this packet is written
+	 * to, but since that might be an offset into the larger buffer we need
+	 * to get the whole thing. Pass a size of 0 to capture the entire buffer.
+	 */
+
+	return adreno_ib_add(process, pkt[1] & 0xFFFFFFFC,
+		SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+}
+
+/*
+ * ib_add_type0_entries() - Add memory objects to list
+ * @device: The device on which the IB will execute
+ * @process: The process in which IB is mapped
+ * @ib_obj_list: The list of gpu objects
+ * @ib_parse_vars: Address ranges found in type0 packets
+ *
+ * Add memory objects to given list that are found in type0 packets
+ * Returns 0 on success else error code
+ */
+static int ib_add_type0_entries(struct kgsl_device *device,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int ret = 0;
+	int i;
+	int vfd_end;
+	unsigned int mask;
+	/* First up, the visibility stream buffer */
+	mask = 0xFFFFFFFF;
+	for (i = ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0;
+		i < ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7; i++) {
+		if (ib_parse_vars->cp_addr_regs[i]) {
+			ret = adreno_ib_add(process,
+				ib_parse_vars->cp_addr_regs[i] & mask,
+				SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+			if (ret)
+				return ret;
+			ib_parse_vars->cp_addr_regs[i] = 0;
+			ib_parse_vars->cp_addr_regs[i + 1] = 0;
+			i++;
+		}
+	}
+
+	vfd_end = ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15;
+	for (i = ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0;
+		i <= vfd_end; i++) {
+		if (ib_parse_vars->cp_addr_regs[i]) {
+			ret = adreno_ib_add(process,
+				ib_parse_vars->cp_addr_regs[i],
+				SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+			if (ret)
+				return ret;
+			ib_parse_vars->cp_addr_regs[i] = 0;
+		}
+	}
+
+	if (ib_parse_vars->cp_addr_regs[ADRENO_CP_ADDR_VSC_SIZE_ADDRESS]) {
+		ret = adreno_ib_add(process,
+			ib_parse_vars->cp_addr_regs[
+				ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] & mask,
+			SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+		if (ret)
+			return ret;
+		ib_parse_vars->cp_addr_regs[
+			ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] = 0;
+	}
+	mask = 0xFFFFFFE0;
+	for (i = ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR;
+		i <= ADRENO_CP_ADDR_SP_FS_OBJ_START_REG; i++) {
+		ret = adreno_ib_add(process,
+			ib_parse_vars->cp_addr_regs[i] & mask,
+			SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+		if (ret)
+			return ret;
+		ib_parse_vars->cp_addr_regs[i] = 0;
+	}
+	return ret;
+}
+/*
+ * The DRAW_INDX opcode sends a draw initiator which starts a draw operation in
+ * the GPU, so this is the point where all the registers and buffers become
+ * "valid".  The DRAW_INDX may also have an index buffer pointer that should be
+ * frozen with the others.
+ */
+
+static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int ret = 0;
+	int i;
+	int opcode = cp_type3_opcode(pkt[0]);
+
+	switch (opcode) {
+	case CP_DRAW_INDX:
+		if (type3_pkt_size(pkt[0]) > 3) {
+			ret = adreno_ib_add(process,
+				pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	case CP_DRAW_INDX_OFFSET:
+		if (type3_pkt_size(pkt[0]) == 6) {
+			ret = adreno_ib_add(process,
+				pkt[5], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	case CP_DRAW_INDIRECT:
+		if (type3_pkt_size(pkt[0]) == 2) {
+			ret = adreno_ib_add(process,
+				pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	case CP_DRAW_INDX_INDIRECT:
+		if (type3_pkt_size(pkt[0]) == 4) {
+			ret = adreno_ib_add(process,
+				pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+			if (ret)
+				break;
+			ret = adreno_ib_add(process,
+				pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	case CP_DRAW_AUTO:
+		if (type3_pkt_size(pkt[0]) == 6) {
+			ret = adreno_ib_add(process,
+				 pkt[3], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+			if (ret)
+				break;
+			ret = adreno_ib_add(process,
+				pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	}
+
+	if (ret)
+		return ret;
+	/*
+	 * All of the type0 writes are valid at a draw initiator, so freeze
+	 * the various buffers that we are tracking
+	 */
+	ret = ib_add_type0_entries(device, process, ib_obj_list,
+				ib_parse_vars);
+	if (ret)
+		return ret;
+	/* Process set draw state command streams if any */
+	for (i = 0; i < NUM_SET_DRAW_GROUPS; i++) {
+		if (!ib_parse_vars->set_draw_groups[i].cmd_stream_dwords)
+			continue;
+		ret = adreno_ib_find_objs(device, process,
+			ib_parse_vars->set_draw_groups[i].cmd_stream_addr,
+			ib_parse_vars->set_draw_groups[i].cmd_stream_dwords,
+			0, SNAPSHOT_GPU_OBJECT_DRAW,
+			ib_obj_list, 2);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Parse all the type7 opcode packets that may contain important information,
+ * such as additional GPU buffers to grab or a draw initiator
+ */
+
+static int ib_parse_type7(struct kgsl_device *device, unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int opcode = cp_type7_opcode(*ptr);
+
+	switch (opcode) {
+	case CP_SET_DRAW_STATE:
+		return ib_parse_type7_set_draw_state(device, ptr, process,
+					ib_obj_list);
+	}
+
+	return 0;
+}
+
+/*
+ * Parse all the type3 opcode packets that may contain important information,
+ * such as additional GPU buffers to grab or a draw initiator
+ */
+
+static int ib_parse_type3(struct kgsl_device *device, unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int opcode = cp_type3_opcode(*ptr);
+
+	switch (opcode) {
+	case  CP_LOAD_STATE:
+		return ib_parse_load_state(ptr, process, ib_obj_list,
+					ib_parse_vars);
+	case CP_SET_BIN_DATA:
+		return ib_parse_set_bin_data(ptr, process, ib_obj_list,
+					ib_parse_vars);
+	case CP_MEM_WRITE:
+		return ib_parse_mem_write(ptr, process, ib_obj_list,
+					ib_parse_vars);
+	case CP_DRAW_INDX:
+	case CP_DRAW_INDX_OFFSET:
+	case CP_DRAW_INDIRECT:
+	case CP_DRAW_INDX_INDIRECT:
+		return ib_parse_draw_indx(device, ptr, process, ib_obj_list,
+					ib_parse_vars);
+	case CP_SET_DRAW_STATE:
+		return ib_parse_set_draw_state(device, ptr, process,
+					ib_obj_list, ib_parse_vars);
+	}
+
+	return 0;
+}
+
+/*
+ * Parse type0 packets found in the stream.  Some of the registers that are
+ * written are clues for GPU buffers that we need to freeze.  Register writes
+ * are considered valid when a draw initiator is issued, so just cache the values
+ * here and freeze them when a CP_DRAW_INDX is seen.  This protects against
+ * needlessly caching buffers that won't be used during a draw call.
+ */
+
+static int ib_parse_type0(struct kgsl_device *device, unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int size = type0_pkt_size(*ptr);
+	int offset = type0_pkt_offset(*ptr);
+	int i;
+	int reg_index;
+	int ret = 0;
+
+	for (i = 0; i < size; i++, offset++) {
+		/* Visibility stream buffer */
+		if (offset >= adreno_cp_parser_getreg(adreno_dev,
+				ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0) &&
+			offset <= adreno_cp_parser_getreg(adreno_dev,
+				ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7)) {
+			reg_index = adreno_cp_parser_regindex(
+					adreno_dev, offset,
+					ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0,
+					ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7);
+			if (reg_index >= 0)
+				ib_parse_vars->cp_addr_regs[reg_index] =
+								ptr[i + 1];
+			continue;
+		} else if ((offset >= adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0)) &&
+			(offset <= adreno_cp_parser_getreg(adreno_dev,
+				ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15))) {
+			reg_index = adreno_cp_parser_regindex(adreno_dev,
+					offset,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15);
+			if (reg_index >= 0)
+				ib_parse_vars->cp_addr_regs[reg_index] =
+								ptr[i + 1];
+			continue;
+		} else if ((offset >= adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16)) &&
+			(offset <= adreno_cp_parser_getreg(adreno_dev,
+				ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31))) {
+			reg_index = adreno_cp_parser_regindex(adreno_dev,
+					offset,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31);
+			if (reg_index >= 0)
+				ib_parse_vars->cp_addr_regs[reg_index] =
+								ptr[i + 1];
+			continue;
+		} else {
+			if (offset ==
+				adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_VSC_SIZE_ADDRESS))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] =
+						ptr[i + 1];
+			else if (offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR] =
+						ptr[i + 1];
+			else if (offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR] =
+						ptr[i + 1];
+			else if (offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_SP_VS_OBJ_START_REG))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_SP_VS_OBJ_START_REG] =
+						ptr[i + 1];
+			else if (offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_SP_FS_OBJ_START_REG))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_SP_FS_OBJ_START_REG] =
+						ptr[i + 1];
+			else if ((offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_UCHE_INVALIDATE0)) ||
+				(offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_UCHE_INVALIDATE1))) {
+				ret = adreno_ib_add(process,
+					ptr[i + 1] & 0xFFFFFFC0,
+					SNAPSHOT_GPU_OBJECT_GENERIC,
+					ib_obj_list);
+				if (ret)
+					break;
+			}
+		}
+	}
+	return ret;
+}
+
+static int ib_parse_type7_set_draw_state(struct kgsl_device *device,
+	unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list)
+{
+	int size = type7_pkt_size(*ptr);
+	int i;
+	int ret = 0;
+	int flags;
+	uint64_t cmd_stream_dwords;
+	uint64_t cmd_stream_addr;
+
+	/*
+	 * size is the size of the packet excluding the DWORD for the packet
+	 * header, so only loop through the packet parameters from ptr[1] to
+	 * ptr[size], where ptr[0] is the packet header. Each iteration looks
+	 * at 3 DWORDS, hence the loop counter is always incremented by 3.
+	 */
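+	/*
+	 * In each entry, ptr[i] carries the command stream size in dwords
+	 * (bits 0:15) and the flags (bits 16:19); ptr[i + 1] and ptr[i + 2]
+	 * carry the lower and upper 32 bits of the 64-bit command stream
+	 * address.
+	 */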
+	for (i = 1; i <= size; i += 3) {
+		/* take action based on flags */
+		flags = (ptr[i] & 0x000F0000) >> 16;
+
+		/*
+		 * dirty flag or no flags both mean we need to load it for
+		 * next draw. No flags is used when the group is activated
+		 * or initialized for the first time in the IB
+		 */
+		if (flags & 0x1 || !flags) {
+			cmd_stream_dwords = ptr[i] & 0x0000FFFF;
+			cmd_stream_addr = ptr[i + 2];
+			cmd_stream_addr = cmd_stream_addr << 32 | ptr[i + 1];
+			if (cmd_stream_dwords)
+				ret = adreno_ib_find_objs(device, process,
+					cmd_stream_addr, cmd_stream_dwords,
+					0, SNAPSHOT_GPU_OBJECT_DRAW,
+					ib_obj_list, 2);
+			if (ret)
+				break;
+			continue;
+		}
+		/* load immediate */
+		if (flags & 0x8) {
+			uint64_t gpuaddr = ptr[i + 2];
+
+			gpuaddr = gpuaddr << 32 | ptr[i + 1];
+			ret = adreno_ib_find_objs(device, process,
+				gpuaddr, (ptr[i] & 0x0000FFFF),
+				0, SNAPSHOT_GPU_OBJECT_IB,
+				ib_obj_list, 2);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+static int ib_parse_set_draw_state(struct kgsl_device *device,
+	unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int size = type0_pkt_size(*ptr);
+	int i;
+	int grp_id;
+	int ret = 0;
+	int flags;
+	struct set_draw_state *group;
+
+	/*
+	 * size is the size of the packet excluding the DWORD for the packet
+	 * header, so only loop through the packet parameters from ptr[1] to
+	 * ptr[size], where ptr[0] is the packet header. Each iteration looks
+	 * at 2 DWORDS, hence the loop counter is always incremented by 2.
+	 */
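+	/*
+	 * In each entry, ptr[i] packs the group id (bits 24:28), the flags
+	 * (bits 16:19) and the command stream size in dwords (bits 0:15),
+	 * while ptr[i + 1] carries the 32-bit command stream address.
+	 */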
+	for (i = 1; i <= size; i += 2) {
+		grp_id = (ptr[i] & 0x1F000000) >> 24;
+		/* take action based on flags */
+		flags = (ptr[i] & 0x000F0000) >> 16;
+		/* Disable all groups */
+		if (flags & 0x4) {
+			int j;
+
+			for (j = 0; j < NUM_SET_DRAW_GROUPS; j++) {
+				group = &(ib_parse_vars->set_draw_groups[j]);
+				group->cmd_stream_dwords = 0;
+			}
+			continue;
+		}
+		/* disable flag */
+		if (flags & 0x2) {
+			group = &(ib_parse_vars->set_draw_groups[grp_id]);
+			group->cmd_stream_dwords = 0;
+			continue;
+		}
+		/*
+		 * dirty flag or no flags both mean we need to load it for
+		 * next draw. No flags is used when the group is activated
+		 * or initialized for the first time in the IB
+		 */
+		if (flags & 0x1 || !flags) {
+			group = &(ib_parse_vars->set_draw_groups[grp_id]);
+			group->cmd_stream_dwords = ptr[i] & 0x0000FFFF;
+			group->cmd_stream_addr =  ptr[i + 1];
+			continue;
+		}
+		/* load immediate */
+		if (flags & 0x8) {
+			ret = adreno_ib_find_objs(device, process,
+				ptr[i + 1], (ptr[i] & 0x0000FFFF),
+				0, SNAPSHOT_GPU_OBJECT_IB,
+				ib_obj_list, 2);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * adreno_cp_parse_ib2() - Wrapper function around IB2 parsing
+ * @device: Device pointer
+ * @process: Process in which the IB is allocated
+ * @gpuaddr: IB2 gpuaddr
+ * @dwords: IB2 size in dwords
+ * @ib2base: Base address of active IB2
+ * @ib_obj_list: List of objects found in IB
+ * @ib_level: The level from which function is called, either from IB1 or IB2
+ *
+ * Function does some checks to ensure that IB2 parsing is called from IB1
+ * and then calls the function to find objects in IB2.
+ */
+static int adreno_cp_parse_ib2(struct kgsl_device *device,
+			struct kgsl_process_private *process,
+			uint64_t gpuaddr, uint64_t dwords, uint64_t ib2base,
+			struct adreno_ib_object_list *ib_obj_list,
+			int ib_level)
+{
+	int i;
+
+	/*
+	 * We can only expect an IB2 in IB1; if we are
+	 * already processing an IB2 then return an error
+	 */
+	if (ib_level == 2)
+		return -EINVAL;
+
+	/* Push the currently active IB2 into the snapshot object list */
+	if (ib2base == gpuaddr)
+		kgsl_snapshot_push_object(device, process, gpuaddr, dwords);
+	/*
+	 * only try to find sub objects iff this IB has
+	 * not been processed already
+	 */
+	for (i = 0; i < ib_obj_list->num_objs; i++) {
+		struct adreno_ib_object *ib_obj = &(ib_obj_list->obj_list[i]);
+
+		if ((ib_obj->snapshot_obj_type == SNAPSHOT_GPU_OBJECT_IB) &&
+			(gpuaddr >= ib_obj->gpuaddr) &&
+			(gpuaddr + dwords * sizeof(unsigned int) <=
+			ib_obj->gpuaddr + ib_obj->size))
+			return 0;
+	}
+
+	return adreno_ib_find_objs(device, process, gpuaddr, dwords, ib2base,
+		SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 2);
+}
+
+/*
+ * adreno_ib_find_objs() - Find all IB objects in a given IB
+ * @device: The device pointer on which the IB executes
+ * @process: The process in which the IB and all contained objects are mapped.
+ * @gpuaddr: The gpu address of the IB
+ * @dwords: Size of the IB in dwords
+ * @ib2base: Base address of the active IB2
+ * @obj_type: The object type can be either an IB or a draw state sequence
+ * @ib_obj_list: The list in which the IB and the objects in it are added.
+ * @ib_level: Indicates if IB1 or IB2 is being processed
+ *
+ * Finds all IB objects in a given IB and puts them in a list. Can be called
+ * recursively for the IB2s contained in an IB1.
+ * Returns 0 on success else error code
+ */
+static int adreno_ib_find_objs(struct kgsl_device *device,
+				struct kgsl_process_private *process,
+				uint64_t gpuaddr, uint64_t dwords,
+				uint64_t ib2base, int obj_type,
+				struct adreno_ib_object_list *ib_obj_list,
+				int ib_level)
+{
+	int ret = 0;
+	uint64_t rem = dwords;
+	int i;
+	struct ib_parser_variables ib_parse_vars;
+	unsigned int *src;
+	struct adreno_ib_object *ib_obj;
+	struct kgsl_mem_entry *entry;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/* check that this IB is not already on list */
+	for (i = 0; i < ib_obj_list->num_objs; i++) {
+		ib_obj = &(ib_obj_list->obj_list[i]);
+		if ((obj_type == ib_obj->snapshot_obj_type) &&
+			(ib_obj->gpuaddr <= gpuaddr) &&
+			((ib_obj->gpuaddr + ib_obj->size) >=
+			(gpuaddr + (dwords << 2))))
+			return 0;
+	}
+
+	entry = kgsl_sharedmem_find(process, gpuaddr);
+	if (!entry)
+		return -EINVAL;
+
+	if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, (dwords << 2))) {
+		kgsl_mem_entry_put(entry);
+		return -EINVAL;
+	}
+
+	src = kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr);
+	if (!src) {
+		kgsl_mem_entry_put(entry);
+		return -EINVAL;
+	}
+
+	memset(&ib_parse_vars, 0, sizeof(struct ib_parser_variables));
+
+	ret = adreno_ib_add(process, gpuaddr, obj_type, ib_obj_list);
+	if (ret)
+		goto done;
+
+	for (i = 0; rem > 0; rem--, i++) {
+		int pktsize;
+
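+		/*
+		 * Decode the payload size from the packet header so the walk
+		 * can advance to the next packet header.
+		 */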
+		if (pkt_is_type0(src[i]))
+			pktsize = type0_pkt_size(src[i]);
+
+		else if (pkt_is_type3(src[i]))
+			pktsize = type3_pkt_size(src[i]);
+
+		else if (pkt_is_type4(src[i]))
+			pktsize = type4_pkt_size(src[i]);
+
+		else if (pkt_is_type7(src[i]))
+			pktsize = type7_pkt_size(src[i]);
+
+		/*
+		 * If the packet isn't a type 0, type 3, type 4 or type 7 then
+		 * don't bother parsing it - it is likely corrupted
+		 */
+		else
+			break;
+
+		if (((pkt_is_type0(src[i]) || pkt_is_type3(src[i])) && !pktsize)
+			|| ((pktsize + 1) > rem))
+			break;
+
+		if (pkt_is_type3(src[i])) {
+			if (adreno_cmd_is_ib(adreno_dev, src[i])) {
+				uint64_t gpuaddrib2 = src[i + 1];
+				uint64_t size = src[i + 2];
+
+				ret = adreno_cp_parse_ib2(device, process,
+						gpuaddrib2, size, ib2base,
+						ib_obj_list, ib_level);
+				if (ret)
+					goto done;
+			} else {
+				ret = ib_parse_type3(device, &src[i], process,
+						ib_obj_list,
+						&ib_parse_vars);
+				/*
+				 * If the parse function failed (probably
+				 * because of a bad decode) then bail out and
+				 * just capture the binary IB data
+				 */
+
+				if (ret)
+					goto done;
+			}
+		}
+
+		else if (pkt_is_type7(src[i])) {
+			if (adreno_cmd_is_ib(adreno_dev, src[i])) {
+				uint64_t size = src[i + 3];
+				uint64_t gpuaddrib2 = src[i + 2];
+
+				gpuaddrib2 = gpuaddrib2 << 32 | src[i + 1];
+
+				ret = adreno_cp_parse_ib2(device, process,
+						gpuaddrib2, size, ib2base,
+						ib_obj_list, ib_level);
+				if (ret)
+					goto done;
+			} else {
+				ret = ib_parse_type7(device, &src[i], process,
+						ib_obj_list,
+						&ib_parse_vars);
+				/*
+				 * If the parse function failed (probably
+				 * because of a bad decode) then bail out and
+				 * just capture the binary IB data
+				 */
+
+				if (ret)
+					goto done;
+			}
+		}
+
+		else if (pkt_is_type0(src[i])) {
+			ret = ib_parse_type0(device, &src[i], process,
+					ib_obj_list, &ib_parse_vars);
+			if (ret)
+				goto done;
+		}
+
+		i += pktsize;
+		rem -= pktsize;
+	}
+
+done:
+	/*
+	 * For set draw objects there may not be a draw_indx packet at the end
+	 * to signal that the cached objects need to be saved, so just save
+	 * them here.
+	 */
+	if (!ret && SNAPSHOT_GPU_OBJECT_DRAW == obj_type)
+		ret = ib_add_type0_entries(device, process, ib_obj_list,
+			&ib_parse_vars);
+
+	kgsl_memdesc_unmap(&entry->memdesc);
+	kgsl_mem_entry_put(entry);
+	return ret;
+}
+
+
+/*
+ * adreno_ib_create_object_list() - Find all the memory objects in IB
+ * @device: The device pointer on which the IB executes
+ * @process: The process in which the IB and all contained objects are mapped
+ * @gpuaddr: The gpu address of the IB
+ * @dwords: Size of ib in dwords
+ * @ib2base: Base address of active IB2
+ * @out_ib_obj_list: Output pointer for the list in which the IB and its objects are added.
+ *
+ * Find all the memory objects that an IB needs for execution and place
+ * them in a list including the IB.
+ * Returns 0 on success or an error code on failure; on failure the objects
+ * found before the error occurred are still returned in the list. If no
+ * objects were found then *out_ib_obj_list is left set to NULL.
+ */
+int adreno_ib_create_object_list(struct kgsl_device *device,
+		struct kgsl_process_private *process,
+		uint64_t gpuaddr, uint64_t dwords, uint64_t ib2base,
+		struct adreno_ib_object_list **out_ib_obj_list)
+{
+	int ret = 0;
+	struct adreno_ib_object_list *ib_obj_list;
+
+	if (!out_ib_obj_list)
+		return -EINVAL;
+
+	*out_ib_obj_list = NULL;
+
+	ib_obj_list = kzalloc(sizeof(*ib_obj_list), GFP_KERNEL);
+	if (!ib_obj_list)
+		return -ENOMEM;
+
+	ib_obj_list->obj_list = vmalloc(MAX_IB_OBJS *
+					sizeof(struct adreno_ib_object));
+
+	if (!ib_obj_list->obj_list) {
+		kfree(ib_obj_list);
+		return -ENOMEM;
+	}
+
+	ret = adreno_ib_find_objs(device, process, gpuaddr, dwords, ib2base,
+		SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 1);
+
+	/* Even if there was an error return the remaining objects found */
+	if (ib_obj_list->num_objs)
+		*out_ib_obj_list = ib_obj_list;
+
+	return ret;
+}
+
+/*
+ * adreno_ib_destroy_obj_list() - Destroy an ib object list
+ * @ib_obj_list: List to destroy
+ *
+ * Free up all resources used by an ib_obj_list
+ */
+void adreno_ib_destroy_obj_list(struct adreno_ib_object_list *ib_obj_list)
+{
+	int i;
+
+	if (!ib_obj_list)
+		return;
+
+	for (i = 0; i < ib_obj_list->num_objs; i++) {
+		if (ib_obj_list->obj_list[i].entry)
+			kgsl_mem_entry_put(ib_obj_list->obj_list[i].entry);
+	}
+	vfree(ib_obj_list->obj_list);
+	kfree(ib_obj_list);
+}

+ 175 - 0
qcom/opensource/graphics-kernel/adreno_cp_parser.h

@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013-2014, 2017, 2019, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef __ADRENO_IB_PARSER__
+#define __ADRENO_IB_PARSER__
+
+#include "adreno.h"
+
+extern const unsigned int a3xx_cp_addr_regs[];
+extern const unsigned int a4xx_cp_addr_regs[];
+
+/*
+ * struct adreno_ib_object - Structure containing information about an
+ * address range found in an IB
+ * @gpuaddr: The starting gpuaddress of the range
+ * @size: Size of the range
+ * @snapshot_obj_type - Type of range used in snapshot
+ * @entry: The memory entry in which this range is found
+ */
+struct adreno_ib_object {
+	uint64_t gpuaddr;
+	uint64_t size;
+	int snapshot_obj_type;
+	struct kgsl_mem_entry *entry;
+};
+
+/*
+ * struct adreno_ib_object_list - List of address ranges found in IB
+ * @obj_list: The address range list
+ * @num_objs: Number of objects in list
+ */
+struct adreno_ib_object_list {
+	struct adreno_ib_object *obj_list;
+	int num_objs;
+};
+
+/*
+ * Adreno registers used during IB parsing; these contain the addresses
+ * and sizes of the buffers referenced by an IB
+ */
+enum adreno_cp_addr_regs {
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0 = 0,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_17,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_18,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_19,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_20,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_21,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_22,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_23,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_24,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_25,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_26,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_27,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_28,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_29,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_30,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31,
+	ADRENO_CP_ADDR_VSC_SIZE_ADDRESS,
+	ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR,
+	ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR,
+	ADRENO_CP_ADDR_SP_VS_OBJ_START_REG,
+	ADRENO_CP_ADDR_SP_FS_OBJ_START_REG,
+	ADRENO_CP_UCHE_INVALIDATE0,
+	ADRENO_CP_UCHE_INVALIDATE1,
+	ADRENO_CP_ADDR_MAX,
+};
+
+/*
+ * adreno_ib_init_ib_obj() - Initialize an ib object structure with a
+ * gpuaddress and size
+ * @gpuaddr: gpuaddr with which to initialize the object
+ * @size: Size in bytes with which the object is initialized
+ * @obj_type: The object type used by snapshot
+ * @entry: The memory entry in which the object's address range resides
+ * @ib_obj: Pointer to the ib object structure to initialize
+ */
+static inline void adreno_ib_init_ib_obj(uint64_t gpuaddr,
+			uint64_t size, int obj_type,
+			struct kgsl_mem_entry *entry,
+			struct adreno_ib_object *ib_obj)
+{
+	ib_obj->gpuaddr = gpuaddr;
+	ib_obj->size = size;
+	ib_obj->snapshot_obj_type = obj_type;
+	ib_obj->entry = entry;
+}
+
+/*
+ * adreno_cp_parser_getreg() - Returns the register offset for an enum index
+ * @adreno_dev: The adreno device being operated upon
+ * @reg_enum: Enum index of the register whose offset is returned
+ *
+ * Returns the register offset, or -EEXIST if the register is not available
+ * on this target.
+ */
+static inline int adreno_cp_parser_getreg(struct adreno_device *adreno_dev,
+					enum adreno_cp_addr_regs reg_enum)
+{
+	if (reg_enum == ADRENO_CP_ADDR_MAX)
+		return -EEXIST;
+
+	if (!adreno_is_a3xx(adreno_dev))
+		return -EEXIST;
+	return a3xx_cp_addr_regs[reg_enum];
+}
+
+/*
+ * adreno_cp_parser_regindex() - Returns enum index for a given register offset
+ * @adreno_dev: The adreno device being operated upon
+ * @offset: Register offset
+ * @start: The start index to search from
+ * @end: The last index to search
+ *
+ * Checks the list of registers defined for the device and returns the index
+ * whose offset value matches offset parameter.
+ */
+static inline int adreno_cp_parser_regindex(struct adreno_device *adreno_dev,
+				unsigned int offset,
+				enum adreno_cp_addr_regs start,
+				enum adreno_cp_addr_regs end)
+{
+	int i;
+	const unsigned int *regs;
+
+	if (!adreno_is_a3xx(adreno_dev))
+		return -EEXIST;
+
+	regs = a3xx_cp_addr_regs;
+
+	for (i = start; i <= end && i < ADRENO_CP_ADDR_MAX; i++)
+		if (regs[i] == offset)
+			return i;
+	return -EEXIST;
+}
+
+int adreno_ib_create_object_list(
+		struct kgsl_device *device,
+		struct kgsl_process_private *process,
+		uint64_t gpuaddr, uint64_t dwords, uint64_t ib2base,
+		struct adreno_ib_object_list **out_ib_obj_list);
+
+void adreno_ib_destroy_obj_list(struct adreno_ib_object_list *ib_obj_list);
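+
+/*
+ * Minimal usage sketch (not from the driver; ibbase, ibsize and ib2base are
+ * placeholder names for values the caller already has). A non-NULL list may
+ * be returned even when an error code is returned, so always destroy it:
+ *
+ *	struct adreno_ib_object_list *ib_obj_list = NULL;
+ *	int ret = adreno_ib_create_object_list(device, process, ibbase,
+ *			ibsize, ib2base, &ib_obj_list);
+ *
+ *	if (ib_obj_list) {
+ *		// walk ib_obj_list->obj_list[0 .. num_objs - 1]
+ *		adreno_ib_destroy_obj_list(ib_obj_list);
+ *	}
+ */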
+
+#endif

+ 680 - 0
qcom/opensource/graphics-kernel/adreno_debugfs.c

@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2002,2008-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/debugfs.h>
+
+#include "adreno.h"
+extern struct dentry *kgsl_debugfs_dir;
+
+static void set_isdb(struct adreno_device *adreno_dev, void *priv)
+{
+	set_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
+}
+
+static int _isdb_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/* Once ISDB is enabled it stays enabled */
+	if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv))
+		return 0;
+
+	/*
+	 * Bring down the GPU so we can bring it back up with the correct power
+	 * and clock settings
+	 */
+	return adreno_power_cycle(adreno_dev, set_isdb, NULL);
+}
+
+static int _isdb_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	*val = (u64) test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(_isdb_fops, _isdb_get, _isdb_set, "%llu\n");
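+/*
+ * Example (sketch): on A5xx targets adreno_debugfs_init() below exposes this
+ * attribute as an "isdb" file that reads and writes a decimal u64. The exact
+ * debugfs path is an assumption and depends on where the KGSL device
+ * directory is created, e.g. something like:
+ *
+ *	echo 1 > /sys/kernel/debug/kgsl/kgsl-3d0/isdb
+ */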
+
+static int _ctxt_record_size_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+
+	device->snapshot_ctxt_record_size = val;
+
+	return 0;
+}
+
+static int _ctxt_record_size_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+
+	*val = device->snapshot_ctxt_record_size;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(_ctxt_record_size_fops, _ctxt_record_size_get,
+		_ctxt_record_size_set, "%llu\n");
+
+static int _lm_limit_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM))
+		return 0;
+
+	/* Clamp the value to between 3A and 10A (3000 to 10000 mA) */
+	if (val > 10000)
+		val = 10000;
+	else if (val < 3000)
+		val = 3000;
+
+	if (adreno_dev->lm_enabled)
+		return adreno_power_cycle_u32(adreno_dev,
+			&adreno_dev->lm_limit, val);
+
+	return 0;
+}
+
+static int _lm_limit_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM)) {
+		*val = 0;
+		return 0;
+	}
+
+	*val = (u64) adreno_dev->lm_limit;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(_lm_limit_fops, _lm_limit_get,
+		_lm_limit_set, "%llu\n");
+
+static int _lm_threshold_count_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM))
+		*val = 0;
+	else
+		*val = (u64) adreno_dev->lm_threshold_cross;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(_lm_threshold_fops, _lm_threshold_count_get,
+	NULL, "%llu\n");
+
+static int _active_count_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	unsigned int i = atomic_read(&device->active_cnt);
+
+	*val = (u64) i;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(_active_count_fops, _active_count_get, NULL, "%llu\n");
+
+static int _coop_reset_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_COOP_RESET))
+		adreno_dev->cooperative_reset = val ? true : false;
+	return 0;
+}
+
+static int _coop_reset_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	*val = (u64) adreno_dev->cooperative_reset;
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(_coop_reset_fops, _coop_reset_get,
+				_coop_reset_set, "%llu\n");
+
+static void set_gpu_client_pf(struct adreno_device *adreno_dev, void *priv)
+{
+	adreno_dev->uche_client_pf = *((u32 *)priv);
+	adreno_dev->patch_reglist = false;
+}
+
+static int _gpu_client_pf_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+
+	return adreno_power_cycle(ADRENO_DEVICE(device), set_gpu_client_pf, &val);
+}
+
+static int _gpu_client_pf_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	*val = (u64) adreno_dev->uche_client_pf;
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(_gpu_client_pf_fops, _gpu_client_pf_get,
+				_gpu_client_pf_set, "%llu\n");
+
+typedef void (*reg_read_init_t)(struct kgsl_device *device);
+typedef void (*reg_read_fill_t)(struct kgsl_device *device, int i,
+	unsigned int *vals, int linec);
+
+
+static void sync_event_print(struct seq_file *s,
+		struct kgsl_drawobj_sync_event *sync_event)
+{
+	switch (sync_event->type) {
+	case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: {
+		seq_printf(s, "sync: ctx: %u ts: %u",
+				sync_event->context->id, sync_event->timestamp);
+		break;
+	}
+	case KGSL_CMD_SYNCPOINT_TYPE_FENCE: {
+		int i;
+		struct event_fence_info *info = sync_event->priv;
+
+		for (i = 0; info && i < info->num_fences; i++)
+			seq_printf(s, "sync: %s",
+				info->fences[i].name);
+		break;
+	}
+	case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: {
+		int j;
+		struct event_timeline_info *info = sync_event->priv;
+
+		for (j = 0; info && info[j].timeline; j++)
+			seq_printf(s, "timeline: %d seqno: %lld",
+				info[j].timeline, info[j].seqno);
+		break;
+	}
+	default:
+		seq_printf(s, "sync: type: %d", sync_event->type);
+		break;
+	}
+}
+
+struct flag_entry {
+	unsigned long mask;
+	const char *str;
+};
+
+static void _print_flags(struct seq_file *s, const struct flag_entry *table,
+			unsigned long flags)
+{
+	int i;
+	int first = 1;
+
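+	/*
+	 * Print each recognized flag name separated by '|'; any bits left
+	 * over that are not in the table are printed as a hex value, and
+	 * "None" is printed if no bits were set at all.
+	 */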
+	for (i = 0; table[i].str; i++) {
+		if (flags & table[i].mask) {
+			seq_printf(s, "%c%s", first ? '\0' : '|', table[i].str);
+			flags &= ~(table[i].mask);
+			first = 0;
+		}
+	}
+	if (flags) {
+		seq_printf(s, "%c0x%lx", first ? '\0' : '|', flags);
+		first = 0;
+	}
+	if (first)
+		seq_puts(s, "None");
+}
+
+#define print_flags(_s, _flag, _array...)		\
+	({						\
+		const struct flag_entry symbols[] =   \
+			{ _array, { -1, NULL } };	\
+		_print_flags(_s, symbols, _flag);	\
+	 })
+
+static void syncobj_print(struct seq_file *s,
+			struct kgsl_drawobj_sync *syncobj)
+{
+	struct kgsl_drawobj_sync_event *event;
+	unsigned int i;
+
+	seq_puts(s, " syncobj ");
+
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		event = &syncobj->synclist[i];
+
+		if (!kgsl_drawobj_event_pending(syncobj, i))
+			continue;
+
+		sync_event_print(s, event);
+		seq_puts(s, "\n");
+	}
+}
+
+static void cmdobj_print(struct seq_file *s,
+			struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	if (drawobj->type == CMDOBJ_TYPE)
+		seq_puts(s, " cmdobj ");
+	else
+		seq_puts(s, " markerobj ");
+
+	seq_printf(s, "\t %u ", drawobj->timestamp);
+
+	seq_puts(s, " priv: ");
+	print_flags(s, cmdobj->priv,
+		{ BIT(CMDOBJ_SKIP), "skip"},
+		{ BIT(CMDOBJ_FORCE_PREAMBLE), "force_preamble"},
+		{ BIT(CMDOBJ_WFI), "wait_for_idle" });
+}
+
+static void drawobj_print(struct seq_file *s,
+			struct kgsl_drawobj *drawobj)
+{
+	if (!kref_get_unless_zero(&drawobj->refcount))
+		return;
+
+	if (drawobj->type == SYNCOBJ_TYPE)
+		syncobj_print(s, SYNCOBJ(drawobj));
+	else if ((drawobj->type == CMDOBJ_TYPE) ||
+			(drawobj->type == MARKEROBJ_TYPE))
+		cmdobj_print(s, CMDOBJ(drawobj));
+
+	seq_puts(s, " flags: ");
+	print_flags(s, drawobj->flags, KGSL_DRAWOBJ_FLAGS);
+	kgsl_drawobj_put(drawobj);
+	seq_puts(s, "\n");
+}
+
+static int ctx_print(struct seq_file *s, void *unused)
+{
+	struct adreno_context *drawctxt = s->private;
+	unsigned int i;
+	struct kgsl_event *event;
+	unsigned int queued = 0, consumed = 0, retired = 0;
+
+	seq_printf(s, "id: %u type: %s priority: %d process: %s (%d) tid: %d\n",
+		   drawctxt->base.id,
+		   kgsl_context_type(drawctxt->type),
+		   drawctxt->base.priority,
+		   drawctxt->base.proc_priv->comm,
+		   pid_nr(drawctxt->base.proc_priv->pid),
+		   drawctxt->base.tid);
+
+	seq_puts(s, "flags: ");
+	print_flags(s, drawctxt->base.flags & ~(KGSL_CONTEXT_PRIORITY_MASK
+		| KGSL_CONTEXT_TYPE_MASK), KGSL_CONTEXT_FLAGS);
+	seq_puts(s, " priv: ");
+	print_flags(s, drawctxt->base.priv,
+		{ BIT(KGSL_CONTEXT_PRIV_SUBMITTED), "submitted"},
+		{ BIT(KGSL_CONTEXT_PRIV_DETACHED), "detached"},
+		{ BIT(KGSL_CONTEXT_PRIV_INVALID), "invalid"},
+		{ BIT(KGSL_CONTEXT_PRIV_PAGEFAULT), "pagefault"},
+		{ BIT(ADRENO_CONTEXT_FAULT), "fault"},
+		{ BIT(ADRENO_CONTEXT_GPU_HANG), "gpu_hang"},
+		{ BIT(ADRENO_CONTEXT_GPU_HANG_FT), "gpu_hang_ft"},
+		{ BIT(ADRENO_CONTEXT_SKIP_EOF), "skip_end_of_frame" },
+		{ BIT(ADRENO_CONTEXT_FORCE_PREAMBLE), "force_preamble"});
+	seq_puts(s, "\n");
+
+	seq_puts(s, "timestamps: ");
+	kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base,
+				KGSL_TIMESTAMP_QUEUED, &queued);
+	kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base,
+				KGSL_TIMESTAMP_CONSUMED, &consumed);
+	kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base,
+				KGSL_TIMESTAMP_RETIRED, &retired);
+	seq_printf(s, "queued: %u consumed: %u retired: %u global:%u\n",
+		   queued, consumed, retired,
+		   drawctxt->internal_timestamp);
+
+	seq_puts(s, "drawqueue:\n");
+
+	spin_lock(&drawctxt->lock);
+	for (i = drawctxt->drawqueue_head;
+		i != drawctxt->drawqueue_tail;
+		i = DRAWQUEUE_NEXT(i, ADRENO_CONTEXT_DRAWQUEUE_SIZE))
+		drawobj_print(s, drawctxt->drawqueue[i]);
+	spin_unlock(&drawctxt->lock);
+
+	seq_puts(s, "events:\n");
+	spin_lock(&drawctxt->base.events.lock);
+	list_for_each_entry(event, &drawctxt->base.events.events, node)
+		seq_printf(s, "\t%d: %pS created: %u\n", event->timestamp,
+				event->func, event->created);
+	spin_unlock(&drawctxt->base.events.lock);
+
+	return 0;
+}
+
+static int ctx_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct adreno_context *ctx = inode->i_private;
+
+	if (!_kgsl_context_get(&ctx->base))
+		return -ENODEV;
+
+	ret = single_open(file, ctx_print, &ctx->base);
+	if (ret)
+		kgsl_context_put(&ctx->base);
+	return ret;
+}
+
+static int ctx_release(struct inode *inode, struct file *file)
+{
+	struct kgsl_context *context;
+
+	context = ((struct seq_file *)file->private_data)->private;
+
+	kgsl_context_put(context);
+
+	return single_release(inode, file);
+}
+
+static const struct file_operations ctx_fops = {
+	.open = ctx_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = ctx_release,
+};
+
+
+void
+adreno_context_debugfs_init(struct adreno_device *adreno_dev,
+			    struct adreno_context *ctx)
+{
+	unsigned char name[16];
+
+	/*
+	 * Get the context here to make sure it still exists for the life of the
+	 * file
+	 */
+	_kgsl_context_get(&ctx->base);
+
+	snprintf(name, sizeof(name), "%d", ctx->base.id);
+
+	ctx->debug_root = debugfs_create_file(name, 0444,
+				adreno_dev->ctx_d_debugfs, ctx, &ctx_fops);
+}
+
+static int _bcl_sid0_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
+
+	if (ops && ops->bcl_sid_set)
+		return ops->bcl_sid_set(device, 0, val);
+
+	return 0;
+}
+
+static int _bcl_sid0_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
+
+	if (ops && ops->bcl_sid_get)
+		*val = ops->bcl_sid_get(device, 0);
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(_sid0_fops, _bcl_sid0_get, _bcl_sid0_set, "%llu\n");
+
+static int _bcl_sid1_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
+
+	if (ops && ops->bcl_sid_set)
+		return ops->bcl_sid_set(device, 1, val);
+
+	return 0;
+}
+
+static int _bcl_sid1_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
+
+	if (ops && ops->bcl_sid_get)
+		*val = ops->bcl_sid_get(device, 1);
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(_sid1_fops, _bcl_sid1_get, _bcl_sid1_set, "%llu\n");
+
+static int _bcl_sid2_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
+
+	if (ops && ops->bcl_sid_set)
+		return ops->bcl_sid_set(device, 2, val);
+
+	return 0;
+}
+
+static int _bcl_sid2_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
+
+	if (ops && ops->bcl_sid_get)
+		*val = ops->bcl_sid_get(device, 2);
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(_sid2_fops, _bcl_sid2_get, _bcl_sid2_set, "%llu\n");
+
+static int _bcl_throttle_time_us_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		*val = 0;
+	else
+		*val = (u64) adreno_dev->bcl_throttle_time_us;
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(_bcl_throttle_fops, _bcl_throttle_time_us_get, NULL, "%llu\n");
+
+static int _skipsaverestore_store(void *data, u64 val)
+{
+	struct adreno_device *adreno_dev = data;
+
+	if (adreno_dev->hwsched_enabled)
+		return adreno_power_cycle_bool(adreno_dev,
+			&adreno_dev->preempt.skipsaverestore, val);
+
+	adreno_dev->preempt.skipsaverestore = val ? true : false;
+	return 0;
+
+}
+
+static int _skipsaverestore_show(void *data, u64 *val)
+{
+	struct adreno_device *adreno_dev = data;
+
+	*val = (u64) adreno_dev->preempt.skipsaverestore;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(skipsaverestore_fops, _skipsaverestore_show, _skipsaverestore_store,
+	"%llu\n");
+
+static int _usesgmem_store(void *data, u64 val)
+{
+	struct adreno_device *adreno_dev = data;
+
+	if (adreno_dev->hwsched_enabled)
+		return adreno_power_cycle_bool(adreno_dev,
+			&adreno_dev->preempt.usesgmem, val);
+
+	adreno_dev->preempt.usesgmem = val ? true : false;
+	return 0;
+
+}
+
+static int _usesgmem_show(void *data, u64 *val)
+{
+	struct adreno_device *adreno_dev = data;
+
+	*val = (u64) adreno_dev->preempt.usesgmem;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(usesgmem_fops, _usesgmem_show, _usesgmem_store, "%llu\n");
+
+static int _preempt_level_store(void *data, u64 val)
+{
+	struct adreno_device *adreno_dev = data;
+
+	if (adreno_dev->hwsched_enabled)
+		return adreno_power_cycle_u32(adreno_dev,
+			&adreno_dev->preempt.preempt_level,
+			min_t(u64, val, 2));
+
+	adreno_dev->preempt.preempt_level = min_t(u64, val, 2);
+	return 0;
+
+}
+
+static int _preempt_level_show(void *data, u64 *val)
+{
+	struct adreno_device *adreno_dev = data;
+
+	*val = (u64) adreno_dev->preempt.preempt_level;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(preempt_level_fops, _preempt_level_show, _preempt_level_store, "%llu\n");
+
+static int _warmboot_show(void *data, u64 *val)
+{
+	struct adreno_device *adreno_dev = data;
+
+	*val = (u64)adreno_dev->warmboot_enabled;
+	return 0;
+}
+
+/*
+ * When the warmboot feature is enabled from debugfs, the first slumber exit will be a cold boot
+ * and all HFI messages will be recorded so that a warm boot can happen on subsequent slumber
+ * exits. When the warmboot feature is disabled from debugfs, every slumber exit will be a cold boot.
+ */
+static int _warmboot_store(void *data, u64 val)
+{
+	struct adreno_device *adreno_dev = data;
+
+	if (adreno_dev->warmboot_enabled == val)
+		return 0;
+
+	return adreno_power_cycle_bool(adreno_dev, &adreno_dev->warmboot_enabled, val);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(warmboot_fops, _warmboot_show, _warmboot_store, "%llu\n");
+
+static int _ifpc_hyst_store(void *data, u64 val)
+{
+	struct adreno_device *adreno_dev = data;
+	u32 hyst;
+
+	if (!gmu_core_dev_ifpc_isenabled(KGSL_DEVICE(adreno_dev)))
+		return -EINVAL;
+
+	/* IFPC hysteresis timer is 16 bits */
+	hyst = max_t(u32, (u32) (FIELD_GET(GENMASK(15, 0), val)),
+		     adreno_dev->ifpc_hyst_floor);
+
+	if (hyst == adreno_dev->ifpc_hyst)
+		return 0;
+
+	return adreno_power_cycle_u32(adreno_dev,
+			&adreno_dev->ifpc_hyst, hyst);
+}
+
+static int _ifpc_hyst_show(void *data, u64 *val)
+{
+	struct adreno_device *adreno_dev = data;
+
+	*val = (u64) adreno_dev->ifpc_hyst;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ifpc_hyst_fops, _ifpc_hyst_show, _ifpc_hyst_store, "%llu\n");
+
+void adreno_debugfs_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct dentry *snapshot_dir;
+
+	if (IS_ERR_OR_NULL(device->d_debugfs))
+		return;
+
+	debugfs_create_file("active_cnt", 0444, device->d_debugfs, device,
+			    &_active_count_fops);
+	adreno_dev->ctx_d_debugfs = debugfs_create_dir("ctx",
+							device->d_debugfs);
+	snapshot_dir = debugfs_lookup("snapshot", kgsl_debugfs_dir);
+
+	if (!IS_ERR_OR_NULL(snapshot_dir))
+		debugfs_create_file("coop_reset", 0644, snapshot_dir, device,
+					&_coop_reset_fops);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LM)) {
+		debugfs_create_file("lm_limit", 0644, device->d_debugfs, device,
+			&_lm_limit_fops);
+		debugfs_create_file("lm_threshold_count", 0444,
+			device->d_debugfs, device, &_lm_threshold_fops);
+	}
+
+	if (adreno_is_a5xx(adreno_dev))
+		debugfs_create_file("isdb", 0644, device->d_debugfs,
+			device, &_isdb_fops);
+
+	if (gmu_core_isenabled(device))
+		debugfs_create_file("ifpc_hyst", 0644, device->d_debugfs,
+			device, &ifpc_hyst_fops);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_GMU_WARMBOOT))
+		debugfs_create_file("warmboot", 0644, device->d_debugfs,
+			device, &warmboot_fops);
+
+	debugfs_create_file("ctxt_record_size", 0644, snapshot_dir,
+		device, &_ctxt_record_size_fops);
+	debugfs_create_file("gpu_client_pf", 0644, snapshot_dir,
+		device, &_gpu_client_pf_fops);
+	debugfs_create_bool("dump_all_ibs", 0644, snapshot_dir,
+		&device->dump_all_ibs);
+
+	adreno_dev->bcl_debugfs_dir = debugfs_create_dir("bcl", device->d_debugfs);
+	if (!IS_ERR_OR_NULL(adreno_dev->bcl_debugfs_dir)) {
+		debugfs_create_file("sid0", 0644, adreno_dev->bcl_debugfs_dir, device, &_sid0_fops);
+		debugfs_create_file("sid1", 0644, adreno_dev->bcl_debugfs_dir, device, &_sid1_fops);
+		debugfs_create_file("sid2", 0644, adreno_dev->bcl_debugfs_dir, device, &_sid2_fops);
+		debugfs_create_file("bcl_throttle_time_us", 0444, adreno_dev->bcl_debugfs_dir,
+						device, &_bcl_throttle_fops);
+	}
+
+	adreno_dev->preemption_debugfs_dir = debugfs_create_dir("preemption", device->d_debugfs);
+	if (!IS_ERR_OR_NULL(adreno_dev->preemption_debugfs_dir)) {
+		debugfs_create_file("preempt_level", 0644, adreno_dev->preemption_debugfs_dir,
+			device, &preempt_level_fops);
+		debugfs_create_file("usesgmem", 0644, adreno_dev->preemption_debugfs_dir, device,
+			&usesgmem_fops);
+		debugfs_create_file("skipsaverestore", 0644, adreno_dev->preemption_debugfs_dir,
+			device, &skipsaverestore_fops);
+	}
+}

+ 2884 - 0
qcom/opensource/graphics-kernel/adreno_dispatch.c

@@ -0,0 +1,2884 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <soc/qcom/msm_performance.h>
+#include "adreno.h"
+#include "adreno_sysfs.h"
+#include "adreno_trace.h"
+#include "kgsl_bus.h"
+#include "kgsl_eventlog.h"
+#include "kgsl_gmu_core.h"
+#include "kgsl_timeline.h"
+
+#define DRAWQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s))
+
+/* Number of commands that can be queued in a context before it sleeps */
+static unsigned int _context_drawqueue_size = 50;
+
+/* Number of milliseconds to wait for the context queue to clear */
+static unsigned int _context_queue_wait = 10000;
+
+/* Number of drawobjs sent at a time from a single context */
+static unsigned int _context_drawobj_burst = 5;
+
+/*
+ * GFT throttle parameters. If GFT recovered more than
+ * X times in Y ms invalidate the context and do not attempt recovery.
+ * X -> _fault_throttle_burst
+ * Y -> _fault_throttle_time
+ */
+static unsigned int _fault_throttle_time = 3000;
+static unsigned int _fault_throttle_burst = 3;
+
+/*
+ * Maximum ringbuffer inflight for the single submitting context case - this
+ * should be sufficiently high to keep the GPU loaded
+ */
+static unsigned int _dispatcher_q_inflight_hi = 15;
+
+/*
+ * Minimum inflight for the multiple context case - this should be sufficiently low
+ * to allow for lower latency context switching
+ */
+static unsigned int _dispatcher_q_inflight_lo = 4;
+
+/* Command batch timeout (in milliseconds) */
+unsigned int adreno_drawobj_timeout = 2000;
+
+/* Interval for reading and comparing fault detection registers */
+static unsigned int _fault_timer_interval = 200;
+
+/* Use a kmem cache to speed up allocations for dispatcher jobs */
+static struct kmem_cache *jobs_cache;
+
+#define DRAWQUEUE_RB(_drawqueue) \
+	((struct adreno_ringbuffer *) \
+		container_of((_drawqueue),\
+		struct adreno_ringbuffer, dispatch_q))
+
+#define DRAWQUEUE(_ringbuffer) (&(_ringbuffer)->dispatch_q)
+
+static bool adreno_drawqueue_is_empty(struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	return (drawqueue && drawqueue->head == drawqueue->tail);
+}
+
+static int adreno_dispatch_retire_drawqueue(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue);
+
+static inline bool drawqueue_is_current(
+		struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	struct adreno_ringbuffer *rb = DRAWQUEUE_RB(drawqueue);
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+
+	return (adreno_dev->cur_rb == rb);
+}
+
+/*
+ *  If only one context has queued in the last 100 milliseconds increase
+ *  inflight to a high number to load up the GPU. If multiple contexts
+ *  have queued drop the inflight for better context switch latency.
+ *  If no contexts have queued what are you even doing here?
+ */
+
+static inline int
+_drawqueue_inflight(struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	return (drawqueue->active_context_count > 1)
+		? _dispatcher_q_inflight_lo : _dispatcher_q_inflight_hi;
+}
+
+static void fault_detect_read(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int i;
+
+	if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv))
+		return;
+
+	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
+		struct adreno_ringbuffer *rb = &(adreno_dev->ringbuffers[i]);
+
+		adreno_rb_readtimestamp(adreno_dev, rb,
+			KGSL_TIMESTAMP_RETIRED, &(rb->fault_detect_ts));
+	}
+
+	for (i = 0; i < adreno_dev->soft_ft_count; i++) {
+		if (adreno_dev->soft_ft_regs[i])
+			kgsl_regread(device, adreno_dev->soft_ft_regs[i],
+				&adreno_dev->soft_ft_vals[i]);
+	}
+}
+
+void adreno_dispatcher_start_fault_timer(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	if (adreno_soft_fault_detect(adreno_dev))
+		mod_timer(&dispatcher->fault_timer,
+			jiffies + msecs_to_jiffies(_fault_timer_interval));
+}
+
+/*
+ * This takes a kgsl_device pointer so that it can be used for the function
+ * hook in adreno.c too
+ */
+void adreno_dispatcher_stop_fault_timer(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT))
+		del_timer_sync(&dispatcher->fault_timer);
+}
+
+/**
+ * _retire_timestamp() - Retire object without sending it
+ * to the hardware
+ * @drawobj: Pointer to the object to retire
+ *
+ * In some cases ibs can be retired by the software
+ * without going to the GPU.  In those cases, update the
+ * memstore from the CPU, kick off the event engine to handle
+ * expired events and destroy the ib.
+ */
+static void _retire_timestamp(struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_context *context = drawobj->context;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct kgsl_device *device = context->device;
+	struct adreno_ringbuffer *rb = drawctxt->rb;
+	struct retire_info info = {0};
+
+	/*
+	 * Write the start and end timestamp to the memstore to keep the
+	 * accounting sane
+	 */
+	kgsl_sharedmem_writel(device->memstore,
+		KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+		drawobj->timestamp);
+
+	kgsl_sharedmem_writel(device->memstore,
+		KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+		drawobj->timestamp);
+
+	drawctxt->submitted_timestamp = drawobj->timestamp;
+
+	/* Retire pending GPU events for the object */
+	kgsl_process_event_group(device, &context->events);
+
+	info.inflight = -1;
+	info.rb_id = rb->id;
+	info.wptr = rb->wptr;
+	info.timestamp = drawobj->timestamp;
+
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_RETIRED,
+				pid_nr(context->proc_priv->pid),
+				context->id, drawobj->timestamp,
+				!!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+
+	if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) {
+		atomic64_inc(&context->proc_priv->frame_count);
+		atomic_inc(&context->proc_priv->period->frames);
+	}
+
+	/*
+	 * For A3xx we still get the rptr from the CP_RB_RPTR register instead
+	 * of the rptr scratch address. At this point the GPU clocks are turned
+	 * off, so avoid reading GPU registers directly for A3xx.
+	 */
+	if (adreno_is_a3xx(ADRENO_DEVICE(device))) {
+		trace_adreno_cmdbatch_retired(context, &info,
+			drawobj->flags, rb->dispatch_q.inflight, 0);
+	} else {
+		info.rptr = adreno_get_rptr(rb);
+
+		trace_adreno_cmdbatch_retired(context, &info,
+			drawobj->flags, rb->dispatch_q.inflight, 0);
+	}
+
+	log_kgsl_cmdbatch_retired_event(context->id, drawobj->timestamp,
+		context->priority, drawobj->flags, 0, 0);
+
+	kgsl_drawobj_destroy(drawobj);
+}
+
+static int _check_context_queue(struct adreno_context *drawctxt, u32 count)
+{
+	int ret;
+
+	spin_lock(&drawctxt->lock);
+
+	/*
+	 * Wake up if there is room in the context or if the whole thing got
+	 * invalidated while we were asleep
+	 */
+
+	if (kgsl_context_invalid(&drawctxt->base))
+		ret = 1;
+	else
+		ret = ((drawctxt->queued + count) < _context_drawqueue_size) ? 1 : 0;
+
+	spin_unlock(&drawctxt->lock);
+
+	return ret;
+}
+
+/*
+ * return true if this is a marker command and the dependent timestamp has
+ * retired
+ */
+static bool _marker_expired(struct kgsl_drawobj_cmd *markerobj)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(markerobj);
+
+	return (drawobj->flags & KGSL_DRAWOBJ_MARKER) &&
+		kgsl_check_timestamp(drawobj->device, drawobj->context,
+			markerobj->marker_timestamp);
+}
+
+static inline void _pop_drawobj(struct adreno_context *drawctxt)
+{
+	drawctxt->drawqueue_head = DRAWQUEUE_NEXT(drawctxt->drawqueue_head,
+		ADRENO_CONTEXT_DRAWQUEUE_SIZE);
+	drawctxt->queued--;
+}
+
+static int dispatch_retire_markerobj(struct kgsl_drawobj *drawobj,
+				struct adreno_context *drawctxt)
+{
+	struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj);
+
+	if (_marker_expired(cmdobj)) {
+		_pop_drawobj(drawctxt);
+		_retire_timestamp(drawobj);
+		return 0;
+	}
+
+	/*
+	 * If the marker isn't expired but the SKIP bit
+	 * is set then there are real commands following
+	 * this one in the queue. This means that we
+	 * need to dispatch the command so that we can
+	 * keep the timestamp accounting correct. If
+	 * skip isn't set then we block this queue
+	 * until the dependent timestamp expires
+	 */
+	return test_bit(CMDOBJ_SKIP, &cmdobj->priv) ? 1 : -EAGAIN;
+}
+
+static int dispatch_retire_syncobj(struct kgsl_drawobj *drawobj,
+				struct adreno_context *drawctxt)
+{
+	struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+
+	if (!kgsl_drawobj_events_pending(syncobj)) {
+		_pop_drawobj(drawctxt);
+		kgsl_drawobj_destroy(drawobj);
+		return 0;
+	}
+
+	/*
+	 * If we got here, there are pending events for the sync object.
+	 * Start the canary timer if it hasn't been started already.
+	 */
+	if (!syncobj->timeout_jiffies) {
+		syncobj->timeout_jiffies = jiffies + msecs_to_jiffies(5000);
+		mod_timer(&syncobj->timer, syncobj->timeout_jiffies);
+	}
+
+	return -EAGAIN;
+}
+
+static int drawqueue_retire_timelineobj(struct kgsl_drawobj *drawobj,
+		struct adreno_context *drawctxt)
+{
+	_pop_drawobj(drawctxt);
+	kgsl_drawobj_destroy(drawobj);
+	return 0;
+}
+
+static int drawqueue_retire_bindobj(struct kgsl_drawobj *drawobj,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_drawobj_bind *bindobj = BINDOBJ(drawobj);
+
+	if (test_bit(KGSL_BINDOBJ_STATE_DONE, &bindobj->state)) {
+		_pop_drawobj(drawctxt);
+		_retire_timestamp(drawobj);
+		return 0;
+	}
+
+	if (!test_and_set_bit(KGSL_BINDOBJ_STATE_START, &bindobj->state)) {
+		/*
+		 * Take a reference to the drawobj and the context because both
+		 * get referenced in the bind callback
+		 */
+		_kgsl_context_get(&drawctxt->base);
+		kref_get(&drawobj->refcount);
+
+		kgsl_sharedmem_bind_ranges(bindobj->bind);
+	}
+
+	return -EAGAIN;
+}
+
+/*
+ * Retires all expired marker and sync objs from the context
+ * queue and returns one of the below
+ * a) next drawobj that needs to be sent to ringbuffer
+ * b) -EAGAIN for syncobj with syncpoints pending.
+ * c) -EAGAIN for markerobj whose marker timestamp has not expired yet.
+ * d) NULL for no commands remaining in drawqueue.
+ */
+static struct kgsl_drawobj *_process_drawqueue_get_next_drawobj(
+				struct adreno_context *drawctxt)
+{
+	struct kgsl_drawobj *drawobj;
+	unsigned int i = drawctxt->drawqueue_head;
+
+	if (drawctxt->drawqueue_head == drawctxt->drawqueue_tail)
+		return NULL;
+
+	for (i = drawctxt->drawqueue_head; i != drawctxt->drawqueue_tail;
+			i = DRAWQUEUE_NEXT(i, ADRENO_CONTEXT_DRAWQUEUE_SIZE)) {
+		int ret = 0;
+
+		drawobj = drawctxt->drawqueue[i];
+		if (!drawobj)
+			return NULL;
+
+		switch (drawobj->type) {
+		case CMDOBJ_TYPE:
+			return drawobj;
+		case MARKEROBJ_TYPE:
+			ret = dispatch_retire_markerobj(drawobj, drawctxt);
+			/* Special case where marker needs to be sent to GPU */
+			if (ret == 1)
+				return drawobj;
+			break;
+		case SYNCOBJ_TYPE:
+			ret = dispatch_retire_syncobj(drawobj, drawctxt);
+			break;
+		case BINDOBJ_TYPE:
+			ret = drawqueue_retire_bindobj(drawobj, drawctxt);
+			break;
+		case TIMELINEOBJ_TYPE:
+			ret = drawqueue_retire_timelineobj(drawobj, drawctxt);
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	return NULL;
+}
+
+/**
+ * adreno_dispatcher_requeue_cmdobj() - Put a command back on the context
+ * queue
+ * @drawctxt: Pointer to the adreno draw context
+ * @cmdobj: Pointer to the KGSL command object to requeue
+ *
+ * Failure to submit a command to the ringbuffer isn't the fault of the command
+ * being submitted so if a failure happens, push it back on the head of the
+ * context queue to be reconsidered again unless the context got detached.
+ */
+static inline int adreno_dispatcher_requeue_cmdobj(
+		struct adreno_context *drawctxt,
+		struct kgsl_drawobj_cmd *cmdobj)
+{
+	unsigned int prev;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	spin_lock(&drawctxt->lock);
+
+	if (kgsl_context_is_bad(&drawctxt->base)) {
+		spin_unlock(&drawctxt->lock);
+		/* get rid of this drawobj since the context is bad */
+		kgsl_drawobj_destroy(drawobj);
+		return -ENOENT;
+	}
+
+	prev = drawctxt->drawqueue_head == 0 ?
+		(ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1) :
+		(drawctxt->drawqueue_head - 1);
+
+	/*
+	 * The maximum queue size always needs to be one less than the size of
+	 * the ringbuffer queue so there is "room" to put the drawobj back in
+	 */
+
+	WARN_ON(prev == drawctxt->drawqueue_tail);
+
+	drawctxt->drawqueue[prev] = drawobj;
+	drawctxt->queued++;
+
+	/* Reset the command queue head to reflect the newly requeued change */
+	drawctxt->drawqueue_head = prev;
+	cmdobj->requeue_cnt++;
+	spin_unlock(&drawctxt->lock);
+	return 0;
+}
+
+/**
+ * dispatcher_queue_context() - Queue a context in the dispatcher pending list
+ * @adreno_dev: Pointer to the adreno device struct
+ * @drawctxt: Pointer to the adreno draw context
+ *
+ * Add a context to the dispatcher pending list.
+ */
+static int dispatcher_queue_context(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct adreno_dispatch_job *job;
+
+	/* Refuse to queue a detached context */
+	if (kgsl_context_detached(&drawctxt->base))
+		return 0;
+
+	if (!_kgsl_context_get(&drawctxt->base))
+		return 0;
+
+	/* This function can be called in an atomic context */
+	job = kmem_cache_alloc(jobs_cache, GFP_ATOMIC);
+	if (!job) {
+		kgsl_context_put(&drawctxt->base);
+		return -ENOMEM;
+	}
+
+	job->drawctxt = drawctxt;
+
+	trace_dispatch_queue_context(drawctxt);
+	llist_add(&job->node, &dispatcher->jobs[drawctxt->base.priority]);
+
+	return 0;
+}
+
+/*
+ * Real time clients may demand high BW and have strict latency requirements.
+ * GPU bus DCVS is not fast enough to account for sudden BW requirements.
+ * Bus hint helps to bump up the bus vote (IB) upfront for known time-critical
+ * workloads.
+ */
+static void process_rt_bus_hint(struct kgsl_device *device, bool on)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_dispatcher_drawqueue *drawqueue =
+			DRAWQUEUE(&adreno_dev->ringbuffers[0]);
+
+	if (!adreno_is_preemption_enabled(adreno_dev) ||
+		!device->pwrctrl.rt_bus_hint)
+		return;
+
+	if (device->pwrctrl.rt_bus_hint_active == on)
+		return;
+
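+	/*
+	 * Raise the hint when the first submission on the RB0 queue goes
+	 * inflight and drop it again once that queue drains back to zero.
+	 */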
+	if (on && drawqueue->inflight == 1)
+		kgsl_bus_update(device, KGSL_BUS_VOTE_RT_HINT_ON);
+
+	if (!on && drawqueue->inflight == 0)
+		kgsl_bus_update(device, KGSL_BUS_VOTE_RT_HINT_OFF);
+}
+
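+/*
+ * ADRENO_DRAWOBJ_PROFILE_COUNT is the number of adreno_drawobj_profile_entry
+ * structs that fit in one page; adreno_dev->profile_index wraps around
+ * modulo this count (see sendcmd() below).
+ */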
+#define ADRENO_DRAWOBJ_PROFILE_COUNT \
+	(PAGE_SIZE / sizeof(struct adreno_drawobj_profile_entry))
+
+/**
+ * sendcmd() - Send a drawobj to the GPU hardware
+ * @adreno_dev: Pointer to the adreno device struct
+ * @cmdobj: Pointer to the KGSL command object being sent
+ *
+ * Send a KGSL drawobj to the GPU hardware
+ */
+static int sendcmd(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	struct kgsl_context *context = drawobj->context;
+	struct adreno_dispatcher_drawqueue *dispatch_q = &drawctxt->rb->dispatch_q;
+	struct adreno_submit_time time;
+	uint64_t secs = 0;
+	unsigned long nsecs = 0;
+	int ret;
+	struct submission_info info = {0};
+
+	mutex_lock(&device->mutex);
+	if (adreno_gpu_halt(adreno_dev) != 0) {
+		mutex_unlock(&device->mutex);
+		return -EBUSY;
+	}
+
+	memset(&time, 0x0, sizeof(time));
+
+	dispatcher->inflight++;
+	dispatch_q->inflight++;
+
+	if (dispatcher->inflight == 1 &&
+			!test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) {
+		/* Time to make the donuts.  Turn on the GPU */
+		ret = adreno_active_count_get(adreno_dev);
+		if (ret) {
+			dispatcher->inflight--;
+			dispatch_q->inflight--;
+			mutex_unlock(&device->mutex);
+			return ret;
+		}
+
+		set_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+	}
+
+	if (test_bit(ADRENO_DEVICE_DRAWOBJ_PROFILE, &adreno_dev->priv)) {
+		set_bit(CMDOBJ_PROFILE, &cmdobj->priv);
+		cmdobj->profile_index = adreno_dev->profile_index;
+		adreno_dev->profile_index =
+			(adreno_dev->profile_index + 1) %
+			ADRENO_DRAWOBJ_PROFILE_COUNT;
+	}
+
+	process_rt_bus_hint(device, true);
+
+	ret = adreno_ringbuffer_submitcmd(adreno_dev, cmdobj, &time);
+
+	/*
+	 * On the first command, if the submission was successful, then read the
+	 * fault registers.  If it failed then turn off the GPU. Sad face.
+	 */
+
+	if (dispatcher->inflight == 1) {
+		if (ret == 0) {
+
+			/* Stop fault timer before reading fault registers */
+			adreno_dispatcher_stop_fault_timer(device);
+
+			fault_detect_read(adreno_dev);
+
+			/* Start the fault timer on first submission */
+			adreno_dispatcher_start_fault_timer(adreno_dev);
+
+			if (!test_and_set_bit(ADRENO_DISPATCHER_ACTIVE,
+				&dispatcher->priv))
+				reinit_completion(&dispatcher->idle_gate);
+		} else {
+			adreno_active_count_put(adreno_dev);
+			clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+		}
+	}
+
+
+	if (ret) {
+		dispatcher->inflight--;
+		dispatch_q->inflight--;
+
+		process_rt_bus_hint(device, false);
+
+		mutex_unlock(&device->mutex);
+
+		/*
+		 * Don't log a message in case of:
+		 * -ENOENT means that the context was detached before the
+		 * command was submitted
+		 * -ENOSPC means that there temporarily isn't any room in the
+		 *  ringbuffer
+		 *  -EPROTO means that a fault is currently being worked on
+		 */
+
+		if (ret != -ENOENT && ret != -ENOSPC && ret != -EPROTO)
+			dev_err(device->dev,
+				     "Unable to submit command to the ringbuffer %d\n",
+				     ret);
+		return ret;
+	}
+
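+	/*
+	 * do_div() divides 'secs' in place and returns the remainder,
+	 * splitting the submit ktime into whole seconds and nanoseconds.
+	 */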
+	secs = time.ktime;
+	nsecs = do_div(secs, 1000000000);
+
+	/*
+	 * For the first submission in any given command queue update the
+	 * expected expire time - this won't actually be used / updated until
+	 * the command queue in question goes current, but universally setting
+	 * it here avoids the possibility of some race conditions with preempt
+	 */
+
+	if (dispatch_q->inflight == 1)
+		dispatch_q->expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+
+	info.inflight = (int) dispatcher->inflight;
+	info.rb_id = drawctxt->rb->id;
+	info.rptr = adreno_get_rptr(drawctxt->rb);
+	info.wptr = drawctxt->rb->wptr;
+	info.gmu_dispatch_queue = -1;
+
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_SUBMIT,
+			       pid_nr(context->proc_priv->pid),
+			       context->id, drawobj->timestamp,
+			       !!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+
+	trace_adreno_cmdbatch_submitted(drawobj, &info,
+			time.ticks, (unsigned long) secs, nsecs / 1000,
+			dispatch_q->inflight);
+
+	log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp,
+		context->priority, drawobj->flags);
+
+	mutex_unlock(&device->mutex);
+
+	cmdobj->submit_ticks = time.ticks;
+
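+	/*
+	 * Remember the cmdobj in the dispatch queue ring so it can be retired
+	 * later (or replayed if the GPU faults)
+	 */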
+	dispatch_q->cmd_q[dispatch_q->tail] = cmdobj;
+	dispatch_q->tail = (dispatch_q->tail + 1) %
+		ADRENO_DISPATCH_DRAWQUEUE_SIZE;
+
+	/*
+	 * If we believe ourselves to be current and preemption isn't a thing,
+	 * then set up the timer.  If this misses, then preemption is indeed a
+	 * thing and the timer will be set up in due time
+	 */
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+		if (drawqueue_is_current(dispatch_q))
+			mod_timer(&dispatcher->timer, dispatch_q->expires);
+	}
+
+	/*
+	 * we just submitted something, readjust ringbuffer
+	 * execution level
+	 */
+	if (gpudev->preemption_schedule)
+		gpudev->preemption_schedule(adreno_dev);
+	return 0;
+}
+
+/**
+ * dispatcher_context_sendcmds() - Send commands from a context to the GPU
+ * @adreno_dev: Pointer to the adreno device struct
+ * @drawctxt: Pointer to the adreno context to dispatch commands from
+ *
+ * Dequeue and send a burst of commands from the specified context to the GPU.
+ * Returns positive if the context needs to be put back on the pending queue,
+ * 0 if the context is empty or detached, and negative on error
+ */
+static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	struct adreno_dispatcher_drawqueue *dispatch_q =
+					&(drawctxt->rb->dispatch_q);
+	int count = 0;
+	int ret = 0;
+	int inflight = _drawqueue_inflight(dispatch_q);
+	unsigned int timestamp;
+
+	if (dispatch_q->inflight >= inflight) {
+		spin_lock(&drawctxt->lock);
+		_process_drawqueue_get_next_drawobj(drawctxt);
+		spin_unlock(&drawctxt->lock);
+		return -EBUSY;
+	}
+
+	/*
+	 * Each context can send a specific number of drawobjs per cycle
+	 */
+	while ((count < _context_drawobj_burst) &&
+		(dispatch_q->inflight < inflight)) {
+		struct kgsl_drawobj *drawobj;
+		struct kgsl_drawobj_cmd *cmdobj;
+		struct kgsl_context *context;
+
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			break;
+
+		spin_lock(&drawctxt->lock);
+		drawobj = _process_drawqueue_get_next_drawobj(drawctxt);
+
+		/*
+		 * _process_drawqueue_get_next_drawobj() returns -EAGAIN if the
+		 * current drawobj has pending sync points, so there is no more
+		 * to do here. When the sync points are satisfied the context
+		 * will get requeued
+		 */
+
+		if (IS_ERR_OR_NULL(drawobj)) {
+			if (IS_ERR(drawobj))
+				ret = PTR_ERR(drawobj);
+			spin_unlock(&drawctxt->lock);
+			break;
+		}
+		_pop_drawobj(drawctxt);
+		spin_unlock(&drawctxt->lock);
+
+		timestamp = drawobj->timestamp;
+		cmdobj = CMDOBJ(drawobj);
+		context = drawobj->context;
+		trace_adreno_cmdbatch_ready(context->id, context->priority,
+			drawobj->timestamp, cmdobj->requeue_cnt);
+		ret = sendcmd(adreno_dev, cmdobj);
+
+		/*
+		 * On error from sendcmd() try to requeue the cmdobj
+		 * unless we got back -ENOENT which means that the context has
+		 * been detached and there will be no more deliveries from here
+		 */
+		if (ret != 0) {
+			/* Destroy the cmdobj on -ENOENT */
+			if (ret == -ENOENT)
+				kgsl_drawobj_destroy(drawobj);
+			else {
+				/*
+				 * If the requeue returns an error, return that
+				 * instead of whatever sendcmd() sent us
+				 */
+				int r = adreno_dispatcher_requeue_cmdobj(
+					drawctxt, cmdobj);
+				if (r)
+					ret = r;
+			}
+
+			break;
+		}
+
+		drawctxt->submitted_timestamp = timestamp;
+
+		count++;
+	}
+
+	/*
+	 * Wake up any snoozing threads if we have consumed any real commands
+	 * or marker commands and we have room in the context queue.
+	 */
+
+	if (_check_context_queue(drawctxt, 0))
+		wake_up_all(&drawctxt->wq);
+
+	if (!ret)
+		ret = count;
+
+	/* Return an error or the number of commands submitted */
+	return ret;
+}
+
+static bool adreno_gpu_stopped(struct adreno_device *adreno_dev)
+{
+	return (adreno_gpu_fault(adreno_dev) || adreno_gpu_halt(adreno_dev));
+}
+
+static void dispatcher_handle_jobs_list(struct adreno_device *adreno_dev,
+		int id, unsigned long *map, struct llist_node *list)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct adreno_dispatch_job *job, *next;
+
+	if (!list)
+		return;
+
+	/* Reverse the order so the oldest context is considered first */
+	list = llist_reverse_order(list);
+
+	llist_for_each_entry_safe(job, next, list, node) {
+		int ret;
+
+		if (kgsl_context_is_bad(&job->drawctxt->base)) {
+			kgsl_context_put(&job->drawctxt->base);
+			kmem_cache_free(jobs_cache, job);
+			continue;
+		}
+
+		/*
+		 * Due to the nature of the lockless queue the same context
+		 * might have multiple jobs on the list. We allow this so we
+		 * don't have to query the list on the producer side but on the
+		 * consumer side we only want each context to be considered
+		 * once. Use a bitmap to remember which contexts we've already
+		 * seen and quietly discard duplicate jobs
+		 */
+		if (test_and_set_bit(job->drawctxt->base.id, map)) {
+			kgsl_context_put(&job->drawctxt->base);
+			kmem_cache_free(jobs_cache, job);
+			continue;
+		}
+
+		/*
+		 * If gpu is in fault or dispatcher is halted, add back the jobs
+		 * so that they are processed after recovery or when dispatcher
+		 * is resumed.
+		 */
+		if (adreno_gpu_stopped(adreno_dev)) {
+			llist_add(&job->node, &dispatcher->jobs[id]);
+			continue;
+		}
+
+		ret = dispatcher_context_sendcmds(adreno_dev, job->drawctxt);
+
+		/*
+		 * If the context had nothing queued or the context has been
+		 * destroyed then drop the job
+		 */
+		if (!ret || ret == -ENOENT) {
+			kgsl_context_put(&job->drawctxt->base);
+			kmem_cache_free(jobs_cache, job);
+			continue;
+		}
+
+		/*
+		 * If the ringbuffer is full then requeue the job to be
+		 * considered first next time. Otherwise the context either
+		 * submitted successfully to the GPU or hit another error, and
+		 * it should go back on the regular queue
+		 */
+		if (ret == -EBUSY)
+			llist_add(&job->node, &dispatcher->requeue[id]);
+		else
+			llist_add(&job->node, &dispatcher->jobs[id]);
+	}
+}
+
+static void dispatcher_handle_jobs(struct adreno_device *adreno_dev, int id)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	unsigned long map[BITS_TO_LONGS(KGSL_MEMSTORE_MAX)];
+	struct llist_node *requeue, *jobs;
+
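+	/*
+	 * One bit per possible context id - used by dispatcher_handle_jobs_list()
+	 * to make sure each context is only considered once per pass
+	 */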
+	memset(map, 0, sizeof(map));
+
+	requeue = llist_del_all(&dispatcher->requeue[id]);
+	jobs = llist_del_all(&dispatcher->jobs[id]);
+
+	dispatcher_handle_jobs_list(adreno_dev, id, map, requeue);
+	dispatcher_handle_jobs_list(adreno_dev, id, map, jobs);
+}
+
+/**
+ * _adreno_dispatcher_issuecmds() - Issue commands from pending contexts
+ * @adreno_dev: Pointer to the adreno device struct
+ *
+ * Issue as many commands as possible (up to inflight) from the pending contexts.
+ * This function assumes the dispatcher mutex has been locked.
+ */
+static void _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int i;
+
+	/* Leave early if the dispatcher isn't in a happy state */
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(dispatcher->jobs); i++)
+		dispatcher_handle_jobs(adreno_dev, i);
+}
+
+/* Update the dispatcher timers */
+static void _dispatcher_update_timers(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	/* Kick the idle timer */
+	mutex_lock(&device->mutex);
+	kgsl_pwrscale_update(device);
+	process_rt_bus_hint(device, false);
+	kgsl_start_idle_timer(device);
+	mutex_unlock(&device->mutex);
+
+	/* Check to see if we need to update the command timer */
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+		struct adreno_dispatcher_drawqueue *drawqueue =
+			DRAWQUEUE(adreno_dev->cur_rb);
+
+		if (!adreno_drawqueue_is_empty(drawqueue))
+			mod_timer(&dispatcher->timer, drawqueue->expires);
+	}
+}
+
+static inline void _decrement_submit_now(struct kgsl_device *device)
+{
+	spin_lock(&device->submit_lock);
+	device->submit_now--;
+	spin_unlock(&device->submit_lock);
+}
+
+/**
+ * adreno_dispatcher_issuecmds() - Issue commands from pending contexts
+ * @adreno_dev: Pointer to the adreno device struct
+ *
+ * Lock the dispatcher and call _adreno_dispatcher_issuecmds()
+ */
+static void adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	spin_lock(&device->submit_lock);
+	/* If inline submission is not allowed, schedule the work for later */
+	if (device->skip_inline_submit) {
+		spin_unlock(&device->submit_lock);
+		goto done;
+	}
+	device->submit_now++;
+	spin_unlock(&device->submit_lock);
+
+	/* If the dispatcher is busy then schedule the work for later */
+	if (!mutex_trylock(&dispatcher->mutex)) {
+		_decrement_submit_now(device);
+		goto done;
+	}
+
+	_adreno_dispatcher_issuecmds(adreno_dev);
+
+	if (dispatcher->inflight)
+		_dispatcher_update_timers(adreno_dev);
+
+	mutex_unlock(&dispatcher->mutex);
+	_decrement_submit_now(device);
+	return;
+done:
+	adreno_dispatcher_schedule(device);
+}
+
+/**
+ * get_timestamp() - Return the next timestamp for the context
+ * @drawctxt: Pointer to an adreno draw context struct
+ * @drawobj: Pointer to a drawobj
+ * @timestamp: Pointer to a timestamp value possibly passed from the user
+ * @user_ts: user generated timestamp
+ *
+ * Assign a timestamp based on the settings of the draw context and the command
+ * batch.
+ */
+static int get_timestamp(struct adreno_context *drawctxt,
+		struct kgsl_drawobj *drawobj, unsigned int *timestamp,
+		unsigned int user_ts)
+{
+
+	if (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS) {
+		/*
+		 * User specified timestamps need to be greater than the last
+		 * issued timestamp in the context
+		 */
+		if (timestamp_cmp(drawctxt->timestamp, user_ts) >= 0)
+			return -ERANGE;
+
+		drawctxt->timestamp = user_ts;
+	} else
+		drawctxt->timestamp++;
+
+	*timestamp = drawctxt->timestamp;
+	drawobj->timestamp = *timestamp;
+	return 0;
+}
+
+static void _set_ft_policy(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt,
+		struct kgsl_drawobj_cmd *cmdobj)
+{
+	/*
+	 * Set the fault tolerance policy for the command batch - if the
+	 * context hasn't disabled FT, use the current device policy
+	 */
+	if (drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
+		set_bit(KGSL_FT_DISABLE, &cmdobj->fault_policy);
+	/*
+	 * Set the fault tolerance policy to FT_REPLAY - the context wants to
+	 * be invalidated if a replay attempt fails, so there is no need to
+	 * execute the default FT policy.
+	 */
+	else if (drawctxt->base.flags & KGSL_CONTEXT_INVALIDATE_ON_FAULT)
+		set_bit(KGSL_FT_REPLAY, &cmdobj->fault_policy);
+	else
+		cmdobj->fault_policy = adreno_dev->ft_policy;
+}
+
+static void _cmdobj_set_flags(struct adreno_context *drawctxt,
+			struct kgsl_drawobj_cmd *cmdobj)
+{
+	/*
+	 * Force the preamble for this submission only - this is usually
+	 * requested by the dispatcher as part of fault recovery
+	 */
+	if (test_and_clear_bit(ADRENO_CONTEXT_FORCE_PREAMBLE,
+				&drawctxt->base.priv))
+		set_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv);
+
+	/*
+	 * Force the preamble if set from userspace in the context or
+	 * command obj flags
+	 */
+	if ((drawctxt->base.flags & KGSL_CONTEXT_CTX_SWITCH) ||
+		(cmdobj->base.flags & KGSL_DRAWOBJ_CTX_SWITCH))
+		set_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv);
+
+	/* Skip this ib if IFH_NOP is enabled */
+	if (drawctxt->base.flags & KGSL_CONTEXT_IFH_NOP)
+		set_bit(CMDOBJ_SKIP, &cmdobj->priv);
+
+	/*
+	 * If we are waiting for the end of frame and it hasn't appeared yet,
+	 * then mark the command obj as skipped.  It will still progress
+	 * through the pipeline but it won't actually send any commands
+	 */
+
+	if (test_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->base.priv)) {
+		set_bit(CMDOBJ_SKIP, &cmdobj->priv);
+
+		/*
+		 * If this command obj represents the EOF then clear the way
+		 * for the dispatcher to continue submitting
+		 */
+
+		if (cmdobj->base.flags & KGSL_DRAWOBJ_END_OF_FRAME) {
+			clear_bit(ADRENO_CONTEXT_SKIP_EOF,
+				  &drawctxt->base.priv);
+
+			/*
+			 * Force the preamble on the next command to ensure that
+			 * the state is correct
+			 */
+			set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE,
+				&drawctxt->base.priv);
+		}
+	}
+}
+
+static inline int _wait_for_room_in_context_queue(
+	struct adreno_context *drawctxt, u32 count) __must_hold(&drawctxt->lock)
+{
+	int ret = 0;
+
+	/*
+	 * There is always a possibility that dispatcher may end up pushing
+	 * the last popped draw object back to the context drawqueue. Hence,
+	 * we can only queue up to _context_drawqueue_size - 1 here to make
+	 * sure we never let drawqueue->queued exceed _context_drawqueue_size.
+	 */
+	if ((drawctxt->queued + count) > (_context_drawqueue_size - 1)) {
+		trace_adreno_drawctxt_sleep(drawctxt);
+		spin_unlock(&drawctxt->lock);
+
+		ret = wait_event_interruptible_timeout(drawctxt->wq,
+			_check_context_queue(drawctxt, count),
+			msecs_to_jiffies(_context_queue_wait));
+
+		spin_lock(&drawctxt->lock);
+		trace_adreno_drawctxt_wake(drawctxt);
+
+		/*
+		 * Account for the possibility that the context got invalidated
+		 * while we were sleeping
+		 */
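+		/*
+		 * wait_event_interruptible_timeout() returns 0 on timeout, a
+		 * negative error if interrupted, or the remaining jiffies if
+		 * the condition became true
+		 */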
+		if (ret > 0)
+			ret = kgsl_check_context_state(&drawctxt->base);
+		else if (ret == 0)
+			ret = -ETIMEDOUT;
+	}
+
+	return ret;
+}
+
+static unsigned int _check_context_state_to_queue_cmds(
+	struct adreno_context *drawctxt, u32 count)
+{
+	int ret = kgsl_check_context_state(&drawctxt->base);
+
+	if (ret)
+		return ret;
+
+	return _wait_for_room_in_context_queue(drawctxt, count);
+}
+
+static void _queue_drawobj(struct adreno_context *drawctxt,
+	struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_context *context = drawobj->context;
+
+	/* Put the command into the queue */
+	drawctxt->drawqueue[drawctxt->drawqueue_tail] = drawobj;
+	drawctxt->drawqueue_tail = (drawctxt->drawqueue_tail + 1) %
+			ADRENO_CONTEXT_DRAWQUEUE_SIZE;
+	drawctxt->queued++;
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_QUEUE,
+				pid_nr(context->proc_priv->pid),
+				context->id, drawobj->timestamp,
+				!!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+	trace_adreno_cmdbatch_queued(drawobj, drawctxt->queued);
+}
+
+static int drawctxt_queue_bindobj(struct adreno_context *drawctxt,
+	struct kgsl_drawobj *drawobj, u32 *timestamp, u32 user_ts)
+{
+	int ret;
+
+	ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts);
+	if (ret)
+		return ret;
+
+	drawctxt->queued_timestamp = *timestamp;
+	_queue_drawobj(drawctxt, drawobj);
+
+	return 0;
+}
+
+static void drawctxt_queue_timelineobj(struct adreno_context *drawctxt,
+	struct kgsl_drawobj *drawobj)
+{
+	/*
+	 * This drawobj is not submitted to the GPU so use a timestamp of 0.
+	 * Update the timestamp through a subsequent marker to keep userspace
+	 * happy.
+	 */
+	drawobj->timestamp = 0;
+
+	_queue_drawobj(drawctxt, drawobj);
+}
+
+static int drawctxt_queue_markerobj(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct kgsl_drawobj *drawobj,
+	uint32_t *timestamp, unsigned int user_ts)
+{
+	struct kgsl_drawobj_cmd *markerobj = CMDOBJ(drawobj);
+	int ret;
+
+	ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts);
+	if (ret)
+		return ret;
+
+	/*
+	 * See if we can fastpath this thing - if nothing is queued and the
+	 * last queued timestamp has already retired, retire the marker
+	 * without bothering the GPU
+	 */
+	if (!drawctxt->queued && kgsl_check_timestamp(drawobj->device,
+			drawobj->context, drawctxt->queued_timestamp)) {
+		_retire_timestamp(drawobj);
+		return 1;
+	}
+
+	/*
+	 * Remember the last queued timestamp - the marker will block
+	 * until that timestamp is expired (unless another command
+	 * comes along and forces the marker to execute)
+	 */
+
+	markerobj->marker_timestamp = drawctxt->queued_timestamp;
+	drawctxt->queued_timestamp = *timestamp;
+	_set_ft_policy(adreno_dev, drawctxt, markerobj);
+	_cmdobj_set_flags(drawctxt, markerobj);
+
+	_queue_drawobj(drawctxt, drawobj);
+
+	return 0;
+}
+
+static int drawctxt_queue_cmdobj(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct kgsl_drawobj *drawobj,
+	uint32_t *timestamp, unsigned int user_ts)
+{
+	struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj);
+	unsigned int j;
+	int ret;
+
+	ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts);
+	if (ret)
+		return ret;
+
+	/*
+	 * If this is a real command then we need to force any markers
+	 * queued before it to dispatch to keep time linear - set the
+	 * skip bit so the commands get NOPed.
+	 */
+	j = drawctxt->drawqueue_head;
+
+	while (j != drawctxt->drawqueue_tail) {
+		if (drawctxt->drawqueue[j]->type == MARKEROBJ_TYPE) {
+			struct kgsl_drawobj_cmd *markerobj =
+				CMDOBJ(drawctxt->drawqueue[j]);
+			set_bit(CMDOBJ_SKIP, &markerobj->priv);
+		}
+
+		j = DRAWQUEUE_NEXT(j, ADRENO_CONTEXT_DRAWQUEUE_SIZE);
+	}
+
+	drawctxt->queued_timestamp = *timestamp;
+	_set_ft_policy(adreno_dev, drawctxt, cmdobj);
+	_cmdobj_set_flags(drawctxt, cmdobj);
+
+	_queue_drawobj(drawctxt, drawobj);
+
+	return 0;
+}
+
+static void drawctxt_queue_syncobj(struct adreno_context *drawctxt,
+	struct kgsl_drawobj *drawobj, uint32_t *timestamp)
+{
+	*timestamp = 0;
+	drawobj->timestamp = 0;
+
+	_queue_drawobj(drawctxt, drawobj);
+}
+
+/*
+ * Queue a command in the context - if there isn't any room in the queue, then
+ * block until there is
+ */
+static int adreno_dispatcher_queue_cmds(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+		uint32_t count, uint32_t *timestamp)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct adreno_dispatcher_drawqueue *dispatch_q;
+	struct adreno_dispatch_job *job;
+	int ret;
+	unsigned int i, user_ts;
+
+	/*
+	 * There is always a possibility that dispatcher may end up pushing
+	 * the last popped draw object back to the context drawqueue. Hence,
+	 * we can only queue up to _context_drawqueue_size - 1 here to make
+	 * sure we never let drawqueue->queued exceed _context_drawqueue_size.
+	 */
+	if (!count || count > _context_drawqueue_size - 1)
+		return -EINVAL;
+
+	ret = kgsl_check_context_state(&drawctxt->base);
+	if (ret)
+		return ret;
+
+	ret = adreno_verify_cmdobj(dev_priv, context, drawobj, count);
+	if (ret)
+		return ret;
+
+	/* wait for the suspend gate */
+	wait_for_completion(&device->halt_gate);
+
+	job = kmem_cache_alloc(jobs_cache, GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+
+	job->drawctxt = drawctxt;
+
+	spin_lock(&drawctxt->lock);
+
+	ret = _check_context_state_to_queue_cmds(drawctxt, count);
+	if (ret) {
+		spin_unlock(&drawctxt->lock);
+		kmem_cache_free(jobs_cache, job);
+		return ret;
+	}
+
+	user_ts = *timestamp;
+
+	/*
+	 * If there is only one drawobj in the array and it is of
+	 * type SYNCOBJ_TYPE, skip comparing user_ts as it can be 0
+	 */
+	if (!(count == 1 && drawobj[0]->type == SYNCOBJ_TYPE) &&
+		(drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS)) {
+		/*
+		 * User specified timestamps need to be greater than the last
+		 * issued timestamp in the context
+		 */
+		if (timestamp_cmp(drawctxt->timestamp, user_ts) >= 0) {
+			spin_unlock(&drawctxt->lock);
+			kmem_cache_free(jobs_cache, job);
+			return -ERANGE;
+		}
+	}
+
+	for (i = 0; i < count; i++) {
+
+		switch (drawobj[i]->type) {
+		case MARKEROBJ_TYPE:
+			ret = drawctxt_queue_markerobj(adreno_dev, drawctxt,
+				drawobj[i], timestamp, user_ts);
+			if (ret) {
+				spin_unlock(&drawctxt->lock);
+				kmem_cache_free(jobs_cache, job);
+			}
+
+			if (ret == 1)
+				goto done;
+			else if (ret)
+				return ret;
+			break;
+		case CMDOBJ_TYPE:
+			ret = drawctxt_queue_cmdobj(adreno_dev, drawctxt,
+				drawobj[i], timestamp, user_ts);
+			if (ret) {
+				spin_unlock(&drawctxt->lock);
+				kmem_cache_free(jobs_cache, job);
+				return ret;
+			}
+			break;
+		case SYNCOBJ_TYPE:
+			drawctxt_queue_syncobj(drawctxt, drawobj[i], timestamp);
+			break;
+		case BINDOBJ_TYPE:
+			ret = drawctxt_queue_bindobj(drawctxt, drawobj[i],
+				timestamp, user_ts);
+			if (ret) {
+				spin_unlock(&drawctxt->lock);
+				kmem_cache_free(jobs_cache, job);
+				return ret;
+			}
+			break;
+		case TIMELINEOBJ_TYPE:
+			drawctxt_queue_timelineobj(drawctxt, drawobj[i]);
+			break;
+		default:
+			spin_unlock(&drawctxt->lock);
+			kmem_cache_free(jobs_cache, job);
+			return -EINVAL;
+		}
+
+	}
+
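+	/* Grab the dispatch queue for the ringbuffer this context runs on */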
+	dispatch_q = &(ADRENO_CONTEXT(drawobj[0]->context)->rb->dispatch_q);
+
+	adreno_track_context(adreno_dev, dispatch_q, drawctxt);
+
+	spin_unlock(&drawctxt->lock);
+
+	/* Add the context to the dispatcher pending list */
+	if (_kgsl_context_get(&drawctxt->base)) {
+		trace_dispatch_queue_context(drawctxt);
+		llist_add(&job->node,
+			&adreno_dev->dispatcher.jobs[drawctxt->base.priority]);
+	} else {
+		kmem_cache_free(jobs_cache, job);
+		goto done;
+	}
+
+	/*
+	 * Only issue commands if inflight is less than burst - this prevents us
+	 * from sitting around waiting for the mutex on a busy system - the work
+	 * loop will schedule it for us. Inflight is mutex protected but the
+	 * worst that can happen is that it will go to 0 after we check and if
+	 * it goes to 0 it is because the work loop decremented it and the work
+	 * queue will try to schedule new commands anyway.
+	 */
+
+	if (dispatch_q->inflight < _context_drawobj_burst)
+		adreno_dispatcher_issuecmds(adreno_dev);
+done:
+	if (test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv))
+		return -EPROTO;
+
+	return 0;
+}
+
+/*
+ * If an IB inside of the drawobj has a gpuaddr that matches the base
+ * passed in then mark it with MEMOBJ_SKIP so it is effectively skipped when
+ * it is submitted to the ringbuffer.
+ */
+static void _skip_ib(struct kgsl_drawobj_cmd *cmdobj, uint64_t base)
+{
+	struct kgsl_memobj_node *ib;
+
+	list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+		if (ib->gpuaddr == base) {
+			ib->priv |= MEMOBJ_SKIP;
+			if (base)
+				return;
+		}
+	}
+}
+
+static void _skip_cmd(struct kgsl_drawobj_cmd *cmdobj,
+	struct kgsl_drawobj_cmd **replay, int count)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	int i;
+
+	/*
+	 * SKIPCMD policy: the next IB issued for this context is tentative -
+	 * if it fails we assume that GFT failed, and if it succeeds we mark
+	 * GFT as a success.
+	 *
+	 * Find next commandbatch for the faulting context
+	 * If commandbatch is found
+	 * a) store the current commandbatch fault_policy in context's next
+	 *    commandbatch fault_policy
+	 * b) force preamble for next commandbatch
+	 */
+	for (i = 1; i < count; i++) {
+		if (DRAWOBJ(replay[i])->context->id == drawobj->context->id) {
+			replay[i]->fault_policy = replay[0]->fault_policy;
+			set_bit(CMDOBJ_FORCE_PREAMBLE, &replay[i]->priv);
+			set_bit(KGSL_FT_SKIPCMD, &replay[i]->fault_recovery);
+			break;
+		}
+	}
+
+	/*
+	 * If we did not find the next cmd then
+	 * a) set a flag for next command issued in this context
+	 * b) store the fault_policy, this fault_policy becomes the policy of
+	 *    next command issued in this context
+	 */
+	if ((i == count) && drawctxt) {
+		set_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv);
+		drawctxt->fault_policy = replay[0]->fault_policy;
+	}
+
+	/* set the flags to skip this cmdobj */
+	set_bit(CMDOBJ_SKIP, &cmdobj->priv);
+	cmdobj->fault_recovery = 0;
+}
+
+static void _skip_frame(struct kgsl_drawobj_cmd *cmdobj,
+	struct kgsl_drawobj_cmd **replay, int count)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	int skip = 1;
+	int i;
+
+	for (i = 0; i < count; i++) {
+
+		struct kgsl_drawobj *replay_obj = DRAWOBJ(replay[i]);
+
+		/*
+		 * Only operate on drawobj's that belong to the
+		 * faulting context
+		 */
+
+		if (replay_obj->context->id != drawobj->context->id)
+			continue;
+
+		/*
+		 * Skip all the drawobjs in this context until
+		 * the EOF flag is seen.  If the EOF flag is seen then
+		 * force the preamble for the next command.
+		 */
+
+		if (skip) {
+			set_bit(CMDOBJ_SKIP, &replay[i]->priv);
+
+			if (replay_obj->flags & KGSL_DRAWOBJ_END_OF_FRAME)
+				skip = 0;
+		} else {
+			set_bit(CMDOBJ_FORCE_PREAMBLE, &replay[i]->priv);
+			return;
+		}
+	}
+
+	/*
+	 * If the EOF flag hasn't been seen yet then set the flag in the
+	 * drawctxt to keep looking for it
+	 */
+
+	if (skip && drawctxt)
+		set_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->base.priv);
+
+	/*
+	 * If we did see the EOF flag then force the preamble on for the
+	 * next command issued on this context
+	 */
+
+	if (!skip && drawctxt)
+		set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->base.priv);
+}
+
+static void remove_invalidated_cmdobjs(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd **replay, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct kgsl_drawobj_cmd *cmdobj = replay[i];
+		struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+		if (cmdobj == NULL)
+			continue;
+
+		if (kgsl_context_is_bad(drawobj->context)) {
+			replay[i] = NULL;
+
+			mutex_lock(&device->mutex);
+			kgsl_cancel_events_timestamp(device,
+				&drawobj->context->events, drawobj->timestamp);
+			mutex_unlock(&device->mutex);
+
+			kgsl_drawobj_destroy(drawobj);
+		}
+	}
+}
+
+#define pr_fault(_d, _c, fmt, args...) \
+		pr_context(_d, (_c)->context, fmt, ##args)
+
+static void adreno_fault_header(struct kgsl_device *device,
+		struct adreno_ringbuffer *rb, struct kgsl_drawobj_cmd *cmdobj,
+		int fault)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt =
+			drawobj ? ADRENO_CONTEXT(drawobj->context) : NULL;
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int status, rptr, wptr, ib1sz, ib2sz;
+	uint64_t ib1base, ib2base;
+	bool gx_on = adreno_gx_is_on(adreno_dev);
+	int id = (rb != NULL) ? rb->id : -1;
+	const char *type = fault & ADRENO_GMU_FAULT ? "gmu" : "gpu";
+
+	if (!gx_on) {
+		if (drawobj != NULL) {
+			pr_fault(device, drawobj,
+				"%s fault ctx %u ctx_type %s ts %u and GX is OFF\n",
+				type, drawobj->context->id,
+				kgsl_context_type(drawctxt->type),
+				drawobj->timestamp);
+			pr_fault(device, drawobj, "cmdline: %s\n",
+					drawctxt->base.proc_priv->cmdline);
+		} else
+			dev_err(device->dev, "RB[%d] : %s fault and GX is OFF\n",
+				id, type);
+
+		return;
+	}
+
+	if (gpudev->fault_header)
+		return gpudev->fault_header(adreno_dev, drawobj);
+
+	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr);
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+					  ADRENO_REG_CP_IB1_BASE_HI, &ib1base);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1sz);
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE,
+					   ADRENO_REG_CP_IB2_BASE_HI, &ib2base);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz);
+
+	if (drawobj != NULL) {
+		drawctxt->base.total_fault_count++;
+		drawctxt->base.last_faulted_cmd_ts = drawobj->timestamp;
+
+		trace_adreno_gpu_fault(drawobj->context->id,
+			drawobj->timestamp,
+			status, rptr, wptr, ib1base, ib1sz,
+			ib2base, ib2sz, drawctxt->rb->id);
+
+		pr_fault(device, drawobj,
+			"%s fault ctx %u ctx_type %s ts %u status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+			type, drawobj->context->id,
+			kgsl_context_type(drawctxt->type),
+			drawobj->timestamp, status,
+			rptr, wptr, ib1base, ib1sz, ib2base, ib2sz);
+
+		pr_fault(device, drawobj, "cmdline: %s\n",
+				drawctxt->base.proc_priv->cmdline);
+
+		if (rb != NULL)
+			pr_fault(device, drawobj,
+				"%s fault rb %d rb sw r/w %4.4x/%4.4x\n",
+				type, rb->id, rptr, rb->wptr);
+	} else {
+		dev_err(device->dev,
+			"RB[%d] : %s fault status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+			id, type, status, rptr, wptr, ib1base, ib1sz, ib2base,
+			ib2sz);
+		if (rb != NULL)
+			dev_err(device->dev,
+				"RB[%d] : %s fault rb sw r/w %4.4x/%4.4x\n",
+				rb->id, type, rptr, rb->wptr);
+	}
+}
+
+void adreno_fault_skipcmd_detached(struct adreno_device *adreno_dev,
+				 struct adreno_context *drawctxt,
+				 struct kgsl_drawobj *drawobj)
+{
+	if (test_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv) &&
+			kgsl_context_detached(&drawctxt->base)) {
+		pr_context(KGSL_DEVICE(adreno_dev), drawobj->context,
+			"gpu detached context %d\n", drawobj->context->id);
+		clear_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv);
+	}
+}
+
+/**
+ * process_cmdobj_fault() - Process a cmdobj for fault policies
+ * @device: Device on which the cmdobj caused a fault
+ * @replay: List of cmdobj's that are to be replayed on the device. The
+ * first command in the replay list is the faulting command and the remaining
+ * cmdobj's in the list are commands that were submitted to the same queue
+ * as the faulting one.
+ * @count: Number of cmdobj's in replay
+ * @base: The IB1 base at the time of fault
+ * @fault: The fault type
+ */
+static void process_cmdobj_fault(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd **replay, int count,
+		unsigned int base,
+		int fault)
+{
+	struct kgsl_drawobj_cmd *cmdobj = replay[0];
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	int i;
+	char *state = "failed";
+
+	/*
+	 * If GFT recovered more than X times in Y ms invalidate the context
+	 * and do not attempt recovery.
+	 * Example: X==3 and Y==3000 ms, GPU hung at 500ms, 1700ms, 2500ms and
+	 * 3000ms for the same context, we will not try FT and will invalidate
+	 * the context at 3000ms because the context triggered GFT more than 3
+	 * times in the last 3 seconds. If a context caused recoverable GPU
+	 * hangs where the 1st and 4th gpu hang are more than 3 seconds apart
+	 * we won't disable GFT or invalidate the context.
+	 */
+	if (test_bit(KGSL_FT_THROTTLE, &cmdobj->fault_policy)) {
+		if (ktime_ms_delta(ktime_get(), drawobj->context->fault_time) >
+				_fault_throttle_time) {
+			drawobj->context->fault_time = ktime_get();
+			drawobj->context->fault_count = 1;
+		} else {
+			drawobj->context->fault_count++;
+			if (drawobj->context->fault_count >
+					_fault_throttle_burst) {
+				set_bit(KGSL_FT_DISABLE,
+						&cmdobj->fault_policy);
+				pr_context(device, drawobj->context,
+					 "gpu fault threshold exceeded %d faults in %d msecs\n",
+					 _fault_throttle_burst,
+					 _fault_throttle_time);
+			}
+		}
+	}
+
+	/*
+	 * If FT is disabled for this cmdobj invalidate immediately
+	 */
+
+	if (test_bit(KGSL_FT_DISABLE, &cmdobj->fault_policy) ||
+		test_bit(KGSL_FT_TEMP_DISABLE, &cmdobj->fault_policy)) {
+		state = "skipped";
+		bitmap_zero(&cmdobj->fault_policy, BITS_PER_LONG);
+	}
+
+	/* If the context is detached do not run FT on context */
+	if (kgsl_context_detached(drawobj->context)) {
+		state = "detached";
+		bitmap_zero(&cmdobj->fault_policy, BITS_PER_LONG);
+	}
+
+	/*
+	 * Set a flag so we don't print another PM dump if the cmdobj fails
+	 * again on replay
+	 */
+
+	set_bit(KGSL_FT_SKIP_PMDUMP, &cmdobj->fault_policy);
+
+	/*
+	 * A hardware fault generally means something was deterministically
+	 * wrong with the cmdobj - no point in trying to replay it
+	 * Clear the replay bit and move on to the next policy level
+	 */
+
+	if (fault & ADRENO_HARD_FAULT)
+		clear_bit(KGSL_FT_REPLAY, &(cmdobj->fault_policy));
+
+	/*
+	 * A timeout fault means the IB timed out - clear the policy and
+	 * invalidate - this will clear the FT_SKIP_PMDUMP bit but that is okay
+	 * because we won't see this cmdobj again
+	 */
+
+	if ((fault & ADRENO_TIMEOUT_FAULT) ||
+				(fault & ADRENO_CTX_DETATCH_TIMEOUT_FAULT))
+		bitmap_zero(&cmdobj->fault_policy, BITS_PER_LONG);
+
+	/*
+	 * If the context had a GPU page fault then it is likely it would fault
+	 * again if replayed
+	 */
+
+	if (test_bit(KGSL_CONTEXT_PRIV_PAGEFAULT,
+		     &drawobj->context->priv)) {
+		/* we'll need to resume the mmu later... */
+		clear_bit(KGSL_FT_REPLAY, &cmdobj->fault_policy);
+		clear_bit(KGSL_CONTEXT_PRIV_PAGEFAULT,
+			  &drawobj->context->priv);
+	}
+
+	/*
+	 * Execute the fault tolerance policy. Each cmdobj stores the
+	 * current fault policy that was set when it was queued.
+	 * As the options are tried in descending priority
+	 * (REPLAY -> SKIPIB -> SKIPCMD -> SKIPFRAME -> NOTHING) the bits are
+	 * cleared from the cmdobj policy so the next thing can be tried if the
+	 * chance comes around again
+	 */
+
+	/* Replay the hanging cmdobj again */
+	if (test_and_clear_bit(KGSL_FT_REPLAY, &cmdobj->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdobj, BIT(KGSL_FT_REPLAY));
+		set_bit(KGSL_FT_REPLAY, &cmdobj->fault_recovery);
+		return;
+	}
+
+	/*
+	 * Skip the last IB1 that was played but replay everything else.
+	 * Note that the last IB1 might not be in the "hung" cmdobj
+	 * because the CP may have caused a page-fault while it was prefetching
+	 * the next IB1/IB2. Walk all outstanding commands and zap the
+	 * supposedly bad IB1 wherever it lurks.
+	 */
+
+	if (test_and_clear_bit(KGSL_FT_SKIPIB, &cmdobj->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdobj, BIT(KGSL_FT_SKIPIB));
+		set_bit(KGSL_FT_SKIPIB, &cmdobj->fault_recovery);
+
+		for (i = 0; i < count; i++) {
+			if (replay[i] != NULL &&
+				DRAWOBJ(replay[i])->context->id ==
+					drawobj->context->id)
+				_skip_ib(replay[i], base);
+		}
+
+		return;
+	}
+
+	/* Skip the faulted cmdobj submission */
+	if (test_and_clear_bit(KGSL_FT_SKIPCMD, &cmdobj->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdobj, BIT(KGSL_FT_SKIPCMD));
+
+		/* Skip faulting cmdobj */
+		_skip_cmd(cmdobj, replay, count);
+
+		return;
+	}
+
+	if (test_and_clear_bit(KGSL_FT_SKIPFRAME, &cmdobj->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdobj,
+			BIT(KGSL_FT_SKIPFRAME));
+		set_bit(KGSL_FT_SKIPFRAME, &cmdobj->fault_recovery);
+
+		/*
+		 * Skip all the pending cmdobj's for this context until
+		 * the EOF frame is seen
+		 */
+		_skip_frame(cmdobj, replay, count);
+		return;
+	}
+
+	/* If we get here then all the policies failed */
+
+	pr_context(device, drawobj->context, "gpu %s ctx %d ts %u\n",
+		state, drawobj->context->id, drawobj->timestamp);
+
+	/* Mark the context as failed and invalidate it */
+	adreno_drawctxt_set_guilty(device, drawobj->context);
+}
+
+/**
+ * recover_dispatch_q() - Recover all commands in a dispatch queue by
+ * resubmitting the commands
+ * @device: Device on which recovery is performed
+ * @dispatch_q: The command queue to recover
+ * @fault: Faults caused by the command in the dispatch q
+ * @base: The IB1 base during the fault
+ */
+static void recover_dispatch_q(struct kgsl_device *device,
+		struct adreno_dispatcher_drawqueue *dispatch_q,
+		int fault,
+		unsigned int base)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_drawobj_cmd **replay;
+	unsigned int ptr;
+	int first = 0;
+	int count = 0;
+	int i;
+
+	/* Allocate memory to store the inflight commands */
+	replay = kcalloc(dispatch_q->inflight, sizeof(*replay), GFP_KERNEL);
+
+	if (replay == NULL) {
+		unsigned int ptr = dispatch_q->head;
+
+		/* Recovery failed - mark everybody on this q guilty */
+		while (ptr != dispatch_q->tail) {
+			struct kgsl_drawobj_cmd *cmdobj =
+						dispatch_q->cmd_q[ptr];
+			struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+			adreno_drawctxt_set_guilty(device, drawobj->context);
+			kgsl_drawobj_destroy(drawobj);
+
+			ptr = DRAWQUEUE_NEXT(ptr,
+				ADRENO_DISPATCH_DRAWQUEUE_SIZE);
+		}
+
+		/*
+		 * Set the replay count to zero - this will ensure that the
+		 * hardware gets reset but nothing else gets played
+		 */
+
+		count = 0;
+		goto replay;
+	}
+
+	/* Copy the inflight cmdobj's into the temporary storage */
+	ptr = dispatch_q->head;
+
+	while (ptr != dispatch_q->tail) {
+		replay[count++] = dispatch_q->cmd_q[ptr];
+		ptr = DRAWQUEUE_NEXT(ptr, ADRENO_DISPATCH_DRAWQUEUE_SIZE);
+	}
+
+	if (fault && count)
+		process_cmdobj_fault(device, replay,
+					count, base, fault);
+replay:
+	dispatch_q->inflight = 0;
+	dispatch_q->head = dispatch_q->tail = 0;
+	/* Remove any pending cmdobj's that have been invalidated */
+	remove_invalidated_cmdobjs(device, replay, count);
+
+	/* Replay the pending command buffers */
+	for (i = 0; i < count; i++) {
+
+		int ret;
+
+		if (replay[i] == NULL)
+			continue;
+
+		/*
+		 * Force the preamble on the first command (if applicable) to
+		 * avoid any strange stage issues
+		 */
+
+		if (first == 0) {
+			set_bit(CMDOBJ_FORCE_PREAMBLE, &replay[i]->priv);
+			first = 1;
+		}
+
+		/*
+		 * Force each cmdobj to wait for idle - this avoids weird
+		 * CP parse issues
+		 */
+
+		set_bit(CMDOBJ_WFI, &replay[i]->priv);
+
+		ret = sendcmd(adreno_dev, replay[i]);
+
+		/*
+		 * If sending the command fails, then try to recover by
+		 * invalidating the context
+		 */
+
+		if (ret) {
+			pr_context(device, replay[i]->base.context,
+				"gpu reset failed ctx %u ts %u\n",
+				replay[i]->base.context->id,
+				replay[i]->base.timestamp);
+
+			/* Mark this context as guilty (failed recovery) */
+			adreno_drawctxt_set_guilty(device, replay[i]->base.context);
+			remove_invalidated_cmdobjs(device, &replay[i],
+				count - i);
+		}
+	}
+
+	/* Clear the fault bit */
+	clear_bit(ADRENO_DEVICE_FAULT, &adreno_dev->priv);
+
+	kfree(replay);
+}
+
+static void do_header_and_snapshot(struct kgsl_device *device, int fault,
+		struct adreno_ringbuffer *rb, struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	/* Always dump the snapshot on a non-drawobj failure */
+	if (cmdobj == NULL) {
+		adreno_fault_header(device, rb, NULL, fault);
+
+		/* GMU snapshot will also pull a full device snapshot */
+		if (fault & ADRENO_GMU_FAULT)
+			gmu_core_fault_snapshot(device);
+		else
+			kgsl_device_snapshot(device, NULL, NULL, false);
+		return;
+	}
+
+	/* Skip everything if the PMDUMP flag is set */
+	if (test_bit(KGSL_FT_SKIP_PMDUMP, &cmdobj->fault_policy))
+		return;
+
+	/* Print the fault header */
+	adreno_fault_header(device, rb, cmdobj, fault);
+
+	if (!(drawobj->context->flags & KGSL_CONTEXT_NO_SNAPSHOT))
+		kgsl_device_snapshot(device, drawobj->context, NULL,
+					fault & ADRENO_GMU_FAULT);
+}
+
+static int dispatcher_do_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct adreno_dispatcher_drawqueue *dispatch_q = NULL, *dispatch_q_temp;
+	struct adreno_ringbuffer *rb;
+	struct adreno_ringbuffer *hung_rb = NULL;
+	unsigned int reg;
+	uint64_t base = 0;
+	struct kgsl_drawobj_cmd *cmdobj = NULL;
+	int ret, i;
+	int fault;
+	int halt;
+	bool gx_on;
+
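+	/* Atomically read and clear the fault code so it is only handled once */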
+	fault = atomic_xchg(&dispatcher->fault, 0);
+	if (fault == 0)
+		return 0;
+
+	mutex_lock(&device->mutex);
+
+	/*
+	 * In the very unlikely case that the power is off, do nothing - the
+	 * state will be reset on power up and everybody will be happy
+	 */
+	if (!kgsl_state_is_awake(device)) {
+		mutex_unlock(&device->mutex);
+		return 0;
+	}
+
+	/* Mask all GMU interrupts */
+	if (gmu_core_isenabled(device)) {
+		adreno_write_gmureg(adreno_dev,
+			ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK,
+			0xFFFFFFFF);
+		adreno_write_gmureg(adreno_dev,
+			ADRENO_REG_GMU_GMU2HOST_INTR_MASK,
+			0xFFFFFFFF);
+	}
+
+	gx_on = adreno_gx_is_on(adreno_dev);
+
+	/*
+	 * On non-A3xx targets, check if this function was entered after a
+	 * pagefault. If so, only proceed if the fault handler has already run
+	 * in the IRQ thread, else return early to give it a chance to run.
+	 */
+	if (!(fault & ADRENO_IOMMU_PAGE_FAULT) &&
+		!adreno_is_a3xx(adreno_dev) && gx_on) {
+
+		if (adreno_smmu_is_stalled(adreno_dev)) {
+			mutex_unlock(&device->mutex);
+			dev_err(device->dev,
+				"SMMU is stalled without a pagefault\n");
+			return -EBUSY;
+		}
+	}
+
+	/* Turn off all the timers */
+	del_timer_sync(&dispatcher->timer);
+
+	adreno_dispatcher_stop_fault_timer(device);
+
+	/*
+	 * Deleting an uninitialized timer will block forever on a kernel debug
+	 * disabled build. Hence skip deleting the timer if it is not initialized.
+	 */
+	if (adreno_is_preemption_enabled(adreno_dev))
+		del_timer_sync(&adreno_dev->preempt.timer);
+
+	if (gx_on)
+		adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE,
+			ADRENO_REG_CP_RB_BASE_HI, &base);
+
+	/*
+	 * Force the CP off for anything but a hard fault to make sure it is
+	 * good and stopped
+	 */
+	if (!(fault & ADRENO_HARD_FAULT) && gx_on) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, &reg);
+		if (adreno_is_a3xx(adreno_dev))
+			reg |= (1 << 27) | (1 << 28);
+		else if (adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev))
+			reg |= 1 | (1 << 1);
+		else
+			reg = 0x0;
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg);
+	}
+	/*
+	 * retire cmdobj's from all the dispatch_q's before starting recovery
+	 */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		adreno_dispatch_retire_drawqueue(adreno_dev,
+			&(rb->dispatch_q));
+		/* Select the active dispatch_q */
+		if (base == rb->buffer_desc->gpuaddr) {
+			dispatch_q = &(rb->dispatch_q);
+			hung_rb = rb;
+			if (adreno_dev->cur_rb != hung_rb) {
+				adreno_dev->prev_rb = adreno_dev->cur_rb;
+				adreno_dev->cur_rb = hung_rb;
+			}
+		}
+	}
+
+	if (dispatch_q && !adreno_drawqueue_is_empty(dispatch_q)) {
+		cmdobj = dispatch_q->cmd_q[dispatch_q->head];
+		trace_adreno_cmdbatch_fault(cmdobj, fault);
+	}
+
+	if (gx_on)
+		adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+			ADRENO_REG_CP_IB1_BASE_HI, &base);
+
+	if (!test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy)
+		&& adreno_dev->cooperative_reset)
+		gmu_core_dev_cooperative_reset(device);
+
+	if (!(fault & ADRENO_GMU_FAULT_SKIP_SNAPSHOT))
+		do_header_and_snapshot(device, fault, hung_rb, cmdobj);
+
+	/* Turn off the KEEPALIVE vote from the ISR for hard fault */
+	if (gpudev->gpu_keepalive && fault & ADRENO_HARD_FAULT)
+		gpudev->gpu_keepalive(adreno_dev, false);
+
+	/* Terminate the stalled transaction and resume the IOMMU */
+	if (fault & ADRENO_IOMMU_PAGE_FAULT)
+		kgsl_mmu_pagefault_resume(&device->mmu, true);
+
+	/* Reset the dispatcher queue */
+	dispatcher->inflight = 0;
+
+	/* Remove the bus hint */
+	device->pwrctrl.rt_bus_hint_active = false;
+
+	/* Reset the GPU and make sure halt is not set during recovery */
+	halt = adreno_gpu_halt(adreno_dev);
+	adreno_clear_gpu_halt(adreno_dev);
+
+	/*
+	 * If there is a stall in the ringbuffer after all commands have been
+	 * retired then we could hit problems if contexts are waiting for
+	 * internal timestamps that will never retire
+	 */
+
+	if (hung_rb != NULL) {
+		kgsl_sharedmem_writel(device->memstore,
+			MEMSTORE_RB_OFFSET(hung_rb, soptimestamp),
+			hung_rb->timestamp);
+
+		kgsl_sharedmem_writel(device->memstore,
+				MEMSTORE_RB_OFFSET(hung_rb, eoptimestamp),
+				hung_rb->timestamp);
+
+		/* Schedule any pending events to be run */
+		kgsl_process_event_group(device, &hung_rb->events);
+	}
+
+	ret = adreno_reset(device, fault);
+
+	mutex_unlock(&device->mutex);
+
+	/* If adreno_reset() fails then what hope do we have for the future? */
+	BUG_ON(ret);
+
+	/* Ignore any other fault that came in while the reset was in progress */
+	atomic_set(&dispatcher->fault, 0);
+
+	/* recover all the dispatch_q's starting with the one that hung */
+	if (dispatch_q)
+		recover_dispatch_q(device, dispatch_q, fault, base);
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		dispatch_q_temp = &(rb->dispatch_q);
+		if (dispatch_q_temp != dispatch_q)
+			recover_dispatch_q(device, dispatch_q_temp, 0, base);
+	}
+
+	atomic_add(halt, &adreno_dev->halt);
+
+	return 1;
+}
+
+static inline int drawobj_consumed(struct kgsl_drawobj *drawobj,
+		unsigned int consumed, unsigned int retired)
+{
+	return ((timestamp_cmp(drawobj->timestamp, consumed) >= 0) &&
+		(timestamp_cmp(retired, drawobj->timestamp) < 0));
+}
+
+static const char *_ft_type(enum kgsl_ft_policy_bits nr)
+{
+	if (nr == KGSL_FT_OFF)
+		return "off";
+	else if (nr == KGSL_FT_REPLAY)
+		return "replay";
+	else if (nr == KGSL_FT_SKIPIB)
+		return "skipib";
+	else if (nr == KGSL_FT_SKIPFRAME)
+		return "skipfame";
+	else if (nr == KGSL_FT_DISABLE)
+		return "disable";
+	else if (nr == KGSL_FT_TEMP_DISABLE)
+		return "temp";
+	else if (nr == KGSL_FT_THROTTLE)
+		return "throttle";
+	else if (nr == KGSL_FT_SKIPCMD)
+		return "skipcmd";
+
+	return "";
+}
+
+static void _print_recovery(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj)
+{
+	int nr = find_first_bit(&cmdobj->fault_recovery, BITS_PER_LONG);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	pr_context(device, drawobj->context,
+		"gpu %s ctx %u ts %u policy %lX\n",
+		_ft_type(nr), drawobj->context->id, drawobj->timestamp,
+		cmdobj->fault_recovery);
+}
+
+static void cmdobj_profile_ticks(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj, uint64_t *start, uint64_t *retire,
+	uint64_t *active)
+{
+	void *ptr = adreno_dev->profile_buffer->hostptr;
+	struct adreno_drawobj_profile_entry *entry;
+
+	entry = (struct adreno_drawobj_profile_entry *)
+		(ptr + (cmdobj->profile_index * sizeof(*entry)));
+
+	/* get updated values of started and retired */
+	rmb();
+	*start = entry->started;
+	*retire = entry->retired;
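+
+	/*
+	 * A6xx and newer report explicit context start/end ticks; on older
+	 * targets approximate the active time as retired - started
+	 */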
+	if (ADRENO_GPUREV(adreno_dev) < 600)
+		*active = entry->retired - entry->started;
+	else
+		*active = entry->ctx_end - entry->ctx_start;
+}
+
+static void retire_cmdobj(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	struct adreno_ringbuffer *rb = drawctxt->rb;
+	struct kgsl_context *context = drawobj->context;
+	uint64_t start = 0, end = 0, active = 0;
+	struct retire_info info = {0};
+
+	if (cmdobj->fault_recovery != 0) {
+		set_bit(ADRENO_CONTEXT_FAULT, &drawobj->context->priv);
+		_print_recovery(KGSL_DEVICE(adreno_dev), cmdobj);
+	}
+
+	if (test_bit(CMDOBJ_PROFILE, &cmdobj->priv))
+		cmdobj_profile_ticks(adreno_dev, cmdobj, &start, &end, &active);
+
+	info.inflight = (int)dispatcher->inflight;
+	info.rb_id = rb->id;
+	info.wptr = rb->wptr;
+	info.timestamp = drawobj->timestamp;
+	info.sop = start;
+	info.eop = end;
+	info.active = active;
+
+	/* protected GPU work must not be reported */
+	if  (!(context->flags & KGSL_CONTEXT_SECURE))
+		kgsl_work_period_update(KGSL_DEVICE(adreno_dev),
+					     context->proc_priv->period, active);
+
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_RETIRED,
+			       pid_nr(context->proc_priv->pid),
+			       context->id, drawobj->timestamp,
+			       !!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+
+	if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) {
+		atomic64_inc(&context->proc_priv->frame_count);
+		atomic_inc(&context->proc_priv->period->frames);
+	}
+
+	/*
+	 * For A3xx we still get the rptr from the CP_RB_RPTR register instead
+	 * of the rptr scratch memory. At this point the GPU clocks may already
+	 * be turned off, so avoid reading the GPU register directly for A3xx.
+	 */
+	if (adreno_is_a3xx(adreno_dev)) {
+		trace_adreno_cmdbatch_retired(drawobj->context, &info,
+			drawobj->flags, rb->dispatch_q.inflight,
+			cmdobj->fault_recovery);
+	} else {
+		info.rptr = adreno_get_rptr(rb);
+		trace_adreno_cmdbatch_retired(drawobj->context, &info,
+			drawobj->flags, rb->dispatch_q.inflight,
+			cmdobj->fault_recovery);
+	}
+
+	log_kgsl_cmdbatch_retired_event(context->id, drawobj->timestamp,
+		context->priority, drawobj->flags, start, end);
+
+	drawctxt->submit_retire_ticks[drawctxt->ticks_index] =
+		end - cmdobj->submit_ticks;
+
+	drawctxt->ticks_index = (drawctxt->ticks_index + 1) %
+		SUBMIT_RETIRE_TICKS_SIZE;
+
+	trace_adreno_cmdbatch_done(drawobj->context->id,
+		drawobj->context->priority, drawobj->timestamp);
+	kgsl_drawobj_destroy(drawobj);
+}
+
+static int adreno_dispatch_retire_drawqueue(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int count = 0;
+
+	while (!adreno_drawqueue_is_empty(drawqueue)) {
+		struct kgsl_drawobj_cmd *cmdobj =
+			drawqueue->cmd_q[drawqueue->head];
+		struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+		if (!kgsl_check_timestamp(device, drawobj->context,
+			drawobj->timestamp))
+			break;
+
+		retire_cmdobj(adreno_dev, cmdobj);
+
+		dispatcher->inflight--;
+		drawqueue->inflight--;
+
+		drawqueue->cmd_q[drawqueue->head] = NULL;
+
+		drawqueue->head = DRAWQUEUE_NEXT(drawqueue->head,
+			ADRENO_DISPATCH_DRAWQUEUE_SIZE);
+
+		count++;
+	}
+
+	return count;
+}
+
+static void _adreno_dispatch_check_timeout(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj =
+			DRAWOBJ(drawqueue->cmd_q[drawqueue->head]);
+
+	/* Don't timeout if the timer hasn't expired yet (duh) */
+	if (time_is_after_jiffies(drawqueue->expires))
+		return;
+
+	/* Don't timeout if the IB timeout is disabled globally */
+	if (!adreno_long_ib_detect(adreno_dev))
+		return;
+
+	/* Don't time out if the context has disabled it */
+	if (drawobj->context->flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
+		return;
+
+	pr_context(device, drawobj->context, "gpu timeout ctx %u ts %u\n",
+		drawobj->context->id, drawobj->timestamp);
+
+	adreno_set_gpu_fault(adreno_dev, ADRENO_TIMEOUT_FAULT);
+
+	/*
+	 * This makes sure dispatcher doesn't run endlessly in cases where
+	 * we couldn't run recovery
+	 */
+	drawqueue->expires = jiffies + msecs_to_jiffies(adreno_drawobj_timeout);
+}
+
+static int adreno_dispatch_process_drawqueue(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	int count = adreno_dispatch_retire_drawqueue(adreno_dev, drawqueue);
+
+	/* Nothing to do if there are no pending commands */
+	if (adreno_drawqueue_is_empty(drawqueue))
+		return count;
+
+	/* Don't update the drawqueue timeout if it isn't active */
+	if (!drawqueue_is_current(drawqueue))
+		return count;
+
+	/*
+	 * If the current ringbuffer retired any commands then universally
+	 * reset the timeout
+	 */
+
+	if (count) {
+		drawqueue->expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+		return count;
+	}
+
+	/*
+	 * If we get here then 1) the ringbuffer is current and 2) we haven't
+	 * retired anything.  Check to see if the timeout is valid for the
+	 * current drawobj and fault if it has expired
+	 */
+	_adreno_dispatch_check_timeout(adreno_dev, drawqueue);
+	return 0;
+}
+
+/* Take down the dispatcher and release any power states */
+static void _dispatcher_power_down(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	mutex_lock(&device->mutex);
+
+	if (test_and_clear_bit(ADRENO_DISPATCHER_ACTIVE, &dispatcher->priv))
+		complete_all(&dispatcher->idle_gate);
+
+	adreno_dispatcher_stop_fault_timer(device);
+	process_rt_bus_hint(device, false);
+
+	if (test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) {
+		adreno_active_count_put(adreno_dev);
+		clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+	}
+
+	mutex_unlock(&device->mutex);
+}
+
+static void adreno_dispatcher_work(struct kthread_work *work)
+{
+	struct adreno_dispatcher *dispatcher =
+		container_of(work, struct adreno_dispatcher, work);
+	struct adreno_device *adreno_dev =
+		container_of(dispatcher, struct adreno_device, dispatcher);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int count = 0;
+	unsigned int i = 0;
+
+	mutex_lock(&dispatcher->mutex);
+
+	/*
+	 * As long as there are inflight commands, process retired commands from
+	 * all drawqueues
+	 */
+	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
+		struct adreno_dispatcher_drawqueue *drawqueue =
+			DRAWQUEUE(&adreno_dev->ringbuffers[i]);
+
+		count += adreno_dispatch_process_drawqueue(adreno_dev,
+			drawqueue);
+		if (dispatcher->inflight == 0)
+			break;
+	}
+
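+	/* Process any pending KGSL events now that timestamps may have advanced */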
+	kgsl_process_event_groups(device);
+
+	/*
+	 * dispatcher_do_fault() returns 0 if no faults occurred. If that is the
+	 * case, then clean up preemption and try to schedule more work
+	 */
+	if (dispatcher_do_fault(adreno_dev) == 0) {
+
+		/* Clean up after preemption */
+		if (gpudev->preemption_schedule)
+			gpudev->preemption_schedule(adreno_dev);
+
+		/* Run the scheduler to dispatch new commands */
+		_adreno_dispatcher_issuecmds(adreno_dev);
+	}
+
+	/*
+	 * If there are commands pending, update the timers, otherwise release
+	 * the power state to prepare for power down
+	 */
+	if (dispatcher->inflight > 0)
+		_dispatcher_update_timers(adreno_dev);
+	else
+		_dispatcher_power_down(adreno_dev);
+
+	mutex_unlock(&dispatcher->mutex);
+}
+
+void adreno_dispatcher_schedule(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	kthread_queue_work(dispatcher->worker, &dispatcher->work);
+}
+
+/*
+ * Put a draw context on the dispatcher pending queue and schedule the
+ * dispatcher. This is used to reschedule contexts that might have been blocked
+ * for sync points or other concerns
+ */
+static void adreno_dispatcher_queue_context(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	dispatcher_queue_context(adreno_dev, drawctxt);
+	adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+}
+
+void adreno_dispatcher_fault(struct adreno_device *adreno_dev,
+		u32 fault)
+{
+	adreno_set_gpu_fault(adreno_dev, fault);
+	adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+}
+
+/*
+ * This is called when the timer expires - it either means the GPU is hung or
+ * the IB is taking too long to execute
+ */
+static void adreno_dispatcher_timer(struct timer_list *t)
+{
+	struct adreno_dispatcher *dispatcher = from_timer(dispatcher, t, timer);
+	struct adreno_device *adreno_dev = container_of(dispatcher,
+					struct adreno_device, dispatcher);
+
+	adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+}
+
+/**
+ * adreno_dispatcher_start() - activate the dispatcher
+ * @device: pointer to the KGSL device structure
+ */
+void adreno_dispatcher_start(struct kgsl_device *device)
+{
+	complete_all(&device->halt_gate);
+
+	/* Schedule the work loop to get things going */
+	adreno_dispatcher_schedule(device);
+}
+
+/**
+ * adreno_dispatcher_stop() - stop the dispatcher
+ * @adreno_dev: pointer to the adreno device structure
+ *
+ * Stop the dispatcher and close all the timers
+ */
+void adreno_dispatcher_stop(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	del_timer_sync(&dispatcher->timer);
+
+	adreno_dispatcher_stop_fault_timer(KGSL_DEVICE(adreno_dev));
+}
+
+/* Return the ringbuffer that matches the draw context priority */
+static struct adreno_ringbuffer *dispatch_get_rb(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	int level;
+
+	/* If preemption is disabled everybody goes on the same ringbuffer */
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return &adreno_dev->ringbuffers[0];
+
+	/*
+	 * Math to convert the priority field in context structure to an RB ID.
+	 * Divide up the context priority based on number of ringbuffer levels.
+	 */
+	level = min_t(int, drawctxt->base.priority / adreno_dev->num_ringbuffers,
+		adreno_dev->num_ringbuffers - 1);
+
+	return &adreno_dev->ringbuffers[level];
+}
+
+static void adreno_dispatcher_setup_context(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	drawctxt->rb = dispatch_get_rb(adreno_dev, drawctxt);
+}
+
+static void change_preemption(struct adreno_device *adreno_dev, void *priv)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context;
+	struct adreno_context *drawctxt;
+	struct adreno_ringbuffer *rb;
+	int id, i, ret;
+
+	/* Make sure all ringbuffers are finished */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp,
+			2 * 1000);
+		if (ret) {
+			dev_err(device->dev,
+				"Cannot disable preemption because couldn't idle ringbuffer[%d] ret: %d\n",
+				rb->id, ret);
+			return;
+		}
+	}
+
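+	/* Toggle the preemption state and reset the ringbuffer bookkeeping */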
+	change_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+	adreno_dev->cur_rb = &adreno_dev->ringbuffers[0];
+	adreno_dev->next_rb = NULL;
+	adreno_dev->prev_rb = NULL;
+
+	/* Update the ringbuffer for each draw context */
+	write_lock(&device->context_lock);
+	idr_for_each_entry(&device->context_idr, context, id) {
+		drawctxt = ADRENO_CONTEXT(context);
+		drawctxt->rb = dispatch_get_rb(adreno_dev, drawctxt);
+
+		/*
+		 * Make sure context destroy checks against the correct
+		 * ringbuffer's timestamp.
+		 */
+		adreno_rb_readtimestamp(adreno_dev, drawctxt->rb,
+			KGSL_TIMESTAMP_RETIRED, &drawctxt->internal_timestamp);
+	}
+	write_unlock(&device->context_lock);
+}
+
+static int _preemption_store(struct adreno_device *adreno_dev, bool val)
+{
+	if (!adreno_preemption_feature_set(adreno_dev) ||
+		(test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv) == val))
+		return 0;
+
+	return adreno_power_cycle(adreno_dev, change_preemption, NULL);
+}
+
+static bool _preemption_show(struct adreno_device *adreno_dev)
+{
+	return adreno_is_preemption_enabled(adreno_dev);
+}
+
+static unsigned int _preempt_count_show(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->preempt.count;
+}
+
+static int _ft_long_ib_detect_store(struct adreno_device *adreno_dev, bool val)
+{
+	adreno_dev->long_ib_detect = val ? true : false;
+	return 0;
+}
+
+static bool _ft_long_ib_detect_show(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->long_ib_detect;
+}
+
+static ADRENO_SYSFS_BOOL(preemption);
+static ADRENO_SYSFS_RO_U32(preempt_count);
+static ADRENO_SYSFS_BOOL(ft_long_ib_detect);
+
+static const struct attribute *_dispatch_attr_list[] = {
+	&adreno_attr_preemption.attr.attr,
+	&adreno_attr_preempt_count.attr.attr,
+	&adreno_attr_ft_long_ib_detect.attr.attr,
+	NULL,
+};
+
+static void adreno_dispatcher_close(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int i;
+	struct adreno_ringbuffer *rb;
+
+	mutex_lock(&dispatcher->mutex);
+	del_timer_sync(&dispatcher->timer);
+
+	adreno_dispatcher_stop_fault_timer(KGSL_DEVICE(adreno_dev));
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		struct adreno_dispatcher_drawqueue *dispatch_q =
+			&(rb->dispatch_q);
+		while (!adreno_drawqueue_is_empty(dispatch_q)) {
+			kgsl_drawobj_destroy(
+				DRAWOBJ(dispatch_q->cmd_q[dispatch_q->head]));
+			dispatch_q->head = (dispatch_q->head + 1)
+				% ADRENO_DISPATCH_DRAWQUEUE_SIZE;
+		}
+	}
+
+	mutex_unlock(&dispatcher->mutex);
+
+	kthread_destroy_worker(dispatcher->worker);
+
+	adreno_set_dispatch_ops(adreno_dev, NULL);
+
+	kobject_put(&dispatcher->kobj);
+
+	kmem_cache_destroy(jobs_cache);
+
+	clear_bit(ADRENO_DISPATCHER_INIT, &dispatcher->priv);
+}
+
+struct dispatcher_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct adreno_dispatcher *dispatcher,
+			struct dispatcher_attribute *attr, char *buf);
+	ssize_t (*store)(struct adreno_dispatcher *dispatcher,
+			struct dispatcher_attribute *attr, const char *buf,
+			size_t count);
+	unsigned int max;
+	unsigned int *value;
+};
+
+#define DISPATCHER_UINT_ATTR(_name, _mode, _max, _value) \
+	struct dispatcher_attribute dispatcher_attr_##_name =  { \
+		.attr = { .name = __stringify(_name), .mode = _mode }, \
+		.show = _show_uint, \
+		.store = _store_uint, \
+		.max = _max, \
+		.value = &(_value), \
+	}
+
+#define to_dispatcher_attr(_a) \
+	container_of((_a), struct dispatcher_attribute, attr)
+#define to_dispatcher(k) container_of(k, struct adreno_dispatcher, kobj)
+
+static ssize_t _store_uint(struct adreno_dispatcher *dispatcher,
+		struct dispatcher_attribute *attr,
+		const char *buf, size_t size)
+{
+	unsigned int val = 0;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (!val || (attr->max && (val > attr->max)))
+		return -EINVAL;
+
+	*((unsigned int *) attr->value) = val;
+	return size;
+}
+
+static ssize_t _show_uint(struct adreno_dispatcher *dispatcher,
+		struct dispatcher_attribute *attr,
+		char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%u\n",
+		*((unsigned int *) attr->value));
+}
+
+static DISPATCHER_UINT_ATTR(inflight, 0644, ADRENO_DISPATCH_DRAWQUEUE_SIZE,
+	_dispatcher_q_inflight_hi);
+
+static DISPATCHER_UINT_ATTR(inflight_low_latency, 0644,
+	ADRENO_DISPATCH_DRAWQUEUE_SIZE, _dispatcher_q_inflight_lo);
+/*
+ * Our code that "puts back" a command from the context is much cleaner
+ * if we are sure that there will always be enough room in the
+ * ringbuffer so restrict the maximum size of the context queue to
+ * ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1
+ */
+static DISPATCHER_UINT_ATTR(context_drawqueue_size, 0644,
+	ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1, _context_drawqueue_size);
+static DISPATCHER_UINT_ATTR(context_burst_count, 0644, 0,
+	_context_drawobj_burst);
+static DISPATCHER_UINT_ATTR(drawobj_timeout, 0644, 0,
+	adreno_drawobj_timeout);
+static DISPATCHER_UINT_ATTR(context_queue_wait, 0644, 0, _context_queue_wait);
+static DISPATCHER_UINT_ATTR(fault_detect_interval, 0644, 0,
+	_fault_timer_interval);
+static DISPATCHER_UINT_ATTR(fault_throttle_time, 0644, 0,
+	_fault_throttle_time);
+static DISPATCHER_UINT_ATTR(fault_throttle_burst, 0644, 0,
+	_fault_throttle_burst);
+
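+/*
+ * These tunables are exposed through the "dispatch" kobject created in
+ * adreno_dispatcher_init(), typically visible as
+ * /sys/class/kgsl/kgsl-3d0/dispatch/<name> (the exact path is platform
+ * dependent).
+ */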
+static struct attribute *dispatcher_attrs[] = {
+	&dispatcher_attr_inflight.attr,
+	&dispatcher_attr_inflight_low_latency.attr,
+	&dispatcher_attr_context_drawqueue_size.attr,
+	&dispatcher_attr_context_burst_count.attr,
+	&dispatcher_attr_drawobj_timeout.attr,
+	&dispatcher_attr_context_queue_wait.attr,
+	&dispatcher_attr_fault_detect_interval.attr,
+	&dispatcher_attr_fault_throttle_time.attr,
+	&dispatcher_attr_fault_throttle_burst.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(dispatcher);
+
+static ssize_t dispatcher_sysfs_show(struct kobject *kobj,
+				   struct attribute *attr, char *buf)
+{
+	struct adreno_dispatcher *dispatcher = to_dispatcher(kobj);
+	struct dispatcher_attribute *pattr = to_dispatcher_attr(attr);
+	ssize_t ret = -EIO;
+
+	if (pattr->show)
+		ret = pattr->show(dispatcher, pattr, buf);
+
+	return ret;
+}
+
+static ssize_t dispatcher_sysfs_store(struct kobject *kobj,
+				    struct attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct adreno_dispatcher *dispatcher = to_dispatcher(kobj);
+	struct dispatcher_attribute *pattr = to_dispatcher_attr(attr);
+	ssize_t ret = -EIO;
+
+	if (pattr->store)
+		ret = pattr->store(dispatcher, pattr, buf, count);
+
+	return ret;
+}
+
+static const struct sysfs_ops dispatcher_sysfs_ops = {
+	.show = dispatcher_sysfs_show,
+	.store = dispatcher_sysfs_store
+};
+
+static struct kobj_type ktype_dispatcher = {
+	.sysfs_ops = &dispatcher_sysfs_ops,
+	.default_groups = dispatcher_groups,
+};
+
+static const struct adreno_dispatch_ops swsched_ops = {
+	.close = adreno_dispatcher_close,
+	.queue_cmds = adreno_dispatcher_queue_cmds,
+	.setup_context = adreno_dispatcher_setup_context,
+	.queue_context = adreno_dispatcher_queue_context,
+	.fault = adreno_dispatcher_fault,
+	.get_fault = adreno_gpu_fault,
+};
+
+/**
+ * adreno_dispatcher_init() - Initialize the dispatcher
+ * @adreno_dev: pointer to the adreno device structure
+ *
+ * Initialize the dispatcher
+ */
+int adreno_dispatcher_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int ret, i;
+
+	if (test_bit(ADRENO_DISPATCHER_INIT, &dispatcher->priv))
+		return 0;
+
+	ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher,
+		&device->dev->kobj, "dispatch");
+	if (ret)
+		return ret;
+
+	dispatcher->worker = kthread_create_worker(0, "kgsl_dispatcher");
+	if (IS_ERR(dispatcher->worker)) {
+		kobject_put(&dispatcher->kobj);
+		return PTR_ERR(dispatcher->worker);
+	}
+
+	WARN_ON(sysfs_create_files(&device->dev->kobj, _dispatch_attr_list));
+
+	mutex_init(&dispatcher->mutex);
+
+	timer_setup(&dispatcher->timer, adreno_dispatcher_timer, 0);
+
+	kthread_init_work(&dispatcher->work, adreno_dispatcher_work);
+
+	init_completion(&dispatcher->idle_gate);
+	complete_all(&dispatcher->idle_gate);
+
+	jobs_cache = KMEM_CACHE(adreno_dispatch_job, 0);
+
+	for (i = 0; i < ARRAY_SIZE(dispatcher->jobs); i++) {
+		init_llist_head(&dispatcher->jobs[i]);
+		init_llist_head(&dispatcher->requeue[i]);
+	}
+
+	adreno_set_dispatch_ops(adreno_dev, &swsched_ops);
+
+	sched_set_fifo(dispatcher->worker->task);
+
+	set_bit(ADRENO_DISPATCHER_INIT, &dispatcher->priv);
+
+	return 0;
+}
+
+/*
+ * adreno_dispatcher_idle() - Wait for dispatcher to idle
+ * @adreno_dev: Adreno device whose dispatcher needs to idle
+ *
+ * Signal dispatcher to stop sending more commands and complete
+ * the commands that have already been submitted. This function
+ * should not be called when dispatcher mutex is held.
+ * The caller must hold the device mutex.
+ */
+int adreno_dispatcher_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int ret;
+
+	if (device->state != KGSL_STATE_ACTIVE)
+		return 0;
+
+	/*
+	 * Ensure that this function is not called when dispatcher
+	 * mutex is held and device is started
+	 */
+
+	if (WARN_ON(mutex_is_locked(&dispatcher->mutex)))
+		return -EDEADLK;
+
+	adreno_get_gpu_halt(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+
+	/*
+	 * Flush the worker to make sure all executing or pending
+	 * dispatcher work on the worker is finished
+	 */
+	kthread_flush_worker(dispatcher->worker);
+
+	ret = wait_for_completion_timeout(&dispatcher->idle_gate,
+			msecs_to_jiffies(ADRENO_IDLE_TIMEOUT));
+	if (ret == 0) {
+		ret = -ETIMEDOUT;
+		WARN(1, "Dispatcher halt timeout\n");
+	} else if (ret < 0) {
+		dev_err(device->dev, "Dispatcher halt failed %d\n", ret);
+	} else {
+		ret = 0;
+	}
+
+	mutex_lock(&device->mutex);
+	adreno_put_gpu_halt(adreno_dev);
+	/*
+	 * requeue dispatcher work to resubmit pending commands
+	 * that may have been blocked due to this idling request
+	 */
+	adreno_dispatcher_schedule(device);
+	return ret;
+}

+ 112 - 0
qcom/opensource/graphics-kernel/adreno_dispatch.h

@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef ____ADRENO_DISPATCHER_H
+#define ____ADRENO_DISPATCHER_H
+
+#include <linux/kobject.h>
+#include <linux/kthread.h>
+#include <linux/llist.h>
+
+extern unsigned int adreno_drawobj_timeout;
+
+/*
+ * Maximum size of the dispatcher ringbuffer - the actual inflight size will be
+ * smaller than this, but this size allows a larger range of inflight
+ * sizes that can be chosen at runtime
+ */
+
+#define ADRENO_DISPATCH_DRAWQUEUE_SIZE 128
+
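+/* Advance a circular drawqueue index by one, wrapping at size _s */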
+#define DRAWQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s))
+
+/**
+ * struct adreno_dispatcher_drawqueue - List of commands for a RB level
+ * @cmd_q: List of command obj's submitted to dispatcher
+ * @inflight: Number of commands inflight in this q
+ * @head: Head pointer to the q
+ * @tail: Queues tail pointer
+ * @active_context_count: Number of active contexts seen in this rb drawqueue
+ * @expires: The jiffies value at which this drawqueue has run too long
+ */
+struct adreno_dispatcher_drawqueue {
+	struct kgsl_drawobj_cmd *cmd_q[ADRENO_DISPATCH_DRAWQUEUE_SIZE];
+	unsigned int inflight;
+	unsigned int head;
+	unsigned int tail;
+	int active_context_count;
+	unsigned long expires;
+};
+
+/**
+ * struct adreno_dispatch_job - An instance of work for the dispatcher
+ * @node: llist node for the list of jobs
+ * @drawctxt: A pointer to an adreno draw context
+ *
+ * This struct defines work for the dispatcher. When a drawctxt is ready to send
+ * commands it will attach itself to the appropriate list for its priority.
+ * The dispatcher will process all jobs on each priority every time it goes
+ * through a dispatch cycle
+ */
+struct adreno_dispatch_job {
+	struct llist_node node;
+	struct adreno_context *drawctxt;
+};
+
+/**
+ * struct adreno_dispatcher - container for the adreno GPU dispatcher
+ * @mutex: Mutex to protect the structure
+ * @priv: Private flags for the dispatcher (see enum adreno_dispatcher_flags)
+ * @timer: Timer to monitor the progress of the drawobjs
+ * @fault_timer: Timer used for fault detection while drawobjs are inflight
+ * @inflight: Number of drawobj operations pending in the ringbuffer
+ * @fault: Non-zero if a fault was detected.
+ * @work: kthread_work to run the dispatcher on its worker thread
+ * @kobj: kobject for the dispatcher directory in the device sysfs node
+ * @idle_gate: Gate to wait on for dispatcher to idle
+ * @worker: kthread worker that executes the dispatcher work
+ */
+struct adreno_dispatcher {
+	struct mutex mutex;
+	unsigned long priv;
+	struct timer_list timer;
+	struct timer_list fault_timer;
+	unsigned int inflight;
+	atomic_t fault;
+	/** @jobs - Array of dispatch job lists for each priority level */
+	struct llist_head jobs[16];
+	/** @requeue - Array of lists for dispatch jobs that got requeued */
+	struct llist_head requeue[16];
+	struct kthread_work work;
+	struct kobject kobj;
+	struct completion idle_gate;
+	struct kthread_worker *worker;
+};
+
+enum adreno_dispatcher_flags {
+	ADRENO_DISPATCHER_POWER = 0,
+	ADRENO_DISPATCHER_ACTIVE,
+	ADRENO_DISPATCHER_INIT,
+};
+
+struct adreno_device;
+struct kgsl_device;
+
+void adreno_dispatcher_start(struct kgsl_device *device);
+int adreno_dispatcher_init(struct adreno_device *adreno_dev);
+int adreno_dispatcher_idle(struct adreno_device *adreno_dev);
+void adreno_dispatcher_stop(struct adreno_device *adreno_dev);
+
+void adreno_dispatcher_start_fault_timer(struct adreno_device *adreno_dev);
+void adreno_dispatcher_stop_fault_timer(struct kgsl_device *device);
+
+void adreno_dispatcher_schedule(struct kgsl_device *device);
+
+/**
+ * adreno_dispatcher_fault - Set dispatcher fault to request recovery
+ * @adreno_dev: A handle to adreno device
+ * @fault: The type of fault
+ */
+void adreno_dispatcher_fault(struct adreno_device *adreno_dev, u32 fault);
+#endif /* ____ADRENO_DISPATCHER_H */

+ 678 - 0
qcom/opensource/graphics-kernel/adreno_drawctxt.c

@@ -0,0 +1,678 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/debugfs.h>
+
+#include "adreno.h"
+#include "adreno_trace.h"
+
+static void wait_callback(struct kgsl_device *device,
+		struct kgsl_event_group *group, void *priv, int result)
+{
+	struct adreno_context *drawctxt = priv;
+
+	wake_up_all(&drawctxt->waiting);
+}
+
+static int _check_context_timestamp(struct kgsl_device *device,
+		struct kgsl_context *context, unsigned int timestamp)
+{
+	/* Bail if the drawctxt has been invalidated or destroyed */
+	if (kgsl_context_is_bad(context))
+		return 1;
+
+	return kgsl_check_timestamp(device, context, timestamp);
+}
+
+/**
+ * adreno_drawctxt_dump() - dump information about a draw context
+ * @device: KGSL device that owns the context
+ * @context: KGSL context to dump information about
+ *
+ * Dump specific information about the context to the kernel log.  Used for
+ * fence timeout callbacks
+ */
+void adreno_drawctxt_dump(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	unsigned int queue, start, retire;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int index, pos;
+	char buf[120];
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, &queue);
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, &start);
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &retire);
+
+	/*
+	 * The kgsl sync obj timer may be running and uses the same lock, so
+	 * take the lock with software interrupts disabled (bh) to avoid
+	 * spinlock recursion.
+	 *
+	 * Use spin_trylock_bh because the dispatcher can acquire
+	 * drawctxt->lock if the context is pending and the fence it is
+	 * waiting on just got signalled. The dispatcher acquires
+	 * drawctxt->lock and tries to delete the sync obj timer using
+	 * del_timer_sync(), which waits until the timer and its pending
+	 * handlers are deleted. But if the timer expires at the same time,
+	 * the timer handler could be waiting on drawctxt->lock, leading to
+	 * a deadlock. To prevent this, use spin_trylock_bh.
+	 */
+	if (!spin_trylock_bh(&drawctxt->lock)) {
+		dev_err(device->dev, "  context[%u]: could not get lock\n",
+			context->id);
+		return;
+	}
+
+	dev_err(device->dev,
+		"  context[%u]: queue=%u, submit=%u, start=%u, retire=%u\n",
+		context->id, queue, drawctxt->submitted_timestamp,
+		start, retire);
+
+	if (drawctxt->drawqueue_head != drawctxt->drawqueue_tail) {
+		struct kgsl_drawobj *drawobj =
+			drawctxt->drawqueue[drawctxt->drawqueue_head];
+
+		if (test_bit(ADRENO_CONTEXT_FENCE_LOG, &context->priv)) {
+			dev_err(device->dev,
+				"  possible deadlock. Context %u might be blocked for itself\n",
+				context->id);
+			goto stats;
+		}
+
+		if (!kref_get_unless_zero(&drawobj->refcount))
+			goto stats;
+
+		if (drawobj->type == SYNCOBJ_TYPE) {
+			struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+
+			if (kgsl_drawobj_events_pending(syncobj)) {
+				dev_err(device->dev,
+					"  context[%u] (ts=%u) Active sync points:\n",
+					context->id, drawobj->timestamp);
+
+				kgsl_dump_syncpoints(device, syncobj);
+			}
+		}
+
+		kgsl_drawobj_put(drawobj);
+	}
+
+stats:
+	memset(buf, 0, sizeof(buf));
+
+	pos = 0;
+
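+	/*
+	 * submit_retire_ticks holds always-on counter ticks; the arithmetic
+	 * below (ticks * 10 / 192, then / 1000) assumes a 19.2 MHz tick rate
+	 * and prints each sample as milliseconds with a fractional remainder.
+	 */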
+	for (index = 0; index < SUBMIT_RETIRE_TICKS_SIZE; index++) {
+		uint64_t msecs;
+		unsigned int usecs;
+
+		if (!drawctxt->submit_retire_ticks[index])
+			continue;
+		msecs = drawctxt->submit_retire_ticks[index] * 10;
+		usecs = do_div(msecs, 192);
+		usecs = do_div(msecs, 1000);
+		pos += scnprintf(buf + pos, sizeof(buf) - pos, "%u.%0u ",
+			(unsigned int)msecs, usecs);
+	}
+	dev_err(device->dev, "  context[%u]: submit times: %s\n",
+		context->id, buf);
+
+	spin_unlock_bh(&drawctxt->lock);
+}
+
+/**
+ * adreno_drawctxt_wait() - sleep until a timestamp expires
+ * @adreno_dev: pointer to the adreno_device struct
+ * @context: Pointer to the KGSL context to sleep for
+ * @timestamp: Timestamp to wait on
+ * @timeout: Number of milliseconds to wait (0 for infinite)
+ *
+ * Register an event to wait for a timestamp on a context and sleep until it
+ * has passed.  Returns < 0 on error, -ETIMEDOUT if the timeout expires or 0
+ * on success
+ */
+int adreno_drawctxt_wait(struct adreno_device *adreno_dev,
+		struct kgsl_context *context,
+		uint32_t timestamp, unsigned int timeout)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int ret;
+	long ret_temp;
+
+	if (kgsl_context_detached(context))
+		return -ENOENT;
+
+	if (kgsl_context_invalid(context))
+		return -EDEADLK;
+
+	trace_adreno_drawctxt_wait_start(-1, context->id, timestamp);
+
+	ret = kgsl_add_event(device, &context->events, timestamp,
+		wait_callback, (void *) drawctxt);
+	if (ret)
+		goto done;
+
+	/*
+	 * If timeout is 0, wait forever. msecs_to_jiffies will force
+	 * values larger than INT_MAX to an infinite timeout.
+	 */
+	if (timeout == 0)
+		timeout = UINT_MAX;
+
+	ret_temp = wait_event_interruptible_timeout(drawctxt->waiting,
+			_check_context_timestamp(device, context, timestamp),
+			msecs_to_jiffies(timeout));
+
+	if (ret_temp <= 0) {
+		kgsl_cancel_event(device, &context->events, timestamp,
+			wait_callback, (void *)drawctxt);
+
+		ret = ret_temp ? (int)ret_temp : -ETIMEDOUT;
+		goto done;
+	}
+	ret = 0;
+
+	/* -EDEADLK if the context was invalidated while we were waiting */
+	if (kgsl_context_invalid(context))
+		ret = -EDEADLK;
+
+	/* Return -ENOENT if the context was detached while we were waiting */
+	if (kgsl_context_detached(context))
+		ret = -ENOENT;
+
+done:
+	trace_adreno_drawctxt_wait_done(-1, context->id, timestamp, ret);
+	return ret;
+}
+
+/**
+ * adreno_drawctxt_wait_rb() - Wait for the last RB timestamp at which this
+ * context submitted a command to the corresponding RB
+ * @adreno_dev: The device on which the timestamp is active
+ * @context: The context which submitted the command to the RB
+ * @timestamp: The RB timestamp of last command submitted to RB by context
+ * @timeout: Timeout value for the wait
+ * Caller must hold the device mutex
+ */
+static int adreno_drawctxt_wait_rb(struct adreno_device *adreno_dev,
+		struct kgsl_context *context,
+		uint32_t timestamp, unsigned int timeout)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int ret = 0;
+
+	/*
+	 * If the context is invalid or has not submitted commands to the GPU,
+	 * then return immediately - we may end up waiting for a timestamp
+	 * that will never come
+	 */
+	if (kgsl_context_invalid(context) ||
+			!test_bit(KGSL_CONTEXT_PRIV_SUBMITTED, &context->priv))
+		goto done;
+
+	trace_adreno_drawctxt_wait_start(drawctxt->rb->id, context->id,
+					timestamp);
+
+	ret = adreno_ringbuffer_waittimestamp(drawctxt->rb, timestamp, timeout);
+done:
+	trace_adreno_drawctxt_wait_done(drawctxt->rb->id, context->id,
+					timestamp, ret);
+	return ret;
+}
+
+static int drawctxt_detach_drawobjs(struct adreno_context *drawctxt,
+		struct kgsl_drawobj **list)
+{
+	int count = 0;
+
+	while (drawctxt->drawqueue_head != drawctxt->drawqueue_tail) {
+		struct kgsl_drawobj *drawobj =
+			drawctxt->drawqueue[drawctxt->drawqueue_head];
+
+		drawctxt->drawqueue_head = (drawctxt->drawqueue_head + 1) %
+			ADRENO_CONTEXT_DRAWQUEUE_SIZE;
+
+		list[count++] = drawobj;
+	}
+
+	return count;
+}
+
+/**
+ * adreno_drawctxt_invalidate() - Invalidate an adreno draw context
+ * @device: Pointer to the KGSL device structure for the GPU
+ * @context: Pointer to the KGSL context structure
+ *
+ * Invalidate the context and remove all queued commands and cancel any pending
+ * waiters
+ */
+void adreno_drawctxt_invalidate(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct kgsl_drawobj *list[ADRENO_CONTEXT_DRAWQUEUE_SIZE];
+	int i, count;
+
+	trace_adreno_drawctxt_invalidate(drawctxt);
+
+	spin_lock(&drawctxt->lock);
+	set_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv);
+
+	/*
+	 * set the timestamp to the last value since the context is invalidated
+	 * and we want the pending events for this context to go away
+	 */
+	kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+			drawctxt->timestamp);
+
+	kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+			drawctxt->timestamp);
+
+	/* Get rid of commands still waiting in the queue */
+	count = drawctxt_detach_drawobjs(drawctxt, list);
+	spin_unlock(&drawctxt->lock);
+
+	for (i = 0; i < count; i++) {
+		kgsl_cancel_events_timestamp(device, &context->events,
+			list[i]->timestamp);
+		kgsl_drawobj_destroy(list[i]);
+	}
+
+	/* Make sure all pending events are processed or cancelled */
+	kgsl_flush_event_group(device, &context->events);
+
+	/* Give the bad news to everybody waiting around */
+	wake_up_all(&drawctxt->waiting);
+	wake_up_all(&drawctxt->wq);
+	wake_up_all(&drawctxt->timeout);
+}
+
+void adreno_drawctxt_set_guilty(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	if (!context)
+		return;
+
+	context->reset_status = KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
+
+	adreno_drawctxt_invalidate(device, context);
+}
+
+#define KGSL_CONTEXT_PRIORITY_MED	0x8
+
+/**
+ * adreno_drawctxt_create - create a new adreno draw context
+ * @dev_priv: the owner of the context
+ * @flags: flags for the context (passed from user space)
+ *
+ * Create and return a new draw context for the 3D core.
+ */
+struct kgsl_context *
+adreno_drawctxt_create(struct kgsl_device_private *dev_priv,
+			uint32_t *flags)
+{
+	struct adreno_context *drawctxt;
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret;
+	unsigned int local;
+
+	local = *flags & (KGSL_CONTEXT_PREAMBLE |
+		KGSL_CONTEXT_NO_GMEM_ALLOC |
+		KGSL_CONTEXT_PER_CONTEXT_TS |
+		KGSL_CONTEXT_USER_GENERATED_TS |
+		KGSL_CONTEXT_NO_FAULT_TOLERANCE |
+		KGSL_CONTEXT_INVALIDATE_ON_FAULT |
+		KGSL_CONTEXT_CTX_SWITCH |
+		KGSL_CONTEXT_PRIORITY_MASK |
+		KGSL_CONTEXT_TYPE_MASK |
+		KGSL_CONTEXT_PWR_CONSTRAINT |
+		KGSL_CONTEXT_IFH_NOP |
+		KGSL_CONTEXT_SECURE |
+		KGSL_CONTEXT_PREEMPT_STYLE_MASK |
+		KGSL_CONTEXT_LPAC |
+		KGSL_CONTEXT_NO_SNAPSHOT |
+		KGSL_CONTEXT_FAULT_INFO);
+
+	/* Check for errors before trying to initialize */
+
+	/* If preemption is not supported, ignore preemption request */
+	if (!adreno_preemption_feature_set(adreno_dev))
+		local &= ~KGSL_CONTEXT_PREEMPT_STYLE_MASK;
+
+	/* We no longer support legacy context switching */
+	if ((local & KGSL_CONTEXT_PREAMBLE) == 0 ||
+		(local & KGSL_CONTEXT_NO_GMEM_ALLOC) == 0) {
+		dev_err_once(device->dev,
+			"legacy context switch not supported\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Make sure that our target can support secure contexts if requested */
+	if (!kgsl_mmu_is_secured(&dev_priv->device->mmu) &&
+			(local & KGSL_CONTEXT_SECURE)) {
+		dev_err_once(device->dev, "Secure context not supported\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	if ((local & KGSL_CONTEXT_LPAC) &&
+			(!(adreno_dev->lpac_enabled))) {
+		dev_err_once(device->dev, "LPAC context not supported\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	if ((local & KGSL_CONTEXT_LPAC) && (local & KGSL_CONTEXT_SECURE)) {
+		dev_err_once(device->dev, "LPAC secure context not supported\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL);
+
+	if (drawctxt == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	drawctxt->timestamp = 0;
+
+	drawctxt->base.flags = local;
+
+	/* Always enable per-context timestamps */
+	drawctxt->base.flags |= KGSL_CONTEXT_PER_CONTEXT_TS;
+	drawctxt->type = (drawctxt->base.flags & KGSL_CONTEXT_TYPE_MASK)
+		>> KGSL_CONTEXT_TYPE_SHIFT;
+	spin_lock_init(&drawctxt->lock);
+	init_waitqueue_head(&drawctxt->wq);
+	init_waitqueue_head(&drawctxt->waiting);
+	init_waitqueue_head(&drawctxt->timeout);
+
+	/* If the priority is not set by user, set it for them */
+	if ((drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) ==
+			KGSL_CONTEXT_PRIORITY_UNDEF)
+		drawctxt->base.flags |= (KGSL_CONTEXT_PRIORITY_MED <<
+				KGSL_CONTEXT_PRIORITY_SHIFT);
+
+	/* Store the context priority */
+	drawctxt->base.priority =
+		(drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) >>
+		KGSL_CONTEXT_PRIORITY_SHIFT;
+
+	/*
+	 * Now initialize the common part of the context. This allocates the
+	 * context id, and then possibly another thread could look it up.
+	 * So we want all of our initialization that doesn't require the context
+	 * id to be done before this call.
+	 */
+	ret = kgsl_context_init(dev_priv, &drawctxt->base);
+	if (ret != 0) {
+		kfree(drawctxt);
+		return ERR_PTR(ret);
+	}
+
+	kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp),
+			0);
+	kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, eoptimestamp),
+			0);
+
+	adreno_context_debugfs_init(ADRENO_DEVICE(device), drawctxt);
+
+	INIT_LIST_HEAD(&drawctxt->active_node);
+	INIT_LIST_HEAD(&drawctxt->hw_fence_list);
+	INIT_LIST_HEAD(&drawctxt->hw_fence_inflight_list);
+
+	if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->setup_context)
+		adreno_dev->dispatch_ops->setup_context(adreno_dev, drawctxt);
+
+	if (gpudev->preemption_context_init) {
+		ret = gpudev->preemption_context_init(&drawctxt->base);
+		if (ret != 0) {
+			kgsl_context_detach(&drawctxt->base);
+			return ERR_PTR(ret);
+		}
+	}
+
+	/* copy back whatever flags we decided were valid */
+	*flags = drawctxt->base.flags;
+
+	return &drawctxt->base;
+}
+
+static void wait_for_timestamp_rb(struct kgsl_device *device,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_context *context = &drawctxt->base;
+	int ret;
+
+	/*
+	 * internal_timestamp is set in adreno_ringbuffer_addcmds,
+	 * which holds the device mutex.
+	 */
+	mutex_lock(&device->mutex);
+
+	/*
+	 * Wait for the last global timestamp to pass before continuing.
+	 * The maximum wait time is 30s; some large IBs can take longer
+	 * than 10s, and if a hang happens the time for the context's
+	 * commands to retire will also exceed 10s. 30s should be sufficient
+	 * time to wait for the commands even if a hang happens.
+	 */
+	ret = adreno_drawctxt_wait_rb(adreno_dev, &drawctxt->base,
+		drawctxt->internal_timestamp, 30 * 1000);
+
+	/*
+	 * If the wait for the global timestamp fails due to a timeout, mark it
+	 * as a context detach timeout fault and schedule the dispatcher to
+	 * kick in GPU recovery. For an ADRENO_CTX_DETATCH_TIMEOUT_FAULT we
+	 * clear the policy and invalidate the context. If -EAGAIN is returned,
+	 * recovery will kick in and there will be no more commands in the RB
+	 * pipe from this context, which is what we are waiting for, so ignore
+	 * the -EAGAIN error.
+	 */
+	if (ret && ret != -EAGAIN) {
+		dev_err(device->dev,
+				"Wait for global ctx=%u ts=%u type=%d error=%d\n",
+				drawctxt->base.id, drawctxt->internal_timestamp,
+				drawctxt->type, ret);
+
+		adreno_set_gpu_fault(adreno_dev,
+				ADRENO_CTX_DETATCH_TIMEOUT_FAULT);
+		mutex_unlock(&device->mutex);
+
+		/* Schedule dispatcher to kick in recovery */
+		adreno_dispatcher_schedule(device);
+
+		/* Wait for context to be invalidated and release context */
+		wait_event_interruptible_timeout(drawctxt->timeout,
+					kgsl_context_invalid(&drawctxt->base),
+					msecs_to_jiffies(5000));
+		return;
+	}
+
+	kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+			drawctxt->timestamp);
+
+	kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+			drawctxt->timestamp);
+
+	adreno_profile_process_results(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+void adreno_drawctxt_detach(struct kgsl_context *context)
+{
+	struct kgsl_device *device;
+	struct adreno_device *adreno_dev;
+	const struct adreno_gpudev *gpudev;
+	struct adreno_context *drawctxt;
+	int count, i;
+	struct kgsl_drawobj *list[ADRENO_CONTEXT_DRAWQUEUE_SIZE];
+
+	if (context == NULL)
+		return;
+
+	device = context->device;
+	adreno_dev = ADRENO_DEVICE(device);
+	gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	drawctxt = ADRENO_CONTEXT(context);
+
+	spin_lock(&drawctxt->lock);
+
+	spin_lock(&adreno_dev->active_list_lock);
+	list_del_init(&drawctxt->active_node);
+	spin_unlock(&adreno_dev->active_list_lock);
+
+	count = drawctxt_detach_drawobjs(drawctxt, list);
+	spin_unlock(&drawctxt->lock);
+
+	for (i = 0; i < count; i++) {
+		/*
+		 * If the context is detached while we are waiting for
+		 * the next command in GFT SKIP CMD, print the context
+		 * detached status here.
+		 */
+		adreno_fault_skipcmd_detached(adreno_dev, drawctxt, list[i]);
+		kgsl_drawobj_destroy(list[i]);
+	}
+
+	debugfs_remove_recursive(drawctxt->debug_root);
+	/* The debugfs file has a reference, release it */
+	if (drawctxt->debug_root)
+		kgsl_context_put(context);
+
+	if (gpudev->context_detach)
+		gpudev->context_detach(drawctxt);
+	else
+		wait_for_timestamp_rb(device, drawctxt);
+
+	if (context->user_ctxt_record) {
+		gpumem_free_entry(context->user_ctxt_record);
+
+		/* Put the extra ref from gpumem_alloc_entry() */
+		kgsl_mem_entry_put(context->user_ctxt_record);
+	}
+
+	/* wake threads waiting to submit commands from this context */
+	wake_up_all(&drawctxt->waiting);
+	wake_up_all(&drawctxt->wq);
+}
+
+void adreno_drawctxt_destroy(struct kgsl_context *context)
+{
+	struct adreno_context *drawctxt;
+	struct adreno_device *adreno_dev;
+	const struct adreno_gpudev *gpudev;
+
+	if (context == NULL)
+		return;
+
+	drawctxt = ADRENO_CONTEXT(context);
+
+	adreno_dev = ADRENO_DEVICE(context->device);
+	gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->context_destroy)
+		gpudev->context_destroy(adreno_dev, drawctxt);
+	kfree(drawctxt);
+}
+
+static void _drawctxt_switch_wait_callback(struct kgsl_device *device,
+		struct kgsl_event_group *group,
+		void *priv, int result)
+{
+	struct adreno_context *drawctxt = (struct adreno_context *) priv;
+
+	kgsl_context_put(&drawctxt->base);
+}
+
+void adreno_put_drawctxt_on_timestamp(struct kgsl_device *device,
+		struct adreno_context *drawctxt,
+		struct adreno_ringbuffer *rb, u32 timestamp)
+{
+	if (!drawctxt)
+		return;
+
+	if (kgsl_add_event(device, &rb->events, timestamp,
+		_drawctxt_switch_wait_callback, drawctxt))
+		kgsl_context_put(&drawctxt->base);
+}
+
+static void _add_context(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	/* Remove it from the list */
+	list_del_init(&drawctxt->active_node);
+
+	/* And push it to the front */
+	drawctxt->active_time = jiffies;
+	list_add(&drawctxt->active_node, &adreno_dev->active_list);
+}
+
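+/* A context counts as active if it was seen within the last 100ms */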
+static int __count_context(struct adreno_context *drawctxt, void *data)
+{
+	unsigned long expires = drawctxt->active_time + msecs_to_jiffies(100);
+
+	return time_after(jiffies, expires) ? 0 : 1;
+}
+
+static int __count_drawqueue_context(struct adreno_context *drawctxt,
+				void *data)
+{
+	unsigned long expires = drawctxt->active_time + msecs_to_jiffies(100);
+
+	if (time_after(jiffies, expires))
+		return 0;
+
+	return (&drawctxt->rb->dispatch_q ==
+			(struct adreno_dispatcher_drawqueue *) data) ? 1 : 0;
+}
+
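+/*
+ * The active list is kept most-recent-first (see _add_context), so the walk
+ * can stop at the first entry the callback reports as inactive.
+ */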
+static int _adreno_count_active_contexts(struct adreno_device *adreno_dev,
+		int (*func)(struct adreno_context *, void *), void *data)
+{
+	struct adreno_context *ctxt;
+	int count = 0;
+
+	list_for_each_entry(ctxt, &adreno_dev->active_list, active_node) {
+		if (func(ctxt, data) == 0)
+			return count;
+
+		count++;
+	}
+
+	return count;
+}
+
+void adreno_track_context(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	spin_lock(&adreno_dev->active_list_lock);
+
+	_add_context(adreno_dev, drawctxt);
+
+	device->active_context_count =
+			_adreno_count_active_contexts(adreno_dev,
+					__count_context, NULL);
+
+	if (drawqueue)
+		drawqueue->active_context_count =
+				_adreno_count_active_contexts(adreno_dev,
+					__count_drawqueue_context, drawqueue);
+
+	spin_unlock(&adreno_dev->active_list_lock);
+}

+ 206 - 0
qcom/opensource/graphics-kernel/adreno_drawctxt.h

@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_DRAWCTXT_H
+#define __ADRENO_DRAWCTXT_H
+
+#include <linux/types.h>
+
+#include "kgsl_device.h"
+
+struct adreno_context_type {
+	unsigned int type;
+	const char *str;
+};
+
+#define ADRENO_CONTEXT_DRAWQUEUE_SIZE 128
+#define SUBMIT_RETIRE_TICKS_SIZE 7
+
+struct kgsl_device;
+struct adreno_device;
+struct kgsl_device_private;
+
+/**
+ * struct adreno_context - Adreno GPU draw context
+ * @timestamp: Last issued context-specific timestamp
+ * @internal_timestamp: Global timestamp of the last issued command
+ *			NOTE: guarded by device->mutex, not drawctxt->lock!
+ * @type: Context type (GL, CL, RS)
+ * @lock: Spinlock to protect the drawqueue
+ * @drawqueue: Queue of drawobjs waiting to be dispatched for this
+ *			context
+ * @drawqueue_head: Head of the drawqueue queue
+ * @drawqueue_tail: Tail of the drawqueue queue
+ * @wq: Workqueue structure for contexts to sleep pending room in the queue
+ * @waiting: Workqueue structure for contexts waiting for a timestamp or event
+ * @timeout: Workqueue structure for contexts waiting to invalidate
+ * @queued: Number of commands queued in the drawqueue
+ * @fault_policy: GFT fault policy set in _skip_cmd();
+ * @debug_root: debugfs entry for this context.
+ * @queued_timestamp: The last timestamp that was queued on this context
+ * @rb: The ringbuffer in which this context submits commands.
+ * @submitted_timestamp: The last timestamp that was submitted for this context
+ * @submit_retire_ticks: Array to hold command obj execution times from submit
+ *                       to retire
+ * @ticks_index: The index into submit_retire_ticks[] where the new delta will
+ *		 be written.
+ * @active_node: Linkage for nodes in active_list
+ * @active_time: Time when this context was last seen
+ */
+struct adreno_context {
+	struct kgsl_context base;
+	unsigned int timestamp;
+	unsigned int internal_timestamp;
+	unsigned int type;
+	spinlock_t lock;
+
+	/* Dispatcher */
+	struct kgsl_drawobj *drawqueue[ADRENO_CONTEXT_DRAWQUEUE_SIZE];
+	unsigned int drawqueue_head;
+	unsigned int drawqueue_tail;
+
+	wait_queue_head_t wq;
+	wait_queue_head_t waiting;
+	wait_queue_head_t timeout;
+
+	int queued;
+	unsigned int fault_policy;
+	struct dentry *debug_root;
+	unsigned int queued_timestamp;
+	struct adreno_ringbuffer *rb;
+	unsigned int submitted_timestamp;
+	uint64_t submit_retire_ticks[SUBMIT_RETIRE_TICKS_SIZE];
+	int ticks_index;
+
+	struct list_head active_node;
+	unsigned long active_time;
+	/** @gmu_context_queue: Queue to dispatch submissions to GMU */
+	struct kgsl_memdesc gmu_context_queue;
+	/** @gmu_hw_fence_queue: Queue for GMU to store hardware fences for this context */
+	struct kgsl_memdesc gmu_hw_fence_queue;
+	/** @hw_fence_list: List of hardware fences(sorted by timestamp) not yet submitted to GMU */
+	struct list_head hw_fence_list;
+	/** @hw_fence_inflight_list: List of hardware fences submitted to GMU */
+	struct list_head hw_fence_inflight_list;
+	/** @hw_fence_count: Number of hardware fences not yet sent to Tx Queue */
+	u32 hw_fence_count;
+	/** @syncobj_timestamp: Timestamp to check whether GMU has consumed a syncobj */
+	u32 syncobj_timestamp;
+};
+
+/* Flag definitions for flag field in adreno_context */
+
+/**
+ * enum adreno_context_priv - Private flags for an adreno draw context
+ * @ADRENO_CONTEXT_FAULT - set if the context has faulted (and recovered)
+ * @ADRENO_CONTEXT_GPU_HANG - Context has caused a GPU hang
+ * @ADRENO_CONTEXT_GPU_HANG_FT - Context has caused a GPU hang
+ *      and fault tolerance was successful
+ * @ADRENO_CONTEXT_SKIP_EOF - Context skip IBs until the next end of frame
+ *      marker.
+ * @ADRENO_CONTEXT_FORCE_PREAMBLE - Force the preamble for the next submission.
+ * @ADRENO_CONTEXT_SKIP_CMD - Context's drawobjs skipped during
+ *      fault tolerance.
+ * @ADRENO_CONTEXT_FENCE_LOG - Dump fences on this context.
+ */
+enum adreno_context_priv {
+	ADRENO_CONTEXT_FAULT = KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC,
+	ADRENO_CONTEXT_GPU_HANG,
+	ADRENO_CONTEXT_GPU_HANG_FT,
+	ADRENO_CONTEXT_SKIP_EOF,
+	ADRENO_CONTEXT_FORCE_PREAMBLE,
+	ADRENO_CONTEXT_SKIP_CMD,
+	ADRENO_CONTEXT_FENCE_LOG,
+};
+
+struct kgsl_context *adreno_drawctxt_create(
+			struct kgsl_device_private *dev_priv,
+			uint32_t *flags);
+
+void adreno_drawctxt_detach(struct kgsl_context *context);
+
+void adreno_drawctxt_destroy(struct kgsl_context *context);
+
+struct adreno_ringbuffer;
+struct adreno_dispatcher_drawqueue;
+
+int adreno_drawctxt_wait(struct adreno_device *adreno_dev,
+		struct kgsl_context *context,
+		uint32_t timestamp, unsigned int timeout);
+
+void adreno_drawctxt_invalidate(struct kgsl_device *device,
+		struct kgsl_context *context);
+
+void adreno_drawctxt_dump(struct kgsl_device *device,
+		struct kgsl_context *context);
+
+/**
+ * adreno_drawctxt_detached - Helper function to check if a context is detached
+ * @drawctxt: Adreno drawctxt to check
+ *
+ * Return: True if the context isn't null and it has been detached
+ */
+static inline bool adreno_drawctxt_detached(struct adreno_context *drawctxt)
+{
+	return (drawctxt && kgsl_context_detached(&drawctxt->base));
+}
+
+/**
+ * adreno_put_drawctxt_on_timestamp - Put the refcount on the drawctxt when the
+ * timestamp expires
+ * @device: A KGSL device handle
+ * @drawctxt: The draw context to put away
+ * @rb: The ringbuffer that will trigger the timestamp event
+ * @timestamp: The timestamp on @rb that will trigger the event
+ *
+ * Add an event to put the refcount on @drawctxt after @timestamp expires on
+ * @rb. This is used by the context switch to safely put away the context after
+ * a new context is switched in.
+ */
+void adreno_put_drawctxt_on_timestamp(struct kgsl_device *device,
+		struct adreno_context *drawctxt,
+		struct adreno_ringbuffer *rb, u32 timestamp);
+
+/**
+ * adreno_drawctxt_get_pagetable - Helper function to return the pagetable for a
+ * context
+ * @drawctxt: The adreno draw context to query
+ *
+ * Return: A pointer to the pagetable for the process that owns the context or
+ * NULL
+ */
+static inline struct kgsl_pagetable *
+adreno_drawctxt_get_pagetable(struct adreno_context *drawctxt)
+{
+	if (drawctxt)
+		return drawctxt->base.proc_priv->pagetable;
+
+	return NULL;
+}
+
+/**
+ * adreno_drawctxt_set_guilty - Mark a context as guilty and invalidate it
+ * @device: Pointer to a GPU device handle
+ * @context: Pointer to the context to invalidate
+ *
+ * Mark the specified context as guilty and invalidate it
+ */
+void adreno_drawctxt_set_guilty(struct kgsl_device *device,
+		struct kgsl_context *context);
+
+/**
+ * adreno_track_context - Add a context to active list and keep track of active contexts
+ * @adreno_dev: Pointer to adreno device
+ * @drawqueue: Pointer to the dispatch queue to which context send commands
+ * @drawctxt: Draw context which is to be tracked
+ *
+ * Add the given draw context to the active list and update number of contexts which
+ * are active overall as well as which are active on the dispatch queue to which
+ * the given context sends commands.
+ */
+void adreno_track_context(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue,
+		struct adreno_context *drawctxt);
+#endif  /* __ADRENO_DRAWCTXT_H */

+ 2253 - 0
qcom/opensource/graphics-kernel/adreno_gen7.c

@@ -0,0 +1,2253 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+#include <soc/qcom/of_common.h>
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_gen7_hwsched.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+#include "kgsl_pwrscale.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+/* IFPC & Preemption static powerup restore list */
+static const u32 gen7_pwrup_reglist[] = {
+	GEN7_UCHE_TRAP_BASE_LO,
+	GEN7_UCHE_TRAP_BASE_HI,
+	GEN7_UCHE_WRITE_THRU_BASE_LO,
+	GEN7_UCHE_WRITE_THRU_BASE_HI,
+	GEN7_UCHE_GMEM_RANGE_MIN_LO,
+	GEN7_UCHE_GMEM_RANGE_MIN_HI,
+	GEN7_UCHE_GMEM_RANGE_MAX_LO,
+	GEN7_UCHE_GMEM_RANGE_MAX_HI,
+	GEN7_UCHE_CACHE_WAYS,
+	GEN7_UCHE_MODE_CNTL,
+	GEN7_RB_NC_MODE_CNTL,
+	GEN7_RB_CMP_DBG_ECO_CNTL,
+	GEN7_GRAS_NC_MODE_CNTL,
+	GEN7_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE,
+	GEN7_UCHE_GBIF_GX_CONFIG,
+	GEN7_UCHE_CLIENT_PF,
+	GEN7_TPL1_DBG_ECO_CNTL1,
+};
+
+static const u32 gen7_0_0_pwrup_reglist[] = {
+	GEN7_UCHE_TRAP_BASE_LO,
+	GEN7_UCHE_TRAP_BASE_HI,
+	GEN7_UCHE_WRITE_THRU_BASE_LO,
+	GEN7_UCHE_WRITE_THRU_BASE_HI,
+	GEN7_UCHE_GMEM_RANGE_MIN_LO,
+	GEN7_UCHE_GMEM_RANGE_MIN_HI,
+	GEN7_UCHE_GMEM_RANGE_MAX_LO,
+	GEN7_UCHE_GMEM_RANGE_MAX_HI,
+	GEN7_UCHE_CACHE_WAYS,
+	GEN7_UCHE_MODE_CNTL,
+	GEN7_RB_NC_MODE_CNTL,
+	GEN7_RB_CMP_DBG_ECO_CNTL,
+	GEN7_SP_NC_MODE_CNTL,
+	GEN7_GRAS_NC_MODE_CNTL,
+	GEN7_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE,
+	GEN7_UCHE_GBIF_GX_CONFIG,
+	GEN7_UCHE_CLIENT_PF,
+};
+
+/* IFPC only static powerup restore list */
+static const u32 gen7_ifpc_pwrup_reglist[] = {
+	GEN7_TPL1_NC_MODE_CNTL,
+	GEN7_SP_NC_MODE_CNTL,
+	GEN7_CP_DBG_ECO_CNTL,
+	GEN7_CP_PROTECT_CNTL,
+	GEN7_CP_LPAC_PROTECT_CNTL,
+	GEN7_CP_PROTECT_REG,
+	GEN7_CP_PROTECT_REG+1,
+	GEN7_CP_PROTECT_REG+2,
+	GEN7_CP_PROTECT_REG+3,
+	GEN7_CP_PROTECT_REG+4,
+	GEN7_CP_PROTECT_REG+5,
+	GEN7_CP_PROTECT_REG+6,
+	GEN7_CP_PROTECT_REG+7,
+	GEN7_CP_PROTECT_REG+8,
+	GEN7_CP_PROTECT_REG+9,
+	GEN7_CP_PROTECT_REG+10,
+	GEN7_CP_PROTECT_REG+11,
+	GEN7_CP_PROTECT_REG+12,
+	GEN7_CP_PROTECT_REG+13,
+	GEN7_CP_PROTECT_REG+14,
+	GEN7_CP_PROTECT_REG+15,
+	GEN7_CP_PROTECT_REG+16,
+	GEN7_CP_PROTECT_REG+17,
+	GEN7_CP_PROTECT_REG+18,
+	GEN7_CP_PROTECT_REG+19,
+	GEN7_CP_PROTECT_REG+20,
+	GEN7_CP_PROTECT_REG+21,
+	GEN7_CP_PROTECT_REG+22,
+	GEN7_CP_PROTECT_REG+23,
+	GEN7_CP_PROTECT_REG+24,
+	GEN7_CP_PROTECT_REG+25,
+	GEN7_CP_PROTECT_REG+26,
+	GEN7_CP_PROTECT_REG+27,
+	GEN7_CP_PROTECT_REG+28,
+	GEN7_CP_PROTECT_REG+29,
+	GEN7_CP_PROTECT_REG+30,
+	GEN7_CP_PROTECT_REG+31,
+	GEN7_CP_PROTECT_REG+32,
+	GEN7_CP_PROTECT_REG+33,
+	GEN7_CP_PROTECT_REG+34,
+	GEN7_CP_PROTECT_REG+35,
+	GEN7_CP_PROTECT_REG+36,
+	GEN7_CP_PROTECT_REG+37,
+	GEN7_CP_PROTECT_REG+38,
+	GEN7_CP_PROTECT_REG+39,
+	GEN7_CP_PROTECT_REG+40,
+	GEN7_CP_PROTECT_REG+41,
+	GEN7_CP_PROTECT_REG+42,
+	GEN7_CP_PROTECT_REG+43,
+	GEN7_CP_PROTECT_REG+44,
+	GEN7_CP_PROTECT_REG+45,
+	GEN7_CP_PROTECT_REG+46,
+	GEN7_CP_PROTECT_REG+47,
+	GEN7_CP_AHB_CNTL,
+};
+
+static const u32 gen7_0_0_ifpc_pwrup_reglist[] = {
+	GEN7_TPL1_NC_MODE_CNTL,
+	GEN7_CP_DBG_ECO_CNTL,
+	GEN7_CP_PROTECT_CNTL,
+	GEN7_CP_LPAC_PROTECT_CNTL,
+	GEN7_CP_PROTECT_REG,
+	GEN7_CP_PROTECT_REG+1,
+	GEN7_CP_PROTECT_REG+2,
+	GEN7_CP_PROTECT_REG+3,
+	GEN7_CP_PROTECT_REG+4,
+	GEN7_CP_PROTECT_REG+5,
+	GEN7_CP_PROTECT_REG+6,
+	GEN7_CP_PROTECT_REG+7,
+	GEN7_CP_PROTECT_REG+8,
+	GEN7_CP_PROTECT_REG+9,
+	GEN7_CP_PROTECT_REG+10,
+	GEN7_CP_PROTECT_REG+11,
+	GEN7_CP_PROTECT_REG+12,
+	GEN7_CP_PROTECT_REG+13,
+	GEN7_CP_PROTECT_REG+14,
+	GEN7_CP_PROTECT_REG+15,
+	GEN7_CP_PROTECT_REG+16,
+	GEN7_CP_PROTECT_REG+17,
+	GEN7_CP_PROTECT_REG+18,
+	GEN7_CP_PROTECT_REG+19,
+	GEN7_CP_PROTECT_REG+20,
+	GEN7_CP_PROTECT_REG+21,
+	GEN7_CP_PROTECT_REG+22,
+	GEN7_CP_PROTECT_REG+23,
+	GEN7_CP_PROTECT_REG+24,
+	GEN7_CP_PROTECT_REG+25,
+	GEN7_CP_PROTECT_REG+26,
+	GEN7_CP_PROTECT_REG+27,
+	GEN7_CP_PROTECT_REG+28,
+	GEN7_CP_PROTECT_REG+29,
+	GEN7_CP_PROTECT_REG+30,
+	GEN7_CP_PROTECT_REG+31,
+	GEN7_CP_PROTECT_REG+32,
+	GEN7_CP_PROTECT_REG+33,
+	GEN7_CP_PROTECT_REG+34,
+	GEN7_CP_PROTECT_REG+35,
+	GEN7_CP_PROTECT_REG+36,
+	GEN7_CP_PROTECT_REG+37,
+	GEN7_CP_PROTECT_REG+38,
+	GEN7_CP_PROTECT_REG+39,
+	GEN7_CP_PROTECT_REG+40,
+	GEN7_CP_PROTECT_REG+41,
+	GEN7_CP_PROTECT_REG+42,
+	GEN7_CP_PROTECT_REG+43,
+	GEN7_CP_PROTECT_REG+44,
+	GEN7_CP_PROTECT_REG+45,
+	GEN7_CP_PROTECT_REG+46,
+	GEN7_CP_PROTECT_REG+47,
+	GEN7_CP_AHB_CNTL,
+};
+
+/* Gen7_9_x IFPC only static powerup restore list */
+static const u32 gen7_9_x_ifpc_pwrup_reglist[] = {
+	GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_1,
+	GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_2,
+	GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_3,
+	GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_4,
+};
+
+static int acd_calibrate_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 debug_val = (u32) val;
+	int ret;
+
+	mutex_lock(&device->mutex);
+	ret = adreno_active_count_get(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_set_value(adreno_dev, HFI_VALUE_DBG, F_PWR_ACD_CALIBRATE, debug_val);
+	if (!ret)
+		gmu->acd_debug_val = debug_val;
+
+	adreno_active_count_put(adreno_dev);
+err:
+	mutex_unlock(&device->mutex);
+	return ret;
+}
+
+static int acd_calibrate_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	*val = (u64) gmu->acd_debug_val;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(acd_cal_fops, acd_calibrate_get, acd_calibrate_set, "%llu\n");
+
+void gen7_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds)
+{
+	u32 i = 0, mask = 0;
+
+	/* Disable concurrent binning before sending CP init */
+	cmds[i++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[i++] = BIT(27);
+
+	/* Use multiple HW contexts */
+	mask |= BIT(0);
+
+	/* Enable error detection */
+	mask |= BIT(1);
+
+	/* Set default reset state */
+	mask |= BIT(3);
+
+	/* Disable save/restore of performance counters across preemption */
+	mask |= BIT(6);
+
+	/* Enable the register init list with the spinlock */
+	mask |= BIT(8);
+
+	/* By default DMS is enabled from CP side, disable it if not supported */
+	if (!adreno_dev->dms_enabled)
+		mask |= BIT(11);
+
+	cmds[i++] = cp_type7_packet(CP_ME_INIT, 7);
+
+	/* Enabled ordinal mask */
+	cmds[i++] = mask;
+	cmds[i++] = 0x00000003; /* Set number of HW contexts */
+	cmds[i++] = 0x20000000; /* Enable error detection */
+	cmds[i++] = 0x00000002; /* Operation mode mask */
+
+	/* Register initialization list with spinlock */
+	cmds[i++] = lower_32_bits(adreno_dev->pwrup_reglist->gpuaddr);
+	cmds[i++] = upper_32_bits(adreno_dev->pwrup_reglist->gpuaddr);
+	/*
+	 * Gen7 targets with concurrent binning are expected to have a dynamic
+	 * power up list made of triplets which contain the pipe id.
+	 * Bit 31 of POWER_UP_REGISTER_LIST_LENGTH is reused here to let CP
+	 * know whether the power up list contains triplets. If
+	 * REGISTER_INIT_LIST_WITH_SPINLOCK is set and bit 31 below is set,
+	 * CP expects a dynamic list with triplets.
+	 */
+	cmds[i++] = BIT(31);
+}
+
+int gen7_fenced_write(struct adreno_device *adreno_dev, u32 offset,
+		u32 value, u32 mask)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int status, i;
+	u64 ts1, ts2;
+
+	kgsl_regwrite(device, offset, value);
+	ts1 = gpudev->read_alwayson(adreno_dev);
+	for (i = 0; i < GMU_CORE_LONG_WAKEUP_RETRY_LIMIT; i++) {
+		/*
+		 * Make sure the previous register write is posted before
+		 * checking the fence status
+		 */
+		mb();
+
+		gmu_core_regread(device, GEN7_GMU_AHB_FENCE_STATUS, &status);
+
+		/*
+		 * If !writedropped0/1, then the write to fenced register
+		 * was successful
+		 */
+		if (!(status & mask))
+			break;
+
+		/* Wait a small amount of time before trying again */
+		udelay(GMU_CORE_WAKEUP_DELAY_US);
+
+		/* Try to write the fenced register again */
+		kgsl_regwrite(device, offset, value);
+	}
+
+	if (i < GMU_CORE_SHORT_WAKEUP_RETRY_LIMIT)
+		return 0;
+
+	if (i == GMU_CORE_LONG_WAKEUP_RETRY_LIMIT) {
+		ts2 = gpudev->read_alwayson(adreno_dev);
+		dev_err(device->dev,
+				"Timed out waiting %d usecs to write fenced register 0x%x, timestamps: %llx %llx\n",
+				i * GMU_CORE_WAKEUP_DELAY_US, offset, ts1, ts2);
+		return -ETIMEDOUT;
+	}
+
+	dev_info(device->dev,
+		"Waited %d usecs to write fenced register 0x%x\n",
+		i * GMU_CORE_WAKEUP_DELAY_US, offset);
+
+	return 0;
+}
+
+int gen7_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	u64 freq = gen7_core->gmu_hub_clk_freq;
+
+	adreno_dev->highest_bank_bit = gen7_core->highest_bank_bit;
+	adreno_dev->gmu_hub_clk_freq = freq ? freq : 150000000;
+	adreno_dev->ahb_timeout_val = adreno_get_ahb_timeout_val(adreno_dev,
+			gen7_core->noc_timeout_us);
+	adreno_dev->bcl_data = gen7_core->bcl_data;
+
+	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
+			ADRENO_COOP_RESET);
+
+	/* If the memory type is DDR 4, override the existing configuration */
+	if (of_fdt_get_ddrtype() == 0x7)
+		adreno_dev->highest_bank_bit = 14;
+
+	gen7_crashdump_init(adreno_dev);
+
+	return adreno_allocate_global(device, &adreno_dev->pwrup_reglist,
+		PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED,
+		"powerup_register_list");
+}
+
+#define CX_TIMER_INIT_SAMPLES 16
+void gen7_cx_timer_init(struct adreno_device *adreno_dev)
+{
+	u64 seed_val, tmr, skew = 0;
+	int i;
+	unsigned long flags;
+
+	/*
+	 * Only gen7_9_x has the CX timer. Set it up during first boot or
+	 * after suspend resume.
+	 */
+	if (!adreno_is_gen7_9_x(adreno_dev) ||
+		test_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv))
+		return;
+
+	/* Disable irqs to get accurate timings */
+	local_irq_save(flags);
+
+	/* Calculate the overhead of timer reads and register writes */
+	for (i = 0; i < CX_TIMER_INIT_SAMPLES; i++) {
+		u64 tmr1, tmr2, tmr3;
+
+		/* Measure time for two reads of the CPU timer */
+		tmr1 = arch_timer_read_counter();
+		tmr2 = arch_timer_read_counter();
+
+		/* Write to the register and time it */
+		adreno_cx_misc_regwrite(adreno_dev,
+					GEN7_GPU_CX_MISC_AO_COUNTER_LO,
+					lower_32_bits(tmr2));
+		adreno_cx_misc_regwrite(adreno_dev,
+					GEN7_GPU_CX_MISC_AO_COUNTER_HI,
+					upper_32_bits(tmr2));
+
+		/* Barrier to make sure the write completes before timing it */
+		mb();
+		tmr3 = arch_timer_read_counter();
+
+		/* Calculate difference between register write and CPU timer */
+		skew += (tmr3 - tmr2) - (tmr2 - tmr1);
+	}
+
+	local_irq_restore(flags);
+
+	/* Get the average over all our readings, to the closest integer */
+	skew = (skew + CX_TIMER_INIT_SAMPLES / 2) / CX_TIMER_INIT_SAMPLES;
+
+	local_irq_save(flags);
+	tmr = arch_timer_read_counter();
+
+	seed_val = tmr + skew;
+
+	/* Seed the GPU CX counter with the adjusted timer */
+	adreno_cx_misc_regwrite(adreno_dev,
+			GEN7_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(seed_val));
+	adreno_cx_misc_regwrite(adreno_dev,
+			GEN7_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(seed_val));
+
+	local_irq_restore(flags);
+
+	set_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv);
+}
+
+void gen7_get_gpu_feature_info(struct adreno_device *adreno_dev)
+{
+	u32 feature_fuse = 0;
+
+	/* Only Gen7_9_x has the HW feature information */
+	if (!adreno_is_gen7_9_x(adreno_dev))
+		return;
+
+	/* Get HW feature soft fuse value */
+	adreno_cx_misc_regread(adreno_dev, GEN7_GPU_CX_MISC_SW_FUSE_VALUE,
+			       &feature_fuse);
+
+	adreno_dev->fastblend_enabled = feature_fuse & BIT(GEN7_FASTBLEND_SW_FUSE);
+	adreno_dev->raytracing_enabled = feature_fuse & BIT(GEN7_RAYTRACING_SW_FUSE);
+
+	/* If software enables LPAC without HW support, disable it */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
+		adreno_dev->lpac_enabled = feature_fuse & BIT(GEN7_LPAC_SW_FUSE);
+
+	adreno_dev->feature_fuse = feature_fuse;
+}
+
+#define GEN7_PROTECT_DEFAULT (BIT(0) | BIT(1) | BIT(3))
+static void gen7_protect_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	const struct gen7_protected_regs *regs = gen7_core->protected_regs;
+	int i;
+
+	/*
+	 * Enable access protection to privileged registers, fault on an access
+	 * protect violation and select the last span to protect from the start
+	 * address all the way to the end of the register address space
+	 */
+	kgsl_regwrite(device, GEN7_CP_PROTECT_CNTL, GEN7_PROTECT_DEFAULT);
+
+	if (adreno_dev->lpac_enabled)
+		kgsl_regwrite(device, GEN7_CP_LPAC_PROTECT_CNTL, GEN7_PROTECT_DEFAULT);
+
+	/* Program each register defined by the core definition */
+	for (i = 0; regs[i].reg; i++) {
+		u32 count;
+
+		/*
+		 * This is the offset of the end register as counted from the
+		 * start, i.e. # of registers in the range - 1
+		 */
+		count = regs[i].end - regs[i].start;
+
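+		/*
+		 * Each protect register packs the start offset in bits [17:0],
+		 * the span length in bits [30:18] and the no-access flag in
+		 * bit 31. Illustrative (hypothetical) range: protecting
+		 * 0x200..0x23f with noaccess set programs the value 0x80fc0200.
+		 */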
+		kgsl_regwrite(device, regs[i].reg,
+				FIELD_PREP(GENMASK(17, 0), regs[i].start) |
+				FIELD_PREP(GENMASK(30, 18), count) |
+				FIELD_PREP(BIT(31), regs[i].noaccess));
+	}
+}
+
+#define RBBM_CLOCK_CNTL_ON 0x8aa8aa82
+
+static void gen7_hwcg_set(struct adreno_device *adreno_dev, bool on)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	unsigned int value;
+	int i;
+
+	if (!adreno_dev->hwcg_enabled)
+		on = false;
+
+	for (i = 0; i < gen7_core->ao_hwcg_count; i++)
+		gmu_core_regwrite(device, gen7_core->ao_hwcg[i].offset,
+			on ? gen7_core->ao_hwcg[i].val : 0);
+
+	if (!gen7_core->hwcg) {
+		kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL_GLOBAL, 1);
+		kgsl_regwrite(device, GEN7_RBBM_CGC_GLOBAL_LOAD_CMD, on ? 1 : 0);
+
+		if (on) {
+			u32 retry = 3;
+
+			kgsl_regwrite(device, GEN7_RBBM_CGC_P2S_TRIG_CMD, 1);
+			/* Poll for the TXDONE:BIT(0) status */
+			do {
+				/* Wait a small amount of time for TXDONE status */
+				udelay(1);
+				kgsl_regread(device, GEN7_RBBM_CGC_P2S_STATUS, &value);
+			} while (!(value & BIT(0)) && --retry);
+
+			if (!(value & BIT(0))) {
+				dev_err(device->dev, "RBBM_CGC_P2S_STATUS:TXDONE Poll failed\n");
+				kgsl_device_snapshot(device, NULL, NULL, false);
+				return;
+			}
+			kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL_GLOBAL, 0);
+		}
+		return;
+	}
+
+	kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL, &value);
+
+	if (value == RBBM_CLOCK_CNTL_ON && on)
+		return;
+
+	if (value == 0 && !on)
+		return;
+
+	for (i = 0; i < gen7_core->hwcg_count; i++)
+		kgsl_regwrite(device, gen7_core->hwcg[i].offset,
+			on ? gen7_core->hwcg[i].val : 0);
+
+	/* enable top level HWCG */
+	kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL,
+		on ? RBBM_CLOCK_CNTL_ON : 0);
+}
+
+static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev)
+{
+	struct adreno_reglist_list reglist[3];
+	void *ptr = adreno_dev->pwrup_reglist->hostptr;
+	struct cpu_gpu_lock *lock = ptr;
+	u32 items = 0, i, j;
+	u32 *dest = ptr + sizeof(*lock);
+
+	/* Static IFPC-only registers */
+	if (adreno_is_gen7_0_x_family(adreno_dev)) {
+		reglist[items].regs = gen7_0_0_ifpc_pwrup_reglist;
+		reglist[items].count = ARRAY_SIZE(gen7_0_0_ifpc_pwrup_reglist);
+	} else {
+		reglist[items].regs = gen7_ifpc_pwrup_reglist;
+		reglist[items].count = ARRAY_SIZE(gen7_ifpc_pwrup_reglist);
+	}
+	lock->ifpc_list_len = reglist[items].count;
+	items++;
+
+	if (adreno_is_gen7_9_x(adreno_dev)) {
+		reglist[items].regs = gen7_9_x_ifpc_pwrup_reglist;
+		reglist[items].count = ARRAY_SIZE(gen7_9_x_ifpc_pwrup_reglist);
+		lock->ifpc_list_len += reglist[items].count;
+		items++;
+	}
+
+	/* Static IFPC + preemption registers */
+	if (adreno_is_gen7_0_x_family(adreno_dev)) {
+		reglist[items].regs = gen7_0_0_pwrup_reglist;
+		reglist[items].count = ARRAY_SIZE(gen7_0_0_pwrup_reglist);
+	} else {
+		reglist[items].regs = gen7_pwrup_reglist;
+		reglist[items].count = ARRAY_SIZE(gen7_pwrup_reglist);
+	}
+	lock->preemption_list_len = reglist[items].count;
+	items++;
+
+	/*
+	 * For each entry in each of the lists, write the offset and the current
+	 * register value into the GPU buffer
+	 */
+	for (i = 0; i < items; i++) {
+		const u32 *r = reglist[i].regs;
+
+		for (j = 0; j < reglist[i].count; j++) {
+			*dest++ = r[j];
+			kgsl_regread(KGSL_DEVICE(adreno_dev), r[j], dest++);
+		}
+	}
+
+	/*
+	 * The overall register list is composed of
+	 * 1. Static IFPC-only registers
+	 * 2. Static IFPC + preemption registers
+	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
+	 *
+	 * The first two lists are static. Their sizes are stored as the number
+	 * of (offset, value) pairs in ifpc_list_len and preemption_list_len
+	 * respectively. With concurrent binning, some of the perfcounter
+	 * registers are virtualized, so the CP needs to know the pipe id to
+	 * program the aperture in order to restore them. The third list is
+	 * therefore dynamic, with triplets of the form
+	 * (<aperture, shifted 12 bits> <address> <data>), and its length is
+	 * stored as the number of triplets in dynamic_list_len.
+	 */
+	lock->dynamic_list_len = 0;
+}
+
+/* _llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks */
+static void _llc_configure_gpu_scid(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 gpu_scid;
+
+	if (IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice) ||
+		!adreno_dev->gpu_llc_slice_enable)
+		return;
+
+	if (llcc_slice_activate(adreno_dev->gpu_llc_slice))
+		return;
+
+	gpu_scid = llcc_get_slice_id(adreno_dev->gpu_llc_slice);
+
+	/* 6 blocks at 5 bits per block */
+	kgsl_regwrite(device, GEN7_GBIF_SCACHE_CNTL1,
+			FIELD_PREP(GENMASK(29, 25), gpu_scid) |
+			FIELD_PREP(GENMASK(24, 20), gpu_scid) |
+			FIELD_PREP(GENMASK(19, 15), gpu_scid) |
+			FIELD_PREP(GENMASK(14, 10), gpu_scid) |
+			FIELD_PREP(GENMASK(9, 5), gpu_scid) |
+			FIELD_PREP(GENMASK(4, 0), gpu_scid));
+
+	kgsl_regwrite(device, GEN7_GBIF_SCACHE_CNTL0,
+			FIELD_PREP(GENMASK(14, 10), gpu_scid) | BIT(8));
+}
+
+static void _llc_gpuhtw_slice_activate(struct adreno_device *adreno_dev)
+{
+	if (IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice) ||
+		!adreno_dev->gpuhtw_llc_slice_enable)
+		return;
+
+	llcc_slice_activate(adreno_dev->gpuhtw_llc_slice);
+}
+
+static void _set_secvid(struct kgsl_device *device)
+{
+	kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_CNTL, 0x0);
+	kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
+		lower_32_bits(KGSL_IOMMU_SECURE_BASE32));
+	kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_HI,
+		upper_32_bits(KGSL_IOMMU_SECURE_BASE32));
+	kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_TRUSTED_SIZE,
+		FIELD_PREP(GENMASK(31, 12),
+		(KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K)));
+}
+
+/* Set UCHE_TRAP_BASE to a page below the top of the memory space */
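+/* i.e. (1ULL << 49) - SZ_4K, one page below the 49-bit address limit */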
+#define GEN7_UCHE_TRAP_BASE 0x1FFFFFFFFF000ULL
+
+static u64 gen7_get_uche_trap_base(void)
+{
+	return GEN7_UCHE_TRAP_BASE;
+}
+
+/*
+ * All Gen7 targets support marking certain transactions as always privileged
+ * which allows us to mark more memory as privileged without having to
+ * explicitly set the APRIV bit. Choose the following transactions to be
+ * privileged by default:
+ * CDWRITE     [6:6] - Crashdumper writes
+ * CDREAD      [5:5] - Crashdumper reads
+ * RBRPWB      [3:3] - RPTR shadow writes
+ * RBPRIVLEVEL [2:2] - Memory accesses from PM4 packets in the ringbuffer
+ * RBFETCH     [1:1] - Ringbuffer reads
+ * ICACHE      [0:0] - Instruction cache fetches
+ */
+
+#define GEN7_APRIV_DEFAULT (BIT(3) | BIT(2) | BIT(1) | BIT(0))
+/* Add crashdumper permissions for the BR APRIV */
+#define GEN7_BR_APRIV_DEFAULT (GEN7_APRIV_DEFAULT | BIT(6) | BIT(5))
+
+void gen7_enable_ahb_timeout_detection(struct adreno_device *adreno_dev)
+{
+	u32 val;
+
+	if (!adreno_dev->ahb_timeout_val)
+		return;
+
+	val = (ADRENO_AHB_CNTL_DEFAULT | FIELD_PREP(GENMASK(4, 0),
+			adreno_dev->ahb_timeout_val));
+	adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_AON_CNTL, val);
+	adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_GMU_CNTL, val);
+	adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_CP_CNTL, val);
+	adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL, val);
+	adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_CX_AHB_HOST_CNTL, val);
+}
+
+int gen7_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	u32 mal, mode = 0, rgb565_predicator = 0;
+	/*
+	 * HBB values 13 to 16 map to HBB LSB values 0 to 3. Any HBB value
+	 * beyond 16 also needs the HBB MSB programmed. By default the highest
+	 * bank bit is 14, hence set the default HBB LSB to "1" and MSB to "0".
+	 */
+	u32 hbb_lo = 1, hbb_hi = 0;
+	struct cpu_gpu_lock *pwrup_lock = adreno_dev->pwrup_reglist->hostptr;
+	u64 uche_trap_base = gen7_get_uche_trap_base();
+
+	/* Set up GBIF registers from the GPU core definition */
+	kgsl_regmap_multi_write(&device->regmap, gen7_core->gbif,
+		gen7_core->gbif_count);
+
+	kgsl_regwrite(device, GEN7_UCHE_GBIF_GX_CONFIG, 0x10240e0);
+
+	/* Make all blocks contribute to the GPU BUSY perf counter */
+	kgsl_regwrite(device, GEN7_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
+
+	/*
+	 * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively
+	 * disabling L2 bypass
+	 */
+	kgsl_regwrite(device, GEN7_UCHE_TRAP_BASE_LO, lower_32_bits(uche_trap_base));
+	kgsl_regwrite(device, GEN7_UCHE_TRAP_BASE_HI, upper_32_bits(uche_trap_base));
+	kgsl_regwrite(device, GEN7_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(uche_trap_base));
+	kgsl_regwrite(device, GEN7_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(uche_trap_base));
+
+	/*
+	 * Some gen7 targets don't use a programmed UCHE GMEM base address,
+	 * so skip programming the register for such targets.
+	 */
+	if (adreno_dev->uche_gmem_base) {
+		kgsl_regwrite(device, GEN7_UCHE_GMEM_RANGE_MIN_LO,
+				adreno_dev->uche_gmem_base);
+		kgsl_regwrite(device, GEN7_UCHE_GMEM_RANGE_MIN_HI, 0x0);
+		kgsl_regwrite(device, GEN7_UCHE_GMEM_RANGE_MAX_LO,
+				adreno_dev->uche_gmem_base +
+				adreno_dev->gpucore->gmem_size - 1);
+		kgsl_regwrite(device, GEN7_UCHE_GMEM_RANGE_MAX_HI, 0x0);
+	}
+
+	kgsl_regwrite(device, GEN7_UCHE_CACHE_WAYS, 0x800000);
+
+	/*
+	 * Disable LPAC hard sync event to fix lock up issue when BR/BV event
+	 * fifo is full.
+	 */
+	if (adreno_dev->lpac_enabled)
+		kgsl_regrmw(device, GEN7_UCHE_DBG_CNTL_1, BIT(30), BIT(30));
+
+	kgsl_regwrite(device, GEN7_UCHE_CMDQ_CONFIG,
+			FIELD_PREP(GENMASK(19, 16), 6) |
+			FIELD_PREP(GENMASK(15, 12), 6) |
+			FIELD_PREP(GENMASK(11, 8), 9) |
+			BIT(3) | BIT(2) |
+			FIELD_PREP(GENMASK(1, 0), 2));
+
+	/*
+	 * CP takes care of the restore during IFPC exit. We need to restore at
+	 * the slumber boundary as well.
+	 */
+	if (pwrup_lock->dynamic_list_len > 0)
+		kgsl_regwrite(device, GEN7_RBBM_PERFCTR_CNTL, 0x1);
+
+	/* Turn on the IFPC counter (countable 4 on XOCLK4) */
+	kgsl_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
+			FIELD_PREP(GENMASK(7, 0), 0x4));
+
+	/* Turn on counter to count total time spent in BCL throttle */
+	if (adreno_dev->bcl_enabled && adreno_is_gen7_2_x_family(adreno_dev))
+		kgsl_regrmw(device, GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1, GENMASK(15, 8),
+				FIELD_PREP(GENMASK(15, 8), 0x26));
+
+	if (of_property_read_u32(device->pdev->dev.of_node,
+		"qcom,min-access-length", &mal))
+		mal = 32;
+
+	of_property_read_u32(device->pdev->dev.of_node,
+			"qcom,ubwc-mode", &mode);
+
+	if (!WARN_ON(!adreno_dev->highest_bank_bit)) {
+		hbb_lo = (adreno_dev->highest_bank_bit - 13) & 3;
+		hbb_hi = ((adreno_dev->highest_bank_bit - 13) >> 2) & 1;
+	}
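+	/*
+	 * e.g. highest_bank_bit == 16 gives hbb_lo = 3, hbb_hi = 0, while
+	 * highest_bank_bit == 17 gives hbb_lo = 0, hbb_hi = 1
+	 */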
+
+	if (mode == KGSL_UBWC_4_0)
+		rgb565_predicator = 1;
+
+	kgsl_regwrite(device, GEN7_RB_NC_MODE_CNTL,
+			((rgb565_predicator == 1) ? BIT(11) : 0) |
+			((hbb_hi == 1) ? BIT(10) : 0) |
+			BIT(4) | /* AMSBC is enabled on UBWC 3.0 and 4.0 */
+			((mal == 64) ? BIT(3) : 0) |
+			FIELD_PREP(GENMASK(2, 1), hbb_lo));
+
+	kgsl_regwrite(device, GEN7_TPL1_NC_MODE_CNTL,
+			((hbb_hi == 1) ? BIT(4) : 0) |
+			((mal == 64) ? BIT(3) : 0) |
+			FIELD_PREP(GENMASK(2, 1), hbb_lo));
+
+	/* Configure TP bicubic registers */
+	if (adreno_is_gen7_9_x(adreno_dev)) {
+		kgsl_regwrite(device, GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_1, 0x3fe05ff4);
+		kgsl_regwrite(device, GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_2, 0x3fa0ebee);
+		kgsl_regwrite(device, GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_3, 0x3f5193ed);
+		kgsl_regwrite(device, GEN7_TPL1_BICUBIC_WEIGHTS_TABLE_4, 0x3f0243f0);
+	}
+
+	kgsl_regwrite(device, GEN7_SP_NC_MODE_CNTL,
+			FIELD_PREP(GENMASK(11, 10), hbb_hi) |
+			FIELD_PREP(GENMASK(5, 4), 2) |
+			((mal == 64) ? BIT(3) : 0) |
+			FIELD_PREP(GENMASK(2, 1), hbb_lo));
+
+	kgsl_regwrite(device, GEN7_GRAS_NC_MODE_CNTL,
+			FIELD_PREP(GENMASK(8, 5),
+				(adreno_dev->highest_bank_bit - 13)));
+
+	kgsl_regwrite(device, GEN7_UCHE_MODE_CNTL,
+			((mal == 64) ? BIT(23) : 0) |
+			FIELD_PREP(GENMASK(22, 21), hbb_lo));
+
+	kgsl_regwrite(device, GEN7_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) |
+			FIELD_PREP(GENMASK(27, 0),
+				gen7_core->hang_detect_cycles));
+	kgsl_regwrite(device, GEN7_UCHE_CLIENT_PF, BIT(7) |
+			FIELD_PREP(GENMASK(3, 0), adreno_dev->uche_client_pf));
+
+	/* Enable the GMEM save/restore feature for preemption */
+	if (adreno_is_preemption_enabled(adreno_dev))
+		kgsl_regwrite(device, GEN7_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE,
+			0x1);
+
+	if (adreno_is_gen7_9_x(adreno_dev)) {
+		/* Disable ubwc merged UFC request feature */
+		kgsl_regrmw(device, GEN7_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));
+
+		/* Enable TP flaghint and other performance settings */
+		kgsl_regwrite(device, GEN7_TPL1_DBG_ECO_CNTL1, 0xc0700);
+	} else {
+		/* Disable non-ubwc read reqs from passing write reqs */
+		kgsl_regrmw(device, GEN7_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
+	}
+
+	/* Enable GMU power counter 0 to count GPU busy */
+	kgsl_regwrite(device, GEN7_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
+	kgsl_regrmw(device, GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0,
+			0xFF, 0x20);
+	kgsl_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0x1);
+
+	gen7_protect_init(adreno_dev);
+
+	/* Configure LLCC */
+	_llc_configure_gpu_scid(adreno_dev);
+	_llc_gpuhtw_slice_activate(adreno_dev);
+
+	kgsl_regwrite(device, GEN7_CP_APRIV_CNTL, GEN7_BR_APRIV_DEFAULT);
+	kgsl_regwrite(device, GEN7_CP_BV_APRIV_CNTL, GEN7_APRIV_DEFAULT);
+	kgsl_regwrite(device, GEN7_CP_LPAC_APRIV_CNTL, GEN7_APRIV_DEFAULT);
+
+	/* Marking AQE Instruction cache fetches as privileged */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE))
+		kgsl_regwrite(device, GEN7_CP_AQE_APRIV_CNTL, BIT(0));
+
+	if (adreno_is_gen7_9_x(adreno_dev))
+		kgsl_regrmw(device, GEN7_GBIF_CX_CONFIG, GENMASK(31, 29),
+				FIELD_PREP(GENMASK(31, 29), 1));
+
+	/*
+	 * CP Icache prefetch brings no benefit on a few gen7 variants because
+	 * of the prefetch granularity size.
+	 */
+	if (adreno_is_gen7_0_0(adreno_dev) || adreno_is_gen7_0_1(adreno_dev) ||
+		adreno_is_gen7_4_0(adreno_dev) || adreno_is_gen7_2_0(adreno_dev)
+		|| adreno_is_gen7_2_1(adreno_dev) || adreno_is_gen7_11_0(adreno_dev)) {
+		kgsl_regwrite(device, GEN7_CP_CHICKEN_DBG, 0x1);
+		kgsl_regwrite(device, GEN7_CP_BV_CHICKEN_DBG, 0x1);
+		kgsl_regwrite(device, GEN7_CP_LPAC_CHICKEN_DBG, 0x1);
+	}
+
+	_set_secvid(device);
+
+	/*
+	 * Enable hardware clock gating here to prevent any register access
+	 * issue due to internal clock gating.
+	 */
+	gen7_hwcg_set(adreno_dev, true);
+
+	/*
+	 * All registers must be written before this point so that we don't
+	 * miss any register programming when we patch the power up register
+	 * list.
+	 */
+	if (!adreno_dev->patch_reglist &&
+		(adreno_dev->pwrup_reglist->gpuaddr != 0)) {
+		gen7_patch_pwrup_reglist(adreno_dev);
+		adreno_dev->patch_reglist = true;
+	}
+
+	return 0;
+}
+
+/* Offsets into the MX/CX mapped register regions */
+#define GEN7_RDPM_MX_OFFSET 0xf00
+#define GEN7_RDPM_CX_OFFSET 0xf14
+
+void gen7_rdpm_mx_freq_update(struct gen7_gmu_device *gmu, u32 freq)
+{
+	if (gmu->rdpm_mx_virt) {
+		writel_relaxed(freq/1000, (gmu->rdpm_mx_virt + GEN7_RDPM_MX_OFFSET));
+
+		/*
+		 * ensure previous writes post before this one,
+		 * i.e. act like normal writel()
+		 */
+		wmb();
+	}
+}
+
+void gen7_rdpm_cx_freq_update(struct gen7_gmu_device *gmu, u32 freq)
+{
+	if (gmu->rdpm_cx_virt) {
+		writel_relaxed(freq/1000, (gmu->rdpm_cx_virt + GEN7_RDPM_CX_OFFSET));
+
+		/*
+		 * ensure previous writes post before this one,
+		 * i.e. act like normal writel()
+		 */
+		wmb();
+	}
+}
+
+int gen7_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 gpu_req = GPU_ALWAYS_EN_REQ;
+	int ret;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		gpu_req |= GPU_BCL_EN_REQ;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_CLX))
+		gpu_req |= GPU_CLX_EN_REQ;
+
+	if (adreno_is_gen7_9_x(adreno_dev))
+		gpu_req |= GPU_TSENSE_EN_REQ;
+
+	ret = kgsl_scm_gpu_init_regs(&device->pdev->dev, gpu_req);
+
+	/*
+	 * For targets that support this scm call to program the BCL id, enable
+	 * BCL here. For other targets, BCL is enabled after the first GMU boot.
+	 */
+	if (!ret && ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		adreno_dev->bcl_enabled = true;
+
+	/* If programming TZ CLX was successful, then program KMD owned CLX regs */
+	if (!ret && ADRENO_FEATURE(adreno_dev, ADRENO_CLX))
+		adreno_dev->clx_enabled = true;
+
+	/*
+	 * If the scm call returned EOPNOTSUPP, either we are on a kernel older
+	 * than 6.1 where the scm call is not supported, or we are sending an
+	 * empty request. Ignore the error in such cases.
+	 */
+	return (ret == -EOPNOTSUPP) ? 0 : ret;
+}
+
+void gen7_spin_idle_debug(struct adreno_device *adreno_dev,
+				const char *str)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	unsigned int rptr, wptr;
+	unsigned int status, status3, intstatus;
+	unsigned int hwfault;
+
+	dev_err(device->dev, str);
+
+	kgsl_regread(device, GEN7_CP_RB_RPTR, &rptr);
+	kgsl_regread(device, GEN7_CP_RB_WPTR, &wptr);
+
+	kgsl_regread(device, GEN7_RBBM_STATUS, &status);
+	kgsl_regread(device, GEN7_RBBM_STATUS3, &status3);
+	kgsl_regread(device, GEN7_RBBM_INT_0_STATUS, &intstatus);
+	kgsl_regread(device, GEN7_CP_HW_FAULT, &hwfault);
+
+	dev_err(device->dev,
+		"rb=%d pos=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n",
+		adreno_dev->cur_rb ? adreno_dev->cur_rb->id : -1, rptr, wptr,
+		status, status3, intstatus);
+
+	dev_err(device->dev, " hwfault=%8.8X\n", hwfault);
+
+	kgsl_device_snapshot(device, NULL, NULL, false);
+}
+
+/*
+ * gen7_send_cp_init() - Initialize ringbuffer
+ * @adreno_dev: Pointer to adreno device
+ * @rb: Pointer to the ringbuffer of device
+ *
+ * Submit commands for ME initialization.
+ */
+static int gen7_send_cp_init(struct adreno_device *adreno_dev,
+			 struct adreno_ringbuffer *rb)
+{
+	unsigned int *cmds;
+	int ret;
+
+	cmds = adreno_ringbuffer_allocspace(rb, GEN7_CP_INIT_DWORDS);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	gen7_cp_init_cmds(adreno_dev, cmds);
+
+	ret = gen7_ringbuffer_submit(rb, NULL);
+	if (!ret) {
+		ret = adreno_spin_idle(adreno_dev, 2000);
+		if (ret) {
+			gen7_spin_idle_debug(adreno_dev,
+				"CP initialization failed to idle\n");
+			rb->wptr = 0;
+			rb->_wptr = 0;
+		}
+	}
+
+	return ret;
+}
+
+static int gen7_post_start(struct adreno_device *adreno_dev)
+{
+	int ret;
+	unsigned int *cmds;
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	u64 kmd_postamble_addr;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev));
+	gen7_preemption_prepare_postamble(adreno_dev);
+
+	cmds = adreno_ringbuffer_allocspace(rb, (preempt->postamble_bootup_len ? 16 : 12));
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6);
+	*cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR;
+	*cmds++ = lower_32_bits(rb->preemption_desc->gpuaddr);
+	*cmds++ = upper_32_bits(rb->preemption_desc->gpuaddr);
+
+	*cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR;
+	*cmds++ = lower_32_bits(rb->secure_preemption_desc->gpuaddr);
+	*cmds++ = upper_32_bits(rb->secure_preemption_desc->gpuaddr);
+
+	if (preempt->postamble_bootup_len) {
+		*cmds++ = cp_type7_packet(CP_SET_AMBLE, 3);
+		*cmds++ = lower_32_bits(kmd_postamble_addr);
+		*cmds++ = upper_32_bits(kmd_postamble_addr);
+		*cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE)
+			| (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_bootup_len));
+	}
+
+	*cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	*cmds++ = 0;
+	*cmds++ = 0;
+	*cmds++ = 0;
+	/* generate interrupt on preemption completion */
+	*cmds++ = 0;
+
+	ret = gen7_ringbuffer_submit(rb, NULL);
+	if (!ret) {
+		ret = adreno_spin_idle(adreno_dev, 2000);
+		if (ret)
+			gen7_spin_idle_debug(adreno_dev,
+				"hw preemption initialization failed to idle\n");
+	}
+
+	return ret;
+}
+
+int gen7_rb_start(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb;
+	u64 addr;
+	int ret, i;
+	unsigned int *cmds;
+
+	/* Clear all the ringbuffers */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE);
+		kgsl_sharedmem_writel(device->scratch,
+			SCRATCH_RB_OFFSET(rb->id, rptr), 0);
+		kgsl_sharedmem_writel(device->scratch,
+			SCRATCH_RB_OFFSET(rb->id, bv_rptr), 0);
+
+		rb->wptr = 0;
+		rb->_wptr = 0;
+		rb->wptr_preempt_end = UINT_MAX;
+	}
+
+	gen7_preemption_start(adreno_dev);
+
+	/* Set up the current ringbuffer */
+	rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+
+	addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr);
+	kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr));
+	kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr));
+
+	addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_rptr);
+	kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_LO, lower_32_bits(addr));
+	kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_HI, upper_32_bits(addr));
+
+	kgsl_regwrite(device, GEN7_CP_RB_CNTL, GEN7_CP_RB_CNTL_DEFAULT);
+
+	kgsl_regwrite(device, GEN7_CP_RB_BASE,
+		lower_32_bits(rb->buffer_desc->gpuaddr));
+	kgsl_regwrite(device, GEN7_CP_RB_BASE_HI,
+		upper_32_bits(rb->buffer_desc->gpuaddr));
+
+	/* Program the ucode base for CP */
+	kgsl_regwrite(device, GEN7_CP_SQE_INSTR_BASE_LO,
+		lower_32_bits(fw->memdesc->gpuaddr));
+	kgsl_regwrite(device, GEN7_CP_SQE_INSTR_BASE_HI,
+		upper_32_bits(fw->memdesc->gpuaddr));
+
+	/* Clear the SQE_HALT to start the CP engine */
+	kgsl_regwrite(device, GEN7_CP_SQE_CNTL, 1);
+
+	ret = gen7_send_cp_init(adreno_dev, rb);
+	if (ret)
+		return ret;
+
+	ret = adreno_zap_shader_load(adreno_dev, gen7_core->zap_name);
+	if (ret)
+		return ret;
+
+	/*
+	 * Take the GPU out of secure mode. Try the zap shader if it is loaded,
+	 * otherwise just try to write directly to the secure control register
+	 */
+	if (!adreno_dev->zap_loaded)
+		kgsl_regwrite(device, GEN7_RBBM_SECVID_TRUST_CNTL, 0);
+	else {
+		cmds = adreno_ringbuffer_allocspace(rb, 2);
+		if (IS_ERR(cmds))
+			return PTR_ERR(cmds);
+
+		*cmds++ = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		*cmds++ = 0;
+
+		ret = gen7_ringbuffer_submit(rb, NULL);
+		if (!ret) {
+			ret = adreno_spin_idle(adreno_dev, 2000);
+			if (ret) {
+				gen7_spin_idle_debug(adreno_dev,
+					"Switch to unsecure failed to idle\n");
+				return ret;
+			}
+		}
+	}
+
+	return gen7_post_start(adreno_dev);
+}
+
+/*
+ * gen7_gpu_keepalive() - GMU reg write to request GPU stays on
+ * @adreno_dev: Pointer to the adreno device that has the GMU
+ * @state: State to set: true is ON, false is OFF
+ */
+static void gen7_gpu_keepalive(struct adreno_device *adreno_dev,
+		bool state)
+{
+	gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
+			GEN7_GMU_GMU_PWR_COL_KEEPALIVE, state);
+}
+
+bool gen7_hw_isidle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	gmu_core_regread(device, GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, &reg);
+
+	/* Bit 23 is GPUBUSYIGNAHB */
+	return (reg & BIT(23)) ? false : true;
+}
+
+int gen7_microcode_read(struct adreno_device *adreno_dev)
+{
+	struct adreno_firmware *sqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+
+	return adreno_get_firmware(adreno_dev, gen7_core->sqefw_name, sqe_fw);
+}
+
+/* CP Interrupt bits */
+#define CP_INT_OPCODEERROR 0
+#define CP_INT_UCODEERROR 1
+#define CP_INT_CPHWFAULT 2
+#define CP_INT_REGISTERPROTECTION 4
+#define CP_INT_VSDPARITYERROR 6
+#define CP_INT_ILLEGALINSTRUCTION 7
+#define CP_INT_OPCODEERRORLPAC 8
+#define CP_INT_UCODEERRORLPAC 9
+#define CP_INT_CPHWFAULTLPAC 10
+#define CP_INT_REGISTERPROTECTIONLPAC 11
+#define CP_INT_ILLEGALINSTRUCTIONLPAC 12
+#define CP_INT_OPCODEERRORBV 13
+#define CP_INT_UCODEERRORBV 14
+#define CP_INT_CPHWFAULTBV 15
+#define CP_INT_REGISTERPROTECTIONBV 16
+#define CP_INT_ILLEGALINSTRUCTIONBV 17
+
+static void gen7_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status1, status2;
+	struct device *dev = device->dev;
+	unsigned int opcode;
+
+	kgsl_regread(device, GEN7_CP_INTERRUPT_STATUS, &status1);
+
+	if (status1 & BIT(CP_INT_OPCODEERROR)) {
+		kgsl_regwrite(device, GEN7_CP_SQE_STAT_ADDR, 1);
+		kgsl_regread(device, GEN7_CP_SQE_STAT_DATA, &opcode);
+		dev_crit_ratelimited(dev,
+			"CP opcode error interrupt | opcode=0x%8.8x\n", opcode);
+	}
+
+	if (status1 & BIT(CP_INT_UCODEERROR))
+		dev_crit_ratelimited(dev, "CP ucode error interrupt\n");
+
+	if (status1 & BIT(CP_INT_CPHWFAULT)) {
+		kgsl_regread(device, GEN7_CP_HW_FAULT, &status2);
+		dev_crit_ratelimited(dev,
+			"CP | Ringbuffer HW fault | status=%x\n", status2);
+	}
+
+	if (status1 & BIT(CP_INT_REGISTERPROTECTION)) {
+		kgsl_regread(device, GEN7_CP_PROTECT_STATUS, &status2);
+		dev_crit_ratelimited(dev,
+			"CP | Protected mode error | %s | addr=%x | status=%x\n",
+			status2 & BIT(20) ? "READ" : "WRITE",
+			status2 & 0x3ffff, status2);
+	}
+
+	if (status1 & BIT(CP_INT_VSDPARITYERROR))
+		dev_crit_ratelimited(dev, "CP VSD decoder parity error\n");
+
+	if (status1 & BIT(CP_INT_ILLEGALINSTRUCTION))
+		dev_crit_ratelimited(dev, "CP Illegal instruction error\n");
+
+	if (status1 & BIT(CP_INT_OPCODEERRORLPAC))
+		dev_crit_ratelimited(dev, "CP Opcode error LPAC\n");
+
+	if (status1 & BIT(CP_INT_UCODEERRORLPAC))
+		dev_crit_ratelimited(dev, "CP ucode error LPAC\n");
+
+	if (status1 & BIT(CP_INT_CPHWFAULTLPAC))
+		dev_crit_ratelimited(dev, "CP hw fault LPAC\n");
+
+	if (status1 & BIT(CP_INT_REGISTERPROTECTIONLPAC))
+		dev_crit_ratelimited(dev, "CP register protection LPAC\n");
+
+	if (status1 & BIT(CP_INT_ILLEGALINSTRUCTIONLPAC))
+		dev_crit_ratelimited(dev, "CP illegal instruction LPAC\n");
+
+	if (status1 & BIT(CP_INT_OPCODEERRORBV)) {
+		kgsl_regwrite(device, GEN7_CP_BV_SQE_STAT_ADDR, 1);
+		kgsl_regread(device, GEN7_CP_BV_SQE_STAT_DATA, &opcode);
+		dev_crit_ratelimited(dev, "CP opcode error BV | opcode=0x%8.8x\n", opcode);
+	}
+
+	if (status1 & BIT(CP_INT_UCODEERRORBV))
+		dev_crit_ratelimited(dev, "CP ucode error BV\n");
+
+	if (status1 & BIT(CP_INT_CPHWFAULTBV)) {
+		kgsl_regread(device, GEN7_CP_BV_HW_FAULT, &status2);
+		dev_crit_ratelimited(dev,
+			"CP BV | Ringbuffer HW fault | status=%x\n", status2);
+	}
+
+	if (status1 & BIT(CP_INT_REGISTERPROTECTIONBV)) {
+		kgsl_regread(device, GEN7_CP_BV_PROTECT_STATUS, &status2);
+		dev_crit_ratelimited(dev,
+			"CP BV | Protected mode error | %s | addr=%x | status=%x\n",
+			status2 & BIT(20) ? "READ" : "WRITE",
+			status2 & 0x3ffff, status2);
+	}
+
+	if (status1 & BIT(CP_INT_ILLEGALINSTRUCTIONBV))
+		dev_crit_ratelimited(dev, "CP illegal instruction BV\n");
+}
+
+static void gen7_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct device *dev = device->dev;
+
+	switch (bit) {
+	case GEN7_INT_AHBERROR:
+		{
+		u32 err_details_0, err_details_1;
+
+		kgsl_regread(device, GEN7_CP_RL_ERROR_DETAILS_0, &err_details_0);
+		kgsl_regread(device, GEN7_CP_RL_ERROR_DETAILS_1, &err_details_1);
+		dev_crit_ratelimited(dev,
+			"CP: AHB bus error, CP_RL_ERROR_DETAILS_0:0x%x CP_RL_ERROR_DETAILS_1:0x%x\n",
+			err_details_0, err_details_1);
+		break;
+		}
+	case GEN7_INT_ATBASYNCFIFOOVERFLOW:
+		dev_crit_ratelimited(dev, "RBBM: ATB ASYNC overflow\n");
+		break;
+	case GEN7_INT_ATBBUSOVERFLOW:
+		dev_crit_ratelimited(dev, "RBBM: ATB bus overflow\n");
+		break;
+	case GEN7_INT_OUTOFBOUNDACCESS:
+		dev_crit_ratelimited(dev, "UCHE: Out of bounds access\n");
+		break;
+	case GEN7_INT_UCHETRAPINTERRUPT:
+		dev_crit_ratelimited(dev, "UCHE: Trap interrupt\n");
+		break;
+	case GEN7_INT_TSBWRITEERROR:
+		{
+		u32 lo, hi;
+
+		kgsl_regread(device, GEN7_RBBM_SECVID_TSB_STATUS_LO, &lo);
+		kgsl_regread(device, GEN7_RBBM_SECVID_TSB_STATUS_HI, &hi);
+
+		dev_crit_ratelimited(dev, "TSB: Write error interrupt: Address: 0x%lx MID: %lu\n",
+			FIELD_GET(GENMASK(16, 0), hi) << 32 | lo,
+			FIELD_GET(GENMASK(31, 23), hi));
+		break;
+		}
+	default:
+		dev_crit_ratelimited(dev, "Unknown interrupt %d\n", bit);
+	}
+}
+
+static const char *const uche_client[] = {
+	"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
+	"BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
+	"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
+	"BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP"
+};
+
+static const char *const uche_lpac_client[] = {
+	"-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC"
+};
+
+#define SCOOBYDOO 0x5c00bd00
+
+static const char *gen7_fault_block_uche(struct kgsl_device *device,
+		char *str, int size, bool lpac)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int uche_client_id = adreno_dev->uche_client_pf;
+	const char *uche_client_str, *fault_block;
+
+	/*
+	 * The SMMU driver takes a vote on the CX gdsc before calling the kgsl
+	 * pagefault handler. If there is contention for the device mutex in
+	 * this path and the dispatcher fault handler is holding this lock,
+	 * trying to turn off the CX gdsc will fail during the reset. So, to
+	 * avoid blocking here, try to lock the device mutex and return if it
+	 * fails.
+	 */
+	if (!mutex_trylock(&device->mutex))
+		goto regread_fail;
+
+	if (!kgsl_state_is_awake(device)) {
+		mutex_unlock(&device->mutex);
+		goto regread_fail;
+	}
+
+	kgsl_regread(device, GEN7_UCHE_CLIENT_PF, &uche_client_id);
+	mutex_unlock(&device->mutex);
+
+	/* Ignore the value if the gpu is in IFPC */
+	if (uche_client_id == SCOOBYDOO) {
+		uche_client_id = adreno_dev->uche_client_pf;
+		goto regread_fail;
+	}
+
+	/* UCHE client id mask is bits [6:0] */
+	uche_client_id &= GENMASK(6, 0);
+
+regread_fail:
+	if (lpac) {
+		fault_block = "UCHE_LPAC";
+		if (uche_client_id >= ARRAY_SIZE(uche_lpac_client))
+			goto fail;
+		uche_client_str = uche_lpac_client[uche_client_id];
+	} else {
+		fault_block = "UCHE";
+		if (uche_client_id >= ARRAY_SIZE(uche_client))
+			goto fail;
+		uche_client_str = uche_client[uche_client_id];
+	}
+
+	snprintf(str, size, "%s: %s", fault_block, uche_client_str);
+	return str;
+
+fail:
+	snprintf(str, size, "%s: Unknown (client_id: %u)",
+			fault_block, uche_client_id);
+	return str;
+}
+
+static const char *gen7_iommu_fault_block(struct kgsl_device *device,
+		unsigned int fsynr1)
+{
+	unsigned int mid = fsynr1 & 0xff;
+	static char str[36];
+
+	switch (mid) {
+	case 0x0:
+		return "CP";
+	case 0x1:
+		return "UCHE: Unknown";
+	case 0x2:
+		return "UCHE_LPAC: Unknown";
+	case 0x3:
+		return gen7_fault_block_uche(device, str, sizeof(str), false);
+	case 0x4:
+		return "CCU";
+	case 0x5:
+		return "Flag cache";
+	case 0x6:
+		return "PREFETCH";
+	case 0x7:
+		return "GMU";
+	case 0x8:
+		return gen7_fault_block_uche(device, str, sizeof(str), true);
+	}
+
+	snprintf(str, sizeof(str), "Unknown (mid: %u)", mid);
+	return str;
+}
+
+static void gen7_cp_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (adreno_is_preemption_enabled(adreno_dev))
+		gen7_preemption_trigger(adreno_dev, true);
+
+	adreno_dispatcher_schedule(device);
+}
+
+/*
+ * gen7_gpc_err_int_callback() - ISR for GPC error interrupts
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+static void gen7_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/*
+	 * A GPC error is typically the result of a SW programming mistake.
+	 * Force a GPU fault for this interrupt so that we can debug it with
+	 * the help of a register dump.
+	 */
+
+	dev_crit(device->dev, "RBBM: GPC error\n");
+	adreno_irqctrl(adreno_dev, 0);
+
+	/* Trigger a fault in the dispatcher - this will effect a restart */
+	adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT);
+}
+
+/*
+ * gen7_swfuse_violation_callback() - ISR for software fuse violation interrupt
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+static void gen7_swfuse_violation_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	/*
+	 * A SWFUSEVIOLATION error is typically the result of enabling a
+	 * software feature which is not supported by the hardware. The
+	 * following feature violations can be reported:
+	 * 1) FASTBLEND (BIT:0): No fault, RB will send the workload to the
+	 *    legacy blender HW pipeline.
+	 * 2) LPAC (BIT:1): Fault
+	 * 3) RAYTRACING (BIT:2): Fault
+	 */
+	kgsl_regread(device, GEN7_RBBM_SW_FUSE_INT_STATUS, &status);
+
+	/*
+	 * RBBM_INT_CLEAR_CMD will not clear the SWFUSEVIOLATION interrupt.
+	 * Hence, clear the swfuse irq explicitly.
+	 */
+	kgsl_regwrite(device, GEN7_RBBM_SW_FUSE_INT_MASK, 0);
+
+	dev_crit_ratelimited(device->dev,
+		"RBBM: SW Feature Fuse violation status=0x%8.8x\n", status);
+
+	/* Trigger a fault in the dispatcher for LPAC and RAYTRACING violation */
+	if (status & GENMASK(GEN7_RAYTRACING_SW_FUSE, GEN7_LPAC_SW_FUSE)) {
+		adreno_irqctrl(adreno_dev, 0);
+		adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT);
+	}
+}
+
+static const struct adreno_irq_funcs gen7_irq_funcs[32] = {
+	ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */
+	ADRENO_IRQ_CALLBACK(gen7_err_callback), /* 1 - RBBM_AHB_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 2 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 3 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 4 - CPIPCINT0 */
+	ADRENO_IRQ_CALLBACK(NULL), /* 5 - CPIPCINT1 */
+	ADRENO_IRQ_CALLBACK(gen7_err_callback), /* 6 - ATBASYNCOVERFLOW */
+	ADRENO_IRQ_CALLBACK(gen7_gpc_err_int_callback), /* 7 - GPC_ERR */
+	ADRENO_IRQ_CALLBACK(gen7_preemption_callback),/* 8 - CP_SW */
+	ADRENO_IRQ_CALLBACK(gen7_cp_hw_err_callback), /* 9 - CP_HW_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 10 - CP_CCU_FLUSH_DEPTH_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 11 - CP_CCU_FLUSH_COLOR_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_CCU_RESOLVE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 13 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 14 - UNUSED */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */
+	ADRENO_IRQ_CALLBACK(NULL), /* 16 - CP_RB_INT_LPAC*/
+	ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 18 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNUSED */
+	ADRENO_IRQ_CALLBACK(gen7_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 21 - CP_CACHE_TS_LPAC */
+	ADRENO_IRQ_CALLBACK(gen7_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */
+	ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), /* 23 - MISHANGDETECT */
+	ADRENO_IRQ_CALLBACK(gen7_err_callback), /* 24 - UCHE_OOB_ACCESS */
+	ADRENO_IRQ_CALLBACK(gen7_err_callback), /* 25 - UCHE_TRAP_INTR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */
+	ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */
+	ADRENO_IRQ_CALLBACK(gen7_err_callback), /* 28 - TSBWRITEERROR */
+	ADRENO_IRQ_CALLBACK(gen7_swfuse_violation_callback), /* 29 - SWFUSEVIOLATION */
+	ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */
+	ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */
+};
+
+/*
+ * If the AHB fence is not in ALLOW mode when we receive an RBBM
+ * interrupt, something went wrong. This means that we cannot proceed
+ * since the IRQ status and clear registers are not accessible.
+ * This is usually harmless because the GMU will abort power collapse
+ * and change the fence back to ALLOW. Poll so that this can happen.
+ */
+static int gen7_irq_poll_fence(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u32 status, fence, fence_retries = 0;
+	u64 a, b, c;
+
+	a = gpudev->read_alwayson(adreno_dev);
+
+	kgsl_regread(device, GEN7_GMU_AO_AHB_FENCE_CTRL, &fence);
+
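+	/* Poll (100 retries with a 1us delay) for the fence to return to ALLOW */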
+	while (fence != 0) {
+		b = gpudev->read_alwayson(adreno_dev);
+
+		/* Wait for small time before trying again */
+		udelay(1);
+		kgsl_regread(device, GEN7_GMU_AO_AHB_FENCE_CTRL, &fence);
+
+		if (fence_retries == 100 && fence != 0) {
+			c = gpudev->read_alwayson(adreno_dev);
+
+			kgsl_regread(device, GEN7_GMU_RBBM_INT_UNMASKED_STATUS,
+				&status);
+
+			dev_crit_ratelimited(device->dev,
+				"status=0x%x Unmasked status=0x%x Mask=0x%x timestamps: %llx %llx %llx\n",
+					status & adreno_dev->irq_mask, status,
+					adreno_dev->irq_mask, a, b, c);
+			return -ETIMEDOUT;
+		}
+
+		fence_retries++;
+	}
+
+	return 0;
+}
+
+static irqreturn_t gen7_irq_handler(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	irqreturn_t ret = IRQ_NONE;
+	u32 status;
+
+	/*
+	 * The GPU can power down once INT_0_STATUS is read below, but there
+	 * might still be some register reads required, so force the GMU/GPU
+	 * into KEEPALIVE mode until we are done with the ISR.
+	 */
+	gen7_gpu_keepalive(adreno_dev, true);
+
+	if (gen7_irq_poll_fence(adreno_dev)) {
+		adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
+		goto done;
+	}
+
+	kgsl_regread(device, GEN7_RBBM_INT_0_STATUS, &status);
+
+	kgsl_regwrite(device, GEN7_RBBM_INT_CLEAR_CMD, status);
+
+	ret = adreno_irq_callbacks(adreno_dev, gen7_irq_funcs, status);
+
+	trace_kgsl_gen7_irq_status(adreno_dev, status);
+
+done:
+	/* If hard fault, then let snapshot turn off the keepalive */
+	if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT))
+		gen7_gpu_keepalive(adreno_dev, false);
+
+	return ret;
+}
+
+int gen7_probe_common(struct platform_device *pdev,
+	struct adreno_device *adreno_dev, u32 chipid,
+	const struct adreno_gpu_core *gpucore)
+{
+	const struct adreno_gpudev *gpudev = gpucore->gpudev;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = container_of(gpucore,
+			struct adreno_gen7_core, base);
+	int ret;
+
+	adreno_dev->gpucore = gpucore;
+	adreno_dev->chipid = chipid;
+	adreno_dev->cx_misc_base = GEN7_CX_MISC_BASE;
+
+	adreno_reg_offset_init(gpudev->reg_offsets);
+
+	adreno_dev->hwcg_enabled = true;
+	adreno_dev->uche_client_pf = 1;
+
+	kgsl_pwrscale_fast_bus_hint(gen7_core->fast_bus_hint);
+
+	device->pwrctrl.rt_bus_hint = gen7_core->rt_bus_hint;
+	device->pwrctrl.cx_cfg_gdsc_offset = adreno_is_gen7_11_0(adreno_dev) ?
+					GEN7_11_0_GPU_CC_CX_CFG_GDSCR : GEN7_GPU_CC_CX_CFG_GDSCR;
+
+	ret = adreno_device_probe(pdev, adreno_dev);
+	if (ret)
+		return ret;
+
+	if (adreno_preemption_feature_set(adreno_dev)) {
+		const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+
+		adreno_dev->preempt.preempt_level = gen7_core->preempt_level;
+		adreno_dev->preempt.skipsaverestore = true;
+		adreno_dev->preempt.usesgmem = true;
+		set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+	}
+
+	/* debugfs node for ACD calibration */
+	debugfs_create_file("acd_calibrate", 0644, device->d_debugfs, device, &acd_cal_fops);
+
+	gen7_coresight_init(adreno_dev);
+
+	/* Dump additional AQE 16KB data on top of default 96KB(48(BR)+48(BV)) */
+	device->snapshot_ctxt_record_size = ADRENO_FEATURE(adreno_dev, ADRENO_AQE) ?
+					112 * SZ_1K : 96 * SZ_1K;
+
+	return 0;
+}
+
+/* Register offset defines for Gen7, in order of enum adreno_regs */
+static unsigned int gen7_register_offsets[ADRENO_REG_REGISTER_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, GEN7_CP_RB_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, GEN7_CP_RB_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, GEN7_CP_RB_RPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, GEN7_CP_RB_WPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, GEN7_CP_SQE_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, GEN7_CP_IB1_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, GEN7_CP_IB1_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, GEN7_CP_IB1_REM_SIZE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, GEN7_CP_IB2_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, GEN7_CP_IB2_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, GEN7_CP_IB2_REM_SIZE),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, GEN7_RBBM_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, GEN7_RBBM_STATUS3),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, GEN7_RBBM_INT_0_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, GEN7_RBBM_SW_RESET_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK,
+			GEN7_GMU_AO_HOST_INTERRUPT_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_GMU_GMU2HOST_INTR_MASK,
+			GEN7_GMU_GMU2HOST_INTR_MASK),
+};
+
+static u32 _get_pipeid(u32 groupid)
+{
+	if (groupid == KGSL_PERFCOUNTER_GROUP_BV_TSE || groupid == KGSL_PERFCOUNTER_GROUP_BV_RAS
+						|| groupid == KGSL_PERFCOUNTER_GROUP_BV_LRZ
+						|| groupid == KGSL_PERFCOUNTER_GROUP_BV_HLSQ)
+		return PIPE_BV;
+	else if (groupid == KGSL_PERFCOUNTER_GROUP_HLSQ || groupid == KGSL_PERFCOUNTER_GROUP_TSE
+						|| groupid == KGSL_PERFCOUNTER_GROUP_RAS
+						|| groupid == KGSL_PERFCOUNTER_GROUP_LRZ)
+		return PIPE_BR;
+	else
+		return PIPE_NONE;
+}
+
+int gen7_perfcounter_remove(struct adreno_device *adreno_dev,
+	struct adreno_perfcount_register *reg, u32 groupid)
+{
+	const struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	const struct adreno_perfcount_group *group;
+	void *ptr = adreno_dev->pwrup_reglist->hostptr;
+	struct cpu_gpu_lock *lock = ptr;
+	u32 *data = ptr + sizeof(*lock);
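+	/* The static lists are (offset, value) pairs; dynamic triplets start after them */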
+	int offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2;
+	int i, second_last_offset, last_offset;
+	bool remove_counter = false;
+	u32 pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid));
+
+	if (!lock->dynamic_list_len)
+		return -EINVAL;
+
+	group = &(counters->groups[groupid]);
+
+	if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)) {
+		if (lock->dynamic_list_len != 1)
+			return 0;
+
+		if (kgsl_hwlock(lock)) {
+			kgsl_hwunlock(lock);
+			return -EBUSY;
+		}
+		goto disable_perfcounter;
+	}
+
+	second_last_offset = offset + (lock->dynamic_list_len - 2) * 3;
+	last_offset = second_last_offset + 3;
+
+	/* Look for the perfcounter to remove in the list */
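+	/* Skip the last entry, which is always GEN7_RBBM_PERFCTR_CNTL */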
+	for (i = 0; i < lock->dynamic_list_len - 1; i++) {
+		if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) {
+			remove_counter = true;
+			break;
+		}
+		offset += 3;
+	}
+
+	if (!remove_counter)
+		return -ENOENT;
+
+	if (kgsl_hwlock(lock)) {
+		kgsl_hwunlock(lock);
+		return -EBUSY;
+	}
+
+	/*
+	 * The entry was found, so remove it from the list by overwriting it
+	 * with the second last entry. Skip this if the data at offset is
+	 * already the second last entry.
+	 */
+	if (offset != second_last_offset)
+		memcpy(&data[offset], &data[second_last_offset], 3 * sizeof(u32));
+
+	/*
+	 * Overwrite the second last entry with the last entry, since the last
+	 * entry always has to be GEN7_RBBM_PERFCTR_CNTL.
+	 */
+	memcpy(&data[second_last_offset], &data[last_offset], 3 * sizeof(u32));
+
+	/* Clear the last entry */
+	memset(&data[last_offset], 0, 3 * sizeof(u32));
+
+	lock->dynamic_list_len--;
+
+disable_perfcounter:
+	/*
+	 * If the dynamic list length is 1 and no_restore_count is 0, then we
+	 * can remove the only entry in the list, which is GEN7_RBBM_PERFCTR_CNTL.
+	 */
+	if (lock->dynamic_list_len == 1 && !adreno_dev->no_restore_count) {
+		memset(&data[offset], 0, 3 * sizeof(u32));
+		lock->dynamic_list_len = 0;
+	}
+
+	kgsl_hwunlock(lock);
+	return 0;
+}
+
+int gen7_perfcounter_update(struct adreno_device *adreno_dev,
+	struct adreno_perfcount_register *reg, bool update_reg, u32 pipe, unsigned long flags)
+{
+	void *ptr = adreno_dev->pwrup_reglist->hostptr;
+	struct cpu_gpu_lock *lock = ptr;
+	u32 *data = ptr + sizeof(*lock);
+	int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2;
+	bool select_reg_present = false;
+
+	if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) {
+		for (i = 0; i < lock->dynamic_list_len; i++) {
+			if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) {
+				select_reg_present = true;
+				break;
+			}
+
+			if (data[offset + 1] == GEN7_RBBM_PERFCTR_CNTL)
+				break;
+
+			offset += 3;
+		}
+	} else if (lock->dynamic_list_len) {
+		goto update;
+	}
+
+	if (kgsl_hwlock(lock)) {
+		kgsl_hwunlock(lock);
+		return -EBUSY;
+	}
+
+	/*
+	 * If the perfcounter select register is already present in the reglist,
+	 * update it; otherwise append the <aperture, select register, value>
+	 * triplet to the end of the list.
+	 */
+	if (select_reg_present) {
+		data[offset + 2] = reg->countable;
+		kgsl_hwunlock(lock);
+		goto update;
+	}
+
+	/* Initialize the lock->dynamic_list_len to account for GEN7_RBBM_PERFCTR_CNTL */
+	if (!lock->dynamic_list_len)
+		lock->dynamic_list_len = 1;
+
+	/*
+	 * For all targets GEN7_RBBM_PERFCTR_CNTL needs to be the last entry,
+	 * so overwrite the existing GEN7_RBBM_PERFCTR_CNTL and add it back to
+	 * the end.
+	 */
+	if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) {
+		data[offset++] = pipe;
+		data[offset++] = reg->select;
+		data[offset++] = reg->countable;
+		lock->dynamic_list_len++;
+	}
+
+	data[offset++] = FIELD_PREP(GENMASK(13, 12), PIPE_NONE);
+	data[offset++] = GEN7_RBBM_PERFCTR_CNTL;
+	data[offset++] = 1;
+
+	kgsl_hwunlock(lock);
+
+update:
+	if (update_reg)
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg->select,
+			reg->countable);
+	return 0;
+}
+
+static u64 gen7_9_0_read_alwayson(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 lo = 0, hi = 0, tmp = 0;
+
+	/* Always use the GMU AO counter when doing an AHB read */
+	gmu_core_regread(device, GEN7_GMU_CX_AO_COUNTER_HI, &hi);
+	gmu_core_regread(device, GEN7_GMU_CX_AO_COUNTER_LO, &lo);
+
+	/* Check for overflow */
+	gmu_core_regread(device, GEN7_GMU_CX_AO_COUNTER_HI, &tmp);
+
+	if (hi != tmp) {
+		gmu_core_regread(device, GEN7_GMU_CX_AO_COUNTER_LO,
+				&lo);
+		hi = tmp;
+	}
+
+	return (((u64) hi) << 32) | lo;
+}
+
+static u64 gen7_read_alwayson(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 lo = 0, hi = 0, tmp = 0;
+
+	/* Always use the GMU AO counter when doing an AHB read */
+	gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_H, &hi);
+	gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_L, &lo);
+
+	/* Check for overflow */
+	gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_H, &tmp);
+
+	if (hi != tmp) {
+		gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_L,
+				&lo);
+		hi = tmp;
+	}
+
+	return (((u64) hi) << 32) | lo;
+}
+
+static int gen7_9_0_lpac_store(struct adreno_device *adreno_dev, bool enable)
+{
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
+		return -EINVAL;
+
+	if (!(adreno_dev->feature_fuse & BIT(GEN7_LPAC_SW_FUSE)) ||
+		(adreno_dev->lpac_enabled == enable))
+		return 0;
+
+	/* Power down the GPU before changing the lpac setting */
+	return adreno_power_cycle_bool(adreno_dev, &adreno_dev->lpac_enabled,
+				       enable);
+}
+
+static int gen7_lpac_store(struct adreno_device *adreno_dev, bool enable)
+{
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
+		return -EINVAL;
+
+	if (adreno_dev->lpac_enabled == enable)
+		return 0;
+
+	/* Power down the GPU before changing the lpac setting */
+	return adreno_power_cycle_bool(adreno_dev, &adreno_dev->lpac_enabled,
+				       enable);
+}
+
+static void gen7_remove(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	/* Make sure the timer is initialized, otherwise a WARN_ON is generated */
+	if (adreno_preemption_feature_set(adreno_dev) &&
+	    (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)))
+		del_timer(&adreno_dev->preempt.timer);
+}
+
+static void gen7_read_bus_stats(struct kgsl_device *device,
+		struct kgsl_power_stats *stats,
+		struct adreno_busy_data *busy)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u64 ram_cycles, starved_ram;
+
+	ram_cycles = counter_delta(device, adreno_dev->ram_cycles_lo,
+		&busy->bif_ram_cycles);
+
+	starved_ram = counter_delta(device, adreno_dev->starved_ram_lo,
+		&busy->bif_starved_ram);
+
+	ram_cycles += counter_delta(device,
+		adreno_dev->ram_cycles_lo_ch1_read,
+		&busy->bif_ram_cycles_read_ch1);
+
+	ram_cycles += counter_delta(device,
+		adreno_dev->ram_cycles_lo_ch0_write,
+		&busy->bif_ram_cycles_write_ch0);
+
+	ram_cycles += counter_delta(device,
+		adreno_dev->ram_cycles_lo_ch1_write,
+		&busy->bif_ram_cycles_write_ch1);
+
+	starved_ram += counter_delta(device,
+		adreno_dev->starved_ram_lo_ch1,
+		&busy->bif_starved_ram_ch1);
+
+	stats->ram_time = ram_cycles;
+	stats->ram_wait = starved_ram;
+}
+
+static void gen7_power_stats(struct adreno_device *adreno_dev,
+		struct kgsl_power_stats *stats)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_busy_data *busy = &adreno_dev->busy_data;
+	u64 gpu_busy;
+
+	/* Set the GPU busy counter for frequency scaling */
+	gpu_busy = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
+		&busy->gpu_busy);
+
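+	/* Convert 19.2MHz XO clock ticks to microseconds: ticks * 10 / 192 */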
+	stats->busy_time = gpu_busy * 10;
+	do_div(stats->busy_time, 192);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) {
+		u32 ifpc = counter_delta(device,
+			GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L,
+			&busy->num_ifpc);
+
+		adreno_dev->ifpc_count += ifpc;
+		if (ifpc > 0)
+			trace_adreno_ifpc_count(adreno_dev->ifpc_count);
+	}
+
+	if (device->pwrctrl.bus_control)
+		gen7_read_bus_stats(device, stats, busy);
+
+	if (adreno_dev->bcl_enabled) {
+		u32 a, b, c;
+
+		a = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L,
+			&busy->throttle_cycles[0]);
+
+		b = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L,
+			&busy->throttle_cycles[1]);
+
+		c = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L,
+			&busy->throttle_cycles[2]);
+
+		if (a || b || c)
+			trace_kgsl_bcl_clock_throttling(a, b, c);
+
+		if (adreno_is_gen7_2_x_family(adreno_dev)) {
+			u32 bcl_throttle = counter_delta(device,
+				GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L, &busy->bcl_throttle);
+			/*
+			 * This counts the number of throttled cycles in XO cycles. Convert
+			 * it to microseconds by dividing by the XO frequency, which is
+			 * 19.2MHz.
+			 */
+			adreno_dev->bcl_throttle_time_us += ((bcl_throttle * 10) / 192);
+		}
+	}
+}
+
+static int gen7_setproperty(struct kgsl_device_private *dev_priv,
+		u32 type, void __user *value, u32 sizebytes)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 enable;
+
+	if (type != KGSL_PROP_PWRCTRL)
+		return -ENODEV;
+
+	if (sizebytes != sizeof(enable))
+		return -EINVAL;
+
+	if (copy_from_user(&enable, value, sizeof(enable)))
+		return -EFAULT;
+
+	mutex_lock(&device->mutex);
+
+	if (enable) {
+		clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags);
+
+		kgsl_pwrscale_enable(device);
+	} else {
+		set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags);
+
+		if (!adreno_active_count_get(adreno_dev))
+			adreno_active_count_put(adreno_dev);
+
+		kgsl_pwrscale_disable(device, true);
+	}
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static void gen7_set_isdb_breakpoint_registers(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct clk *clk;
+	int ret;
+
+	if (!device->set_isdb_breakpoint || device->ftbl->is_hwcg_on(device)
+			|| device->qdss_gfx_virt == NULL || !device->force_panic)
+		return;
+
+	clk = clk_get(&device->pdev->dev, "apb_pclk");
+
+	if (IS_ERR(clk)) {
+		dev_err(device->dev, "Unable to get QDSS clock\n");
+		goto err;
+	}
+
+	ret = clk_prepare_enable(clk);
+
+	if (ret) {
+		dev_err(device->dev, "QDSS Clock enable error: %d\n", ret);
+		clk_put(clk);
+		goto err;
+	}
+
+	/* Issue break command for eight SPs */
+	isdb_write(device->qdss_gfx_virt, 0x0000);
+	isdb_write(device->qdss_gfx_virt, 0x1000);
+	isdb_write(device->qdss_gfx_virt, 0x2000);
+	isdb_write(device->qdss_gfx_virt, 0x3000);
+	isdb_write(device->qdss_gfx_virt, 0x4000);
+	isdb_write(device->qdss_gfx_virt, 0x5000);
+	isdb_write(device->qdss_gfx_virt, 0x6000);
+	isdb_write(device->qdss_gfx_virt, 0x7000);
+
+	/* gen7_2_x has additional SPs */
+	if (adreno_is_gen7_2_x_family(adreno_dev)) {
+		isdb_write(device->qdss_gfx_virt, 0x8000);
+		isdb_write(device->qdss_gfx_virt, 0x9000);
+		isdb_write(device->qdss_gfx_virt, 0xa000);
+		isdb_write(device->qdss_gfx_virt, 0xb000);
+	}
+
+	clk_disable_unprepare(clk);
+	clk_put(clk);
+
+	return;
+
+err:
+	/* Do not force kernel panic if isdb writes did not go through */
+	device->force_panic = false;
+}
+
+static void gen7_swfuse_irqctrl(struct adreno_device *adreno_dev, bool state)
+{
+	if (adreno_is_gen7_9_x(adreno_dev))
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN7_RBBM_SW_FUSE_INT_MASK,
+			state ? GEN7_SW_FUSE_INT_MASK : 0);
+}
+
+static void gen7_lpac_fault_header(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj_lpac)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_context *drawctxt_lpac;
+	u32 status = 0, lpac_rptr = 0, lpac_wptr = 0, lpac_ib1sz = 0, lpac_ib2sz = 0;
+	u64 lpac_ib1base = 0, lpac_ib2base = 0;
+	bool gx_on = adreno_gx_is_on(adreno_dev);
+
+	drawctxt_lpac = ADRENO_CONTEXT(drawobj_lpac->context);
+	drawobj_lpac->context->last_faulted_cmd_ts = drawobj_lpac->timestamp;
+	drawobj_lpac->context->total_fault_count++;
+
+	pr_context(device, drawobj_lpac->context,
+		"LPAC ctx %d ctx_type %s ts %d dispatch_queue=%d\n",
+		drawobj_lpac->context->id, kgsl_context_type(drawctxt_lpac->type),
+		drawobj_lpac->timestamp, drawobj_lpac->context->gmu_dispatch_queue);
+
+	pr_context(device, drawobj_lpac->context, "lpac cmdline: %s\n",
+			drawctxt_lpac->base.proc_priv->cmdline);
+	if (!gx_on)
+		goto done;
+
+	kgsl_regread(device, GEN7_RBBM_STATUS, &status);
+	kgsl_regread(device, GEN7_CP_LPAC_RB_RPTR, &lpac_rptr);
+	kgsl_regread(device, GEN7_CP_LPAC_RB_WPTR, &lpac_wptr);
+	kgsl_regread64(device, GEN7_CP_LPAC_IB1_BASE_HI,
+		       GEN7_CP_LPAC_IB1_BASE, &lpac_ib1base);
+	kgsl_regread(device, GEN7_CP_LPAC_IB1_REM_SIZE, &lpac_ib1sz);
+	kgsl_regread64(device, GEN7_CP_LPAC_IB2_BASE_HI,
+		       GEN7_CP_LPAC_IB2_BASE, &lpac_ib2base);
+	kgsl_regread(device, GEN7_CP_LPAC_IB2_REM_SIZE, &lpac_ib2sz);
+
+	pr_context(device, drawobj_lpac->context,
+		"LPAC: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+		status, lpac_rptr, lpac_wptr, lpac_ib1base,
+		lpac_ib1sz, lpac_ib2base, lpac_ib2sz);
+
+done:
+	trace_adreno_gpu_fault(drawobj_lpac->context->id, drawobj_lpac->timestamp, status,
+		lpac_rptr, lpac_wptr, lpac_ib1base, lpac_ib1sz, lpac_ib2base, lpac_ib2sz,
+		adreno_get_level(drawobj_lpac->context));
+}
+
+const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev = {
+	.base = {
+		.reg_offsets = gen7_register_offsets,
+		.probe = gen7_hwsched_probe,
+		.snapshot = gen7_hwsched_snapshot,
+		.irq_handler = gen7_irq_handler,
+		.iommu_fault_block = gen7_iommu_fault_block,
+		.preemption_context_init = gen7_preemption_context_init,
+		.context_detach = gen7_hwsched_context_detach,
+		.read_alwayson = gen7_9_0_read_alwayson,
+		.reset = gen7_hwsched_reset_replay,
+		.power_ops = &gen7_hwsched_power_ops,
+		.power_stats = gen7_power_stats,
+		.setproperty = gen7_setproperty,
+		.hw_isidle = gen7_hw_isidle,
+		.add_to_va_minidump = gen7_hwsched_add_to_minidump,
+		.gx_is_on = gen7_gmu_gx_is_on,
+		.send_recurring_cmdobj = gen7_hwsched_send_recurring_cmdobj,
+		.perfcounter_remove = gen7_perfcounter_remove,
+		.set_isdb_breakpoint_registers = gen7_set_isdb_breakpoint_registers,
+		.context_destroy = gen7_hwsched_context_destroy,
+		.lpac_store = gen7_9_0_lpac_store,
+		.get_uche_trap_base = gen7_get_uche_trap_base,
+		.lpac_fault_header = gen7_lpac_fault_header,
+	},
+	.hfi_probe = gen7_hwsched_hfi_probe,
+	.hfi_remove = gen7_hwsched_hfi_remove,
+	.handle_watchdog = gen7_hwsched_handle_watchdog,
+};
+
+const struct gen7_gpudev adreno_gen7_hwsched_gpudev = {
+	.base = {
+		.reg_offsets = gen7_register_offsets,
+		.probe = gen7_hwsched_probe,
+		.snapshot = gen7_hwsched_snapshot,
+		.irq_handler = gen7_irq_handler,
+		.iommu_fault_block = gen7_iommu_fault_block,
+		.preemption_context_init = gen7_preemption_context_init,
+		.context_detach = gen7_hwsched_context_detach,
+		.read_alwayson = gen7_read_alwayson,
+		.reset = gen7_hwsched_reset_replay,
+		.power_ops = &gen7_hwsched_power_ops,
+		.power_stats = gen7_power_stats,
+		.setproperty = gen7_setproperty,
+		.hw_isidle = gen7_hw_isidle,
+		.add_to_va_minidump = gen7_hwsched_add_to_minidump,
+		.gx_is_on = gen7_gmu_gx_is_on,
+		.send_recurring_cmdobj = gen7_hwsched_send_recurring_cmdobj,
+		.perfcounter_remove = gen7_perfcounter_remove,
+		.set_isdb_breakpoint_registers = gen7_set_isdb_breakpoint_registers,
+		.context_destroy = gen7_hwsched_context_destroy,
+		.lpac_store = gen7_lpac_store,
+		.get_uche_trap_base = gen7_get_uche_trap_base,
+		.lpac_fault_header = gen7_lpac_fault_header,
+	},
+	.hfi_probe = gen7_hwsched_hfi_probe,
+	.hfi_remove = gen7_hwsched_hfi_remove,
+	.handle_watchdog = gen7_hwsched_handle_watchdog,
+};
+
+const struct gen7_gpudev adreno_gen7_gmu_gpudev = {
+	.base = {
+		.reg_offsets = gen7_register_offsets,
+		.probe = gen7_gmu_device_probe,
+		.snapshot = gen7_gmu_snapshot,
+		.irq_handler = gen7_irq_handler,
+		.rb_start = gen7_rb_start,
+		.gpu_keepalive = gen7_gpu_keepalive,
+		.hw_isidle = gen7_hw_isidle,
+		.iommu_fault_block = gen7_iommu_fault_block,
+		.reset = gen7_gmu_reset,
+		.preemption_schedule = gen7_preemption_schedule,
+		.preemption_context_init = gen7_preemption_context_init,
+		.read_alwayson = gen7_read_alwayson,
+		.power_ops = &gen7_gmu_power_ops,
+		.remove = gen7_remove,
+		.ringbuffer_submitcmd = gen7_ringbuffer_submitcmd,
+		.power_stats = gen7_power_stats,
+		.setproperty = gen7_setproperty,
+		.add_to_va_minidump = gen7_gmu_add_to_minidump,
+		.gx_is_on = gen7_gmu_gx_is_on,
+		.perfcounter_remove = gen7_perfcounter_remove,
+		.set_isdb_breakpoint_registers = gen7_set_isdb_breakpoint_registers,
+		.swfuse_irqctrl = gen7_swfuse_irqctrl,
+		.get_uche_trap_base = gen7_get_uche_trap_base,
+	},
+	.hfi_probe = gen7_gmu_hfi_probe,
+	.handle_watchdog = gen7_gmu_handle_watchdog,
+};

+ 519 - 0
qcom/opensource/graphics-kernel/adreno_gen7.h

@@ -0,0 +1,519 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_GEN7_H_
+#define _ADRENO_GEN7_H_
+
+#include <linux/delay.h>
+
+#include "gen7_reg.h"
+#include "adreno_gen7_gmu.h"
+
+/* Forward struct declaration */
+struct gen7_snapshot_block_list;
+
+extern const struct adreno_power_ops gen7_gmu_power_ops;
+extern const struct adreno_power_ops gen7_hwsched_power_ops;
+extern const struct adreno_perfcounters adreno_gen7_perfcounters;
+extern const struct adreno_perfcounters adreno_gen7_hwsched_perfcounters;
+extern const struct adreno_perfcounters adreno_gen7_9_0_hwsched_perfcounters;
+
+struct gen7_gpudev {
+	struct adreno_gpudev base;
+	int (*hfi_probe)(struct adreno_device *adreno_dev);
+	void (*hfi_remove)(struct adreno_device *adreno_dev);
+	void (*handle_watchdog)(struct adreno_device *adreno_dev);
+};
+
+extern const struct gen7_gpudev adreno_gen7_gmu_gpudev;
+extern const struct gen7_gpudev adreno_gen7_hwsched_gpudev;
+extern const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev;
+
+/**
+ * struct gen7_device - Container for the gen7_device
+ */
+struct gen7_device {
+	/** @gmu: Container for the gen7 GMU device */
+	struct gen7_gmu_device gmu;
+	/** @adreno_dev: Container for the generic adreno device */
+	struct adreno_device adreno_dev;
+};
+
+/**
+ * struct gen7_protected_regs - container for a protect register span
+ */
+struct gen7_protected_regs {
+	/** @reg: Physical protected mode register to write to */
+	u32 reg;
+	/** @start: Dword offset of the starting register in the range */
+	u32 start;
+	/**
+	 * @end: Dword offset of the ending register in the range
+	 * (inclusive)
+	 */
+	u32 end;
+	/**
+	 * @noaccess: 1 if the register should not be accessible from
+	 * userspace, 0 if it can be read (but not written)
+	 */
+	u32 noaccess;
+};
+
+/**
+ * struct adreno_gen7_core - gen7 specific GPU core definitions
+ */
+struct adreno_gen7_core {
+	/** @base: Container for the generic GPU definitions */
+	struct adreno_gpu_core base;
+	/** @gmu_fw_version: Minimum firmware version required to support this core */
+	u32 gmu_fw_version;
+	/** @sqefw_name: Name of the SQE microcode file */
+	const char *sqefw_name;
+	/** @aqefw_name: Name of the AQE microcode file */
+	const char *aqefw_name;
+	/** @gmufw_name: Name of the GMU firmware file */
+	const char *gmufw_name;
+	/** @gmufw_bak_name: Name of the backup GMU firmware file */
+	const char *gmufw_bak_name;
+	/** @zap_name: Name of the CPZ zap file */
+	const char *zap_name;
+	/** @hwcg: List of registers and values to write for HWCG */
+	const struct kgsl_regmap_list *hwcg;
+	/** @hwcg_count: Number of registers in @hwcg */
+	u32 hwcg_count;
+	/** @ao_hwcg: List of registers and values to write for HWCG in AO block */
+	const struct kgsl_regmap_list *ao_hwcg;
+	/** @ao_hwcg_count: Number of registers in @ao_hwcg */
+	u32 ao_hwcg_count;
+	/** @gbif: List of registers and values to write for GBIF */
+	const struct kgsl_regmap_list *gbif;
+	/** @gbif_count: Number of registers in @gbif */
+	u32 gbif_count;
+	/** @hang_detect_cycles: Hang detect counter timeout value */
+	u32 hang_detect_cycles;
+	/** @protected_regs: Array of protected registers for the target */
+	const struct gen7_protected_regs *protected_regs;
+	/** @ctxt_record_size: Size of the preemption record in bytes */
+	u64 ctxt_record_size;
+	/** @highest_bank_bit: Highest bank bit value */
+	u32 highest_bank_bit;
+	/** @gen7_snapshot_block_list: Device-specific blocks dumped in the snapshot */
+	const struct gen7_snapshot_block_list *gen7_snapshot_block_list;
+	/** @gmu_hub_clk_freq: Gmu hub interface clock frequency */
+	u64 gmu_hub_clk_freq;
+	/**
+	 * @bcl_data: bit 0 contains response type for bcl alarms and bits 1:21 control sid vals
+	 * to configure throttle levels for bcl alarm levels 0-2. If sid vals are not set,
+	 * gmu fw sets default throttle levels.
+	 */
+	u32 bcl_data;
+	/** @preempt_level: Preemption level valid ranges [0 to 2] */
+	u32 preempt_level;
+	/** @qos_value: GPU qos value to set for each RB. */
+	const u32 *qos_value;
+	/**
+	 * @acv_perfmode_ddr_freq: Vote perfmode when DDR frequency >= acv_perfmode_ddr_freq.
+	 * If not specified, vote perfmode for highest DDR level only.
+	 */
+	u32 acv_perfmode_ddr_freq;
+	/** @acv_perfmode_vote: ACV vote for GPU perfmode */
+	u32 acv_perfmode_vote;
+	/** @rt_bus_hint: IB level hint for real time clients i.e. RB-0 */
+	const u32 rt_bus_hint;
+	/** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */
+	bool fast_bus_hint;
+	/** @noc_timeout_us: GPU config NOC port timeout in usec */
+	u32 noc_timeout_us;
+};
+
+/**
+ * struct gen7_cp_preemption_record - CP context record for
+ * preemption.
+ * @magic: (00) Value at this offset must be equal to
+ * GEN7_CP_CTXRECORD_MAGIC_REF.
+ * @info: (04) Type of record. Written non-zero (usually) by CP.
+ * We must set this to zero for all ringbuffers.
+ * @errno: (08) Error code. Initialize this to GEN7_CP_CTXRECORD_ERROR_NONE.
+ * CP will update to another value if a preemption error occurs.
+ * @data: (12) DATA field in YIELD and SET_MARKER packets.
+ * Written by CP when switching out. Not used on switch-in. Initialized to 0.
+ * @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this.
+ * @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this.
+ * @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this.
+ * @_pad28: (28) Reserved/padding.
+ * @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
+ * @rbase: (40) RB_BASE_LO|HI saved and restored.
+ * @counter: (48) Pointer to preemption counter.
+ * @bv_rptr_addr: (56) BV_RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
+ */
+struct gen7_cp_preemption_record {
+	u32 magic;
+	u32 info;
+	u32 errno;
+	u32 data;
+	u32 cntl;
+	u32 rptr;
+	u32 wptr;
+	u32 _pad28;
+	u64 rptr_addr;
+	u64 rbase;
+	u64 counter;
+	u64 bv_rptr_addr;
+};
+
+/**
+ * struct gen7_cp_smmu_info - CP preemption SMMU info.
+ * @magic: (00) The value at this offset must be equal to
+ * GEN7_CP_SMMU_INFO_MAGIC_REF
+ * @_pad4: (04) Reserved/padding
+ * @ttbr0: (08) Base address of the page table for the incoming context
+ * @asid: (16) Address Space IDentifier (ASID) of the incoming context
+ * @context_idr: (20) Context Identification Register value
+ * @context_bank: (24) Which Context Bank in SMMU to update
+ */
+struct gen7_cp_smmu_info {
+	u32 magic;
+	u32 _pad4;
+	u64 ttbr0;
+	u32 asid;
+	u32 context_idr;
+	u32 context_bank;
+};
+
+#define GEN7_CP_SMMU_INFO_MAGIC_REF		0x241350d5UL
+
+#define GEN7_CP_CTXRECORD_MAGIC_REF		0xae399d6eUL
+/* Size of each CP preemption record */
+#define GEN7_CP_CTXRECORD_SIZE_IN_BYTES		(4192 * 1024)
+/* Size of the user context record block (in bytes) */
+#define GEN7_CP_CTXRECORD_USER_RESTORE_SIZE	(192 * 1024)
+/* Size of the performance counter save/restore block (in bytes) */
+#define GEN7_CP_PERFCOUNTER_SAVE_RESTORE_SIZE	(4 * 1024)
+
+#define GEN7_CP_RB_CNTL_DEFAULT \
+	(FIELD_PREP(GENMASK(7, 0), ilog2(KGSL_RB_DWORDS >> 1)) | \
+	 FIELD_PREP(GENMASK(12, 8), ilog2(4)))
+
+/* Size of the CP_INIT pm4 stream in dwords */
+#define GEN7_CP_INIT_DWORDS 10
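+
+/*
+ * A minimal illustrative sketch (not part of the driver API) of how a
+ * ringbuffer's preemption record could be seeded per the field notes on
+ * struct gen7_cp_preemption_record above. The helper name and parameters
+ * are hypothetical, and GEN7_CP_CTXRECORD_ERROR_NONE is assumed to be 0.
+ */
+static inline void gen7_example_init_preempt_record(
+		struct gen7_cp_preemption_record *record, u64 rptr_addr,
+		u64 rb_base, u64 bv_rptr_addr)
+{
+	record->magic = GEN7_CP_CTXRECORD_MAGIC_REF;
+	record->info = 0;	/* must be zero for all ringbuffers */
+	record->errno = 0;	/* GEN7_CP_CTXRECORD_ERROR_NONE (assumed 0) */
+	record->data = 0;
+	record->cntl = GEN7_CP_RB_CNTL_DEFAULT;
+	record->rptr = 0;
+	record->wptr = 0;
+	record->rptr_addr = rptr_addr;
+	record->rbase = rb_base;
+	record->bv_rptr_addr = bv_rptr_addr;
+	/* @counter (GPU address of the preemption counter) is set up elsewhere */
+}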
+
+#define GEN7_INT_MASK \
+	((1 << GEN7_INT_AHBERROR) |			\
+	 (1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) |		\
+	 (1 << GEN7_INT_GPCERROR) |			\
+	 (1 << GEN7_INT_SWINTERRUPT) |			\
+	 (1 << GEN7_INT_HWERROR) |			\
+	 (1 << GEN7_INT_PM4CPINTERRUPT) |		\
+	 (1 << GEN7_INT_RB_DONE_TS) |			\
+	 (1 << GEN7_INT_CACHE_CLEAN_TS) |		\
+	 (1 << GEN7_INT_ATBBUSOVERFLOW) |		\
+	 (1 << GEN7_INT_HANGDETECTINTERRUPT) |		\
+	 (1 << GEN7_INT_OUTOFBOUNDACCESS) |		\
+	 (1 << GEN7_INT_UCHETRAPINTERRUPT) |		\
+	 (1 << GEN7_INT_TSBWRITEERROR) |		\
+	 (1 << GEN7_INT_SWFUSEVIOLATION))
+
+#define GEN7_HWSCHED_INT_MASK \
+	((1 << GEN7_INT_AHBERROR) |			\
+	 (1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) |		\
+	 (1 << GEN7_INT_ATBBUSOVERFLOW) |		\
+	 (1 << GEN7_INT_OUTOFBOUNDACCESS) |		\
+	 (1 << GEN7_INT_UCHETRAPINTERRUPT))
+
+/**
+ * to_gen7_core - return the gen7 specific GPU core struct
+ * @adreno_dev: An Adreno GPU device handle
+ *
+ * Returns:
+ * A pointer to the gen7 specific GPU core struct
+ */
+static inline const struct adreno_gen7_core *
+to_gen7_core(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpu_core *core = adreno_dev->gpucore;
+
+	return container_of(core, struct adreno_gen7_core, base);
+}
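+
+/*
+ * Usage sketch (hypothetical helper, assuming only the per-target highest
+ * bank bit is needed): to_gen7_core() is how target code reaches the
+ * gen7-specific core table from a generic adreno_device handle.
+ */
+static inline u32 gen7_example_highest_bank_bit(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+
+	/* Read the per-target value straight from the gpucore definition */
+	return gen7_core->highest_bank_bit;
+}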
+
+/* Preemption functions */
+void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic);
+void gen7_preemption_schedule(struct adreno_device *adreno_dev);
+void gen7_preemption_start(struct adreno_device *adreno_dev);
+int gen7_preemption_init(struct adreno_device *adreno_dev);
+
+u32 gen7_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+		unsigned int *cmds);
+u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 *cmds);
+
+unsigned int gen7_set_marker(unsigned int *cmds,
+		enum adreno_cp_marker_type type);
+
+void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit);
+
+int gen7_preemption_context_init(struct kgsl_context *context);
+
+void gen7_preemption_context_destroy(struct kgsl_context *context);
+
+void gen7_preemption_prepare_postamble(struct adreno_device *adreno_dev);
+
+void gen7_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+void gen7_crashdump_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_snapshot_external_core_regs - Dump external registers into snapshot
+ * @device: Pointer to KGSL device
+ * @snapshot: Pointer to the snapshot
+ *
+ * Dump external core registers like GPUCC, CPR into GPU snapshot.
+ */
+void gen7_snapshot_external_core_regs(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot);
+
+/**
+ * gen7_start - Program gen7 registers
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function does all gen7 register programming every
+ * time we boot the gpu
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen7_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_init - Initialize gen7 resources
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function does gen7 specific one time initialization
+ * and is invoked when the very first client opens a
+ * kgsl instance
+ *
+ * Return: Zero on success and negative error on failure
+ */
+int gen7_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_cx_timer_init - Initialize the CX timer on Gen7 devices
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Synchronize the GPU CX timer (if we have one) with the CPU timer
+ */
+void gen7_cx_timer_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_get_gpu_feature_info - Get hardware supported feature info
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Get HW supported feature info and update software feature configuration
+ */
+void gen7_get_gpu_feature_info(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_rb_start - Gen7 specific ringbuffer setup
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function does gen7 specific ringbuffer setup and
+ * attempts to submit CP INIT and bring GPU out of secure mode
+ *
+ * Return: Zero on success and negative error on failure
+ */
+int gen7_rb_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_microcode_read - Get the cp microcode from the filesystem
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function gets the firmware from the filesystem and sets up
+ * the microcode global buffer
+ *
+ * Return: Zero on success and negative error on failure
+ */
+int gen7_microcode_read(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_probe_common - Probe common gen7 resources
+ * @pdev: Pointer to the platform device
+ * @adreno_dev: Pointer to the adreno device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore structure
+ *
+ * This function sets up the gen7 resources common across all
+ * gen7 targets
+ */
+int gen7_probe_common(struct platform_device *pdev,
+	struct adreno_device *adreno_dev, u32 chipid,
+	const struct adreno_gpu_core *gpucore);
+
+/**
+ * gen7_hw_isidle - Check whether gen7 gpu is idle or not
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Return: True if gpu is idle, otherwise false
+ */
+bool gen7_hw_isidle(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_spin_idle_debug - Debug logging used when gpu fails to idle
+ * @adreno_dev: An Adreno GPU handle
+ * @str: String describing the failure
+ *
+ * This function logs interesting registers and triggers a snapshot
+ */
+void gen7_spin_idle_debug(struct adreno_device *adreno_dev,
+	const char *str);
+
+/**
+ * gen7_perfcounter_update - Update the IFPC perfcounter list
+ * @adreno_dev: An Adreno GPU handle
+ * @reg: Perfcounter reg struct to add/remove to the list
+ * @update_reg: true if the perfcounter needs to be programmed by the CPU
+ * @pipe: pipe id for CP aperture control
+ * @flags: Flags set for requested perfcounter group
+ *
+ * Return: 0 on success or -EBUSY if the lock couldn't be taken
+ */
+int gen7_perfcounter_update(struct adreno_device *adreno_dev,
+	struct adreno_perfcount_register *reg, bool update_reg, u32 pipe,
+	unsigned long flags);
+
+/**
+ * gen7_ringbuffer_init - Initialize the ringbuffers
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Initialize the ringbuffer(s) for gen7.
+ * Return: 0 on success or negative on failure
+ */
+int gen7_ringbuffer_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_ringbuffer_submitcmd - Submit a user command to the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ * @cmdobj: Pointer to a user command object
+ * @flags: Internal submit flags
+ * @time: Optional pointer to an adreno_submit_time container
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time);
+
+/**
+ * gen7_ringbuffer_submit - Submit a command to the ringbuffer
+ * @rb: Ringbuffer pointer
+ * @time: Optional pointer to an adreno_submit_time container
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time);
+
+/**
+ * gen7_fenced_write - Write to a fenced register
+ * @adreno_dev: An Adreno GPU handle
+ * @offset: Register offset
+ * @value: Value to write
+ * @mask: Expected FENCE_STATUS for successful write
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen7_fenced_write(struct adreno_device *adreno_dev, u32 offset,
+		u32 value, u32 mask);
+
+/**
+ * gen7_ringbuffer_addcmds - Wrap and submit commands to the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ * @rb: Ringbuffer pointer
+ * @drawctxt: Draw context submitting the commands
+ * @flags: Submission flags
+ * @in: Input buffer to write to ringbuffer
+ * @dwords: Dword length of @in
+ * @timestamp: Draw context timestamp for the submission
+ * @time: Optional pointer to an adreno_submit_time container
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 flags, u32 *in, u32 dwords, u32 timestamp,
+		struct adreno_submit_time *time);
+
+/**
+ * gen7_cp_init_cmds - Create the CP_INIT commands
+ * @adreno_dev: An Adreno GPU handle
+ * @cmd: Buffer to write the CP_INIT commands into
+ */
+void gen7_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds);
+
+/**
+ * gen7_gmu_hfi_probe - Probe Gen7 HFI specific data
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen7_gmu_hfi_probe(struct adreno_device *adreno_dev);
+
+static inline const struct gen7_gpudev *
+to_gen7_gpudev(const struct adreno_gpudev *gpudev)
+{
+	return container_of(gpudev, struct gen7_gpudev, base);
+}
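+
+/*
+ * Usage sketch (hypothetical helper): the gen7 dispatch tables wrap the
+ * generic adreno_gpudev, so a caller holding only the generic ops pointer
+ * recovers the gen7 wrapper with to_gen7_gpudev() before invoking the
+ * gen7-only hooks such as hfi_probe.
+ */
+static inline int gen7_example_hfi_probe(struct adreno_device *adreno_dev,
+		const struct adreno_gpudev *gpudev)
+{
+	const struct gen7_gpudev *gen7_gpudev = to_gen7_gpudev(gpudev);
+
+	/* Gen7-specific hook, not part of the generic adreno_gpudev ops */
+	return gen7_gpudev->hfi_probe(adreno_dev);
+}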
+
+/**
+ * gen7_reset_preempt_records - Reset the preemption buffers
+ * @adreno_dev: Handle to the adreno device
+ *
+ * Reset the preemption records at the time of hard reset
+ */
+void gen7_reset_preempt_records(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_enable_ahb_timeout_detection - Program AHB control registers
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Program AHB control registers to enable AHB timeout detection.
+ */
+void gen7_enable_ahb_timeout_detection(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_rdpm_mx_freq_update - Update the mx frequency
+ * @gmu: An Adreno GMU handle
+ * @freq: Frequency in KHz
+ *
+ * This function communicates GPU mx frequency (in MHz) changes to rdpm.
+ */
+void gen7_rdpm_mx_freq_update(struct gen7_gmu_device *gmu, u32 freq);
+
+/**
+ * gen7_rdpm_cx_freq_update - Update the cx frequency
+ * @gmu: An Adreno GMU handle
+ * @freq: Frequency in KHz
+ *
+ * This function communicates GPU cx frequency (in MHz) changes to rdpm.
+ */
+void gen7_rdpm_cx_freq_update(struct gen7_gmu_device *gmu, u32 freq);
+
+/**
+ * gen7_scm_gpu_init_cx_regs - Program gpu regs for feature support
+ * @adreno_dev: Handle to the adreno device
+ *
+ * Program GPU registers for feature support. The SCM call for this
+ * is available from kernel version 6.0 onwards.
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen7_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev);
+
+#ifdef CONFIG_QCOM_KGSL_CORESIGHT
+void gen7_coresight_init(struct adreno_device *device);
+#else
+static inline void gen7_coresight_init(struct adreno_device *device) { }
+#endif
+
+#endif

+ 927 - 0
qcom/opensource/graphics-kernel/adreno_gen7_0_0_snapshot.h

@@ -0,0 +1,927 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_GEN7_0_0_SNAPSHOT_H
+#define __ADRENO_GEN7_0_0_SNAPSHOT_H
+
+#include "adreno_gen7_snapshot.h"
+
+static const u32 gen7_0_0_debugbus_blocks[] = {
+	DEBUGBUS_CP_0_0,
+	DEBUGBUS_CP_0_1,
+	DEBUGBUS_RBBM,
+	DEBUGBUS_HLSQ,
+	DEBUGBUS_UCHE_0,
+	DEBUGBUS_TESS_BR,
+	DEBUGBUS_TESS_BV,
+	DEBUGBUS_PC_BR,
+	DEBUGBUS_PC_BV,
+	DEBUGBUS_VFDP_BR,
+	DEBUGBUS_VFDP_BV,
+	DEBUGBUS_VPC_BR,
+	DEBUGBUS_VPC_BV,
+	DEBUGBUS_TSE_BR,
+	DEBUGBUS_TSE_BV,
+	DEBUGBUS_RAS_BR,
+	DEBUGBUS_RAS_BV,
+	DEBUGBUS_VSC,
+	DEBUGBUS_COM_0,
+	DEBUGBUS_LRZ_BR,
+	DEBUGBUS_LRZ_BV,
+	DEBUGBUS_UFC_0,
+	DEBUGBUS_UFC_1,
+	DEBUGBUS_GMU_GX,
+	DEBUGBUS_DBGC,
+	DEBUGBUS_GPC_BR,
+	DEBUGBUS_GPC_BV,
+	DEBUGBUS_LARC,
+	DEBUGBUS_HLSQ_SPTP,
+	DEBUGBUS_RB_0,
+	DEBUGBUS_RB_1,
+	DEBUGBUS_RB_2,
+	DEBUGBUS_RB_3,
+	DEBUGBUS_UCHE_WRAPPER,
+	DEBUGBUS_CCU_0,
+	DEBUGBUS_CCU_1,
+	DEBUGBUS_CCU_2,
+	DEBUGBUS_CCU_3,
+	DEBUGBUS_VFD_BR_0,
+	DEBUGBUS_VFD_BR_1,
+	DEBUGBUS_VFD_BR_2,
+	DEBUGBUS_VFD_BR_3,
+	DEBUGBUS_VFD_BR_4,
+	DEBUGBUS_VFD_BR_5,
+	DEBUGBUS_VFD_BR_6,
+	DEBUGBUS_VFD_BR_7,
+	DEBUGBUS_VFD_BV_0,
+	DEBUGBUS_VFD_BV_1,
+	DEBUGBUS_VFD_BV_2,
+	DEBUGBUS_VFD_BV_3,
+	DEBUGBUS_USP_0,
+	DEBUGBUS_USP_1,
+	DEBUGBUS_USP_2,
+	DEBUGBUS_USP_3,
+	DEBUGBUS_TP_0,
+	DEBUGBUS_TP_1,
+	DEBUGBUS_TP_2,
+	DEBUGBUS_TP_3,
+	DEBUGBUS_TP_4,
+	DEBUGBUS_TP_5,
+	DEBUGBUS_TP_6,
+	DEBUGBUS_TP_7,
+	DEBUGBUS_USPTP_0,
+	DEBUGBUS_USPTP_1,
+	DEBUGBUS_USPTP_2,
+	DEBUGBUS_USPTP_3,
+	DEBUGBUS_USPTP_4,
+	DEBUGBUS_USPTP_5,
+	DEBUGBUS_USPTP_6,
+	DEBUGBUS_USPTP_7,
+};
+
+static struct gen7_shader_block gen7_0_0_shader_blocks[] = {
+	{TP0_TMO_DATA,                 0x200, 4, 2, PIPE_BR, USPTP},
+	{TP0_SMO_DATA,                  0x80, 4, 2, PIPE_BR, USPTP},
+	{TP0_MIPMAP_BASE_DATA,         0x3c0, 4, 2, PIPE_BR, USPTP},
+	{SP_INST_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_INST_DATA_1,               0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_0_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_1_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_2_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_3_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_4_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_5_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_6_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_7_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_CB_RAM,                    0x390, 4, 2, PIPE_BR, USPTP},
+	{SP_INST_TAG,                   0x90, 4, 2, PIPE_BR, USPTP},
+	{SP_INST_DATA_2,               0x200, 4, 2, PIPE_BR, USPTP},
+	{SP_TMO_TAG,                    0x80, 4, 2, PIPE_BR, USPTP},
+	{SP_SMO_TAG,                    0x80, 4, 2, PIPE_BR, USPTP},
+	{SP_STATE_DATA,                 0x40, 4, 2, PIPE_BR, USPTP},
+	{SP_HWAVE_RAM,                 0x100, 4, 2, PIPE_BR, USPTP},
+	{SP_L0_INST_BUF,                0x50, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_8_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_9_DATA,                 0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_10_DATA,                0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_11_DATA,                0x800, 4, 2, PIPE_BR, USPTP},
+	{SP_LB_12_DATA,                0x200, 4, 2, PIPE_BR, USPTP},
+	{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG,  0x10, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG,  0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CPS_BE_CTXT_BUF_RAM_TAG,  0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CVS_RAM,           0x1c0, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CHUNK_CVS_RAM,           0x1c0, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CPS_RAM,           0x300, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CPS_RAM,           0x300, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_CHUNK_CVS_RAM_TAG,        0x40, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CVS_RAM_TAG,        0x40, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CHUNK_CPS_RAM_TAG,        0x40, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CPS_RAM_TAG,        0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_ICB_CVS_CB_BASE_TAG,      0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_ICB_CVS_CB_BASE_TAG,      0x10, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_ICB_CPS_CB_BASE_TAG,      0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_ICB_CPS_CB_BASE_TAG,      0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_CVS_MISC_RAM,            0x280, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CVS_MISC_RAM,            0x280, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM,            0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM,            0x800, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM_1,          0x200, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_INST_RAM,                0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_INST_RAM,                0x800, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_INST_RAM,                0x800, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_GFX_CVS_CONST_RAM,       0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CVS_CONST_RAM,       0x800, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_GFX_CPS_CONST_RAM,       0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CPS_CONST_RAM,       0x800, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_CVS_MISC_RAM_TAG,         0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CVS_MISC_RAM_TAG,         0x10, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM_TAG,         0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM_TAG,         0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_INST_RAM_TAG,             0x80, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_INST_RAM_TAG,             0x80, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_INST_RAM_TAG,             0x80, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_GFX_CVS_CONST_RAM_TAG,    0x64, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CVS_CONST_RAM_TAG,    0x64, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_GFX_CPS_CONST_RAM_TAG,    0x64, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CPS_CONST_RAM_TAG,    0x64, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_INST_RAM_1,              0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_STPROC_META,              0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_BV_BE_META,               0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_BV_BE_META,               0x10, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_DATAPATH_META,            0x20, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_FRONTEND_META,            0x40, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_FRONTEND_META,            0x40, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_FRONTEND_META,            0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_INDIRECT_META,            0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_BACKEND_META,             0x40, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_BACKEND_META,             0x40, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_BACKEND_META,             0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
+};
+
+static const u32 gen7_0_0_pre_crashdumper_gpu_registers[] = {
+	0x00210, 0x00210, 0x00212, 0x00213, 0x03c00, 0x03c0b, 0x03c40, 0x03c42,
+	0x03c45, 0x03c47, 0x03c49, 0x03c4a, 0x03cc0, 0x03cd1,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_pre_crashdumper_gpu_registers), 8));
+
+static const u32 gen7_0_0_post_crashdumper_registers[] = {
+	0x00535, 0x00535, 0x0f400, 0x0f400, 0x0f800, 0x0f803, 0x0fc00, 0x0fc01,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_post_crashdumper_registers), 8));
+
+static const u32 gen7_0_0_gpu_registers[] = {
+	0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b,
+	0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044,
+	0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050,
+	0x00056, 0x00056, 0x00073, 0x00075, 0x000ad, 0x000ae, 0x000b0, 0x000b0,
+	0x000b4, 0x000b4, 0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0,
+	0x000c4, 0x000c4, 0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0,
+	0x000d4, 0x000d4, 0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0,
+	0x000e4, 0x000e4, 0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0,
+	0x000f4, 0x000f4, 0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010b,
+	0x0010f, 0x0011d, 0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00211, 0x00211,
+	0x00215, 0x00243, 0x00260, 0x00268, 0x00272, 0x00274, 0x00281, 0x0028d,
+	0x00300, 0x00401, 0x00410, 0x00451, 0x00460, 0x004a3, 0x004c0, 0x004d1,
+	0x00500, 0x00500, 0x00507, 0x0050b, 0x0050f, 0x0050f, 0x00511, 0x00511,
+	0x00533, 0x00534, 0x00536, 0x00536, 0x00540, 0x00555, 0x00564, 0x00567,
+	0x00574, 0x00577, 0x005fb, 0x005ff, 0x00800, 0x00808, 0x00810, 0x00813,
+	0x00820, 0x00821, 0x00823, 0x00827, 0x00830, 0x00834, 0x0083f, 0x00841,
+	0x00843, 0x00847, 0x0084f, 0x00886, 0x008a0, 0x008ab, 0x008c0, 0x008c0,
+	0x008c4, 0x008c5, 0x008d0, 0x008dd, 0x008e0, 0x008e6, 0x008f0, 0x008f3,
+	0x00900, 0x00903, 0x00908, 0x00911, 0x00928, 0x0093e, 0x00942, 0x0094d,
+	0x00980, 0x00984, 0x0098d, 0x0098f, 0x009b0, 0x009b4, 0x009c2, 0x009c9,
+	0x009ce, 0x009d7, 0x009e0, 0x009e7, 0x00a00, 0x00a00, 0x00a02, 0x00a03,
+	0x00a10, 0x00a4f, 0x00a61, 0x00a9f, 0x00ad0, 0x00adb, 0x00b00, 0x00b31,
+	0x00b35, 0x00b3c, 0x00b40, 0x00b40, 0x00c00, 0x00c00, 0x00c02, 0x00c04,
+	0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, 0x00df0, 0x00df4,
+	0x00e01, 0x00e02, 0x00e07, 0x00e0e, 0x00e10, 0x00e13, 0x00e17, 0x00e19,
+	0x00e1b, 0x00e2b, 0x00e30, 0x00e32, 0x00e38, 0x00e3c,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_gpu_registers), 8));
+
+static const u32 gen7_0_0_gmu_registers[] = {
+	0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403,
+	0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03,
+	0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403,
+	0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03,
+	0x1f400, 0x1f40d, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507,
+	0x1f509, 0x1f50b, 0x1f800, 0x1f804, 0x1f807, 0x1f808, 0x1f80b, 0x1f80c,
+	0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, 0x1f819, 0x1f81c,
+	0x1f824, 0x1f82a, 0x1f82d, 0x1f830, 0x1f840, 0x1f853, 0x1f860, 0x1f860,
+	0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889, 0x1f8a0, 0x1f8a2,
+	0x1f8a4, 0x1f8af, 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0,
+	0x1f8ec, 0x1f8ec, 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f914, 0x1f920, 0x1f921,
+	0x1f924, 0x1f925, 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940,
+	0x1f942, 0x1f944, 0x1f948, 0x1f94a, 0x1f94f, 0x1f951, 0x1f958, 0x1f95a,
+	0x1f95d, 0x1f95d, 0x1f962, 0x1f962, 0x1f964, 0x1f96b, 0x1f970, 0x1f979,
+	0x1f980, 0x1f981, 0x1f984, 0x1f986, 0x1f992, 0x1f993, 0x1f996, 0x1f99e,
+	0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, 0x1f9f0, 0x1f9f1, 0x1f9f8, 0x1f9fa,
+	0x1fa00, 0x1fa03, 0x20000, 0x20005, 0x20008, 0x2000c, 0x20010, 0x20012,
+	0x20018, 0x20018, 0x20020, 0x20023, 0x20030, 0x20031, 0x23801, 0x23801,
+	0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, 0x23809, 0x23809,
+	0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, 0x23811, 0x23811,
+	0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, 0x23819, 0x23819,
+	0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, 0x23822, 0x23822,
+	0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, 0x2382a, 0x2382a,
+	0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, 0x23832, 0x23832,
+	0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838, 0x2383a, 0x2383a,
+	0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, 0x23b00, 0x23b01,
+	0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, 0x23b15, 0x23b16,
+	0x23b20, 0x23b20, 0x23b28, 0x23b28, 0x23b30, 0x23b30,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_gmu_registers), 8));
+
+static const u32 gen7_0_0_gmugx_registers[] = {
+	0x1a400, 0x1a41f, 0x1a440, 0x1a45f, 0x1a480, 0x1a49f, 0x1a4c0, 0x1a4df,
+	0x1a500, 0x1a51f, 0x1a540, 0x1a55f, 0x1a580, 0x1a59f, 0x1a5c0, 0x1a5df,
+	0x1a780, 0x1a781, 0x1a783, 0x1a785, 0x1a787, 0x1a789, 0x1a78b, 0x1a78d,
+	0x1a78f, 0x1a791, 0x1a793, 0x1a795, 0x1a797, 0x1a799, 0x1a79b, 0x1a79b,
+	0x1a7c0, 0x1a7c1, 0x1a7c4, 0x1a7c5, 0x1a7c8, 0x1a7c9, 0x1a7cc, 0x1a7cd,
+	0x1a7d0, 0x1a7d1, 0x1a7d4, 0x1a7d5, 0x1a7d8, 0x1a7d9, 0x1a7fc, 0x1a7fd,
+	0x1a800, 0x1a802, 0x1a804, 0x1a804, 0x1a816, 0x1a816, 0x1a81e, 0x1a81e,
+	0x1a826, 0x1a826, 0x1a82e, 0x1a82e, 0x1a836, 0x1a836, 0x1a83e, 0x1a83e,
+	0x1a846, 0x1a846, 0x1a860, 0x1a862, 0x1a864, 0x1a867, 0x1a870, 0x1a870,
+	0x1a883, 0x1a884, 0x1a8c0, 0x1a8c2, 0x1a8c4, 0x1a8c7, 0x1a8d0, 0x1a8d3,
+	0x1a900, 0x1a92b, 0x1a940, 0x1a940,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_gmugx_registers), 8));
+
+static const u32 gen7_0_0_noncontext_pipe_br_registers[] = {
+	0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
+	0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
+	0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
+	0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
+	0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
+	0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
+	0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
+	0x0a630, 0x0a631, 0x0a638, 0x0a638,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_br_registers), 8));
+
+static const u32 gen7_0_0_noncontext_pipe_bv_registers[] = {
+	0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
+	0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
+	0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
+	0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
+	0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
+	0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
+	0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
+	0x0a630, 0x0a631, 0x0a638, 0x0a638,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_bv_registers), 8));
+
+static const u32 gen7_0_0_noncontext_pipe_lpac_registers[] = {
+	0x00887, 0x0088c, 0x00f80, 0x00f80,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_lpac_registers), 8));
+
+static const u32 gen7_0_0_noncontext_rb_rac_pipe_br_registers[] = {
+	0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e5a,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_rb_rac_pipe_br_registers), 8));
+
+static const u32 gen7_0_0_noncontext_rb_rbp_pipe_br_registers[] = {
+	0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c,
+	0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e3f, 0x08e50, 0x08e50,
+	0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e65,
+	0x08e68, 0x08e68, 0x08e70, 0x08e79, 0x08e80, 0x08e8f,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_rb_rbp_pipe_br_registers), 8));
+
+/* Block: GRAS Cluster: CLUSTER_GRAS Pipeline: PIPE_BR */
+static const u32 gen7_0_0_gras_cluster_gras_pipe_br_registers[] = {
+	0x08000, 0x08008, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
+	0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
+	0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08110, 0x08120, 0x0813f,
+	0x08400, 0x08406, 0x0840a, 0x0840b,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_gras_cluster_gras_pipe_br_registers), 8));
+
+/* Block: GRAS Cluster: CLUSTER_GRAS Pipeline: PIPE_BV */
+static const u32 gen7_0_0_gras_cluster_gras_pipe_bv_registers[] = {
+	0x08000, 0x08008, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
+	0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
+	0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08110, 0x08120, 0x0813f,
+	0x08400, 0x08406, 0x0840a, 0x0840b,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_gras_cluster_gras_pipe_bv_registers), 8));
+
+/* Block: PC Cluster: CLUSTER_FE Pipeline: PIPE_BR */
+static const u32 gen7_0_0_pc_cluster_fe_pipe_br_registers[] = {
+	0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
+	0x09b00, 0x09b08,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_pc_cluster_fe_pipe_br_registers), 8));
+
+/* Block: PC Cluster: CLUSTER_FE Pipeline: PIPE_BV */
+static const u32 gen7_0_0_pc_cluster_fe_pipe_bv_registers[] = {
+	0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
+	0x09b00, 0x09b08,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_pc_cluster_fe_pipe_bv_registers), 8));
+
+/* Block: RB_RAC Cluster: CLUSTER_PS Pipeline: PIPE_BR */
+static const u32 gen7_0_0_rb_rac_cluster_ps_pipe_br_registers[] = {
+	0x08802, 0x08802, 0x08804, 0x08806, 0x08809, 0x0880a, 0x0880e, 0x08811,
+	0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829,
+	0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839,
+	0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849,
+	0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859,
+	0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876,
+	0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891,
+	0x08898, 0x08898, 0x088c0, 0x088c1, 0x088e5, 0x088e5, 0x088f4, 0x088f5,
+	0x08a00, 0x08a05, 0x08a10, 0x08a15, 0x08a20, 0x08a25, 0x08a30, 0x08a35,
+	0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_rb_rac_cluster_ps_pipe_br_registers), 8));
+
+/* Block: RB_RBP Cluster: CLUSTER_PS Pipeline: PIPE_BR */
+static const u32 gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers[] = {
+	0x08800, 0x08801, 0x08803, 0x08803, 0x0880b, 0x0880d, 0x08812, 0x08812,
+	0x08820, 0x08820, 0x08822, 0x08822, 0x08827, 0x08828, 0x0882a, 0x0882a,
+	0x0882f, 0x08830, 0x08832, 0x08832, 0x08837, 0x08838, 0x0883a, 0x0883a,
+	0x0883f, 0x08840, 0x08842, 0x08842, 0x08847, 0x08848, 0x0884a, 0x0884a,
+	0x0884f, 0x08850, 0x08852, 0x08852, 0x08857, 0x08858, 0x0885a, 0x0885a,
+	0x0885f, 0x0885f, 0x08865, 0x08865, 0x08871, 0x08872, 0x08877, 0x08877,
+	0x08880, 0x08881, 0x08886, 0x08886, 0x08890, 0x08890, 0x088d0, 0x088e4,
+	0x088e8, 0x088ea, 0x088f0, 0x088f0, 0x08900, 0x0891a, 0x08927, 0x08928,
+	0x08c17, 0x08c17, 0x08c20, 0x08c25,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: HLSQ_STATE */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers[] = {
+	0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7,
+	0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9b0, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9ba,
+	0x0a9bc, 0x0a9bc, 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e0, 0x0a9fc,
+	0x0aa00, 0x0aa00, 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab03,
+	0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: HLSQ_STATE */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers[] = {
+	0x0a9b0, 0x0a9b0, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
+	0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9fc,
+	0x0aa00, 0x0aa00, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: HLSQ_DP */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers[] = {
+	0x0a9b1, 0x0a9b1, 0x0a9c6, 0x0a9cb, 0x0a9d4, 0x0a9df,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: HLSQ_DP */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers[] = {
+	0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: SP_TOP */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers[] = {
+	0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8,
+	0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5,
+	0x0a9ba, 0x0a9bc, 0x0a9e0, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
+	0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: SP_TOP */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers[] = {
+	0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9e2, 0x0a9e3,
+	0x0a9e6, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: uSPTP */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers[] = {
+	0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae,
+	0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3,
+	0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf,
+	0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: uSPTP */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers[] = {
+	0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3,
+	0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: HLSQ_STATE */
+static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers[] = {
+	0x0a800, 0x0a800, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a833, 0x0a835, 0x0a83a, 0x0a83a,
+	0x0a83c, 0x0a83c, 0x0a83f, 0x0a840, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a88c, 0x0a88e,
+	0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, 0x0a89a, 0x0a89d,
+	0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05,
+	0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: HLSQ_STATE */
+static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers[] = {
+	0x0a800, 0x0a800, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a833, 0x0a835, 0x0a83a, 0x0a83a,
+	0x0a83c, 0x0a83c, 0x0a83f, 0x0a840, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a88c, 0x0a88e,
+	0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, 0x0a89a, 0x0a89d,
+	0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, 0x0ab0a, 0x0ab1b,
+	0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: SP_TOP */
+static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers[] = {
+	0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a830, 0x0a831,
+	0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d,
+	0x0a862, 0x0a864, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e, 0x0a893, 0x0a895,
+	0x0a8a0, 0x0a8af, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05,
+	0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: SP_TOP */
+static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers[] = {
+	0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a830, 0x0a831,
+	0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d,
+	0x0a862, 0x0a864, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e, 0x0a893, 0x0a895,
+	0x0a8a0, 0x0a8af, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b,
+	0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: uSPTP */
+static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers[] = {
+	0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a830, 0x0a833,
+	0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, 0x0a863, 0x0a867,
+	0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a898, 0x0a8c0, 0x0a8c3,
+	0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: uSPTP */
+static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers[] = {
+	0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a830, 0x0a833,
+	0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, 0x0a863, 0x0a867,
+	0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a898, 0x0a8c0, 0x0a8c3,
+	0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers), 8));
+
+/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR */
+static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers[] = {
+	0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d5, 0x0b300, 0x0b307,
+	0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: HLSQ_STATE */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_hlsq_state_registers[] = {
+	0x0ab00, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_hlsq_state_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: SP_TOP */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_sp_top_registers[] = {
+	0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_sp_top_registers), 8));
+
+/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: uSPTP */
+static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_usptp_registers[] = {
+	0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_usptp_registers), 8));
+
+/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV */
+static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_bv_registers[] = {
+	0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_bv_registers), 8));
+
+/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC */
+static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers[] = {
+	0x0b180, 0x0b181, 0x0b300, 0x0b301, 0x0b307, 0x0b307, 0x0b309, 0x0b309,
+	0x0b310, 0x0b310,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers), 8));
+
+/* Block: TPL1 Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR */
+static const u32 gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers[] = {
+	0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers), 8));
+
+/* Block: TPL1 Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV */
+static const u32 gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers[] = {
+	0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers), 8));
+
+/* Block: VFD Cluster: CLUSTER_FE Pipeline: PIPE_BR */
+static const u32 gen7_0_0_vfd_cluster_fe_pipe_br_registers[] = {
+	0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_vfd_cluster_fe_pipe_br_registers), 8));
+
+/* Block: VFD Cluster: CLUSTER_FE Pipeline: PIPE_BV */
+static const u32 gen7_0_0_vfd_cluster_fe_pipe_bv_registers[] = {
+	0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_vfd_cluster_fe_pipe_bv_registers), 8));
+
+/* Block: VPC Cluster: CLUSTER_FE Pipeline: PIPE_BR */
+static const u32 gen7_0_0_vpc_cluster_fe_pipe_br_registers[] = {
+	0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_fe_pipe_br_registers), 8));
+
+/* Block: VPC Cluster: CLUSTER_FE Pipeline: PIPE_BV */
+static const u32 gen7_0_0_vpc_cluster_fe_pipe_bv_registers[] = {
+	0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_fe_pipe_bv_registers), 8));
+
+/* Block: VPC Cluster: CLUSTER_PC_VS Pipeline: PIPE_BR */
+static const u32 gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers[] = {
+	0x09101, 0x0910c, 0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers), 8));
+
+/* Block: VPC Cluster: CLUSTER_PC_VS Pipeline: PIPE_BV */
+static const u32 gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers[] = {
+	0x09101, 0x0910c, 0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers), 8));
+
+/* Block: VPC Cluster: CLUSTER_VPC_PS Pipeline: PIPE_BR */
+static const u32 gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers[] = {
+	0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers), 8));
+
+/* Block: VPC Cluster: CLUSTER_VPC_PS Pipeline: PIPE_BV */
+static const u32 gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers[] = {
+	0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers), 8));
+
+/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: HLSQ_STATE */
+static const u32 gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers[] = {
+	0x0ae52, 0x0ae52, 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae73,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers), 8));
+
+/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: SP_TOP */
+static const u32 gen7_0_0_sp_noncontext_pipe_br_sp_top_registers[] = {
+	0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c,
+	0x0ae0f, 0x0ae0f, 0x0ae28, 0x0ae2b, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f,
+	0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_sp_top_registers), 8));
+
+/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: uSPTP */
+static const u32 gen7_0_0_sp_noncontext_pipe_br_usptp_registers[] = {
+	0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c,
+	0x0ae0f, 0x0ae0f, 0x0ae30, 0x0ae32, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3b,
+	0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_usptp_registers), 8));
+
+/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: HLSQ_STATE */
+static const u32 gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers[] = {
+	0x0af88, 0x0af8a,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers), 8));
+
+/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: SP_TOP */
+static const u32 gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers[] = {
+	0x0af80, 0x0af84,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers), 8));
+
+/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: uSPTP */
+static const u32 gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers[] = {
+	0x0af80, 0x0af84, 0x0af90, 0x0af92,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers), 8));
+
+/* Block: TPl1 Cluster: noncontext Pipeline: PIPE_BR */
+static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = {
+	0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c,
+	0x0b60f, 0x0b621, 0x0b630, 0x0b633,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_br_registers), 8));
+
+/* Block: TPl1 Cluster: noncontext Pipeline: PIPE_LPAC */
+static const u32 gen7_0_0_tpl1_noncontext_pipe_lpac_registers[] = {
+	0x0b780, 0x0b780,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_lpac_registers), 8));
+
+static const struct gen7_sel_reg  gen7_0_0_rb_rac_sel = {
+	.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0x0,
+};
+
+static const struct gen7_sel_reg gen7_0_0_rb_rbp_sel = {
+	.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0x9,
+};
+
+static struct gen7_cluster_registers gen7_0_0_clusters[] = {
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_0_0_noncontext_pipe_br_registers, },
+	{ CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT,
+		gen7_0_0_noncontext_pipe_bv_registers, },
+	{ CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT,
+		gen7_0_0_noncontext_pipe_lpac_registers, },
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_0_0_noncontext_rb_rac_pipe_br_registers, &gen7_0_0_rb_rac_sel, },
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_0_0_noncontext_rb_rbp_pipe_br_registers, &gen7_0_0_rb_rbp_sel, },
+	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_gras_cluster_gras_pipe_br_registers, },
+	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_gras_cluster_gras_pipe_bv_registers, },
+	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_gras_cluster_gras_pipe_br_registers, },
+	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_gras_cluster_gras_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_pc_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_pc_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rac_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rac_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rbp_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rbp_sel, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
+	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
+	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
+	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
+	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
+	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
+	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
+	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
+};
+
+static struct gen7_sptp_cluster_registers gen7_0_0_sptp_clusters[] = {
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP,
+		gen7_0_0_sp_noncontext_pipe_br_sp_top_registers, 0xae00 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_sp_noncontext_pipe_br_usptp_registers, 0xae00 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers, 0xaf80 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, SP_TOP,
+		gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers, 0xaf80 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, USPTP,
+		gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers, 0xaf80 },
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 },
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP,
+		gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_STATE,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_STATE,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE,
+		gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP,
+		gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP,
+		gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE,
+		gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP,
+		gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP,
+		gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers, 0xb000 },
+	{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 },
+	{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 },
+};
+
+static const u32 gen7_0_0_rscc_registers[] = {
+	0x14000, 0x14036, 0x14040, 0x14042, 0x14080, 0x14084, 0x14089, 0x1408c,
+	0x14091, 0x14094, 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac,
+	0x14100, 0x14102, 0x14114, 0x14119, 0x14124, 0x1412e, 0x14140, 0x14143,
+	0x14180, 0x14197, 0x14340, 0x14342, 0x14344, 0x14347, 0x1434c, 0x14373,
+	0x143ec, 0x143ef, 0x143f4, 0x1441b, 0x14494, 0x14497, 0x1449c, 0x144c3,
+	0x1453c, 0x1453f, 0x14544, 0x1456b, 0x145e4, 0x145e7, 0x145ec, 0x14613,
+	0x1468c, 0x1468f, 0x14694, 0x146bb, 0x14734, 0x14737, 0x1473c, 0x14763,
+	0x147dc, 0x147df, 0x147e4, 0x1480b, 0x14884, 0x14887, 0x1488c, 0x148b3,
+	0x1492c, 0x1492f, 0x14934, 0x1495b, 0x14f51, 0x14f54,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_rscc_registers), 8));
+
+static const u32 gen7_0_0_cpr_registers[] = {
+	0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c,
+	0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850,
+	0x26880, 0x26898, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee,
+	0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f,
+	0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274ac,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_cpr_registers), 8));
+
+static const u32 gen7_0_0_gpucc_registers[] = {
+	0x24000, 0x2400e, 0x24400, 0x2440e, 0x24800, 0x24805, 0x24c00, 0x24cff,
+	0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, 0x26400, 0x26405,
+	0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26432, 0x26441, 0x26455,
+	0x26466, 0x26468, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e,
+	0x264a0, 0x264a3, 0x264b3, 0x264b5, 0x264c5, 0x264c7, 0x264d6, 0x264d8,
+	0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2650b, 0x2650c, 0x2651c, 0x2651e,
+	0x26540, 0x26570, 0x26600, 0x26616, 0x26620, 0x2662d,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_gpucc_registers), 8));
+
+static const u32 gen7_0_0_cx_misc_registers[] = {
+	0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27832, 0x27857,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_cx_misc_registers), 8));
+
+static const u32 gen7_0_0_dpm_registers[] = {
+	0x1aa00, 0x1aa06, 0x1aa09, 0x1aa0a, 0x1aa0c, 0x1aa0d, 0x1aa0f, 0x1aa12,
+	0x1aa14, 0x1aa47, 0x1aa50, 0x1aa51,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_dpm_registers), 8));
+
+static struct gen7_reg_list gen7_0_0_reg_list[] = {
+	{ gen7_0_0_gpu_registers, NULL },
+	{ gen7_0_0_dpm_registers, NULL },
+	{ NULL, NULL },
+};
+
+static const u32 *gen7_0_0_external_core_regs[] = {
+	gen7_0_0_gpucc_registers,
+	gen7_0_0_cpr_registers,
+};
+#endif /*_ADRENO_GEN7_0_0_SNAPSHOT_H */

+ 1276 - 0
qcom/opensource/graphics-kernel/adreno_gen7_11_0_snapshot.h

@@ -0,0 +1,1276 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_GEN7_11_0_SNAPSHOT_H
+#define __ADRENO_GEN7_11_0_SNAPSHOT_H
+
+#include "adreno_gen7_snapshot.h"
+
+static const u32 gen7_11_0_debugbus_blocks[] = {
+	DEBUGBUS_CP_0_0,
+	DEBUGBUS_CP_0_1,
+	DEBUGBUS_RBBM,
+	DEBUGBUS_HLSQ,
+	DEBUGBUS_UCHE_0,
+	DEBUGBUS_TESS_BR,
+	DEBUGBUS_TESS_BV,
+	DEBUGBUS_PC_BR,
+	DEBUGBUS_PC_BV,
+	DEBUGBUS_VFDP_BR,
+	DEBUGBUS_VFDP_BV,
+	DEBUGBUS_VPC_BR,
+	DEBUGBUS_VPC_BV,
+	DEBUGBUS_TSE_BR,
+	DEBUGBUS_TSE_BV,
+	DEBUGBUS_RAS_BR,
+	DEBUGBUS_RAS_BV,
+	DEBUGBUS_VSC,
+	DEBUGBUS_COM_0,
+	DEBUGBUS_LRZ_BR,
+	DEBUGBUS_LRZ_BV,
+	DEBUGBUS_UFC_0,
+	DEBUGBUS_UFC_1,
+	DEBUGBUS_GMU_GX,
+	DEBUGBUS_DBGC,
+	DEBUGBUS_CX,
+	DEBUGBUS_GMU_CX,
+	DEBUGBUS_GPC_BR,
+	DEBUGBUS_GPC_BV,
+	DEBUGBUS_LARC,
+	DEBUGBUS_HLSQ_SPTP,
+	DEBUGBUS_RB_0,
+	DEBUGBUS_RB_1,
+	DEBUGBUS_RB_2,
+	DEBUGBUS_UCHE_WRAPPER,
+	DEBUGBUS_CCU_0,
+	DEBUGBUS_CCU_1,
+	DEBUGBUS_CCU_2,
+	DEBUGBUS_VFD_BR_0,
+	DEBUGBUS_VFD_BR_1,
+	DEBUGBUS_VFD_BR_2,
+	DEBUGBUS_VFD_BV_0,
+	DEBUGBUS_USP_0,
+	DEBUGBUS_USP_1,
+	DEBUGBUS_USP_2,
+	DEBUGBUS_TP_0,
+	DEBUGBUS_TP_1,
+	DEBUGBUS_TP_2,
+	DEBUGBUS_TP_3,
+	DEBUGBUS_TP_4,
+	DEBUGBUS_TP_5,
+	DEBUGBUS_USPTP_0,
+	DEBUGBUS_USPTP_1,
+	DEBUGBUS_USPTP_2,
+	DEBUGBUS_USPTP_3,
+	DEBUGBUS_USPTP_4,
+	DEBUGBUS_USPTP_5,
+};
+
+static const u32 gen7_11_0_gbif_debugbus_blocks[] = {
+	DEBUGBUS_GBIF_GX,
+};
+
+static const u32 gen7_11_0_cx_debugbus_blocks[] = {
+	DEBUGBUS_CX,
+	DEBUGBUS_GMU_CX,
+	DEBUGBUS_GBIF_CX,
+};
+
+static struct gen7_shader_block gen7_11_0_shader_blocks[] = {
+	{ TP0_TMO_DATA, 0x0200, 3, 2, PIPE_BR, USPTP },
+	{ TP0_SMO_DATA, 0x0080, 3, 2, PIPE_BR, USPTP },
+	{ TP0_MIPMAP_BASE_DATA, 0x03C0, 3, 2, PIPE_BR, USPTP },
+	{ SP_INST_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_INST_DATA_1, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_0_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_1_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_2_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_3_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_4_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_5_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_6_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_7_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_CB_RAM, 0x0390, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_13_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_14_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_INST_TAG, 0x00C0, 3, 2, PIPE_BR, USPTP },
+	{ SP_INST_DATA_2, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_TMO_TAG, 0x0080, 3, 2, PIPE_BR, USPTP },
+	{ SP_SMO_TAG, 0x0080, 3, 2, PIPE_BR, USPTP },
+	{ SP_STATE_DATA, 0x0040, 3, 2, PIPE_BR, USPTP },
+	{ SP_HWAVE_RAM, 0x0100, 3, 2, PIPE_BR, USPTP },
+	{ SP_L0_INST_BUF, 0x0050, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_8_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_9_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_10_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_11_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ SP_LB_12_DATA, 0x0800, 3, 2, PIPE_BR, USPTP },
+	{ HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0300, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CVS_MISC_RAM, 0x0280, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CVS_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CPS_MISC_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CPS_MISC_RAM_1, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CVS_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CVS_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CPS_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_INST_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_INST_RAM_TAG, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0064, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0038, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0064, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_STPROC_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_BV_BE_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_BV_BE_META, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_BACKEND_META, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_BACKEND_META, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE },
+};
+
+/*
+ * Block   : ['GBIF']
+ * Pipeline: PIPE_NONE
+ * pairs   : 5 (Regs:38)
+ */
+static const u32 gen7_11_0_gbif_registers[] = {
+	0x03c00, 0x03c0b, 0x03c40, 0x03c42, 0x03c45, 0x03c47, 0x03c49, 0x03c4a,
+	0x03cc0, 0x03cd1,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_gbif_registers), 8));
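Editorial note (illustration only, not part of this commit): every register table in these snapshot headers is a flat array of inclusive (first, last) address pairs ending in a UINT_MAX/UINT_MAX sentinel, which is why each static_assert checks that the array size is a multiple of 8 bytes (two u32 per pair). A minimal sketch of how such a list could be walked is shown below; the helper name is hypothetical and only kernel types (u32, UINT_MAX) are assumed.

/* Hypothetical helper: count the registers covered by a
 * UINT_MAX-terminated list of inclusive (first, last) pairs.
 */
static u32 count_range_list_regs(const u32 *ranges)
{
	u32 count = 0;
	int i;

	/* UINT_MAX in the "first" slot marks the end of the list */
	for (i = 0; ranges[i] != UINT_MAX; i += 2)
		count += ranges[i + 1] - ranges[i] + 1;

	return count;
}

For the GBIF table above this evaluates to 38, matching the "Regs:38" annotation in its comment block.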
+
+/*
+ * Block   : ['BROADCAST', 'CP', 'GRAS', 'PC']
+ * Block   : ['RBBM', 'RDVM', 'UCHE']
+ * Block   : ['VFD', 'VPC', 'VSC']
+ * Pipeline: PIPE_NONE
+ * pairs   : 162 (Regs:1489)
+ */
+static const u32 gen7_11_0_gpu_registers[] = {
+	0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b,
+	0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044,
+	0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050,
+	0x00056, 0x00056, 0x000ad, 0x000ae, 0x000b0, 0x000b0, 0x000b4, 0x000b4,
+	0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0, 0x000c4, 0x000c4,
+	0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0, 0x000d4, 0x000d4,
+	0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0, 0x000e4, 0x000e4,
+	0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0, 0x000f4, 0x000f4,
+	0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010c, 0x0010f, 0x0011d,
+	0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00210, 0x00213, 0x00215, 0x0023d,
+	0x00260, 0x00270, 0x00272, 0x00274, 0x00281, 0x0028d, 0x00300, 0x00401,
+	0x00410, 0x00451, 0x00460, 0x004a3, 0x004c0, 0x004d1, 0x00500, 0x00500,
+	0x00507, 0x0050b, 0x0050f, 0x0050f, 0x00511, 0x00511, 0x00533, 0x00536,
+	0x00540, 0x00555, 0x00564, 0x00567, 0x00574, 0x00577, 0x00584, 0x0059b,
+	0x00800, 0x00808, 0x00810, 0x00813, 0x00820, 0x00821, 0x00823, 0x00827,
+	0x00830, 0x00834, 0x0083f, 0x00841, 0x00843, 0x00847, 0x0084f, 0x00886,
+	0x008a0, 0x008ab, 0x008c0, 0x008c0, 0x008c4, 0x008c6, 0x008d0, 0x008dd,
+	0x008e0, 0x008e6, 0x008f0, 0x008f3, 0x00900, 0x00903, 0x00908, 0x00911,
+	0x00928, 0x0093e, 0x00942, 0x0094d, 0x00980, 0x00984, 0x0098d, 0x0098f,
+	0x009b0, 0x009b4, 0x009c2, 0x009c9, 0x009ce, 0x009d7, 0x009e0, 0x009e7,
+	0x00a00, 0x00a00, 0x00a02, 0x00a03, 0x00a10, 0x00a4f, 0x00a61, 0x00a9f,
+	0x00ad0, 0x00adb, 0x00b00, 0x00b31, 0x00b35, 0x00b3c, 0x00b40, 0x00b40,
+	0x00c00, 0x00c00, 0x00c02, 0x00c04, 0x00c06, 0x00c06, 0x00c10, 0x00cd9,
+	0x00ce0, 0x00d0c, 0x00df0, 0x00df4, 0x00e01, 0x00e02, 0x00e07, 0x00e0e,
+	0x00e10, 0x00e13, 0x00e17, 0x00e19, 0x00e1b, 0x00e2b, 0x00e30, 0x00e32,
+	0x00e38, 0x00e3c, 0x00e40, 0x00e4b, 0x0ec00, 0x0ec01, 0x0ec05, 0x0ec05,
+	0x0ec07, 0x0ec07, 0x0ec0a, 0x0ec0a, 0x0ec12, 0x0ec12, 0x0ec26, 0x0ec28,
+	0x0ec2b, 0x0ec2d, 0x0ec2f, 0x0ec2f, 0x0ec40, 0x0ec41, 0x0ec45, 0x0ec45,
+	0x0ec47, 0x0ec47, 0x0ec4a, 0x0ec4a, 0x0ec52, 0x0ec52, 0x0ec66, 0x0ec68,
+	0x0ec6b, 0x0ec6d, 0x0ec6f, 0x0ec6f, 0x0ec80, 0x0ec81, 0x0ec85, 0x0ec85,
+	0x0ec87, 0x0ec87, 0x0ec8a, 0x0ec8a, 0x0ec92, 0x0ec92, 0x0eca6, 0x0eca8,
+	0x0ecab, 0x0ecad, 0x0ecaf, 0x0ecaf, 0x0ecc0, 0x0ecc1, 0x0ecc5, 0x0ecc5,
+	0x0ecc7, 0x0ecc7, 0x0ecca, 0x0ecca, 0x0ecd2, 0x0ecd2, 0x0ece6, 0x0ece8,
+	0x0eceb, 0x0eced, 0x0ecef, 0x0ecef, 0x0ed00, 0x0ed01, 0x0ed05, 0x0ed05,
+	0x0ed07, 0x0ed07, 0x0ed0a, 0x0ed0a, 0x0ed12, 0x0ed12, 0x0ed26, 0x0ed28,
+	0x0ed2b, 0x0ed2d, 0x0ed2f, 0x0ed2f, 0x0ed40, 0x0ed41, 0x0ed45, 0x0ed45,
+	0x0ed47, 0x0ed47, 0x0ed4a, 0x0ed4a, 0x0ed52, 0x0ed52, 0x0ed66, 0x0ed68,
+	0x0ed6b, 0x0ed6d, 0x0ed6f, 0x0ed6f, 0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85,
+	0x0ed87, 0x0ed87, 0x0ed8a, 0x0ed8a, 0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8,
+	0x0edab, 0x0edad, 0x0edaf, 0x0edaf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_gpu_registers), 8));
+
+/*
+ * Block   : ['GMUAO', 'GMUCX', 'GMUCX_RAM']
+ * Pipeline: PIPE_NONE
+ * pairs   : 126 (Regs:334)
+ */
+static const u32 gen7_11_0_gmu_registers[] = {
+	0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403,
+	0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03,
+	0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403,
+	0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03,
+	0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507,
+	0x1f509, 0x1f50b, 0x1f705, 0x1f705, 0x1f710, 0x1f711, 0x1f713, 0x1f716,
+	0x1f720, 0x1f724, 0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f800, 0x1f804,
+	0x1f807, 0x1f808, 0x1f80b, 0x1f80c, 0x1f80f, 0x1f80f, 0x1f811, 0x1f811,
+	0x1f813, 0x1f817, 0x1f819, 0x1f81c, 0x1f824, 0x1f82a, 0x1f82d, 0x1f830,
+	0x1f840, 0x1f853, 0x1f860, 0x1f860, 0x1f862, 0x1f864, 0x1f868, 0x1f868,
+	0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889, 0x1f890, 0x1f892,
+	0x1f894, 0x1f896, 0x1f8a0, 0x1f8a2, 0x1f8a4, 0x1f8af, 0x1f8b8, 0x1f8b9,
+	0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0, 0x1f8ec, 0x1f8ec,
+	0x1f8f0, 0x1f8f1, 0x1f910, 0x1f913, 0x1f920, 0x1f921, 0x1f924, 0x1f925,
+	0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940, 0x1f942, 0x1f944,
+	0x1f948, 0x1f94a, 0x1f951, 0x1f951, 0x1f95d, 0x1f95d, 0x1f962, 0x1f962,
+	0x1f973, 0x1f973, 0x1f980, 0x1f981, 0x1f984, 0x1f986, 0x1f992, 0x1f993,
+	0x1f996, 0x1f99e, 0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, 0x1f9f0, 0x1f9f1,
+	0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03, 0x20000, 0x2000b,
+	0x20010, 0x20012, 0x20018, 0x20018, 0x2001a, 0x2001a, 0x20020, 0x20021,
+	0x20024, 0x20024, 0x20030, 0x20031, 0x20034, 0x20036, 0x23801, 0x23801,
+	0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, 0x23809, 0x23809,
+	0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, 0x23811, 0x23811,
+	0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, 0x23819, 0x23819,
+	0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, 0x23822, 0x23822,
+	0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, 0x2382a, 0x2382a,
+	0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, 0x23832, 0x23832,
+	0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838, 0x2383a, 0x2383a,
+	0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, 0x23b00, 0x23b01,
+	0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, 0x23b15, 0x23b16,
+	0x23b28, 0x23b28, 0x23b30, 0x23b30,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_gmu_registers), 8));
+
+/*
+ * Block   : ['GMUGX']
+ * Pipeline: PIPE_NONE
+ * pairs   : 4 (Regs:48)
+ */
+static const u32 gen7_11_0_gmugx_registers[] = {
+	0x1a802, 0x1a802, 0x1a883, 0x1a884, 0x1a900, 0x1a92b, 0x1a940, 0x1a940,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_gmugx_registers), 8));
+
+/*
+ * Block   : ['CX_MISC']
+ * Pipeline: PIPE_NONE
+ * pairs   : 5 (Regs:52)
+ */
+static const u32 gen7_11_0_cx_misc_registers[] = {
+	0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27828, 0x2782a,
+	0x27832, 0x27857,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_cx_misc_registers), 8));
+
+/*
+ * Block   : ['DBGC']
+ * Pipeline: PIPE_NONE
+ * pairs   : 8 (Regs:94)
+ */
+static const u32 gen7_11_0_dbgc_registers[] = {
+	0x00600, 0x0061c, 0x0061e, 0x00634, 0x00640, 0x0065b, 0x00679, 0x0067b,
+	0x00699, 0x00699, 0x0069b, 0x0069e, 0x006a0, 0x006a3, 0x006c0, 0x006c1,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_dbgc_registers), 8));
+
+/*
+ * Block   : ['CX_DBGC']
+ * Pipeline: PIPE_NONE
+ * pairs   : 5 (Regs:85)
+ */
+static const u32 gen7_11_0_cx_dbgc_registers[] = {
+	0x18400, 0x1841c, 0x1841e, 0x18434, 0x18440, 0x1845b, 0x18479, 0x1847b,
+	0x18580, 0x18581,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_cx_dbgc_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CP', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF']
+ * Block   : ['GMUAO', 'GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM']
+ * Block   : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 30 (Regs:529)
+ */
+static const u32 gen7_11_0_non_context_pipe_br_registers[] = {
+	0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
+	0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
+	0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
+	0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
+	0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
+	0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
+	0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
+	0x0a630, 0x0a631, 0x0a638, 0x0a63c,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_pipe_br_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CP', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF']
+ * Block   : ['GMUAO', 'GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM']
+ * Block   : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_NONE
+ * pairs   : 30 (Regs:529)
+ */
+static const u32 gen7_11_0_non_context_pipe_bv_registers[] = {
+	0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
+	0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
+	0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
+	0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
+	0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
+	0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
+	0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
+	0x0a630, 0x0a631, 0x0a638, 0x0a63c,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_pipe_bv_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CP', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF']
+ * Block   : ['GMUAO', 'GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM']
+ * Block   : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * pairs   : 2 (Regs:7)
+ */
+static const u32 gen7_11_0_non_context_pipe_lpac_registers[] = {
+	0x00887, 0x0088c, 0x00f80, 0x00f80,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_pipe_lpac_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 4 (Regs:33)
+ */
+static const u32 gen7_11_0_non_context_rb_pipe_br_rac_registers[] = {
+	0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e5a, 0x08ea0, 0x08ea3,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_rb_pipe_br_rac_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 15 (Regs:62)
+ */
+static const u32 gen7_11_0_non_context_rb_pipe_br_rbp_registers[] = {
+	0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c,
+	0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e40, 0x08e50, 0x08e50,
+	0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e66,
+	0x08e68, 0x08e69, 0x08e70, 0x08e79, 0x08e80, 0x08e8f,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_rb_pipe_br_rbp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * Location: HLSQ_STATE
+ * pairs   : 3 (Regs:20)
+ */
+static const u32 gen7_11_0_non_context_sp_pipe_br_hlsq_state_registers[] = {
+	0x0ae52, 0x0ae52, 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae73,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_br_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * Location: SP_TOP
+ * pairs   : 10 (Regs:60)
+ */
+static const u32 gen7_11_0_non_context_sp_pipe_br_sp_top_registers[] = {
+	0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c,
+	0x0ae0f, 0x0ae0f, 0x0ae28, 0x0ae2b, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f,
+	0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_br_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 10 (Regs:21)
+ */
+static const u32 gen7_11_0_non_context_sp_pipe_br_usptp_registers[] = {
+	0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c,
+	0x0ae0f, 0x0ae0f, 0x0ae30, 0x0ae32, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3b,
+	0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_br_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * Location: HLSQ_STATE
+ * pairs   : 1 (Regs:4)
+ */
+static const u32 gen7_11_0_non_context_sp_pipe_lpac_hlsq_state_registers[] = {
+	0x0af88, 0x0af8b,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_lpac_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * Location: SP_TOP
+ * pairs   : 1 (Regs:5)
+ */
+static const u32 gen7_11_0_non_context_sp_pipe_lpac_sp_top_registers[] = {
+	0x0af80, 0x0af84,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_lpac_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 2 (Regs:8)
+ */
+static const u32 gen7_11_0_non_context_sp_pipe_lpac_usptp_registers[] = {
+	0x0af80, 0x0af84, 0x0af90, 0x0af92,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_sp_pipe_lpac_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_NONE
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 5 (Regs:30)
+ */
+static const u32 gen7_11_0_non_context_tpl1_pipe_none_usptp_registers[] = {
+	0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c, 0x0b60f, 0x0b621,
+	0x0b630, 0x0b633,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_tpl1_pipe_none_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 1 (Regs:1)
+ */
+static const u32 gen7_11_0_non_context_tpl1_pipe_br_usptp_registers[] = {
+	0x0b600, 0x0b600,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_tpl1_pipe_br_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 1 (Regs:1)
+ */
+static const u32 gen7_11_0_non_context_tpl1_pipe_lpac_usptp_registers[] = {
+	0x0b780, 0x0b780,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_non_context_tpl1_pipe_lpac_usptp_registers), 8));
+
+/*
+ * Block   : ['GRAS']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_GRAS
+ * pairs   : 14 (Regs:290)
+ */
+static const u32 gen7_11_0_gras_pipe_br_cluster_gras_registers[] = {
+	0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
+	0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
+	0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08113, 0x08120, 0x0813f,
+	0x08400, 0x08406, 0x0840a, 0x0840b,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_gras_pipe_br_cluster_gras_registers), 8));
+
+/*
+ * Block   : ['GRAS']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_GRAS
+ * pairs   : 14 (Regs:290)
+ */
+static const u32 gen7_11_0_gras_pipe_bv_cluster_gras_registers[] = {
+	0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
+	0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
+	0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08113, 0x08120, 0x0813f,
+	0x08400, 0x08406, 0x0840a, 0x0840b,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_gras_pipe_bv_cluster_gras_registers), 8));
+
+/*
+ * Block   : ['PC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE
+ * pairs   : 6 (Regs:27)
+ */
+static const u32 gen7_11_0_pc_pipe_br_cluster_fe_registers[] = {
+	0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
+	0x09970, 0x09972, 0x09b00, 0x09b08,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_pc_pipe_br_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['PC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE
+ * pairs   : 6 (Regs:27)
+ */
+static const u32 gen7_11_0_pc_pipe_bv_cluster_fe_registers[] = {
+	0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
+	0x09970, 0x09972, 0x09b00, 0x09b08,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_pc_pipe_bv_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VFD']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE
+ * pairs   : 2 (Regs:236)
+ */
+static const u32 gen7_11_0_vfd_pipe_br_cluster_fe_registers[] = {
+	0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_vfd_pipe_br_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VFD']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE
+ * pairs   : 2 (Regs:236)
+ */
+static const u32 gen7_11_0_vfd_pipe_bv_cluster_fe_registers[] = {
+	0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_vfd_pipe_bv_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE
+ * pairs   : 1 (Regs:8)
+ */
+static const u32 gen7_11_0_vpc_pipe_br_cluster_fe_registers[] = {
+	0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_br_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_PC_VS
+ * pairs   : 2 (Regs:20)
+ */
+static const u32 gen7_11_0_vpc_pipe_br_cluster_pc_vs_registers[] = {
+	0x09101, 0x0910c, 0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_br_cluster_pc_vs_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_VPC_PS
+ * pairs   : 4 (Regs:60)
+ */
+static const u32 gen7_11_0_vpc_pipe_br_cluster_vpc_ps_registers[] = {
+	0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_br_cluster_vpc_ps_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE
+ * pairs   : 1 (Regs:8)
+ */
+static const u32 gen7_11_0_vpc_pipe_bv_cluster_fe_registers[] = {
+	0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_bv_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_PC_VS
+ * pairs   : 2 (Regs:20)
+ */
+static const u32 gen7_11_0_vpc_pipe_bv_cluster_pc_vs_registers[] = {
+	0x09101, 0x0910c, 0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_bv_cluster_pc_vs_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_VPC_PS
+ * pairs   : 4 (Regs:60)
+ */
+static const u32 gen7_11_0_vpc_pipe_bv_cluster_vpc_ps_registers[] = {
+	0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_vpc_pipe_bv_cluster_vpc_ps_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_PS
+ * pairs   : 39 (Regs:133)
+ */
+static const u32 gen7_11_0_rb_pipe_br_cluster_ps_rac_registers[] = {
+	0x08802, 0x08802, 0x08804, 0x08806, 0x08809, 0x0880a, 0x0880e, 0x08811,
+	0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829,
+	0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839,
+	0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849,
+	0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859,
+	0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876,
+	0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891,
+	0x08898, 0x08899, 0x088c0, 0x088c1, 0x088e5, 0x088e5, 0x088f4, 0x088f5,
+	0x08a00, 0x08a05, 0x08a10, 0x08a15, 0x08a20, 0x08a25, 0x08a30, 0x08a35,
+	0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_rb_pipe_br_cluster_ps_rac_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_PS
+ * pairs   : 34 (Regs:100)
+ */
+static const u32 gen7_11_0_rb_pipe_br_cluster_ps_rbp_registers[] = {
+	0x08800, 0x08801, 0x08803, 0x08803, 0x0880b, 0x0880d, 0x08812, 0x08812,
+	0x08820, 0x08820, 0x08822, 0x08822, 0x08827, 0x08828, 0x0882a, 0x0882a,
+	0x0882f, 0x08830, 0x08832, 0x08832, 0x08837, 0x08838, 0x0883a, 0x0883a,
+	0x0883f, 0x08840, 0x08842, 0x08842, 0x08847, 0x08848, 0x0884a, 0x0884a,
+	0x0884f, 0x08850, 0x08852, 0x08852, 0x08857, 0x08858, 0x0885a, 0x0885a,
+	0x0885f, 0x0885f, 0x08865, 0x08865, 0x08871, 0x08872, 0x08877, 0x08877,
+	0x08880, 0x08881, 0x08886, 0x08886, 0x08890, 0x08890, 0x088d0, 0x088e4,
+	0x088e8, 0x088ea, 0x088f0, 0x088f0, 0x08900, 0x0891a, 0x08927, 0x08928,
+	0x08c17, 0x08c17, 0x08c20, 0x08c25,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_rb_pipe_br_cluster_ps_rbp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 28 (Regs:211)
+ */
+static const u32 gen7_11_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers[] = {
+	0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
+	0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
+	0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
+	0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab03,
+	0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: SP_TOP
+ * pairs   : 21 (Regs:69)
+ */
+static const u32 gen7_11_0_sp_pipe_br_cluster_sp_vs_sp_top_registers[] = {
+	0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d,
+	0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840,
+	0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871,
+	0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af,
+	0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b,
+	0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_vs_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 16 (Regs:269)
+ */
+static const u32 gen7_11_0_sp_pipe_br_cluster_sp_vs_usptp_registers[] = {
+	0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
+	0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
+	0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
+	0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 19 (Regs:331)
+ */
+static const u32 gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers[] = {
+	0x0a980, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7, 0x0a9aa, 0x0a9aa,
+	0x0a9ae, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
+	0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e0, 0x0a9fc, 0x0aa00, 0x0aa00,
+	0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05,
+	0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_DP
+ * pairs   : 3 (Regs:19)
+ */
+static const u32 gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers[] = {
+	0x0a9b1, 0x0a9b1, 0x0a9c6, 0x0a9cb, 0x0a9d4, 0x0a9df,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: SP_TOP
+ * pairs   : 17 (Regs:73)
+ */
+static const u32 gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers[] = {
+	0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8,
+	0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5,
+	0x0a9ba, 0x0a9bc, 0x0a9c5, 0x0a9c5, 0x0a9e0, 0x0a9f9, 0x0aa00, 0x0aa01,
+	0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b,
+	0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 17 (Regs:331)
+ */
+static const u32 gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers[] = {
+	0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae,
+	0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3,
+	0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa01, 0x0aa01,
+	0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22,
+	0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 27 (Regs:209)
+ */
+static const u32 gen7_11_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers[] = {
+	0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
+	0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
+	0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
+	0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02,
+	0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: SP_TOP
+ * pairs   : 20 (Regs:67)
+ */
+static const u32 gen7_11_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers[] = {
+	0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d,
+	0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840,
+	0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871,
+	0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af,
+	0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 16 (Regs:266)
+ */
+static const u32 gen7_11_0_sp_pipe_bv_cluster_sp_vs_usptp_registers[] = {
+	0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
+	0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
+	0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
+	0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_bv_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 13 (Regs:294)
+ */
+static const u32 gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_state_registers[] = {
+	0x0a9b0, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
+	0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9fc,
+	0x0aa00, 0x0aa00, 0x0aa31, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab01,
+	0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_DP
+ * pairs   : 2 (Regs:13)
+ */
+static const u32 gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_dp_registers[] = {
+	0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_dp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: SP_TOP
+ * pairs   : 8 (Regs:33)
+ */
+static const u32 gen7_11_0_sp_pipe_lpac_cluster_sp_ps_sp_top_registers[] = {
+	0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9c5, 0x0a9c5,
+	0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_lpac_cluster_sp_ps_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 11 (Regs:279)
+ */
+static const u32 gen7_11_0_sp_pipe_lpac_cluster_sp_ps_usptp_registers[] = {
+	0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3,
+	0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31,
+	0x0aa40, 0x0aabf, 0x0ab00, 0x0ab01, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_sp_pipe_lpac_cluster_sp_ps_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 3 (Regs:10)
+ */
+static const u32 gen7_11_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers[] = {
+	0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 6 (Regs:42)
+ */
+static const u32 gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers[] = {
+	0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d5, 0x0b300, 0x0b307,
+	0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 3 (Regs:10)
+ */
+static const u32 gen7_11_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers[] = {
+	0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 5 (Regs:7)
+ */
+static const u32 gen7_11_0_tpl1_pipe_lpac_cluster_sp_ps_usptp_registers[] = {
+	0x0b180, 0x0b181, 0x0b300, 0x0b301, 0x0b307, 0x0b307, 0x0b309, 0x0b309,
+	0x0b310, 0x0b310,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_tpl1_pipe_lpac_cluster_sp_ps_usptp_registers), 8));
+
+static const struct gen7_sel_reg gen7_11_0_rb_rac_sel = {
+	.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0,
+};
+
+static const struct gen7_sel_reg gen7_11_0_rb_rbp_sel = {
+	.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0x9,
+};
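Editorial note (illustration only, not part of this commit): the two gen7_sel_reg entries above program the same RB sub-block select register with different values (0 selects the RAC view, 0x9 the RBP view) before the matching range list is read, which is why the clusters table below pairs each RB register list with one of these selectors. A hedged sketch of that sequence follows; kgsl_regwrite() is the driver's existing register-write helper, while dump_range_list() is a hypothetical stand-in for whatever routine actually walks the pair list.

/* Sketch only: select an RB sub-block view, then dump its ranges. */
static void dump_rb_cluster(struct kgsl_device *device,
		const struct gen7_sel_reg *sel, const u32 *ranges)
{
	/* Program both select registers (host and CD) with the chosen view */
	kgsl_regwrite(device, sel->host_reg, sel->val);
	kgsl_regwrite(device, sel->cd_reg, sel->val);

	dump_range_list(device, ranges);	/* hypothetical dump path */
}

For example, dump_rb_cluster(device, &gen7_11_0_rb_rac_sel, gen7_11_0_rb_pipe_br_cluster_ps_rac_registers) would capture the RAC view of the CLUSTER_PS ranges.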
+
+static struct gen7_cluster_registers gen7_11_0_clusters[] = {
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_11_0_non_context_pipe_br_registers,  },
+	{ CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT,
+		gen7_11_0_non_context_pipe_bv_registers,  },
+	{ CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT,
+		gen7_11_0_non_context_pipe_lpac_registers,  },
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_11_0_non_context_rb_pipe_br_rac_registers, &gen7_11_0_rb_rac_sel, },
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_11_0_non_context_rb_pipe_br_rbp_registers, &gen7_11_0_rb_rbp_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_11_0_rb_pipe_br_cluster_ps_rac_registers, &gen7_11_0_rb_rac_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_11_0_rb_pipe_br_cluster_ps_rac_registers, &gen7_11_0_rb_rac_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_11_0_rb_pipe_br_cluster_ps_rbp_registers, &gen7_11_0_rb_rbp_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_11_0_rb_pipe_br_cluster_ps_rbp_registers, &gen7_11_0_rb_rbp_sel, },
+	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_11_0_gras_pipe_br_cluster_gras_registers,  },
+	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_11_0_gras_pipe_br_cluster_gras_registers,  },
+	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_11_0_gras_pipe_bv_cluster_gras_registers,  },
+	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_11_0_gras_pipe_bv_cluster_gras_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_11_0_pc_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_11_0_pc_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_11_0_pc_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_11_0_pc_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_11_0_vfd_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_11_0_vfd_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_11_0_vfd_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_11_0_vfd_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_11_0_vpc_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_11_0_vpc_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_11_0_vpc_pipe_br_cluster_pc_vs_registers,  },
+	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_11_0_vpc_pipe_br_cluster_pc_vs_registers,  },
+	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_11_0_vpc_pipe_br_cluster_vpc_ps_registers,  },
+	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_11_0_vpc_pipe_br_cluster_vpc_ps_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_11_0_vpc_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_11_0_vpc_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_11_0_vpc_pipe_bv_cluster_pc_vs_registers,  },
+	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_11_0_vpc_pipe_bv_cluster_pc_vs_registers,  },
+	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_11_0_vpc_pipe_bv_cluster_vpc_ps_registers,  },
+	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_11_0_vpc_pipe_bv_cluster_vpc_ps_registers,  },
+};
+
+static struct gen7_sptp_cluster_registers gen7_11_0_sptp_clusters[] = {
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_11_0_non_context_sp_pipe_br_hlsq_state_registers, 0xae00},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP,
+		gen7_11_0_non_context_sp_pipe_br_sp_top_registers, 0xae00},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen7_11_0_non_context_sp_pipe_br_usptp_registers, 0xae00},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen7_11_0_non_context_sp_pipe_lpac_hlsq_state_registers, 0xaf80},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, SP_TOP,
+		gen7_11_0_non_context_sp_pipe_lpac_sp_top_registers, 0xaf80},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, USPTP,
+		gen7_11_0_non_context_sp_pipe_lpac_usptp_registers, 0xaf80},
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_NONE, 0, USPTP,
+		gen7_11_0_non_context_tpl1_pipe_none_usptp_registers, 0xb600},
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen7_11_0_non_context_tpl1_pipe_br_usptp_registers, 0xb600},
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP,
+		gen7_11_0_non_context_tpl1_pipe_lpac_usptp_registers, 0xb780},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_11_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP,
+		gen7_11_0_sp_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen7_11_0_sp_pipe_br_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE,
+		gen7_11_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP,
+		gen7_11_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen7_11_0_sp_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen7_11_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP,
+		gen7_11_0_sp_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen7_11_0_sp_pipe_br_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE,
+		gen7_11_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP,
+		gen7_11_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen7_11_0_sp_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP,
+		gen7_11_0_sp_pipe_lpac_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP,
+		gen7_11_0_sp_pipe_lpac_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen7_11_0_sp_pipe_lpac_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
+		gen7_11_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen7_11_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen7_11_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen7_11_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen7_11_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen7_11_0_tpl1_pipe_lpac_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
+		gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
+		gen7_11_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+};
+
+/*
+ * Before dumping the CP MVC
+ * Program CP_APERTURE_CNTL_* with pipeID={CP_PIPE}
+ * Then dump corresponding {Register_PIPE}
+ */
+
+static struct gen7_cp_indexed_reg gen7_11_0_cp_indexed_reg_list[] = {
+	{ GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x00040},
+	{ GEN7_CP_DRAW_STATE_ADDR, GEN7_CP_DRAW_STATE_DATA, 0x00100},
+	{ GEN7_CP_ROQ_DBG_ADDR, GEN7_CP_ROQ_DBG_DATA, 0x00800},
+	{ GEN7_CP_SQE_UCODE_DBG_ADDR, GEN7_CP_SQE_UCODE_DBG_DATA, 0x08000},
+	{ GEN7_CP_BV_DRAW_STATE_ADDR, GEN7_CP_BV_DRAW_STATE_DATA, 0x00100},
+	{ GEN7_CP_BV_ROQ_DBG_ADDR, GEN7_CP_BV_ROQ_DBG_DATA, 0x00800},
+	{ GEN7_CP_BV_SQE_UCODE_DBG_ADDR, GEN7_CP_BV_SQE_UCODE_DBG_DATA, 0x08000},
+	{ GEN7_CP_BV_SQE_STAT_ADDR, GEN7_CP_BV_SQE_STAT_DATA, 0x00040},
+	{ GEN7_CP_RESOURCE_TABLE_DBG_ADDR, GEN7_CP_RESOURCE_TABLE_DBG_DATA, 0x04100},
+	{ GEN7_CP_LPAC_DRAW_STATE_ADDR, GEN7_CP_LPAC_DRAW_STATE_DATA, 0x00100},
+	{ GEN7_CP_LPAC_ROQ_DBG_ADDR, GEN7_CP_LPAC_ROQ_DBG_DATA, 0x00200},
+	{ GEN7_CP_SQE_AC_UCODE_DBG_ADDR, GEN7_CP_SQE_AC_UCODE_DBG_DATA, 0x08000},
+	{ GEN7_CP_SQE_AC_STAT_ADDR, GEN7_CP_SQE_AC_STAT_DATA, 0x00040},
+	{ GEN7_CP_LPAC_FIFO_DBG_ADDR, GEN7_CP_LPAC_FIFO_DBG_DATA, 0x00040},
+};
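Editorial note (illustration only, not part of this commit): each entry in the table above pairs an index (ADDR) register with a data (DATA) register plus a word count, and the comment before it notes that CP_APERTURE_CNTL_* must select the pipe before dumping. A minimal sketch of the read pattern follows; kgsl_regwrite()/kgsl_regread() are the driver's register accessors, the aperture programming step is omitted because its exact encoding is not shown in this header, and the auto-incrementing DATA read is assumed from the usual CP indexed-register convention.

/* Sketch only: stream `size` dwords out of one ADDR/DATA debug interface. */
static void read_cp_indexed(struct kgsl_device *device,
		u32 addr_reg, u32 data_reg, u32 size, u32 *out)
{
	u32 i;

	/* Reset the read index, then read the DATA register repeatedly */
	kgsl_regwrite(device, addr_reg, 0);
	for (i = 0; i < size; i++)
		kgsl_regread(device, data_reg, &out[i]);
}

For the first entry above this would be read_cp_indexed(device, GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x40, buf).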
+
+/*
+ * Block   : ['DPM_LEAKAGE']
+ * Pipeline: PIPE_NONE
+ * pairs   : 9 (Regs:26)
+ */
+static const u32 gen7_11_0_dpm_lkg_registers[] = {
+	0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50,
+	0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_dpm_lkg_registers), 8));
+
+/*
+ * Block   : ['GPU_CC_GPU_CC_REG']
+ * Pipeline: PIPE_NONE
+ */
+static const u32 gen7_11_0_gpucc_registers[] = {
+	0x24000, 0x2400f, 0x24400, 0x2440f, 0x24800, 0x24805, 0x24c00, 0x24cff,
+	0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004,
+	0x26400, 0x26405, 0x2640a, 0x26413, 0x26418, 0x26448, 0x2644d, 0x2644e,
+	0x26450, 0x26452, 0x26454, 0x2645b, 0x26460, 0x26468, 0x2646d, 0x2646f,
+	0x26540, 0x2654e, 0x26554, 0x26573, 0x26576, 0x26576, 0x26600, 0x26616,
+	0x26620, 0x2662d, 0x26630, 0x26631, 0x26635, 0x26635, 0x26637, 0x26637,
+	0x2663a, 0x2663a, 0x26642, 0x26642, 0x26656, 0x26658, 0x2665b, 0x2665d,
+	0x2665f, 0x26662, UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_gpucc_registers), 8));
+
+/*
+ * Block   : ['CPR']
+ * Pipeline: PIPE_NONE
+ * pairs   : 20 (Regs:471)
+ */
+static const u32 gen7_11_0_cpr_registers[] = {
+	0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c,
+	0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850,
+	0x26880, 0x26897, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee,
+	0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f,
+	0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274c4,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_cpr_registers), 8));
+
+/*
+ * Block   : ['RSCC_RSC']
+ * Pipeline: PIPE_NONE
+ * pairs   : 99 (Regs:598)
+ */
+static const u32 gen7_11_0_rscc_registers[] = {
+	0x14000, 0x14034, 0x14036, 0x14036, 0x14040, 0x14042, 0x14044, 0x14045,
+	0x14047, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c, 0x14091, 0x14094,
+	0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, 0x14100, 0x14104,
+	0x14114, 0x14119, 0x14124, 0x14132, 0x14154, 0x1416b, 0x14340, 0x14341,
+	0x14344, 0x14344, 0x14346, 0x1437c, 0x143f0, 0x143f8, 0x143fa, 0x143fe,
+	0x14400, 0x14404, 0x14406, 0x1440a, 0x1440c, 0x14410, 0x14412, 0x14416,
+	0x14418, 0x1441c, 0x1441e, 0x14422, 0x14424, 0x14424, 0x14498, 0x144a0,
+	0x144a2, 0x144a6, 0x144a8, 0x144ac, 0x144ae, 0x144b2, 0x144b4, 0x144b8,
+	0x144ba, 0x144be, 0x144c0, 0x144c4, 0x144c6, 0x144ca, 0x144cc, 0x144cc,
+	0x14540, 0x14548, 0x1454a, 0x1454e, 0x14550, 0x14554, 0x14556, 0x1455a,
+	0x1455c, 0x14560, 0x14562, 0x14566, 0x14568, 0x1456c, 0x1456e, 0x14572,
+	0x14574, 0x14574, 0x145e8, 0x145f0, 0x145f2, 0x145f6, 0x145f8, 0x145fc,
+	0x145fe, 0x14602, 0x14604, 0x14608, 0x1460a, 0x1460e, 0x14610, 0x14614,
+	0x14616, 0x1461a, 0x1461c, 0x1461c, 0x14690, 0x14698, 0x1469a, 0x1469e,
+	0x146a0, 0x146a4, 0x146a6, 0x146aa, 0x146ac, 0x146b0, 0x146b2, 0x146b6,
+	0x146b8, 0x146bc, 0x146be, 0x146c2, 0x146c4, 0x146c4, 0x14738, 0x14740,
+	0x14742, 0x14746, 0x14748, 0x1474c, 0x1474e, 0x14752, 0x14754, 0x14758,
+	0x1475a, 0x1475e, 0x14760, 0x14764, 0x14766, 0x1476a, 0x1476c, 0x1476c,
+	0x147e0, 0x147e8, 0x147ea, 0x147ee, 0x147f0, 0x147f4, 0x147f6, 0x147fa,
+	0x147fc, 0x14800, 0x14802, 0x14806, 0x14808, 0x1480c, 0x1480e, 0x14812,
+	0x14814, 0x14814, 0x14888, 0x14890, 0x14892, 0x14896, 0x14898, 0x1489c,
+	0x1489e, 0x148a2, 0x148a4, 0x148a8, 0x148aa, 0x148ae, 0x148b0, 0x148b4,
+	0x148b6, 0x148ba, 0x148bc, 0x148bc, 0x14930, 0x14938, 0x1493a, 0x1493e,
+	0x14940, 0x14944, 0x14946, 0x1494a, 0x1494c, 0x14950, 0x14952, 0x14956,
+	0x14958, 0x1495c, 0x1495e, 0x14962, 0x14964, 0x14964,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_11_0_rscc_registers), 8));
+
+static struct gen7_reg_list gen7_11_0_reg_list[] = {
+	{ gen7_11_0_gpu_registers, NULL },
+	{ gen7_11_0_dbgc_registers, NULL },
+	{ gen7_11_0_cx_dbgc_registers, NULL },
+	{ NULL, NULL },
+};
+
+static const u32 *gen7_11_0_external_core_regs[] = {
+	gen7_11_0_gpucc_registers,
+	gen7_11_0_cpr_registers,
+	gen7_11_0_dpm_lkg_registers,
+};
+#endif
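
Each gen7_*_registers table in these snapshot headers stores inclusive (start, end) register address pairs and ends with a UINT_MAX, UINT_MAX sentinel. The stand-alone C sketch below is an editorial illustration only, not part of the diffed file; count_regs() and the example table are hypothetical names. It shows how such a sentinel-terminated table can be walked to derive the register counts quoted in the block comments.

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* Walk inclusive (start, end) pairs until the UINT_MAX sentinel. */
static unsigned int count_regs(const uint32_t *ranges)
{
	unsigned int total = 0;

	for (; ranges[0] != UINT_MAX; ranges += 2)
		total += ranges[1] - ranges[0] + 1;

	return total;
}

int main(void)
{
	/* Same shape as gen7_11_0_dpm_lkg_registers above. */
	static const uint32_t example[] = {
		0x21c00, 0x21c00, 0x21c08, 0x21c09,
		UINT_MAX, UINT_MAX,
	};

	printf("%u registers\n", count_regs(example));
	return 0;
}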

+ 752 - 0
qcom/opensource/graphics-kernel/adreno_gen7_2_0_snapshot.h

@@ -0,0 +1,752 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_GEN7_2_0_SNAPSHOT_H
+#define __ADRENO_GEN7_2_0_SNAPSHOT_H
+
+#include "adreno_gen7_snapshot.h"
+
+static const u32 gen7_2_0_debugbus_blocks[] = {
+	DEBUGBUS_CP_0_0,
+	DEBUGBUS_CP_0_1,
+	DEBUGBUS_RBBM,
+	DEBUGBUS_HLSQ,
+	DEBUGBUS_UCHE_0,
+	DEBUGBUS_UCHE_1,
+	DEBUGBUS_TESS_BR,
+	DEBUGBUS_TESS_BV,
+	DEBUGBUS_PC_BR,
+	DEBUGBUS_PC_BV,
+	DEBUGBUS_VFDP_BR,
+	DEBUGBUS_VFDP_BV,
+	DEBUGBUS_VPC_BR,
+	DEBUGBUS_VPC_BV,
+	DEBUGBUS_TSE_BR,
+	DEBUGBUS_TSE_BV,
+	DEBUGBUS_RAS_BR,
+	DEBUGBUS_RAS_BV,
+	DEBUGBUS_VSC,
+	DEBUGBUS_COM_0,
+	DEBUGBUS_LRZ_BR,
+	DEBUGBUS_LRZ_BV,
+	DEBUGBUS_UFC_0,
+	DEBUGBUS_UFC_1,
+	DEBUGBUS_GMU_GX,
+	DEBUGBUS_DBGC,
+	DEBUGBUS_GPC_BR,
+	DEBUGBUS_GPC_BV,
+	DEBUGBUS_LARC,
+	DEBUGBUS_HLSQ_SPTP,
+	DEBUGBUS_RB_0,
+	DEBUGBUS_RB_1,
+	DEBUGBUS_RB_2,
+	DEBUGBUS_RB_3,
+	DEBUGBUS_RB_4,
+	DEBUGBUS_RB_5,
+	DEBUGBUS_UCHE_WRAPPER,
+	DEBUGBUS_CCU_0,
+	DEBUGBUS_CCU_1,
+	DEBUGBUS_CCU_2,
+	DEBUGBUS_CCU_3,
+	DEBUGBUS_CCU_4,
+	DEBUGBUS_CCU_5,
+	DEBUGBUS_VFD_BR_0,
+	DEBUGBUS_VFD_BR_1,
+	DEBUGBUS_VFD_BR_2,
+	DEBUGBUS_VFD_BR_3,
+	DEBUGBUS_VFD_BR_4,
+	DEBUGBUS_VFD_BR_5,
+	DEBUGBUS_VFD_BV_0,
+	DEBUGBUS_VFD_BV_1,
+	DEBUGBUS_USP_0,
+	DEBUGBUS_USP_1,
+	DEBUGBUS_USP_2,
+	DEBUGBUS_USP_3,
+	DEBUGBUS_USP_4,
+	DEBUGBUS_USP_5,
+	DEBUGBUS_TP_0,
+	DEBUGBUS_TP_1,
+	DEBUGBUS_TP_2,
+	DEBUGBUS_TP_3,
+	DEBUGBUS_TP_4,
+	DEBUGBUS_TP_5,
+	DEBUGBUS_TP_6,
+	DEBUGBUS_TP_7,
+	DEBUGBUS_TP_8,
+	DEBUGBUS_TP_9,
+	DEBUGBUS_TP_10,
+	DEBUGBUS_TP_11,
+	DEBUGBUS_USPTP_0,
+	DEBUGBUS_USPTP_1,
+	DEBUGBUS_USPTP_2,
+	DEBUGBUS_USPTP_3,
+	DEBUGBUS_USPTP_4,
+	DEBUGBUS_USPTP_5,
+	DEBUGBUS_USPTP_6,
+	DEBUGBUS_USPTP_7,
+	DEBUGBUS_USPTP_8,
+	DEBUGBUS_USPTP_9,
+	DEBUGBUS_USPTP_10,
+	DEBUGBUS_USPTP_11,
+	DEBUGBUS_CCHE_0,
+	DEBUGBUS_CCHE_1,
+	DEBUGBUS_CCHE_2,
+};
+
+static struct gen7_shader_block gen7_2_0_shader_blocks[] = {
+	{TP0_TMO_DATA,                 0x200, 6, 2, PIPE_BR, USPTP},
+	{TP0_SMO_DATA,                  0x80, 6, 2, PIPE_BR, USPTP},
+	{TP0_MIPMAP_BASE_DATA,         0x3c0, 6, 2, PIPE_BR, USPTP},
+	{SP_INST_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_INST_DATA_1,               0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_0_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_1_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_2_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_3_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_4_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_5_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_6_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_7_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_CB_RAM,                    0x390, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_13_DATA,                0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_14_DATA,                0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_INST_TAG,                   0xc0, 6, 2, PIPE_BR, USPTP},
+	{SP_INST_DATA_2,               0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_TMO_TAG,                    0x80, 6, 2, PIPE_BR, USPTP},
+	{SP_SMO_TAG,                    0x80, 6, 2, PIPE_BR, USPTP},
+	{SP_STATE_DATA,                 0x40, 6, 2, PIPE_BR, USPTP},
+	{SP_HWAVE_RAM,                 0x100, 6, 2, PIPE_BR, USPTP},
+	{SP_L0_INST_BUF,                0x50, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_8_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_9_DATA,                 0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_10_DATA,                0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_11_DATA,                0x800, 6, 2, PIPE_BR, USPTP},
+	{SP_LB_12_DATA,                0x800, 6, 2, PIPE_BR, USPTP},
+	{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG,  0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG,  0x10, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CPS_BE_CTXT_BUF_RAM_TAG,  0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CVS_RAM,           0x1c0, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CVS_RAM,           0x1c0, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CHUNK_CPS_RAM,           0x300, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CPS_RAM,           0x180, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_CHUNK_CVS_RAM_TAG,        0x40, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CVS_RAM_TAG,        0x40, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CHUNK_CPS_RAM_TAG,        0x40, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CHUNK_CPS_RAM_TAG,        0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_ICB_CVS_CB_BASE_TAG,      0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_ICB_CVS_CB_BASE_TAG,      0x10, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_ICB_CPS_CB_BASE_TAG,      0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_ICB_CPS_CB_BASE_TAG,      0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_CVS_MISC_RAM,            0x280, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CVS_MISC_RAM,            0x280, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM,            0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM,            0x200, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM_1,          0x1c0, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_INST_RAM,                0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_INST_RAM,                0x800, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_INST_RAM,                0x200, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_CVS_MISC_RAM_TAG,         0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CVS_MISC_RAM_TAG,         0x10, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM_TAG,         0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_CPS_MISC_RAM_TAG,         0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_INST_RAM_TAG,             0x80, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_INST_RAM_TAG,             0x80, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_INST_RAM_TAG,             0x80, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_GFX_CVS_CONST_RAM_TAG,    0x64, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CVS_CONST_RAM_TAG,    0x38, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_GFX_CPS_CONST_RAM_TAG,    0x64, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CPS_CONST_RAM_TAG,    0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_GFX_CVS_CONST_RAM,       0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CVS_CONST_RAM,       0x800, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_GFX_CPS_CONST_RAM,       0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_GFX_CPS_CONST_RAM,       0x800, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_INST_RAM_1,              0x800, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_STPROC_META,              0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_BV_BE_META,               0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_BV_BE_META,               0x10, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_DATAPATH_META,            0x20, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_FRONTEND_META,            0x80, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_FRONTEND_META,            0x80, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_FRONTEND_META,            0x80, 1, 1, PIPE_LPAC, HLSQ_STATE},
+	{HLSQ_INDIRECT_META,            0x10, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_BACKEND_META,             0x40, 1, 1, PIPE_BR, HLSQ_STATE},
+	{HLSQ_BACKEND_META,             0x40, 1, 1, PIPE_BV, HLSQ_STATE},
+	{HLSQ_BACKEND_META,             0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
+};
+
+static const u32 gen7_2_0_gpu_registers[] = {
+	0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b,
+	0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044,
+	0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050,
+	0x00056, 0x00056, 0x00073, 0x0007d, 0x000ad, 0x000ae, 0x000b0, 0x000b0,
+	0x000b4, 0x000b4, 0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0,
+	0x000c4, 0x000c4, 0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0,
+	0x000d4, 0x000d4, 0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0,
+	0x000e4, 0x000e4, 0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0,
+	0x000f4, 0x000f4, 0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010c,
+	0x0010f, 0x0011d, 0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00211, 0x00211,
+	0x00215, 0x00253, 0x00260, 0x00270, 0x00272, 0x00274, 0x00281, 0x0028d,
+	0x00300, 0x00401, 0x00410, 0x00451, 0x00460, 0x004a3, 0x004c0, 0x004d1,
+	0x00500, 0x00500, 0x00507, 0x0050b, 0x0050f, 0x0050f, 0x00511, 0x00511,
+	0x00533, 0x00536, 0x00540, 0x00555, 0x00564, 0x00567, 0x00574, 0x00577,
+	0x00584, 0x0059b, 0x005fb, 0x005ff, 0x00800, 0x00808, 0x00810, 0x00813,
+	0x00820, 0x00821, 0x00823, 0x00827, 0x00830, 0x00834, 0x0083f, 0x00841,
+	0x00843, 0x00847, 0x0084f, 0x00886, 0x008a0, 0x008ab, 0x008c0, 0x008c0,
+	0x008c4, 0x008c6, 0x008d0, 0x008dd, 0x008e0, 0x008e6, 0x008f0, 0x008f3,
+	0x00900, 0x00903, 0x00908, 0x00911, 0x00928, 0x0093e, 0x00942, 0x0094d,
+	0x00980, 0x00984, 0x0098d, 0x0098f, 0x009b0, 0x009b4, 0x009c2, 0x009c9,
+	0x009ce, 0x009d7, 0x009e0, 0x009e7, 0x00a00, 0x00a00, 0x00a02, 0x00a03,
+	0x00a10, 0x00a4f, 0x00a61, 0x00a9f, 0x00ad0, 0x00adb, 0x00b00, 0x00b31,
+	0x00b35, 0x00b3c, 0x00b40, 0x00b40, 0x00c00, 0x00c00, 0x00c02, 0x00c04,
+	0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, 0x00df0, 0x00df4,
+	0x00e01, 0x00e02, 0x00e07, 0x00e0e, 0x00e10, 0x00e13, 0x00e17, 0x00e19,
+	0x00e1b, 0x00e2b, 0x00e30, 0x00e32, 0x00e38, 0x00e3c, 0x00e40, 0x00e4b,
+	0x0ec00, 0x0ec01, 0x0ec05, 0x0ec05, 0x0ec07, 0x0ec07, 0x0ec0a, 0x0ec0a,
+	0x0ec12, 0x0ec12, 0x0ec26, 0x0ec28, 0x0ec2b, 0x0ec2d, 0x0ec2f, 0x0ec2f,
+	0x0ec40, 0x0ec41, 0x0ec45, 0x0ec45, 0x0ec47, 0x0ec47, 0x0ec4a, 0x0ec4a,
+	0x0ec52, 0x0ec52, 0x0ec66, 0x0ec68, 0x0ec6b, 0x0ec6d, 0x0ec6f, 0x0ec6f,
+	0x0ec80, 0x0ec81, 0x0ec85, 0x0ec85, 0x0ec87, 0x0ec87, 0x0ec8a, 0x0ec8a,
+	0x0ec92, 0x0ec92, 0x0eca6, 0x0eca8, 0x0ecab, 0x0ecad, 0x0ecaf, 0x0ecaf,
+	0x0ecc0, 0x0ecc1, 0x0ecc5, 0x0ecc5, 0x0ecc7, 0x0ecc7, 0x0ecca, 0x0ecca,
+	0x0ecd2, 0x0ecd2, 0x0ece6, 0x0ece8, 0x0eceb, 0x0eced, 0x0ecef, 0x0ecef,
+	0x0ed00, 0x0ed01, 0x0ed05, 0x0ed05, 0x0ed07, 0x0ed07, 0x0ed0a, 0x0ed0a,
+	0x0ed12, 0x0ed12, 0x0ed26, 0x0ed28, 0x0ed2b, 0x0ed2d, 0x0ed2f, 0x0ed2f,
+	0x0ed40, 0x0ed41, 0x0ed45, 0x0ed45, 0x0ed47, 0x0ed47, 0x0ed4a, 0x0ed4a,
+	0x0ed52, 0x0ed52, 0x0ed66, 0x0ed68, 0x0ed6b, 0x0ed6d, 0x0ed6f, 0x0ed6f,
+	0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85, 0x0ed87, 0x0ed87, 0x0ed8a, 0x0ed8a,
+	0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8, 0x0edab, 0x0edad, 0x0edaf, 0x0edaf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_gpu_registers), 8));
+
+static const u32 gen7_2_0_gmu_registers[] = {
+	0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403,
+	0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03,
+	0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403,
+	0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03,
+	0x1a79b, 0x1a79b, 0x1a7ac, 0x1a7b9, 0x1a7dc, 0x1a7dd, 0x1a7e0, 0x1a7e1,
+	0x1a803, 0x1a803, 0x1a805, 0x1a806, 0x1a84e, 0x1a84e, 0x1a856, 0x1a856,
+	0x1f400, 0x1f40d, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507,
+	0x1f509, 0x1f50b, 0x1f700, 0x1f701, 0x1f704, 0x1f706, 0x1f708, 0x1f709,
+	0x1f70c, 0x1f70d, 0x1f710, 0x1f711, 0x1f713, 0x1f716, 0x1f720, 0x1f724,
+	0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f760, 0x1f761, 0x1f764, 0x1f76b,
+	0x1f800, 0x1f804, 0x1f807, 0x1f808, 0x1f80b, 0x1f80c, 0x1f80f, 0x1f80f,
+	0x1f811, 0x1f811, 0x1f813, 0x1f817, 0x1f819, 0x1f81c, 0x1f824, 0x1f82a,
+	0x1f82d, 0x1f830, 0x1f840, 0x1f853, 0x1f860, 0x1f860, 0x1f862, 0x1f864,
+	0x1f868, 0x1f868, 0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889,
+	0x1f8a0, 0x1f8a2, 0x1f890, 0x1f892, 0x1f894, 0x1f896, 0x1f8a4, 0x1f8af,
+	0x1f8b8, 0x1f8b9, 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0,
+	0x1f8ec, 0x1f8ec, 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f917, 0x1f920, 0x1f921,
+	0x1f924, 0x1f925, 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940,
+	0x1f942, 0x1f944, 0x1f948, 0x1f94a, 0x1f94f, 0x1f951, 0x1f954, 0x1f955,
+	0x1f958, 0x1f95a, 0x1f95d, 0x1f95d, 0x1f962, 0x1f96b, 0x1f970, 0x1f979,
+	0x1f97c, 0x1f97c, 0x1f980, 0x1f981, 0x1f984, 0x1f986, 0x1f992, 0x1f993,
+	0x1f996, 0x1f99e, 0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, 0x1f9f0, 0x1f9f1,
+	0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03, 0x20000, 0x20012,
+	0x20018, 0x20018, 0x2001a, 0x2001a, 0x20020, 0x20024, 0x20030, 0x20031,
+	0x20034, 0x20036, 0x23801, 0x23801, 0x23803, 0x23803, 0x23805, 0x23805,
+	0x23807, 0x23807, 0x23809, 0x23809, 0x2380b, 0x2380b, 0x2380d, 0x2380d,
+	0x2380f, 0x2380f, 0x23811, 0x23811, 0x23813, 0x23813, 0x23815, 0x23815,
+	0x23817, 0x23817, 0x23819, 0x23819, 0x2381b, 0x2381b, 0x2381d, 0x2381d,
+	0x2381f, 0x23820, 0x23822, 0x23822, 0x23824, 0x23824, 0x23826, 0x23826,
+	0x23828, 0x23828, 0x2382a, 0x2382a, 0x2382c, 0x2382c, 0x2382e, 0x2382e,
+	0x23830, 0x23830, 0x23832, 0x23832, 0x23834, 0x23834, 0x23836, 0x23836,
+	0x23838, 0x23838, 0x2383a, 0x2383a, 0x2383c, 0x2383c, 0x2383e, 0x2383e,
+	0x23840, 0x23847, 0x23b00, 0x23b01, 0x23b03, 0x23b03, 0x23b05, 0x23b0e,
+	0x23b10, 0x23b13, 0x23b15, 0x23b16, 0x23b20, 0x23b20, 0x23b28, 0x23b28,
+	0x23b30, 0x23b30,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_gmu_registers), 8));
+
+static const u32 gen7_2_0_gmugx_registers[] = {
+	0x1a400, 0x1a41f, 0x1a440, 0x1a45f, 0x1a480, 0x1a49f, 0x1a4c0, 0x1a4df,
+	0x1a500, 0x1a51f, 0x1a540, 0x1a55f, 0x1a580, 0x1a59f, 0x1a5c0, 0x1a5df,
+	0x1a600, 0x1a61f, 0x1a640, 0x1a65f, 0x1a780, 0x1a781, 0x1a783, 0x1a785,
+	0x1a787, 0x1a789, 0x1a78b, 0x1a78d, 0x1a78f, 0x1a791, 0x1a793, 0x1a795,
+	0x1a797, 0x1a799, 0x1a79c, 0x1a79d, 0x1a79f, 0x1a79f, 0x1a7a0, 0x1a7a1,
+	0x1a7a3, 0x1a7a3, 0x1a7a8, 0x1a7ab, 0x1a7c0, 0x1a7c1, 0x1a7c4, 0x1a7c5,
+	0x1a7c8, 0x1a7c9, 0x1a7cc, 0x1a7cd, 0x1a7d0, 0x1a7d1, 0x1a7d4, 0x1a7d5,
+	0x1a7d8, 0x1a7d9, 0x1a7fc, 0x1a7fd, 0x1a800, 0x1a802, 0x1a804, 0x1a804,
+	0x1a816, 0x1a816, 0x1a81e, 0x1a81e, 0x1a826, 0x1a826, 0x1a82e, 0x1a82e,
+	0x1a836, 0x1a836, 0x1a83e, 0x1a83e, 0x1a846, 0x1a846, 0x1a860, 0x1a862,
+	0x1a864, 0x1a867, 0x1a870, 0x1a870, 0x1a883, 0x1a884, 0x1a8c0, 0x1a8c2,
+	0x1a8c4, 0x1a8c7, 0x1a8d0, 0x1a8d3, 0x1a900, 0x1a92b, 0x1a940, 0x1a940,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_gmugx_registers), 8));
+
+static const u32 gen7_2_0_noncontext_pipe_br_registers[] = {
+	0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
+	0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
+	0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
+	0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
+	0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
+	0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
+	0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
+	0x0a630, 0x0a631, 0x0a638, 0x0a63c,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_noncontext_pipe_br_registers), 8));
+
+static const u32 gen7_2_0_noncontext_pipe_bv_registers[] = {
+	0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
+	0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
+	0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
+	0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
+	0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
+	0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
+	0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
+	0x0a630, 0x0a631, 0x0a638, 0x0a63c,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_noncontext_pipe_bv_registers), 8));
+
+static const u32 gen7_2_0_noncontext_rb_rac_pipe_br_registers[] = {
+	0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e5a, 0x08ea0, 0x08ea3,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_noncontext_rb_rac_pipe_br_registers), 8));
+
+static const u32 gen7_2_0_noncontext_rb_rbp_pipe_br_registers[] = {
+	0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c,
+	0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e40, 0x08e50, 0x08e50,
+	0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e66,
+	0x08e68, 0x08e69, 0x08e70, 0x08e79, 0x08e80, 0x08e8f,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_noncontext_rb_rbp_pipe_br_registers), 8));
+
+static const u32 gen7_2_0_gras_cluster_gras_pipe_br_registers[] = {
+	0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
+	0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
+	0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08113, 0x08120, 0x0813f,
+	0x08400, 0x08406, 0x0840a, 0x0840b,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_gras_cluster_gras_pipe_br_registers), 8));
+
+static const u32 gen7_2_0_gras_cluster_gras_pipe_bv_registers[] = {
+	0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
+	0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
+	0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08113, 0x08120, 0x0813f,
+	0x08400, 0x08406, 0x0840a, 0x0840b,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_gras_cluster_gras_pipe_bv_registers), 8));
+
+static const u32 gen7_2_0_rb_rac_cluster_ps_pipe_br_registers[] = {
+	0x08802, 0x08802, 0x08804, 0x08806, 0x08809, 0x0880a, 0x0880e, 0x08811,
+	0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829,
+	0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839,
+	0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849,
+	0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859,
+	0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876,
+	0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891,
+	0x08898, 0x08899, 0x088c0, 0x088c1, 0x088e5, 0x088e5, 0x088f4, 0x088f5,
+	0x08a00, 0x08a05, 0x08a10, 0x08a15, 0x08a20, 0x08a25, 0x08a30, 0x08a35,
+	0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_rb_rac_cluster_ps_pipe_br_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers[] = {
+	0x0a980, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7, 0x0a9aa, 0x0a9aa,
+	0x0a9ae, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
+	0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e0, 0x0a9fc, 0x0aa00, 0x0aa00,
+	0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05,
+	0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers[] = {
+	0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8,
+	0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5,
+	0x0a9ba, 0x0a9bc, 0x0a9c5, 0x0a9c5, 0x0a9e0, 0x0a9f9, 0x0aa00, 0x0aa01,
+	0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b,
+	0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers[] = {
+	0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae,
+	0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3,
+	0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa01, 0x0aa01,
+	0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22,
+	0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers[] = {
+	0x0a9b0, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
+	0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9fc,
+	0x0aa00, 0x0aa00, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers[] = {
+	0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9c5, 0x0a9c5,
+	0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers[] = {
+	0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3,
+	0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31,
+	0x0ab00, 0x0ab01,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers[] = {
+	0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
+	0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
+	0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
+	0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab03,
+	0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_br_sp_top_registers[] = {
+	0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d,
+	0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840,
+	0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871,
+	0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af,
+	0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b,
+	0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_br_sp_top_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_br_usptp_registers[] = {
+	0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
+	0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
+	0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
+	0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_br_usptp_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers[] = {
+	0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
+	0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
+	0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
+	0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02,
+	0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers[] = {
+	0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d,
+	0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840,
+	0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871,
+	0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af,
+	0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers), 8));
+
+static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_bv_usptp_registers[] = {
+	0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
+	0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
+	0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
+	0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_bv_usptp_registers), 8));
+
+static const u32 gen7_2_0_sp_noncontext_pipe_lpac_hlsq_state_registers[] = {
+	0x0af88, 0x0af8b,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_noncontext_pipe_lpac_hlsq_state_registers), 8));
+
+static const struct gen7_sel_reg gen7_2_0_rb_rac_sel = {
+	.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0x0,
+};
+
+static const struct gen7_sel_reg gen7_2_0_rb_rbp_sel = {
+	.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0x9,
+};
+
+static struct gen7_cluster_registers gen7_2_0_clusters[] = {
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_2_0_noncontext_pipe_br_registers, },
+	{ CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT,
+		gen7_2_0_noncontext_pipe_bv_registers, },
+	{ CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT,
+		gen7_0_0_noncontext_pipe_lpac_registers, },
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_2_0_noncontext_rb_rac_pipe_br_registers, &gen7_2_0_rb_rac_sel, },
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_2_0_noncontext_rb_rbp_pipe_br_registers, &gen7_2_0_rb_rbp_sel, },
+	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_2_0_gras_cluster_gras_pipe_br_registers, },
+	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_2_0_gras_cluster_gras_pipe_bv_registers, },
+	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_2_0_gras_cluster_gras_pipe_br_registers, },
+	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_2_0_gras_cluster_gras_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_pc_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_pc_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_2_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_2_0_rb_rac_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_2_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_2_0_rb_rac_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_2_0_rb_rbp_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_2_0_rb_rbp_sel, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
+	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
+	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
+	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
+	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
+	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
+	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
+	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
+	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
+};
+
+static struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = {
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP,
+		gen7_0_0_sp_noncontext_pipe_br_sp_top_registers, 0xae00 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_sp_noncontext_pipe_br_usptp_registers, 0xae00 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen7_2_0_sp_noncontext_pipe_lpac_hlsq_state_registers, 0xaf80 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, SP_TOP,
+		gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers, 0xaf80 },
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, USPTP,
+		gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers, 0xaf80 },
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 },
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP,
+		gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen7_2_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP,
+		gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen7_2_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_2_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE,
+		gen7_2_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP,
+		gen7_2_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP,
+		gen7_2_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen7_2_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen7_2_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen7_2_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE,
+		gen7_2_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP,
+		gen7_2_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP,
+		gen7_2_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen7_2_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen7_2_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
+	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers, 0xb000 },
+	{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 },
+	{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 },
+};
+
+static const u32 gen7_2_0_dbgc_registers[] = {
+	0x005ff, 0x0061c, 0x0061e, 0x00634, 0x00640, 0x0065e, 0x00679, 0x0067e,
+	0x00699, 0x00699, 0x0069b, 0x0069e, 0x006a0, 0x006a3, 0x006c0, 0x006c1,
+	0x18400, 0x1841c, 0x1841e, 0x18434, 0x18440, 0x1845c, 0x18479, 0x1847c,
+	0x18580, 0x18581,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_dbgc_registers), 8));
+
+static const u32 gen7_2_0_rscc_registers[] = {
+	0x14000, 0x14036, 0x14040, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c,
+	0x14091, 0x14094, 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac,
+	0x14100, 0x14104, 0x14114, 0x14119, 0x14124, 0x14132, 0x14154, 0x1416b,
+	0x14340, 0x14342, 0x14344, 0x1437c, 0x143f0, 0x143f8, 0x143fa, 0x143fe,
+	0x14400, 0x14404, 0x14406, 0x1440a, 0x1440c, 0x14410, 0x14412, 0x14416,
+	0x14418, 0x1441c, 0x1441e, 0x14422, 0x14424, 0x14424, 0x14498, 0x144a0,
+	0x144a2, 0x144a6, 0x144a8, 0x144ac, 0x144ae, 0x144b2, 0x144b4, 0x144b8,
+	0x144ba, 0x144be, 0x144c0, 0x144c4, 0x144c6, 0x144ca, 0x144cc, 0x144cc,
+	0x14540, 0x14548, 0x1454a, 0x1454e, 0x14550, 0x14554, 0x14556, 0x1455a,
+	0x1455c, 0x14560, 0x14562, 0x14566, 0x14568, 0x1456c, 0x1456e, 0x14572,
+	0x14574, 0x14574, 0x145e8, 0x145f0, 0x145f2, 0x145f6, 0x145f8, 0x145fc,
+	0x145fe, 0x14602, 0x14604, 0x14608, 0x1460a, 0x1460e, 0x14610, 0x14614,
+	0x14616, 0x1461a, 0x1461c, 0x1461c, 0x14690, 0x14698, 0x1469a, 0x1469e,
+	0x146a0, 0x146a4, 0x146a6, 0x146aa, 0x146ac, 0x146b0, 0x146b2, 0x146b6,
+	0x146b8, 0x146bc, 0x146be, 0x146c2, 0x146c4, 0x146c4, 0x14738, 0x14740,
+	0x14742, 0x14746, 0x14748, 0x1474c, 0x1474e, 0x14752, 0x14754, 0x14758,
+	0x1475a, 0x1475e, 0x14760, 0x14764, 0x14766, 0x1476a, 0x1476c, 0x1476c,
+	0x147e0, 0x147e8, 0x147ea, 0x147ee, 0x147f0, 0x147f4, 0x147f6, 0x147fa,
+	0x147fc, 0x14800, 0x14802, 0x14806, 0x14808, 0x1480c, 0x1480e, 0x14812,
+	0x14814, 0x14814, 0x14888, 0x14890, 0x14892, 0x14896, 0x14898, 0x1489c,
+	0x1489e, 0x148a2, 0x148a4, 0x148a8, 0x148aa, 0x148ae, 0x148b0, 0x148b4,
+	0x148b6, 0x148ba, 0x148bc, 0x148bc, 0x14930, 0x14938, 0x1493a, 0x1493e,
+	0x14940, 0x14944, 0x14946, 0x1494a, 0x1494c, 0x14950, 0x14952, 0x14956,
+	0x14958, 0x1495c, 0x1495e, 0x14962, 0x14964, 0x14964,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_rscc_registers), 8));
+
+static const u32 gen7_2_0_cpr_registers[] = {
+	0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c,
+	0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850,
+	0x26880, 0x2689e, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee,
+	0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f,
+	0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274ad,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_cpr_registers), 8));
+
+static const u32 gen7_2_0_dpm_lkg_registers[] = {
+	0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50,
+	0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_dpm_lkg_registers), 8));
+
+static const u32 gen7_2_0_gpucc_registers[] = {
+	0x24000, 0x2400f, 0x24400, 0x2440f, 0x24800, 0x24805, 0x24c00, 0x24cff,
+	0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004,
+	0x26400, 0x26405, 0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26433,
+	0x26441, 0x2644b, 0x2644d, 0x26457, 0x26466, 0x26468, 0x26478, 0x2647a,
+	0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a4, 0x264c5, 0x264c7,
+	0x264d6, 0x264d8, 0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2651c, 0x2651e,
+	0x26540, 0x26576, 0x26600, 0x26616, 0x26620, 0x2662d, 0x26630, 0x26631,
+	0x26635, 0x26635, 0x26637, 0x26637, 0x2663a, 0x2663a, 0x26642, 0x26642,
+	0x26656, 0x26658, 0x2665b, 0x2665d, 0x2665f, 0x26662,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_gpucc_registers), 8));
+
+static const u32 gen7_2_0_cx_misc_registers[] = {
+	0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27832, 0x27857,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_cx_misc_registers), 8));
+
+static const u32 gen7_2_0_dpm_registers[] = {
+	0x1aa00, 0x1aa06, 0x1aa09, 0x1aa0a, 0x1aa0c, 0x1aa0d, 0x1aa0f, 0x1aa12,
+	0x1aa14, 0x1aa47, 0x1aa50, 0x1aa51,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_2_0_dpm_registers), 8));
+
+static struct gen7_reg_list gen7_2_0_reg_list[] = {
+	{ gen7_2_0_gpu_registers, NULL },
+	{ gen7_2_0_dpm_registers, NULL },
+	{ gen7_2_0_dbgc_registers, NULL },
+	{ NULL, NULL },
+};
+
+static const u32 *gen7_2_0_external_core_regs[] = {
+	gen7_2_0_gpucc_registers,
+	gen7_2_0_cpr_registers,
+	gen7_2_0_dpm_lkg_registers,
+};
+#endif /* __ADRENO_GEN7_2_0_SNAPSHOT_H */
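
The static_assert(IS_ALIGNED(sizeof(...), 8)) lines that follow most of these tables verify at compile time that an array holds an even number of u32 entries, so every range, sentinel included, carries both a start and an end address. A minimal userspace sketch of the same check, assuming an open-coded IS_ALIGNED() stand-in and a made-up demo_registers table:

#include <assert.h>
#include <stdint.h>

/* Userspace stand-in for the kernel's IS_ALIGNED() helper. */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

/* A table shaped like the snapshot register lists above. */
static const uint32_t demo_registers[] = {
	0x08e10, 0x08e1c, 0x08e20, 0x08e25,
	0xffffffffu, 0xffffffffu,
};

/* Rejects, at build time, a table with an odd number of entries. */
static_assert(IS_ALIGNED(sizeof(demo_registers), 8),
	      "register table must contain complete start/end pairs");

int main(void)
{
	return 0;
}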

+ 1425 - 0
qcom/opensource/graphics-kernel/adreno_gen7_9_0_snapshot.h

@@ -0,0 +1,1425 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_GEN7_9_0_SNAPSHOT_H
+#define __ADRENO_GEN7_9_0_SNAPSHOT_H
+
+#include "adreno_gen7_snapshot.h"
+
+static const u32 gen7_9_0_debugbus_blocks[] = {
+	DEBUGBUS_CP_0_0,
+	DEBUGBUS_CP_0_1,
+	DEBUGBUS_RBBM,
+	DEBUGBUS_HLSQ,
+	DEBUGBUS_UCHE_0,
+	DEBUGBUS_UCHE_1,
+	DEBUGBUS_TESS_BR,
+	DEBUGBUS_TESS_BV,
+	DEBUGBUS_PC_BR,
+	DEBUGBUS_PC_BV,
+	DEBUGBUS_VFDP_BR,
+	DEBUGBUS_VFDP_BV,
+	DEBUGBUS_VPC_BR,
+	DEBUGBUS_VPC_BV,
+	DEBUGBUS_TSE_BR,
+	DEBUGBUS_TSE_BV,
+	DEBUGBUS_RAS_BR,
+	DEBUGBUS_RAS_BV,
+	DEBUGBUS_VSC,
+	DEBUGBUS_COM_0,
+	DEBUGBUS_LRZ_BR,
+	DEBUGBUS_LRZ_BV,
+	DEBUGBUS_UFC_0,
+	DEBUGBUS_UFC_1,
+	DEBUGBUS_GMU_GX,
+	DEBUGBUS_DBGC,
+	DEBUGBUS_GPC_BR,
+	DEBUGBUS_GPC_BV,
+	DEBUGBUS_LARC,
+	DEBUGBUS_HLSQ_SPTP,
+	DEBUGBUS_RB_0,
+	DEBUGBUS_RB_1,
+	DEBUGBUS_RB_2,
+	DEBUGBUS_RB_3,
+	DEBUGBUS_RB_4,
+	DEBUGBUS_RB_5,
+	DEBUGBUS_UCHE_WRAPPER,
+	DEBUGBUS_CCU_0,
+	DEBUGBUS_CCU_1,
+	DEBUGBUS_CCU_2,
+	DEBUGBUS_CCU_3,
+	DEBUGBUS_CCU_4,
+	DEBUGBUS_CCU_5,
+	DEBUGBUS_VFD_BR_0,
+	DEBUGBUS_VFD_BR_1,
+	DEBUGBUS_VFD_BR_2,
+	DEBUGBUS_VFD_BV_0,
+	DEBUGBUS_VFD_BV_1,
+	DEBUGBUS_VFD_BV_2,
+	DEBUGBUS_USP_0,
+	DEBUGBUS_USP_1,
+	DEBUGBUS_USP_2,
+	DEBUGBUS_USP_3,
+	DEBUGBUS_USP_4,
+	DEBUGBUS_USP_5,
+	DEBUGBUS_TP_0,
+	DEBUGBUS_TP_1,
+	DEBUGBUS_TP_2,
+	DEBUGBUS_TP_3,
+	DEBUGBUS_TP_4,
+	DEBUGBUS_TP_5,
+	DEBUGBUS_TP_6,
+	DEBUGBUS_TP_7,
+	DEBUGBUS_TP_8,
+	DEBUGBUS_TP_9,
+	DEBUGBUS_TP_10,
+	DEBUGBUS_TP_11,
+	DEBUGBUS_USPTP_0,
+	DEBUGBUS_USPTP_1,
+	DEBUGBUS_USPTP_2,
+	DEBUGBUS_USPTP_3,
+	DEBUGBUS_USPTP_4,
+	DEBUGBUS_USPTP_5,
+	DEBUGBUS_USPTP_6,
+	DEBUGBUS_USPTP_7,
+	DEBUGBUS_USPTP_8,
+	DEBUGBUS_USPTP_9,
+	DEBUGBUS_USPTP_10,
+	DEBUGBUS_USPTP_11,
+	DEBUGBUS_CCHE_0,
+	DEBUGBUS_CCHE_1,
+	DEBUGBUS_CCHE_2,
+	DEBUGBUS_VPC_DSTR_0,
+	DEBUGBUS_VPC_DSTR_1,
+	DEBUGBUS_VPC_DSTR_2,
+	DEBUGBUS_HLSQ_DP_STR_0,
+	DEBUGBUS_HLSQ_DP_STR_1,
+	DEBUGBUS_HLSQ_DP_STR_2,
+	DEBUGBUS_HLSQ_DP_STR_3,
+	DEBUGBUS_HLSQ_DP_STR_4,
+	DEBUGBUS_HLSQ_DP_STR_5,
+	DEBUGBUS_UFC_DSTR_0,
+	DEBUGBUS_UFC_DSTR_1,
+	DEBUGBUS_UFC_DSTR_2,
+	DEBUGBUS_CGC_SUBCORE,
+	DEBUGBUS_CGC_CORE,
+};
+
+static const u32 gen7_9_0_gbif_debugbus_blocks[] = {
+	DEBUGBUS_GBIF_GX,
+};
+
+static const u32 gen7_9_0_cx_debugbus_blocks[] = {
+	DEBUGBUS_CX,
+	DEBUGBUS_GMU_CX,
+	DEBUGBUS_GBIF_CX,
+};
+
+static struct gen7_shader_block gen7_9_0_shader_blocks[] = {
+	{ TP0_TMO_DATA, 0x0200, 6, 2, PIPE_BR, USPTP },
+	{ TP0_SMO_DATA, 0x0080, 6, 2, PIPE_BR, USPTP },
+	{ TP0_MIPMAP_BASE_DATA, 0x03C0, 6, 2, PIPE_BR, USPTP },
+	{ SP_INST_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_INST_DATA_1, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_0_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_1_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_2_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_3_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_4_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_5_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_6_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_7_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_CB_RAM, 0x0390, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_13_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_14_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_INST_TAG, 0x00C0, 6, 2, PIPE_BR, USPTP },
+	{ SP_INST_DATA_2, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_TMO_TAG, 0x0080, 6, 2, PIPE_BR, USPTP },
+	{ SP_SMO_TAG, 0x0080, 6, 2, PIPE_BR, USPTP },
+	{ SP_STATE_DATA, 0x0040, 6, 2, PIPE_BR, USPTP },
+	{ SP_HWAVE_RAM, 0x0100, 6, 2, PIPE_BR, USPTP },
+	{ SP_L0_INST_BUF, 0x0050, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_8_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_9_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_10_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_11_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ SP_LB_12_DATA, 0x0800, 6, 2, PIPE_BR, USPTP },
+	{ HLSQ_DATAPATH_DSTR_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_DATAPATH_DSTR_META, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CHUNK_CPS_RAM, 0x0180, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CPS_MISC_RAM, 0x0640, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_INST_RAM, 0x0200, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_CPS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_CPS_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_INST_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_INST_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_INST_RAM_TAG, 0x0004, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_STPROC_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_BV_BE_META, 0x0018, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_BV_BE_META, 0x0018, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE },
+	{ HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_BACKEND_META, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE },
+	{ HLSQ_BACKEND_META, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE },
+	{ HLSQ_BACKEND_META, 0x0040, 1, 1, PIPE_LPAC, HLSQ_STATE },
+};
+
+/*
+ * Block   : ['PRE_CRASHDUMPER', 'GBIF']
+ * pairs   : 2 (Regs:5), 5 (Regs:38)
+ */
+static const u32 gen7_9_0_pre_crashdumper_gpu_registers[] = {
+	 0x00210, 0x00213, 0x00536, 0x00536, 0x03c00, 0x03c0b, 0x03c40, 0x03c42,
+	 0x03c45, 0x03c47, 0x03c49, 0x03c4a, 0x03cc0, 0x03cd1,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_pre_crashdumper_gpu_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CP', 'GRAS', 'GXCLKCTL']
+ * Block   : ['PC', 'RBBM', 'RDVM', 'UCHE']
+ * Block   : ['VFD', 'VPC', 'VSC']
+ * Pipeline: PIPE_NONE
+ * pairs   : 196 (Regs:1778)
+ */
+static const u32 gen7_9_0_gpu_registers[] = {
+	 0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b,
+	 0x0001f, 0x00032, 0x00038, 0x0003c, 0x00044, 0x00044, 0x00047, 0x00047,
+	 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00056, 0x00056, 0x00073, 0x0007d,
+	 0x00090, 0x000a8, 0x000ad, 0x000ad, 0x00117, 0x00117, 0x00120, 0x00122,
+	 0x00130, 0x0013f, 0x00142, 0x0015f, 0x00162, 0x00164, 0x00166, 0x00171,
+	 0x00173, 0x00174, 0x00176, 0x0017b, 0x0017e, 0x00180, 0x00183, 0x00192,
+	 0x00195, 0x00196, 0x00199, 0x0019a, 0x0019d, 0x001a2, 0x001aa, 0x001ae,
+	 0x001b9, 0x001b9, 0x001bb, 0x001bb, 0x001be, 0x001be, 0x001c1, 0x001c2,
+	 0x001c5, 0x001c5, 0x001c7, 0x001c7, 0x001c9, 0x001c9, 0x001cb, 0x001ce,
+	 0x001d1, 0x001df, 0x001e1, 0x001e3, 0x001e5, 0x001e5, 0x001e7, 0x001e9,
+	 0x00200, 0x0020d, 0x00215, 0x00253, 0x00260, 0x00260, 0x00264, 0x00270,
+	 0x00272, 0x00274, 0x00281, 0x00281, 0x00283, 0x00283, 0x00289, 0x0028d,
+	 0x00290, 0x002a2, 0x002c0, 0x002c1, 0x00300, 0x00401, 0x00410, 0x00451,
+	 0x00460, 0x004a3, 0x004c0, 0x004d1, 0x00500, 0x00500, 0x00507, 0x0050b,
+	 0x0050f, 0x0050f, 0x00511, 0x00511, 0x00533, 0x00535, 0x00540, 0x0055b,
+	 0x00564, 0x00567, 0x00574, 0x00577, 0x00584, 0x0059b, 0x005fb, 0x005ff,
+	 0x00800, 0x00808, 0x00810, 0x00813, 0x00820, 0x00821, 0x00823, 0x00827,
+	 0x00830, 0x00834, 0x0083f, 0x00841, 0x00843, 0x00847, 0x0084f, 0x00886,
+	 0x008a0, 0x008ab, 0x008c0, 0x008c0, 0x008c4, 0x008c4, 0x008c6, 0x008c6,
+	 0x008d0, 0x008dd, 0x008e0, 0x008e6, 0x008f0, 0x008f3, 0x00900, 0x00903,
+	 0x00908, 0x00911, 0x00928, 0x0093e, 0x00942, 0x0094d, 0x00980, 0x00984,
+	 0x0098d, 0x0098f, 0x009b0, 0x009b4, 0x009c2, 0x009c9, 0x009ce, 0x009d7,
+	 0x009e0, 0x009e7, 0x00a00, 0x00a00, 0x00a02, 0x00a03, 0x00a10, 0x00a4f,
+	 0x00a61, 0x00a9f, 0x00ad0, 0x00adb, 0x00b00, 0x00b31, 0x00b35, 0x00b3c,
+	 0x00b40, 0x00b40, 0x00b70, 0x00b73, 0x00b78, 0x00b79, 0x00b7c, 0x00b7d,
+	 0x00b80, 0x00b81, 0x00b84, 0x00b85, 0x00b88, 0x00b89, 0x00b8c, 0x00b8d,
+	 0x00b90, 0x00b93, 0x00b98, 0x00b99, 0x00b9c, 0x00b9d, 0x00ba0, 0x00ba1,
+	 0x00ba4, 0x00ba5, 0x00ba8, 0x00ba9, 0x00bac, 0x00bad, 0x00bb0, 0x00bb1,
+	 0x00bb4, 0x00bb5, 0x00bb8, 0x00bb9, 0x00bbc, 0x00bbd, 0x00bc0, 0x00bc1,
+	 0x00c00, 0x00c00, 0x00c02, 0x00c04, 0x00c06, 0x00c06, 0x00c10, 0x00cd9,
+	 0x00ce0, 0x00d0c, 0x00df0, 0x00df4, 0x00e01, 0x00e02, 0x00e07, 0x00e0e,
+	 0x00e10, 0x00e13, 0x00e17, 0x00e19, 0x00e1c, 0x00e2b, 0x00e30, 0x00e32,
+	 0x00e3a, 0x00e3d, 0x00e50, 0x00e5b, 0x02840, 0x0287f, 0x0ec00, 0x0ec01,
+	 0x0ec05, 0x0ec05, 0x0ec07, 0x0ec07, 0x0ec0a, 0x0ec0a, 0x0ec12, 0x0ec12,
+	 0x0ec26, 0x0ec28, 0x0ec2b, 0x0ec2d, 0x0ec2f, 0x0ec2f, 0x0ec40, 0x0ec41,
+	 0x0ec45, 0x0ec45, 0x0ec47, 0x0ec47, 0x0ec4a, 0x0ec4a, 0x0ec52, 0x0ec52,
+	 0x0ec66, 0x0ec68, 0x0ec6b, 0x0ec6d, 0x0ec6f, 0x0ec6f, 0x0ec80, 0x0ec81,
+	 0x0ec85, 0x0ec85, 0x0ec87, 0x0ec87, 0x0ec8a, 0x0ec8a, 0x0ec92, 0x0ec92,
+	 0x0eca6, 0x0eca8, 0x0ecab, 0x0ecad, 0x0ecaf, 0x0ecaf, 0x0ecc0, 0x0ecc1,
+	 0x0ecc5, 0x0ecc5, 0x0ecc7, 0x0ecc7, 0x0ecca, 0x0ecca, 0x0ecd2, 0x0ecd2,
+	 0x0ece6, 0x0ece8, 0x0eceb, 0x0eced, 0x0ecef, 0x0ecef, 0x0ed00, 0x0ed01,
+	 0x0ed05, 0x0ed05, 0x0ed07, 0x0ed07, 0x0ed0a, 0x0ed0a, 0x0ed12, 0x0ed12,
+	 0x0ed26, 0x0ed28, 0x0ed2b, 0x0ed2d, 0x0ed2f, 0x0ed2f, 0x0ed40, 0x0ed41,
+	 0x0ed45, 0x0ed45, 0x0ed47, 0x0ed47, 0x0ed4a, 0x0ed4a, 0x0ed52, 0x0ed52,
+	 0x0ed66, 0x0ed68, 0x0ed6b, 0x0ed6d, 0x0ed6f, 0x0ed6f, 0x0ed80, 0x0ed81,
+	 0x0ed85, 0x0ed85, 0x0ed87, 0x0ed87, 0x0ed8a, 0x0ed8a, 0x0ed92, 0x0ed92,
+	 0x0eda6, 0x0eda8, 0x0edab, 0x0edad, 0x0edaf, 0x0edaf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_gpu_registers), 8));
+
+static const u32 gen7_9_0_gxclkctl_registers[] = {
+	 0x18800, 0x18800, 0x18808, 0x1880b, 0x18820, 0x18822, 0x18830, 0x18830,
+	 0x18834, 0x1883b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_gxclkctl_registers), 8));
+
+/*
+ * Block   : ['GMUAO', 'GMUCX', 'GMUCX_RAM']
+ * Pipeline: PIPE_NONE
+ * pairs   : 134 (Regs:429)
+ */
+static const u32 gen7_9_0_gmu_registers[] = {
+	 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403,
+	 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03,
+	 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403,
+	 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03,
+	 0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507,
+	 0x1f509, 0x1f50b, 0x1f700, 0x1f701, 0x1f704, 0x1f706, 0x1f708, 0x1f709,
+	 0x1f70c, 0x1f70d, 0x1f710, 0x1f711, 0x1f713, 0x1f716, 0x1f718, 0x1f71d,
+	 0x1f720, 0x1f724, 0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f750, 0x1f756,
+	 0x1f758, 0x1f759, 0x1f75c, 0x1f75c, 0x1f760, 0x1f761, 0x1f764, 0x1f76b,
+	 0x1f770, 0x1f775, 0x1f780, 0x1f785, 0x1f790, 0x1f798, 0x1f7a0, 0x1f7a8,
+	 0x1f7b0, 0x1f7b3, 0x1f800, 0x1f804, 0x1f807, 0x1f808, 0x1f80b, 0x1f80c,
+	 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, 0x1f819, 0x1f81c,
+	 0x1f824, 0x1f82a, 0x1f82d, 0x1f830, 0x1f840, 0x1f853, 0x1f860, 0x1f860,
+	 0x1f862, 0x1f866, 0x1f868, 0x1f869, 0x1f870, 0x1f879, 0x1f87f, 0x1f881,
+	 0x1f890, 0x1f896, 0x1f8a0, 0x1f8a2, 0x1f8a4, 0x1f8af, 0x1f8b8, 0x1f8b9,
+	 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0, 0x1f8ec, 0x1f8ec,
+	 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f917, 0x1f920, 0x1f921, 0x1f924, 0x1f925,
+	 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f942, 0x1f944, 0x1f948, 0x1f94a,
+	 0x1f94f, 0x1f951, 0x1f954, 0x1f955, 0x1f95d, 0x1f95d, 0x1f962, 0x1f96b,
+	 0x1f970, 0x1f971, 0x1f973, 0x1f977, 0x1f97c, 0x1f97c, 0x1f980, 0x1f981,
+	 0x1f984, 0x1f986, 0x1f992, 0x1f993, 0x1f996, 0x1f99e, 0x1f9c5, 0x1f9d4,
+	 0x1f9f0, 0x1f9f1, 0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03,
+	 0x20000, 0x20013, 0x20018, 0x2001a, 0x20020, 0x20021, 0x20024, 0x20025,
+	 0x2002a, 0x2002c, 0x20030, 0x20031, 0x20034, 0x20036, 0x23801, 0x23801,
+	 0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, 0x23809, 0x23809,
+	 0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, 0x23811, 0x23811,
+	 0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, 0x23819, 0x23819,
+	 0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, 0x23822, 0x23822,
+	 0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, 0x2382a, 0x2382a,
+	 0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, 0x23832, 0x23832,
+	 0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838, 0x2383a, 0x2383a,
+	 0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, 0x23b00, 0x23b01,
+	 0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, 0x23b15, 0x23b16,
+	 0x23b28, 0x23b28, 0x23b30, 0x23b30,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_gmu_registers), 8));
+
+/*
+ * Block   : ['GMUGX']
+ * Pipeline: PIPE_NONE
+ * pairs   : 44 (Regs:454)
+ */
+static const u32 gen7_9_0_gmugx_registers[] = {
+	 0x1a400, 0x1a41f, 0x1a440, 0x1a45f, 0x1a480, 0x1a49f, 0x1a4c0, 0x1a4df,
+	 0x1a500, 0x1a51f, 0x1a540, 0x1a55f, 0x1a580, 0x1a59f, 0x1a600, 0x1a61f,
+	 0x1a640, 0x1a65f, 0x1a780, 0x1a781, 0x1a783, 0x1a785, 0x1a787, 0x1a789,
+	 0x1a78b, 0x1a78d, 0x1a78f, 0x1a791, 0x1a793, 0x1a795, 0x1a797, 0x1a799,
+	 0x1a79b, 0x1a79d, 0x1a79f, 0x1a7a1, 0x1a7a3, 0x1a7a3, 0x1a7a8, 0x1a7b9,
+	 0x1a7c0, 0x1a7c1, 0x1a7c4, 0x1a7c5, 0x1a7c8, 0x1a7c9, 0x1a7cc, 0x1a7cd,
+	 0x1a7d0, 0x1a7d1, 0x1a7d4, 0x1a7d5, 0x1a7d8, 0x1a7d9, 0x1a7dc, 0x1a7dd,
+	 0x1a7e0, 0x1a7e1, 0x1a7fc, 0x1a7fd, 0x1a800, 0x1a808, 0x1a816, 0x1a816,
+	 0x1a81e, 0x1a81e, 0x1a826, 0x1a826, 0x1a82e, 0x1a82e, 0x1a836, 0x1a836,
+	 0x1a83e, 0x1a83e, 0x1a846, 0x1a846, 0x1a84e, 0x1a84e, 0x1a856, 0x1a856,
+	 0x1a883, 0x1a884, 0x1a890, 0x1a8b3, 0x1a900, 0x1a92b, 0x1a940, 0x1a940,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_gmugx_registers), 8));
+
+/*
+ * Block   : ['CX_MISC']
+ * Pipeline: PIPE_NONE
+ * pairs   : 7 (Regs:56)
+ */
+static const u32 gen7_9_0_cx_misc_registers[] = {
+	 0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27828, 0x2782a,
+	 0x27832, 0x27857, 0x27880, 0x27881, 0x27c00, 0x27c01,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_cx_misc_registers), 8));
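A note on the table format, as a minimal sketch assuming only what the listing itself shows: each gen7_9_0_*_registers array is a flat list of inclusive (start, end) register-offset pairs terminated by a UINT_MAX, UINT_MAX sentinel, and the static_assert(IS_ALIGNED(sizeof(...), 8)) after each table checks that the array holds an even number of u32 values, so the offsets really do pair up. The stand-alone snippet below walks the CX_MISC table above in that way and reproduces the generated "pairs : 7 (Regs:56)" figure; it is illustrative user-space code, not driver code.

/*
 * Illustrative only, not driver code: walk a (start, end) pair list such as
 * gen7_9_0_cx_misc_registers.  Offsets are inclusive; UINT_MAX/UINT_MAX
 * terminates the list.
 */
#include <limits.h>
#include <stdio.h>

typedef unsigned int u32;

static const u32 example_registers[] = {
	0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27828, 0x2782a,
	0x27832, 0x27857, 0x27880, 0x27881, 0x27c00, 0x27c01,
	UINT_MAX, UINT_MAX,
};

int main(void)
{
	unsigned int pairs = 0, regs = 0;
	const u32 *ptr = example_registers;

	/* Each iteration consumes one inclusive [start, end] range. */
	for (; ptr[0] != UINT_MAX; ptr += 2) {
		pairs++;
		regs += ptr[1] - ptr[0] + 1;
	}

	/* Prints pairs=7 regs=56, matching the generated comment above. */
	printf("pairs=%u regs=%u\n", pairs, regs);
	return 0;
}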
+
+/*
+ * Block   : ['DBGC']
+ * Pipeline: PIPE_NONE
+ * pairs   : 19 (Regs:155)
+ */
+static const u32 gen7_9_0_dbgc_registers[] = {
+	 0x00600, 0x0061c, 0x0061e, 0x00634, 0x00640, 0x00643, 0x0064e, 0x00652,
+	 0x00654, 0x0065e, 0x00699, 0x00699, 0x0069b, 0x0069e, 0x006c2, 0x006e4,
+	 0x006e6, 0x006e6, 0x006e9, 0x006e9, 0x006eb, 0x006eb, 0x006f1, 0x006f4,
+	 0x00700, 0x00707, 0x00718, 0x00718, 0x00720, 0x00729, 0x00740, 0x0074a,
+	 0x00758, 0x00758, 0x00760, 0x00762,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_dbgc_registers), 8));
+
+/*
+ * Block   : ['CX_DBGC']
+ * Pipeline: PIPE_NONE
+ * pairs   : 7 (Regs:75)
+ */
+static const u32 gen7_9_0_cx_dbgc_registers[] = {
+	 0x18400, 0x1841c, 0x1841e, 0x18434, 0x18440, 0x18443, 0x1844e, 0x18452,
+	 0x18454, 0x1845e, 0x18520, 0x18520, 0x18580, 0x18581,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_cx_dbgc_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CP', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF']
+ * Block   : ['GMUAO', 'GMUCX', 'GMUGX', 'GRAS', 'GXCLKCTL', 'PC']
+ * Block   : ['RBBM', 'RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 29 (Regs:573)
+ */
+static const u32 gen7_9_0_non_context_pipe_br_registers[] = {
+	 0x00887, 0x0088c, 0x08600, 0x08602, 0x08610, 0x0861b, 0x08620, 0x08620,
+	 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640, 0x09600, 0x09603,
+	 0x0960a, 0x09616, 0x09624, 0x0963a, 0x09640, 0x09640, 0x09e00, 0x09e00,
+	 0x09e02, 0x09e07, 0x09e0a, 0x09e16, 0x09e18, 0x09e1a, 0x09e1c, 0x09e1c,
+	 0x09e20, 0x09e25, 0x09e30, 0x09e31, 0x09e40, 0x09e51, 0x09e64, 0x09e6c,
+	 0x09e70, 0x09e72, 0x09e78, 0x09e79, 0x09e80, 0x09fff, 0x0a600, 0x0a600,
+	 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a631, 0x0a638, 0x0a63c,
+	 0x0a640, 0x0a65f,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_pipe_br_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CP', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF']
+ * Block   : ['GMUAO', 'GMUCX', 'GMUGX', 'GRAS', 'GXCLKCTL', 'PC']
+ * Block   : ['RBBM', 'RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_NONE
+ * pairs   : 29 (Regs:573)
+ */
+static const u32 gen7_9_0_non_context_pipe_bv_registers[] = {
+	 0x00887, 0x0088c, 0x08600, 0x08602, 0x08610, 0x0861b, 0x08620, 0x08620,
+	 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640, 0x09600, 0x09603,
+	 0x0960a, 0x09616, 0x09624, 0x0963a, 0x09640, 0x09640, 0x09e00, 0x09e00,
+	 0x09e02, 0x09e07, 0x09e0a, 0x09e16, 0x09e18, 0x09e1a, 0x09e1c, 0x09e1c,
+	 0x09e20, 0x09e25, 0x09e30, 0x09e31, 0x09e40, 0x09e51, 0x09e64, 0x09e6c,
+	 0x09e70, 0x09e72, 0x09e78, 0x09e79, 0x09e80, 0x09fff, 0x0a600, 0x0a600,
+	 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a631, 0x0a638, 0x0a63c,
+	 0x0a640, 0x0a65f,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_pipe_bv_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CP', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF']
+ * Block   : ['GMUAO', 'GMUCX', 'GMUGX', 'GRAS', 'GXCLKCTL', 'PC']
+ * Block   : ['RBBM', 'RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * pairs   : 2 (Regs:7)
+ */
+static const u32 gen7_9_0_non_context_pipe_lpac_registers[] = {
+	 0x00887, 0x0088c, 0x00f80, 0x00f80,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_pipe_lpac_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 5 (Regs:37)
+ */
+static const u32 gen7_9_0_non_context_rb_pipe_br_rac_registers[] = {
+	 0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e5a, 0x08e6a, 0x08e6d,
+	 0x08ea0, 0x08ea3,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_rb_pipe_br_rac_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 15 (Regs:66)
+ */
+static const u32 gen7_9_0_non_context_rb_pipe_br_rbp_registers[] = {
+	 0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c,
+	 0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e40, 0x08e50, 0x08e50,
+	 0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e66,
+	 0x08e68, 0x08e69, 0x08e70, 0x08e7d, 0x08e80, 0x08e8f,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_rb_pipe_br_rbp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * Location: HLSQ_STATE
+ * pairs   : 4 (Regs:28)
+ */
+static const u32 gen7_9_0_non_context_sp_pipe_br_hlsq_state_registers[] = {
+	 0x0ae52, 0x0ae52, 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae75, 0x0aec0, 0x0aec5,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_sp_pipe_br_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * Location: SP_TOP
+ * pairs   : 10 (Regs:61)
+ */
+static const u32 gen7_9_0_non_context_sp_pipe_br_sp_top_registers[] = {
+	 0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae0a, 0x0ae0c, 0x0ae0c,
+	 0x0ae0f, 0x0ae0f, 0x0ae28, 0x0ae2b, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f,
+	 0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_sp_pipe_br_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 12 (Regs:62)
+ */
+static const u32 gen7_9_0_non_context_sp_pipe_br_usptp_registers[] = {
+	 0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae0a, 0x0ae0c, 0x0ae0c,
+	 0x0ae0f, 0x0ae0f, 0x0ae28, 0x0ae2b, 0x0ae30, 0x0ae32, 0x0ae35, 0x0ae35,
+	 0x0ae3a, 0x0ae3b, 0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_sp_pipe_br_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * Location: HLSQ_DP_STR
+ * pairs   : 2 (Regs:5)
+ */
+static const u32 gen7_9_0_non_context_sp_pipe_br_hlsq_dp_str_registers[] = {
+	 0x0ae6b, 0x0ae6c, 0x0ae73, 0x0ae75,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_sp_pipe_br_hlsq_dp_str_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * Location: HLSQ_STATE
+ * pairs   : 1 (Regs:5)
+ */
+static const u32 gen7_9_0_non_context_sp_pipe_lpac_hlsq_state_registers[] = {
+	 0x0af88, 0x0af8c,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_sp_pipe_lpac_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * Location: SP_TOP
+ * pairs   : 1 (Regs:6)
+ */
+static const u32 gen7_9_0_non_context_sp_pipe_lpac_sp_top_registers[] = {
+	 0x0af80, 0x0af85,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_sp_pipe_lpac_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 2 (Regs:9)
+ */
+static const u32 gen7_9_0_non_context_sp_pipe_lpac_usptp_registers[] = {
+	 0x0af80, 0x0af85, 0x0af90, 0x0af92,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_sp_pipe_lpac_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_NONE
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 5 (Regs:29)
+ */
+static const u32 gen7_9_0_non_context_tpl1_pipe_none_usptp_registers[] = {
+	 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c, 0x0b610, 0x0b621,
+	 0x0b630, 0x0b633,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_tpl1_pipe_none_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 1 (Regs:1)
+ */
+static const u32 gen7_9_0_non_context_tpl1_pipe_br_usptp_registers[] = {
+	 0x0b600, 0x0b600,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_tpl1_pipe_br_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 1 (Regs:1)
+ */
+static const u32 gen7_9_0_non_context_tpl1_pipe_lpac_usptp_registers[] = {
+	 0x0b780, 0x0b780,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_non_context_tpl1_pipe_lpac_usptp_registers), 8));
+
+/*
+ * Block   : ['GRAS']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_GRAS
+ * pairs   : 14 (Regs:293)
+ */
+static const u32 gen7_9_0_gras_pipe_br_cluster_gras_registers[] = {
+	 0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
+	 0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
+	 0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08116, 0x08120, 0x0813f,
+	 0x08400, 0x08406, 0x0840a, 0x0840b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_gras_pipe_br_cluster_gras_registers), 8));
+
+/*
+ * Block   : ['GRAS']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_GRAS
+ * pairs   : 14 (Regs:293)
+ */
+static const u32 gen7_9_0_gras_pipe_bv_cluster_gras_registers[] = {
+	 0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
+	 0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
+	 0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08116, 0x08120, 0x0813f,
+	 0x08400, 0x08406, 0x0840a, 0x0840b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_gras_pipe_bv_cluster_gras_registers), 8));
+
+/*
+ * Block   : ['PC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE
+ * pairs   : 6 (Regs:31)
+ */
+static const u32 gen7_9_0_pc_pipe_br_cluster_fe_registers[] = {
+	 0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
+	 0x09970, 0x09972, 0x09b00, 0x09b0c,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_pc_pipe_br_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['PC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE
+ * pairs   : 6 (Regs:31)
+ */
+static const u32 gen7_9_0_pc_pipe_bv_cluster_fe_registers[] = {
+	 0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
+	 0x09970, 0x09972, 0x09b00, 0x09b0c,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_pc_pipe_bv_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VFD']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE
+ * pairs   : 2 (Regs:236)
+ */
+static const u32 gen7_9_0_vfd_pipe_br_cluster_fe_registers[] = {
+	 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_vfd_pipe_br_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VFD']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE
+ * pairs   : 2 (Regs:236)
+ */
+static const u32 gen7_9_0_vfd_pipe_bv_cluster_fe_registers[] = {
+	 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_vfd_pipe_bv_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE
+ * pairs   : 2 (Regs:18)
+ */
+static const u32 gen7_9_0_vpc_pipe_br_cluster_fe_registers[] = {
+	 0x09300, 0x0930a, 0x09311, 0x09317,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_vpc_pipe_br_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_PC_VS
+ * pairs   : 3 (Regs:30)
+ */
+static const u32 gen7_9_0_vpc_pipe_br_cluster_pc_vs_registers[] = {
+	 0x09101, 0x0910c, 0x09300, 0x0930a, 0x09311, 0x09317,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_vpc_pipe_br_cluster_pc_vs_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_VPC_PS
+ * pairs   : 5 (Regs:76)
+ */
+static const u32 gen7_9_0_vpc_pipe_br_cluster_vpc_ps_registers[] = {
+	 0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x0923c, 0x09300, 0x0930a,
+	 0x09311, 0x09317,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_vpc_pipe_br_cluster_vpc_ps_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE
+ * pairs   : 2 (Regs:18)
+ */
+static const u32 gen7_9_0_vpc_pipe_bv_cluster_fe_registers[] = {
+	 0x09300, 0x0930a, 0x09311, 0x09317,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_vpc_pipe_bv_cluster_fe_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_PC_VS
+ * pairs   : 3 (Regs:30)
+ */
+static const u32 gen7_9_0_vpc_pipe_bv_cluster_pc_vs_registers[] = {
+	 0x09101, 0x0910c, 0x09300, 0x0930a, 0x09311, 0x09317,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_vpc_pipe_bv_cluster_pc_vs_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_VPC_PS
+ * pairs   : 5 (Regs:76)
+ */
+static const u32 gen7_9_0_vpc_pipe_bv_cluster_vpc_ps_registers[] = {
+	 0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x0923c, 0x09300, 0x0930a,
+	 0x09311, 0x09317,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_vpc_pipe_bv_cluster_vpc_ps_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_PS
+ * pairs   : 39 (Regs:133)
+ */
+static const u32 gen7_9_0_rb_pipe_br_cluster_ps_rac_registers[] = {
+	 0x08802, 0x08802, 0x08804, 0x08806, 0x08809, 0x0880a, 0x0880e, 0x08811,
+	 0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829,
+	 0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839,
+	 0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849,
+	 0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859,
+	 0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876,
+	 0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891,
+	 0x08898, 0x08899, 0x088c0, 0x088c1, 0x088e5, 0x088e5, 0x088f4, 0x088f5,
+	 0x08a00, 0x08a05, 0x08a10, 0x08a15, 0x08a20, 0x08a25, 0x08a30, 0x08a35,
+	 0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_rb_pipe_br_cluster_ps_rac_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_PS
+ * pairs   : 34 (Regs:100)
+ */
+static const u32 gen7_9_0_rb_pipe_br_cluster_ps_rbp_registers[] = {
+	 0x08800, 0x08801, 0x08803, 0x08803, 0x0880b, 0x0880d, 0x08812, 0x08812,
+	 0x08820, 0x08820, 0x08822, 0x08822, 0x08827, 0x08828, 0x0882a, 0x0882a,
+	 0x0882f, 0x08830, 0x08832, 0x08832, 0x08837, 0x08838, 0x0883a, 0x0883a,
+	 0x0883f, 0x08840, 0x08842, 0x08842, 0x08847, 0x08848, 0x0884a, 0x0884a,
+	 0x0884f, 0x08850, 0x08852, 0x08852, 0x08857, 0x08858, 0x0885a, 0x0885a,
+	 0x0885f, 0x0885f, 0x08865, 0x08865, 0x08871, 0x08872, 0x08877, 0x08877,
+	 0x08880, 0x08881, 0x08886, 0x08886, 0x08890, 0x08890, 0x088d0, 0x088e4,
+	 0x088e8, 0x088ea, 0x088f0, 0x088f0, 0x08900, 0x0891a, 0x08927, 0x08928,
+	 0x08c17, 0x08c17, 0x08c20, 0x08c25,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_rb_pipe_br_cluster_ps_rbp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 29 (Regs:215)
+ */
+static const u32 gen7_9_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers[] = {
+	 0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
+	 0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
+	 0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
+	 0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977,
+	 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
+	 0x0ab40, 0x0abbf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: SP_TOP
+ * pairs   : 22 (Regs:73)
+ */
+static const u32 gen7_9_0_sp_pipe_br_cluster_sp_vs_sp_top_registers[] = {
+	 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d,
+	 0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840,
+	 0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871,
+	 0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af,
+	 0x0a974, 0x0a977, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05,
+	 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_br_cluster_sp_vs_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 16 (Regs:269)
+ */
+static const u32 gen7_9_0_sp_pipe_br_cluster_sp_vs_usptp_registers[] = {
+	 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
+	 0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
+	 0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
+	 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_br_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 21 (Regs:334)
+ */
+static const u32 gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers[] = {
+	 0x0a980, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7, 0x0a9aa, 0x0a9aa,
+	 0x0a9ae, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
+	 0x0a9c4, 0x0a9c4, 0x0a9c6, 0x0a9c6, 0x0a9cd, 0x0a9cd, 0x0a9e0, 0x0a9fc,
+	 0x0aa00, 0x0aa00, 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0aaf2, 0x0aaf3,
+	 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
+	 0x0ab40, 0x0abbf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_DP
+ * pairs   : 3 (Regs:19)
+ */
+static const u32 gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers[] = {
+	 0x0a9b1, 0x0a9b1, 0x0a9c6, 0x0a9cb, 0x0a9d4, 0x0a9df,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: SP_TOP
+ * pairs   : 18 (Regs:77)
+ */
+static const u32 gen7_9_0_sp_pipe_br_cluster_sp_ps_sp_top_registers[] = {
+	 0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8,
+	 0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5,
+	 0x0a9ba, 0x0a9bc, 0x0a9c5, 0x0a9c5, 0x0a9e0, 0x0a9f9, 0x0aa00, 0x0aa03,
+	 0x0aaf2, 0x0aaf3, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05,
+	 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_br_cluster_sp_ps_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 17 (Regs:333)
+ */
+static const u32 gen7_9_0_sp_pipe_br_cluster_sp_ps_usptp_registers[] = {
+	 0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae,
+	 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3,
+	 0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa01, 0x0aa03,
+	 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22,
+	 0x0ab40, 0x0abbf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_br_cluster_sp_ps_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_DP_STR
+ * pairs   : 1 (Regs:6)
+ */
+static const u32 gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_str_registers[] = {
+	 0x0a9c6, 0x0a9cb,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_str_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 28 (Regs:213)
+ */
+static const u32 gen7_9_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers[] = {
+	 0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
+	 0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
+	 0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
+	 0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977,
+	 0x0ab00, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: SP_TOP
+ * pairs   : 21 (Regs:71)
+ */
+static const u32 gen7_9_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers[] = {
+	 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d,
+	 0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840,
+	 0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871,
+	 0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af,
+	 0x0a974, 0x0a977, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b,
+	 0x0ab20, 0x0ab20,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 16 (Regs:266)
+ */
+static const u32 gen7_9_0_sp_pipe_bv_cluster_sp_vs_usptp_registers[] = {
+	 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
+	 0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
+	 0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
+	 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_bv_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 14 (Regs:299)
+ */
+static const u32 gen7_9_0_sp_pipe_lpac_cluster_sp_ps_hlsq_state_registers[] = {
+	 0x0a9b0, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
+	 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9fc,
+	 0x0aa00, 0x0aa00, 0x0aa31, 0x0aa35, 0x0aa40, 0x0aabf, 0x0aaf3, 0x0aaf3,
+	 0x0ab00, 0x0ab01, 0x0ab40, 0x0abbf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_lpac_cluster_sp_ps_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_DP
+ * pairs   : 2 (Regs:13)
+ */
+static const u32 gen7_9_0_sp_pipe_lpac_cluster_sp_ps_hlsq_dp_registers[] = {
+	 0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_lpac_cluster_sp_ps_hlsq_dp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: SP_TOP
+ * pairs   : 9 (Regs:34)
+ */
+static const u32 gen7_9_0_sp_pipe_lpac_cluster_sp_ps_sp_top_registers[] = {
+	 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9c5, 0x0a9c5,
+	 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0aaf3, 0x0aaf3,
+	 0x0ab00, 0x0ab00,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_lpac_cluster_sp_ps_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 11 (Regs:279)
+ */
+static const u32 gen7_9_0_sp_pipe_lpac_cluster_sp_ps_usptp_registers[] = {
+	 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3,
+	 0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31,
+	 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab01, 0x0ab40, 0x0abbf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_sp_pipe_lpac_cluster_sp_ps_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 3 (Regs:10)
+ */
+static const u32 gen7_9_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers[] = {
+	 0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 6 (Regs:42)
+ */
+static const u32 gen7_9_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers[] = {
+	 0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d5, 0x0b300, 0x0b307,
+	 0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 3 (Regs:10)
+ */
+static const u32 gen7_9_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers[] = {
+	 0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 5 (Regs:7)
+ */
+static const u32 gen7_9_0_tpl1_pipe_lpac_cluster_sp_ps_usptp_registers[] = {
+	 0x0b180, 0x0b181, 0x0b300, 0x0b301, 0x0b307, 0x0b307, 0x0b309, 0x0b309,
+	 0x0b310, 0x0b310,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_tpl1_pipe_lpac_cluster_sp_ps_usptp_registers), 8));
+
+static const struct gen7_sel_reg gen7_9_0_rb_rac_sel = {
+	.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0,
+};
+
+static const struct gen7_sel_reg gen7_9_0_rb_rbp_sel = {
+	.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0x9,
+};
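The two gen7_sel_reg entries above carry the RB sub-block select: the same SUB_BLOCK_SEL_CNTL host/CD register pair takes the value 0 for the RAC sub-block and 0x9 for the RBP sub-block, which is why each RB range list appears once per select value in the cluster table below, presumably with the select written before the ranges are dumped. The sketch that follows only illustrates that select-then-dump pattern; gpu_write(), dump_ranges() and the numeric offsets in main() are hypothetical stand-ins, not the driver's accessors.

/*
 * Stand-alone sketch, not driver code: select an RB sub-block (RAC = 0,
 * RBP = 0x9), then dump that sub-block's (start, end) range list.
 * gpu_write() is a printf stub and the offsets in main() are hypothetical.
 */
#include <limits.h>
#include <stdio.h>

typedef unsigned int u32;

struct sel_reg { u32 host_reg; u32 cd_reg; u32 val; };

static void gpu_write(u32 offset, u32 val)
{
	printf("write reg 0x%05x <- 0x%x\n", offset, val);	/* stub accessor */
}

static void dump_ranges(const u32 *r)
{
	for (; r[0] != UINT_MAX; r += 2)			/* pair-list walk */
		printf("  dump 0x%05x..0x%05x\n", r[0], r[1]);
}

/* Route the RB debug mux to the requested sub-block, then dump its ranges. */
static void snapshot_rb_subblock(const struct sel_reg *sel, const u32 *ranges)
{
	gpu_write(sel->host_reg, sel->val);
	dump_ranges(ranges);
}

int main(void)
{
	static const u32 rac_ranges[] = { 0x08e10, 0x08e1c, UINT_MAX, UINT_MAX };
	static const struct sel_reg rac = { 0x8cd8, 0x8cd9, 0x0 };	/* offsets hypothetical */

	snapshot_rb_subblock(&rac, rac_ranges);
	return 0;
}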
+
+static struct gen7_cluster_registers gen7_9_0_clusters[] = {
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_9_0_non_context_pipe_br_registers,  },
+	{ CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT,
+		gen7_9_0_non_context_pipe_bv_registers,  },
+	{ CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT,
+		gen7_9_0_non_context_pipe_lpac_registers,  },
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_9_0_non_context_rb_pipe_br_rac_registers, &gen7_9_0_rb_rac_sel, },
+	{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
+		gen7_9_0_non_context_rb_pipe_br_rbp_registers, &gen7_9_0_rb_rbp_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_9_0_rb_pipe_br_cluster_ps_rac_registers, &gen7_9_0_rb_rac_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_9_0_rb_pipe_br_cluster_ps_rac_registers, &gen7_9_0_rb_rac_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_9_0_rb_pipe_br_cluster_ps_rbp_registers, &gen7_9_0_rb_rbp_sel, },
+	{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_9_0_rb_pipe_br_cluster_ps_rbp_registers, &gen7_9_0_rb_rbp_sel, },
+	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_9_0_gras_pipe_br_cluster_gras_registers,  },
+	{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_9_0_gras_pipe_br_cluster_gras_registers,  },
+	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_9_0_gras_pipe_bv_cluster_gras_registers,  },
+	{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_9_0_gras_pipe_bv_cluster_gras_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_9_0_pc_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_9_0_pc_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_9_0_pc_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_9_0_pc_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_9_0_vfd_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_9_0_vfd_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_9_0_vfd_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_9_0_vfd_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_9_0_vpc_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_9_0_vpc_pipe_br_cluster_fe_registers,  },
+	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_9_0_vpc_pipe_br_cluster_pc_vs_registers,  },
+	{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_9_0_vpc_pipe_br_cluster_pc_vs_registers,  },
+	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen7_9_0_vpc_pipe_br_cluster_vpc_ps_registers,  },
+	{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen7_9_0_vpc_pipe_br_cluster_vpc_ps_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_9_0_vpc_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_9_0_vpc_pipe_bv_cluster_fe_registers,  },
+	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_9_0_vpc_pipe_bv_cluster_pc_vs_registers,  },
+	{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_9_0_vpc_pipe_bv_cluster_pc_vs_registers,  },
+	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen7_9_0_vpc_pipe_bv_cluster_vpc_ps_registers,  },
+	{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen7_9_0_vpc_pipe_bv_cluster_vpc_ps_registers,  },
+};
+
+static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = {
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_9_0_non_context_sp_pipe_br_hlsq_state_registers, 0xae00},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP,
+		gen7_9_0_non_context_sp_pipe_br_sp_top_registers, 0xae00},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen7_9_0_non_context_sp_pipe_br_usptp_registers, 0xae00},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_DP_STR,
+		gen7_9_0_non_context_sp_pipe_br_hlsq_dp_str_registers, 0xae00},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen7_9_0_non_context_sp_pipe_lpac_hlsq_state_registers, 0xaf80},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, SP_TOP,
+		gen7_9_0_non_context_sp_pipe_lpac_sp_top_registers, 0xaf80},
+	{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, USPTP,
+		gen7_9_0_non_context_sp_pipe_lpac_usptp_registers, 0xaf80},
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_NONE, 0, USPTP,
+		gen7_9_0_non_context_tpl1_pipe_none_usptp_registers, 0xb600},
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen7_9_0_non_context_tpl1_pipe_br_usptp_registers, 0xb600},
+	{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP,
+		gen7_9_0_non_context_tpl1_pipe_lpac_usptp_registers, 0xb780},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_9_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP,
+		gen7_9_0_sp_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen7_9_0_sp_pipe_br_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE,
+		gen7_9_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP,
+		gen7_9_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen7_9_0_sp_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen7_9_0_sp_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP,
+		gen7_9_0_sp_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen7_9_0_sp_pipe_br_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE,
+		gen7_9_0_sp_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP,
+		gen7_9_0_sp_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen7_9_0_sp_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP_STR,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_str_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen7_9_0_sp_pipe_lpac_cluster_sp_ps_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP,
+		gen7_9_0_sp_pipe_lpac_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP,
+		gen7_9_0_sp_pipe_lpac_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen7_9_0_sp_pipe_lpac_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP_STR,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_str_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP_STR,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_str_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP_STR,
+		gen7_9_0_sp_pipe_br_cluster_sp_ps_hlsq_dp_str_registers, 0xa800},
+	{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen7_9_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen7_9_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen7_9_0_tpl1_pipe_br_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen7_9_0_tpl1_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen7_9_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen7_9_0_tpl1_pipe_lpac_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen7_9_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
+		gen7_9_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
+		gen7_9_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+};
+
+static struct gen7_cp_indexed_reg gen7_9_0_cp_indexed_reg_list[] = {
+	{ GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x00040},
+	{ GEN7_CP_DRAW_STATE_ADDR, GEN7_CP_DRAW_STATE_DATA, 0x00200},
+	{ GEN7_CP_ROQ_DBG_ADDR, GEN7_CP_ROQ_DBG_DATA, 0x00800},
+	{ GEN7_CP_SQE_UCODE_DBG_ADDR, GEN7_CP_SQE_UCODE_DBG_DATA, 0x08000},
+	{ GEN7_CP_BV_DRAW_STATE_ADDR, GEN7_CP_BV_DRAW_STATE_DATA, 0x00200},
+	{ GEN7_CP_BV_ROQ_DBG_ADDR, GEN7_CP_BV_ROQ_DBG_DATA, 0x00800},
+	{ GEN7_CP_BV_SQE_UCODE_DBG_ADDR, GEN7_CP_BV_SQE_UCODE_DBG_DATA, 0x08000},
+	{ GEN7_CP_BV_SQE_STAT_ADDR, GEN7_CP_BV_SQE_STAT_DATA, 0x00040},
+	{ GEN7_CP_RESOURCE_TABLE_DBG_ADDR, GEN7_CP_RESOURCE_TABLE_DBG_DATA, 0x04100},
+	{ GEN7_CP_LPAC_DRAW_STATE_ADDR, GEN7_CP_LPAC_DRAW_STATE_DATA, 0x00200},
+	{ GEN7_CP_LPAC_ROQ_DBG_ADDR, GEN7_CP_LPAC_ROQ_DBG_DATA, 0x00200},
+	{ GEN7_CP_SQE_AC_UCODE_DBG_ADDR, GEN7_CP_SQE_AC_UCODE_DBG_DATA, 0x08000},
+	{ GEN7_CP_SQE_AC_STAT_ADDR, GEN7_CP_SQE_AC_STAT_DATA, 0x00040},
+	{ GEN7_CP_LPAC_FIFO_DBG_ADDR, GEN7_CP_LPAC_FIFO_DBG_DATA, 0x00040},
+	{ GEN7_CP_AQE_ROQ_DBG_ADDR_0, GEN7_CP_AQE_ROQ_DBG_DATA_0, 0x00100},
+	{ GEN7_CP_AQE_ROQ_DBG_ADDR_1, GEN7_CP_AQE_ROQ_DBG_DATA_1, 0x00100},
+	{ GEN7_CP_AQE_UCODE_DBG_ADDR_0, GEN7_CP_AQE_UCODE_DBG_DATA_0, 0x08000},
+	{ GEN7_CP_AQE_UCODE_DBG_ADDR_1, GEN7_CP_AQE_UCODE_DBG_DATA_1, 0x08000},
+	{ GEN7_CP_AQE_STAT_ADDR_0, GEN7_CP_AQE_STAT_DATA_0, 0x00040},
+	{ GEN7_CP_AQE_STAT_ADDR_1, GEN7_CP_AQE_STAT_DATA_1, 0x00040},
+};
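Each entry above is an (address register, data register, size) triple for one of the CP's indexed debug interfaces; the usual way such a triple is consumed is to program the address register once and then read the data register size times, with the index advancing on each read. The snippet below is a hedged stand-alone sketch of that pattern only; gpu_write()/gpu_read() and the offsets in main() are hypothetical stubs, not the driver's register accessors.

/*
 * Stand-alone sketch, not driver code: read out one indexed debug block
 * described by an (addr, data, size) triple.
 */
#include <stdio.h>

typedef unsigned int u32;

struct indexed_reg { u32 addr; u32 data; u32 size; };

static void gpu_write(u32 offset, u32 val) { (void)offset; (void)val; }	/* stub */
static u32 gpu_read(u32 offset) { (void)offset; return 0; }		/* stub */

static void dump_indexed(const struct indexed_reg *r, u32 *out)
{
	u32 i;

	gpu_write(r->addr, 0);			/* start at index 0 */
	for (i = 0; i < r->size; i++)
		out[i] = gpu_read(r->data);	/* each read returns the next entry */
}

int main(void)
{
	/* Offsets hypothetical; size mirrors the 0x00040-entry stat tables above. */
	static const struct indexed_reg sqe_stat = { 0x1b0, 0x1b1, 0x40 };
	static u32 buf[0x40];

	dump_indexed(&sqe_stat, buf);
	printf("read %u dwords starting at index 0\n", sqe_stat.size);
	return 0;
}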
+
+static struct gen7_reg_list gen7_9_0_reg_list[] = {
+	{ gen7_9_0_gpu_registers, NULL},
+	{ gen7_9_0_cx_dbgc_registers, NULL},
+	{ gen7_9_0_dbgc_registers, NULL},
+	{ NULL, NULL},
+};
+
+static const u32 gen7_9_0_cpr_registers[] = {
+	0x26800, 0x26805, 0x26808, 0x2680d, 0x26814, 0x26815, 0x2681c, 0x2681c,
+	0x26820, 0x26839, 0x26840, 0x26841, 0x26848, 0x26849, 0x26850, 0x26851,
+	0x26880, 0x268a1, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee,
+	0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f,
+	0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274c4,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_cpr_registers), 8));
+
+static const u32 gen7_9_0_dpm_registers[] = {
+	0x1aa00, 0x1aa06, 0x1aa09, 0x1aa0a, 0x1aa0c, 0x1aa0d, 0x1aa0f, 0x1aa12,
+	0x1aa14, 0x1aa47, 0x1aa50, 0x1aa51,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_dpm_registers), 8));
+
+static const u32 gen7_9_0_dpm_leakage_registers[] = {
+	0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50,
+	0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_dpm_leakage_registers), 8));
+
+static const u32 gen7_9_0_gfx_gpu_acd_registers[] = {
+	0x18c00, 0x18c16, 0x18c20, 0x18c2d, 0x18c30, 0x18c31, 0x18c35, 0x18c35,
+	0x18c37, 0x18c37, 0x18c3a, 0x18c3a, 0x18c42, 0x18c42, 0x18c56, 0x18c58,
+	0x18c5b, 0x18c5d, 0x18c5f, 0x18c62,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_gfx_gpu_acd_registers), 8));
+
+static const u32 gen7_9_0_gpucc_registers[] = {
+	0x24000, 0x2400f, 0x24400, 0x2440f, 0x24800, 0x24805, 0x24c00, 0x24cff,
+	0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004,
+	0x26400, 0x26405, 0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26434,
+	0x26441, 0x2644b, 0x2644d, 0x26463, 0x26466, 0x26468, 0x26478, 0x2647a,
+	0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a6, 0x264c5, 0x264c7,
+	0x264d6, 0x264d8, 0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2650b, 0x2650b,
+	0x2651c, 0x2651e, 0x26540, 0x2654e, 0x26554, 0x26573, 0x26576, 0x2657a,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_gpucc_registers), 8));
+
+static const u32 gen7_9_0_isense_registers[] = {
+	0x22c3a, 0x22c3c, 0x22c41, 0x22c41, 0x22c46, 0x22c47, 0x22c4c, 0x22c4c,
+	0x22c51, 0x22c51, 0x22c56, 0x22c56, 0x22c5b, 0x22c5b, 0x22c60, 0x22c60,
+	0x22c65, 0x22c65, 0x22c6a, 0x22c70, 0x22c75, 0x22c75, 0x22c7a, 0x22c7a,
+	0x22c7f, 0x22c7f, 0x22c84, 0x22c85, 0x22c8a, 0x22c8a, 0x22c8f, 0x22c8f,
+	0x23000, 0x23009, 0x2300e, 0x2300e, 0x23013, 0x23013, 0x23018, 0x23018,
+	0x2301d, 0x2301d, 0x23022, 0x23022, 0x23027, 0x23032, 0x23037, 0x23037,
+	0x2303c, 0x2303c, 0x23041, 0x23041, 0x23046, 0x23046, 0x2304b, 0x2304b,
+	0x23050, 0x23050, 0x23055, 0x23055, 0x2305a, 0x2305a, 0x2305f, 0x2305f,
+	0x23064, 0x23064, 0x23069, 0x2306a, 0x2306f, 0x2306f, 0x23074, 0x23075,
+	0x2307a, 0x2307e, 0x23083, 0x23083, 0x23088, 0x23088, 0x2308d, 0x2308d,
+	0x23092, 0x23092, 0x230e2, 0x230e2,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_isense_registers), 8));
+
+static const u32 gen7_9_0_rscc_registers[] = {
+	0x14000, 0x14036, 0x14040, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c,
+	0x14091, 0x14094, 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac,
+	0x14100, 0x14104, 0x14114, 0x14119, 0x14124, 0x14132, 0x14154, 0x1416b,
+	0x14340, 0x14342, 0x14344, 0x1437c, 0x143f0, 0x143f8, 0x143fa, 0x143fe,
+	0x14400, 0x14404, 0x14406, 0x1440a, 0x1440c, 0x14410, 0x14412, 0x14416,
+	0x14418, 0x1441c, 0x1441e, 0x14422, 0x14424, 0x14424, 0x14498, 0x144a0,
+	0x144a2, 0x144a6, 0x144a8, 0x144ac, 0x144ae, 0x144b2, 0x144b4, 0x144b8,
+	0x144ba, 0x144be, 0x144c0, 0x144c4, 0x144c6, 0x144ca, 0x144cc, 0x144cc,
+	0x14540, 0x14548, 0x1454a, 0x1454e, 0x14550, 0x14554, 0x14556, 0x1455a,
+	0x1455c, 0x14560, 0x14562, 0x14566, 0x14568, 0x1456c, 0x1456e, 0x14572,
+	0x14574, 0x14574, 0x145e8, 0x145f0, 0x145f2, 0x145f6, 0x145f8, 0x145fc,
+	0x145fe, 0x14602, 0x14604, 0x14608, 0x1460a, 0x1460e, 0x14610, 0x14614,
+	0x14616, 0x1461a, 0x1461c, 0x1461c, 0x14690, 0x14698, 0x1469a, 0x1469e,
+	0x146a0, 0x146a4, 0x146a6, 0x146aa, 0x146ac, 0x146b0, 0x146b2, 0x146b6,
+	0x146b8, 0x146bc, 0x146be, 0x146c2, 0x146c4, 0x146c4, 0x14738, 0x14740,
+	0x14742, 0x14746, 0x14748, 0x1474c, 0x1474e, 0x14752, 0x14754, 0x14758,
+	0x1475a, 0x1475e, 0x14760, 0x14764, 0x14766, 0x1476a, 0x1476c, 0x1476c,
+	0x147e0, 0x147e8, 0x147ea, 0x147ee, 0x147f0, 0x147f4, 0x147f6, 0x147fa,
+	0x147fc, 0x14800, 0x14802, 0x14806, 0x14808, 0x1480c, 0x1480e, 0x14812,
+	0x14814, 0x14814, 0x14888, 0x14890, 0x14892, 0x14896, 0x14898, 0x1489c,
+	0x1489e, 0x148a2, 0x148a4, 0x148a8, 0x148aa, 0x148ae, 0x148b0, 0x148b4,
+	0x148b6, 0x148ba, 0x148bc, 0x148bc, 0x14930, 0x14938, 0x1493a, 0x1493e,
+	0x14940, 0x14944, 0x14946, 0x1494a, 0x1494c, 0x14950, 0x14952, 0x14956,
+	0x14958, 0x1495c, 0x1495e, 0x14962, 0x14964, 0x14964,
+	UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen7_9_0_rscc_registers), 8));
+
+static const u32 *gen7_9_0_external_core_regs[] = {
+	gen7_9_0_gpucc_registers,
+	gen7_9_0_gxclkctl_registers,
+	gen7_9_0_cpr_registers,
+	gen7_9_0_dpm_registers,
+	gen7_9_0_dpm_leakage_registers,
+	gen7_9_0_gfx_gpu_acd_registers,
+};
+#endif /*_ADRENO_GEN7_9_0_SNAPSHOT_H */

+ 473 - 0
qcom/opensource/graphics-kernel/adreno_gen7_coresight.c

@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/amba/bus.h>
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_coresight.h"
+
+static struct adreno_coresight_register gen7_coresight_regs[] = {
+	{ GEN7_DBGC_CFG_DBGBUS_SEL_A },
+	{ GEN7_DBGC_CFG_DBGBUS_SEL_B },
+	{ GEN7_DBGC_CFG_DBGBUS_SEL_C },
+	{ GEN7_DBGC_CFG_DBGBUS_SEL_D },
+	{ GEN7_DBGC_CFG_DBGBUS_CNTLT },
+	{ GEN7_DBGC_CFG_DBGBUS_CNTLM },
+	{ GEN7_DBGC_CFG_DBGBUS_OPL },
+	{ GEN7_DBGC_CFG_DBGBUS_OPE },
+	{ GEN7_DBGC_CFG_DBGBUS_IVTL_0 },
+	{ GEN7_DBGC_CFG_DBGBUS_IVTL_1 },
+	{ GEN7_DBGC_CFG_DBGBUS_IVTL_2 },
+	{ GEN7_DBGC_CFG_DBGBUS_IVTL_3 },
+	{ GEN7_DBGC_CFG_DBGBUS_MASKL_0 },
+	{ GEN7_DBGC_CFG_DBGBUS_MASKL_1 },
+	{ GEN7_DBGC_CFG_DBGBUS_MASKL_2 },
+	{ GEN7_DBGC_CFG_DBGBUS_MASKL_3 },
+	{ GEN7_DBGC_CFG_DBGBUS_BYTEL_0 },
+	{ GEN7_DBGC_CFG_DBGBUS_BYTEL_1 },
+	{ GEN7_DBGC_CFG_DBGBUS_IVTE_0 },
+	{ GEN7_DBGC_CFG_DBGBUS_IVTE_1 },
+	{ GEN7_DBGC_CFG_DBGBUS_IVTE_2 },
+	{ GEN7_DBGC_CFG_DBGBUS_IVTE_3 },
+	{ GEN7_DBGC_CFG_DBGBUS_MASKE_0 },
+	{ GEN7_DBGC_CFG_DBGBUS_MASKE_1 },
+	{ GEN7_DBGC_CFG_DBGBUS_MASKE_2 },
+	{ GEN7_DBGC_CFG_DBGBUS_MASKE_3 },
+	{ GEN7_DBGC_CFG_DBGBUS_NIBBLEE },
+	{ GEN7_DBGC_CFG_DBGBUS_PTRC0 },
+	{ GEN7_DBGC_CFG_DBGBUS_PTRC1 },
+	{ GEN7_DBGC_CFG_DBGBUS_LOADREG },
+	{ GEN7_DBGC_CFG_DBGBUS_IDX },
+	{ GEN7_DBGC_CFG_DBGBUS_CLRC },
+	{ GEN7_DBGC_CFG_DBGBUS_LOADIVT },
+	{ GEN7_DBGC_VBIF_DBG_CNTL },
+	{ GEN7_DBGC_DBG_LO_HI_GPIO },
+	{ GEN7_DBGC_EXT_TRACE_BUS_CNTL },
+	{ GEN7_DBGC_READ_AHB_THROUGH_DBG },
+	{ GEN7_DBGC_CFG_DBGBUS_TRACE_BUF1 },
+	{ GEN7_DBGC_CFG_DBGBUS_TRACE_BUF2 },
+	{ GEN7_DBGC_EVT_CFG },
+	{ GEN7_DBGC_EVT_INTF_SEL_0 },
+	{ GEN7_DBGC_EVT_INTF_SEL_1 },
+	{ GEN7_DBGC_PERF_ATB_CFG },
+	{ GEN7_DBGC_PERF_ATB_COUNTER_SEL_0 },
+	{ GEN7_DBGC_PERF_ATB_COUNTER_SEL_1 },
+	{ GEN7_DBGC_PERF_ATB_COUNTER_SEL_2 },
+	{ GEN7_DBGC_PERF_ATB_COUNTER_SEL_3 },
+	{ GEN7_DBGC_PERF_ATB_TRIG_INTF_SEL_0 },
+	{ GEN7_DBGC_PERF_ATB_TRIG_INTF_SEL_1 },
+	{ GEN7_DBGC_PERF_ATB_DRAIN_CMD },
+	{ GEN7_DBGC_ECO_CNTL },
+	{ GEN7_DBGC_AHB_DBG_CNTL },
+};
+
+static struct adreno_coresight_register gen7_coresight_regs_cx[] = {
+	{ GEN7_CX_DBGC_CFG_DBGBUS_SEL_A },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_SEL_B },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_SEL_C },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_SEL_D },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_CNTLT },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_CNTLM },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_OPL },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_OPE },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_IVTL_0 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_IVTL_1 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_IVTL_2 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_IVTL_3 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_MASKL_0 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_MASKL_1 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_MASKL_2 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_MASKL_3 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_0 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_1 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_IVTE_0 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_IVTE_1 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_IVTE_2 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_IVTE_3 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_MASKE_0 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_MASKE_1 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_MASKE_2 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_MASKE_3 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_NIBBLEE },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_PTRC0 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_PTRC1 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_LOADREG },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_IDX },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_CLRC },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_LOADIVT },
+	{ GEN7_CX_DBGC_VBIF_DBG_CNTL },
+	{ GEN7_CX_DBGC_DBG_LO_HI_GPIO },
+	{ GEN7_CX_DBGC_EXT_TRACE_BUS_CNTL },
+	{ GEN7_CX_DBGC_READ_AHB_THROUGH_DBG },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 },
+	{ GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 },
+	{ GEN7_CX_DBGC_EVT_CFG },
+	{ GEN7_CX_DBGC_EVT_INTF_SEL_0 },
+	{ GEN7_CX_DBGC_EVT_INTF_SEL_1 },
+	{ GEN7_CX_DBGC_PERF_ATB_CFG },
+	{ GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_0 },
+	{ GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_1 },
+	{ GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_2 },
+	{ GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_3 },
+	{ GEN7_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 },
+	{ GEN7_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 },
+	{ GEN7_CX_DBGC_PERF_ATB_DRAIN_CMD },
+	{ GEN7_CX_DBGC_ECO_CNTL },
+	{ GEN7_CX_DBGC_AHB_DBG_CNTL },
+};
+
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &gen7_coresight_regs[0]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &gen7_coresight_regs[1]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &gen7_coresight_regs[2]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &gen7_coresight_regs[3]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &gen7_coresight_regs[4]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &gen7_coresight_regs[5]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &gen7_coresight_regs[6]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &gen7_coresight_regs[7]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &gen7_coresight_regs[8]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &gen7_coresight_regs[9]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &gen7_coresight_regs[10]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &gen7_coresight_regs[11]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &gen7_coresight_regs[12]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &gen7_coresight_regs[13]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &gen7_coresight_regs[14]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &gen7_coresight_regs[15]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &gen7_coresight_regs[16]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &gen7_coresight_regs[17]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &gen7_coresight_regs[18]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &gen7_coresight_regs[19]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &gen7_coresight_regs[20]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &gen7_coresight_regs[21]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &gen7_coresight_regs[22]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &gen7_coresight_regs[23]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &gen7_coresight_regs[24]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &gen7_coresight_regs[25]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &gen7_coresight_regs[26]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &gen7_coresight_regs[27]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &gen7_coresight_regs[28]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &gen7_coresight_regs[29]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &gen7_coresight_regs[30]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &gen7_coresight_regs[31]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &gen7_coresight_regs[32]);
+static ADRENO_CORESIGHT_ATTR(vbif_dbg_cntl, &gen7_coresight_regs[33]);
+static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &gen7_coresight_regs[34]);
+static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &gen7_coresight_regs[35]);
+static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, &gen7_coresight_regs[36]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, &gen7_coresight_regs[37]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, &gen7_coresight_regs[38]);
+static ADRENO_CORESIGHT_ATTR(evt_cfg, &gen7_coresight_regs[39]);
+static ADRENO_CORESIGHT_ATTR(evt_intf_sel_0, &gen7_coresight_regs[40]);
+static ADRENO_CORESIGHT_ATTR(evt_intf_sel_1, &gen7_coresight_regs[41]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_cfg, &gen7_coresight_regs[42]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_0, &gen7_coresight_regs[43]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_1, &gen7_coresight_regs[44]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_2, &gen7_coresight_regs[45]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_3, &gen7_coresight_regs[46]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_0,
+				&gen7_coresight_regs[47]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_1,
+				&gen7_coresight_regs[48]);
+static ADRENO_CORESIGHT_ATTR(perf_atb_drain_cmd, &gen7_coresight_regs[49]);
+static ADRENO_CORESIGHT_ATTR(eco_cntl, &gen7_coresight_regs[50]);
+static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &gen7_coresight_regs[51]);
+
+/* CX debug registers */
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_a,
+				&gen7_coresight_regs_cx[0]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_b,
+				&gen7_coresight_regs_cx[1]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_c,
+				&gen7_coresight_regs_cx[2]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_d,
+				&gen7_coresight_regs_cx[3]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlt,
+				&gen7_coresight_regs_cx[4]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlm,
+				&gen7_coresight_regs_cx[5]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_opl,
+				&gen7_coresight_regs_cx[6]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ope,
+				&gen7_coresight_regs_cx[7]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_0,
+				&gen7_coresight_regs_cx[8]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_1,
+				&gen7_coresight_regs_cx[9]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_2,
+				&gen7_coresight_regs_cx[10]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_3,
+				&gen7_coresight_regs_cx[11]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_0,
+				&gen7_coresight_regs_cx[12]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_1,
+				&gen7_coresight_regs_cx[13]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_2,
+				&gen7_coresight_regs_cx[14]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_3,
+				&gen7_coresight_regs_cx[15]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_0,
+				&gen7_coresight_regs_cx[16]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_1,
+				&gen7_coresight_regs_cx[17]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_0,
+				&gen7_coresight_regs_cx[18]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_1,
+				&gen7_coresight_regs_cx[19]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_2,
+				&gen7_coresight_regs_cx[20]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_3,
+				&gen7_coresight_regs_cx[21]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_0,
+				&gen7_coresight_regs_cx[22]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_1,
+				&gen7_coresight_regs_cx[23]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_2,
+				&gen7_coresight_regs_cx[24]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_3,
+				&gen7_coresight_regs_cx[25]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_nibblee,
+				&gen7_coresight_regs_cx[26]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc0,
+				&gen7_coresight_regs_cx[27]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc1,
+				&gen7_coresight_regs_cx[28]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadreg,
+				&gen7_coresight_regs_cx[29]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_idx,
+				&gen7_coresight_regs_cx[30]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_clrc,
+				&gen7_coresight_regs_cx[31]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadivt,
+				&gen7_coresight_regs_cx[32]);
+static ADRENO_CORESIGHT_ATTR(cx_vbif_dbg_cntl,
+				&gen7_coresight_regs_cx[33]);
+static ADRENO_CORESIGHT_ATTR(cx_dbg_lo_hi_gpio,
+				&gen7_coresight_regs_cx[34]);
+static ADRENO_CORESIGHT_ATTR(cx_ext_trace_bus_cntl,
+				&gen7_coresight_regs_cx[35]);
+static ADRENO_CORESIGHT_ATTR(cx_read_ahb_through_dbg,
+				&gen7_coresight_regs_cx[36]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf1,
+				&gen7_coresight_regs_cx[37]);
+static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf2,
+				&gen7_coresight_regs_cx[38]);
+static ADRENO_CORESIGHT_ATTR(cx_evt_cfg,
+				&gen7_coresight_regs_cx[39]);
+static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_0,
+				&gen7_coresight_regs_cx[40]);
+static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_1,
+				&gen7_coresight_regs_cx[41]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_cfg,
+				&gen7_coresight_regs_cx[42]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_0,
+				&gen7_coresight_regs_cx[43]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_1,
+				&gen7_coresight_regs_cx[44]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_2,
+				&gen7_coresight_regs_cx[45]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_3,
+				&gen7_coresight_regs_cx[46]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_0,
+				&gen7_coresight_regs_cx[47]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_1,
+				&gen7_coresight_regs_cx[48]);
+static ADRENO_CORESIGHT_ATTR(cx_perf_atb_drain_cmd,
+				&gen7_coresight_regs_cx[49]);
+static ADRENO_CORESIGHT_ATTR(cx_eco_cntl,
+				&gen7_coresight_regs_cx[50]);
+static ADRENO_CORESIGHT_ATTR(cx_ahb_dbg_cntl,
+				&gen7_coresight_regs_cx[51]);
+
+static struct attribute *gen7_coresight_attrs[] = {
+	&coresight_attr_cfg_dbgbus_sel_a.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_b.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_c.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_d.attr.attr,
+	&coresight_attr_cfg_dbgbus_cntlt.attr.attr,
+	&coresight_attr_cfg_dbgbus_cntlm.attr.attr,
+	&coresight_attr_cfg_dbgbus_opl.attr.attr,
+	&coresight_attr_cfg_dbgbus_ope.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_bytel_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_bytel_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_nibblee.attr.attr,
+	&coresight_attr_cfg_dbgbus_ptrc0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ptrc1.attr.attr,
+	&coresight_attr_cfg_dbgbus_loadreg.attr.attr,
+	&coresight_attr_cfg_dbgbus_idx.attr.attr,
+	&coresight_attr_cfg_dbgbus_clrc.attr.attr,
+	&coresight_attr_cfg_dbgbus_loadivt.attr.attr,
+	&coresight_attr_vbif_dbg_cntl.attr.attr,
+	&coresight_attr_dbg_lo_hi_gpio.attr.attr,
+	&coresight_attr_ext_trace_bus_cntl.attr.attr,
+	&coresight_attr_read_ahb_through_dbg.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf1.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf2.attr.attr,
+	&coresight_attr_evt_cfg.attr.attr,
+	&coresight_attr_evt_intf_sel_0.attr.attr,
+	&coresight_attr_evt_intf_sel_1.attr.attr,
+	&coresight_attr_perf_atb_cfg.attr.attr,
+	&coresight_attr_perf_atb_counter_sel_0.attr.attr,
+	&coresight_attr_perf_atb_counter_sel_1.attr.attr,
+	&coresight_attr_perf_atb_counter_sel_2.attr.attr,
+	&coresight_attr_perf_atb_counter_sel_3.attr.attr,
+	&coresight_attr_perf_atb_trig_intf_sel_0.attr.attr,
+	&coresight_attr_perf_atb_trig_intf_sel_1.attr.attr,
+	&coresight_attr_perf_atb_drain_cmd.attr.attr,
+	&coresight_attr_eco_cntl.attr.attr,
+	&coresight_attr_ahb_dbg_cntl.attr.attr,
+	NULL,
+};
+
+/* cx */
+static struct attribute *gen7_coresight_attrs_cx[] = {
+	&coresight_attr_cx_cfg_dbgbus_sel_a.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_sel_b.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_sel_c.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_sel_d.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_cntlt.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_cntlm.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_opl.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ope.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivtl_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivtl_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivtl_2.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivtl_3.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maskl_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maskl_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maskl_2.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maskl_3.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_bytel_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_bytel_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivte_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivte_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivte_2.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ivte_3.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maske_0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maske_1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maske_2.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_maske_3.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_nibblee.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ptrc0.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_ptrc1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_loadreg.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_idx.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_clrc.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_loadivt.attr.attr,
+	&coresight_attr_cx_vbif_dbg_cntl.attr.attr,
+	&coresight_attr_cx_dbg_lo_hi_gpio.attr.attr,
+	&coresight_attr_cx_ext_trace_bus_cntl.attr.attr,
+	&coresight_attr_cx_read_ahb_through_dbg.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_trace_buf1.attr.attr,
+	&coresight_attr_cx_cfg_dbgbus_trace_buf2.attr.attr,
+	&coresight_attr_cx_evt_cfg.attr.attr,
+	&coresight_attr_cx_evt_intf_sel_0.attr.attr,
+	&coresight_attr_cx_evt_intf_sel_1.attr.attr,
+	&coresight_attr_cx_perf_atb_cfg.attr.attr,
+	&coresight_attr_cx_perf_atb_counter_sel_0.attr.attr,
+	&coresight_attr_cx_perf_atb_counter_sel_1.attr.attr,
+	&coresight_attr_cx_perf_atb_counter_sel_2.attr.attr,
+	&coresight_attr_cx_perf_atb_counter_sel_3.attr.attr,
+	&coresight_attr_cx_perf_atb_trig_intf_sel_0.attr.attr,
+	&coresight_attr_cx_perf_atb_trig_intf_sel_1.attr.attr,
+	&coresight_attr_cx_perf_atb_drain_cmd.attr.attr,
+	&coresight_attr_cx_eco_cntl.attr.attr,
+	&coresight_attr_cx_ahb_dbg_cntl.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group gen7_coresight_group = {
+	.attrs = gen7_coresight_attrs,
+};
+
+static const struct attribute_group *gen7_coresight_groups[] = {
+	&gen7_coresight_group,
+	NULL,
+};
+
+static const struct attribute_group gen7_coresight_group_cx = {
+	.attrs = gen7_coresight_attrs_cx,
+};
+
+static const struct attribute_group *gen7_coresight_groups_cx[] = {
+	&gen7_coresight_group_cx,
+	NULL,
+};
+
+static const struct adreno_coresight gen7_coresight = {
+	.registers = gen7_coresight_regs,
+	.count = ARRAY_SIZE(gen7_coresight_regs),
+	.groups = gen7_coresight_groups,
+};
+
+static const struct adreno_coresight gen7_coresight_cx = {
+	.registers = gen7_coresight_regs_cx,
+	.count = ARRAY_SIZE(gen7_coresight_regs_cx),
+	.groups = gen7_coresight_groups_cx,
+};
+
+static int name_match(struct device *dev, void *data)
+{
+	char *child_name = data;
+
+	return strcmp(child_name, dev_name(dev)) == 0;
+}
+
+void gen7_coresight_init(struct adreno_device *adreno_dev)
+{
+	struct adreno_funnel_device *funnel_gfx = &adreno_dev->funnel_gfx;
+	struct device *amba_dev;
+
+	/* Find the amba funnel device associated with gfx coresight funnel */
+	amba_dev = bus_find_device_by_name(&amba_bustype, NULL, "10963000.funnel");
+	if (!amba_dev)
+		return;
+
+	funnel_gfx->funnel_dev = device_find_child(amba_dev, "coresight-funnel-gfx", name_match);
+	if (funnel_gfx->funnel_dev == NULL)
+		return;
+
+	funnel_gfx->funnel_csdev = to_coresight_device(funnel_gfx->funnel_dev);
+	if (funnel_gfx->funnel_csdev == NULL)
+		return;
+
+	/*
+	 * Since the coresight_funnel_gfx component is in the graphics block, the GPU has to be
+	 * powered up before the funnel is enabled. The generic coresight driver currently doesn't
+	 * handle that, so override the funnel ops set by the coresight driver with the graphics
+	 * funnel ops so that the GPU can be brought up before the funnel is enabled.
+	 */
+	funnel_gfx->funnel_ops = funnel_gfx->funnel_csdev->ops;
+	funnel_gfx->funnel_csdev->ops = NULL;
+
+	/*
+	 * The read-only sysfs node (funnel_ctrl) associated with the gfx funnel reads the control
+	 * register and could cause a NOC error when the GPU is in slumber. Since we do not require
+	 * such a node, remove the attribute groups for the funnel.
+	 */
+	sysfs_remove_groups(&funnel_gfx->funnel_dev->kobj, funnel_gfx->funnel_csdev->dev.groups);
+
+	adreno_coresight_add_device(adreno_dev, "qcom,gpu-coresight-gx",
+		&gen7_coresight, &adreno_dev->gx_coresight);
+
+	adreno_coresight_add_device(adreno_dev, "qcom,gpu-coresight-cx",
+		&gen7_coresight_cx, &adreno_dev->cx_coresight);
+}

+ 3437 - 0
qcom/opensource/graphics-kernel/adreno_gen7_gmu.c

@@ -0,0 +1,3437 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <dt-bindings/regulator/qcom,rpmh-regulator-levels.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/dma-map-ops.h>
+#include <linux/firmware.h>
+#include <linux/interconnect.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/of_platform.h>
+#include <linux/qcom-iommu-util.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/mailbox/qmp.h>
+#include <soc/qcom/cmd-db.h>
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_trace.h"
+#include "kgsl_bus.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
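+/*
+ * Static GMU virtual address layout: fixed ITCM/DTCM windows plus the cached
+ * and non-cached regions used to map GMU buffers.
+ */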
+static struct gmu_vma_entry gen7_gmu_vma[] = {
+	[GMU_ITCM] = {
+			.start = 0x00000000,
+			.size = SZ_16K,
+		},
+	[GMU_CACHE] = {
+			.start = SZ_16K,
+			.size = (SZ_16M - SZ_16K),
+			.next_va = SZ_16K,
+		},
+	[GMU_DTCM] = {
+			.start = SZ_256M + SZ_16K,
+			.size = SZ_16K,
+		},
+	[GMU_DCACHE] = {
+			.start = 0x0,
+			.size = 0x0,
+		},
+	[GMU_NONCACHED_KERNEL] = {
+			.start = 0x60000000,
+			.size = SZ_512M,
+			.next_va = 0x60000000,
+		},
+	[GMU_NONCACHED_KERNEL_EXTENDED] = {
+			.start = 0xc0000000,
+			.size = SZ_512M,
+			.next_va = 0xc0000000,
+		},
+};
+
+static ssize_t log_stream_enable_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj);
+	bool val;
+	int ret;
+
+	ret = kstrtobool(buf, &val);
+	if (ret)
+		return ret;
+
+	gmu->log_stream_enable = val;
+	adreno_mark_for_coldboot(gen7_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t log_stream_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->log_stream_enable);
+}
+
+static ssize_t log_group_mask_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	gmu->log_group_mask = val;
+	adreno_mark_for_coldboot(gen7_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t log_group_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->log_group_mask);
+}
+
+static struct kobj_attribute log_stream_enable_attr =
+	__ATTR(log_stream_enable, 0644, log_stream_enable_show, log_stream_enable_store);
+
+static struct kobj_attribute log_group_mask_attr =
+	__ATTR(log_group_mask, 0644, log_group_mask_show, log_group_mask_store);
+
+static struct attribute *log_attrs[] = {
+	&log_stream_enable_attr.attr,
+	&log_group_mask_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(log);
+
+static struct kobj_type log_kobj_type = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = log_groups,
+};
+
+static ssize_t stats_enable_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, stats_kobj);
+	bool val;
+	int ret;
+
+	ret = kstrtobool(buf, &val);
+	if (ret)
+		return ret;
+
+	gmu->stats_enable = val;
+	adreno_mark_for_coldboot(gen7_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t stats_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, stats_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->stats_enable);
+}
+
+static ssize_t stats_mask_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, stats_kobj);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	gmu->stats_mask = val;
+	adreno_mark_for_coldboot(gen7_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t stats_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, stats_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->stats_mask);
+}
+
+static ssize_t stats_interval_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, stats_kobj);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	gmu->stats_interval = val;
+	adreno_mark_for_coldboot(gen7_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t stats_interval_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, stats_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->stats_interval);
+}
+
+static struct kobj_attribute stats_enable_attr =
+	__ATTR(stats_enable, 0644, stats_enable_show, stats_enable_store);
+
+static struct kobj_attribute stats_mask_attr =
+	__ATTR(stats_mask, 0644, stats_mask_show, stats_mask_store);
+
+static struct kobj_attribute stats_interval_attr =
+	__ATTR(stats_interval, 0644, stats_interval_show, stats_interval_store);
+
+static struct attribute *stats_attrs[] = {
+	&stats_enable_attr.attr,
+	&stats_mask_attr.attr,
+	&stats_interval_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(stats);
+
+static struct kobj_type stats_kobj_type = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = stats_groups,
+};
+
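+/*
+ * Poll an RSCC register until (value & mask) == expected_ret. The offset is
+ * a dword index (shifted left by 2 to get the byte offset) and the timeout
+ * is in milliseconds, polling roughly every 100us.
+ */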
+static int gen7_timed_poll_check_rscc(struct gen7_gmu_device *gmu,
+		unsigned int offset, unsigned int expected_ret,
+		unsigned int timeout, unsigned int mask)
+{
+	u32 value;
+
+	return readl_poll_timeout(gmu->rscc_virt + (offset << 2), value,
+		(value & mask) == expected_ret, 100, timeout * 1000);
+}
+
+struct gen7_gmu_device *to_gen7_gmu(struct adreno_device *adreno_dev)
+{
+	struct gen7_device *gen7_dev = container_of(adreno_dev,
+					struct gen7_device, adreno_dev);
+
+	return &gen7_dev->gmu;
+}
+
+struct adreno_device *gen7_gmu_to_adreno(struct gen7_gmu_device *gmu)
+{
+	struct gen7_device *gen7_dev =
+			container_of(gmu, struct gen7_device, gmu);
+
+	return &gen7_dev->adreno_dev;
+}
+
+#define RSC_CMD_OFFSET 2
+
+static void _regwrite(void __iomem *regbase,
+		unsigned int offsetwords, unsigned int value)
+{
+	void __iomem *reg;
+
+	reg = regbase + (offsetwords << 2);
+	__raw_writel(value, reg);
+}
+
+void gen7_load_rsc_ucode(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	void __iomem *rscc = gmu->rscc_virt;
+	unsigned int seq_offset = GEN7_RSCC_SEQ_MEM_0_DRV0;
+
+	/* Disable SDE clock gating */
+	_regwrite(rscc, GEN7_GPU_RSCC_RSC_STATUS0_DRV0, BIT(24));
+
+	/* Setup RSC PDC handshake for sleep and wakeup */
+	_regwrite(rscc, GEN7_RSCC_PDC_SLAVE_ID_DRV0, 1);
+	_regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_DATA, 0);
+	_regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR, 0);
+	_regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET, 0);
+	_regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET, 0);
+	_regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET * 2,
+		adreno_is_gen7_2_x_family(adreno_dev) ?  0x80000021 : 0x80000000);
+	_regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET * 2, 0);
+	_regwrite(rscc, GEN7_RSCC_OVERRIDE_START_ADDR, 0);
+	_regwrite(rscc, GEN7_RSCC_PDC_SEQ_START_ADDR, 0x4520);
+	_regwrite(rscc, GEN7_RSCC_PDC_MATCH_VALUE_LO, 0x4510);
+	_regwrite(rscc, GEN7_RSCC_PDC_MATCH_VALUE_HI, 0x4514);
+
+	if (adreno_is_gen7_2_x_family(adreno_dev))
+		seq_offset = GEN7_2_0_RSCC_SEQ_MEM_0_DRV0;
+
+	/* Load RSC sequencer uCode for sleep and wakeup */
+	_regwrite(rscc, seq_offset, 0xeaaae5a0);
+	_regwrite(rscc, seq_offset + 1, 0xe1a1ebab);
+	_regwrite(rscc, seq_offset + 2, 0xa2e0a581);
+	_regwrite(rscc, seq_offset + 3, 0xecac82e2);
+	_regwrite(rscc, seq_offset + 4, 0x0020edad);
+}
+
+int gen7_load_pdc_ucode(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct resource *res_cfg;
+	void __iomem *cfg = NULL;
+
+	res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM,
+			"gmu_pdc");
+	if (res_cfg)
+		cfg = ioremap(res_cfg->start, resource_size(res_cfg));
+
+	if (!cfg) {
+		dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n");
+		return -ENODEV;
+	}
+
+	/* Setup GPU PDC */
+	_regwrite(cfg, GEN7_PDC_GPU_SEQ_START_ADDR, 0);
+	_regwrite(cfg, GEN7_PDC_GPU_ENABLE_PDC, 0x80000001);
+
+	iounmap(cfg);
+
+	return 0;
+}
+
+/* Configure and enable GMU low power mode */
+static void gen7_gmu_power_config(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Disable GMU WB/RB buffer and caches at boot */
+	gmu_core_regwrite(device, GEN7_GMU_SYS_BUS_CONFIG, 0x1);
+	gmu_core_regwrite(device, GEN7_GMU_ICACHE_CONFIG, 0x1);
+	gmu_core_regwrite(device, GEN7_GMU_DCACHE_CONFIG, 0x1);
+}
+
+static void gmu_ao_sync_event(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned long flags;
+	u64 ticks;
+
+	/*
+	 * Get the GMU always-on ticks and log them in a trace message. This
+	 * will be used to map GMU ticks to ftrace time. Do this in atomic
+	 * context to ensure nothing happens between reading the always-on
+	 * ticks and emitting the trace.
+	 */
+
+	local_irq_save(flags);
+
+	ticks = gpudev->read_alwayson(adreno_dev);
+
+	trace_gmu_ao_sync(ticks);
+
+	local_irq_restore(flags);
+}
+
+int gen7_gmu_device_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	gmu_core_reset_trace_header(&gmu->trace);
+
+	gmu_ao_sync_event(adreno_dev);
+
+	/* Bring GMU out of reset */
+	gmu_core_regwrite(device, GEN7_GMU_CM3_SYSRESET, 0);
+
+	/* Make sure the write is posted before moving ahead */
+	wmb();
+
+	if (gmu_core_timed_poll_check(device, GEN7_GMU_CM3_FW_INIT_RESULT,
+			BIT(8), 100, GENMASK(8, 0))) {
+		dev_err(&gmu->pdev->dev, "GMU failed to come out of reset\n");
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/*
+ * gen7_gmu_hfi_start() - Write registers and start HFI.
+ * @adreno_dev: Pointer to the Adreno device
+ */
+int gen7_gmu_hfi_start(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	gmu_core_regwrite(device, GEN7_GMU_HFI_CTRL_INIT, 1);
+
+	if (gmu_core_timed_poll_check(device, GEN7_GMU_HFI_CTRL_STATUS,
+			BIT(0), 100, BIT(0))) {
+		dev_err(&gmu->pdev->dev, "GMU HFI init failed\n");
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+int gen7_rscc_wakeup_sequence(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct device *dev = &gmu->pdev->dev;
+
+	/* Skip wakeup sequence if we didn't do the sleep sequence */
+	if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags))
+		return 0;
+
+	/* RSC wake sequence */
+	gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, BIT(1));
+
+	/* Write request before polling */
+	wmb();
+
+	if (gmu_core_timed_poll_check(device, GEN7_GMU_RSCC_CONTROL_ACK,
+				BIT(1), 100, BIT(1))) {
+		dev_err(dev, "Failed to do GPU RSC power on\n");
+		return -ETIMEDOUT;
+	}
+
+	if (gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_SEQ_BUSY_DRV0,
+				0x0, 100, UINT_MAX)) {
+		dev_err(dev, "GPU RSC sequence stuck in waking up GPU\n");
+		return -ETIMEDOUT;
+	}
+
+	gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, 0);
+
+	clear_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags);
+
+	return 0;
+}
+
+int gen7_rscc_sleep_sequence(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return 0;
+
+	if (test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags))
+		return 0;
+
+	gmu_core_regwrite(device, GEN7_GMU_CM3_SYSRESET, 1);
+	/* Make sure M3 is in reset before going on */
+	wmb();
+
+	gmu_core_regread(device, GEN7_GMU_GENERAL_9, &gmu->log_wptr_retention);
+
+	gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, BIT(0));
+	/* Make sure the request completes before continuing */
+	wmb();
+
+	ret = gen7_timed_poll_check_rscc(gmu, GEN7_GPU_RSCC_RSC_STATUS0_DRV0,
+			BIT(16), 100, BIT(16));
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "GPU RSC power off fail\n");
+		return -ETIMEDOUT;
+	}
+
+	gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, 0);
+
+	set_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags);
+
+	return 0;
+}
+
+static struct kgsl_memdesc *find_gmu_memdesc(struct gen7_gmu_device *gmu,
+	u32 addr, u32 size)
+{
+	int i;
+
+	for (i = 0; i < gmu->global_entries; i++) {
+		struct kgsl_memdesc *md = &gmu->gmu_globals[i];
+
+		if ((addr >= md->gmuaddr) &&
+				(((addr + size) <= (md->gmuaddr + md->size))))
+			return md;
+	}
+
+	return NULL;
+}
+
+static int find_vma_block(struct gen7_gmu_device *gmu, u32 addr, u32 size)
+{
+	int i;
+
+	for (i = 0; i < GMU_MEM_TYPE_MAX; i++) {
+		struct gmu_vma_entry *vma = &gmu->vma[i];
+
+		if ((addr >= vma->start) &&
+			((addr + size) <= (vma->start + vma->size)))
+			return i;
+	}
+
+	return -ENOENT;
+}
+
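+/*
+ * Copy one firmware block into ITCM or DTCM: blk->addr is a GMU virtual
+ * address, so subtract the VMA base and convert bytes to dwords to get the
+ * register offset relative to the TCM start.
+ */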
+static void load_tcm(struct adreno_device *adreno_dev, const u8 *src,
+	u32 tcm_start, u32 base, const struct gmu_block_header *blk)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 tcm_offset = tcm_start + ((blk->addr - base)/sizeof(u32));
+
+	kgsl_regmap_bulk_write(&device->regmap, tcm_offset, src,
+		blk->size >> 2);
+}
+
+int gen7_gmu_load_fw(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	const u8 *fw = (const u8 *)gmu->fw_image->data;
+
+	while (fw < gmu->fw_image->data + gmu->fw_image->size) {
+		const struct gmu_block_header *blk =
+					(const struct gmu_block_header *)fw;
+		int id;
+
+		fw += sizeof(*blk);
+
+		/* Don't deal with zero size blocks */
+		if (blk->size == 0)
+			continue;
+
+		id = find_vma_block(gmu, blk->addr, blk->size);
+
+		if (id < 0) {
+			dev_err(&gmu->pdev->dev,
+				"Unknown block in GMU FW addr:0x%x size:0x%x\n",
+				blk->addr, blk->size);
+			return -EINVAL;
+		}
+
+		if (id == GMU_ITCM) {
+			load_tcm(adreno_dev, fw,
+				GEN7_GMU_CM3_ITCM_START,
+				gmu->vma[GMU_ITCM].start, blk);
+		} else if (id == GMU_DTCM) {
+			load_tcm(adreno_dev, fw,
+				GEN7_GMU_CM3_DTCM_START,
+				gmu->vma[GMU_DTCM].start, blk);
+		} else {
+			/* The firmware block for memory needs to be copied on first boot only */
+			if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) {
+				struct kgsl_memdesc *md =
+					find_gmu_memdesc(gmu, blk->addr, blk->size);
+
+				if (!md) {
+					dev_err(&gmu->pdev->dev,
+						"No backing memory for GMU FW block addr:0x%x size:0x%x\n",
+						blk->addr, blk->size);
+					return -EINVAL;
+				}
+
+				memcpy(md->hostptr + (blk->addr - md->gmuaddr), fw,
+					blk->size);
+			}
+		}
+
+		fw += blk->size;
+	}
+
+	/* Proceed only after the FW is written */
+	wmb();
+	return 0;
+}
+
+static const char *oob_to_str(enum oob_request req)
+{
+	switch (req) {
+	case oob_gpu:
+		return "oob_gpu";
+	case oob_perfcntr:
+		return "oob_perfcntr";
+	case oob_boot_slumber:
+		return "oob_boot_slumber";
+	case oob_dcvs:
+		return "oob_dcvs";
+	default:
+		return "unknown";
+	}
+}
+
+static void trigger_reset_recovery(struct adreno_device *adreno_dev,
+	enum oob_request req)
+{
+	/*
+	 * Trigger recovery for perfcounter oob only since only
+	 * perfcounter oob can happen alongside an actively rendering gpu.
+	 */
+	if (req != oob_perfcntr)
+		return;
+
+	if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault)
+		adreno_dev->dispatch_ops->fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+}
+
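+/*
+ * OOB requests are a handshake with the GMU: set BIT(30 - 2 * req) in
+ * HOST2GMU_INTR_SET, poll for the ack at BIT(31 - req) in GMU2HOST_INTR_INFO,
+ * then clear the ack bit.
+ */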
+int gen7_gmu_oob_set(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+	int set, check;
+
+	if (req == oob_perfcntr && gmu->num_oob_perfcntr++)
+		return 0;
+
+	if (req >= oob_boot_slumber) {
+		dev_err(&gmu->pdev->dev,
+			"Unsupported OOB request %s\n",
+			oob_to_str(req));
+		return -EINVAL;
+	}
+
+	set = BIT(30 - req * 2);
+	check = BIT(31 - req);
+
+	gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, set);
+
+	if (gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO, check,
+				100, check)) {
+		if (req == oob_perfcntr)
+			gmu->num_oob_perfcntr--;
+		gmu_core_fault_snapshot(device);
+		ret = -ETIMEDOUT;
+		WARN(1, "OOB request %s timed out\n", oob_to_str(req));
+		trigger_reset_recovery(adreno_dev, req);
+	}
+
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, check);
+
+	trace_kgsl_gmu_oob_set(set);
+	return ret;
+}
+
+void gen7_gmu_oob_clear(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int clear = BIT(31 - req * 2);
+
+	if (req == oob_perfcntr && --gmu->num_oob_perfcntr)
+		return;
+
+	if (req >= oob_boot_slumber) {
+		dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n",
+				oob_to_str(req));
+		return;
+	}
+
+	gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, clear);
+	trace_kgsl_gmu_oob_clear(clear);
+}
+
+void gen7_gmu_irq_enable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hfi *hfi = &gmu->hfi;
+
+	/* Clear pending IRQs and unmask needed IRQs */
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, UINT_MAX);
+
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK,
+			(unsigned int)~HFI_IRQ_MASK);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK,
+			(unsigned int)~GMU_AO_INT_MASK);
+
+	/* Enable all IRQs on host */
+	enable_irq(hfi->irq);
+	enable_irq(gmu->irq);
+}
+
+void gen7_gmu_irq_disable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hfi *hfi = &gmu->hfi;
+
+	/* Disable all IRQs on host */
+	disable_irq(gmu->irq);
+	disable_irq(hfi->irq);
+
+	/* Mask all IRQs and clear pending IRQs */
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, UINT_MAX);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, UINT_MAX);
+
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, UINT_MAX);
+}
+
+static int gen7_gmu_hfi_start_msg(struct adreno_device *adreno_dev)
+{
+	struct hfi_start_cmd req;
+	int ret;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_START);
+	if (ret)
+		return ret;
+
+	return gen7_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+}
+
+static u32 gen7_rscc_tcsm_drv0_status_reglist[] = {
+	GEN7_RSCC_TCS0_DRV0_STATUS,
+	GEN7_RSCC_TCS1_DRV0_STATUS,
+	GEN7_RSCC_TCS2_DRV0_STATUS,
+	GEN7_RSCC_TCS3_DRV0_STATUS,
+	GEN7_RSCC_TCS4_DRV0_STATUS,
+	GEN7_RSCC_TCS5_DRV0_STATUS,
+	GEN7_RSCC_TCS6_DRV0_STATUS,
+	GEN7_RSCC_TCS7_DRV0_STATUS,
+	GEN7_RSCC_TCS8_DRV0_STATUS,
+	GEN7_RSCC_TCS9_DRV0_STATUS,
+};
+
+static u32 gen7_2_0_rscc_tcsm_drv0_status_reglist[] = {
+	GEN7_2_0_RSCC_TCS0_DRV0_STATUS,
+	GEN7_2_0_RSCC_TCS1_DRV0_STATUS,
+	GEN7_2_0_RSCC_TCS2_DRV0_STATUS,
+	GEN7_2_0_RSCC_TCS3_DRV0_STATUS,
+	GEN7_2_0_RSCC_TCS4_DRV0_STATUS,
+	GEN7_2_0_RSCC_TCS5_DRV0_STATUS,
+	GEN7_2_0_RSCC_TCS6_DRV0_STATUS,
+	GEN7_2_0_RSCC_TCS7_DRV0_STATUS,
+	GEN7_2_0_RSCC_TCS8_DRV0_STATUS,
+	GEN7_2_0_RSCC_TCS9_DRV0_STATUS,
+};
+
+static int gen7_complete_rpmh_votes(struct gen7_gmu_device *gmu,
+		u32 timeout)
+{
+	struct adreno_device *adreno_dev = gen7_gmu_to_adreno(gmu);
+	int i, ret = 0;
+
+	if (adreno_is_gen7_2_x_family(adreno_dev)) {
+		for (i = 0; i < ARRAY_SIZE(gen7_2_0_rscc_tcsm_drv0_status_reglist); i++)
+			ret |= gen7_timed_poll_check_rscc(gmu,
+				gen7_2_0_rscc_tcsm_drv0_status_reglist[i], BIT(0), timeout,
+				BIT(0));
+	} else {
+		for (i = 0; i < ARRAY_SIZE(gen7_rscc_tcsm_drv0_status_reglist); i++)
+			ret |= gen7_timed_poll_check_rscc(gmu,
+				gen7_rscc_tcsm_drv0_status_reglist[i], BIT(0), timeout,
+				BIT(0));
+	}
+
+	if (ret)
+		dev_err(&gmu->pdev->dev, "RPMH votes timed out: %d\n", ret);
+
+	return ret;
+}
+
+#define GX_GDSC_POWER_OFF	BIT(0)
+#define GX_CLK_OFF		BIT(1)
+#define is_on(val)		(!(val & (GX_GDSC_POWER_OFF | GX_CLK_OFF)))
+
+bool gen7_gmu_gx_is_on(struct adreno_device *adreno_dev)
+{
+	unsigned int val;
+
+	gmu_core_regread(KGSL_DEVICE(adreno_dev),
+			GEN7_GMU_GFX_PWR_CLK_STATUS, &val);
+	return is_on(val);
+}
+
+static const char *idle_level_name(int level)
+{
+	if (level == GPU_HW_ACTIVE)
+		return "GPU_HW_ACTIVE";
+	else if (level == GPU_HW_IFPC)
+		return "GPU_HW_IFPC";
+
+	return "(Unknown)";
+}
+
+int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u32 reg, reg1, reg2, reg3, reg4;
+	unsigned long t;
+	u64 ts1, ts2;
+
+	ts1 = gpudev->read_alwayson(adreno_dev);
+
+	t = jiffies + msecs_to_jiffies(100);
+	do {
+		gmu_core_regread(device,
+			GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &reg);
+		gmu_core_regread(device, GEN7_GMU_GFX_PWR_CLK_STATUS, &reg1);
+
+		/*
+		 * Check that we are at lowest level. If lowest level is IFPC
+		 * double check that GFX clock is off.
+		 */
+		if (gmu->idle_level == reg)
+			if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1)))
+				return 0;
+
+		/* Sleep briefly (10-100us) between polls to reduce unnecessary AHB bus traffic */
+		usleep_range(10, 100);
+	} while (!time_after(jiffies, t));
+
+	/* Check one last time */
+	gmu_core_regread(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &reg);
+	gmu_core_regread(device, GEN7_GMU_GFX_PWR_CLK_STATUS, &reg1);
+
+	/*
+	 * Check that we are at lowest level. If lowest level is IFPC
+	 * double check that GFX clock is off.
+	 */
+	if (gmu->idle_level == reg)
+		if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1)))
+			return 0;
+
+	ts2 = gpudev->read_alwayson(adreno_dev);
+
+	/* Collect abort data to help with debugging */
+	gmu_core_regread(device, GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, &reg2);
+	gmu_core_regread(device, GEN7_GMU_RBBM_INT_UNMASKED_STATUS, &reg3);
+	gmu_core_regread(device, GEN7_GMU_GMU_PWR_COL_KEEPALIVE, &reg4);
+
+	dev_err(&gmu->pdev->dev,
+		"----------------------[ GMU error ]----------------------\n");
+	dev_err(&gmu->pdev->dev, "Timeout waiting for lowest idle level %s\n",
+		idle_level_name(gmu->idle_level));
+	dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1);
+	dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", ts2-ts1);
+	dev_err(&gmu->pdev->dev, "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1);
+	dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2);
+	dev_err(&gmu->pdev->dev, "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", reg3, reg4);
+
+	/* Access GX registers only when GX is ON */
+	if (is_on(reg1)) {
+		kgsl_regread(device, GEN7_CP_STATUS_1, &reg2);
+		kgsl_regread(device, GEN7_CP_CP2GMU_STATUS, &reg3);
+		kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &reg4);
+
+		dev_err(&gmu->pdev->dev, "GEN7_CP_STATUS_1=%x\n", reg2);
+		dev_err(&gmu->pdev->dev, "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", reg3, reg4);
+	}
+
+	WARN_ON(1);
+	gmu_core_fault_snapshot(device);
+	return -ETIMEDOUT;
+}
+
+/* Bitmask for GPU idle status check */
+#define CXGXCPUBUSYIGNAHB	BIT(30)
+int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u32 status2;
+	u64 ts1;
+
+	ts1 = gpudev->read_alwayson(adreno_dev);
+	if (gmu_core_timed_poll_check(device, GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS,
+			0, 100, CXGXCPUBUSYIGNAHB)) {
+		gmu_core_regread(device,
+				GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS2, &status2);
+		dev_err(&gmu->pdev->dev,
+				"GMU not idling: status2=0x%x %llx %llx\n",
+				status2, ts1,
+				gpudev->read_alwayson(adreno_dev));
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+int gen7_gmu_version_info(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+
+	/* GMU version info is at a fixed offset in the DTCM */
+	gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xff8,
+			&gmu->ver.core);
+	gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xff9,
+			&gmu->ver.core_dev);
+	gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xffa,
+			&gmu->ver.pwr);
+	gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xffb,
+			&gmu->ver.pwr_dev);
+	gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xffc,
+			&gmu->ver.hfi);
+
+	/* Check if gmu fw version on device is compatible with kgsl driver */
+	if (gmu->ver.core < gen7_core->gmu_fw_version) {
+		dev_err_once(&gmu->pdev->dev,
+			     "GMU FW version 0x%x error (expected 0x%x)\n",
+			     gmu->ver.core, gen7_core->gmu_fw_version);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int gen7_gmu_itcm_shadow(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 i, *dest;
+
+	if (gmu->itcm_shadow)
+		return 0;
+
+	gmu->itcm_shadow = vzalloc(gmu->vma[GMU_ITCM].size);
+	if (!gmu->itcm_shadow)
+		return -ENOMEM;
+
+	dest = (u32 *)gmu->itcm_shadow;
+
+	for (i = 0; i < (gmu->vma[GMU_ITCM].size >> 2); i++)
+		gmu_core_regread(KGSL_DEVICE(adreno_dev),
+			GEN7_GMU_CM3_ITCM_START + i, dest++);
+
+	return 0;
+}
+
+void gen7_gmu_register_config(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 val;
+
+	/* Clear any previously set cm3 fault */
+	atomic_set(&gmu->cm3_fault, 0);
+
+	/* Vote veto for FAL10 */
+	gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1);
+	gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FAL_INTF, 0x1);
+
+	/* Clear init result to make sure we are getting fresh value */
+	gmu_core_regwrite(device, GEN7_GMU_CM3_FW_INIT_RESULT, 0);
+	gmu_core_regwrite(device, GEN7_GMU_CM3_BOOT_CONFIG, 0x2);
+
+	gmu_core_regwrite(device, GEN7_GMU_HFI_QTBL_ADDR,
+			gmu->hfi.hfi_mem->gmuaddr);
+	gmu_core_regwrite(device, GEN7_GMU_HFI_QTBL_INFO, 1);
+
+	gmu_core_regwrite(device, GEN7_GMU_AHB_FENCE_RANGE_0, BIT(31) |
+			FIELD_PREP(GENMASK(30, 18), 0x32) |
+			FIELD_PREP(GENMASK(17, 0), 0x8a0));
+
+	/*
+	 * Make sure the CM3 state is at its reset value. Snapshot changes the
+	 * NMI bit, and if we boot the GMU with the NMI bit set it will boot
+	 * straight into the NMI handler without executing the __main code.
+	 */
+	gmu_core_regwrite(device, GEN7_GMU_CM3_CFG, 0x4052);
+
+	/*
+	 * We may have asserted gbif halt as part of reset sequence which may
+	 * not get cleared if the gdsc was not reset. So clear it before
+	 * attempting GMU boot.
+	 */
+	kgsl_regwrite(device, GEN7_GBIF_HALT, 0x0);
+
+	/* Set vrb address before starting GMU */
+	if (!IS_ERR_OR_NULL(gmu->vrb))
+		gmu_core_regwrite(device, GEN7_GMU_GENERAL_11, gmu->vrb->gmuaddr);
+
+	/* Set the log wptr index */
+	gmu_core_regwrite(device, GEN7_GMU_GENERAL_9,
+			gmu->log_wptr_retention);
+
+	/* Pass chipid to GMU FW, must happen before starting GMU */
+	gmu_core_regwrite(device, GEN7_GMU_GENERAL_10,
+			ADRENO_GMU_REV(ADRENO_GPUREV(adreno_dev)));
+
+	/* Log size is encoded in (number of 4K units - 1) */
+	val = (gmu->gmu_log->gmuaddr & GENMASK(31, 12)) |
+		((GMU_LOG_SIZE/SZ_4K - 1) & GENMASK(7, 0));
+	gmu_core_regwrite(device, GEN7_GMU_GENERAL_8, val);
+
+	/* Configure power control and bring the GMU out of reset */
+	gen7_gmu_power_config(adreno_dev);
+
+	/*
+	 * Enable BCL throttling -
+	 * XOCLK1: countable: 0x13 (25% throttle)
+	 * XOCLK2: countable: 0x17 (58% throttle)
+	 * XOCLK3: countable: 0x19 (75% throttle)
+	 * POWER_CONTROL_SELECT_0 controls counters 0 - 3, each selector
+	 * is 8 bits wide.
+	 */
+	if (adreno_dev->bcl_enabled)
+		gmu_core_regrmw(device, GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0,
+			0xffffff00, FIELD_PREP(GENMASK(31, 24), 0x19) |
+			FIELD_PREP(GENMASK(23, 16), 0x17) |
+			FIELD_PREP(GENMASK(15, 8), 0x13));
+}
+
+static struct gmu_vma_node *find_va(struct gmu_vma_entry *vma, u32 addr, u32 size)
+{
+	struct rb_node *node = vma->vma_root.rb_node;
+
+	while (node != NULL) {
+		struct gmu_vma_node *data = rb_entry(node, struct gmu_vma_node, node);
+
+		if (addr + size <= data->va)
+			node = node->rb_left;
+		else if (addr >= data->va + data->size)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+/* Return true if VMA supports dynamic allocations */
+static bool vma_is_dynamic(int vma_id)
+{
+	/* Dynamic allocations are done in the GMU_NONCACHED_KERNEL space */
+	return vma_id == GMU_NONCACHED_KERNEL;
+}
+
+static int insert_va(struct gmu_vma_entry *vma, u32 addr, u32 size)
+{
+	struct rb_node **node, *parent = NULL;
+	struct gmu_vma_node *new = kzalloc(sizeof(*new), GFP_NOWAIT);
+
+	if (new == NULL)
+		return -ENOMEM;
+
+	new->va = addr;
+	new->size = size;
+
+	node = &vma->vma_root.rb_node;
+	while (*node != NULL) {
+		struct gmu_vma_node *this;
+
+		parent = *node;
+		this = rb_entry(parent, struct gmu_vma_node, node);
+
+		if (addr + size <= this->va)
+			node = &parent->rb_left;
+		else if (addr >= this->va + this->size)
+			node = &parent->rb_right;
+		else {
+			kfree(new);
+			return -EEXIST;
+		}
+	}
+
+	/* Add new node and rebalance tree */
+	rb_link_node(&new->node, parent, node);
+	rb_insert_color(&new->node, &vma->vma_root);
+
+	return 0;
+}
+
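+/*
+ * First-fit search: walk the VMA rb-tree in ascending address order and
+ * return the first aligned gap big enough for the request, or 0 if there is
+ * no space left.
+ */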
+static u32 find_unmapped_va(struct gmu_vma_entry *vma, u32 size, u32 va_align)
+{
+	struct rb_node *node = rb_first(&vma->vma_root);
+	u32 cur = vma->start;
+	bool found = false;
+
+	cur = ALIGN(cur, va_align);
+
+	while (node) {
+		struct gmu_vma_node *data = rb_entry(node, struct gmu_vma_node, node);
+
+		if (cur + size <= data->va) {
+			found = true;
+			break;
+		}
+
+		cur = ALIGN(data->va + data->size, va_align);
+		node = rb_next(node);
+	}
+
+	/* Do we have space after the last node? */
+	if (!found && (cur + size <= vma->start + vma->size))
+		found = true;
+	return found ? cur : 0;
+}
+
+static int _map_gmu_dynamic(struct gen7_gmu_device *gmu,
+	struct kgsl_memdesc *md,
+	u32 addr, u32 vma_id, int attrs, u32 align)
+{
+	int ret;
+	struct gmu_vma_entry *vma = &gmu->vma[vma_id];
+	struct gmu_vma_node *vma_node = NULL;
+	u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align));
+
+	spin_lock(&vma->lock);
+	if (!addr) {
+		/*
+		 * We will end up with a hole (GMU VA range not backed by physical mapping) if
+		 * the aligned size is greater than the size of the physical mapping
+		 */
+		addr = find_unmapped_va(vma, size, hfi_get_gmu_va_alignment(align));
+		if (addr == 0) {
+			spin_unlock(&vma->lock);
+			dev_err(&gmu->pdev->dev,
+				"Insufficient VA space size: %x\n", size);
+			return -ENOMEM;
+		}
+	}
+
+	ret = insert_va(vma, addr, size);
+	spin_unlock(&vma->lock);
+	if (ret < 0) {
+		dev_err(&gmu->pdev->dev,
+			"Could not insert va: %x size %x\n", addr, size);
+		return ret;
+	}
+
+	ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs);
+	if (!ret) {
+		md->gmuaddr = addr;
+		return 0;
+	}
+
+	/* Failed to map to GMU */
+	dev_err(&gmu->pdev->dev,
+		"Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n",
+		addr, md->size, ret);
+
+	spin_lock(&vma->lock);
+	vma_node = find_va(vma, md->gmuaddr, size);
+	if (vma_node)
+		rb_erase(&vma_node->node, &vma->vma_root);
+	spin_unlock(&vma->lock);
+	kfree(vma_node);
+
+	return ret;
+}
+
+static int _map_gmu_static(struct gen7_gmu_device *gmu,
+	struct kgsl_memdesc *md,
+	u32 addr, u32 vma_id, int attrs, u32 align)
+{
+	int ret;
+	struct gmu_vma_entry *vma = &gmu->vma[vma_id];
+	u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align));
+
+	if (!addr)
+		addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align));
+
+	ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n",
+			addr, md->size, ret);
+		return ret;
+	}
+	md->gmuaddr = addr;
+	/*
+	 * We will end up with a hole (GMU VA range not backed by physical mapping) if the aligned
+	 * size is greater than the size of the physical mapping
+	 */
+	vma->next_va = md->gmuaddr + size;
+	return 0;
+}
+
+static int _map_gmu(struct gen7_gmu_device *gmu,
+	struct kgsl_memdesc *md,
+	u32 addr, u32 vma_id, int attrs, u32 align)
+{
+	return vma_is_dynamic(vma_id) ?
+			_map_gmu_dynamic(gmu, md, addr, vma_id, attrs, align) :
+			_map_gmu_static(gmu, md, addr, vma_id, attrs, align);
+}
+
+int gen7_gmu_import_buffer(struct gen7_gmu_device *gmu, u32 vma_id,
+				struct kgsl_memdesc *md, u32 attrs, u32 align)
+{
+	return _map_gmu(gmu, md, 0, vma_id, attrs, align);
+}
+
+struct kgsl_memdesc *gen7_reserve_gmu_kernel_block(struct gen7_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, u32 align)
+{
+	int ret;
+	struct kgsl_memdesc *md;
+	struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu));
+	int attrs = IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV;
+
+	if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals))
+		return ERR_PTR(-ENOMEM);
+
+	md = &gmu->gmu_globals[gmu->global_entries];
+
+	ret = kgsl_allocate_kernel(device, md, size, 0, KGSL_MEMDESC_SYSMEM);
+	if (ret) {
+		memset(md, 0x0, sizeof(*md));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = _map_gmu(gmu, md, addr, vma_id, attrs, align);
+	if (ret) {
+		kgsl_sharedmem_free(md);
+		memset(md, 0x0, sizeof(*md));
+		return ERR_PTR(ret);
+	}
+
+	gmu->global_entries++;
+
+	return md;
+}
+
+struct kgsl_memdesc *gen7_reserve_gmu_kernel_block_fixed(struct gen7_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 align)
+{
+	int ret;
+	struct kgsl_memdesc *md;
+	struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu));
+
+	if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals))
+		return ERR_PTR(-ENOMEM);
+
+	md = &gmu->gmu_globals[gmu->global_entries];
+
+	ret = kgsl_memdesc_init_fixed(device, gmu->pdev, resource, md);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = _map_gmu(gmu, md, addr, vma_id, attrs, align);
+
+	sg_free_table(md->sgt);
+	kfree(md->sgt);
+	md->sgt = NULL;
+
+	if (!ret)
+		gmu->global_entries++;
+	else {
+		dev_err(&gmu->pdev->dev,
+			"Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n",
+			addr, md->size, ret);
+		memset(md, 0x0, sizeof(*md));
+		md = ERR_PTR(ret);
+	}
+	return md;
+}
+
+int gen7_alloc_gmu_kernel_block(struct gen7_gmu_device *gmu,
+	struct kgsl_memdesc *md, u32 size, u32 vma_id, int attrs)
+{
+	int ret;
+	struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu));
+
+	ret = kgsl_allocate_kernel(device, md, size, 0, KGSL_MEMDESC_SYSMEM);
+	if (ret)
+		return ret;
+
+	ret = _map_gmu(gmu, md, 0, vma_id, attrs, 0);
+	if (ret)
+		kgsl_sharedmem_free(md);
+
+	return ret;
+}
+
+void gen7_free_gmu_block(struct gen7_gmu_device *gmu, struct kgsl_memdesc *md)
+{
+	int vma_id = find_vma_block(gmu, md->gmuaddr, md->size);
+	struct gmu_vma_entry *vma;
+	struct gmu_vma_node *vma_node;
+
+	if ((vma_id < 0) || !vma_is_dynamic(vma_id))
+		return;
+
+	vma = &gmu->vma[vma_id];
+
+	/*
+	 * Do not remove the vma node if we failed to unmap the entire buffer. This is because the
+	 * iommu driver considers remapping an already mapped iova as fatal.
+	 */
+	if (md->size != iommu_unmap(gmu->domain, md->gmuaddr, md->size))
+		goto free;
+
+	spin_lock(&vma->lock);
+	vma_node = find_va(vma, md->gmuaddr, md->size);
+	if (vma_node)
+		rb_erase(&vma_node->node, &vma->vma_root);
+	spin_unlock(&vma->lock);
+	kfree(vma_node);
+free:
+	kgsl_sharedmem_free(md);
+}
+
+static int gen7_gmu_process_prealloc(struct gen7_gmu_device *gmu,
+	struct gmu_block_header *blk)
+{
+	struct kgsl_memdesc *md;
+
+	int id = find_vma_block(gmu, blk->addr, blk->value);
+
+	if (id < 0) {
+		dev_err(&gmu->pdev->dev,
+			"Invalid prealloc block addr: 0x%x value:%d\n",
+			blk->addr, blk->value);
+		return id;
+	}
+
+	/* Nothing to do for TCM blocks or user uncached */
+	if (id == GMU_ITCM || id == GMU_DTCM || id == GMU_NONCACHED_USER)
+		return 0;
+
+	/* Check if the block is already allocated */
+	md = find_gmu_memdesc(gmu, blk->addr, blk->value);
+	if (md != NULL)
+		return 0;
+
+	md = gen7_reserve_gmu_kernel_block(gmu, blk->addr, blk->value, id, 0);
+
+	return PTR_ERR_OR_ZERO(md);
+}
+
+int gen7_gmu_parse_fw(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	struct gmu_block_header *blk;
+	int ret, offset = 0;
+	const char *gmufw_name = gen7_core->gmufw_name;
+
+	/*
+	 * If the GMU firmware is already saved and verified, skip only the
+	 * request_firmware call but still run the preallocation pass. This
+	 * covers the case where request_firmware succeeded but preallocation
+	 * failed: the next open call must not return early without completing
+	 * the preallocations.
+	 */
+	if (!gmu->fw_image) {
+
+		if (gen7_core->gmufw_name == NULL)
+			return -EINVAL;
+
+		ret = request_firmware(&gmu->fw_image, gmufw_name,
+				&gmu->pdev->dev);
+		if (ret) {
+			if (gen7_core->gmufw_bak_name) {
+				gmufw_name = gen7_core->gmufw_bak_name;
+				ret = request_firmware(&gmu->fw_image, gmufw_name,
+					&gmu->pdev->dev);
+			}
+
+			if (ret) {
+				dev_err(&gmu->pdev->dev,
+					"request_firmware (%s) failed: %d\n",
+					gmufw_name, ret);
+
+				return ret;
+			}
+		}
+	}
+
+	/*
+	 * Zero payload fw blocks contain metadata and are
+	 * guaranteed to precede fw load data. Parse the
+	 * metadata blocks.
+	 */
+	while (offset < gmu->fw_image->size) {
+		blk = (struct gmu_block_header *)&gmu->fw_image->data[offset];
+
+		if (offset + sizeof(*blk) > gmu->fw_image->size) {
+			dev_err(&gmu->pdev->dev, "Invalid FW Block\n");
+			return -EINVAL;
+		}
+
+		/* Done with the zero-length metadata blocks, so stop parsing */
+		if (blk->size)
+			break;
+
+		offset += sizeof(*blk);
+
+		if (blk->type == GMU_BLK_TYPE_PREALLOC_REQ ||
+			blk->type == GMU_BLK_TYPE_PREALLOC_PERSIST_REQ) {
+			ret = gen7_gmu_process_prealloc(gmu, blk);
+
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+int gen7_gmu_memory_init(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	/* GMU master log */
+	if (IS_ERR_OR_NULL(gmu->gmu_log))
+		gmu->gmu_log = gen7_reserve_gmu_kernel_block(gmu, 0,
+				GMU_LOG_SIZE, GMU_NONCACHED_KERNEL, 0);
+
+	return PTR_ERR_OR_ZERO(gmu->gmu_log);
+}
+
+static int gen7_gmu_init(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = gen7_gmu_parse_fw(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_gmu_memory_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	return gen7_hfi_init(adreno_dev);
+}
+
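+/*
+ * Write the GBIF halt request mask and poll the ack register, resuming the
+ * SMMU from any stall-on-fault so the halt can complete.
+ */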
+static void _do_gbif_halt(struct kgsl_device *device, u32 reg, u32 ack_reg,
+	u32 mask, const char *client)
+{
+	u32 ack;
+	unsigned long t;
+
+	kgsl_regwrite(device, reg, mask);
+
+	t = jiffies + msecs_to_jiffies(100);
+	do {
+		kgsl_regread(device, ack_reg, &ack);
+		if ((ack & mask) == mask)
+			return;
+
+		/*
+		 * If we are attempting recovery in case of stall-on-fault
+		 * then the halt sequence will not complete as long as SMMU
+		 * is stalled.
+		 */
+		kgsl_mmu_pagefault_resume(&device->mmu, false);
+
+		usleep_range(10, 100);
+	} while (!time_after(jiffies, t));
+
+	/* Check one last time */
+	kgsl_mmu_pagefault_resume(&device->mmu, false);
+
+	kgsl_regread(device, ack_reg, &ack);
+	if ((ack & mask) == mask)
+		return;
+
+	dev_err(device->dev, "%s GBIF halt timed out\n", client);
+}
+
+static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev)
+{
+	int ret = 0;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	/* Disconnecting the GPU from the bus is not needed if the CX GDSC goes off later */
+
+	/*
+	 * GEMNOC can enter power collapse state during GPU power down sequence.
+	 * This could abort CX GDSC collapse. Assert Qactive to avoid this.
+	 */
+	gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1);
+
+	/* Check no outstanding RPMh voting */
+	gen7_complete_rpmh_votes(gmu, 1);
+
+	/* Clear the WRITEDROPPED fields and set fence to allow mode */
+	gmu_core_regwrite(device, GEN7_GMU_AHB_FENCE_STATUS_CLR, 0x7);
+	gmu_core_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0);
+
+	/* Make sure above writes are committed before we proceed to recovery */
+	wmb();
+
+	gmu_core_regwrite(device, GEN7_GMU_CM3_SYSRESET, 1);
+
+	/* Halt GX traffic */
+	if (gen7_gmu_gx_is_on(adreno_dev))
+		_do_gbif_halt(device, GEN7_RBBM_GBIF_HALT,
+				GEN7_RBBM_GBIF_HALT_ACK,
+				GEN7_GBIF_GX_HALT_MASK,
+				"GX");
+
+	/* Halt CX traffic */
+	_do_gbif_halt(device, GEN7_GBIF_HALT, GEN7_GBIF_HALT_ACK,
+			GEN7_GBIF_ARB_HALT_MASK, "CX");
+
+	if (gen7_gmu_gx_is_on(adreno_dev))
+		kgsl_regwrite(device, GEN7_RBBM_SW_RESET_CMD, 0x1);
+
+	/* Make sure above writes are posted before turning off power resources */
+	wmb();
+
+	/* Allow the software reset to complete */
+	udelay(100);
+
+	/*
+	 * This is based on the assumption that GMU is the only one controlling
+	 * the GX HS. This code path is the only client voting for GX through
+	 * the regulator interface.
+	 */
+	if (pwr->gx_gdsc) {
+		if (gen7_gmu_gx_is_on(adreno_dev)) {
+			/*
+			 * Switch gx gdsc control from GMU to CPU: force a
+			 * non-zero reference count in the clk driver so the
+			 * next disable call will turn off the GDSC.
+			 */
+			ret = regulator_enable(pwr->gx_gdsc);
+			if (ret)
+				dev_err(&gmu->pdev->dev,
+					"suspend fail: gx enable %d\n", ret);
+
+			ret = regulator_disable(pwr->gx_gdsc);
+			if (ret)
+				dev_err(&gmu->pdev->dev,
+					"suspend fail: gx disable %d\n", ret);
+
+			if (gen7_gmu_gx_is_on(adreno_dev))
+				dev_err(&gmu->pdev->dev,
+					"gx is stuck on\n");
+		}
+	}
+}
+
+/*
+ * gen7_gmu_notify_slumber() - initiate request to GMU to prepare to slumber
+ * @adreno_dev: Pointer to the Adreno device
+ */
+static int gen7_gmu_notify_slumber(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int bus_level = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq;
+	int perf_idx = gmu->dcvs_table.gpu_level_num -
+			pwr->default_pwrlevel - 1;
+	struct hfi_prep_slumber_cmd req = {
+		.freq = perf_idx,
+		.bw = bus_level,
+	};
+	int ret;
+
+	req.bw |= gen7_bus_ab_quantize(adreno_dev, 0);
+
+	/* Disable the power counter so that the GMU is not busy */
+	gmu_core_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0);
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER);
+	if (ret)
+		return ret;
+
+	ret = gen7_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+
+	/* Make sure the fence is in ALLOW mode */
+	gmu_core_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0);
+
+	/*
+	 * GEMNOC can enter power collapse state during GPU power down sequence.
+	 * This could abort CX GDSC collapse. Assert Qactive to avoid this.
+	 */
+	gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1);
+
+	return ret;
+}
+
+void gen7_gmu_suspend(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	gen7_gmu_pwrctrl_suspend(adreno_dev);
+
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen7_rdpm_cx_freq_update(gmu, 0);
+
+	dev_err(&gmu->pdev->dev, "Suspended GMU\n");
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+}
+
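+/*
+ * Note: the GMU DCVS table is indexed in the reverse order of the KGSL
+ * pwrlevel table, so pwrlevels are converted with
+ * (gpu_level_num - pwrlevel - 1) before being sent in the HFI vote.
+ */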
+static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev,
+		int gpu_pwrlevel, int bus_level, u32 ab)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_dcvs_table *table = &gmu->dcvs_table;
+	struct hfi_gx_bw_perf_vote_cmd req = {
+		.ack_type = DCVS_ACK_BLOCK,
+		.freq = INVALID_DCVS_IDX,
+		.bw = INVALID_DCVS_IDX,
+	};
+	int ret = 0;
+
+	if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags))
+		return 0;
+
+	/* Do not set to XO and lower GPU clock vote from GMU */
+	if ((gpu_pwrlevel != INVALID_DCVS_IDX) &&
+			(gpu_pwrlevel >= table->gpu_level_num - 1))
+		return -EINVAL;
+
+	if (gpu_pwrlevel < table->gpu_level_num - 1)
+		req.freq = table->gpu_level_num - gpu_pwrlevel - 1;
+
+	if (bus_level < pwr->ddr_table_count && bus_level > 0)
+		req.bw = bus_level;
+
+	req.bw |= gen7_bus_ab_quantize(adreno_dev, ab);
+
+	/* GMU will vote for slumber levels through the sleep sequence */
+	if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_BW_VOTE))
+		return 0;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE);
+	if (ret)
+		return ret;
+
+	ret = gen7_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+	if (ret) {
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Failed to set GPU perf idx %u, bw idx %u\n",
+			req.freq, req.bw);
+
+		/*
+		 * If this was a dcvs request alongside an active GPU, request
+		 * dispatcher-based reset and recovery.
+		 */
+		if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+			adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT |
+				ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+	}
+
+	if (req.freq != INVALID_DCVS_IDX)
+		gen7_rdpm_mx_freq_update(gmu,
+			gmu->dcvs_table.gx_votes[req.freq].freq);
+
+	return ret;
+}
+
+static int gen7_gmu_clock_set(struct adreno_device *adreno_dev, u32 pwrlevel)
+{
+	return gen7_gmu_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX, INVALID_AB_VALUE);
+}
+
+static int gen7_gmu_ifpc_store(struct kgsl_device *device,
+		unsigned int val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	unsigned int requested_idle_level;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_IFPC))
+		return -EINVAL;
+
+	if (val)
+		requested_idle_level = GPU_HW_IFPC;
+	else
+		requested_idle_level = GPU_HW_ACTIVE;
+
+	if (gmu->idle_level == requested_idle_level)
+		return 0;
+
+	/* Power down the GPU before changing the idle level */
+	return adreno_power_cycle_u32(adreno_dev, &gmu->idle_level,
+		requested_idle_level);
+}
+
+static unsigned int gen7_gmu_ifpc_isenabled(struct kgsl_device *device)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device));
+
+	return gmu->idle_level == GPU_HW_IFPC;
+}
+
+/* Send an NMI to the GMU */
+void gen7_gmu_send_nmi(struct kgsl_device *device, bool force)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 result;
+
+	/*
+	 * Do not send NMI if the SMMU is stalled because GMU will not be able
+	 * to save cm3 state to DDR.
+	 */
+	if (gen7_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) {
+		dev_err(&gmu->pdev->dev,
+			"Skipping NMI because SMMU is stalled\n");
+		return;
+	}
+
+	if (force)
+		goto nmi;
+
+	/*
+	 * We should not send NMI if there was a CM3 fault reported because we
+	 * don't want to overwrite the critical CM3 state captured by gmu before
+	 * it sent the CM3 fault interrupt. Also don't send NMI if GMU reset is
+	 * already active. We could have hit a GMU assert and NMI might have
+	 * already been triggered.
+	 */
+
+	/* make sure we're reading the latest cm3_fault */
+	smp_rmb();
+
+	if (atomic_read(&gmu->cm3_fault))
+		return;
+
+	gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result);
+
+	if (result & 0xE00)
+		return;
+
+nmi:
+	/* Mask so there's no interrupt caused by NMI */
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, UINT_MAX);
+
+	/* Make sure the interrupt is masked before causing it */
+	wmb();
+
+	/* This will cause the GMU to save its internal state to DDR */
+	gmu_core_regrmw(device, GEN7_GMU_CM3_CFG, BIT(9), BIT(9));
+
+	/* Make sure the NMI is invoked before we proceed */
+	wmb();
+
+	/* Wait for the NMI to be handled */
+	udelay(200);
+}
+
+static void gen7_gmu_cooperative_reset(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	unsigned int result;
+
+	gmu_core_regwrite(device, GEN7_GMU_CX_GMU_WDOG_CTRL, 0);
+	gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, BIT(17));
+
+	/*
+	 * After triggering graceful death wait for snapshot ready
+	 * indication from GMU.
+	 */
+	if (!gmu_core_timed_poll_check(device, GEN7_GMU_CM3_FW_INIT_RESULT,
+				0x800, 2, 0x800))
+		return;
+
+	gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result);
+	dev_err(&gmu->pdev->dev,
+		"GMU cooperative reset timed out 0x%x\n", result);
+	/*
+	 * If we don't get a snapshot ready from the GMU, trigger an NMI;
+	 * if we still time out, just continue with the reset.
+	 */
+	gen7_gmu_send_nmi(device, true);
+
+	gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result);
+	if ((result & 0x800) != 0x800)
+		dev_err(&gmu->pdev->dev,
+			"GMU cooperative reset NMI timed out 0x%x\n", result);
+}
+
+static int gen7_gmu_wait_for_active_transition(struct kgsl_device *device)
+{
+	unsigned int reg;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device));
+
+	if (gmu_core_timed_poll_check(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE,
+			GPU_HW_ACTIVE, 100, GENMASK(3, 0))) {
+		gmu_core_regread(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, &reg);
+		dev_err(&gmu->pdev->dev,
+			"GMU failed to move to ACTIVE state, Current state: 0x%x\n",
+			reg);
+
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static bool gen7_gmu_scales_bandwidth(struct kgsl_device *device)
+{
+	return true;
+}
+
+void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 mask;
+
+	/* Temporarily mask the watchdog interrupt to prevent a storm */
+	gmu_core_regread(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, &mask);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK,
+			(mask | GMU_INT_WDOG_BITE));
+
+	gen7_gmu_send_nmi(device, false);
+
+	dev_err_ratelimited(&gmu->pdev->dev,
+			"GMU watchdog expired interrupt received\n");
+}
+
+static irqreturn_t gen7_gmu_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	const struct gen7_gpudev *gen7_gpudev =
+		to_gen7_gpudev(ADRENO_GPU_DEVICE(adreno_dev));
+	unsigned int status = 0;
+
+	gmu_core_regread(device, GEN7_GMU_AO_HOST_INTERRUPT_STATUS, &status);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, status);
+
+	if (status & GMU_INT_HOST_AHB_BUS_ERR)
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"AHB bus error interrupt received\n");
+
+	if (status & GMU_INT_WDOG_BITE)
+		gen7_gpudev->handle_watchdog(adreno_dev);
+
+	if (status & GMU_INT_FENCE_ERR) {
+		unsigned int fence_status;
+
+		gmu_core_regread(device, GEN7_GMU_AHB_FENCE_STATUS,
+			&fence_status);
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"FENCE error interrupt received %x\n", fence_status);
+	}
+
+	if (status & ~GMU_AO_INT_MASK)
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"Unhandled GMU interrupts 0x%lx\n",
+				status & ~GMU_AO_INT_MASK);
+
+	return IRQ_HANDLED;
+}
+
+void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag)
+{
+	struct qmp_pkt msg;
+	char msg_buf[36];
+	u32 size;
+	int ret;
+
+	if (IS_ERR_OR_NULL(gmu->mailbox.channel))
+		return;
+
+	size = scnprintf(msg_buf, sizeof(msg_buf),
+			"{class: gpu, res: acd, val: %d}", flag);
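+	/* e.g. "{class: gpu, res: acd, val: 1}" is sent when enabling ACD */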
+
+	/* mailbox controller expects 4-byte aligned buffer */
+	msg.size = ALIGN((size + 1), SZ_4);
+	msg.data = msg_buf;
+
+	ret = mbox_send_message(gmu->mailbox.channel, &msg);
+
+	if (ret < 0)
+		dev_err(&gmu->pdev->dev,
+			"AOP mbox send message failed: %d\n", ret);
+}
+
+int gen7_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	gen7_rdpm_cx_freq_update(gmu, gmu->freqs[level] / 1000);
+
+	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk",
+			gmu->freqs[level]);
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n",
+			gmu->freqs[level], ret);
+		return ret;
+	}
+
+	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk",
+			adreno_dev->gmu_hub_clk_freq);
+	if (ret && ret != -ENODEV) {
+		dev_err(&gmu->pdev->dev, "Unable to set the HUB clock\n");
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(gmu->num_clks, gmu->clks);
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "Cannot enable GMU clocks\n");
+		return ret;
+	}
+
+	device->state = KGSL_STATE_AWARE;
+
+	return 0;
+}
+
+static int gen7_gmu_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int level, ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = gen7_gmu_enable_clks(adreno_dev, 0);
+	if (ret)
+		goto gdsc_off;
+
+	/*
+	 * Enable AHB timeout detection so that a hung register access is caught
+	 * before the NOC timeout triggers. Enable this logic before any register
+	 * access, i.e. immediately after enabling the clocks.
+	 */
+	gen7_enable_ahb_timeout_detection(adreno_dev);
+
+	/* Initialize the CX timer */
+	gen7_cx_timer_init(adreno_dev);
+
+	ret = gen7_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen7_gmu_version_info(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen7_gmu_itcm_shadow(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen7_scm_gpu_init_cx_regs(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	gen7_gmu_register_config(adreno_dev);
+
+	gen7_gmu_irq_enable(adreno_dev);
+
+	/* Vote for minimal DDR BW for GMU to init */
+	level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min;
+	icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level]));
+
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
+	ret = gen7_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) {
+		ret = gen7_load_pdc_ucode(adreno_dev);
+		if (ret)
+			goto err;
+
+		gen7_load_rsc_ucode(adreno_dev);
+		set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags);
+	}
+
+	ret = gen7_gmu_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	gen7_get_gpu_feature_info(adreno_dev);
+
+	ret = gen7_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	if (gen7_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE, 0) == 1 &&
+		!WARN_ONCE(!adreno_dev->gpucore->num_ddr_channels,
+			"Number of DDR channels is not specified in gpu core")) {
+		adreno_dev->gmu_ab = true;
+		set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv);
+	}
+
+	icc_set_bw(pwr->icc_path, 0, 0);
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	gen7_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		gen7_gmu_suspend(adreno_dev);
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen7_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+static int gen7_gmu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = gen7_gmu_enable_clks(adreno_dev, 0);
+	if (ret)
+		goto gdsc_off;
+
+	/*
+	 * Enable AHB timeout detection so that a hung register access is caught
+	 * before the NOC timeout triggers. Enable this logic before any register
+	 * access, i.e. immediately after enabling the clocks.
+	 */
+	gen7_enable_ahb_timeout_detection(adreno_dev);
+
+	/* Initialize the CX timer */
+	gen7_cx_timer_init(adreno_dev);
+
+	ret = gen7_rscc_wakeup_sequence(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen7_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	gen7_gmu_register_config(adreno_dev);
+
+	gen7_gmu_irq_enable(adreno_dev);
+
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
+	ret = gen7_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_gmu_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	gen7_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		gen7_gmu_suspend(adreno_dev);
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen7_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+static void set_acd(struct adreno_device *adreno_dev, void *priv)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	adreno_dev->acd_enabled = *((bool *)priv);
+	gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);
+}
+
+static int gen7_gmu_acd_set(struct kgsl_device *device, bool val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (IS_ERR_OR_NULL(gmu->mailbox.channel))
+		return -EINVAL;
+
+	/* Don't do any unneeded work if ACD is already in the correct state */
+	if (adreno_dev->acd_enabled == val)
+		return 0;
+
+	/* Power cycle the GPU for changes to take effect */
+	return adreno_power_cycle(adreno_dev, set_acd, &val);
+}
+
+#define BCL_RESP_TYPE_MASK   BIT(0)
+#define BCL_SID0_MASK        GENMASK(7, 1)
+#define BCL_SID1_MASK        GENMASK(14, 8)
+#define BCL_SID2_MASK        GENMASK(21, 15)
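+
+/*
+ * bcl_data layout, per the masks above: bit 0 holds the BCL response type,
+ * bits 7:1 hold SID0, bits 14:8 hold SID1 and bits 21:15 hold SID2. For
+ * example (illustrative values only), FIELD_PREP(BCL_SID0_MASK, 0x10) | BIT(0)
+ * packs SID0 = 0x10 with the response-type bit set.
+ */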
+
+static int gen7_bcl_sid_set(struct kgsl_device *device, u32 sid_id, u64 sid_val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 bcl_data, val = (u32) sid_val;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL) ||
+		!FIELD_GET(BCL_RESP_TYPE_MASK, adreno_dev->bcl_data))
+		return -EINVAL;
+
+	switch (sid_id) {
+	case 0:
+		adreno_dev->bcl_data &= ~BCL_SID0_MASK;
+		bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID0_MASK, val);
+		break;
+	case 1:
+		adreno_dev->bcl_data &= ~BCL_SID1_MASK;
+		bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID1_MASK, val);
+		break;
+	case 2:
+		adreno_dev->bcl_data &= ~BCL_SID2_MASK;
+		bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID2_MASK, val);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return adreno_power_cycle_u32(adreno_dev, &adreno_dev->bcl_data, bcl_data);
+}
+
+static u64 gen7_bcl_sid_get(struct kgsl_device *device, u32 sid_id)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL) ||
+		!FIELD_GET(BCL_RESP_TYPE_MASK, adreno_dev->bcl_data))
+		return 0;
+
+	switch (sid_id) {
+	case 0:
+		return ((u64) FIELD_GET(BCL_SID0_MASK, adreno_dev->bcl_data));
+	case 1:
+		return ((u64) FIELD_GET(BCL_SID1_MASK, adreno_dev->bcl_data));
+	case 2:
+		return ((u64) FIELD_GET(BCL_SID2_MASK, adreno_dev->bcl_data));
+	default:
+		return 0;
+	}
+}
+
+static void gen7_send_tlb_hint(struct kgsl_device *device, bool val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (!gmu->domain)
+		return;
+
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+	qcom_skip_tlb_management(&gmu->pdev->dev, val);
+#endif
+	if (!val)
+		iommu_flush_iotlb_all(gmu->domain);
+}
+
+static const struct gmu_dev_ops gen7_gmudev = {
+	.oob_set = gen7_gmu_oob_set,
+	.oob_clear = gen7_gmu_oob_clear,
+	.ifpc_store = gen7_gmu_ifpc_store,
+	.ifpc_isenabled = gen7_gmu_ifpc_isenabled,
+	.cooperative_reset = gen7_gmu_cooperative_reset,
+	.wait_for_active_transition = gen7_gmu_wait_for_active_transition,
+	.scales_bandwidth = gen7_gmu_scales_bandwidth,
+	.acd_set = gen7_gmu_acd_set,
+	.bcl_sid_set = gen7_bcl_sid_set,
+	.bcl_sid_get = gen7_bcl_sid_get,
+	.send_nmi = gen7_gmu_send_nmi,
+	.send_tlb_hint = gen7_send_tlb_hint,
+};
+
+static int gen7_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel,
+	u32 ab)
+{
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int ret = 0;
+
+	/* Skip icc path for targets that support ACV vote from GMU */
+	if (!gen7_core->acv_perfmode_vote)
+		kgsl_icc_set_tag(pwr, buslevel);
+
+	if (buslevel == pwr->cur_buslevel)
+		buslevel = INVALID_DCVS_IDX;
+
+	if ((ab == pwr->cur_ab) || (ab == 0))
+		ab = INVALID_AB_VALUE;
+
+	if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX))
+		return 0;
+
+	ret = gen7_gmu_dcvs_set(adreno_dev, INVALID_DCVS_IDX,
+			buslevel, ab);
+	if (ret)
+		return ret;
+
+	if (buslevel != INVALID_DCVS_IDX)
+		pwr->cur_buslevel = buslevel;
+
+	if (ab != INVALID_AB_VALUE) {
+		if (!adreno_dev->gmu_ab)
+			icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0);
+		pwr->cur_ab = ab;
+	}
+
+	trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab);
+	return ret;
+}
+
+u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab)
+{
+	u16 vote = 0;
+	u32 max_bw, max_ab;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	if (!adreno_dev->gmu_ab || (ab == INVALID_AB_VALUE))
+		return (FIELD_PREP(GENMASK(31, 16), INVALID_AB_VALUE));
+
+	/*
+	 * max ddr bandwidth (kbps) = (max bw in kbps per channel * number of channels)
+	 * max ab (MBps) = max ddr bandwidth (kbps) / 1000
+	 */
+	max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * adreno_dev->gpucore->num_ddr_channels;
+	max_ab = max_bw / 1000;
+
+	/*
+	 * If the requested AB is higher than the theoretical max bandwidth, set
+	 * the AB vote to the maximum allowable quantized AB value.
+	 *
+	 * Power FW supports a 16-bit AB BW level. We quantize the entire
+	 * vote-able BW range to a 16-bit space, and the quantized value is used
+	 * to vote for AB through GMU. Quantization is performed as below:
+	 *
+	 * quantized_vote = (ab vote (kbps) * 2^16) / max ddr bandwidth (kbps)
+	 */
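+	/*
+	 * Illustrative example (hypothetical numbers, not from any target): with
+	 * a top DDR level of 4224000 kbps per channel and 4 channels, max_bw is
+	 * 16896000 kbps and max_ab is 16896 MBps; a request of ab = 2000 MBps
+	 * then quantizes to (2000 * 1000 * 65536) / 16896000 ~= 7758.
+	 */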
+	if (ab >= max_ab)
+		vote = MAX_AB_VALUE;
+	else
+		vote = (u16)(((u64)ab * 1000 * (1 << 16)) / max_bw);
+
+	/*
+	 * Vote will be calculated as 0 for smaller AB values.
+	 * Set a minimum non-zero vote in such cases.
+	 */
+	if (ab && !vote)
+		vote = 0x1;
+
+	/*
+	 * Set the AB enable mask and a valid AB vote. req.bw is a 32-bit value
+	 * laid out as 0xABABENIB; this return value fills the AB vote in the
+	 * upper 16 bits and the EN field, which indicates whether the AB vote
+	 * is valid.
+	 */
+	return (FIELD_PREP(GENMASK(31, 16), vote) | FIELD_PREP(GENMASK(15, 8), 1));
+}
+
+static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu)
+{
+	int i;
+
+	for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) {
+		struct kgsl_memdesc *md = &gmu->gmu_globals[i];
+
+		if (!md->gmuaddr)
+			continue;
+
+		iommu_unmap(gmu->domain, md->gmuaddr, md->size);
+
+		if (md->priv & KGSL_MEMDESC_SYSMEM)
+			kgsl_sharedmem_free(md);
+
+		memset(md, 0, sizeof(*md));
+	}
+
+	if (gmu->domain) {
+		iommu_detach_device(gmu->domain, &gmu->pdev->dev);
+		iommu_domain_free(gmu->domain);
+		gmu->domain = NULL;
+	}
+
+	gmu->global_entries = 0;
+}
+
+static int gen7_gmu_aop_mailbox_init(struct adreno_device *adreno_dev,
+		struct gen7_gmu_device *gmu)
+{
+	struct kgsl_mailbox *mailbox = &gmu->mailbox;
+
+	mailbox->client.dev = &gmu->pdev->dev;
+	mailbox->client.tx_block = true;
+	mailbox->client.tx_tout = 1000;
+	mailbox->client.knows_txdone = false;
+
+	mailbox->channel = mbox_request_channel(&mailbox->client, 0);
+	if (IS_ERR(mailbox->channel))
+		return PTR_ERR(mailbox->channel);
+
+	adreno_dev->acd_enabled = true;
+	return 0;
+}
+
+static void gen7_gmu_acd_probe(struct kgsl_device *device,
+		struct gen7_gmu_device *gmu, struct device_node *node)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct kgsl_pwrlevel *pwrlevel =
+			&pwr->pwrlevels[pwr->num_pwrlevels - 1];
+	struct hfi_acd_table_cmd *cmd = &gmu->hfi.acd_table;
+	int ret, i, cmd_idx = 0;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_ACD))
+		return;
+
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ACD_TBL, HFI_MSG_CMD);
+
+	cmd->version = 1;
+	cmd->stride = 1;
+	cmd->enable_by_level = 0;
+
+	/*
+	 * Iterate through each gpu power level and generate a mask for GMU
+	 * firmware for ACD enabled levels and store the corresponding control
+	 * register configurations to the acd_table structure.
+	 */
+	for (i = 0; i < pwr->num_pwrlevels; i++) {
+		if (pwrlevel->acd_level) {
+			cmd->enable_by_level |= (1 << (i + 1));
+			cmd->data[cmd_idx++] = pwrlevel->acd_level;
+		}
+		pwrlevel--;
+	}
+
+	if (!cmd->enable_by_level)
+		return;
+
+	cmd->num_levels = cmd_idx;
+
+	ret = gen7_gmu_aop_mailbox_init(adreno_dev, gmu);
+	if (ret)
+		dev_err(&gmu->pdev->dev,
+			"AOP mailbox init failed: %d\n", ret);
+}
+
+static int gen7_gmu_reg_probe(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	ret = kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu", NULL, NULL);
+
+	if (ret)
+		dev_err(&gmu->pdev->dev, "Unable to map the GMU registers\n");
+	/*
+	 * gmu_ao_blk_dec1 and gmu_ao_blk_dec2 are contiguous and contained within the gmu region
+	 * mapped above. gmu_ao_blk_dec0 is not within the gmu region and is mapped separately.
+	 */
+	kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu_ao_blk_dec0", NULL, NULL);
+
+	return ret;
+}
+
+static int gen7_gmu_clk_probe(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret, i;
+	int tbl_size;
+	int num_freqs;
+	int offset;
+
+	ret = devm_clk_bulk_get_all(&gmu->pdev->dev, &gmu->clks);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Voting for apb_pclk will enable power and clocks required for
+	 * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled,
+	 * QDSS is essentially unusable. Hence, if QDSS cannot be used,
+	 * don't vote for this clock.
+	 */
+	if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) {
+		for (i = 0; i < ret; i++) {
+			if (!strcmp(gmu->clks[i].id, "apb_pclk")) {
+				gmu->clks[i].clk = NULL;
+				break;
+			}
+		}
+	}
+
+	gmu->num_clks = ret;
+
+	/* Read the optional list of GMU frequencies */
+	if (of_get_property(gmu->pdev->dev.of_node,
+		"qcom,gmu-freq-table", &tbl_size) == NULL)
+		goto default_gmu_freq;
+
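+	/*
+	 * The property is parsed as <gmu-freq vlvl> pairs, one pair per GMU
+	 * power level. Illustrative (hypothetical) devicetree encoding:
+	 *	qcom,gmu-freq-table = <200000000 RPMH_REGULATOR_LEVEL_LOW_SVS
+	 *			       500000000 RPMH_REGULATOR_LEVEL_SVS>;
+	 */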
+	num_freqs = (tbl_size / sizeof(u32)) / 2;
+	if (num_freqs != ARRAY_SIZE(gmu->freqs))
+		goto default_gmu_freq;
+
+	for (i = 0; i < num_freqs; i++) {
+		offset = i * 2;
+		ret = of_property_read_u32_index(gmu->pdev->dev.of_node,
+			"qcom,gmu-freq-table", offset, &gmu->freqs[i]);
+		if (ret)
+			goto default_gmu_freq;
+		ret = of_property_read_u32_index(gmu->pdev->dev.of_node,
+			"qcom,gmu-freq-table", offset + 1, &gmu->vlvls[i]);
+		if (ret)
+			goto default_gmu_freq;
+	}
+	return 0;
+
+default_gmu_freq:
+	/* The GMU frequency table is missing or invalid. Go with a default */
+	gmu->freqs[0] = GMU_FREQ_MIN;
+	gmu->vlvls[0] = RPMH_REGULATOR_LEVEL_LOW_SVS;
+	gmu->freqs[1] = GMU_FREQ_MAX;
+	gmu->vlvls[1] = RPMH_REGULATOR_LEVEL_SVS;
+
+	return 0;
+}
+
+static void gen7_gmu_rdpm_probe(struct gen7_gmu_device *gmu,
+		struct kgsl_device *device)
+{
+	struct resource *res;
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_cx");
+	if (res)
+		gmu->rdpm_cx_virt = devm_ioremap(&device->pdev->dev,
+				res->start, resource_size(res));
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_mx");
+	if (res)
+		gmu->rdpm_mx_virt = devm_ioremap(&device->pdev->dev,
+				res->start, resource_size(res));
+}
+
+void gen7_gmu_remove(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (!IS_ERR_OR_NULL(gmu->mailbox.channel))
+		mbox_free_channel(gmu->mailbox.channel);
+
+	adreno_dev->acd_enabled = false;
+
+	if (gmu->fw_image)
+		release_firmware(gmu->fw_image);
+
+	gen7_free_gmu_globals(gmu);
+
+	vfree(gmu->itcm_shadow);
+	if (gmu->log_kobj.state_initialized)
+		kobject_put(&gmu->log_kobj);
+	if (gmu->stats_kobj.state_initialized)
+		kobject_put(&gmu->stats_kobj);
+}
+
+static int gen7_gmu_iommu_fault_handler(struct iommu_domain *domain,
+		struct device *dev, unsigned long addr, int flags, void *token)
+{
+	char *fault_type = "unknown";
+
+	if (flags & IOMMU_FAULT_TRANSLATION)
+		fault_type = "translation";
+	else if (flags & IOMMU_FAULT_PERMISSION)
+		fault_type = "permission";
+	else if (flags & IOMMU_FAULT_EXTERNAL)
+		fault_type = "external";
+	else if (flags & IOMMU_FAULT_TRANSACTION_STALLED)
+		fault_type = "transaction stalled";
+
+	dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n",
+			addr,
+			(flags & IOMMU_FAULT_WRITE) ? "write" : "read",
+			fault_type);
+
+	return 0;
+}
+
+static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu)
+{
+	int ret;
+
+	gmu->domain = iommu_domain_alloc(&platform_bus_type);
+	if (gmu->domain == NULL) {
+		dev_err(&gmu->pdev->dev, "Unable to allocate GMU IOMMU domain\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Disable stall on fault for the GMU context bank.
+	 * This sets SCTLR.CFCFG = 0.
+	 * Also note that the smmu driver sets SCTLR.HUPCF = 0 by default.
+	 */
+	qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL);
+
+	ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev);
+	if (!ret) {
+		iommu_set_fault_handler(gmu->domain,
+			gen7_gmu_iommu_fault_handler, gmu);
+		return 0;
+	}
+
+	dev_err(&gmu->pdev->dev,
+		"Unable to attach GMU IOMMU domain: %d\n", ret);
+	iommu_domain_free(gmu->domain);
+	gmu->domain = NULL;
+
+	return ret;
+}
+
+/* Default IFPC timer (300usec) value */
+#define GEN7_GMU_LONG_IFPC_HYST	FIELD_PREP(GENMASK(15, 0), 0x1680)
+
+/* Minimum IFPC timer (200usec) allowed to override default value */
+#define GEN7_GMU_LONG_IFPC_HYST_FLOOR	FIELD_PREP(GENMASK(15, 0), 0x0F00)
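+
+/*
+ * Both hysteresis values appear consistent with a 19.2 MHz always-on timebase
+ * (an assumption, not stated here): 0x1680 = 5760 ticks / 19.2 MHz = 300 usec
+ * and 0x0F00 = 3840 ticks / 19.2 MHz = 200 usec.
+ */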
+
+int gen7_gmu_probe(struct kgsl_device *device,
+		struct platform_device *pdev)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	int ret, i;
+
+	gmu->pdev = pdev;
+
+	dma_set_coherent_mask(&gmu->pdev->dev, DMA_BIT_MASK(64));
+	gmu->pdev->dev.dma_mask = &gmu->pdev->dev.coherent_dma_mask;
+	set_dma_ops(&gmu->pdev->dev, NULL);
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+						"rscc");
+	if (res) {
+		gmu->rscc_virt = devm_ioremap(&device->pdev->dev, res->start,
+						resource_size(res));
+		if (!gmu->rscc_virt) {
+			dev_err(&gmu->pdev->dev, "rscc ioremap failed\n");
+			return -ENOMEM;
+		}
+	}
+
+	/* Setup any rdpm register ranges */
+	gen7_gmu_rdpm_probe(gmu, device);
+
+	/* Set up GMU regulators */
+	ret = kgsl_pwrctrl_probe_regulators(device, pdev);
+	if (ret)
+		return ret;
+
+	ret = gen7_gmu_clk_probe(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* Set up GMU IOMMU and shared memory with GMU */
+	ret = gen7_gmu_iommu_init(gmu);
+	if (ret)
+		goto error;
+
+	gmu->vma = gen7_gmu_vma;
+	for (i = 0; i < ARRAY_SIZE(gen7_gmu_vma); i++) {
+		struct gmu_vma_entry *vma = &gen7_gmu_vma[i];
+
+		vma->vma_root = RB_ROOT;
+		spin_lock_init(&vma->lock);
+	}
+
+	/* Map and reserve the GMU CSR registers */
+	ret = gen7_gmu_reg_probe(adreno_dev);
+	if (ret)
+		goto error;
+
+	/* Populates RPMh configurations */
+	ret = gen7_build_rpmh_tables(adreno_dev);
+	if (ret)
+		goto error;
+
+	/* Set up GMU idle state */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) {
+		gmu->idle_level = GPU_HW_IFPC;
+		adreno_dev->ifpc_hyst = GEN7_GMU_LONG_IFPC_HYST;
+		adreno_dev->ifpc_hyst_floor = GEN7_GMU_LONG_IFPC_HYST_FLOOR;
+	} else {
+		gmu->idle_level = GPU_HW_ACTIVE;
+	}
+
+	gen7_gmu_acd_probe(device, gmu, pdev->dev.of_node);
+
+	set_bit(GMU_ENABLED, &device->gmu_core.flags);
+
+	device->gmu_core.dev_ops = &gen7_gmudev;
+
+	/* Set default GMU attributes */
+	gmu->log_stream_enable = false;
+	gmu->log_group_mask = 0x3;
+
+	/* Initialize to zero to detect trace packet loss */
+	gmu->trace.seq_num = 0;
+
+	/* Disabled by default */
+	gmu->stats_enable = false;
+	/* Set default to CM3 busy cycles countable */
+	gmu->stats_mask = BIT(GEN7_GMU_CM3_BUSY_CYCLES);
+	/* Interval is in 50 us units. Set default sampling frequency to 4x50 us */
+	gmu->stats_interval = HFI_FEATURE_GMU_STATS_INTERVAL;
+
+	/* GMU sysfs nodes setup */
+	(void) kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, &dev->kobj, "log");
+	(void) kobject_init_and_add(&gmu->stats_kobj, &stats_kobj_type, &dev->kobj, "stats");
+
+	of_property_read_u32(gmu->pdev->dev.of_node, "qcom,gmu-perf-ddr-bw",
+		&gmu->perf_ddr_bw);
+
+	spin_lock_init(&gmu->hfi.cmdq_lock);
+
+	gmu->irq = kgsl_request_irq(gmu->pdev, "gmu",
+		gen7_gmu_irq_handler, device);
+
+	if (gmu->irq >= 0)
+		return 0;
+
+	ret = gmu->irq;
+
+error:
+	gen7_gmu_remove(device);
+	return ret;
+}
+
+static void gen7_gmu_active_count_put(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (WARN(atomic_read(&device->active_cnt) == 0,
+		"Unbalanced get/put calls to KGSL active count\n"))
+		return;
+
+	if (atomic_dec_and_test(&device->active_cnt)) {
+		kgsl_pwrscale_update_stats(device);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	wake_up(&device->active_cnt_wq);
+}
+
+int gen7_halt_gbif(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/* Halt new client requests */
+	kgsl_regwrite(device, GEN7_GBIF_HALT, GEN7_GBIF_CLIENT_HALT_MASK);
+	ret = adreno_wait_for_halt_ack(device,
+		GEN7_GBIF_HALT_ACK, GEN7_GBIF_CLIENT_HALT_MASK);
+
+	/* Halt all AXI requests */
+	kgsl_regwrite(device, GEN7_GBIF_HALT, GEN7_GBIF_ARB_HALT_MASK);
+	ret = adreno_wait_for_halt_ack(device,
+		GEN7_GBIF_HALT_ACK, GEN7_GBIF_ARB_HALT_MASK);
+
+	/* De-assert the halts */
+	kgsl_regwrite(device, GEN7_GBIF_HALT, 0x0);
+
+	return ret;
+}
+
+static int gen7_gmu_power_off(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+
+	if (device->gmu_fault)
+		goto error;
+
+	/* Wait for the lowest idle level we requested */
+	ret = gen7_gmu_wait_for_lowest_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen7_complete_rpmh_votes(gmu, 2);
+	if (ret)
+		goto error;
+
+	ret = gen7_gmu_notify_slumber(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen7_gmu_wait_for_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen7_rscc_sleep_sequence(adreno_dev);
+	if (ret)
+		goto error;
+
+	gen7_rdpm_mx_freq_update(gmu, 0);
+
+	/* Now that we are done with GMU and GPU, clear the GBIF */
+	ret = gen7_halt_gbif(adreno_dev);
+	if (ret)
+		goto error;
+
+	gen7_gmu_irq_disable(adreno_dev);
+
+	gen7_hfi_stop(adreno_dev);
+
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen7_rdpm_cx_freq_update(gmu, 0);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+
+	return 0;
+
+error:
+	gen7_gmu_irq_disable(adreno_dev);
+	gen7_hfi_stop(adreno_dev);
+	gen7_gmu_suspend(adreno_dev);
+
+	return ret;
+}
+
+void gen7_enable_gpu_irq(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_pwrctrl_irq(device, true);
+
+	adreno_irqctrl(adreno_dev, 1);
+}
+
+void gen7_disable_gpu_irq(struct adreno_device *adreno_dev)
+{
+	kgsl_pwrctrl_irq(KGSL_DEVICE(adreno_dev), false);
+
+	if (gen7_gmu_gx_is_on(adreno_dev))
+		adreno_irqctrl(adreno_dev, 0);
+}
+
+static int gen7_gpu_boot(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		goto err;
+
+	ret = gen7_gmu_oob_set(device, oob_gpu);
+	if (ret)
+		goto oob_clear;
+
+	ret = gen7_gmu_hfi_start_msg(adreno_dev);
+	if (ret)
+		goto oob_clear;
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data));
+
+	gen7_start(adreno_dev);
+
+	if (gen7_core->qos_value && adreno_is_preemption_enabled(adreno_dev))
+		kgsl_regwrite(device, GEN7_RBBM_GBIF_CLIENT_QOS_CNTL,
+			gen7_core->qos_value[adreno_dev->cur_rb->id]);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	adreno_perfcounter_start(adreno_dev);
+
+	/* Clear FSR here in case it is set from a previous pagefault */
+	kgsl_mmu_clear_fsr(&device->mmu);
+
+	gen7_enable_gpu_irq(adreno_dev);
+
+	ret = gen7_rb_start(adreno_dev);
+	if (ret) {
+		gen7_disable_gpu_irq(adreno_dev);
+		goto oob_clear;
+	}
+
+	/*
+	 * At this point it is safe to assume that we recovered. Setting
+	 * this field allows us to take a new snapshot for the next failure
+	 * if we are prioritizing the first unrecoverable snapshot.
+	 */
+	if (device->snapshot)
+		device->snapshot->recovered = true;
+
+	/* Start the dispatcher */
+	adreno_dispatcher_start(device);
+
+	device->reset_counter++;
+
+	gen7_gmu_oob_clear(device, oob_gpu);
+
+	return 0;
+
+oob_clear:
+	gen7_gmu_oob_clear(device, oob_gpu);
+
+err:
+	gen7_gmu_power_off(adreno_dev);
+
+	return ret;
+}
+
+static void gmu_idle_timer(struct timer_list *t)
+{
+	struct kgsl_device *device = container_of(t, struct kgsl_device,
+					idle_timer);
+
+	kgsl_schedule_work(&device->idle_check_ws);
+}
+
+static int gen7_boot(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (WARN_ON(test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen7_gmu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_start_idle_timer(device);
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return ret;
+}
+
+static int gen7_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) {
+		if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+			return gen7_boot(adreno_dev);
+
+		return 0;
+	}
+
+	ret = gen7_ringbuffer_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_gmu_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen7_gmu_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+
+	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
+						 ADRENO_COOP_RESET);
+
+	adreno_create_profile_buffer(adreno_dev);
+
+	set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags);
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	/*
+	 * BCL needs its Central Broadcast register to be programmed from TZ.
+	 * For kernel versions prior to 6.1 this programming happens only when
+	 * the zap shader firmware load is successful. Since the zap firmware
+	 * load can fail in the boot-up path, enable BCL only after the first
+	 * boot completes successfully, which ensures the Central Broadcast
+	 * register was programmed before BCL is enabled.
+	 */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		adreno_dev->bcl_enabled = true;
+
+	/*
+	 * There is a possible deadlock scenario during kgsl firmware reading
+	 * (request_firmware) and devfreq update calls. During first boot, kgsl
+	 * device mutex is held and then request_firmware is called for reading
+	 * firmware. request_firmware internally takes dev_pm_qos_mtx lock.
+	 * Whereas in case of devfreq update calls triggered by thermal/bcl or
+	 * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then
+	 * tries to take kgsl device mutex as part of get_dev_status/target
+	 * calls. This results in a deadlock where both threads are unable to
+	 * acquire the mutex held by the other thread. Enable devfreq updates
+	 * now as we are done reading all firmware files.
+	 */
+	device->pwrscale.devfreq_enabled = true;
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return 0;
+}
+
+static bool gen7_irq_pending(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	kgsl_regread(device, GEN7_RBBM_INT_0_STATUS, &status);
+
+	/* Return busy if an interrupt is pending */
+	return ((status & adreno_dev->irq_mask) ||
+		atomic_read(&adreno_dev->pending_irq_refcnt));
+}
+
+static int gen7_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags));
+
+	adreno_suspend_context(device);
+
+	/*
+	 * adreno_suspend_context() unlocks the device mutex, which
+	 * could allow a concurrent thread to attempt SLUMBER sequence.
+	 * Hence, check the flags again before proceeding with SLUMBER.
+	 */
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER);
+
+	ret = gen7_gmu_oob_set(device, oob_gpu);
+	if (ret)
+		goto no_gx_power;
+
+	if (gen7_irq_pending(adreno_dev)) {
+		gen7_gmu_oob_clear(device, oob_gpu);
+		return -EBUSY;
+	}
+
+	kgsl_pwrscale_update_stats(device);
+
+	/* Save active coresight registers if applicable */
+	adreno_coresight_stop(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+no_gx_power:
+	gen7_gmu_oob_clear(device, oob_gpu);
+
+	kgsl_pwrctrl_irq(device, false);
+
+	gen7_gmu_power_off(adreno_dev);
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	adreno_dispatcher_stop(adreno_dev);
+
+	adreno_ringbuffer_stop(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	del_timer_sync(&device->idle_timer);
+
+	kgsl_pwrscale_sleep(device);
+
+	kgsl_pwrctrl_clear_l3_vote(device);
+
+	/*
+	 * Reset the context records so that CP can start
+	 * at the correct read pointer for BV thread after
+	 * coming out of slumber.
+	 */
+	gen7_reset_preempt_records(adreno_dev);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+
+	return ret;
+}
+
+static void gmu_idle_check(struct work_struct *work)
+{
+	struct kgsl_device *device = container_of(work,
+					struct kgsl_device, idle_check_ws);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	mutex_lock(&device->mutex);
+
+	if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags))
+		goto done;
+
+	if (atomic_read(&device->active_cnt) || time_is_after_jiffies(device->idle_jiffies)) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		goto done;
+
+	spin_lock(&device->submit_lock);
+
+	if (device->submit_now) {
+		spin_unlock(&device->submit_lock);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	device->skip_inline_submit = true;
+	spin_unlock(&device->submit_lock);
+
+	ret = gen7_power_off(adreno_dev);
+	if (ret == -EBUSY) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+done:
+	mutex_unlock(&device->mutex);
+}
+
+static int gen7_gmu_first_open(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/*
+	 * Do the one time settings that need to happen when we
+	 * attempt to boot the gpu the very first time
+	 */
+	ret = gen7_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * A client that does a first_open but never closes the device
+	 * may prevent us from going back to SLUMBER. So trigger the idle
+	 * check by incrementing the active count and immediately releasing it.
+	 */
+	atomic_inc(&device->active_cnt);
+	gen7_gmu_active_count_put(adreno_dev);
+
+	return 0;
+}
+
+static int gen7_gmu_last_close(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return gen7_power_off(adreno_dev);
+
+	return 0;
+}
+
+static int gen7_gmu_active_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return -EINVAL;
+
+	if ((atomic_read(&device->active_cnt) == 0) &&
+		!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		ret = gen7_boot(adreno_dev);
+
+	if (ret == 0)
+		atomic_inc(&device->active_cnt);
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	return ret;
+}
+
+static int gen7_gmu_pm_suspend(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND);
+
+	/* Halt any new submissions */
+	reinit_completion(&device->halt_gate);
+
+	/* wait for active count so device can be put in slumber */
+	ret = kgsl_active_count_wait(device, 0, HZ);
+	if (ret) {
+		dev_err(device->dev,
+			"Timed out waiting for the active count\n");
+		goto err;
+	}
+
+	ret = adreno_idle(device);
+	if (ret)
+		goto err;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		gen7_power_off(adreno_dev);
+
+	set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+
+	adreno_get_gpu_halt(adreno_dev);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
+
+	return 0;
+err:
+	adreno_dispatcher_start(device);
+	return ret;
+}
+
+static void gen7_gmu_pm_resume(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags),
+		"resume invoked without a suspend\n"))
+		return;
+
+	adreno_put_gpu_halt(adreno_dev);
+
+	adreno_dispatcher_start(device);
+
+	clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+}
+
+static void gen7_gmu_touch_wakeup(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	/*
+	 * Do not wake up a suspended device, and do not wake up before the
+	 * first boot sequence has completed.
+	 */
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) ||
+		!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		goto done;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen7_gmu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	ret = gen7_gpu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+done:
+	/*
+	 * When waking up from a touch event we want to stay active long enough
+	 * for the user to send a draw command. The default idle timer timeout
+	 * is shorter than we want, so push the idle timer out further for this
+	 * special case.
+	 */
+	mod_timer(&device->idle_timer, jiffies +
+			msecs_to_jiffies(adreno_wake_timeout));
+}
+
+const struct adreno_power_ops gen7_gmu_power_ops = {
+	.first_open = gen7_gmu_first_open,
+	.last_close = gen7_gmu_last_close,
+	.active_count_get = gen7_gmu_active_count_get,
+	.active_count_put = gen7_gmu_active_count_put,
+	.pm_suspend = gen7_gmu_pm_suspend,
+	.pm_resume = gen7_gmu_pm_resume,
+	.touch_wakeup = gen7_gmu_touch_wakeup,
+	.gpu_clock_set = gen7_gmu_clock_set,
+	.gpu_bus_set = gen7_gmu_bus_set,
+};
+
+int gen7_gmu_device_probe(struct platform_device *pdev,
+	u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	struct gen7_device *gen7_dev;
+	int ret;
+
+	gen7_dev = devm_kzalloc(&pdev->dev, sizeof(*gen7_dev),
+			GFP_KERNEL);
+	if (!gen7_dev)
+		return -ENOMEM;
+
+	adreno_dev = &gen7_dev->adreno_dev;
+
+	adreno_dev->irq_mask = GEN7_INT_MASK;
+
+	ret = gen7_probe_common(pdev, adreno_dev, chipid, gpucore);
+	if (ret)
+		return ret;
+
+	ret = adreno_dispatcher_init(adreno_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "adreno dispatcher init failed ret %d\n", ret);
+		return ret;
+	}
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	INIT_WORK(&device->idle_check_ws, gmu_idle_check);
+
+	timer_setup(&device->idle_timer, gmu_idle_timer, 0);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_DMS)) {
+		set_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv);
+		adreno_dev->dms_enabled = true;
+	}
+
+	return 0;
+}
+
+int gen7_gmu_reset(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	gen7_disable_gpu_irq(adreno_dev);
+
+	gen7_gmu_irq_disable(adreno_dev);
+
+	gen7_hfi_stop(adreno_dev);
+
+	/* Hard reset the gmu and gpu */
+	gen7_gmu_suspend(adreno_dev);
+
+	gen7_reset_preempt_records(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	/* Attempt to reboot the gmu and gpu */
+	return gen7_boot(adreno_dev);
+}
+
+int gen7_gmu_hfi_probe(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hfi *hfi = &gmu->hfi;
+
+	hfi->irq = kgsl_request_irq(gmu->pdev, "hfi",
+		gen7_hfi_irq_handler, KGSL_DEVICE(adreno_dev));
+
+	return hfi->irq < 0 ? hfi->irq : 0;
+}
+
+int gen7_gmu_add_to_minidump(struct adreno_device *adreno_dev)
+{
+	struct gen7_device *gen7_dev = container_of(adreno_dev,
+					struct gen7_device, adreno_dev);
+	int ret;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GEN7_DEVICE,
+			(void *)(gen7_dev), sizeof(struct gen7_device));
+	if (ret)
+		return ret;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GMU_LOG_ENTRY,
+			gen7_dev->gmu.gmu_log->hostptr, gen7_dev->gmu.gmu_log->size);
+	if (ret)
+		return ret;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HFIMEM_ENTRY,
+			gen7_dev->gmu.hfi.hfi_mem->hostptr, gen7_dev->gmu.hfi.hfi_mem->size);
+
+	return ret;
+}
+
+static int gen7_gmu_bind(struct device *dev, struct device *master, void *data)
+{
+	struct kgsl_device *device = dev_get_drvdata(master);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	const struct gen7_gpudev *gen7_gpudev = to_gen7_gpudev(gpudev);
+	int ret;
+
+	ret = gen7_gmu_probe(device, to_platform_device(dev));
+	if (ret)
+		return ret;
+
+	if (gen7_gpudev->hfi_probe) {
+		ret = gen7_gpudev->hfi_probe(adreno_dev);
+
+		if (ret) {
+			gen7_gmu_remove(device);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void gen7_gmu_unbind(struct device *dev, struct device *master,
+		void *data)
+{
+	struct kgsl_device *device = dev_get_drvdata(master);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	const struct gen7_gpudev *gen7_gpudev = to_gen7_gpudev(gpudev);
+
+	if (gen7_gpudev->hfi_remove)
+		gen7_gpudev->hfi_remove(adreno_dev);
+
+	gen7_gmu_remove(device);
+}
+
+static const struct component_ops gen7_gmu_component_ops = {
+	.bind = gen7_gmu_bind,
+	.unbind = gen7_gmu_unbind,
+};
+
+static int gen7_gmu_probe_dev(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &gen7_gmu_component_ops);
+}
+
+static int gen7_gmu_remove_dev(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &gen7_gmu_component_ops);
+	return 0;
+}
+
+static const struct of_device_id gen7_gmu_match_table[] = {
+	{ .compatible = "qcom,gen7-gmu" },
+	{ },
+};
+
+struct platform_driver gen7_gmu_driver = {
+	.probe = gen7_gmu_probe_dev,
+	.remove = gen7_gmu_remove_dev,
+	.driver = {
+		.name = "adreno-gen7-gmu",
+		.of_match_table = gen7_gmu_match_table,
+	},
+};

+ 510 - 0
qcom/opensource/graphics-kernel/adreno_gen7_gmu.h

@@ -0,0 +1,510 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_GEN7_GMU_H
+#define __ADRENO_GEN7_GMU_H
+
+#include <linux/mailbox_client.h>
+
+#include "adreno_gen7_hfi.h"
+#include "kgsl_gmu_core.h"
+
+struct gen7_dcvs_table {
+	u32 gpu_level_num;
+	u32 gmu_level_num;
+	struct opp_gx_desc gx_votes[MAX_GX_LEVELS];
+	struct opp_desc cx_votes[MAX_CX_LEVELS];
+};
+
+/**
+ * struct gen7_gmu_device - GMU device structure
+ * @ver: GMU version information
+ * @pdev: Pointer to the GMU platform device
+ * @irq: GMU interrupt number
+ * @fw_image: GMU FW image
+ * @dump_mem: pointer to GMU debug dump memory
+ * @gmu_log: gmu event log memory
+ * @hfi: HFI controller
+ * @clks: GPU subsystem clocks required for GMU functionality
+ * @idle_level: Minimal GPU idle power level
+ * @mailbox: Messages to AOP for ACD enable/disable go through this
+ * @log_wptr_retention: Store the log wptr offset on slumber
+ */
+struct gen7_gmu_device {
+	struct {
+		u32 core;
+		u32 core_dev;
+		u32 pwr;
+		u32 pwr_dev;
+		u32 hfi;
+	} ver;
+	struct platform_device *pdev;
+	int irq;
+	const struct firmware *fw_image;
+	struct kgsl_memdesc *dump_mem;
+	struct kgsl_memdesc *gmu_log;
+	/** @gmu_init_scratch: Memory to store the initial HFI messages */
+	struct kgsl_memdesc *gmu_init_scratch;
+	/** @gpu_boot_scratch: Memory to store the bootup HFI messages */
+	struct kgsl_memdesc *gpu_boot_scratch;
+	/** @vrb: GMU virtual register bank memory */
+	struct kgsl_memdesc *vrb;
+	/** @trace: gmu trace container */
+	struct kgsl_gmu_trace trace;
+	struct gen7_hfi hfi;
+	struct clk_bulk_data *clks;
+	/** @num_clks: Number of entries in the @clks array */
+	int num_clks;
+	unsigned int idle_level;
+	/** @freqs: Array of GMU frequencies */
+	u32 freqs[GMU_MAX_PWRLEVELS];
+	/** @vlvls: Array of GMU voltage levels */
+	u32 vlvls[GMU_MAX_PWRLEVELS];
+	struct kgsl_mailbox mailbox;
+	/** @gmu_globals: Array to store gmu global buffers */
+	struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES];
+	/** @global_entries: To keep track of number of gmu buffers */
+	u32 global_entries;
+	struct gmu_vma_entry *vma;
+	unsigned int log_wptr_retention;
+	/** @cm3_fault: whether gmu received a cm3 fault interrupt */
+	atomic_t cm3_fault;
+	/**
+	 * @itcm_shadow: Copy of the itcm block in firmware binary used for
+	 * snapshot
+	 */
+	void *itcm_shadow;
+	/** @flags: Internal gmu flags */
+	unsigned long flags;
+	/** @rscc_virt: Pointer where RSCC block is mapped */
+	void __iomem *rscc_virt;
+	/** @domain: IOMMU domain for the kernel context */
+	struct iommu_domain *domain;
+	/** @log_stream_enable: GMU log streaming enable. Disabled by default */
+	bool log_stream_enable;
+	/** @log_group_mask: Allows overriding default GMU log group mask */
+	u32 log_group_mask;
+	struct kobject log_kobj;
+	/**
+	 * @perf_ddr_bw: The lowest ddr bandwidth that puts CX at a corner at
+	 * which GMU can run at higher frequency.
+	 */
+	u32 perf_ddr_bw;
+	/** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */
+	void __iomem *rdpm_cx_virt;
+	/** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */
+	void __iomem *rdpm_mx_virt;
+	/** @num_oob_perfcntr: Number of active oob_perfcntr requests */
+	u32 num_oob_perfcntr;
+	/** @acd_debug_val: DVM value to calibrate ACD for a level */
+	u32 acd_debug_val;
+	/** @stats_enable: GMU stats feature enable */
+	bool stats_enable;
+	/** @stats_mask: GMU performance countables to enable */
+	u32 stats_mask;
+	/** @stats_interval: GMU performance counters sampling interval */
+	u32 stats_interval;
+	/** @stats_kobj: kernel object for GMU stats directory in sysfs */
+	struct kobject stats_kobj;
+	/** @cp_init_hdr: raw command header for cp_init */
+	u32 cp_init_hdr;
+	/** @switch_to_unsec_hdr: raw command header for switch to unsecure packet */
+	u32 switch_to_unsec_hdr;
+	/** @dcvs_table: Table for gpu dcvs levels */
+	struct gen7_dcvs_table dcvs_table;
+};
+
+/* Helper function to get to gen7 gmu device from adreno device */
+struct gen7_gmu_device *to_gen7_gmu(struct adreno_device *adreno_dev);
+
+/* Helper function to get to adreno device from gen7 gmu device */
+struct adreno_device *gen7_gmu_to_adreno(struct gen7_gmu_device *gmu);
+
+/**
+ * gen7_reserve_gmu_kernel_block() - Allocate a global gmu buffer
+ * @gmu: Pointer to the gen7 gmu device
+ * @addr: Desired gmu virtual address
+ * @size: Size of the buffer in bytes
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @align: Alignment for the GMU VA and GMU mapping size
+ *
+ * This function allocates a global gmu buffer and maps it in
+ * the desired gmu vma
+ *
+ * Return: Pointer to the memory descriptor or error pointer on failure
+ */
+struct kgsl_memdesc *gen7_reserve_gmu_kernel_block(struct gen7_gmu_device *gmu,
+		u32 addr, u32 size, u32 vma_id, u32 align);
+
+/**
+ * gen7_reserve_gmu_kernel_block_fixed() - Map a physical resource address to gmu
+ * @gmu: Pointer to the gen7 gmu device
+ * @addr: Desired gmu virtual address
+ * @size: Size of the buffer in bytes
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @resource: Name of the resource to get the size and address to allocate
+ * @attrs: Attributes for the mapping
+ * @align: Alignment for the GMU VA and GMU mapping size
+ *
+ * This function maps the physical resource address to the desired gmu vma
+ *
+ * Return: Pointer to the memory descriptor or error pointer on failure
+ */
+struct kgsl_memdesc *gen7_reserve_gmu_kernel_block_fixed(struct gen7_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 align);
+
+/**
+ * gen7_alloc_gmu_kernel_block() - Allocate a gmu buffer
+ * @gmu: Pointer to the gen7 gmu device
+ * @md: Pointer to the memdesc
+ * @size: Size of the buffer in bytes
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @attrs: Attributes for the mapping
+ *
+ * This function allocates a buffer and maps it in the desired gmu vma
+ *
+ * Return: 0 on success or error code on failure
+ */
+int gen7_alloc_gmu_kernel_block(struct gen7_gmu_device *gmu,
+	struct kgsl_memdesc *md, u32 size, u32 vma_id, int attrs);
+
+/**
+ * gen7_gmu_import_buffer() - Import a gmu buffer
+ * @gmu: Pointer to the gen7 gmu device
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @md: Pointer to the memdesc to be mapped
+ * @attrs: Attributes for the mapping
+ * @align: Alignment for the GMU VA and GMU mapping size
+ *
+ * This function imports and maps a buffer to a gmu vma
+ *
+ * Return: 0 on success or error code on failure
+ */
+int gen7_gmu_import_buffer(struct gen7_gmu_device *gmu, u32 vma_id,
+			struct kgsl_memdesc *md, u32 attrs, u32 align);
+
+/**
+ * gen7_free_gmu_block() - Free a gmu buffer
+ * @gmu: Pointer to the gen7 gmu device
+ * @md: Pointer to the memdesc that is to be freed
+ *
+ * This function frees a gmu block allocated by gen7_reserve_gmu_kernel_block()
+ */
+void gen7_free_gmu_block(struct gen7_gmu_device *gmu, struct kgsl_memdesc *md);
+
+/**
+ * gen7_build_rpmh_tables - Build the rpmh tables
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function creates the gpu dcvs and bw tables
+ *
+ * Return: 0 on success and negative error on failure
+ */
+int gen7_build_rpmh_tables(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_gx_is_on - Check if GX is on
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function reads pwr status registers to check if GX
+ * is on or off
+ */
+bool gen7_gmu_gx_is_on(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_device_probe - GEN7 GMU device probe function
+ * @pdev: Pointer to the platform device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore
+ *
+ * The target specific probe function for gmu based gen7 targets.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_device_probe(struct platform_device *pdev,
+		u32 chipid, const struct adreno_gpu_core *gpucore);
+
+/**
+ * gen7_gmu_reset - Reset and restart the gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_reset(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_enable_gpu_irq - Enable gpu interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_enable_gpu_irq(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_disable_gpu_irq - Disable gpu interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_disable_gpu_irq(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_snapshot - Take snapshot for gmu targets
+ * @adreno_dev: Pointer to the adreno device
+ * @snapshot: Pointer to the snapshot structure
+ *
+ * Send an NMI to gmu if we hit a gmu fault. Then take gmu
+ * snapshot and carry on with rest of the gen7 snapshot
+ */
+void gen7_gmu_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+
+/**
+ * gen7_gmu_probe - Probe gen7 gmu resources
+ * @device: Pointer to the kgsl device
+ * @pdev: Pointer to the gmu platform device
+ *
+ * Probe the gmu and hfi resources
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_probe(struct kgsl_device *device,
+		struct platform_device *pdev);
+
+/**
+ * gen7_gmu_parse_fw - Parse the gmu fw binary
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_parse_fw(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_memory_init - Allocate gmu memory
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Allocates the gmu log buffer and others if needed.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_memory_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_aop_send_acd_state - Enable or disable acd feature in aop
+ * @gmu: Pointer to the gen7 gmu device
+ * @flag: Boolean to enable or disable acd in aop
+ *
+ * This function enables or disables gpu acd feature using mailbox
+ */
+void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag);
+
+/**
+ * gen7_gmu_load_fw - Load gmu firmware
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Loads the gmu firmware binary into TCMs and memory
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_load_fw(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_device_start - Bring gmu out of reset
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_device_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_hfi_start - Indicate hfi start to gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_itcm_shadow - Create itcm shadow copy for snapshot
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_itcm_shadow(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_register_config - gmu register configuration
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Program gmu registers based on features
+ */
+void gen7_gmu_register_config(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_version_info - Get gmu firmware version
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_version_info(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_irq_enable - Enable gmu interrupts
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_gmu_irq_enable(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_irq_disable - Disable gmu interrupts
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_gmu_irq_disable(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_suspend - Hard reset the gpu and gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * In case we hit a gmu fault, hard reset the gpu and gmu
+ * to recover from the fault
+ */
+void gen7_gmu_suspend(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_oob_set - send gmu oob request
+ * @device: Pointer to the kgsl device
+ * @oob: Type of oob request as defined in enum oob_request
+ *
+ * Request gmu to keep gpu powered up till the oob is cleared
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_oob_set(struct kgsl_device *device, enum oob_request oob);
+
+/**
+ * gen7_gmu_oob_clear - clear an asserted oob request
+ * @device: Pointer to the kgsl device
+ * @oob: Type of oob request as defined in enum oob_request
+ *
+ * Clear a previously requested oob so that gmu can power
+ * collapse the gpu
+ */
+void gen7_gmu_oob_clear(struct kgsl_device *device, enum oob_request oob);
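+
+/*
+ * Illustrative sketch only: a caller keeps the GPU powered across a critical
+ * access by bracketing it with an OOB request and clearing it afterwards. The
+ * helper name is hypothetical and oob_gpu is assumed to be a member of
+ * enum oob_request.
+ */
+static inline int gen7_gmu_example_oob_access(struct kgsl_device *device)
+{
+	int ret = gen7_gmu_oob_set(device, oob_gpu);
+
+	if (ret)
+		return ret;
+
+	/* ... access GX registers while the GMU keeps the GPU awake ... */
+
+	gen7_gmu_oob_clear(device, oob_gpu);
+
+	return 0;
+}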
+
+/**
+ * gen7_gmu_wait_for_lowest_idle - wait for gmu to complete ifpc
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * If ifpc is enabled, wait for gmu to put gpu into ifpc.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_wait_for_idle - Wait for gmu to become idle
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_rscc_sleep_sequence - Trigger rscc sleep sequence
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_rscc_sleep_sequence(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_rscc_wakeup_sequence - Trigger rscc wakeup sequence
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_rscc_wakeup_sequence(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_halt_gbif - Halt CX and GX requests in GBIF
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Clear any pending GX or CX transactions in GBIF and
+ * deassert GBIF halt
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_halt_gbif(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_load_pdc_ucode - Load and enable pdc sequence
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_load_pdc_ucode(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_load_rsc_ucode - Load rscc sequence
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_load_rsc_ucode(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_remove - Clean up gmu probed resources
+ * @device: Pointer to the kgsl device
+ */
+void gen7_gmu_remove(struct kgsl_device *device);
+
+/**
+ * gen7_gmu_enable_clks - Enable gmu clocks
+ * @adreno_dev: Pointer to the adreno device
+ * @level: GMU frequency level
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level);
+
+/**
+ * gen7_gmu_handle_watchdog - Handle watchdog interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_send_nmi - Send NMI to GMU
+ * @device: Pointer to the kgsl device
+ * @force: Boolean to forcefully send NMI irrespective of GMU state
+ */
+void gen7_gmu_send_nmi(struct kgsl_device *device, bool force);
+
+/**
+ * gen7_gmu_add_to_minidump - Register gen7_device with va minidump
+ * @adreno_dev: Pointer to the adreno device
+ */
+int gen7_gmu_add_to_minidump(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_snapshot_gmu_mem - Snapshot a GMU memory descriptor
+ * @device: Pointer to the kgsl device
+ * @buf: Destination snapshot buffer
+ * @remain: Remaining size of the snapshot buffer
+ * @priv: Opaque handle
+ *
+ * Return: Number of bytes written to snapshot buffer
+ */
+size_t gen7_snapshot_gmu_mem(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv);
+
+/**
+ * gen7_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU
+ * @adreno_dev: Handle to the adreno device
+ * @ab: ab request that needs to be scaled in MBps
+ *
+ * Returns the AB value that needs to be prefixed to bandwidth vote in kbps
+ */
+u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab);
+
+#endif

+ 317 - 0
qcom/opensource/graphics-kernel/adreno_gen7_gmu_snapshot.c

@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "gen7_reg.h"
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_gen7_gmu.h"
+#include "adreno_snapshot.h"
+#include "adreno_gen7_0_0_snapshot.h"
+#include "adreno_gen7_2_0_snapshot.h"
+#include "kgsl_device.h"
+
+size_t gen7_snapshot_gmu_mem(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_gmu_mem *mem_hdr =
+		(struct kgsl_snapshot_gmu_mem *)buf;
+	unsigned int *data = (unsigned int *)
+		(buf + sizeof(*mem_hdr));
+	struct gmu_mem_type_desc *desc = priv;
+
+	if (priv == NULL || desc->memdesc->hostptr == NULL)
+		return 0;
+
+	if (remain < desc->memdesc->size + sizeof(*mem_hdr)) {
+		dev_err(device->dev,
+			"snapshot: Not enough memory for the gmu section %d\n",
+			desc->type);
+		return 0;
+	}
+
+	mem_hdr->type = desc->type;
+	mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr;
+	mem_hdr->gmuaddr = desc->memdesc->gmuaddr;
+	mem_hdr->gpuaddr = 0;
+
+	/* The hw fence queues are mapped as iomem in the kernel */
+	if (desc->type == SNAPSHOT_GMU_MEM_HW_FENCE)
+		memcpy_fromio(data, desc->memdesc->hostptr, desc->memdesc->size);
+	else
+		memcpy(data, desc->memdesc->hostptr, desc->memdesc->size);
+
+	return desc->memdesc->size + sizeof(*mem_hdr);
+}
+
+static size_t gen7_gmu_snapshot_dtcm(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_gmu_mem *mem_hdr =
+		(struct kgsl_snapshot_gmu_mem *)buf;
+	struct gen7_gmu_device *gmu = (struct gen7_gmu_device *)priv;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 *data = (u32 *)(buf + sizeof(*mem_hdr));
+	u32 i;
+
+	if (remain < gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr)) {
+		SNAPSHOT_ERR_NOMEM(device, "GMU DTCM Memory");
+		return 0;
+	}
+
+	mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
+	mem_hdr->hostaddr = 0;
+	mem_hdr->gmuaddr = gmu->vma[GMU_DTCM].start;
+	mem_hdr->gpuaddr = 0;
+
+	/*
+	 * Read of GMU TCMs over side-band debug controller interface is
+	 * supported on gen7_2_x family
+	 */
+	if (adreno_is_gen7_2_x_family(adreno_dev)) {
+		/*
+		 * region [20]: Dump ITCM/DTCM. Select 1 for DTCM.
+		 * autoInc [31]: Autoincrement the address field after each
+		 * access to TCM_DBG_DATA
+		 */
+		kgsl_regwrite(device, GEN7_CX_DBGC_TCM_DBG_ADDR, BIT(20) | BIT(31));
+
+		for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++)
+			kgsl_regread(device, GEN7_CX_DBGC_TCM_DBG_DATA, data++);
+	} else {
+		for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++)
+			gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + i, data++);
+	}
+
+	return gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr);
+}
+
+static size_t gen7_gmu_snapshot_itcm(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_gmu_mem *mem_hdr =
+			(struct kgsl_snapshot_gmu_mem *)buf;
+	void *dest = buf + sizeof(*mem_hdr);
+	struct gen7_gmu_device *gmu = (struct gen7_gmu_device *)priv;
+
+	if (!gmu->itcm_shadow) {
+		dev_err(&gmu->pdev->dev, "No memory allocated for ITCM shadow capture\n");
+		return 0;
+	}
+
+	if (remain < gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr)) {
+		SNAPSHOT_ERR_NOMEM(device, "GMU ITCM Memory");
+		return 0;
+	}
+
+	mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
+	mem_hdr->hostaddr = 0;
+	mem_hdr->gmuaddr = gmu->vma[GMU_ITCM].start;
+	mem_hdr->gpuaddr = 0;
+
+	memcpy(dest, gmu->itcm_shadow, gmu->vma[GMU_ITCM].size);
+
+	return gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr);
+}
+
+static void gen7_gmu_snapshot_memories(struct kgsl_device *device,
+	struct gen7_gmu_device *gmu, struct kgsl_snapshot *snapshot)
+{
+	struct gmu_mem_type_desc desc;
+	struct kgsl_memdesc *md;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(gmu->gmu_globals); i++) {
+
+		md = &gmu->gmu_globals[i];
+		if (!md->size)
+			continue;
+
+		desc.memdesc = md;
+		if (md == gmu->hfi.hfi_mem)
+			desc.type = SNAPSHOT_GMU_MEM_HFI;
+		else if (md == gmu->gmu_log)
+			desc.type = SNAPSHOT_GMU_MEM_LOG;
+		else if (md == gmu->dump_mem)
+			desc.type = SNAPSHOT_GMU_MEM_DEBUG;
+		else if ((md == gmu->gmu_init_scratch) || (md == gmu->gpu_boot_scratch))
+			desc.type = SNAPSHOT_GMU_MEM_WARMBOOT;
+		else if (md == gmu->vrb)
+			desc.type = SNAPSHOT_GMU_MEM_VRB;
+		else if (md == gmu->trace.md)
+			desc.type = SNAPSHOT_GMU_MEM_TRACE;
+		else
+			desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
+
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+			snapshot, gen7_snapshot_gmu_mem, &desc);
+	}
+}
+
+struct kgsl_snapshot_gmu_version {
+	u32 type;
+	u32 value;
+};
+
+static size_t gen7_snapshot_gmu_version(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	u32 *data = (u32 *) (buf + sizeof(*header));
+	struct kgsl_snapshot_gmu_version *ver = priv;
+
+	if (remain < DEBUG_SECTION_SZ(1)) {
+		SNAPSHOT_ERR_NOMEM(device, "GMU Version");
+		return 0;
+	}
+
+	header->type = ver->type;
+	header->size = 1;
+
+	*data = ver->value;
+
+	return DEBUG_SECTION_SZ(1);
+}
+
+static void gen7_gmu_snapshot_versions(struct kgsl_device *device,
+		struct gen7_gmu_device *gmu,
+		struct kgsl_snapshot *snapshot)
+{
+	int i;
+
+	struct kgsl_snapshot_gmu_version gmu_vers[] = {
+		{ .type = SNAPSHOT_DEBUG_GMU_CORE_VERSION,
+			.value = gmu->ver.core, },
+		{ .type = SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION,
+			.value = gmu->ver.core_dev, },
+		{ .type = SNAPSHOT_DEBUG_GMU_PWR_VERSION,
+			.value = gmu->ver.pwr, },
+		{ .type = SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION,
+			.value = gmu->ver.pwr_dev, },
+		{ .type = SNAPSHOT_DEBUG_GMU_HFI_VERSION,
+			.value = gmu->ver.hfi, },
+	};
+
+	for (i = 0; i < ARRAY_SIZE(gmu_vers); i++)
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+				snapshot, gen7_snapshot_gmu_version,
+				&gmu_vers[i]);
+}
+
+#define RSCC_OFFSET_DWORDS 0x14000
+
+static size_t gen7_snapshot_rscc_registers(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	const u32 *regs = priv;
+	unsigned int *data = (unsigned int *)buf;
+	int count = 0, k;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	/* Figure out how many registers we are going to dump */
+	count = adreno_snapshot_regs_count(regs);
+
+	if (remain < (count * 4)) {
+		SNAPSHOT_ERR_NOMEM(device, "RSCC REGISTERS");
+		return 0;
+	}
+
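+	/*
+	 * The register list is a series of (start, end) pairs terminated by
+	 * UINT_MAX. A single register is emitted as its offset with BIT(31)
+	 * set followed by the value; a range is emitted as the start offset,
+	 * the count and then the values.
+	 */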
+	for (regs = priv; regs[0] != UINT_MAX; regs += 2) {
+		unsigned int cnt = REG_COUNT(regs);
+
+		if (cnt == 1) {
+			*data++ = BIT(31) |  regs[0];
+			*data++ =  __raw_readl(gmu->rscc_virt +
+				((regs[0] - RSCC_OFFSET_DWORDS) << 2));
+			continue;
+		}
+		*data++ = regs[0];
+		*data++ = cnt;
+		for (k = regs[0]; k <= regs[1]; k++)
+			*data++ =  __raw_readl(gmu->rscc_virt +
+				((k - RSCC_OFFSET_DWORDS) << 2));
+	}
+
+	/* Return the size of the section */
+	return (count * 4);
+}
+
+/*
+ * gen7_gmu_device_snapshot() - GEN7 GMU snapshot function
+ * @device: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the GEN7 GMU specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+static void gen7_gmu_device_snapshot(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device));
+	const struct gen7_snapshot_block_list *gen7_snapshot_block_list =
+						gpucore->gen7_snapshot_block_list;
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+		snapshot, gen7_gmu_snapshot_itcm, gmu);
+
+	gen7_gmu_snapshot_versions(device, gmu, snapshot);
+
+	gen7_gmu_snapshot_memories(device, gmu, snapshot);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
+		adreno_snapshot_registers_v2, (void *) gen7_snapshot_block_list->gmu_regs);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
+		gen7_snapshot_rscc_registers, (void *) gen7_snapshot_block_list->rscc_regs);
+
+	if (!gen7_gmu_gx_is_on(adreno_dev))
+		goto dtcm;
+
+	/* Set fence to ALLOW mode so registers can be read */
+	kgsl_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0);
+	/* Make sure the previous write posted before reading */
+	wmb();
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
+		adreno_snapshot_registers_v2, (void *) gen7_snapshot_block_list->gmu_gx_regs);
+
+	/*
+	 * A stalled SMMU can lead to NoC timeouts when host accesses DTCM.
+	 * DTCM can be read through side-band DBGC interface on gen7_2_x family.
+	 */
+	if (adreno_smmu_is_stalled(adreno_dev) && !adreno_is_gen7_2_x_family(adreno_dev)) {
+		dev_err(&gmu->pdev->dev,
+			"Not dumping dtcm because SMMU is stalled\n");
+		return;
+	}
+
+dtcm:
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+		snapshot, gen7_gmu_snapshot_dtcm, gmu);
+}
+
+void gen7_gmu_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/*
+	 * Dump external registers first so that GPUCC and other external
+	 * registers are present in the snapshot and the system state can be
+	 * analyzed even from a partial snapshot dump
+	 */
+	gen7_snapshot_external_core_regs(device, snapshot);
+
+	gen7_gmu_device_snapshot(device, snapshot);
+
+	gen7_snapshot(adreno_dev, snapshot);
+
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX);
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, HFI_IRQ_MASK);
+}

+ 870 - 0
qcom/opensource/graphics-kernel/adreno_gen7_hfi.c

@@ -0,0 +1,870 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/nvmem-consumer.h>
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_gen7_gmu.h"
+#include "adreno_gen7_hfi.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+
+/* Below section is for all structures related to HFI queues */
+#define HFI_QUEUE_MAX HFI_QUEUE_DEFAULT_CNT
+
+/* Total header sizes + queue sizes + 16 for alignment */
+#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
+		(HFI_QUEUE_SIZE * HFI_QUEUE_MAX))
+
+#define HOST_QUEUE_START_ADDR(hfi_mem, i) \
+	((hfi_mem)->hostptr + HFI_QUEUE_OFFSET(i))
+
+struct gen7_hfi *to_gen7_hfi(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	return &gmu->hfi;
+}
+
+/* Sizes in the functions below are in units of dwords */
+int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx,
+		unsigned int *output, unsigned int max_size)
+{
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	u32 *queue;
+	u32 msg_hdr;
+	u32 i, read;
+	u32 size;
+	int result = 0;
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+		return -EINVAL;
+
+	if (hdr->read_index == hdr->write_index)
+		return -ENODATA;
+
+	/* Clear the output data before populating */
+	memset(output, 0, max_size);
+
+	queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
+	msg_hdr = queue[hdr->read_index];
+	size = MSG_HDR_GET_SIZE(msg_hdr);
+
+	if (size > (max_size >> 2)) {
+		dev_err(&gmu->pdev->dev,
+		"HFI message too big: hdr:0x%x rd idx=%d\n",
+			msg_hdr, hdr->read_index);
+		result = -EMSGSIZE;
+		goto done;
+	}
+
+	read = hdr->read_index;
+
+	if (read < hdr->queue_size) {
+		for (i = 0; i < size && i < (max_size >> 2); i++) {
+			output[i] = queue[read];
+			read = (read + 1) % hdr->queue_size;
+		}
+		result = size;
+	} else {
+		/* In case FW messed up */
+		dev_err(&gmu->pdev->dev,
+			"Read index %d greater than queue size %d\n",
+			hdr->read_index, hdr->queue_size);
+		result = -ENODATA;
+	}
+
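+	/* The writer pads each message to a 4-dword boundary, so skip the padding */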
+	read = ALIGN(read, SZ_4) % hdr->queue_size;
+
+	hfi_update_read_idx(hdr, read);
+
+	/* For acks, trace the packet for which this ack was sent */
+	if (MSG_HDR_GET_TYPE(msg_hdr) == HFI_MSG_ACK)
+		trace_kgsl_hfi_receive(MSG_HDR_GET_ID(output[1]),
+			MSG_HDR_GET_SIZE(output[1]),
+			MSG_HDR_GET_SEQNUM(output[1]));
+	else
+		trace_kgsl_hfi_receive(MSG_HDR_GET_ID(msg_hdr),
+			MSG_HDR_GET_SIZE(msg_hdr), MSG_HDR_GET_SEQNUM(msg_hdr));
+
+done:
+	return result;
+}
+
+int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
+		u32 *msg, u32 size_bytes)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	u32 *queue;
+	u32 i, write_idx, read_idx, empty_space;
+	u32 size_dwords = size_bytes >> 2;
+	u32 align_size = ALIGN(size_dwords, SZ_4);
+	u32 id = MSG_HDR_GET_ID(*msg);
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
+		return -EINVAL;
+
+	queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);
+
+	write_idx = hdr->write_index;
+	read_idx = hdr->read_index;
+
+	empty_space = (write_idx >= read_idx) ?
+			(hdr->queue_size - (write_idx - read_idx))
+			: (read_idx - write_idx);
+
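+	/*
+	 * Reject the write unless at least one slot remains free after the
+	 * aligned message is queued; a completely full queue would make
+	 * write_index equal read_index, which is indistinguishable from an
+	 * empty queue.
+	 */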
+	if (empty_space <= align_size)
+		return -ENOSPC;
+
+	for (i = 0; i < size_dwords; i++) {
+		queue[write_idx] = msg[i];
+		write_idx = (write_idx + 1) % hdr->queue_size;
+	}
+
+	/* Pad the unused space at the end of the aligned message with a cookie value */
+	for (; i < align_size; i++) {
+		queue[write_idx] = 0xfafafafa;
+		write_idx = (write_idx + 1) % hdr->queue_size;
+	}
+
+	trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
+
+	hfi_update_write_idx(&hdr->write_index, write_idx);
+
+	return 0;
+}
+
+int gen7_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hfi *hfi = &gmu->hfi;
+	int ret;
+
+	spin_lock(&hfi->cmdq_lock);
+
+	if (test_bit(MSG_HDR_GET_ID(msg[0]), hfi->wb_set_record_bitmask))
+		*msg = RECORD_MSG_HDR(*msg);
+
+	ret = gen7_hfi_queue_write(adreno_dev, HFI_CMD_ID, msg, size_bytes);
+
+	/*
+	 * Some messages, like the ACD and perf tables, are saved in memory, so
+	 * reset the header to make sure we do not send the record-enabled bit
+	 * in case the warmboot setting is changed from debugfs
+	 */
+	*msg = CLEAR_RECORD_MSG_HDR(*msg);
+	/*
+	 * Memory barrier to make sure packet and write index are written before
+	 * an interrupt is raised
+	 */
+	wmb();
+
+	/* Send interrupt to GMU to receive the message */
+	if (!ret)
+		gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
+			GEN7_GMU_HOST2GMU_INTR_SET, 0x1);
+
+	spin_unlock(&hfi->cmdq_lock);
+
+	return ret;
+}
+
+/* Sizes of the queue and messages are in units of dwords */
+static void init_queues(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	int i;
+	struct hfi_queue_table *tbl;
+	struct hfi_queue_header *hdr;
+	struct {
+		unsigned int idx;
+		unsigned int pri;
+		unsigned int status;
+	} queue[HFI_QUEUE_MAX] = {
+		{ HFI_CMD_ID, HFI_CMD_PRI, HFI_QUEUE_STATUS_ENABLED },
+		{ HFI_MSG_ID, HFI_MSG_PRI, HFI_QUEUE_STATUS_ENABLED },
+		{ HFI_DBG_ID, HFI_DBG_PRI, HFI_QUEUE_STATUS_ENABLED },
+	};
+
+	/* Fill Table Header */
+	tbl = mem_addr->hostptr;
+	tbl->qtbl_hdr.version = 0;
+	tbl->qtbl_hdr.size = sizeof(struct hfi_queue_table) >> 2;
+	tbl->qtbl_hdr.qhdr0_offset = sizeof(struct hfi_queue_table_header) >> 2;
+	tbl->qtbl_hdr.qhdr_size = sizeof(struct hfi_queue_header) >> 2;
+	tbl->qtbl_hdr.num_q = HFI_QUEUE_MAX;
+	tbl->qtbl_hdr.num_active_q = HFI_QUEUE_MAX;
+
+	memset(&tbl->qhdr[0], 0, sizeof(tbl->qhdr));
+
+	/* Fill Individual Queue Headers */
+	for (i = 0; i < HFI_QUEUE_MAX; i++) {
+		hdr = &tbl->qhdr[i];
+		hdr->start_addr = GMU_QUEUE_START_ADDR(mem_addr->gmuaddr, i);
+		hdr->type = QUEUE_HDR_TYPE(queue[i].idx, queue[i].pri, 0, 0);
+		hdr->status = queue[i].status;
+		hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* convert to dwords */
+	}
+}
+
+int gen7_hfi_init(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hfi *hfi = &gmu->hfi;
+
+	/* Allocates & maps memory for HFI */
+	if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
+		hfi->hfi_mem = gen7_reserve_gmu_kernel_block(gmu, 0,
+				HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0);
+		if (!IS_ERR(hfi->hfi_mem))
+			init_queues(adreno_dev);
+	}
+
+	return PTR_ERR_OR_ZERO(hfi->hfi_mem);
+}
+
+int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd,
+	struct pending_cmd *ret_cmd)
+{
+	struct adreno_device *adreno_dev = gen7_gmu_to_adreno(gmu);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 *ack = rcvd;
+	u32 hdr = ack[0];
+	u32 req_hdr = ack[1];
+
+	if (ret_cmd == NULL)
+		return -EINVAL;
+
+	if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) {
+		memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2);
+		return 0;
+	}
+
+	/* Could not match this ack to a sender; log the waiting command */
+	dev_err_ratelimited(&gmu->pdev->dev,
+		"HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n",
+		req_hdr, ret_cmd->sent_hdr);
+
+	gmu_core_fault_snapshot(device);
+
+	return -ENODEV;
+}
+
+static int poll_gmu_reg(struct adreno_device *adreno_dev,
+	u32 offsetdwords, unsigned int expected_val,
+	unsigned int mask, unsigned int timeout_ms)
+{
+	unsigned int val;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+	bool nmi = false;
+
+	while (time_is_after_jiffies(timeout)) {
+		gmu_core_regread(device, offsetdwords, &val);
+		if ((val & mask) == expected_val)
+			return 0;
+
+		/*
+		 * If GMU firmware fails any assertion, an error message is
+		 * sent to KMD and an NMI is triggered. So check if GMU is in
+		 * NMI and time out early. Bits [11:9] of
+		 * GEN7_GMU_CM3_FW_INIT_RESULT contain the GMU reset status. A
+		 * non-zero value here indicates that GMU reset is active, the
+		 * NMI handler would eventually complete and GMU would wait
+		 * for recovery.
+		 */
+		gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &val);
+		if (val & 0xE00) {
+			nmi = true;
+			break;
+		}
+
+		usleep_range(10, 100);
+	}
+
+	/* Check one last time */
+	gmu_core_regread(device, offsetdwords, &val);
+	if ((val & mask) == expected_val)
+		return 0;
+
+	dev_err(&gmu->pdev->dev,
+		"Reg poll %s: offset 0x%x, want 0x%x, got 0x%x\n",
+		nmi ? "abort" : "timeout", offsetdwords, expected_val,
+		val & mask);
+
+	return -ETIMEDOUT;
+}
+
+static int gen7_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev,
+	void *data, u32 size_bytes, struct pending_cmd *ret_cmd)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int rc;
+	u32 *cmd = data;
+	struct gen7_hfi *hfi = &gmu->hfi;
+	unsigned int seqnum = atomic_inc_return(&hfi->seqnum);
+
+	*cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2);
+
+	if (ret_cmd == NULL)
+		return gen7_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
+
+	ret_cmd->sent_hdr = cmd[0];
+
+	rc = gen7_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
+	if (rc)
+		return rc;
+
+	rc = poll_gmu_reg(adreno_dev, GEN7_GMU_GMU2HOST_INTR_INFO,
+		HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT);
+
+	if (rc) {
+		gmu_core_fault_snapshot(device);
+		dev_err(&gmu->pdev->dev,
+		"Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n",
+		cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd));
+		return rc;
+	}
+
+	/* Clear the interrupt */
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR,
+		HFI_IRQ_MSGQ_MASK);
+
+	rc = gen7_hfi_process_queue(gmu, HFI_MSG_ID, ret_cmd);
+
+	return rc;
+}
+
+int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes)
+{
+	struct pending_cmd ret_cmd;
+	int rc;
+
+	memset(&ret_cmd, 0, sizeof(ret_cmd));
+
+	rc = gen7_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, &ret_cmd);
+	if (rc)
+		return rc;
+
+	if (ret_cmd.results[2]) {
+		struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		gmu_core_fault_snapshot(device);
+		dev_err(&gmu->pdev->dev,
+				"HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n",
+				ret_cmd.results[1],
+				ret_cmd.results[2]);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int gen7_hfi_send_core_fw_start(struct adreno_device *adreno_dev)
+{
+	struct hfi_core_fw_start_cmd cmd = {
+		.handle = 0x0,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START);
+	if (ret)
+		return ret;
+
+	return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static const char *feature_to_string(u32 feature)
+{
+	if (feature == HFI_FEATURE_ACD)
+		return "ACD";
+
+	return "unknown";
+}
+
+/* Send an hfi message inline and handle the GMU return type carried in the ack */
+int gen7_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
+		struct pending_cmd *ret_cmd, u32 size_bytes)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int rc;
+
+	if (GMU_VER_MINOR(gmu->ver.hfi) <= 4)
+		return gen7_hfi_send_generic_req(adreno_dev, cmd, size_bytes);
+
+	rc = gen7_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, ret_cmd);
+	if (rc)
+		return rc;
+
+	switch (ret_cmd->results[3]) {
+	case GMU_SUCCESS:
+		rc = ret_cmd->results[2];
+		break;
+	case GMU_ERROR_NO_ENTRY:
+		/* Unique error to handle undefined HFI msgs by caller */
+		rc = -ENOENT;
+		break;
+	case GMU_ERROR_TIMEOUT:
+		rc = -EINVAL;
+		break;
+	default:
+		gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev));
+		dev_err(&gmu->pdev->dev,
+			"HFI ACK: Req=0x%8.8X, Result=0x%8.8X Error:0x%8.8X\n",
+			ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]);
+		rc = -EINVAL;
+		break;
+	}
+
+	return rc;
+}
+
+int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
+	u32 feature, u32 enable, u32 data)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct pending_cmd ret_cmd = {0};
+	struct hfi_feature_ctrl_cmd cmd = {
+		.feature = feature,
+		.enable = enable,
+		.data = data,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_FEATURE_CTRL);
+	if (ret)
+		return ret;
+
+	ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
+	if (ret < 0)
+		dev_err(&gmu->pdev->dev,
+				"Unable to %s feature %s (%d)\n",
+				enable ? "enable" : "disable",
+				feature_to_string(feature),
+				feature);
+	return ret;
+}
+
+int gen7_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct pending_cmd ret_cmd = {0};
+	struct hfi_get_value_cmd cmd = {
+		.type = type,
+		.subtype = subtype,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE);
+	if (ret)
+		return ret;
+
+	ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
+	if (ret < 0)
+		dev_err(&gmu->pdev->dev,
+			"Unable to get HFI Value type: %d, subtype: %d, error = %d\n",
+			type, subtype, ret);
+
+	return ret;
+}
+
+int gen7_hfi_send_set_value(struct adreno_device *adreno_dev,
+		u32 type, u32 subtype, u32 data)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct pending_cmd ret_cmd = {0};
+	struct hfi_set_value_cmd cmd = {
+		.type = type,
+		.subtype = subtype,
+		.data = data,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_SET_VALUE);
+	if (ret)
+		return ret;
+
+	ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
+	if (ret < 0)
+		dev_err(&gmu->pdev->dev,
+			"Unable to set HFI Value %d, %d to %d, error = %d\n",
+			type, subtype, data, ret);
+	return ret;
+}
+
+void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd)
+{
+	struct hfi_err_cmd *cmd = rcvd;
+
+	dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n",
+			((cmd->error_code >> 16) & 0xffff),
+			(cmd->error_code & 0xffff),
+			(char *) cmd->data);
+}
+
+void adreno_gen7_receive_debug_req(struct gen7_gmu_device *gmu, void *rcvd)
+{
+	struct hfi_debug_cmd *cmd = rcvd;
+
+	dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n",
+			cmd->type, cmd->timestamp, cmd->data);
+}
+
+int gen7_hfi_process_queue(struct gen7_gmu_device *gmu,
+		u32 queue_idx, struct pending_cmd *ret_cmd)
+{
+	u32 rcvd[MAX_RCVD_SIZE];
+
+	while (gen7_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) {
+		/* ACK Handler */
+		if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
+			int ret = gen7_receive_ack_cmd(gmu, rcvd, ret_cmd);
+
+			if (ret)
+				return ret;
+			continue;
+		}
+
+		/* Request Handler */
+		switch (MSG_HDR_GET_ID(rcvd[0])) {
+		case F2H_MSG_ERR: /* No Reply */
+			adreno_gen7_receive_err_req(gmu, rcvd);
+			break;
+		case F2H_MSG_DEBUG: /* No Reply */
+			adreno_gen7_receive_debug_req(gmu, rcvd);
+			break;
+		default: /* No Reply */
+			dev_err(&gmu->pdev->dev,
+				"HFI request %d not supported\n",
+				MSG_HDR_GET_ID(rcvd[0]));
+			break;
+		}
+	}
+
+	return 0;
+}
+
+int gen7_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	if (!adreno_dev->bcl_enabled)
+		return 0;
+
+	/*
+	 * BCL data is expected by the gmu in the following format:
+	 * BIT[0] - response type
+	 * BIT[1:7] - Throttle level 1 (optional)
+	 * BIT[8:14] - Throttle level 2 (optional)
+	 * BIT[15:21] - Throttle level 3 (optional)
+	 */
+	return gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, adreno_dev->bcl_data);
+}
+
+static int gen7_hfi_send_clx_v1_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	int ret;
+	struct hfi_clx_table_v1_cmd cmd = {0};
+
+	/* Make sure the table is valid before enabling feature */
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_CLX_TBL);
+	if (ret)
+		return ret;
+
+	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_CLX, 1, 0);
+	if (ret)
+		return ret;
+
+	/* GMU supports HW CLX V2 only with both HFI V1 and V2 data formats */
+	cmd.data0 = FIELD_PREP(GENMASK(31, 16), 0x2) | FIELD_PREP(GENMASK(15, 0), 0x1);
+	cmd.data1 = FIELD_PREP(GENMASK(31, 29), 1) |
+				FIELD_PREP(GENMASK(28, 28), 1) |
+				FIELD_PREP(GENMASK(27, 22), 1) |
+				FIELD_PREP(GENMASK(21, 16), 40) |
+				FIELD_PREP(GENMASK(15, 0), 0);
+	cmd.clxt = 0;
+	cmd.clxh = 0;
+	cmd.urgmode = 1;
+	cmd.lkgen = 0;
+
+	return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static int gen7_hfi_send_clx_v2_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	int ret = 0;
+	struct hfi_clx_table_v2_cmd cmd = {0};
+
+	/* Make sure the table is valid before enabling feature */
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_CLX_TBL);
+	if (ret)
+		return ret;
+
+	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_CLX, 1, 0);
+	if (ret)
+		return ret;
+
+	cmd.version = FIELD_PREP(GENMASK(31, 16), 0x2) | FIELD_PREP(GENMASK(15, 0), 0x1);
+	/* cmd.domain[0] is never used but needed per hfi spec */
+	cmd.domain[1].data0 = FIELD_PREP(GENMASK(31, 29), 1) |
+				FIELD_PREP(GENMASK(28, 28), 1) |
+				FIELD_PREP(GENMASK(27, 22), 1) |
+				FIELD_PREP(GENMASK(21, 16), 40) |
+				FIELD_PREP(GENMASK(15, 0), 0);
+	cmd.domain[1].clxt = 0;
+	cmd.domain[1].clxh = 0;
+	cmd.domain[1].urgmode = 1;
+	cmd.domain[1].lkgen = 0;
+	cmd.domain[1].currbudget = 50;
+
+	return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+int gen7_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	if (!adreno_dev->clx_enabled)
+		return 0;
+
+	/* gen7_11_0 GPU uses HFI CLX data version 1 */
+	if (adreno_is_gen7_11_0(adreno_dev))
+		return gen7_hfi_send_clx_v1_feature_ctrl(adreno_dev);
+
+	return gen7_hfi_send_clx_v2_feature_ctrl(adreno_dev);
+}
+
+#define EVENT_PWR_ACD_THROTTLE_PROF 44
+
+int gen7_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	if (adreno_dev->acd_enabled) {
+		ret = gen7_hfi_send_feature_ctrl(adreno_dev,
+			HFI_FEATURE_ACD, 1, 0);
+		if (ret)
+			return ret;
+
+		ret = gen7_hfi_send_generic_req(adreno_dev,
+				&gmu->hfi.acd_table, sizeof(gmu->hfi.acd_table));
+		if (ret)
+			return ret;
+
+		gen7_hfi_send_set_value(adreno_dev, HFI_VALUE_LOG_EVENT_ON,
+				EVENT_PWR_ACD_THROTTLE_PROF, 0);
+	}
+
+	return 0;
+}
+
+int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (gmu->idle_level == GPU_HW_IFPC)
+		return gen7_hfi_send_feature_ctrl(adreno_dev,
+				HFI_FEATURE_IFPC, 1, adreno_dev->ifpc_hyst);
+	return 0;
+}
+
+static void reset_hfi_queues(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr;
+	unsigned int i;
+
+	/* Flush HFI queues */
+	for (i = 0; i < HFI_QUEUE_MAX; i++) {
+		hdr = &tbl->qhdr[i];
+		if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+			continue;
+
+		hdr->read_index = hdr->write_index;
+	}
+}
+
+/* Fill the entry and return the dword count written */
+static u32 _fill_table_entry(struct hfi_table_entry *entry, u32 count,
+		u32 stride_bytes, u32 *data)
+{
+	entry->count = count;
+	entry->stride = stride_bytes >> 2; /* entry->stride is in dwords */
+	memcpy(entry->data, data, stride_bytes * count);
+
+	/* Return total dword count of entry + data */
+	return (sizeof(*entry) >> 2) + (entry->count * entry->stride);
+}
+
+int gen7_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev)
+{
+	/*
+	 * Buffer to store either hfi_table_cmd or hfi_dcvstable_cmd.
+	 * Current max size for either is 165 dwords.
+	 */
+	static u32 cmd_buf[200];
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_dcvs_table *tbl = &gmu->dcvs_table;
+	int ret = 0;
+
+	/* Starting with GMU HFI Version 2.6.1, use H2F_MSG_TABLE */
+	if (gmu->ver.hfi >= HFI_VERSION(2, 6, 1)) {
+		struct hfi_table_cmd *cmd = (struct hfi_table_cmd *)&cmd_buf[0];
+		u32 dword_off;
+
+		/* Already setup, so just send cmd */
+		if (cmd->hdr)
+			return gen7_hfi_send_generic_req(adreno_dev, cmd,
+					MSG_HDR_GET_SIZE(cmd->hdr) << 2);
+
+		if (tbl->gpu_level_num > MAX_GX_LEVELS || tbl->gmu_level_num > MAX_CX_LEVELS)
+			return -EINVAL;
+
+		/* CMD starts with struct hfi_table_cmd data */
+		cmd->type = HFI_TABLE_GPU_PERF;
+		dword_off = sizeof(*cmd) >> 2;
+
+		/* Fill in the table entry and data starting at dword_off */
+		dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off],
+				tbl->gpu_level_num, sizeof(struct opp_gx_desc),
+				(u32 *)tbl->gx_votes);
+
+		/* Fill in the table entry and data starting at dword_off */
+		dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off],
+				tbl->gmu_level_num, sizeof(struct opp_desc),
+				(u32 *)tbl->cx_votes);
+
+		cmd->hdr = CREATE_MSG_HDR(H2F_MSG_TABLE, HFI_MSG_CMD);
+		cmd->hdr = MSG_HDR_SET_SIZE(cmd->hdr, dword_off);
+
+		ret = gen7_hfi_send_generic_req(adreno_dev, cmd, dword_off << 2);
+	} else {
+		struct hfi_dcvstable_cmd *cmd = (struct hfi_dcvstable_cmd *)&cmd_buf[0];
+
+		/* Already setup, so just send cmd */
+		if (cmd->hdr)
+			return gen7_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd));
+
+		if (tbl->gpu_level_num > MAX_GX_LEVELS_LEGACY || tbl->gmu_level_num > MAX_CX_LEVELS)
+			return -EINVAL;
+
+		ret = CMD_MSG_HDR(*cmd, H2F_MSG_PERF_TBL);
+		if (ret)
+			return ret;
+
+		cmd->gpu_level_num = tbl->gpu_level_num;
+		cmd->gmu_level_num = tbl->gmu_level_num;
+		memcpy(&cmd->gx_votes, tbl->gx_votes,
+				sizeof(struct opp_gx_desc) * cmd->gpu_level_num);
+		memcpy(&cmd->cx_votes, tbl->cx_votes,
+				sizeof(struct opp_desc) * cmd->gmu_level_num);
+
+		ret = gen7_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd));
+	}
+
+	return ret;
+}
+
+int gen7_hfi_start(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int result;
+
+	reset_hfi_queues(adreno_dev);
+
+	result = gen7_hfi_send_gpu_perf_table(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table,
+			sizeof(gmu->hfi.bw_table));
+	if (result)
+		goto err;
+
+	result = gen7_hfi_send_acd_feature_ctrl(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen7_hfi_send_bcl_feature_ctrl(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen7_hfi_send_clx_feature_ctrl(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen7_hfi_send_ifpc_feature_ctrl(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen7_hfi_send_core_fw_start(adreno_dev);
+	if (result)
+		goto err;
+
+	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	/* Request default DCVS level */
+	result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+	if (result)
+		goto err;
+
+	/* Request default BW vote */
+	result = kgsl_pwrctrl_axi(device, true);
+
+err:
+	if (result)
+		gen7_hfi_stop(adreno_dev);
+
+	return result;
+}
+
+void gen7_hfi_stop(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_pwrctrl_axi(device, false);
+
+	clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+}
+
+/* HFI interrupt handler */
+irqreturn_t gen7_hfi_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device));
+	unsigned int status = 0;
+
+	gmu_core_regread(device, GEN7_GMU_GMU2HOST_INTR_INFO, &status);
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MASK);
+
+	if (status & HFI_IRQ_DBGQ_MASK)
+		gen7_hfi_process_queue(gmu, HFI_DBG_ID, NULL);
+	if (status & HFI_IRQ_CM3_FAULT_MASK) {
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"GMU CM3 fault interrupt received\n");
+		atomic_set(&gmu->cm3_fault, 1);
+
+		/* make sure other CPUs see the update */
+		smp_wmb();
+	}
+	if (status & ~HFI_IRQ_MASK)
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"Unhandled HFI interrupts 0x%x\n",
+				status & ~HFI_IRQ_MASK);
+
+	return IRQ_HANDLED;
+}

+ 234 - 0
qcom/opensource/graphics-kernel/adreno_gen7_hfi.h

@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_GEN7_HFI_H
+#define __ADRENO_GEN7_HFI_H
+
+#include "adreno_hfi.h"
+
+/**
+ * struct gen7_hfi - HFI control structure
+ */
+struct gen7_hfi {
+	/** @irq: HFI interrupt line */
+	int irq;
+	/**
+	 * @seqnum: Atomic counter incremented for each message sent. The
+	 * value is used as the sequence number for the HFI message.
+	 */
+	atomic_t seqnum;
+	/** @hfi_mem: Memory descriptor for the hfi memory */
+	struct kgsl_memdesc *hfi_mem;
+	/** @bw_table: HFI BW table buffer */
+	struct hfi_bwtable_cmd bw_table;
+	/** @acd_table: HFI table for ACD data */
+	struct hfi_acd_table_cmd acd_table;
+	/** @cmdq_lock: Spinlock for accessing the cmdq */
+	spinlock_t cmdq_lock;
+	/**
+	 * @wb_set_record_bitmask: Bitmask to enable or disable the recording
+	 * of messages in the GMU scratch.
+	 */
+	unsigned long wb_set_record_bitmask[BITS_TO_LONGS(HFI_MAX_ID)];
+};
+
+struct gen7_gmu_device;
+
+/* gen7_hfi_irq_handler - IRQ handler for HFI interrupts */
+irqreturn_t gen7_hfi_irq_handler(int irq, void *data);
+
+/**
+ * gen7_hfi_start - Send the various HFIs during device boot up
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_stop - Stop hfi communication with the gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Drop the default bus vote and mark hfi as stopped
+ */
+void gen7_hfi_stop(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_init - Initialize hfi resources
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function allocates and sets up hfi queues
+ * when a process creates the very first kgsl instance
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_init(struct adreno_device *adreno_dev);
+
+/* Helper function to get to gen7 hfi struct from adreno device */
+struct gen7_hfi *to_gen7_hfi(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_queue_write - Write a command to hfi queue
+ * @adreno_dev: Pointer to the adreno device
+ * @queue_idx: destination queue id
+ * @msg: Data to be written to the queue
+ * @size_bytes: Size of the command in bytes
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
+		u32 *msg, u32 size_bytes);
+
+/**
+ * gen7_hfi_queue_read - Read data from hfi queue
+ * @gmu: Pointer to the gen7 gmu device
+ * @queue_idx: queue id to read from
+ * @output: Pointer to read the data into
+ * @max_size: Size of the output buffer in bytes
+ *
+ * Return: Number of dwords read on success or negative error on failure
+ */
+int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx,
+		u32 *output, u32 max_size);
+
+/**
+ * gen7_receive_ack_cmd - Process ack type packets
+ * @gmu: Pointer to the gen7 gmu device
+ * @rcvd: Pointer to the data read from hfi queue
+ * @ret_cmd: Container for the hfi packet for which this ack is received
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd,
+		struct pending_cmd *ret_cmd);
+
+/**
+ * gen7_hfi_send_feature_ctrl - Enable gmu feature via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @feature: feature to be enabled or disabled
+ * @enable: Set 1 to enable or 0 to disable the feature
+ * @data: payload for the send feature hfi packet
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
+		u32 feature, u32 enable, u32 data);
+
+/**
+ * gen7_hfi_send_get_value - Send gmu get_values via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @type: GMU get_value type
+ * @subtype: GMU get_value subtype
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype);
+
+/**
+ * gen7_hfi_send_set_value - Send gmu set_values via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @type: GMU set_value type
+ * @subtype: GMU set_value subtype
+ * @data: Value to set
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_set_value(struct adreno_device *adreno_dev,
+		u32 type, u32 subtype, u32 data);
+
+/**
+ * gen7_hfi_send_core_fw_start - Send the core fw start hfi
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_core_fw_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_acd_feature_ctrl - Send the acd table and acd feature
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_generic_req - Send a generic hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ * @cmd: Pointer to the hfi packet header and data
+ * @size_bytes: Size of the packet in bytes
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes);
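+
+/*
+ * Illustrative sketch (hypothetical helper): sending a request with
+ * gen7_hfi_send_generic_req(), mirroring the core_fw_start packet built in
+ * adreno_gen7_hfi.c. The command struct must begin with the hdr field that
+ * CMD_MSG_HDR() fills in before the packet is queued.
+ */
+static inline int gen7_hfi_example_send_core_fw_start(struct adreno_device *adreno_dev)
+{
+	struct hfi_core_fw_start_cmd cmd = { .handle = 0x0 };
+	int ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START);
+
+	if (ret)
+		return ret;
+
+	return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}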
+
+/**
+ * gen7_hfi_send_generic_req_v5 - Send a generic hfi packet with additional error handling
+ * @adreno_dev: Pointer to the adreno device
+ * @cmd: Pointer to the hfi packet header and data
+ * @ret_cmd: Ack for the command we just sent
+ * @size_bytes: Size of the packet in bytes
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
+		struct pending_cmd *ret_cmd, u32 size_bytes);
+
+/**
+ * gen7_hfi_send_bcl_feature_ctrl - Send the bcl feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_clx_feature_ctrl - Send the clx feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_ifpc_feature_ctrl - Send the ifpc feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_gpu_perf_table - Send the gpu perf table hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_process_queue - Check hfi queue for messages from gmu
+ * @gmu: Pointer to the gen7 gmu device
+ * @queue_idx: queue id to be processed
+ * @ret_cmd: Container for data needed for waiting for the ack
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_process_queue(struct gen7_gmu_device *gmu,
+		u32 queue_idx, struct pending_cmd *ret_cmd);
+
+/**
+ * gen7_hfi_cmdq_write - Write a command to command queue
+ * @adreno_dev: Pointer to the adreno device
+ * @msg: Data to be written to the queue
+ * @size_bytes: Size of the command in bytes
+ *
+ * This function takes the cmdq lock before writing data to the queue
+
+ *
+ */
+int gen7_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes);
+void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd);
+void adreno_gen7_receive_debug_req(struct gen7_gmu_device *gmu, void *rcvd);
+#endif

+ 2063 - 0
qcom/opensource/graphics-kernel/adreno_gen7_hwsched.c

@@ -0,0 +1,2063 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/cpufreq.h>
+#include <linux/interconnect.h>
+#include <linux/pm_qos.h>
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_gen7_hwsched.h"
+#include "adreno_snapshot.h"
+#include "kgsl_bus.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+
+static void _wakeup_hw_fence_waiters(struct adreno_device *adreno_dev, u32 fault)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	bool lock = !in_interrupt();
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
+		return;
+
+	/*
+	 * We could be in interrupt context here, which means we need to use spin_lock_irqsave
+	 * (which disables interrupts) everywhere we take this lock. Instead of that, simply
+	 * avoid taking this lock if we are recording a fault from an interrupt handler.
+	 */
+	if (lock)
+		spin_lock(&hfi->hw_fence.lock);
+
+	clear_bit(GEN7_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags);
+
+	/* Avoid creating new hardware fences until recovery is complete */
+	set_bit(GEN7_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags);
+
+	if (!lock)
+		/*
+		 * This barrier ensures that the above bitops complete before we wake up the waiters
+		 */
+		smp_wmb();
+	else
+		spin_unlock(&hfi->hw_fence.lock);
+
+	wake_up_all(&hfi->hw_fence.unack_wq);
+
+	del_timer_sync(&hfi->hw_fence_timer);
+}
+
+void gen7_hwsched_fault(struct adreno_device *adreno_dev, u32 fault)
+{
+	/*
+	 * Wake up any threads that may be sleeping waiting for the hardware fence unack count to
+	 * drop to a desired threshold.
+	 */
+	_wakeup_hw_fence_waiters(adreno_dev, fault);
+
+	adreno_hwsched_fault(adreno_dev, fault);
+}
+
+static size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv;
+
+	if (remain < rb->size + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "RB");
+		return 0;
+	}
+
+	header->start = 0;
+	header->end = rb->size >> 2;
+	header->rptr = 0;
+	header->rbsize = rb->size >> 2;
+	header->count = rb->size >> 2;
+	header->timestamp_queued = 0;
+	header->timestamp_retired = 0;
+	header->gpuaddr = rb->gpuaddr;
+	header->id = 0;
+
+	memcpy(data, rb->hostptr, rb->size);
+
+	return rb->size + sizeof(*header);
+}
+
+static void gen7_hwsched_snapshot_preemption_record(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset)
+{
+	struct kgsl_snapshot_section_header *section_header =
+		(struct kgsl_snapshot_section_header *)snapshot->ptr;
+	u8 *dest = snapshot->ptr + sizeof(*section_header);
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)dest;
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(ADRENO_DEVICE(device));
+	u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES;
+	size_t section_size;
+
+	if (gen7_core->ctxt_record_size)
+		ctxt_record_size = gen7_core->ctxt_record_size;
+
+	ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size);
+
+	section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size;
+	if (snapshot->remain < section_size) {
+		SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD");
+		return;
+	}
+
+	section_header->magic = SNAPSHOT_SECTION_MAGIC;
+	section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2;
+	section_header->size = section_size;
+
+	header->size = ctxt_record_size >> 2;
+	header->gpuaddr = md->gpuaddr + offset;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	dest += sizeof(*header);
+
+	memcpy(dest, md->hostptr + offset, ctxt_record_size);
+
+	snapshot->ptr += section_header->size;
+	snapshot->remain -= section_header->size;
+	snapshot->size += section_header->size;
+}
+
+static void snapshot_preemption_records(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md)
+{
+	const struct adreno_gen7_core *gen7_core =
+		to_gen7_core(ADRENO_DEVICE(device));
+	u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES;
+	u64 offset;
+
+	if (gen7_core->ctxt_record_size)
+		ctxt_record_size = gen7_core->ctxt_record_size;
+
+	/* All preemption records exist as a single mem alloc entry */
+	for (offset = 0; offset < md->size; offset += ctxt_record_size)
+		gen7_hwsched_snapshot_preemption_record(device, snapshot, md,
+			offset);
+}
+
+static void *get_rb_hostptr(struct adreno_device *adreno_dev,
+	u64 gpuaddr, u32 size)
+{
+	struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev);
+	u64 offset;
+	u32 i;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md;
+
+		if (md && (gpuaddr >= md->gpuaddr) &&
+			((gpuaddr + size) <= (md->gpuaddr + md->size))) {
+			offset = gpuaddr - md->gpuaddr;
+			return md->hostptr + offset;
+		}
+	}
+
+	return NULL;
+}
+
+static u32 gen7_copy_gpu_global(void *out, void *in, u32 size)
+{
+	if (out && in) {
+		memcpy(out, in, size);
+		return size;
+	}
+
+	return 0;
+}
+
+static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot, struct payload_section *payload)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_snapshot_section_header *section_header =
+		(struct kgsl_snapshot_section_header *)snapshot->ptr;
+	u8 *buf = snapshot->ptr + sizeof(*section_header);
+	struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	u32 size = adreno_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2;
+	u64 lo, hi, gpuaddr;
+	void *rb_hostptr;
+	char str[16];
+
+	lo = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO);
+	hi = adreno_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI);
+	gpuaddr = hi << 32 | lo;
+
+	/* Sanity check to make sure there is enough for the header */
+	if (snapshot->remain < sizeof(*section_header))
+		goto err;
+
+	rb_hostptr = get_rb_hostptr(adreno_dev, gpuaddr, size);
+
+	/* If the gpuaddress and size don't match any allocation, then abort */
+	if (((snapshot->remain - sizeof(*section_header)) <
+	    (size + sizeof(*header))) ||
+	    !gen7_copy_gpu_global(data, rb_hostptr, size))
+		goto err;
+
+	if (device->dump_all_ibs) {
+		u64 rbaddr, lpac_rbaddr;
+
+		kgsl_regread64(device, GEN7_CP_RB_BASE,
+			       GEN7_CP_RB_BASE_HI, &rbaddr);
+		kgsl_regread64(device, GEN7_CP_LPAC_RB_BASE,
+			       GEN7_CP_LPAC_RB_BASE_HI, &lpac_rbaddr);
+
+		/* Parse all IBs from current RB */
+		if ((rbaddr == gpuaddr) || (lpac_rbaddr == gpuaddr))
+			adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot);
+	}
+
+	header->start = 0;
+	header->end = size >> 2;
+	header->rptr = adreno_hwsched_parse_payload(payload, KEY_RB_RPTR);
+	header->wptr = adreno_hwsched_parse_payload(payload, KEY_RB_WPTR);
+	header->rbsize = size >> 2;
+	header->count = size >> 2;
+	header->timestamp_queued = adreno_hwsched_parse_payload(payload,
+			KEY_RB_QUEUED_TS);
+	header->timestamp_retired = adreno_hwsched_parse_payload(payload,
+			KEY_RB_RETIRED_TS);
+	header->gpuaddr = gpuaddr;
+	header->id = adreno_hwsched_parse_payload(payload, KEY_RB_ID);
+
+	section_header->magic = SNAPSHOT_SECTION_MAGIC;
+	section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2;
+	section_header->size = size + sizeof(*header) + sizeof(*section_header);
+
+	snapshot->ptr += section_header->size;
+	snapshot->remain -= section_header->size;
+	snapshot->size += section_header->size;
+
+	return;
+err:
+	snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr);
+	SNAPSHOT_ERR_NOMEM(device, str);
+}
+
+static bool parse_payload_rb_legacy(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+	bool ret = false;
+
+	/* Skip if we didn't receive a context bad HFI */
+	if (!cmd->hdr)
+		return false;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd_legacy, payload);
+
+	start = &cmd->payload[0];
+
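+	/*
+	 * Payloads are variable-length records of {type, dwords, data[]};
+	 * advance by the header size plus the data size each iteration.
+	 */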
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			adreno_hwsched_snapshot_rb_payload(adreno_dev,
+							   snapshot, payload);
+			ret = true;
+		}
+
+		i += sizeof(*payload) + (payload->dwords << 2);
+	}
+
+	return ret;
+}
+
+static bool parse_payload_rb(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+	bool ret = false;
+
+	/* Skip if we didn't receive a context bad HFI */
+	if (!cmd->hdr)
+		return false;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			adreno_hwsched_snapshot_rb_payload(adreno_dev,
+							   snapshot, payload);
+			ret = true;
+		}
+
+		i += sizeof(*payload) + (payload->dwords << 2);
+	}
+
+	return ret;
+}
+
+static int snapshot_context_queue(int id, void *ptr, void *data)
+{
+	struct kgsl_snapshot *snapshot = data;
+	struct kgsl_context *context = ptr;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct gmu_mem_type_desc desc;
+
+	if (!context->gmu_registered)
+		return 0;
+
+	desc.memdesc = &drawctxt->gmu_context_queue;
+	desc.type = SNAPSHOT_GMU_MEM_CONTEXT_QUEUE;
+
+	kgsl_snapshot_add_section(context->device,
+		KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+		snapshot, gen7_snapshot_gmu_mem, &desc);
+
+	return 0;
+}
+
+/* Snapshot AQE buffer */
+static size_t snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+
+	u8 *ptr = buf + sizeof(*header);
+
+	if (IS_ERR_OR_NULL(memdesc) || memdesc->size == 0)
+		return 0;
+
+	if (remain < (memdesc->size + sizeof(*header))) {
+		SNAPSHOT_ERR_NOMEM(device, "AQE BUFFER");
+		return 0;
+	}
+
+	header->size = memdesc->size >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase = MMU_DEFAULT_TTBR0(device);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, memdesc->size);
+
+	return memdesc->size + sizeof(*header);
+}
+
+void gen7_hwsched_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev);
+	bool skip_memkind_rb = false;
+	u32 i;
+	bool parse_payload;
+
+	gen7_gmu_snapshot(adreno_dev, snapshot);
+
+	adreno_hwsched_parse_fault_cmdobj(adreno_dev, snapshot);
+
+	/*
+	 * First try to dump ringbuffers using context bad HFI payloads
+	 * because they have all the ringbuffer parameters. If ringbuffer
+	 * payloads are not present, fall back to dumping ringbuffers
+	 * based on MEMKIND_RB
+	 */
+	if (GMU_VER_MINOR(gmu->ver.hfi) < 2)
+		parse_payload = parse_payload_rb_legacy(adreno_dev, snapshot);
+	else
+		parse_payload = parse_payload_rb(adreno_dev, snapshot);
+
+	if (parse_payload)
+		skip_memkind_rb = true;
+
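+	/*
+	 * Walk the GMU memory allocation table and add a snapshot section for
+	 * each buffer type that is useful for offline debug.
+	 */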
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i];
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_RB && !skip_memkind_rb)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_RB_V2,
+				snapshot, adreno_hwsched_snapshot_rb,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_SCRATCH)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_PROFILE)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_CSW_SMMU_INFO)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE)
+			snapshot_preemption_records(device, snapshot,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_AQE_BUFFER)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, snapshot_aqe_buffer,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_HW_FENCE) {
+			struct gmu_mem_type_desc desc;
+
+			desc.memdesc = entry->md;
+			desc.type = SNAPSHOT_GMU_MEM_HW_FENCE;
+
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+				snapshot, gen7_snapshot_gmu_mem, &desc);
+		}
+
+	}
+
+	if (!adreno_hwsched_context_queue_enabled(adreno_dev))
+		return;
+
+	read_lock(&device->context_lock);
+	idr_for_each(&device->context_idr, snapshot_context_queue, snapshot);
+	read_unlock(&device->context_lock);
+}
+
+static int gmu_clock_set_rate(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	/* Switch to min GMU clock */
+	gen7_rdpm_cx_freq_update(gmu, gmu->freqs[0] / 1000);
+
+	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk",
+			gmu->freqs[0]);
+	if (ret)
+		dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n",
+			gmu->freqs[0], ret);
+
+	trace_kgsl_gmu_pwrlevel(gmu->freqs[0], gmu->freqs[GMU_MAX_PWRLEVELS - 1]);
+
+	return ret;
+}
+
+static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int level, ret = 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = gen7_gmu_enable_clks(adreno_dev, GMU_MAX_PWRLEVELS - 1);
+	if (ret)
+		goto gdsc_off;
+
+	/*
+	 * Enable AHB timeout detection to catch any register access that takes
+	 * longer than expected before the NOC timeout is detected. Enable this
+	 * logic before any register access, which happens just after enabling
+	 * the clocks.
+	 */
+	gen7_enable_ahb_timeout_detection(adreno_dev);
+
+	/* Initialize the CX timer */
+	gen7_cx_timer_init(adreno_dev);
+
+	ret = gen7_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen7_gmu_itcm_shadow(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
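+	/* The PDC and RSC ucode only need to be loaded once */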
+	if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) {
+		ret = gen7_load_pdc_ucode(adreno_dev);
+		if (ret)
+			goto clks_gdsc_off;
+
+		gen7_load_rsc_ucode(adreno_dev);
+		set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags);
+	}
+
+	ret = gen7_scm_gpu_init_cx_regs(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	gen7_gmu_register_config(adreno_dev);
+
+	ret = gen7_gmu_version_info(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	if (GMU_VER_MINOR(gmu->ver.hfi) < 2)
+		set_bit(ADRENO_HWSCHED_CTX_BAD_LEGACY, &adreno_dev->hwsched.flags);
+
+	gen7_gmu_irq_enable(adreno_dev);
+
+	/* Vote for minimal DDR BW for GMU to init */
+	level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min;
+
+	/* From this GMU firmware version onwards, all RBBM interrupts are handled by the GMU */
+	if (gmu->ver.core >= GMU_VERSION(5, 01, 06))
+		adreno_irq_free(adreno_dev);
+
+	icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level]));
+
+	/* Clear any hwsched faults that might have been left over */
+	adreno_hwsched_clear_fault(adreno_dev);
+
+	ret = gen7_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	gen7_get_gpu_feature_info(adreno_dev);
+
+	ret = gen7_hwsched_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gmu_clock_set_rate(adreno_dev);
+	if (ret) {
+		gen7_hwsched_hfi_stop(adreno_dev);
+		goto err;
+	}
+
+	if (gen7_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE) == 1 &&
+		!WARN_ONCE(!adreno_dev->gpucore->num_ddr_channels,
+			"Number of DDR channel is not specified in gpu core")) {
+		adreno_dev->gmu_ab = true;
+		set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv);
+	}
+
+	icc_set_bw(pwr->icc_path, 0, 0);
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	gen7_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		gen7_gmu_suspend(adreno_dev);
+
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen7_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = gen7_gmu_enable_clks(adreno_dev, GMU_MAX_PWRLEVELS - 1);
+	if (ret)
+		goto gdsc_off;
+
+	/*
+	 * Enable AHB timeout detection to catch any register access that takes
+	 * longer than expected before the NOC timeout is detected. Enable this
+	 * logic before any register access, which happens just after enabling
+	 * the clocks.
+	 */
+	gen7_enable_ahb_timeout_detection(adreno_dev);
+
+	/* Initialize the CX timer */
+	gen7_cx_timer_init(adreno_dev);
+
+	ret = gen7_rscc_wakeup_sequence(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen7_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	gen7_gmu_register_config(adreno_dev);
+
+	gen7_gmu_irq_enable(adreno_dev);
+
+	/* Clear any hwsched faults that might have been left over */
+	adreno_hwsched_clear_fault(adreno_dev);
+
+	ret = gen7_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hwsched_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gmu_clock_set_rate(adreno_dev);
+	if (ret) {
+		gen7_hwsched_hfi_stop(adreno_dev);
+		goto err;
+	}
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+err:
+	gen7_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		gen7_gmu_suspend(adreno_dev);
+
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen7_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+void gen7_hwsched_active_count_put(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (WARN(atomic_read(&device->active_cnt) == 0,
+		"Unbalanced get/put calls to KGSL active count\n"))
+		return;
+
+	if (atomic_dec_and_test(&device->active_cnt)) {
+		kgsl_pwrscale_update_stats(device);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	wake_up(&device->active_cnt_wq);
+}
+
+static int gen7_hwsched_notify_slumber(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct hfi_prep_slumber_cmd req;
+	int ret;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER);
+	if (ret)
+		return ret;
+
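+	/* Send the default power level and bus level to the GMU as part of the slumber request */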
+	req.freq = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1;
+	req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq;
+
+	req.bw |= gen7_bus_ab_quantize(adreno_dev, 0);
+	/* Disable the power counter so that the GMU is not busy */
+	gmu_core_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0);
+
+	ret = gen7_hfi_send_cmd_async(adreno_dev, &req, sizeof(req));
+
+	/*
+	 * GEMNOC can enter power collapse state during GPU power down sequence.
+	 * This could abort CX GDSC collapse. Assert Qactive to avoid this.
+	 */
+	gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1);
+
+	return ret;
+}
+
+static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	if (device->gmu_fault)
+		goto error;
+
+	/* Wait for the lowest idle level we requested */
+	ret = gen7_gmu_wait_for_lowest_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen7_hwsched_notify_slumber(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen7_gmu_wait_for_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen7_rscc_sleep_sequence(adreno_dev);
+
+	gen7_rdpm_mx_freq_update(gmu, 0);
+
+	/* Now that we are done with the GMU and GPU, clear the GBIF */
+	ret = gen7_halt_gbif(adreno_dev);
+
+	gen7_gmu_irq_disable(adreno_dev);
+
+	gen7_hwsched_hfi_stop(adreno_dev);
+
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen7_rdpm_cx_freq_update(gmu, 0);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+
+	return ret;
+
+error:
+	gen7_gmu_irq_disable(adreno_dev);
+	gen7_hwsched_hfi_stop(adreno_dev);
+	gen7_gmu_suspend(adreno_dev);
+
+	return ret;
+}
+
+static void gen7_hwsched_init_ucode_regs(struct adreno_device *adreno_dev)
+{
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Program the ucode base for CP */
+	kgsl_regwrite(device, GEN7_CP_SQE_INSTR_BASE_LO,
+		lower_32_bits(fw->memdesc->gpuaddr));
+	kgsl_regwrite(device, GEN7_CP_SQE_INSTR_BASE_HI,
+		upper_32_bits(fw->memdesc->gpuaddr));
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) {
+		fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE);
+
+		/* Program the ucode base for AQE0 (BV coprocessor) */
+		kgsl_regwrite(device, GEN7_CP_AQE_INSTR_BASE_LO_0,
+			lower_32_bits(fw->memdesc->gpuaddr));
+		kgsl_regwrite(device, GEN7_CP_AQE_INSTR_BASE_HI_0,
+			upper_32_bits(fw->memdesc->gpuaddr));
+
+		/* Program the ucode base for AQE1 (LPAC coprocessor) */
+		if (adreno_dev->lpac_enabled) {
+			kgsl_regwrite(device, GEN7_CP_AQE_INSTR_BASE_LO_1,
+				      lower_32_bits(fw->memdesc->gpuaddr));
+			kgsl_regwrite(device, GEN7_CP_AQE_INSTR_BASE_HI_1,
+				      upper_32_bits(fw->memdesc->gpuaddr));
+		}
+	}
+}
+
+static int gen7_hwsched_gpu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		goto err;
+
+	ret = gen7_gmu_oob_set(device, oob_gpu);
+	if (ret)
+		goto err;
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data));
+
+	gen7_start(adreno_dev);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	adreno_perfcounter_start(adreno_dev);
+
+	/* Clear FSR here in case it is set from a previous pagefault */
+	kgsl_mmu_clear_fsr(&device->mmu);
+
+	gen7_enable_gpu_irq(adreno_dev);
+
+	gen7_hwsched_init_ucode_regs(adreno_dev);
+
+	ret = gen7_hwsched_boot_gpu(adreno_dev);
+	if (ret)
+		goto err;
+
+	/*
+	 * At this point it is safe to assume that we recovered. Setting
+	 * this field allows us to take a new snapshot for the next failure
+	 * if we are prioritizing the first unrecoverable snapshot.
+	 */
+	if (device->snapshot)
+		device->snapshot->recovered = true;
+
+	device->reset_counter++;
+
+	/*
+	 * If warmboot is enabled and a sysfs node was switched, we will do a coldboot
+	 * on the subsequent slumber exit. Once that is done, clear this bit so that
+	 * the next run can use warmboot.
+	 */
+	clear_bit(ADRENO_DEVICE_FORCE_COLDBOOT, &adreno_dev->priv);
+err:
+	gen7_gmu_oob_clear(device, oob_gpu);
+
+	if (ret)
+		gen7_hwsched_gmu_power_off(adreno_dev);
+
+	return ret;
+}
+
+static void hwsched_idle_timer(struct timer_list *t)
+{
+	struct kgsl_device *device = container_of(t, struct kgsl_device,
+					idle_timer);
+
+	kgsl_schedule_work(&device->idle_check_ws);
+}
+
+static int gen7_gmu_warmboot_init(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_GMU_WARMBOOT))
+		return ret;
+
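+	/* Reserve the scratch buffers used by the GMU init and GPU boot warmboot sequences */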
+	if (IS_ERR_OR_NULL(gmu->gmu_init_scratch)) {
+		gmu->gmu_init_scratch = gen7_reserve_gmu_kernel_block(gmu, 0,
+				SZ_4K, GMU_CACHE, 0);
+		ret = PTR_ERR_OR_ZERO(gmu->gmu_init_scratch);
+		if (ret)
+			return ret;
+	}
+
+	if (IS_ERR_OR_NULL(gmu->gpu_boot_scratch)) {
+		gmu->gpu_boot_scratch = gen7_reserve_gmu_kernel_block(gmu, 0,
+				SZ_4K, GMU_CACHE, 0);
+		ret = PTR_ERR_OR_ZERO(gmu->gpu_boot_scratch);
+	}
+
+	return ret;
+}
+
+static int gen7_hwsched_gmu_memory_init(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	/* GMU Virtual register bank */
+	if (IS_ERR_OR_NULL(gmu->vrb)) {
+		gmu->vrb = gen7_reserve_gmu_kernel_block(gmu, 0, GMU_VRB_SIZE,
+						GMU_NONCACHED_KERNEL, 0);
+
+		if (IS_ERR(gmu->vrb))
+			return PTR_ERR(gmu->vrb);
+
+		/* Populate size of the virtual register bank */
+		gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX,
+					gmu->vrb->size >> 2);
+	}
+
+	/* GMU trace log */
+	if (IS_ERR_OR_NULL(gmu->trace.md)) {
+		gmu->trace.md = gen7_reserve_gmu_kernel_block(gmu, 0,
+					GMU_TRACE_SIZE, GMU_NONCACHED_KERNEL, 0);
+
+		if (IS_ERR(gmu->trace.md))
+			return PTR_ERR(gmu->trace.md);
+
+		/* Pass trace buffer address to GMU through the VRB */
+		gmu_core_set_vrb_register(gmu->vrb->hostptr,
+					VRB_TRACE_BUFFER_ADDR_IDX,
+					gmu->trace.md->gmuaddr);
+
+		/* Initialize the GMU trace buffer header */
+		gmu_core_trace_header_init(&gmu->trace);
+	}
+
+	return 0;
+}
+
+static int gen7_hwsched_gmu_init(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = gen7_gmu_parse_fw(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_gmu_memory_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_gmu_warmboot_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_hwsched_gmu_memory_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	return gen7_hwsched_hfi_init(adreno_dev);
+}
+
+static void gen7_hwsched_touch_wakeup(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	/*
+	 * Do not wake up a suspended device, or a device that has not yet
+	 * completed its first boot sequence.
+	 */
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) ||
+		!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		goto done;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen7_hwsched_gmu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	ret = gen7_hwsched_gpu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+done:
+	/*
+	 * When waking up from a touch event we want to stay active long enough
+	 * for the user to send a draw command. The default idle timer timeout
+	 * is shorter than we want, so push the idle timer out further for this
+	 * special case.
+	 */
+	mod_timer(&device->idle_timer, jiffies +
+		msecs_to_jiffies(adreno_wake_timeout));
+}
+
+static int gen7_hwsched_boot(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	adreno_hwsched_start(adreno_dev);
+
+	ret = gen7_hwsched_gmu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_hwsched_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_start_idle_timer(device);
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return ret;
+}
+
+static int gen7_aqe_microcode_read(struct adreno_device *adreno_dev)
+{
+	struct adreno_firmware *aqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_AQE))
+		return 0;
+
+	return adreno_get_firmware(adreno_dev, gen7_core->aqefw_name, aqe_fw);
+}
+
+static int gen7_hwsched_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return gen7_hwsched_boot(adreno_dev);
+
+	adreno_hwsched_start(adreno_dev);
+
+	ret = gen7_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_aqe_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_hwsched_gmu_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen7_hwsched_gmu_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen7_hwsched_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+
+	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
+						 ADRENO_COOP_RESET);
+
+	set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags);
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	/*
+	 * BCL needs its Central Broadcast register to be programmed from TZ.
+	 * This programming happens only when the zap shader firmware load is
+	 * successful. Since the zap firmware load can fail in the boot-up path,
+	 * enable BCL only after the first boot completes successfully, which
+	 * ensures the Central Broadcast register was programmed before BCL is
+	 * enabled.
+	 */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		adreno_dev->bcl_enabled = true;
+
+	/*
+	 * There is a possible deadlock scenario during kgsl firmware reading
+	 * (request_firmware) and devfreq update calls. During first boot, kgsl
+	 * device mutex is held and then request_firmware is called for reading
+	 * firmware. request_firmware internally takes dev_pm_qos_mtx lock.
+	 * Whereas in case of devfreq update calls triggered by thermal/bcl or
+	 * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then
+	 * tries to take kgsl device mutex as part of get_dev_status/target
+	 * calls. This results in a deadlock when both threads are unable to acquire
+	 * the mutex held by the other thread. Enable devfreq updates now as we are
+	 * done reading all firmware files.
+	 */
+	device->pwrscale.devfreq_enabled = true;
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return 0;
+}
+
+/**
 * drain_ctx_hw_fences_cpu - Force trigger, from the CPU, the hardware fences
 * that were not sent to the TxQueue by the GMU
+ */
+static void drain_ctx_hw_fences_cpu(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	spin_lock(&drawctxt->lock);
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		gen7_trigger_hw_fence_cpu(adreno_dev, entry);
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+	}
+	spin_unlock(&drawctxt->lock);
+}
+
+static void drain_hw_fences_cpu(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context;
+	int id;
+
+	read_lock(&device->context_lock);
+	idr_for_each_entry(&device->context_idr, context, id) {
+		if (context->gmu_registered)
+			drain_ctx_hw_fences_cpu(adreno_dev, ADRENO_CONTEXT(context));
+	}
+	read_unlock(&device->context_lock);
+}
+
+/**
+ * check_inflight_hw_fences - During SLUMBER entry, we must make sure all hardware fences across
 * all registered contexts have been sent to the TxQueue. If not, take a snapshot.
+ */
+static int check_inflight_hw_fences(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context;
+	int id, ret = 0;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags))
+		return 0;
+
+	read_lock(&device->context_lock);
+	idr_for_each_entry(&device->context_idr, context, id) {
+
+		if (context->gmu_registered) {
+			ret = gen7_hwsched_check_context_inflight_hw_fences(adreno_dev,
+				ADRENO_CONTEXT(context));
+			if (ret)
+				break;
+		}
+	}
+	read_unlock(&device->context_lock);
+
+	if (ret)
+		gmu_core_fault_snapshot(device);
+
+	return ret;
+}
+
+static int gen7_hwsched_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+	bool drain_cpu = false;
+
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER);
+
+	ret = gen7_gmu_oob_set(device, oob_gpu);
+	if (ret) {
+		gen7_gmu_oob_clear(device, oob_gpu);
+		goto no_gx_power;
+	}
+
+	kgsl_pwrscale_update_stats(device);
+
+	/* Save active coresight registers if applicable */
+	adreno_coresight_stop(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+	gen7_gmu_oob_clear(device, oob_gpu);
+
+no_gx_power:
+	kgsl_pwrctrl_irq(device, false);
+
+	/* Make sure GMU has sent all hardware fences to TxQueue */
+	if (check_inflight_hw_fences(adreno_dev))
+		drain_cpu = true;
+
+	gen7_hwsched_gmu_power_off(adreno_dev);
+
+	/* Now that we are sure that GMU is powered off, drain pending fences */
+	if (drain_cpu)
+		drain_hw_fences_cpu(adreno_dev);
+
+	adreno_hwsched_unregister_contexts(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	del_timer_sync(&device->idle_timer);
+
+	kgsl_pwrscale_sleep(device);
+
+	kgsl_pwrctrl_clear_l3_vote(device);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+
+	return ret;
+}
+
+static void check_hw_fence_unack_count(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 unack_count;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
+		return;
+
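+	/* Process any pending GMU message queue traffic so the unack count is up to date */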
+	gen7_hwsched_process_msgq(adreno_dev);
+
+	spin_lock(&hfi->hw_fence.lock);
+	unack_count = hfi->hw_fence.unack_count;
+	spin_unlock(&hfi->hw_fence.lock);
+
+	if (!unack_count)
+		return;
+
+	dev_err(&gmu->pdev->dev, "hardware fence unack_count(%d) isn't zero before SLUMBER\n",
+		unack_count);
+	gmu_core_fault_snapshot(device);
+}
+
+static void hwsched_idle_check(struct work_struct *work)
+{
+	struct kgsl_device *device = container_of(work,
+					struct kgsl_device, idle_check_ws);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	mutex_lock(&device->mutex);
+
+	if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags))
+		goto done;
+
+	if (atomic_read(&device->active_cnt) || time_is_after_jiffies(device->idle_jiffies)) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
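+	/*
+	 * If a submission arrived while we were idle-checking, back off and
+	 * restart the idle timer; otherwise block inline submissions while
+	 * powering off.
+	 */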
+	spin_lock(&device->submit_lock);
+	if (device->submit_now) {
+		spin_unlock(&device->submit_lock);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	device->skip_inline_submit = true;
+	spin_unlock(&device->submit_lock);
+
+	if (!gen7_hw_isidle(adreno_dev)) {
+		dev_err(device->dev, "GPU isn't idle before SLUMBER\n");
+		gmu_core_fault_snapshot(device);
+	}
+
+	check_hw_fence_unack_count(adreno_dev);
+
+	gen7_hwsched_power_off(adreno_dev);
+
+done:
+	mutex_unlock(&device->mutex);
+}
+
+static int gen7_hwsched_first_open(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/*
+	 * Do the one-time setup that needs to happen when we
+	 * attempt to boot the gpu for the very first time.
+	 */
+	ret = gen7_hwsched_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * A client that does a first_open but never closes the device
+	 * may prevent us from going back to SLUMBER. So trigger the idle
+	 * check by incrementing the active count and immediately releasing it.
+	 */
+	atomic_inc(&device->active_cnt);
+	gen7_hwsched_active_count_put(adreno_dev);
+
+	return 0;
+}
+
+int gen7_hwsched_active_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return -EINVAL;
+
+	if ((atomic_read(&device->active_cnt) == 0))
+		ret = gen7_hwsched_boot(adreno_dev);
+
+	if (ret == 0)
+		atomic_inc(&device->active_cnt);
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	return ret;
+}
+
+static DEFINE_PER_CPU(struct freq_qos_request, qos_min_req);
+#define GEN7_9_X_GPU_BUSY_THRESHOLD (65)
+#define GEN7_9_X_GPU_FREQ_THRESHOLD_KHZ (700 * 1000)
+#define GEN7_9_X_FREQ_QOS_CPUID_0_KHZ 960000
+#define GEN7_9_X_FREQ_QOS_CPUID_5_KHZ 1132800
+#define CPUID_0 0
+#define CPUID_5 5
+
+static void _cpu_perf_vote_req_init(u32 cpu)
+{
+	struct cpufreq_policy *policy;
+	struct freq_qos_request *req;
+
+	policy = cpufreq_cpu_get(cpu);
+	if (!policy)
+		return;
+
+	req = &per_cpu(qos_min_req, cpu);
+
+	freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
+
+	cpufreq_cpu_put(policy);
+}
+
+static void _cpu_perf_vote_update(u32 cpu, u32 freq)
+{
+	freq_qos_update_request(&per_cpu(qos_min_req, cpu), freq);
+}
+
+/*
+ * Make the vote based on the enable/disable param.
+ * Return true on enable, false if skipped or disabled.
+ */
+static bool _cpu_perf_vote_req(struct adreno_device *adreno_dev, bool enable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats;
+	u32 busy_percent = 0;
+
+	/* Get GPU busy percentage */
+	if (stats->total_old != 0)
+		busy_percent = (stats->busy_old * 100) / stats->total_old;
+
+	/* Skip if enabling AND we aren't busy */
+	if (enable && (busy_percent <= GEN7_9_X_GPU_BUSY_THRESHOLD))
+		return false;
+
+	_cpu_perf_vote_update(CPUID_0, enable ? GEN7_9_X_FREQ_QOS_CPUID_0_KHZ :
+				FREQ_QOS_MIN_DEFAULT_VALUE);
+	_cpu_perf_vote_update(CPUID_5, enable ? GEN7_9_X_FREQ_QOS_CPUID_5_KHZ :
+				FREQ_QOS_MIN_DEFAULT_VALUE);
+
+	/* Return the requested enablement */
+	return enable;
+}
+
+static void _cpu_perf_vote_init(void)
+{
+	static bool init_done;
+
+	if (init_done)
+		return;
+
+	_cpu_perf_vote_req_init(CPUID_0);
+	_cpu_perf_vote_req_init(CPUID_5);
+	init_done = true;
+}
+
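+/*
+ * On gen7_9_x targets, raise the minimum CPU frequency of CPUID_0 and CPUID_5
+ * while the GPU is busy above GEN7_9_X_GPU_BUSY_THRESHOLD and clocked at or
+ * above GEN7_9_X_GPU_FREQ_THRESHOLD_KHZ, and drop the vote once the GPU
+ * frequency falls below that threshold.
+ */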
+static void _cpu_perf_vote(struct adreno_device *adreno_dev, u32 req_freq)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	static bool cpu_vote_state;
+	u32 freq;
+
+	if (!adreno_is_gen7_9_x(adreno_dev))
+		return;
+
+	_cpu_perf_vote_init();
+
+	freq = gmu->dcvs_table.gx_votes[req_freq].freq;
+	if (!cpu_vote_state && (freq >= GEN7_9_X_GPU_FREQ_THRESHOLD_KHZ))
+		cpu_vote_state = _cpu_perf_vote_req(adreno_dev, true);
+	else if (cpu_vote_state && (freq < GEN7_9_X_GPU_FREQ_THRESHOLD_KHZ))
+		cpu_vote_state = _cpu_perf_vote_req(adreno_dev, false);
+}
+
+static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev,
+		int gpu_pwrlevel, int bus_level, u32 ab)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_dcvs_table *table = &gmu->dcvs_table;
+	struct hfi_gx_bw_perf_vote_cmd req = {
+		.ack_type = DCVS_ACK_BLOCK,
+		.freq = INVALID_DCVS_IDX,
+		.bw = INVALID_DCVS_IDX,
+	};
+	int ret;
+
+	if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags))
+		return 0;
+
+	/* Do not vote for the XO (or lower) GPU clock level through the GMU */
+	if ((gpu_pwrlevel != INVALID_DCVS_IDX) &&
+			(gpu_pwrlevel >= table->gpu_level_num - 1)) {
+		dev_err(&gmu->pdev->dev, "Invalid gpu dcvs request: %d\n",
+			gpu_pwrlevel);
+		return -EINVAL;
+	}
+
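+	/* Convert the KGSL power level into the GMU DCVS table index */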
+	if (gpu_pwrlevel < table->gpu_level_num - 1)
+		req.freq = table->gpu_level_num - gpu_pwrlevel - 1;
+
+	if (bus_level < pwr->ddr_table_count && bus_level > 0)
+		req.bw = bus_level;
+
+	req.bw |= gen7_bus_ab_quantize(adreno_dev, ab);
+
+	/* GMU will vote for slumber levels through the sleep sequence */
+	if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_BW_VOTE))
+		return 0;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE);
+	if (ret)
+		return ret;
+
+	ret = gen7_hfi_send_cmd_async(adreno_dev, &req, sizeof(req));
+
+	if (ret) {
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Failed to set GPU perf idx %u, bw idx %u\n",
+			req.freq, req.bw);
+
+		/*
+		 * If this was a dcvs request alongside an active gpu, request
+		 * dispatcher-based reset and recovery.
+		 */
+		if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+			gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+
+	if (!ret && req.freq != INVALID_DCVS_IDX) {
+		gen7_rdpm_mx_freq_update(gmu, gmu->dcvs_table.gx_votes[req.freq].freq);
+		_cpu_perf_vote(adreno_dev, req.freq);
+	}
+
+	return ret;
+}
+
+static int gen7_hwsched_clock_set(struct adreno_device *adreno_dev,
+	u32 pwrlevel)
+{
+	return gen7_hwsched_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX, INVALID_AB_VALUE);
+}
+
+static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	static unsigned long prev_freq;
+	unsigned long freq = gmu->freqs[0];
+
+	if (!gmu->perf_ddr_bw)
+		return;
+
+	/*
+	 * Scale the GMU if DDR is at a CX corner at which GMU can run at
+	 * a higher frequency
+	 */
+	if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw)
+		freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1];
+
+	if (prev_freq == freq)
+		return;
+
+	if (kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", freq)) {
+		dev_err(&gmu->pdev->dev, "Unable to set the GMU clock to %ld\n",
+			freq);
+		return;
+	}
+
+	gen7_rdpm_cx_freq_update(gmu, freq / 1000);
+
+	trace_kgsl_gmu_pwrlevel(freq, prev_freq);
+
+	prev_freq = freq;
+}
+
+static int gen7_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel,
+	u32 ab)
+{
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int ret = 0;
+
+	/* Skip the icc path for targets that support ACV voting from the GMU */
+	if (!gen7_core->acv_perfmode_vote)
+		kgsl_icc_set_tag(pwr, buslevel);
+
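+	/* Drop votes that are unchanged from the current state (or a zero AB vote) */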
+	if (buslevel == pwr->cur_buslevel)
+		buslevel = INVALID_DCVS_IDX;
+
+	if ((ab == pwr->cur_ab) || (ab == 0))
+		ab = INVALID_AB_VALUE;
+
+	if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX))
+		return 0;
+
+	ret = gen7_hwsched_dcvs_set(adreno_dev, INVALID_DCVS_IDX,
+			buslevel, ab);
+	if (ret)
+		return ret;
+
+	if (buslevel != INVALID_DCVS_IDX) {
+		scale_gmu_frequency(adreno_dev, buslevel);
+
+		pwr->cur_buslevel = buslevel;
+	}
+
+	if (ab != INVALID_AB_VALUE) {
+		if (!adreno_dev->gmu_ab)
+			icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0);
+		pwr->cur_ab = ab;
+	}
+
+	trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab);
+	return ret;
+}
+
+static int gen7_hwsched_pm_suspend(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND);
+
+	/* Halt any new submissions */
+	reinit_completion(&device->halt_gate);
+
+	/*
+	 * Wait for the dispatcher to retire everything by waiting
+	 * for the active count to go to zero.
+	 */
+	ret = kgsl_active_count_wait(device, 0, msecs_to_jiffies(100));
+	if (ret) {
+		dev_err(device->dev, "Timed out waiting for the active count\n");
+		goto err;
+	}
+
+	ret = adreno_hwsched_idle(adreno_dev);
+	if (ret)
+		goto err;
+
+	gen7_hwsched_power_off(adreno_dev);
+
+	adreno_get_gpu_halt(adreno_dev);
+
+	set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
+
+	return 0;
+
+err:
+	adreno_hwsched_start(adreno_dev);
+
+	return ret;
+}
+
+static void gen7_hwsched_pm_resume(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags),
+		"resume invoked without a suspend\n"))
+		return;
+
+	adreno_put_gpu_halt(adreno_dev);
+
+	adreno_hwsched_start(adreno_dev);
+
+	clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+}
+
+void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 mask;
+
+	/* Temporarily mask the watchdog interrupt to prevent a storm */
+	gmu_core_regread(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK,
+		&mask);
+	gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK,
+			(mask | GMU_INT_WDOG_BITE));
+
+	gen7_gmu_send_nmi(device, false);
+
+	dev_err_ratelimited(&gmu->pdev->dev,
+			"GMU watchdog expired interrupt received\n");
+
+	gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+static void gen7_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct pending_cmd *cmd = NULL;
+
+	read_lock(&hfi->msglock);
+
+	list_for_each_entry(cmd, &hfi->msglist, node) {
+		if (MSG_HDR_GET_ID(cmd->sent_hdr) == H2F_MSG_UNREGISTER_CONTEXT)
+			complete(&cmd->complete);
+	}
+
+	read_unlock(&hfi->msglock);
+}
+
+/**
+ * process_context_hw_fences_after_reset - This function processes all hardware fences that were
 * sent to the GMU prior to recovery. If a fence was not retired by the GPU and the context is
 * still good, move it to the reset list.
+ */
+static void process_context_hw_fences_after_reset(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct list_head *reset_list)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	spin_lock(&drawctxt->lock);
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct adreno_context *drawctxt = entry->drawctxt;
+		struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+		bool retired = kgsl_check_timestamp(device, &drawctxt->base, (u32)entry->cmd.ts);
+
+		/* Delete the fences that GMU has sent to the TxQueue */
+		if (timestamp_cmp(hdr->out_fence_ts, (u32)entry->cmd.ts) >= 0) {
+			gen7_remove_hw_fence_entry(adreno_dev, entry);
+			continue;
+		}
+
+		/*
+		 * Force retire the fences if the corresponding submission is retired by GPU
+		 * or if the context has gone bad
+		 */
+		if (retired || kgsl_context_is_bad(&drawctxt->base))
+			entry->cmd.flags |= HW_FENCE_FLAG_SKIP_MEMSTORE;
+
+		list_add_tail(&entry->reset_node, reset_list);
+	}
+	spin_unlock(&drawctxt->lock);
+}
+
+/**
+ * process_inflight_hw_fences_after_reset - Send hardware fences from all contexts back to the GMU
+ * after fault recovery. We must wait for ack when sending each of these fences to GMU so as to
+ * avoid sending a large number of hardware fences in a short span of time.
+ */
+static int process_inflight_hw_fences_after_reset(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context = NULL;
+	int id, ret = 0;
+	struct list_head hw_fence_list;
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	/*
+	 * Since we need to wait for an ack from the GMU when sending each inflight fence back, we
+	 * cannot send them from within atomic context. Hence, walk each context's list of such
+	 * hardware fences and add them to this local list, then walk the local list to send all of
+	 * these fences to the GMU.
+	 */
+	INIT_LIST_HEAD(&hw_fence_list);
+
+	read_lock(&device->context_lock);
+	idr_for_each_entry(&device->context_idr, context, id) {
+		process_context_hw_fences_after_reset(adreno_dev, ADRENO_CONTEXT(context),
+			&hw_fence_list);
+	}
+	read_unlock(&device->context_lock);
+
+	list_for_each_entry_safe(entry, tmp, &hw_fence_list, reset_node) {
+
+		/*
+		 * This is part of the reset sequence and any error in this path will be handled by
+		 * the caller.
+		 */
+		ret = gen7_send_hw_fence_hfi_wait_ack(adreno_dev, entry, 0);
+		if (ret)
+			break;
+
+		list_del_init(&entry->reset_node);
+	}
+
+	return ret;
+}
+
+/**
+ * process_detached_hw_fences_after_reset - Send fences that couldn't be sent to GMU when a context
+ * got detached. We must wait for ack when sending each of these fences to GMU so as to avoid
+ * sending a large number of hardware fences in a short span of time.
+ */
+static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_dev)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct kgsl_context *context = NULL;
+	int ret = 0;
+
+	list_for_each_entry_safe(entry, tmp, &hfi->detached_hw_fence_list, node) {
+
+		/*
+		 * This is part of the reset sequence and any error in this path will be handled by
+		 * the caller.
+		 */
+		ret = gen7_send_hw_fence_hfi_wait_ack(adreno_dev, entry,
+			HW_FENCE_FLAG_SKIP_MEMSTORE);
+		if (ret)
+			return ret;
+
+		context = &entry->drawctxt->base;
+
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+
+		kgsl_context_put(context);
+	}
+
+	return ret;
+}
+
+static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context = NULL;
+	struct adreno_context *guilty = NULL;
+	int id, ret = 0;
+
+	read_lock(&device->context_lock);
+	idr_for_each_entry(&device->context_idr, context, id) {
+		if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv) &&
+			_kgsl_context_get(context)) {
+			guilty = ADRENO_CONTEXT(context);
+			break;
+		}
+	}
+	read_unlock(&device->context_lock);
+
+	if (!guilty)
+		return 0;
+
+	/*
+	 * We don't need the drawctxt spinlock to signal these fences since the only other place
+	 * that can access them is the context detach path, and the device mutex ensures mutual
+	 * exclusion between the recovery and detach threads.
+	 */
+	ret = gen7_hwsched_drain_context_hw_fences(adreno_dev, guilty);
+
+	kgsl_context_put(&guilty->base);
+
+	return ret;
+}
+
+static int handle_hw_fences_after_reset(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = drain_guilty_context_hw_fences(adreno_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * We must do this after adreno_hwsched_replay() so that context registration
+	 * is done before we re-send the un-retired hardware fences to the GMU
+	 */
+	ret = process_inflight_hw_fences_after_reset(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = process_detached_hw_fences_after_reset(adreno_dev);
+	if (ret)
+		return ret;
+
+	return gen7_hwsched_disable_hw_fence_throttle(adreno_dev);
+}
+
+int gen7_hwsched_reset_replay(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	int ret;
+
+	/*
+	 * Any pending context unregister packets will be lost
+	 * since we hard reset the GMU. This means any threads waiting
+	 * for a context unregister hfi ack will time out. Wake them
+	 * to avoid false positive ack timeout messages later.
+	 */
+	gen7_hwsched_drain_ctxt_unregister(adreno_dev);
+
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	gen7_disable_gpu_irq(adreno_dev);
+
+	gen7_gmu_irq_disable(adreno_dev);
+
+	gen7_hwsched_hfi_stop(adreno_dev);
+
+	gen7_gmu_suspend(adreno_dev);
+
+	adreno_hwsched_unregister_contexts(adreno_dev);
+
+	adreno_llcc_slice_deactivate(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	/* Reset the unack count back to zero as we start afresh */
+	hfi->hw_fence.unack_count = 0;
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	/*
+	 * When we reset, we want to coldboot in case any scratch corruption
+	 * has occurred before we faulted.
+	 */
+	adreno_mark_for_coldboot(adreno_dev);
+
+	ret = gen7_hwsched_boot(adreno_dev);
+	if (ret)
+		goto done;
+
+	adreno_hwsched_replay(adreno_dev);
+
+	ret = handle_hw_fences_after_reset(adreno_dev);
+done:
+	BUG_ON(ret);
+
+	return ret;
+}
+
+const struct adreno_power_ops gen7_hwsched_power_ops = {
+	.first_open = gen7_hwsched_first_open,
+	.last_close = gen7_hwsched_power_off,
+	.active_count_get = gen7_hwsched_active_count_get,
+	.active_count_put = gen7_hwsched_active_count_put,
+	.touch_wakeup = gen7_hwsched_touch_wakeup,
+	.pm_suspend = gen7_hwsched_pm_suspend,
+	.pm_resume = gen7_hwsched_pm_resume,
+	.gpu_clock_set = gen7_hwsched_clock_set,
+	.gpu_bus_set = gen7_hwsched_bus_set,
+};
+
+const struct adreno_hwsched_ops gen7_hwsched_ops = {
+	.submit_drawobj = gen7_hwsched_submit_drawobj,
+	.preempt_count = gen7_hwsched_preempt_count_get,
+	.create_hw_fence = gen7_hwsched_create_hw_fence,
+};
+
+int gen7_hwsched_probe(struct platform_device *pdev,
+		u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	struct gen7_hwsched_device *gen7_hwsched_dev;
+	int ret;
+
+	gen7_hwsched_dev = devm_kzalloc(&pdev->dev, sizeof(*gen7_hwsched_dev),
+				GFP_KERNEL);
+	if (!gen7_hwsched_dev)
+		return -ENOMEM;
+
+	adreno_dev = &gen7_hwsched_dev->gen7_dev.adreno_dev;
+
+	adreno_dev->hwsched_enabled = true;
+
+	adreno_dev->irq_mask = GEN7_HWSCHED_INT_MASK;
+
+	ret = gen7_probe_common(pdev, adreno_dev, chipid, gpucore);
+	if (ret)
+		return ret;
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	INIT_WORK(&device->idle_check_ws, hwsched_idle_check);
+
+	timer_setup(&device->idle_timer, hwsched_idle_timer, 0);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
+		adreno_dev->lpac_enabled = true;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_DMS)) {
+		set_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv);
+		adreno_dev->dms_enabled = true;
+	}
+
+	kgsl_mmu_set_feature(device, KGSL_MMU_PAGEFAULT_TERMINATE);
+
+	ret = adreno_hwsched_init(adreno_dev, &gen7_hwsched_ops);
+	if (ret)
+		dev_err(&pdev->dev, "adreno hardware scheduler init failed ret %d\n", ret);
+
+	return ret;
+}
+
+int gen7_hwsched_add_to_minidump(struct adreno_device *adreno_dev)
+{
+	struct gen7_device *gen7_dev = container_of(adreno_dev,
+					struct gen7_device, adreno_dev);
+	struct gen7_hwsched_device *gen7_hwsched = container_of(gen7_dev,
+					struct gen7_hwsched_device, gen7_dev);
+	struct gen7_hwsched_hfi *hw_hfi = &gen7_hwsched->hwsched_hfi;
+	int ret, i;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HWSCHED_DEVICE,
+			(void *)(gen7_hwsched), sizeof(struct gen7_hwsched_device));
+	if (ret)
+		return ret;
+
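+	/* Register the GMU log, HFI memory, VRB and trace buffers if they were allocated */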
+	if (!IS_ERR_OR_NULL(gen7_dev->gmu.gmu_log)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_LOG_ENTRY,
+					gen7_dev->gmu.gmu_log->hostptr,
+					gen7_dev->gmu.gmu_log->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(gen7_dev->gmu.hfi.hfi_mem)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_HFIMEM_ENTRY,
+					gen7_dev->gmu.hfi.hfi_mem->hostptr,
+					gen7_dev->gmu.hfi.hfi_mem->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(gen7_dev->gmu.vrb)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_VRB_ENTRY,
+					gen7_dev->gmu.vrb->hostptr,
+					gen7_dev->gmu.vrb->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(gen7_dev->gmu.trace.md)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_TRACE_ENTRY,
+					gen7_dev->gmu.trace.md->hostptr,
+					gen7_dev->gmu.trace.md->size);
+		if (ret)
+			return ret;
+	}
+
+	/* Dump HFI hwsched global mem alloc entries */
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i];
+		char hfi_minidump_str[MAX_VA_MINIDUMP_STR_LEN] = {0};
+		u32 rb_id = 0;
+
+		if (!hfi_get_minidump_string(entry->desc.mem_kind,
+					     &hfi_minidump_str[0],
+					     sizeof(hfi_minidump_str), &rb_id)) {
+			ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+						      hfi_minidump_str,
+						      entry->md->hostptr,
+						      entry->md->size);
+			if (ret)
+				return ret;
+		}
+	}
+
+	if (!IS_ERR_OR_NULL(hw_hfi->big_ib)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					      KGSL_HFI_BIG_IB_ENTRY,
+					      hw_hfi->big_ib->hostptr,
+					      hw_hfi->big_ib->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(hw_hfi->big_ib_recurring))
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					      KGSL_HFI_BIG_IB_REC_ENTRY,
+					      hw_hfi->big_ib_recurring->hostptr,
+					      hw_hfi->big_ib_recurring->size);
+
+	return ret;
+}

+ 106 - 0
qcom/opensource/graphics-kernel/adreno_gen7_hwsched.h

@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_GEN7_HWSCHED_H_
+#define _ADRENO_GEN7_HWSCHED_H_
+
+#include "adreno_gen7_hwsched_hfi.h"
+
+/**
+ * struct gen7_hwsched_device - Container for the gen7 hwscheduling device
+ */
+struct gen7_hwsched_device {
+	/** @gen7_dev: Container for the gen7 device */
+	struct gen7_device gen7_dev;
+	/** @hwsched_hfi: Container for hwscheduling specific hfi resources */
+	struct gen7_hwsched_hfi hwsched_hfi;
+};
+
+/**
+ * gen7_hwsched_probe - Target specific probe for hwsched
+ * @pdev: Pointer to the platform device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore
+ *
 * The target-specific probe function for hwsched-enabled GMU targets.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hwsched_probe(struct platform_device *pdev,
+		u32 chipid, const struct adreno_gpu_core *gpucore);
+
+/**
 * gen7_hwsched_reset_replay - Restart the GMU and GPU and replay in-flight cmdbatches
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hwsched_reset_replay(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_snapshot - take gen7 hwsched snapshot
+ * @adreno_dev: Pointer to the adreno device
+ * @snapshot: Pointer to the snapshot instance
+ *
 * Snapshot the faulty IB and then snapshot the rest of the gen7 GMU state
+ */
+void gen7_hwsched_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+
+/**
+ * gen7_hwsched_handle_watchdog - Handle watchdog interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_active_count_get - Increment the active count
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function increments the active count. If active count
+ * is 0, this function also powers up the device.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hwsched_active_count_get(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_active_count_put - Put back the active count
+ * @adreno_dev: Pointer to the adreno device
+ *
 * This function decrements the active count and sets the idle
 * timer if the active count is zero.
+ */
+void gen7_hwsched_active_count_put(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_add_to_minidump - Register hwsched_device with va minidump
+ * @adreno_dev: Pointer to the adreno device
+ */
+int gen7_hwsched_add_to_minidump(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_send_recurring_cmdobj - Dispatch IBs to GMU
+ * @adreno_dev: Pointer to adreno device structure
+ * @cmdobj: The command object which needs to be submitted
+ *
 * This function is used to register the context if needed and to submit
 * recurring IBs to the GMU. Upon receiving the IPC interrupt, the GMU will
 * submit the recurring IBs to the GPU.
 *
 * Return: 0 on success and negative error on failure
+ */
+int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj);
+
+/**
+ * gen7_hwsched_fault - Set hwsched fault to request recovery
+ * @adreno_dev: A handle to adreno device
+ * @fault: The type of fault
+ */
+void gen7_hwsched_fault(struct adreno_device *adreno_dev, u32 fault);
+
+#endif

+ 4302 - 0
qcom/opensource/graphics-kernel/adreno_gen7_hwsched_hfi.c

@@ -0,0 +1,4302 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <dt-bindings/soc/qcom,ipcc.h>
+#include <linux/dma-fence-array.h>
+#include <linux/iommu.h>
+#include <linux/sched/clock.h>
+#include <linux/soc/qcom/msm_hw_fence.h>
+#include <soc/qcom/msm_performance.h>
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_gen7_hwsched.h"
+#include "adreno_hfi.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+#include "kgsl_device.h"
+#include "kgsl_eventlog.h"
+#include "kgsl_pwrctrl.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+#define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT)
+
+#define DEFINE_QHDR(gmuaddr, id, prio) \
+	{\
+		.status = 1, \
+		.start_addr = GMU_QUEUE_START_ADDR(gmuaddr, id), \
+		.type = QUEUE_HDR_TYPE(id, prio, 0, 0), \
+		.queue_size = SZ_4K >> 2, \
+		.msg_size = 0, \
+		.unused0 = 0, \
+		.unused1 = 0, \
+		.unused2 = 0, \
+		.unused3 = 0, \
+		.unused4 = 0, \
+		.read_index = 0, \
+		.write_index = 0, \
+}
+
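+/*
+ * Static distribution of GMU dispatch queues across ringbuffer priority
+ * levels. A separate table is used when LPAC is enabled, where the last
+ * dispatch queue is reserved for the LPAC ringbuffer.
+ */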
+static struct dq_info {
+	/** @max_dq: Maximum number of dispatch queues per RB level */
+	u32 max_dq;
+	/** @base_dq_id: Base dqid for level */
+	u32 base_dq_id;
+	/** @offset: Next dqid to use for roundrobin context assignment */
+	u32 offset;
+} gen7_hfi_dqs[KGSL_PRIORITY_MAX_RB_LEVELS] = {
+	{ 4, 0, }, /* RB0 */
+	{ 4, 4, }, /* RB1 */
+	{ 3, 8, }, /* RB2 */
+	{ 3, 11, }, /* RB3 */
+}, gen7_hfi_dqs_lpac[KGSL_PRIORITY_MAX_RB_LEVELS + 1] = {
+	{ 4, 0, }, /* RB0 */
+	{ 4, 4, }, /* RB1 */
+	{ 3, 8, }, /* RB2 */
+	{ 2, 11, }, /* RB3 */
+	{ 1, 13, }, /* RB LPAC */
+};
+
+struct pending_cmd hw_fence_ack;
+
+struct gen7_hwsched_hfi *to_gen7_hwsched_hfi(
+	struct adreno_device *adreno_dev)
+{
+	struct gen7_device *gen7_dev = container_of(adreno_dev,
+					struct gen7_device, adreno_dev);
+	struct gen7_hwsched_device *gen7_hwsched = container_of(gen7_dev,
+					struct gen7_hwsched_device, gen7_dev);
+
+	return &gen7_hwsched->hwsched_hfi;
+}
+
+int gen7_hfi_send_lpac_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	if (!adreno_dev->lpac_enabled)
+		return 0;
+
+	return gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_LPAC, 1, 0);
+}
+
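+/* Track an outstanding HFI command so that its ack can be matched and completed */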
+static void add_waiter(struct gen7_hwsched_hfi *hfi, u32 hdr,
+	struct pending_cmd *ack)
+{
+	memset(ack, 0x0, sizeof(*ack));
+
+	init_completion(&ack->complete);
+	write_lock_irq(&hfi->msglock);
+	list_add_tail(&ack->node, &hfi->msglist);
+	write_unlock_irq(&hfi->msglock);
+
+	ack->sent_hdr = hdr;
+}
+
+static void del_waiter(struct gen7_hwsched_hfi *hfi, struct pending_cmd *ack)
+{
+	write_lock_irq(&hfi->msglock);
+	list_del(&ack->node);
+	write_unlock_irq(&hfi->msglock);
+}
+
+static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct pending_cmd *cmd = NULL;
+	u32 waiters[64], num_waiters = 0, i;
+	u32 *ack = rcvd;
+	u32 hdr = ack[0];
+	u32 req_hdr = ack[1];
+	u32 size_bytes = MSG_HDR_GET_SIZE(hdr) << 2;
+
+	if (size_bytes > sizeof(cmd->results))
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Ack result too big: %d Truncating to: %ld\n",
+			size_bytes, sizeof(cmd->results));
+
+	read_lock(&hfi->msglock);
+
+	list_for_each_entry(cmd, &hfi->msglist, node) {
+		if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) {
+			memcpy(cmd->results, ack,
+				min_t(u32, size_bytes,
+					sizeof(cmd->results)));
+			complete(&cmd->complete);
+			read_unlock(&hfi->msglock);
+			return;
+		}
+
+		if (num_waiters < ARRAY_SIZE(waiters))
+			waiters[num_waiters++] = cmd->sent_hdr;
+	}
+
+	read_unlock(&hfi->msglock);
+
+	/* Didn't find the sender, list the waiter */
+	dev_err_ratelimited(&gmu->pdev->dev,
+		"Unexpectedly got id %d seqnum %d. Total waiters: %d Top %d Waiters:\n",
+		MSG_HDR_GET_ID(req_hdr), MSG_HDR_GET_SEQNUM(req_hdr),
+		num_waiters, min_t(u32, num_waiters, 5));
+
+	for (i = 0; i < num_waiters && i < 5; i++)
+		dev_err_ratelimited(&gmu->pdev->dev,
+			" id %d seqnum %d\n",
+			MSG_HDR_GET_ID(waiters[i]),
+			MSG_HDR_GET_SEQNUM(waiters[i]));
+}
+
+/* This function is called while holding the drawctxt spinlock */
+void gen7_remove_hw_fence_entry(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct adreno_context *drawctxt = entry->drawctxt;
+
+	atomic_dec(&hwsched->hw_fence_count);
+	drawctxt->hw_fence_count--;
+
+	dma_fence_put(&entry->kfence->fence);
+	list_del_init(&entry->node);
+	kmem_cache_free(hwsched->hw_fence_cache, entry);
+}
+
+static void _retire_inflight_hw_fences(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
+		return;
+
+	spin_lock(&drawctxt->lock);
+
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+
+		/*
+		 * Since this list is sorted by timestamp, abort on the first fence that hasn't
+		 * yet been sent to TxQueue
+		 */
+		if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)
+			break;
+
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+	}
+	spin_unlock(&drawctxt->lock);
+}
+
+static void log_profiling_info(struct adreno_device *adreno_dev, u32 *rcvd)
+{
+	struct hfi_ts_retire_cmd *cmd = (struct hfi_ts_retire_cmd *)rcvd;
+	struct kgsl_context *context;
+	struct retire_info info = {0};
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	context = kgsl_context_get(device, cmd->ctxt_id);
+	if (context == NULL)
+		return;
+
+	info.timestamp = cmd->ts;
+	info.rb_id = adreno_get_level(context);
+	info.gmu_dispatch_queue = context->gmu_dispatch_queue;
+	info.submitted_to_rb = cmd->submitted_to_rb;
+	info.sop = cmd->sop;
+	info.eop = cmd->eop;
+	if (GMU_VER_MINOR(gmu->ver.hfi) < 4)
+		info.active = cmd->eop - cmd->sop;
+	else
+		info.active = cmd->active;
+	info.retired_on_gmu = cmd->retired_on_gmu;
+
+	/* protected GPU work must not be reported */
+	if  (!(context->flags & KGSL_CONTEXT_SECURE))
+		kgsl_work_period_update(device, context->proc_priv->period,
+					     info.active);
+
+	trace_adreno_cmdbatch_retired(context, &info, 0, 0, 0);
+
+	log_kgsl_cmdbatch_retired_event(context->id, cmd->ts,
+		context->priority, 0, cmd->sop, cmd->eop);
+
+	_retire_inflight_hw_fences(adreno_dev, context);
+
+	kgsl_context_put(context);
+}
+
+/* Look up a particular key's value for a given type of payload */
+static u32 gen7_hwsched_lookup_key_value_legacy(struct adreno_device *adreno_dev,
+	u32 type, u32 key)
+{
+	struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd_legacy, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == type)
+			return adreno_hwsched_parse_payload(payload, key);
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+static u32 get_payload_rb_key_legacy(struct adreno_device *adreno_dev,
+	u32 rb_id, u32 key)
+{
+	struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd_legacy, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			u32 id = adreno_hwsched_parse_payload(payload, KEY_RB_ID);
+
+			if (id == rb_id)
+				return adreno_hwsched_parse_payload(payload, key);
+		}
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+struct syncobj_flags {
+	unsigned long mask;
+	const char *name;
+};
+
+static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj *syncobj, u32 index)
+{
+	u32 count = scnprintf(str, max_size, "syncobj[%d] ctxt_id:%llu seqno:%llu flags:", index,
+			syncobj->ctxt_id, syncobj->seq_no);
+	u32 i;
+	bool first = true;
+	static const struct syncobj_flags _flags[] = {
+		GMU_SYNCOBJ_FLAGS, { -1, NULL }};
+
+	for (i = 0; _flags[i].name; i++) {
+		if (!(syncobj->flags & _flags[i].mask))
+			continue;
+
+		if (first) {
+			count += scnprintf(str + count, max_size - count, "%s", _flags[i].name);
+			first = false;
+		} else {
+			count += scnprintf(str + count, max_size - count, "|%s", _flags[i].name);
+		}
+	}
+}
+
+static void log_syncobj(struct gen7_gmu_device *gmu, struct hfi_submit_syncobj *cmd)
+{
+	struct hfi_syncobj *syncobj = (struct hfi_syncobj *)&cmd[1];
+	char str[128];
+	u32 i = 0;
+
+	for (i = 0; i < cmd->num_syncobj; i++) {
+		_get_syncobj_string(str, sizeof(str), syncobj, i);
+		dev_err(&gmu->pdev->dev, "%s\n", str);
+		syncobj++;
+	}
+}
+
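+/*
+ * Walk the context's GMU queue looking for the H2F_MSG_ISSUE_SYNCOBJ packet
+ * that matches the given timestamp and log its fences. Called when the GMU
+ * reports a sync object timeout.
+ */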
+static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, u32 ts)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_context *context = NULL;
+	struct adreno_context *drawctxt;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gmu_context_queue_header *hdr;
+	struct hfi_submit_syncobj *cmd;
+	u32 *queue, i;
+	int ret;
+
+	/* We want to get the context even if it is detached */
+	read_lock(&device->context_lock);
+	context = idr_find(&device->context_idr, ctxt_id);
+	ret = _kgsl_context_get(context);
+	read_unlock(&device->context_lock);
+
+	if (!ret)
+		return;
+
+	drawctxt = ADRENO_CONTEXT(context);
+
+	hdr = drawctxt->gmu_context_queue.hostptr;
+	queue = (u32 *)(drawctxt->gmu_context_queue.hostptr + sizeof(*hdr));
+
+	for (i = hdr->read_index; i != hdr->write_index;) {
+		if (MSG_HDR_GET_ID(queue[i]) != H2F_MSG_ISSUE_SYNCOBJ) {
+			i = (i + MSG_HDR_GET_SIZE(queue[i])) % hdr->queue_size;
+			continue;
+		}
+
+		cmd = (struct hfi_submit_syncobj *)&queue[i];
+
+		if (cmd->timestamp == ts) {
+			log_syncobj(gmu, cmd);
+			break;
+		}
+		i = (i + MSG_HDR_GET_SIZE(queue[i])) % hdr->queue_size;
+	}
+
+	if (i == hdr->write_index)
+		dev_err(&gmu->pdev->dev, "Couldn't find unsignaled syncobj ctx:%d ts:%d\n",
+			ctxt_id, ts);
+
+	kgsl_context_put(context);
+}
+
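+/*
+ * Decode and log a context bad packet from a GMU firmware that uses the
+ * legacy (HFI minor version < 2) hfi_context_bad_cmd_legacy layout.
+ */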
+static void log_gpu_fault_legacy(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct device *dev = &gmu->pdev->dev;
+	struct hfi_context_bad_cmd_legacy *cmd = adreno_dev->hwsched.ctxt_bad;
+
+	switch (cmd->error) {
+	case GMU_GPU_HW_HANG:
+		dev_crit_ratelimited(dev, "MISC: GPU hang detected\n");
+		break;
+	case GMU_GPU_SW_HANG:
+		dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %u\n",
+			cmd->ctxt_id, cmd->ts);
+		break;
+	case GMU_CP_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP opcode error interrupt | opcode=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_OPCODE_ERROR));
+		break;
+	case GMU_CP_PROTECTED_ERROR: {
+		u32 status = gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP Illegal instruction error\n");
+		break;
+	case GMU_CP_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP ucode error interrupt\n");
+		break;
+	case GMU_CP_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP | Ringbuffer HW fault | status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_HW_FAULT));
+		break;
+	case GMU_GPU_PREEMPT_TIMEOUT: {
+		u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr;
+
+		cur = gen7_hwsched_lookup_key_value_legacy(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID);
+		next = gen7_hwsched_lookup_key_value_legacy(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT,
+			KEY_PREEMPT_TIMEOUT_NEXT_RB_ID);
+		cur_rptr = get_payload_rb_key_legacy(adreno_dev, cur, KEY_RB_RPTR);
+		cur_wptr = get_payload_rb_key_legacy(adreno_dev, cur, KEY_RB_WPTR);
+		next_rptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_RPTR);
+		next_wptr = get_payload_rb_key_legacy(adreno_dev, next, KEY_RB_WPTR);
+
+		dev_crit_ratelimited(dev,
+			"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
+			cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr);
+		}
+		break;
+	case GMU_CP_GPC_ERROR:
+		dev_crit_ratelimited(dev, "RBBM: GPC error\n");
+		break;
+	case GMU_CP_BV_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP BV opcode error | opcode=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_BV_OPCODE_ERROR));
+		break;
+	case GMU_CP_BV_PROTECTED_ERROR: {
+		u32 status = gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_BV_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_BV_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP BV | Ringbuffer HW fault | status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_HW_FAULT));
+		break;
+	case GMU_CP_BV_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n");
+		break;
+	case GMU_CP_BV_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n");
+		break;
+	case GMU_GPU_SW_FUSE_VIOLATION:
+		dev_crit_ratelimited(dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value_legacy(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_SWFUSE_VIOLATION_FAULT));
+		break;
+	case GMU_GPU_AQE0_OPCODE_ERRROR:
+		dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value_legacy(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR));
+		break;
+	case GMU_GPU_AQE0_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "AQE0 ucode error interrupt\n");
+		break;
+	case GMU_GPU_AQE0_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev, "AQE0 HW fault | status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value_legacy(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE0_HW_FAULT));
+		break;
+	case GMU_GPU_AQE0_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n");
+		break;
+	case GMU_GPU_AQE1_OPCODE_ERRROR:
+		dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value_legacy(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR));
+		break;
+	case GMU_GPU_AQE1_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "AQE1 ucode error interrupt\n");
+		break;
+	case GMU_GPU_AQE1_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev, "AQE1 HW fault | status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value_legacy(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE1_HW_FAULT));
+		break;
+	case GMU_GPU_AQE1_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "AQE1 Illegal instruction error\n");
+		break;
+	case GMU_SYNCOBJ_TIMEOUT_ERROR:
+		dev_crit_ratelimited(dev, "syncobj timeout ctx %d ts %u\n",
+			cmd->ctxt_id, cmd->ts);
+		find_timeout_syncobj(adreno_dev, cmd->ctxt_id, cmd->ts);
+		break;
+	case GMU_CP_UNKNOWN_ERROR:
+		fallthrough;
+	default:
+		dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n",
+			cmd->error);
+		break;
+	}
+}
+
+/* Look up a particular key's value for a given type of payload */
+static u32 gen7_hwsched_lookup_key_value(struct adreno_device *adreno_dev,
+	u32 type, u32 key)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == type)
+			return adreno_hwsched_parse_payload(payload, key);
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+static u32 get_payload_rb_key(struct adreno_device *adreno_dev,
+	u32 rb_id, u32 key)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			u32 id = adreno_hwsched_parse_payload(payload, KEY_RB_ID);
+
+			if (id == rb_id)
+				return adreno_hwsched_parse_payload(payload, key);
+		}
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+static bool log_gpu_fault(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct device *dev = &gmu->pdev->dev;
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+
+	/* Return false for non-fatal errors */
+	if (adreno_hwsched_log_nonfatal_gpu_fault(adreno_dev, dev, cmd->error))
+		return false;
+
+	switch (cmd->error) {
+	case GMU_GPU_HW_HANG:
+		dev_crit_ratelimited(dev, "MISC: GPU hang detected\n");
+		break;
+	case GMU_GPU_SW_HANG:
+		dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %d\n",
+			cmd->gc.ctxt_id, cmd->gc.ts);
+		break;
+	case GMU_CP_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP opcode error interrupt | opcode=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_OPCODE_ERROR));
+		break;
+	case GMU_CP_PROTECTED_ERROR: {
+		u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP Illegal instruction error\n");
+		break;
+	case GMU_CP_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP ucode error interrupt\n");
+		break;
+	case GMU_CP_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP | Ringbuffer HW fault | status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_HW_FAULT));
+		break;
+	case GMU_GPU_PREEMPT_TIMEOUT: {
+		u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr;
+
+		cur = gen7_hwsched_lookup_key_value(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID);
+		next = gen7_hwsched_lookup_key_value(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT,
+			KEY_PREEMPT_TIMEOUT_NEXT_RB_ID);
+		cur_rptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_RPTR);
+		cur_wptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_WPTR);
+		next_rptr = get_payload_rb_key(adreno_dev, next, KEY_RB_RPTR);
+		next_wptr = get_payload_rb_key(adreno_dev, next, KEY_RB_WPTR);
+
+		dev_crit_ratelimited(dev,
+			"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
+			cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr);
+		}
+		break;
+	case GMU_CP_GPC_ERROR:
+		dev_crit_ratelimited(dev, "RBBM: GPC error\n");
+		break;
+	case GMU_CP_BV_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP BV opcode error | opcode=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_BV_OPCODE_ERROR));
+		break;
+	case GMU_CP_BV_PROTECTED_ERROR: {
+		u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_BV_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_BV_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP BV | Ringbuffer HW fault | status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_HW_FAULT));
+		break;
+	case GMU_CP_BV_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n");
+		break;
+	case GMU_CP_BV_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n");
+		break;
+	case GMU_CP_LPAC_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP LPAC opcode error | opcode=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_LPAC_OPCODE_ERROR));
+		break;
+	case GMU_CP_LPAC_PROTECTED_ERROR: {
+		u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_LPAC_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP LPAC | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_LPAC_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP LPAC | Ringbuffer HW fault | status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_LPAC_HW_FAULT));
+		break;
+	case GMU_CP_LPAC_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP LPAC Illegal instruction error\n");
+		break;
+	case GMU_CP_LPAC_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP LPAC ucode error interrupt\n");
+		break;
+	case GMU_GPU_LPAC_SW_HANG:
+		dev_crit_ratelimited(dev, "LPAC: gpu timeout ctx %d ts %d\n",
+			cmd->lpac.ctxt_id, cmd->lpac.ts);
+		break;
+	case GMU_GPU_SW_FUSE_VIOLATION:
+		dev_crit_ratelimited(dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_SWFUSE_VIOLATION_FAULT));
+		break;
+	case GMU_GPU_AQE0_OPCODE_ERRROR:
+		dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR));
+		break;
+	case GMU_GPU_AQE0_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "AQE0 ucode error interrupt\n");
+		break;
+	case GMU_GPU_AQE0_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev, "AQE0 HW fault | status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE0_HW_FAULT));
+		break;
+	case GMU_GPU_AQE0_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n");
+		break;
+	case GMU_GPU_AQE1_OPCODE_ERRROR:
+		dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR));
+		break;
+	case GMU_GPU_AQE1_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "AQE1 ucode error interrupt\n");
+		break;
+	case GMU_GPU_AQE1_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev, "AQE1 HW fault | status=0x%8.8x\n",
+			gen7_hwsched_lookup_key_value(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE1_HW_FAULT));
+		break;
+	case GMU_GPU_AQE1_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "AQE1 Illegal instruction error\n");
+		break;
+	case GMU_SYNCOBJ_TIMEOUT_ERROR:
+		dev_crit_ratelimited(dev, "syncobj timeout ctx %d ts %u\n",
+			cmd->gc.ctxt_id, cmd->gc.ts);
+		find_timeout_syncobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts);
+		break;
+	case GMU_CP_UNKNOWN_ERROR:
+		fallthrough;
+	default:
+		dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n",
+			cmd->error);
+		break;
+	}
+
+	/* Return true for fatal errors to perform recovery sequence */
+	return true;
+}
+
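+/*
+ * Return the header of the next pending message in the given HFI queue
+ * without advancing the read index, or 0 if the queue is disabled or empty.
+ */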
+static u32 peek_next_header(struct gen7_gmu_device *gmu, uint32_t queue_idx)
+{
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	u32 *queue;
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+		return 0;
+
+	if (hdr->read_index == hdr->write_index)
+		return 0;
+
+	queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
+
+	return queue[hdr->read_index];
+}
+
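+/*
+ * Decode the context bad packet received from the GMU. Legacy packets and
+ * fatal errors trigger hwsched fault recovery; non-fatal errors are only
+ * logged and the packet is cleared.
+ */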
+static void process_ctx_bad(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (GMU_VER_MINOR(gmu->ver.hfi) < 2) {
+		log_gpu_fault_legacy(adreno_dev);
+		goto done;
+	}
+
+	/* Non-fatal RBBM error interrupts don't go through reset and recovery */
+	if (!log_gpu_fault(adreno_dev)) {
+		memset(adreno_dev->hwsched.ctxt_bad, 0x0, HFI_MAX_MSG_SIZE);
+		return;
+	}
+
+done:
+	gen7_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT);
+}
+
+#define GET_QUERIED_FENCE_INDEX(x) (x / BITS_PER_SYNCOBJ_QUERY)
+#define GET_QUERIED_FENCE_BIT(x) (x % BITS_PER_SYNCOBJ_QUERY)
+
+static bool fence_is_queried(struct hfi_syncobj_query_cmd *cmd, u32 fence_index)
+{
+	u32 index = GET_QUERIED_FENCE_INDEX(fence_index);
+	u32 bit = GET_QUERIED_FENCE_BIT(fence_index);
+
+	return (cmd->queries[index].query_bitmask & BIT(bit));
+}
+
+static void set_fence_signal_bit(struct adreno_device *adreno_dev,
+	struct hfi_syncobj_query_cmd *reply, struct dma_fence *fence, u32 fence_index,
+	char *name)
+{
+	u32 index = GET_QUERIED_FENCE_INDEX(fence_index);
+	u32 bit = GET_QUERIED_FENCE_BIT(fence_index);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u64 flags = ADRENO_HW_FENCE_SW_STATUS_PENDING;
+	char value[32] = "unknown";
+
+	if (fence->ops->timeline_value_str)
+		fence->ops->timeline_value_str(fence, value, sizeof(value));
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+		dev_err(&gmu->pdev->dev,
+			"GMU is waiting for signaled fence(ctx:%llu seqno:%llu value:%s)\n",
+			fence->context, fence->seqno, value);
+		reply->queries[index].query_bitmask |= BIT(bit);
+		flags = ADRENO_HW_FENCE_SW_STATUS_SIGNALED;
+	}
+	trace_adreno_hw_fence_query(fence->context, fence->seqno, flags, name, value);
+}
+
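+/*
+ * Build and send the sync object query reply: for every fence the GMU asked
+ * about, set the corresponding bit in the reply if the fence is already
+ * signaled on the host.
+ */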
+static void gen7_syncobj_query_reply(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj, struct hfi_syncobj_query_cmd *cmd)
+{
+	struct hfi_syncobj_query_cmd reply = {0};
+	int i, j, fence_index = 0;
+	struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i];
+		struct kgsl_sync_fence_cb *kcb = event->handle;
+		struct dma_fence **fences;
+		struct dma_fence_array *array;
+		struct event_fence_info *info = event->priv;
+		u32 num_fences;
+
+		array = to_dma_fence_array(kcb->fence);
+		if (array != NULL) {
+			num_fences = array->num_fences;
+			fences = array->fences;
+		} else {
+			num_fences = 1;
+			fences = &kcb->fence;
+		}
+
+		for (j = 0; j < num_fences; j++, fence_index++) {
+			if (!fence_is_queried(cmd, fence_index))
+				continue;
+
+			set_fence_signal_bit(adreno_dev, &reply, fences[j], fence_index,
+				info ? info->fences[j].name : "unknown");
+		}
+	}
+
+	reply.hdr = CREATE_MSG_HDR(F2H_MSG_SYNCOBJ_QUERY, HFI_MSG_CMD);
+	reply.gmu_ctxt_id = cmd->gmu_ctxt_id;
+	reply.sync_obj_ts = cmd->sync_obj_ts;
+
+	trace_adreno_syncobj_query_reply(reply.gmu_ctxt_id, reply.sync_obj_ts,
+		gpudev->read_alwayson(adreno_dev));
+
+	gen7_hfi_send_cmd_async(adreno_dev, &reply, sizeof(reply));
+}
+
+struct syncobj_query_work {
+	/** @cmd: The query command to be processed */
+	struct hfi_syncobj_query_cmd cmd;
+	/** @context: kgsl context that is waiting for this sync object */
+	struct kgsl_context *context;
+	/** @work: The work structure to execute syncobj query reply */
+	struct kthread_work work;
+};
+
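+/*
+ * Worker that answers a GMU sync object query: find the matching sync object
+ * on the hwsched cmd_list and reply with the host view of its fences. If the
+ * object is missing and the GMU hasn't retired it, treat it as a GMU fault.
+ */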
+static void gen7_process_syncobj_query_work(struct kthread_work *work)
+{
+	struct syncobj_query_work *query_work = container_of(work,
+						struct syncobj_query_work, work);
+	struct hfi_syncobj_query_cmd *cmd = (struct hfi_syncobj_query_cmd *)&query_work->cmd;
+	struct kgsl_context *context = query_work->context;
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct cmd_list_obj *obj;
+	bool missing = true;
+
+	mutex_lock(&hwsched->mutex);
+	mutex_lock(&device->mutex);
+
+	list_for_each_entry(obj, &hwsched->cmd_list, node) {
+		struct kgsl_drawobj *drawobj = obj->drawobj;
+
+		if ((drawobj->type & SYNCOBJ_TYPE) == 0)
+			continue;
+
+		if ((drawobj->context->id == cmd->gmu_ctxt_id) &&
+			(drawobj->timestamp == cmd->sync_obj_ts)) {
+			gen7_syncobj_query_reply(adreno_dev, drawobj, cmd);
+			missing = false;
+			break;
+		}
+	}
+
+	if (missing) {
+		struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+		struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+		struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+
+		/*
+		 * If the sync object is not found, it can only mean that the GMU retired it
+		 * in the meantime. If that is not the case, then we have a problem.
+		 */
+		if (timestamp_cmp(cmd->sync_obj_ts, hdr->sync_obj_ts) > 0) {
+			dev_err(&gmu->pdev->dev, "Missing sync object ctx:%d ts:%d retired:%d\n",
+				context->id, cmd->sync_obj_ts, hdr->sync_obj_ts);
+			gmu_core_fault_snapshot(device);
+			gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+		}
+	}
+
+	mutex_unlock(&device->mutex);
+	mutex_unlock(&hwsched->mutex);
+
+	kgsl_context_put(context);
+	kfree(query_work);
+}
+
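+/*
+ * Handle an F2H_MSG_SYNCOBJ_QUERY packet by queueing a worker that builds the
+ * reply. A context reference is held until the worker is done with it.
+ */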
+static void gen7_trigger_syncobj_query(struct adreno_device *adreno_dev,
+	u32 *rcvd)
+{
+	struct syncobj_query_work *query_work;
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct hfi_syncobj_query_cmd *cmd = (struct hfi_syncobj_query_cmd *)rcvd;
+	struct kgsl_context *context = NULL;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret;
+
+	trace_adreno_syncobj_query(cmd->gmu_ctxt_id, cmd->sync_obj_ts,
+		gpudev->read_alwayson(adreno_dev));
+
+	/*
+	 * We need the context even if it is detached. Hence, we can't use kgsl_context_get here.
+	 * We must make sure that this context id doesn't get destroyed (to avoid re-use) until GMU
+	 * has ack'd the query reply.
+	 */
+	read_lock(&device->context_lock);
+	context = idr_find(&device->context_idr, cmd->gmu_ctxt_id);
+	ret = _kgsl_context_get(context);
+	read_unlock(&device->context_lock);
+
+	if (!ret)
+		return;
+
+	query_work = kzalloc(sizeof(*query_work), GFP_KERNEL);
+	if (!query_work) {
+		kgsl_context_put(context);
+		return;
+	}
+
+	kthread_init_work(&query_work->work, gen7_process_syncobj_query_work);
+	memcpy(&query_work->cmd, cmd, sizeof(*cmd));
+	query_work->context = context;
+
+	kthread_queue_work(hwsched->worker, &query_work->work);
+}
+
+/*
+ * This defines the maximum number of unack'd hardware fences that we allow. When this limit is
+ * reached, all threads that want to create a hardware fence are put to sleep until the unack'd
+ * hardware fence count drops to MIN_HW_FENCE_UNACK_COUNT
+ */
+#define MAX_HW_FENCE_UNACK_COUNT 20
+
+/*
+ * Once the maximum unack'd hardware fences drops to this value, wake up all the threads (that want
+ * to create hardware fences)
+ */
+#define MIN_HW_FENCE_UNACK_COUNT 10
+
+/*
+ * This is the maximum duration (in milliseconds) a thread that wants to create a hardware fence
+ * is put to sleep while we wait for the number of unack'd hardware fences to drop from
+ * MAX_HW_FENCE_UNACK_COUNT to MIN_HW_FENCE_UNACK_COUNT. If the count doesn't drop to the desired
+ * value, log an error and trigger snapshot and recovery.
+ */
+#define HW_FENCE_SLEEP_MS 200
+
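+/*
+ * Throttle hardware fence creation: set the sleep and max flags, halt new GPU
+ * submissions and arm a timer to catch the case where the unack count never
+ * drains back down.
+ */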
+static void _enable_hw_fence_throttle(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+
+	set_bit(GEN7_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags);
+	set_bit(GEN7_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags);
+
+	/* Avoid submitting new work to gpu until the unack count drops to a desired threshold */
+	adreno_get_gpu_halt(adreno_dev);
+
+	mod_timer(&hfi->hw_fence_timer, jiffies + msecs_to_jiffies(HW_FENCE_SLEEP_MS));
+}
+
+static void _increment_hw_fence_unack_count(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+
+	if ((++hfi->hw_fence.unack_count) == MAX_HW_FENCE_UNACK_COUNT)
+		_enable_hw_fence_throttle(adreno_dev);
+}
+
+/**
+ * _send_hw_fence_no_ack - Send a hardware fence hfi packet to GMU without waiting for its ack.
+ * Increment the unack count on success
+ *
+ * Return: 0 on success or negative error on failure
+ */
+static int _send_hw_fence_no_ack(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	u32 seqnum;
+	int ret;
+
+	seqnum = atomic_inc_return(&hfi->hw_fence.seqnum);
+	entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2);
+
+	ret = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd));
+	if (!ret)
+		_increment_hw_fence_unack_count(adreno_dev);
+
+	return ret;
+}
+
+static struct adreno_hw_fence_entry *_get_deferred_hw_fence(struct adreno_context *drawctxt, u32 ts)
+{
+	struct adreno_hw_fence_entry *entry = NULL, *next, *deferred_hw_fence_entry = NULL;
+
+	spin_lock(&drawctxt->lock);
+	list_for_each_entry_safe(entry, next, &drawctxt->hw_fence_list, node) {
+
+		if (timestamp_cmp((u32)entry->cmd.ts, ts) > 0)
+			break;
+
+		/* We found a deferred hardware fence */
+		deferred_hw_fence_entry = entry;
+		break;
+	}
+	spin_unlock(&drawctxt->lock);
+
+	/*
+	 * This path executes in isolation from any paths that may release this entry. So, it is
+	 * safe to handle this entry outside of the drawctxt spinlock
+	 */
+	return deferred_hw_fence_entry;
+}
+
+static int _send_deferred_hw_fence(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct adreno_hw_fence_entry *entry, u32 ts)
+{
+	bool retired = kgsl_check_timestamp(KGSL_DEVICE(adreno_dev), &drawctxt->base, ts) ||
+				kgsl_context_is_bad(&drawctxt->base);
+	int ret = 0;
+	u32 flags = 0;
+
+	if (retired)
+		flags |= HW_FENCE_FLAG_SKIP_MEMSTORE;
+
+	ret = gen7_send_hw_fence_hfi_wait_ack(adreno_dev, entry, flags);
+	if (ret)
+		return ret;
+
+	spin_lock(&drawctxt->lock);
+	if (!retired)
+		list_move_tail(&entry->node, &drawctxt->hw_fence_inflight_list);
+	else
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+	spin_unlock(&drawctxt->lock);
+
+	return 0;
+}
+
+/**
+ * process_hw_fence_deferred_ctxt - This function sends hardware fences to GMU (from the
+ * deferred drawctxt) which couldn't be sent earlier
+ */
+static int process_hw_fence_deferred_ctxt(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, u32 ts)
+{
+	struct adreno_hw_fence_entry *deferred_hw_fence_entry = NULL;
+	int ret = 0;
+
+	do {
+		deferred_hw_fence_entry = _get_deferred_hw_fence(drawctxt, ts);
+
+		if (!deferred_hw_fence_entry)
+			break;
+
+		ret = _send_deferred_hw_fence(adreno_dev, drawctxt, deferred_hw_fence_entry, ts);
+		if (ret)
+			break;
+
+	} while (deferred_hw_fence_entry != NULL);
+
+	return ret;
+}
+
+static void _disable_hw_fence_throttle(struct adreno_device *adreno_dev, bool clear_abort_bit)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	bool max;
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	hfi->hw_fence.defer_drawctxt = NULL;
+	hfi->hw_fence.defer_ts = 0;
+	max = test_bit(GEN7_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags);
+	if (max) {
+		clear_bit(GEN7_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags);
+		clear_bit(GEN7_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags);
+	}
+
+	if (clear_abort_bit)
+		clear_bit(GEN7_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags);
+	spin_unlock(&hfi->hw_fence.lock);
+
+	/* Wake up dispatcher and any sleeping threads that want to create hardware fences */
+	if (max) {
+		adreno_put_gpu_halt(adreno_dev);
+		adreno_hwsched_trigger(adreno_dev);
+		wake_up_all(&hfi->hw_fence.unack_wq);
+	}
+}
+
+static void gen7_defer_hw_fence_work(struct kthread_work *work)
+{
+	struct gen7_hwsched_hfi *hfi = container_of(work,
+						struct gen7_hwsched_hfi, defer_hw_fence_work);
+	struct adreno_context *drawctxt = NULL;
+	struct kgsl_device *device;
+	struct adreno_device *adreno_dev;
+	u32 ts;
+	int ret;
+
+	spin_lock(&hfi->hw_fence.lock);
+	drawctxt = hfi->hw_fence.defer_drawctxt;
+	ts = hfi->hw_fence.defer_ts;
+	spin_unlock(&hfi->hw_fence.lock);
+
+	device = drawctxt->base.device;
+	adreno_dev = ADRENO_DEVICE(device);
+
+	/*
+	 * Grab the dispatcher and device mutex as we don't want to race with concurrent fault
+	 * recovery
+	 */
+	mutex_lock(&adreno_dev->hwsched.mutex);
+	mutex_lock(&device->mutex);
+
+	ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts);
+	if (ret) {
+		/* the deferred drawctxt will be handled post fault recovery */
+		gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+		goto unlock;
+	}
+
+	/*
+	 * Put back the context reference which was incremented when hw_fence.defer_drawctxt was set
+	 */
+	kgsl_context_put(&drawctxt->base);
+
+	gen7_hwsched_active_count_put(adreno_dev);
+
+	_disable_hw_fence_throttle(adreno_dev, false);
+
+unlock:
+	mutex_unlock(&device->mutex);
+	mutex_unlock(&adreno_dev->hwsched.mutex);
+}
+
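+/*
+ * Handle the ack for a H2F_MSG_HW_FENCE_INFO packet. Acks that are explicitly
+ * waited on just complete the waiter; otherwise decrement the unack count and,
+ * if throttling is enabled, disable it once the count drains to
+ * MIN_HW_FENCE_UNACK_COUNT (via a worker when a deferred drawctxt must be
+ * flushed first).
+ */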
+static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_hdr)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct adreno_context *drawctxt = NULL;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	/* If this ack is being waited on, we don't need to touch the unack count */
+	if (hw_fence_ack.sent_hdr && CMP_HFI_ACK_HDR(hw_fence_ack.sent_hdr, received_hdr)) {
+		spin_unlock(&hfi->hw_fence.lock);
+		complete(&hw_fence_ack.complete);
+		return;
+	}
+
+	hfi->hw_fence.unack_count--;
+
+	/* The unack count should never be greater than MAX_HW_FENCE_UNACK_COUNT */
+	if (hfi->hw_fence.unack_count > MAX_HW_FENCE_UNACK_COUNT)
+		dev_err(&gmu->pdev->dev, "unexpected hardware fence unack count:%d\n",
+			hfi->hw_fence.unack_count);
+
+	if (!test_bit(GEN7_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags) ||
+		(hfi->hw_fence.unack_count != MIN_HW_FENCE_UNACK_COUNT)) {
+		spin_unlock(&hfi->hw_fence.lock);
+		return;
+	}
+
+	drawctxt = hfi->hw_fence.defer_drawctxt;
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	del_timer_sync(&hfi->hw_fence_timer);
+
+	/*
+	 * We need to handle the deferred context in another thread so that we can unblock the
+	 * f2h daemon here, since it will need to process the acks for the hardware fences
+	 * belonging to the deferred context.
+	 */
+	if (drawctxt) {
+		kthread_init_work(&hfi->defer_hw_fence_work, gen7_defer_hw_fence_work);
+		kthread_queue_work(adreno_dev->hwsched.worker, &hfi->defer_hw_fence_work);
+		return;
+	}
+
+	_disable_hw_fence_throttle(adreno_dev, false);
+}
+
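+/*
+ * Drain the HFI message queue: route acks to their waiters and handle context
+ * bad, timestamp retire, sync object query and GMU counter release packets.
+ */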
+void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev);
+	u32 rcvd[MAX_RCVD_SIZE], next_hdr, type;
+
+	mutex_lock(&hw_hfi->msgq_mutex);
+
+	for (;;) {
+		next_hdr = peek_next_header(gmu, HFI_MSG_ID);
+
+		if (!next_hdr)
+			break;
+
+		if (MSG_HDR_GET_TYPE(next_hdr) == HFI_MSG_ACK)
+			type = HFI_MSG_ACK;
+		else
+			type = MSG_HDR_GET_ID(next_hdr);
+
+		if (type != F2H_MSG_CONTEXT_BAD)
+			gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd));
+
+		switch (type) {
+		case HFI_MSG_ACK:
+			/*
+			 * We are assuming that there is only one outstanding ack because the HFI
+			 * sending thread waits for completion while holding the device mutex
+			 * (except when we send H2F_MSG_HW_FENCE_INFO packets)
+			 */
+			if (MSG_HDR_GET_ID(rcvd[1]) == H2F_MSG_HW_FENCE_INFO)
+				process_hw_fence_ack(adreno_dev, rcvd[1]);
+			else
+				gen7_receive_ack_async(adreno_dev, rcvd);
+			break;
+		case F2H_MSG_CONTEXT_BAD:
+			gen7_hfi_queue_read(gmu, HFI_MSG_ID, (u32 *)adreno_dev->hwsched.ctxt_bad,
+						HFI_MAX_MSG_SIZE);
+			process_ctx_bad(adreno_dev);
+			break;
+		case F2H_MSG_TS_RETIRE:
+			log_profiling_info(adreno_dev, rcvd);
+			adreno_hwsched_trigger(adreno_dev);
+			break;
+		case F2H_MSG_SYNCOBJ_QUERY:
+			gen7_trigger_syncobj_query(adreno_dev, rcvd);
+			break;
+		case F2H_MSG_GMU_CNTR_RELEASE: {
+			struct hfi_gmu_cntr_release_cmd *cmd =
+				(struct hfi_gmu_cntr_release_cmd *) rcvd;
+
+			adreno_perfcounter_put(adreno_dev,
+				cmd->group_id, cmd->countable, PERFCOUNTER_FLAG_KERNEL);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&hw_hfi->msgq_mutex);
+}
+
+static void process_log_block(struct adreno_device *adreno_dev, void *data)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct hfi_log_block *cmd = data;
+	u32 *log_event = gmu->gmu_log->hostptr;
+	u32 start, end;
+
+	start = cmd->start_index;
+	end = cmd->stop_index;
+
+	log_event += start * 4;
+	while (start != end) {
+		trace_gmu_event(log_event);
+		log_event += 4;
+		start++;
+	}
+}
+
+static void gen7_hwsched_process_dbgq(struct adreno_device *adreno_dev, bool limited)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 rcvd[MAX_RCVD_SIZE];
+	bool recovery = false;
+
+	while (gen7_hfi_queue_read(gmu, HFI_DBG_ID, rcvd, sizeof(rcvd)) > 0) {
+
+		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_ERR) {
+			adreno_gen7_receive_err_req(gmu, rcvd);
+			recovery = true;
+			break;
+		}
+
+		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_DEBUG)
+			adreno_gen7_receive_debug_req(gmu, rcvd);
+
+		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_LOG_BLOCK)
+			process_log_block(adreno_dev, rcvd);
+
+		/* Process one debug queue message and return to not delay msgq processing */
+		if (limited)
+			break;
+	}
+
+	if (!recovery)
+		return;
+
+	gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+/* HFI interrupt handler */
+static irqreturn_t gen7_hwsched_hfi_handler(int irq, void *data)
+{
+	struct adreno_device *adreno_dev = data;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status = 0;
+
+	/*
+	 * GEN7_GMU_GMU2HOST_INTR_INFO may have bits set not specified in hfi->irq_mask.
+	 * Read and clear only those irq bits that we are processing here.
+	 */
+	gmu_core_regread(device, GEN7_GMU_GMU2HOST_INTR_INFO, &status);
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, status & hfi->irq_mask);
+
+	/*
+	 * If interrupts are not enabled on the HFI message queue, the inline
+	 * message processing loop handles it, so mask it out here; otherwise
+	 * process it here.
+	 */
+	if (!(hfi->irq_mask & HFI_IRQ_MSGQ_MASK))
+		status &= ~HFI_IRQ_MSGQ_MASK;
+
+	if (status & (HFI_IRQ_MSGQ_MASK | HFI_IRQ_DBGQ_MASK)) {
+		wake_up_interruptible(&hfi->f2h_wq);
+		adreno_hwsched_trigger(adreno_dev);
+	}
+	if (status & HFI_IRQ_CM3_FAULT_MASK) {
+		atomic_set(&gmu->cm3_fault, 1);
+
+		/* make sure other CPUs see the update */
+		smp_wmb();
+
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"GMU CM3 fault interrupt received\n");
+
+		gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+
+	/* Ignore OOB bits */
+	status &= GENMASK(31 - (oob_max - 1), 0);
+
+	if (status & ~hfi->irq_mask)
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Unhandled HFI interrupts 0x%x\n",
+			status & ~hfi->irq_mask);
+
+	return IRQ_HANDLED;
+}
+
+#define HFI_IRQ_MSGQ_MASK BIT(0)
+
+static int check_ack_failure(struct adreno_device *adreno_dev,
+	struct pending_cmd *ack)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (ack->results[2] != 0xffffffff)
+		return 0;
+
+	dev_err(&gmu->pdev->dev,
+		"ACK error: sender id %d seqnum %d\n",
+		MSG_HDR_GET_ID(ack->sent_hdr),
+		MSG_HDR_GET_SEQNUM(ack->sent_hdr));
+
+	return -EINVAL;
+}
+
+int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	u32 *cmd = data;
+	u32 seqnum;
+	int rc;
+	struct pending_cmd pending_ack;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	*cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2);
+
+	add_waiter(hfi, *cmd, &pending_ack);
+
+	rc = gen7_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
+	if (rc)
+		goto done;
+
+	rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack,
+		gen7_hwsched_process_msgq);
+	if (rc)
+		goto done;
+
+	rc = check_ack_failure(adreno_dev, &pending_ack);
+
+done:
+	del_waiter(hfi, &pending_ack);
+
+	return rc;
+}
+
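+/*
+ * Initialize the HFI queue table in GMU memory: one command, one message and
+ * one debug queue, followed by the dispatch queues for each ringbuffer
+ * priority level.
+ */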
+static void init_queues(struct gen7_hfi *hfi)
+{
+	u32 gmuaddr = hfi->hfi_mem->gmuaddr;
+	struct hfi_queue_table hfi_table = {
+		.qtbl_hdr = {
+			.version = 0,
+			.size = sizeof(struct hfi_queue_table) >> 2,
+			.qhdr0_offset =
+				sizeof(struct hfi_queue_table_header) >> 2,
+			.qhdr_size = sizeof(struct hfi_queue_header) >> 2,
+			.num_q = HFI_QUEUE_MAX,
+			.num_active_q = HFI_QUEUE_MAX,
+		},
+		.qhdr = {
+			DEFINE_QHDR(gmuaddr, HFI_CMD_ID, 0),
+			DEFINE_QHDR(gmuaddr, HFI_MSG_ID, 0),
+			DEFINE_QHDR(gmuaddr, HFI_DBG_ID, 0),
+			/* 4 DQs for RB priority 0 */
+			DEFINE_QHDR(gmuaddr, 3, 0),
+			DEFINE_QHDR(gmuaddr, 4, 0),
+			DEFINE_QHDR(gmuaddr, 5, 0),
+			DEFINE_QHDR(gmuaddr, 6, 0),
+			/* 4 DQs for RB priority 1 */
+			DEFINE_QHDR(gmuaddr, 7, 1),
+			DEFINE_QHDR(gmuaddr, 8, 1),
+			DEFINE_QHDR(gmuaddr, 9, 1),
+			DEFINE_QHDR(gmuaddr, 10, 1),
+			/* 3 DQs for RB priority 2 */
+			DEFINE_QHDR(gmuaddr, 11, 2),
+			DEFINE_QHDR(gmuaddr, 12, 2),
+			DEFINE_QHDR(gmuaddr, 13, 2),
+			/* 2 DQs for RB priority 3 */
+			DEFINE_QHDR(gmuaddr, 14, 3),
+			DEFINE_QHDR(gmuaddr, 15, 3),
+			/* 1 DQ for LPAC RB priority 4 */
+			DEFINE_QHDR(gmuaddr, 16, 4),
+		},
+	};
+
+	memcpy(hfi->hfi_mem->hostptr, &hfi_table, sizeof(hfi_table));
+}
+
+/* Total header sizes + queue sizes + 16 for alignment */
+#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
+	(SZ_4K * HFI_QUEUE_MAX))
+
+static int hfi_f2h_main(void *arg);
+
+int gen7_hwsched_hfi_init(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev);
+
+	if (IS_ERR_OR_NULL(hw_hfi->big_ib)) {
+		hw_hfi->big_ib = gen7_reserve_gmu_kernel_block(
+				to_gen7_gmu(adreno_dev), 0,
+				HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib),
+				GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(hw_hfi->big_ib))
+			return PTR_ERR(hw_hfi->big_ib);
+	}
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR) &&
+			IS_ERR_OR_NULL(hw_hfi->big_ib_recurring)) {
+		hw_hfi->big_ib_recurring = gen7_reserve_gmu_kernel_block(
+				to_gen7_gmu(adreno_dev), 0,
+				HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib),
+				GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(hw_hfi->big_ib_recurring))
+			return PTR_ERR(hw_hfi->big_ib_recurring);
+	}
+
+	if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
+		hfi->hfi_mem = gen7_reserve_gmu_kernel_block(
+				to_gen7_gmu(adreno_dev),
+				0, HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(hfi->hfi_mem))
+			return PTR_ERR(hfi->hfi_mem);
+		init_queues(hfi);
+	}
+
+	if (IS_ERR_OR_NULL(hw_hfi->f2h_task)) {
+		hw_hfi->f2h_task = kthread_run(hfi_f2h_main, adreno_dev, "gmu_f2h");
+		if (!IS_ERR(hw_hfi->f2h_task))
+			sched_set_fifo(hw_hfi->f2h_task);
+	}
+
+	return PTR_ERR_OR_ZERO(hw_hfi->f2h_task);
+}
+
+static int get_attrs(u32 flags)
+{
+	int attrs = IOMMU_READ;
+
+	if (flags & HFI_MEMFLAG_GMU_PRIV)
+		attrs |= IOMMU_PRIV;
+
+	if (flags & HFI_MEMFLAG_GMU_WRITEABLE)
+		attrs |= IOMMU_WRITE;
+
+	return attrs;
+}
+
+static int gmu_import_buffer(struct adreno_device *adreno_dev,
+	struct hfi_mem_alloc_entry *entry)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct hfi_mem_alloc_desc *desc = &entry->desc;
+	u32 vma_id = (desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ? GMU_CACHE : GMU_NONCACHED_KERNEL;
+
+	return gen7_gmu_import_buffer(gmu, vma_id, entry->md, get_attrs(desc->flags), desc->align);
+}
+
+static struct hfi_mem_alloc_entry *lookup_mem_alloc_table(
+	struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc)
+{
+	struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev);
+	int i;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i];
+
+		if ((entry->desc.mem_kind == desc->mem_kind) &&
+			(entry->desc.gmu_mem_handle == desc->gmu_mem_handle))
+			return entry;
+	}
+
+	return NULL;
+}
+
+static struct hfi_mem_alloc_entry *get_mem_alloc_entry(
+	struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct hfi_mem_alloc_entry *entry =
+		lookup_mem_alloc_table(adreno_dev, desc);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u64 flags = 0;
+	u32 priv = 0;
+	int ret;
+	const char *memkind_string = desc->mem_kind < HFI_MEMKIND_MAX ?
+			hfi_memkind_strings[desc->mem_kind] : "UNKNOWN";
+
+	if (entry)
+		return entry;
+
+	if (desc->mem_kind >= HFI_MEMKIND_MAX) {
+		dev_err(&gmu->pdev->dev, "Invalid mem kind: %d\n",
+			desc->mem_kind);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (hfi->mem_alloc_entries == ARRAY_SIZE(hfi->mem_alloc_table)) {
+		dev_err(&gmu->pdev->dev,
+			"Reached max mem alloc entries\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	entry = &hfi->mem_alloc_table[hfi->mem_alloc_entries];
+
+	memcpy(&entry->desc, desc, sizeof(*desc));
+
+	entry->desc.host_mem_handle = desc->gmu_mem_handle;
+
+	if (desc->flags & HFI_MEMFLAG_GFX_PRIV)
+		priv |= KGSL_MEMDESC_PRIVILEGED;
+
+	if (!(desc->flags & HFI_MEMFLAG_GFX_WRITEABLE))
+		flags |= KGSL_MEMFLAGS_GPUREADONLY;
+
+	if (desc->flags & HFI_MEMFLAG_GFX_SECURE)
+		flags |= KGSL_MEMFLAGS_SECURE;
+
+	if (!(desc->flags & HFI_MEMFLAG_GFX_ACC) &&
+		(desc->mem_kind != HFI_MEMKIND_HW_FENCE)) {
+		if (desc->mem_kind == HFI_MEMKIND_MMIO_IPC_CORE)
+			entry->md = gen7_reserve_gmu_kernel_block_fixed(gmu, 0,
+					desc->size,
+					(desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ?
+					GMU_CACHE : GMU_NONCACHED_KERNEL,
+					"qcom,ipc-core", get_attrs(desc->flags),
+					desc->align);
+		else
+			entry->md = gen7_reserve_gmu_kernel_block(gmu, 0,
+					desc->size,
+					(desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ?
+					GMU_CACHE : GMU_NONCACHED_KERNEL,
+					desc->align);
+
+		if (IS_ERR(entry->md)) {
+			int ret = PTR_ERR(entry->md);
+
+			memset(entry, 0, sizeof(*entry));
+			return ERR_PTR(ret);
+		}
+		entry->desc.size = entry->md->size;
+		entry->desc.gmu_addr = entry->md->gmuaddr;
+
+		goto done;
+	}
+
+	/*
+	 * Use pre-allocated memory descriptors to map the HFI_MEMKIND_HW_FENCE and
+	 * HFI_MEMKIND_MEMSTORE
+	 */
+	switch (desc->mem_kind) {
+	case HFI_MEMKIND_HW_FENCE:
+		entry->md = &adreno_dev->hwsched.hw_fence.memdesc;
+		break;
+	case HFI_MEMKIND_MEMSTORE:
+		entry->md = device->memstore;
+		break;
+	default:
+		entry->md = kgsl_allocate_global(device, desc->size, 0, flags,
+			priv, memkind_string);
+		break;
+	}
+	if (IS_ERR(entry->md)) {
+		int ret = PTR_ERR(entry->md);
+
+		memset(entry, 0, sizeof(*entry));
+		return ERR_PTR(ret);
+	}
+
+	entry->desc.size = entry->md->size;
+	entry->desc.gpu_addr = entry->md->gpuaddr;
+
+	if (!(desc->flags & HFI_MEMFLAG_GMU_ACC))
+		goto done;
+
+	/*
+	 * If GMU mapping fails, then we have to live with
+	 * leaking the GPU global buffer allocated above.
+	 */
+	ret = gmu_import_buffer(adreno_dev, entry);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"gpuaddr: 0x%llx size: %lld bytes lost\n",
+			entry->md->gpuaddr, entry->md->size);
+		memset(entry, 0, sizeof(*entry));
+		return ERR_PTR(ret);
+	}
+
+	entry->desc.gmu_addr = entry->md->gmuaddr;
+done:
+	hfi->mem_alloc_entries++;
+
+	return entry;
+}
+
+static int process_mem_alloc(struct adreno_device *adreno_dev,
+	struct hfi_mem_alloc_desc *mad)
+{
+	struct hfi_mem_alloc_entry *entry;
+
+	entry = get_mem_alloc_entry(adreno_dev, mad);
+	if (IS_ERR(entry))
+		return PTR_ERR(entry);
+
+	if (entry->md) {
+		mad->gpu_addr = entry->md->gpuaddr;
+		mad->gmu_addr = entry->md->gmuaddr;
+	}
+
+	/*
+	 * GMU uses the host_mem_handle to check if this memalloc was
+	 * successful
+	 */
+	mad->host_mem_handle = mad->gmu_mem_handle;
+
+	return 0;
+}
+
+static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd)
+{
+	struct hfi_mem_alloc_desc desc = {0};
+	struct hfi_mem_alloc_reply_cmd out = {0};
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 seqnum;
+	int ret;
+
+	hfi_get_mem_alloc_desc(rcvd, &desc);
+
+	ret = process_mem_alloc(adreno_dev, &desc);
+	if (ret)
+		return ret;
+
+	memcpy(&out.desc, &desc, sizeof(out.desc));
+
+	out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC);
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2);
+
+	out.req_hdr = *(u32 *)rcvd;
+
+	return gen7_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out));
+}
+
+static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd)
+{
+	struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd;
+	struct hfi_gmu_cntr_register_reply_cmd out = {0};
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 lo = 0, hi = 0, seqnum;
+
+	/*
+	 * Failure to allocate counter is not fatal. Sending lo = 0, hi = 0
+	 * indicates to GMU that counter allocation failed.
+	 */
+	adreno_perfcounter_get(adreno_dev,
+		in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL);
+
+	out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER);
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2);
+	out.req_hdr = in->hdr;
+	out.group_id = in->group_id;
+	out.countable = in->countable;
+	/* Fill in byte offset of counter */
+	out.cntr_lo = lo << 2;
+	out.cntr_hi = hi << 2;
+
+	return gen7_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out));
+}
+
+static int send_warmboot_start_msg(struct adreno_device *adreno_dev)
+{
+	int ret = 0;
+	struct hfi_start_cmd cmd;
+
+	if (!adreno_dev->warmboot_enabled)
+		return ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_START);
+	if (ret)
+		return ret;
+
+	cmd.hdr = RECORD_NOP_MSG_HDR(cmd.hdr);
+
+	return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static int send_start_msg(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 seqnum;
+	int ret, rc = 0;
+	struct hfi_start_cmd cmd;
+	u32 rcvd[MAX_RCVD_SIZE];
+	struct pending_cmd pending_ack = {0};
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_START);
+	if (ret)
+		return ret;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+
+	pending_ack.sent_hdr = cmd.hdr;
+
+	rc = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (rc)
+		return rc;
+
+poll:
+	rc = gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO,
+		HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK);
+
+	if (rc) {
+		dev_err(&gmu->pdev->dev,
+			"Timed out processing MSG_START seqnum: %d\n",
+			seqnum);
+		gmu_core_fault_snapshot(device);
+		return rc;
+	}
+
+	/* Clear the interrupt */
+	gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR,
+		HFI_IRQ_MSGQ_MASK);
+
+	if (gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) {
+		dev_err(&gmu->pdev->dev, "MSG_START: no payload\n");
+		gmu_core_fault_snapshot(device);
+		return -EINVAL;
+	}
+
+	if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
+		rc = gen7_receive_ack_cmd(gmu, rcvd, &pending_ack);
+		if (rc)
+			return rc;
+
+		return check_ack_failure(adreno_dev, &pending_ack);
+	}
+
+	if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) {
+		rc = mem_alloc_reply(adreno_dev, rcvd);
+		if (rc)
+			return rc;
+
+		goto poll;
+	}
+
+	if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_REGISTER) {
+		rc = gmu_cntr_register_reply(adreno_dev, rcvd);
+		if (rc)
+			return rc;
+		goto poll;
+	}
+
+	dev_err(&gmu->pdev->dev,
+		"MSG_START: unexpected response id:%d, type:%d\n",
+		MSG_HDR_GET_ID(rcvd[0]),
+		MSG_HDR_GET_TYPE(rcvd[0]));
+
+	gmu_core_fault_snapshot(device);
+
+	return rc;
+}
+
+static void reset_hfi_mem_records(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct kgsl_memdesc *md = NULL;
+	u32 i;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_desc *desc = &hw_hfi->mem_alloc_table[i].desc;
+
+		if (desc->flags & HFI_MEMFLAG_HOST_INIT) {
+			md = hw_hfi->mem_alloc_table[i].md;
+			memset(md->hostptr, 0x0, md->size);
+		}
+	}
+}
+
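+/*
+ * Discard any unread messages by snapping each enabled queue's read index to
+ * its write index.
+ */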
+static void reset_hfi_queues(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
+	u32 i;
+
+	/* Flush HFI queues */
+	for (i = 0; i < HFI_QUEUE_MAX; i++) {
+		struct hfi_queue_header *hdr = &tbl->qhdr[i];
+
+		if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+			continue;
+
+		hdr->read_index = hdr->write_index;
+	}
+}
+
+void gen7_hwsched_hfi_stop(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+
+	hfi->irq_mask &= ~HFI_IRQ_MSGQ_MASK;
+
+	/*
+	 * In some corner cases, it is possible that the GMU put TS_RETIRE
+	 * on the msgq after we have turned off GMU interrupts. Hence,
+	 * drain the queue one last time before we reset the HFI queues.
+	 */
+	gen7_hwsched_process_msgq(adreno_dev);
+
+	/* Drain the debug queue before we reset HFI queues */
+	gen7_hwsched_process_dbgq(adreno_dev, false);
+
+	kgsl_pwrctrl_axi(KGSL_DEVICE(adreno_dev), false);
+
+	clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	/*
+	 * Reset the HFI host-accessed memory records, as the GMU expects these
+	 * records to be clear at bootup.
+	 */
+	reset_hfi_mem_records(adreno_dev);
+}
+
+static void gen7_hwsched_enable_async_hfi(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+
+	hfi->irq_mask |= HFI_IRQ_MSGQ_MASK;
+
+	gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_GMU2HOST_INTR_MASK,
+		(u32)~hfi->irq_mask);
+}
+
+static int enable_preemption(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 data;
+	int ret;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	/*
+	 * Bits [0:1] contains the preemption level
+	 * Bit 2 is to enable/disable gmem save/restore
+	 * Bit 3 is to enable/disable skipsaverestore
+	 */
+	data = FIELD_PREP(GENMASK(1, 0), adreno_dev->preempt.preempt_level) |
+			FIELD_PREP(BIT(2), adreno_dev->preempt.usesgmem) |
+			FIELD_PREP(BIT(3), adreno_dev->preempt.skipsaverestore);
+
+	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_PREEMPTION, 1,
+			data);
+	if (ret)
+		return ret;
+
+	if (gen7_core->qos_value) {
+		int i;
+
+		for (i = 0; i < KGSL_PRIORITY_MAX_RB_LEVELS; i++) {
+			if (!gen7_core->qos_value[i])
+				continue;
+
+			gen7_hfi_send_set_value(adreno_dev,
+				HFI_VALUE_RB_GPU_QOS, i,
+				gen7_core->qos_value[i]);
+		}
+	}
+
+	if (device->pwrctrl.rt_bus_hint) {
+		ret = gen7_hfi_send_set_value(adreno_dev, HFI_VALUE_RB_IB_RULE, 0,
+			device->pwrctrl.rt_bus_hint);
+		if (ret)
+			device->pwrctrl.rt_bus_hint = 0;
+	}
+
+	/*
+	 * Bits[3:0] contain the preemption timeout enable bit per ringbuffer
+	 * Bits[31:4] contain the timeout in ms
+	 */
+	return gen7_hfi_send_set_value(adreno_dev, HFI_VALUE_BIN_TIME, 1,
+		FIELD_PREP(GENMASK(31, 4), ADRENO_PREEMPT_TIMEOUT) |
+		FIELD_PREP(GENMASK(3, 0), 0xf));
+
+}
+
+static int enable_gmu_stats(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 data;
+
+	if (!gmu->stats_enable)
+		return 0;
+
+	/*
+	 * Bits [23:0] contains the countables mask
+	 * Bits [31:24] is the sampling interval
+	 */
+	data = FIELD_PREP(GENMASK(23, 0), gmu->stats_mask) |
+		FIELD_PREP(GENMASK(31, 24), gmu->stats_interval);
+
+	return gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_GMU_STATS, 1, data);
+}
+
+static int gen7_hfi_send_perfcounter_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	/*
+	 * Perfcounter retention is disabled by default in GMU firmware.
+	 * In case perfcounter retention behaviour is overridden dynamically
+	 * via sysfs, send this HFI feature with 'enable = 0' to disable the
+	 * feature in GMU firmware.
+	 */
+	if (adreno_dev->perfcounter)
+		return gen7_hfi_send_feature_ctrl(adreno_dev,
+				HFI_FEATURE_PERF_NORETAIN, 0, 0);
+
+	return 0;
+}
+
+u32 gen7_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop)
+{
+	struct hfi_get_value_cmd cmd;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct pending_cmd pending_ack;
+	int rc;
+	u32 seqnum;
+
+	rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE);
+	if (rc)
+		return 0;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+	cmd.type = prop;
+	cmd.subtype = 0;
+
+	add_waiter(hfi, cmd.hdr, &pending_ack);
+
+	rc = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (rc)
+		goto done;
+
+	rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack,
+		gen7_hwsched_process_msgq);
+
+done:
+	del_waiter(hfi, &pending_ack);
+
+	if (rc || (pending_ack.results[2] == UINT_MAX))
+		return 0;
+
+	return pending_ack.results[2];
+}
+
+static void _context_queue_enable(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
+	if (GMU_VER_MINOR(gmu->ver.hfi) >= 3) {
+		if (gen7_hfi_send_get_value(adreno_dev, HFI_VALUE_CONTEXT_QUEUE, 0) == 1)
+			set_bit(ADRENO_HWSCHED_CONTEXT_QUEUE, &adreno_dev->hwsched.flags);
+	}
+}
+
+static int gen7_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	int ret;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags))
+		return 0;
+
+	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HW_FENCE, 1, 0);
+	if (ret && (ret == -ENOENT)) {
+		dev_err(&gmu->pdev->dev, "GMU doesn't support HW_FENCE feature\n");
+		adreno_hwsched_deregister_hw_fence(hwsched->hw_fence.handle);
+		return 0;
+	}
+
+	return ret;
+}
+
+static int gen7_hfi_send_dms_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret;
+
+	if (!test_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv))
+		return 0;
+
+	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_DMS, 1, 0);
+	if (ret == -ENOENT) {
+		dev_err(&gmu->pdev->dev, "GMU doesn't support DMS feature\n");
+		clear_bit(ADRENO_DEVICE_DMS, &adreno_dev->priv);
+		adreno_dev->dms_enabled = false;
+		return 0;
+	}
+
+	return ret;
+}
+
+static void gen7_spin_idle_debug_lpac(struct adreno_device *adreno_dev,
+				const char *str)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	u32 rptr, wptr, status, status3, intstatus, hwfault;
+	bool val = adreno_is_preemption_enabled(adreno_dev);
+
+	dev_err(device->dev, "%s", str);
+
+	kgsl_regread(device, GEN7_CP_LPAC_RB_RPTR, &rptr);
+	kgsl_regread(device, GEN7_CP_LPAC_RB_WPTR, &wptr);
+
+	kgsl_regread(device, GEN7_RBBM_STATUS, &status);
+	kgsl_regread(device, GEN7_RBBM_STATUS3, &status3);
+	kgsl_regread(device, GEN7_RBBM_INT_0_STATUS, &intstatus);
+	kgsl_regread(device, GEN7_CP_HW_FAULT, &hwfault);
+
+	dev_err(device->dev,
+		"LPAC rb=%d pos=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n",
+		val ? KGSL_LPAC_RB_ID : 1, rptr, wptr,
+		status, status3, intstatus);
+
+	dev_err(device->dev, " hwfault=%8.8X\n", hwfault);
+
+	kgsl_device_snapshot(device, NULL, NULL, false);
+}
+
+static bool gen7_hwsched_warmboot_possible(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
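+	/*
+	 * Warmboot is only possible once both the GMU init and GPU boot
+	 * sequences have been recorded and nothing has forced a coldboot.
+	 */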
+	if (adreno_dev->warmboot_enabled && test_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags)
+		&& test_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags) &&
+		!test_bit(ADRENO_DEVICE_FORCE_COLDBOOT, &adreno_dev->priv))
+		return true;
+
+	return false;
+}
+
+static int gen7_hwsched_hfi_send_warmboot_cmd(struct adreno_device *adreno_dev,
+		struct kgsl_memdesc *desc, u32 flag, bool async, struct pending_cmd *ack)
+{
+	struct hfi_warmboot_scratch_cmd cmd = {0};
+	int ret;
+
+	if (!adreno_dev->warmboot_enabled)
+		return 0;
+
+	cmd.scratch_addr = desc->gmuaddr;
+	cmd.scratch_size = desc->size;
+	cmd.flags = flag;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_WARMBOOT_CMD);
+	if (ret)
+		return ret;
+
+	if (async)
+		return gen7_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
+
+	return gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, ack, sizeof(cmd));
+}
+
+static int gen7_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev,
+		struct pending_cmd *ret_cmd)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct hfi_warmboot_scratch_cmd cmd = {
+		.scratch_addr = gmu->gpu_boot_scratch->gmuaddr,
+		.scratch_size = gmu->gpu_boot_scratch->size,
+		.flags = HFI_WARMBOOT_EXEC_SCRATCH,
+	};
+	int ret = 0;
+	u32 seqnum;
+
+	if (!adreno_dev->warmboot_enabled)
+		return 0;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_WARMBOOT_CMD);
+	if (ret)
+		return ret;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+
+	add_waiter(hfi, cmd.hdr, ret_cmd);
+
+	ret = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (ret)
+		goto err;
+
+	ret = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, ret_cmd,
+		gen7_hwsched_process_msgq);
+err:
+	del_waiter(hfi, ret_cmd);
+
+	return ret;
+}
+
+static int gen7_hwsched_warmboot_gpu(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct pending_cmd ret_cmd = {0};
+	int ret = 0;
+
+	ret = gen7_hwsched_hfi_warmboot_gpu_cmd(adreno_dev, &ret_cmd);
+	if (!ret)
+		return ret;
+
+	if (MSG_HDR_GET_TYPE(ret_cmd.results[1]) != H2F_MSG_WARMBOOT_CMD)
+		goto err;
+
+	switch (MSG_HDR_GET_TYPE(ret_cmd.results[2])) {
+	case H2F_MSG_ISSUE_CMD_RAW: {
+		if (ret_cmd.results[2] == gmu->cp_init_hdr)
+			gen7_spin_idle_debug(adreno_dev,
+				"CP initialization failed to idle\n");
+		else if (ret_cmd.results[2] == gmu->switch_to_unsec_hdr)
+			gen7_spin_idle_debug(adreno_dev,
+				"Switch to unsecure failed to idle\n");
+		}
+		break;
+	case H2F_MSG_ISSUE_LPAC_CMD_RAW:
+		gen7_spin_idle_debug_lpac(adreno_dev,
+			"LPAC CP initialization failed to idle\n");
+		break;
+	}
+err:
+	/* Clear the bit on error so that the next slumber exit does a coldboot */
+	clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags);
+	gen7_disable_gpu_irq(adreno_dev);
+	return ret;
+}
+
+static int gen7_hwsched_coldboot_gpu(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev);
+	struct pending_cmd ack = {0};
+	int ret = 0;
+
+	ret = gen7_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gpu_boot_scratch,
+		 HFI_WARMBOOT_SET_SCRATCH, true, &ack);
+	if (ret)
+		goto done;
+
+	ret = gen7_hwsched_cp_init(adreno_dev);
+	if (ret)
+		goto done;
+
+	ret = gen7_hwsched_lpac_cp_init(adreno_dev);
+	if (ret)
+		goto done;
+
+	ret = gen7_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gpu_boot_scratch,
+		HFI_WARMBOOT_QUERY_SCRATCH, true, &ack);
+	if (ret)
+		goto done;
+
+	if (adreno_dev->warmboot_enabled)
+		set_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags);
+
+done:
+	/* Clear the bitmask so that we don't send the record bit with future HFI messages */
+	memset(hfi->wb_set_record_bitmask, 0x0, sizeof(hfi->wb_set_record_bitmask));
+
+	if (ret)
+		gen7_disable_gpu_irq(adreno_dev);
+
+	return ret;
+}
+
+int gen7_hwsched_boot_gpu(struct adreno_device *adreno_dev)
+{
+	/* If warmboot is possible, just send the warmboot command; otherwise coldboot */
+	if (gen7_hwsched_warmboot_possible(adreno_dev))
+		return gen7_hwsched_warmboot_gpu(adreno_dev);
+	else
+		return gen7_hwsched_coldboot_gpu(adreno_dev);
+}
+
+static int gen7_hwsched_setup_default_votes(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+
+	/* Request default DCVS level */
+	ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+	if (ret)
+		return ret;
+
+	/* Request default BW vote */
+	return kgsl_pwrctrl_axi(device, true);
+}
+
+int gen7_hwsched_warmboot_init_gmu(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct pending_cmd ack = {0};
+	int ret = 0;
+
+	ret = gen7_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch,
+		 HFI_WARMBOOT_EXEC_SCRATCH, false, &ack);
+	if (ret)
+		goto err;
+
+	gen7_hwsched_enable_async_hfi(adreno_dev);
+
+	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	ret = gen7_hwsched_setup_default_votes(adreno_dev);
+
+err:
+	if (ret) {
+		/* Clear the bits in case of an error so that the next boot will be a coldboot */
+		clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags);
+		clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags);
+		gen7_hwsched_hfi_stop(adreno_dev);
+	}
+
+	return ret;
+}
+
+static void warmboot_init_message_record_bitmask(struct adreno_device *adreno_dev)
+{
+	struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev);
+
+	if (!adreno_dev->warmboot_enabled)
+		return;
+
+	/* Set the record bit for all the messages */
+	memset(hfi->wb_set_record_bitmask, 0xFF, sizeof(hfi->wb_set_record_bitmask));
+
+	/* These messages should not be recorded */
+	clear_bit(H2F_MSG_WARMBOOT_CMD, hfi->wb_set_record_bitmask);
+	clear_bit(H2F_MSG_START, hfi->wb_set_record_bitmask);
+	clear_bit(H2F_MSG_GET_VALUE, hfi->wb_set_record_bitmask);
+	clear_bit(H2F_MSG_GX_BW_PERF_VOTE, hfi->wb_set_record_bitmask);
+}
+
+int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct pending_cmd ack = {0};
+	int ret;
+
+	reset_hfi_queues(adreno_dev);
+
+	ret = gen7_gmu_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	if (gen7_hwsched_warmboot_possible(adreno_dev))
+		return gen7_hwsched_warmboot_init_gmu(adreno_dev);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_GMU_WARMBOOT) &&
+		(!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))) {
+		if (gen7_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_WARMBOOT, 0) == 1)
+			adreno_dev->warmboot_enabled = true;
+	}
+
+	warmboot_init_message_record_bitmask(adreno_dev);
+
+	/* Clear these bits here and set them once the scratch is successfully recorded */
+	clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags);
+	clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags);
+
+	ret = gen7_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch,
+		HFI_WARMBOOT_SET_SCRATCH, false, &ack);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_gpu_perf_table(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table, sizeof(gmu->hfi.bw_table));
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_acd_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_bcl_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_clx_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_ifpc_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HWSCHED, 1, 0);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_KPROF, 1, 0);
+	if (ret)
+		goto err;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR)) {
+		ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_LSR,
+				1, 0);
+		if (ret)
+			goto err;
+	}
+
+	ret = gen7_hfi_send_perfcounter_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_dms_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	/* Enable long IB timeout detection */
+	if (adreno_long_ib_detect(adreno_dev)) {
+		ret = gen7_hfi_send_feature_ctrl(adreno_dev,
+			HFI_FEATURE_BAIL_OUT_TIMER, 1, 0);
+		if (ret)
+			goto err;
+	}
+
+	enable_gmu_stats(adreno_dev);
+
+	if (gmu->log_stream_enable)
+		gen7_hfi_send_set_value(adreno_dev,
+			HFI_VALUE_LOG_STREAM_ENABLE, 0, 1);
+
+	if (gmu->log_group_mask)
+		gen7_hfi_send_set_value(adreno_dev,
+			HFI_VALUE_LOG_GROUP, 0, gmu->log_group_mask);
+
+	ret = gen7_hfi_send_core_fw_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	/*
+	 * HFI_VALUE_CONTEXT_QUEUE can only be queried after GMU has initialized some of the
+	 * required resources as part of handling gen7_hfi_send_core_fw_start()
+	 */
+	if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) {
+		_context_queue_enable(adreno_dev);
+		adreno_hwsched_register_hw_fence(adreno_dev);
+	}
+
+	ret = gen7_hfi_send_hw_fence_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = enable_preemption(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen7_hfi_send_lpac_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) {
+		ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_AQE, 1, 0);
+		if (ret)
+			goto err;
+	}
+
+	ret = send_start_msg(adreno_dev);
+	if (ret)
+		goto err;
+
+	/*
+	 * Send this additional start message on a cold boot if warmboot is enabled.
+	 * The message is recorded, and on a warmboot it triggers the sequence that
+	 * replays memory allocation requests and ECP task setup.
+	 */
+	ret = send_warmboot_start_msg(adreno_dev);
+	if (ret)
+		goto err;
+
+	gen7_hwsched_enable_async_hfi(adreno_dev);
+
+	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	/* Send this message only on cold boot */
+	ret = gen7_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch,
+		HFI_WARMBOOT_QUERY_SCRATCH, true, &ack);
+	if (ret)
+		goto err;
+
+	if (adreno_dev->warmboot_enabled)
+		set_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags);
+
+	ret = gen7_hwsched_setup_default_votes(adreno_dev);
+
+err:
+	if (ret)
+		gen7_hwsched_hfi_stop(adreno_dev);
+
+	return ret;
+}
+
+static int submit_raw_cmds(struct adreno_device *adreno_dev, void *cmds, u32 size_bytes,
+	const char *str)
+{
+	int ret;
+
+	ret = gen7_hfi_send_cmd_async(adreno_dev, cmds, size_bytes);
+	if (ret)
+		return ret;
+
+	ret = gmu_core_timed_poll_check(KGSL_DEVICE(adreno_dev),
+			GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, 0, 200, BIT(23));
+	if (ret)
+		gen7_spin_idle_debug(adreno_dev, str);
+
+	return ret;
+}
+
+static int submit_lpac_raw_cmds(struct adreno_device *adreno_dev, void *cmds, u32 size_bytes,
+	const char *str)
+{
+	int ret;
+
+	ret = gen7_hfi_send_cmd_async(adreno_dev, cmds, size_bytes);
+	if (ret)
+		return ret;
+
+	ret = gmu_core_timed_poll_check(KGSL_DEVICE(adreno_dev),
+			GEN7_GPU_GMU_AO_GPU_LPAC_BUSY_STATUS, 0, 200, BIT(23));
+	if (ret)
+		gen7_spin_idle_debug_lpac(adreno_dev, str);
+
+	return ret;
+}
+
+static int cp_init(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 cmds[GEN7_CP_INIT_DWORDS + 1];
+	int ret = 0;
+
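+	/* cmds[0] holds the raw-command HFI header; the CP_INIT dwords follow */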
+	cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD);
+
+	gen7_cp_init_cmds(adreno_dev, &cmds[1]);
+
+	ret = submit_raw_cmds(adreno_dev, cmds, sizeof(cmds),
+			"CP initialization failed to idle\n");
+
+	/* Save the header in case we need to debug a warmboot */
+	gmu->cp_init_hdr = cmds[0];
+
+	return ret;
+}
+
+static int send_switch_to_unsecure(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 cmds[3];
+	int ret = 0;
+
+	cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD);
+
+	cmds[1] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+	cmds[2] = 0;
+
+	ret = submit_raw_cmds(adreno_dev, cmds, sizeof(cmds),
+			"Switch to unsecure failed to idle\n");
+
+	/* Save the header in case we need to debug a warmboot */
+	gmu->switch_to_unsec_hdr = cmds[0];
+
+	return ret;
+}
+
+int gen7_hwsched_cp_init(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	int ret;
+
+	ret = cp_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = adreno_zap_shader_load(adreno_dev, gen7_core->zap_name);
+	if (ret)
+		return ret;
+
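+	/*
+	 * Without a zap shader, drop out of secure mode by writing the
+	 * register directly; otherwise ask the CP to switch to unsecure.
+	 */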
+	if (!adreno_dev->zap_loaded)
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev),
+			GEN7_RBBM_SECVID_TRUST_CNTL, 0x0);
+	else
+		ret = send_switch_to_unsecure(adreno_dev);
+
+	return ret;
+}
+
+int gen7_hwsched_lpac_cp_init(struct adreno_device *adreno_dev)
+{
+	u32 cmds[GEN7_CP_INIT_DWORDS + 1];
+
+	if (!adreno_dev->lpac_enabled)
+		return 0;
+
+	cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_LPAC_CMD_RAW, HFI_MSG_CMD);
+
+	gen7_cp_init_cmds(adreno_dev, &cmds[1]);
+
+	return submit_lpac_raw_cmds(adreno_dev, cmds, sizeof(cmds),
+			"LPAC CP initialization failed to idle\n");
+}
+
+static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+		return true;
+
+	if (hdr->read_index == hdr->write_index)
+		return true;
+
+	return false;
+}
+
+static int hfi_f2h_main(void *arg)
+{
+	struct adreno_device *adreno_dev = arg;
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+
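+	/* Drain the GMU-to-host message, trace and debug queues whenever woken */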
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() ||
+			/* If msgq irq is enabled and msgq has messages to process */
+			(((hfi->irq_mask & HFI_IRQ_MSGQ_MASK) &&
+			!is_queue_empty(adreno_dev, HFI_MSG_ID)) ||
+			/* Trace buffer has messages to process */
+			!gmu_core_is_trace_empty(gmu->trace.md->hostptr) ||
+			/* Dbgq has messages to process */
+			!is_queue_empty(adreno_dev, HFI_DBG_ID)));
+
+		if (kthread_should_stop())
+			break;
+
+		gen7_hwsched_process_msgq(adreno_dev);
+		gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev),
+					&gmu->pdev->dev, &gmu->trace);
+		gen7_hwsched_process_dbgq(adreno_dev, true);
+	}
+
+	return 0;
+}
+
+static void gen7_hwsched_hw_fence_timeout(struct work_struct *work)
+{
+	struct gen7_hwsched_hfi *hfi = container_of(work, struct gen7_hwsched_hfi, hw_fence_ws);
+	struct gen7_hwsched_device *gen7_hw_dev = container_of(hfi, struct gen7_hwsched_device,
+						hwsched_hfi);
+	struct adreno_device *adreno_dev = &gen7_hw_dev->gen7_dev.adreno_dev;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 unack_count, ts;
+	struct adreno_context *drawctxt = NULL;
+	bool fault;
+
+	/* Check msgq one last time before recording a fault */
+	gen7_hwsched_process_msgq(adreno_dev);
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	unack_count = hfi->hw_fence.unack_count;
+
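+	/*
+	 * Fault only if hardware fence traffic is still throttled (sleep and
+	 * max bits set) and the unack count has not drained below the minimum.
+	 */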
+	fault = test_bit(GEN7_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags) &&
+		test_bit(GEN7_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags) &&
+		(unack_count > MIN_HW_FENCE_UNACK_COUNT);
+
+	drawctxt = hfi->hw_fence.defer_drawctxt;
+	ts = hfi->hw_fence.defer_ts;
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	if (!fault)
+		return;
+
+	dev_err(&gmu->pdev->dev, "Hardware fence unack(%d) timeout\n", unack_count);
+
+	if (drawctxt) {
+		struct kgsl_process_private *proc_priv = drawctxt->base.proc_priv;
+
+		dev_err(&gmu->pdev->dev,
+			"Hardware fence got deferred for ctx:%d ts:%d pid:%d proc:%s\n",
+			drawctxt->base.id, ts, pid_nr(proc_priv->pid), proc_priv->comm);
+	}
+	gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+static void gen7_hwsched_hw_fence_timer(struct timer_list *t)
+{
+	struct gen7_hwsched_hfi *hfi = from_timer(hfi, t, hw_fence_timer);
+
+	kgsl_schedule_work(&hfi->hw_fence_ws);
+}
+
+int gen7_hwsched_hfi_probe(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev);
+
+	gmu->hfi.irq = kgsl_request_irq(gmu->pdev, "hfi",
+		gen7_hwsched_hfi_handler, adreno_dev);
+
+	if (gmu->hfi.irq < 0)
+		return gmu->hfi.irq;
+
+	hw_hfi->irq_mask = HFI_IRQ_MASK;
+
+	rwlock_init(&hw_hfi->msglock);
+
+	INIT_LIST_HEAD(&hw_hfi->msglist);
+	INIT_LIST_HEAD(&hw_hfi->detached_hw_fence_list);
+
+	init_waitqueue_head(&hw_hfi->f2h_wq);
+	init_waitqueue_head(&hw_hfi->hw_fence.unack_wq);
+
+	spin_lock_init(&hw_hfi->hw_fence.lock);
+
+	mutex_init(&hw_hfi->msgq_mutex);
+
+	INIT_WORK(&hw_hfi->hw_fence_ws, gen7_hwsched_hw_fence_timeout);
+
+	timer_setup(&hw_hfi->hw_fence_timer, gen7_hwsched_hw_fence_timer, 0);
+
+	return 0;
+}
+
+void gen7_hwsched_hfi_remove(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev);
+
+	if (hw_hfi->f2h_task)
+		kthread_stop(hw_hfi->f2h_task);
+}
+
+static void gen7_add_profile_events(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time)
+{
+	unsigned long flags;
+	u64 time_in_s;
+	unsigned long time_in_ns;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct kgsl_context *context = drawobj->context;
+	struct submission_info info = {0};
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (!time)
+		return;
+
+	/*
+	 * Here we are attempting to create a mapping between the
+	 * GPU time domain (alwayson counter) and the CPU time domain
+	 * (local_clock) by sampling both values as close together as
+	 * possible. This is useful for many types of debugging and
+	 * profiling. In order to make this mapping as accurate as
+	 * possible, we must turn off interrupts to avoid running
+	 * interrupt handlers between the two samples.
+	 */
+
+	local_irq_save(flags);
+
+	/* Read always on registers */
+	time->ticks = gpudev->read_alwayson(adreno_dev);
+
+	/* Trace the GPU time to create a mapping to ftrace time */
+	trace_adreno_cmdbatch_sync(context->id, context->priority,
+		drawobj->timestamp, time->ticks);
+
+	/* Get the kernel clock for time since boot */
+	time->ktime = local_clock();
+
+	/* Get the timeofday for the wall time (for the user) */
+	ktime_get_real_ts64(&time->utime);
+
+	local_irq_restore(flags);
+
+	/* Return kernel clock time to the client if requested */
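+	/* do_div() leaves the whole seconds in time_in_s and returns the ns remainder */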
+	time_in_s = time->ktime;
+	time_in_ns = do_div(time_in_s, 1000000000);
+
+	info.inflight = hwsched->inflight;
+	info.rb_id = adreno_get_level(context);
+	info.gmu_dispatch_queue = context->gmu_dispatch_queue;
+
+	cmdobj->submit_ticks = time->ticks;
+
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_SUBMIT,
+		pid_nr(context->proc_priv->pid),
+		context->id, drawobj->timestamp,
+		!!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+	trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks,
+		(unsigned long) time_in_s, time_in_ns / 1000, 0);
+
+	log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp,
+			context->priority, drawobj->flags);
+}
+
+static void init_gmu_context_queue(struct adreno_context *drawctxt)
+{
+	struct kgsl_memdesc *md = &drawctxt->gmu_context_queue;
+	struct gmu_context_queue_header *hdr = md->hostptr;
+
+	hdr->start_addr = md->gmuaddr + sizeof(*hdr);
+	hdr->queue_size = (md->size - sizeof(*hdr)) >> 2;
+	hdr->hw_fence_buffer_va = drawctxt->gmu_hw_fence_queue.gmuaddr;
+	hdr->hw_fence_buffer_size = drawctxt->gmu_hw_fence_queue.size;
+}
+
+static u32 get_dq_id(struct adreno_device *adreno_dev, struct kgsl_context *context)
+{
+	struct dq_info *info;
+	u32 next;
+	u32 priority = adreno_get_level(context);
+
+	if (adreno_dev->lpac_enabled)
+		info = &gen7_hfi_dqs_lpac[priority];
+	else
+		info = &gen7_hfi_dqs[priority];
+
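+	/* Round-robin across the dispatch queues reserved for this priority level */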
+	next = info->base_dq_id + info->offset;
+
+	info->offset = (info->offset + 1) % info->max_dq;
+
+	return next;
+}
+
+static int allocate_context_queues(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	int ret = 0;
+
+	if (!adreno_hwsched_context_queue_enabled(adreno_dev))
+		return 0;
+
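+	/* The per-context GMU queues are allocated once and reused until the context is destroyed */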
+	if (test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags) &&
+		!drawctxt->gmu_hw_fence_queue.gmuaddr) {
+		ret = gen7_alloc_gmu_kernel_block(
+			to_gen7_gmu(adreno_dev), &drawctxt->gmu_hw_fence_queue,
+			HW_FENCE_QUEUE_SIZE, GMU_NONCACHED_KERNEL,
+			IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV);
+		if (ret) {
+			memset(&drawctxt->gmu_hw_fence_queue, 0x0,
+				sizeof(drawctxt->gmu_hw_fence_queue));
+			return ret;
+		}
+	}
+
+	if (!drawctxt->gmu_context_queue.gmuaddr) {
+		ret = gen7_alloc_gmu_kernel_block(
+			to_gen7_gmu(adreno_dev), &drawctxt->gmu_context_queue,
+			SZ_4K, GMU_NONCACHED_KERNEL,
+			IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV);
+		if (ret) {
+			memset(&drawctxt->gmu_context_queue, 0x0,
+				sizeof(drawctxt->gmu_context_queue));
+			return ret;
+		}
+		init_gmu_context_queue(drawctxt);
+	}
+
+	return 0;
+}
+
+static int send_context_register(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct hfi_register_ctxt_cmd cmd;
+	struct kgsl_pagetable *pt = context->proc_priv->pagetable;
+	int ret, asid = kgsl_mmu_pagetable_get_asid(pt, context);
+
+	if (asid < 0)
+		return asid;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_REGISTER_CONTEXT);
+	if (ret)
+		return ret;
+
+	ret = allocate_context_queues(adreno_dev, drawctxt);
+	if (ret)
+		return ret;
+
+	cmd.ctxt_id = context->id;
+	cmd.flags = HFI_CTXT_FLAG_NOTIFY | context->flags;
+	/*
+	 * HLOS SMMU driver programs context bank to look up ASID from TTBR0 during a page
+	 * table walk. So the TLB entries are tagged with the ASID from TTBR0. TLBIASID
+	 * invalidates TLB entries whose ASID matches the value that was written to the
+	 * CBn_TLBIASID register. Set ASID along with PT address.
+	 */
+	cmd.pt_addr = kgsl_mmu_pagetable_get_ttbr0(pt) |
+		FIELD_PREP(GENMASK_ULL(63, KGSL_IOMMU_ASID_START_BIT), asid);
+	cmd.ctxt_idr = context->id;
+	cmd.ctxt_bank = kgsl_mmu_pagetable_get_context_bank(pt, context);
+
+	return gen7_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static int send_context_pointers(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct hfi_context_pointers_cmd cmd = {0};
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_CONTEXT_POINTERS);
+	if (ret)
+		return ret;
+
+	cmd.ctxt_id = context->id;
+	cmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, soptimestamp);
+	cmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, eoptimestamp);
+	if (context->user_ctxt_record)
+		cmd.user_ctxt_record_addr =
+			context->user_ctxt_record->memdesc.gpuaddr;
+
+	if (adreno_hwsched_context_queue_enabled(adreno_dev))
+		cmd.gmu_context_queue_addr = drawctxt->gmu_context_queue.gmuaddr;
+
+	return gen7_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static int hfi_context_register(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (context->gmu_registered)
+		return 0;
+
+	ret = send_context_register(adreno_dev, context);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to register context %u: %d\n",
+			context->id, ret);
+
+		if (device->gmu_fault)
+			gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+
+		return ret;
+	}
+
+	ret = send_context_pointers(adreno_dev, context);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to register context %u pointers: %d\n",
+			context->id, ret);
+
+		if (device->gmu_fault)
+			gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+
+		return ret;
+	}
+
+	context->gmu_registered = true;
+	if (adreno_hwsched_context_queue_enabled(adreno_dev))
+		context->gmu_dispatch_queue = UINT_MAX;
+	else
+		context->gmu_dispatch_queue = get_dq_id(adreno_dev, context);
+
+	return 0;
+}
+
+static void populate_ibs(struct adreno_device *adreno_dev,
+	struct hfi_submit_cmd *cmd, struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct hfi_issue_ib *issue_ib;
+	struct kgsl_memobj_node *ib;
+
+	if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) {
+		struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+		struct kgsl_memdesc *big_ib;
+
+		if (test_bit(CMDOBJ_RECURRING_START, &cmdobj->priv))
+			big_ib = hfi->big_ib_recurring;
+		else
+			big_ib = hfi->big_ib;
+		/* The dispatcher ensures that there is only one big IB inflight */
+		cmd->big_ib_gmu_va = big_ib->gmuaddr;
+		cmd->flags |= CMDBATCH_INDIRECT;
+		issue_ib = big_ib->hostptr;
+	} else {
+		issue_ib = (struct hfi_issue_ib *)&cmd[1];
+	}
+
+	list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+		issue_ib->addr = ib->gpuaddr;
+		issue_ib->size = ib->size;
+		issue_ib++;
+	}
+
+	cmd->numibs = cmdobj->numibs;
+}
+
+#define HFI_DSP_IRQ_BASE 2
+
+#define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE)
+
+int gen7_gmu_context_queue_write(struct adreno_device *adreno_dev,
+	struct kgsl_memdesc *gmu_context_queue, u32 *msg, u32 size_bytes,
+	struct kgsl_drawobj *drawobj, struct adreno_submit_time *time)
+{
+	struct gmu_context_queue_header *hdr = gmu_context_queue->hostptr;
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u32 *queue = gmu_context_queue->hostptr + sizeof(*hdr);
+	u32 i, empty_space, write_idx = hdr->write_index, read_idx = hdr->read_index;
+	u32 size_dwords = size_bytes >> 2;
+	u32 align_size = ALIGN(size_dwords, SZ_4);
+	u32 id = MSG_HDR_GET_ID(*msg);
+	struct kgsl_drawobj_cmd *cmdobj = NULL;
+
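+	/*
+	 * The context queue is a circular dword buffer; keep at least one slot
+	 * free so that a full queue can be told apart from an empty one.
+	 */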
+	empty_space = (write_idx >= read_idx) ?
+			(hdr->queue_size - (write_idx - read_idx))
+			: (read_idx - write_idx);
+
+	if (empty_space <= align_size)
+		return -ENOSPC;
+
+	if (!IS_ALIGNED(size_bytes, sizeof(u32)))
+		return -EINVAL;
+
+	for (i = 0; i < size_dwords; i++) {
+		queue[write_idx] = msg[i];
+		write_idx = (write_idx + 1) % hdr->queue_size;
+	}
+
+	/* Fill any unused space at the end of the write buffer with a cookie value */
+	for (; i < align_size; i++) {
+		queue[write_idx] = 0xfafafafa;
+		write_idx = (write_idx + 1) % hdr->queue_size;
+	}
+
+	/* Ensure packet is written out before proceeding */
+	wmb();
+
+	if (!drawobj)
+		goto done;
+
+	if (drawobj->type & SYNCOBJ_TYPE) {
+		struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+
+		trace_adreno_syncobj_submitted(drawobj->context->id, drawobj->timestamp,
+			syncobj->numsyncs, gpudev->read_alwayson(adreno_dev));
+		goto done;
+	}
+
+	cmdobj = CMDOBJ(drawobj);
+
+	gen7_add_profile_events(adreno_dev, cmdobj, time);
+
+	/*
+	 * Put the profiling information in the user profiling buffer.
+	 * The hfi_update_write_idx below has a wmb() before the actual
+	 * write index update to ensure that the GMU does not see the
+	 * packet before the profile data is written out.
+	 */
+	adreno_profile_submit_time(time);
+
+done:
+	trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
+
+	hfi_update_write_idx(&hdr->write_index, write_idx);
+
+	return 0;
+}
+
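+/*
+ * Pick which dispatch queue interrupt bit to raise for this submission: the
+ * context's dispatch queue id when legacy dispatch queues are in use,
+ * otherwise the priority level (preemption) or the LPAC/GC selector.
+ */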
+static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj)
+{
+	if (!adreno_hwsched_context_queue_enabled(adreno_dev))
+		return drawobj->context->gmu_dispatch_queue;
+
+	if (adreno_is_preemption_enabled(adreno_dev))
+		return adreno_get_level(drawobj->context);
+
+	if (kgsl_context_is_lpac(drawobj->context))
+		return 1;
+
+	return 0;
+}
+
+static int add_gmu_waiter(struct adreno_device *adreno_dev,
+	struct dma_fence *fence)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = msm_hw_fence_wait_update(adreno_dev->hwsched.hw_fence.handle,
+			&fence, 1, true);
+
+	if (ret)
+		dev_err_ratelimited(device->dev,
+			"Failed to add GMU as waiter ret:%d fence ctx:%llu ts:%llu\n",
+			ret, fence->context, fence->seqno);
+
+	return ret;
+}
+
+static void populate_kgsl_fence(struct hfi_syncobj *obj,
+	struct dma_fence *fence)
+{
+	struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence;
+	struct kgsl_sync_timeline *ktimeline = kfence->parent;
+	unsigned long flags;
+
+	obj->flags |= BIT(GMU_SYNCOBJ_FLAG_KGSL_FENCE_BIT);
+
+	spin_lock_irqsave(&ktimeline->lock, flags);
+	/* If the context is going away or the dma fence is signaled, mark the fence as triggered */
+	if (!ktimeline->context || dma_fence_is_signaled_locked(fence)) {
+		obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT);
+		spin_unlock_irqrestore(&ktimeline->lock, flags);
+		return;
+	}
+	obj->ctxt_id = ktimeline->context->id;
+	spin_unlock_irqrestore(&ktimeline->lock, flags);
+
+	obj->seq_no = kfence->timestamp;
+}
+
+static int _submit_hw_fence(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj, void *cmdbuf)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	int i, j;
+	u32 cmd_sizebytes;
+	struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+	struct hfi_submit_syncobj *cmd;
+	struct hfi_syncobj *obj = NULL;
+	u32 seqnum;
+
+	/* Account for one hfi_syncobj struct per hardware fence in the sync object */
+	cmd_sizebytes = sizeof(*cmd) +
+			(sizeof(struct hfi_syncobj) *
+			syncobj->num_hw_fence);
+
+	if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE))
+		return -EMSGSIZE;
+
+	memset(cmdbuf, 0x0, cmd_sizebytes);
+	cmd = cmdbuf;
+	cmd->num_syncobj = syncobj->num_hw_fence;
+	obj = (struct hfi_syncobj *)&cmd[1];
+
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i];
+		struct kgsl_sync_fence_cb *kcb = event->handle;
+		struct dma_fence **fences;
+		struct dma_fence_array *array;
+		u32 num_fences;
+
+		if (!kcb)
+			return -EINVAL;
+
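+		/* A sync point may wrap a fence array; walk every backing fence */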
+		array = to_dma_fence_array(kcb->fence);
+		if (array != NULL) {
+			num_fences = array->num_fences;
+			fences = array->fences;
+		} else {
+			num_fences = 1;
+			fences = &kcb->fence;
+		}
+
+		for (j = 0; j < num_fences; j++) {
+
+			/*
+			 * If this sync object has a software only fence, make sure that it is
+			 * already signaled so that we can skip sending this fence to the GMU.
+			 */
+			if (!test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fences[j]->flags)) {
+				if (WARN(!dma_fence_is_signaled(fences[j]),
+					"sync object has unsignaled software fence"))
+					return -EINVAL;
+				continue;
+			}
+
+			if (is_kgsl_fence(fences[j])) {
+				populate_kgsl_fence(obj, fences[j]);
+			} else {
+				int ret = add_gmu_waiter(adreno_dev, fences[j]);
+
+				if (ret) {
+					syncobj->flags &= ~KGSL_SYNCOBJ_HW;
+					return ret;
+				}
+
+				if (test_bit(MSM_HW_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags) ||
+					test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags))
+					obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT);
+
+				obj->ctxt_id = fences[j]->context;
+				obj->seq_no = fences[j]->seqno;
+			}
+			trace_adreno_input_hw_fence(drawobj->context->id, obj->ctxt_id,
+				obj->seq_no, obj->flags, fences[j]->ops->get_timeline_name ?
+				fences[j]->ops->get_timeline_name(fences[j]) : "unknown");
+
+			obj++;
+		}
+	}
+
+	/*
+	 * Attach a timestamp to this SYNCOBJ to keep track of whether the GMU has deemed it
+	 * signaled or not.
+	 */
+	drawobj->timestamp = ++drawctxt->syncobj_timestamp;
+	cmd->timestamp = drawobj->timestamp;
+
+	seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum);
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_SYNCOBJ, HFI_MSG_CMD);
+	cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2);
+
+	return gen7_gmu_context_queue_write(adreno_dev, &drawctxt->gmu_context_queue,
+			(u32 *)cmd, cmd_sizebytes, drawobj, NULL);
+}
+
+int gen7_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	spin_lock(&drawctxt->lock);
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+
+		if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0) {
+			dev_err(&gmu->pdev->dev,
+				"detached ctx:%d has unsignaled fence ts:%d retired:%d\n",
+				drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts);
+			ret = -EINVAL;
+			break;
+		}
+
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+	}
+	spin_unlock(&drawctxt->lock);
+
+	return ret;
+}
+
+/**
+ * move_detached_context_hardware_fences - Move all pending hardware fences belonging to this
+ * context to the detached hardware fence list so as to send them to TxQueue after fault recovery.
+ * This is needed because this context may get destroyed before fault recovery gets executed.
+ */
+static void move_detached_context_hardware_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+
+	/* We don't need the drawctxt lock here because this context has already been detached */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+
+		if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) {
+			_kgsl_context_get(&drawctxt->base);
+			list_move_tail(&entry->node, &hfi->detached_hw_fence_list);
+			continue;
+		}
+
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	/* Also grab all the hardware fences which were never sent to GMU */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) {
+		_kgsl_context_get(&drawctxt->base);
+		list_move_tail(&entry->node, &hfi->detached_hw_fence_list);
+	}
+}
+
+/**
+ * check_detached_context_hardware_fences - When this context has been un-registered with the GMU,
+ * make sure all the hardware fences (that were sent to the GMU) for this context have been sent
+ * to the TxQueue. Also, send to the GMU any hardware fences that were not yet dispatched. In case
+ * of an error, move the pending hardware fences to detached hardware fence list, log the error,
+ * take a snapshot and trigger recovery.
+ */
+static int check_detached_context_hardware_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hw_fence_entry *entry, *tmp;
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	int ret = 0;
+
+	/* We don't need the drawctxt lock because this context has been detached */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+
+		if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) {
+			dev_err(&gmu->pdev->dev,
+				"detached ctx:%d has unsignaled fence ts:%d retired:%d\n",
+				drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts);
+			ret = -EINVAL;
+			goto fault;
+		}
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	/* Send hardware fences (to TxQueue) that were not dispatched to GMU */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) {
+
+		ret = gen7_send_hw_fence_hfi_wait_ack(adreno_dev, entry,
+			HW_FENCE_FLAG_SKIP_MEMSTORE);
+		if (ret)
+			goto fault;
+
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	return 0;
+
+fault:
+	move_detached_context_hardware_fences(adreno_dev, drawctxt);
+	gmu_core_fault_snapshot(device);
+	gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+
+	return ret;
+}
+
+static inline int setup_hw_fence_info_cmd(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry)
+{
+	struct kgsl_sync_fence *kfence = entry->kfence;
+	int ret;
+
+	ret = CMD_MSG_HDR(entry->cmd, H2F_MSG_HW_FENCE_INFO);
+	if (ret)
+		return ret;
+
+	entry->cmd.gmu_ctxt_id = entry->drawctxt->base.id;
+	entry->cmd.ctxt_id = kfence->fence.context;
+	entry->cmd.ts = kfence->fence.seqno;
+
+	entry->cmd.hash_index = kfence->hw_fence_index;
+
+	return 0;
+}
+
+/*
+ * gen7_send_hw_fence_hfi_wait_ack - This function is used in cases where multiple hardware fences
+ * are to be sent to the GMU. Hence, we must send them one by one to avoid overwhelming the GMU
+ * with multiple fences in a short span of time.
+ */
+int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry, u64 flags)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 seqnum;
+	int ret = 0;
+
+	/* Device mutex is necessary to ensure only one hardware fence ack is being waited for */
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	init_completion(&hw_fence_ack.complete);
+
+	entry->cmd.flags |= flags;
+	seqnum = atomic_inc_return(&hfi->hw_fence.seqnum);
+	entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2);
+
+	hw_fence_ack.sent_hdr = entry->cmd.hdr;
+
+	/*
+	 * We don't need to increment the unack count here as we are waiting for the ack for
+	 * this fence before sending another hardware fence.
+	 */
+	ret = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd));
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	if (!ret)
+		ret = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &hw_fence_ack,
+			gen7_hwsched_process_msgq);
+
+	memset(&hw_fence_ack, 0x0, sizeof(hw_fence_ack));
+	return ret;
+}
+
+/**
+ * drawctxt_queue_hw_fence - Add a hardware fence to the draw context's hardware fence list and
+ * make
+ * sure the list remains sorted (with the fence with the largest timestamp at the end)
+ */
+static void drawctxt_queue_hw_fence(struct adreno_context *drawctxt,
+	struct adreno_hw_fence_entry *new)
+{
+	struct adreno_hw_fence_entry *entry = NULL;
+	u32 ts = (u32)new->cmd.ts;
+
+	/* Walk the list backwards to find the right spot for this fence */
+	list_for_each_entry_reverse(entry, &drawctxt->hw_fence_list, node) {
+		if (timestamp_cmp(ts, (u32)entry->cmd.ts) > 0)
+			break;
+	}
+
+	list_add(&new->node, &entry->node);
+}
+
+#define DRAWCTXT_SLOT_AVAILABLE(count)  \
+	((count + 1) < (HW_FENCE_QUEUE_SIZE / sizeof(struct hfi_hw_fence_info)))
+
+/**
+ * allocate_hw_fence_entry - Allocate an entry to keep track of a hardware fence. This is freed
+ * once we know the GMU has sent this fence to the TxQueue.
+ */
+static struct adreno_hw_fence_entry *allocate_hw_fence_entry(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct kgsl_sync_fence *kfence)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct adreno_hw_fence_entry *entry;
+
+	if (!DRAWCTXT_SLOT_AVAILABLE(drawctxt->hw_fence_count))
+		return NULL;
+
+	entry = kmem_cache_zalloc(hwsched->hw_fence_cache, GFP_ATOMIC);
+	if (!entry)
+		return NULL;
+
+	entry->kfence = kfence;
+	entry->drawctxt = drawctxt;
+
+	if (setup_hw_fence_info_cmd(adreno_dev, entry)) {
+		kmem_cache_free(hwsched->hw_fence_cache, entry);
+		return NULL;
+	}
+
+	dma_fence_get(&kfence->fence);
+
+	drawctxt->hw_fence_count++;
+	atomic_inc(&hwsched->hw_fence_count);
+
+	INIT_LIST_HEAD(&entry->node);
+	INIT_LIST_HEAD(&entry->reset_node);
+	return entry;
+}
+
+static bool _hw_fence_end_sleep(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	bool ret;
+
+	spin_lock(&hfi->hw_fence.lock);
+	ret = !test_bit(GEN7_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags);
+	spin_unlock(&hfi->hw_fence.lock);
+
+	return ret;
+}
+
+/**
+ * _hw_fence_sleep() - Check if the thread needs to sleep until the hardware fence unack count
+ * drops to a desired threshold.
+ *
+ * Return: negative error code if the thread was woken up by a signal, or the context became bad in
+ * the meanwhile, or the hardware fence unack count hasn't yet dropped to a desired threshold, or
+ * if fault recovery is imminent.
+ * Otherwise, return 0.
+ */
+static int _hw_fence_sleep(struct adreno_device *adreno_dev, struct adreno_context *drawctxt)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	int ret = 0;
+
+	if (!test_bit(GEN7_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags))
+		return 0;
+
+	spin_unlock(&hfi->hw_fence.lock);
+	spin_unlock(&drawctxt->lock);
+
+	ret = wait_event_interruptible(hfi->hw_fence.unack_wq,
+		_hw_fence_end_sleep(adreno_dev));
+
+	spin_lock(&drawctxt->lock);
+	spin_lock(&hfi->hw_fence.lock);
+
+	/*
+	 * If the thread received a signal, or the context became bad in the meanwhile or the limit
+	 * is still not settled, then return error to avoid creating this hardware fence
+	 */
+	if ((ret == -ERESTARTSYS) || kgsl_context_is_bad(&drawctxt->base) ||
+		test_bit(GEN7_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags))
+		return -EINVAL;
+
+	/*
+	 * If fault recovery is imminent then return error code to avoid creating new hardware
+	 * fences until recovery is complete
+	 */
+	if (test_bit(GEN7_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags))
+		return -EBUSY;
+
+	return ret;
+}
+
+void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev,
+	struct kgsl_sync_fence *kfence)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_sync_timeline *ktimeline = kfence->parent;
+	struct kgsl_context *context = ktimeline->context;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct adreno_hw_fence_entry *entry = NULL;
+	struct msm_hw_fence_create_params params = {0};
+	/* Only allow a single log message per second */
+	static DEFINE_RATELIMIT_STATE(_rs, HZ, 1);
+	struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	u32 retired = 0;
+	int ret = 0;
+	bool destroy_hw_fence = true;
+
+	params.fence = &kfence->fence;
+	params.handle = &kfence->hw_fence_index;
+	kfence->hw_fence_handle = adreno_dev->hwsched.hw_fence.handle;
+
+	ret = msm_hw_fence_create(kfence->hw_fence_handle, &params);
+	if ((ret || IS_ERR_OR_NULL(params.handle))) {
+		if (__ratelimit(&_rs))
+			dev_err(device->dev, "Failed to create ctx:%d ts:%d hardware fence:%d\n",
+				kfence->context_id, kfence->timestamp, ret);
+		return;
+	}
+
+	spin_lock(&drawctxt->lock);
+	spin_lock(&hw_hfi->hw_fence.lock);
+
+	/*
+	 * If we create a hardware fence and this context is going away, we may never dispatch
+	 * this fence to the GMU. Hence, avoid creating one if the context is going away.
+	 */
+	if (kgsl_context_is_bad(context))
+		goto done;
+
+	entry = allocate_hw_fence_entry(adreno_dev, drawctxt, kfence);
+	if (!entry)
+		goto done;
+
+	/* If recovery is imminent, then do not create a hardware fence */
+	if (test_bit(GEN7_HWSCHED_HW_FENCE_ABORT_BIT, &hw_hfi->hw_fence.flags)) {
+		destroy_hw_fence = true;
+		goto done;
+	}
+
+	ret = _hw_fence_sleep(adreno_dev, drawctxt);
+	if (ret)
+		goto done;
+
+	/*
+	 * If this ts hasn't been submitted yet, then store it in the drawctxt hardware fence
+	 * list and return. This fence will be sent to GMU when this ts is dispatched to GMU.
+	 */
+	if (timestamp_cmp(kfence->timestamp, drawctxt->internal_timestamp) > 0) {
+		drawctxt_queue_hw_fence(drawctxt, entry);
+		destroy_hw_fence = false;
+		goto done;
+	}
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &retired);
+
+	/*
+	 * Check if timestamp is retired. If we are in SLUMBER at this point, the timestamp is
+	 * guaranteed to be retired. This way, we don't need the device mutex to check the device
+	 * state explicitly.
+	 */
+	if (timestamp_cmp(retired, kfence->timestamp) >= 0) {
+		kgsl_sync_timeline_signal(ktimeline, kfence->timestamp);
+		goto done;
+	}
+
+	/*
+	 * If the timestamp is not retired, the GMU must already be powered up. This is because the
+	 * SLUMBER thread has to wait on the hardware fence spinlock to make sure the hardware fence
+	 * unack count is zero.
+	 */
+	ret = _send_hw_fence_no_ack(adreno_dev, entry);
+	if (ret) {
+		if (__ratelimit(&_rs))
+			dev_err(&gmu->pdev->dev, "Aborting hw fence for ctx:%d ts:%d ret:%d\n",
+				kfence->context_id, kfence->timestamp, ret);
+		goto done;
+	}
+
+	list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list);
+
+	destroy_hw_fence = false;
+
+done:
+	if (destroy_hw_fence) {
+		msm_hw_fence_destroy(kfence->hw_fence_handle, &kfence->fence);
+		if (entry)
+			gen7_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	spin_unlock(&hw_hfi->hw_fence.lock);
+	spin_unlock(&drawctxt->lock);
+}
+
+/**
+ * setup_hw_fence_deferred_ctxt - The hardware fence(s) from this context couldn't be sent to the
+ * GMU because the hardware fence unack count reached a threshold. Hence, setup this context such
+ * that these hardware fences are sent to the GMU when the unack count drops to a desired threshold.
+ */
+static void setup_hw_fence_deferred_ctxt(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, u32 ts)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+
+	if (!_kgsl_context_get(&drawctxt->base))
+		return;
+
+	hfi->hw_fence.defer_drawctxt = drawctxt;
+	hfi->hw_fence.defer_ts = ts;
+	/*
+	 * Increment the active count so that the device doesn't get powered off until this fence
+	 * has been sent to the GMU.
+	 */
+	gen7_hwsched_active_count_get(adreno_dev);
+}
+
+/**
+ * process_hw_fence_queue - This function walks the draw context's list of hardware fences
+ * and sends the ones which have a timestamp less than or equal to the timestamp that just
+ * got submitted to the GMU.
+ */
+static void process_hw_fence_queue(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, u32 ts)
+{
+	struct adreno_hw_fence_entry *entry = NULL, *next;
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	int ret = 0;
+
+	/* This list is sorted with smallest timestamp at head and highest timestamp at tail */
+	list_for_each_entry_safe(entry, next, &drawctxt->hw_fence_list, node) {
+
+		if (timestamp_cmp((u32)entry->cmd.ts, ts) > 0)
+			return;
+
+		spin_lock(&hfi->hw_fence.lock);
+
+		if (test_bit(GEN7_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags)) {
+			setup_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts);
+			spin_unlock(&hfi->hw_fence.lock);
+			return;
+		}
+
+		ret = _send_hw_fence_no_ack(adreno_dev, entry);
+
+		spin_unlock(&hfi->hw_fence.lock);
+
+		if (ret)
+			return;
+
+		/*
+		 * A fence that is sent to GMU must be added to the drawctxt->hw_fence_inflight_list
+		 * so that we can keep track of when GMU sends it to the TxQueue
+		 */
+		list_del_init(&entry->node);
+		list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list);
+	}
+}
+
+/* Sizes in the functions below are in units of dwords */
+static int gen7_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
+	u32 *msg, u32 size_bytes, struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	u32 *queue;
+	u32 i, write, empty_space;
+	u32 size_dwords = size_bytes >> 2;
+	u32 align_size = ALIGN(size_dwords, SZ_4);
+	u32 id = MSG_HDR_GET_ID(*msg);
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
+		return -EINVAL;
+
+	queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);
+
+	empty_space = (hdr->write_index >= hdr->read_index) ?
+			(hdr->queue_size - (hdr->write_index - hdr->read_index))
+			: (hdr->read_index - hdr->write_index);
+
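+	/*
+	 * For example, with queue_size = 1024, write_index = 1000 and
+	 * read_index = 10, empty_space = 1024 - (1000 - 10) = 34 dwords.
+	 */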
+	if (empty_space <= align_size)
+		return -ENOSPC;
+
+	write = hdr->write_index;
+
+	for (i = 0; i < size_dwords; i++) {
+		queue[write] = msg[i];
+		write = (write + 1) % hdr->queue_size;
+	}
+
+	/* Fill any unused space at the end of the write buffer with a cookie value */
+	for (; i < align_size; i++) {
+		queue[write] = 0xfafafafa;
+		write = (write + 1) % hdr->queue_size;
+	}
+
+	/* Ensure packet is written out before proceeding */
+	wmb();
+
+	if (!cmdobj)
+		goto done;
+
+	gen7_add_profile_events(adreno_dev, cmdobj, time);
+
+	/*
+	 * Put the profiling information in the user profiling buffer.
+	 * The hfi_update_write_idx below has a wmb() before the actual
+	 * write index update to ensure that the GMU does not see the
+	 * packet before the profile data is written out.
+	 */
+	adreno_profile_submit_time(time);
+
+done:
+	trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
+
+	hfi_update_write_idx(&hdr->write_index, write);
+
+	return 0;
+}
+
+int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj)
+{
+	int ret = 0;
+	u32 cmd_sizebytes, seqnum;
+	struct kgsl_drawobj_cmd *cmdobj = NULL;
+	struct hfi_submit_cmd *cmd;
+	struct adreno_submit_time time = {0};
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	static void *cmdbuf;
+
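+	/* Scratch buffer for building HFI submission packets; allocated once on first use */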
+	if (cmdbuf == NULL) {
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		cmdbuf = devm_kzalloc(&device->pdev->dev, HFI_MAX_MSG_SIZE,
+				GFP_KERNEL);
+		if (!cmdbuf)
+			return -ENOMEM;
+	}
+
+	ret = hfi_context_register(adreno_dev, drawobj->context);
+	if (ret)
+		return ret;
+
+	if ((drawobj->type & SYNCOBJ_TYPE) != 0)
+		return _submit_hw_fence(adreno_dev, drawobj, cmdbuf);
+
+	cmdobj = CMDOBJ(drawobj);
+
+	/*
+	 * If the MARKER object is retired, it doesn't need to be dispatched to GMU. Simply trigger
+	 * any pending fences that are less than/equal to this object's timestamp.
+	 */
+	if (test_bit(CMDOBJ_MARKER_EXPIRED, &cmdobj->priv)) {
+		spin_lock(&drawctxt->lock);
+		process_hw_fence_queue(adreno_dev, drawctxt, drawobj->timestamp);
+		spin_unlock(&drawctxt->lock);
+		return 0;
+	}
+
+	/* Add one hfi_issue_ib struct per IB */
+	if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS ||
+		test_bit(CMDOBJ_SKIP, &cmdobj->priv))
+		cmd_sizebytes = sizeof(*cmd);
+	else
+		cmd_sizebytes = sizeof(*cmd) +
+			(sizeof(struct hfi_issue_ib) * cmdobj->numibs);
+
+	if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE))
+		return -EMSGSIZE;
+
+	memset(cmdbuf, 0x0, cmd_sizebytes);
+
+	cmd = cmdbuf;
+
+	cmd->ctxt_id = drawobj->context->id;
+	cmd->flags = HFI_CTXT_FLAG_NOTIFY;
+	if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME)
+		cmd->flags |= CMDBATCH_EOF;
+
+	cmd->ts = drawobj->timestamp;
+
+	if (test_bit(CMDOBJ_SKIP, &cmdobj->priv))
+		goto skipib;
+
+	populate_ibs(adreno_dev, cmd, cmdobj);
+
+	if ((drawobj->flags & KGSL_DRAWOBJ_PROFILING) &&
+		cmdobj->profiling_buf_entry) {
+
+		time.drawobj = drawobj;
+
+		cmd->profile_gpuaddr_lo =
+			lower_32_bits(cmdobj->profiling_buffer_gpuaddr);
+		cmd->profile_gpuaddr_hi =
+			upper_32_bits(cmdobj->profiling_buffer_gpuaddr);
+
+		/* Indicate to GMU to do user profiling for this submission */
+		cmd->flags |= CMDBATCH_PROFILING;
+	}
+
+skipib:
+	adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj);
+
+	seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum);
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD);
+	cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2);
+
+	if (adreno_hwsched_context_queue_enabled(adreno_dev))
+		ret = gen7_gmu_context_queue_write(adreno_dev,
+			&drawctxt->gmu_context_queue, (u32 *)cmd, cmd_sizebytes, drawobj, &time);
+	else
+		ret = gen7_hfi_dispatch_queue_write(adreno_dev,
+			HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue,
+			(u32 *)cmd, cmd_sizebytes, cmdobj, &time);
+	if (ret)
+		return ret;
+
+	/* Send interrupt to GMU to receive the message */
+	gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_HOST2GMU_INTR_SET,
+		DISPQ_IRQ_BIT(get_irq_bit(adreno_dev, drawobj)));
+
+	spin_lock(&drawctxt->lock);
+	process_hw_fence_queue(adreno_dev, drawctxt, drawobj->timestamp);
+	/*
+	 * We need to update the internal timestamp while holding the drawctxt lock since we have to
+	 * check it in the hardware fence creation path, where we are not taking the device mutex.
+	 */
+	drawctxt->internal_timestamp = drawobj->timestamp;
+	spin_unlock(&drawctxt->lock);
+
+	return 0;
+}
+
+int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct hfi_submit_cmd *cmd;
+	struct kgsl_memobj_node *ib;
+	u32 cmd_sizebytes;
+	int ret;
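+	/* Tracks whether the recurring workload currently holds an active count */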
+	static bool active;
+
+	if (adreno_gpu_halt(adreno_dev) || adreno_hwsched_gpu_fault(adreno_dev))
+		return -EBUSY;
+
+	if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) {
+		cmdobj->numibs = 0;
+	} else {
+		list_for_each_entry(ib, &cmdobj->cmdlist, node)
+			cmdobj->numibs++;
+	}
+
+	if (cmdobj->numibs > HWSCHED_MAX_IBS)
+		return -EINVAL;
+
+	if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS)
+		cmd_sizebytes = sizeof(*cmd);
+	else
+		cmd_sizebytes = sizeof(*cmd) +
+			(sizeof(struct hfi_issue_ib) * cmdobj->numibs);
+
+	if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE))
+		return -EMSGSIZE;
+
+	cmd = kzalloc(cmd_sizebytes, GFP_KERNEL);
+	if (cmd == NULL)
+		return -ENOMEM;
+
+	if (test_bit(CMDOBJ_RECURRING_START, &cmdobj->priv)) {
+		if (!active) {
+			ret = adreno_active_count_get(adreno_dev);
+			if (ret) {
+				kfree(cmd);
+				return ret;
+			}
+			active = true;
+		}
+		cmd->flags |= CMDBATCH_RECURRING_START;
+		populate_ibs(adreno_dev, cmd, cmdobj);
+	} else
+		cmd->flags |= CMDBATCH_RECURRING_STOP;
+
+	cmd->ctxt_id = drawobj->context->id;
+
+	ret = hfi_context_register(adreno_dev, drawobj->context);
+	if (ret) {
+		adreno_active_count_put(adreno_dev);
+		active = false;
+		kfree(cmd);
+		return ret;
+	}
+
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD);
+
+	ret = gen7_hfi_send_cmd_async(adreno_dev, cmd, sizeof(*cmd));
+
+	kfree(cmd);
+
+	if (ret) {
+		adreno_active_count_put(adreno_dev);
+		active = false;
+		return ret;
+	}
+
+	if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) {
+		adreno_hwsched_retire_cmdobj(hwsched, hwsched->recurring_cmdobj);
+		del_timer_sync(&hwsched->lsr_timer);
+		hwsched->recurring_cmdobj = NULL;
+		if (active)
+			adreno_active_count_put(adreno_dev);
+		active = false;
+		return ret;
+	}
+
+	hwsched->recurring_cmdobj = cmdobj;
+	/* Start the LSR timer for power stats collection */
+	mod_timer(&hwsched->lsr_timer, jiffies + msecs_to_jiffies(10));
+	return ret;
+}
+
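+/*
+ * Signal a hardware fence from the CPU: write the result directly into the
+ * TxQueue and raise the IPCC signal towards APSS without involving the GMU.
+ */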
+void gen7_trigger_hw_fence_cpu(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry)
+{
+	int ret = msm_hw_fence_update_txq(adreno_dev->hwsched.hw_fence.handle,
+			entry->cmd.hash_index, 0, 0);
+
+	if (ret) {
+		dev_err_ratelimited(adreno_dev->dev.dev,
+			"Failed to trigger hw fence via cpu: ctx:%d ts:%d ret:%d\n",
+			entry->drawctxt->base.id, (u32)entry->cmd.ts, ret);
+		return;
+	}
+
+	msm_hw_fence_trigger_signal(adreno_dev->hwsched.hw_fence.handle, IPCC_CLIENT_GPU,
+		IPCC_CLIENT_APSS, 0);
+}
+
+/* We don't want to unnecessarily wake the GMU to trigger hardware fences */
+static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) {
+
+		gen7_trigger_hw_fence_cpu(adreno_dev, entry);
+
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+	}
+}
+
+int gen7_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+	int ret = 0;
+
+	/* We don't need the drawctxt lock here as this context has already been invalidated */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) {
+
+		/* Any error here is fatal */
+		ret = gen7_send_hw_fence_hfi_wait_ack(adreno_dev, entry,
+			HW_FENCE_FLAG_SKIP_MEMSTORE);
+		if (ret)
+			break;
+
+		gen7_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	return ret;
+}
+
+static void trigger_context_unregister_fault(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev));
+
+	/* Make sure we send all fences from this context to the TxQueue after recovery */
+	move_detached_context_hardware_fences(adreno_dev, drawctxt);
+	gen7_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+static int send_context_unregister_hfi(struct adreno_device *adreno_dev,
+	struct kgsl_context *context, u32 ts)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct pending_cmd pending_ack;
+	struct hfi_unregister_ctxt_cmd cmd;
+	u32 seqnum;
+	int ret;
+
+	/* Only send HFI if device is not in SLUMBER */
+	if (!context->gmu_registered ||
+		!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) {
+		drain_context_hw_fence_cpu(adreno_dev, drawctxt);
+		return 0;
+	}
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_UNREGISTER_CONTEXT);
+	if (ret)
+		return ret;
+
+	cmd.ctxt_id = context->id;
+	cmd.ts = ts;
+
+	/*
+	 * Although we know device is powered on, we can still enter SLUMBER
+	 * because the wait for ack below is done without holding the mutex. So
+	 * take an active count before releasing the mutex so as to avoid a
+	 * concurrent SLUMBER sequence while GMU is un-registering this context.
+	 */
+	ret = gen7_hwsched_active_count_get(adreno_dev);
+	if (ret) {
+		trigger_context_unregister_fault(adreno_dev, drawctxt);
+		return ret;
+	}
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+	add_waiter(hfi, cmd.hdr, &pending_ack);
+
+	ret = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (ret) {
+		trigger_context_unregister_fault(adreno_dev, drawctxt);
+		goto done;
+	}
+
+	ret = adreno_hwsched_ctxt_unregister_wait_completion(adreno_dev,
+		&gmu->pdev->dev, &pending_ack, gen7_hwsched_process_msgq, &cmd);
+	if (ret) {
+		trigger_context_unregister_fault(adreno_dev, drawctxt);
+		goto done;
+	}
+
+	ret = check_detached_context_hardware_fences(adreno_dev, drawctxt);
+	if (!ret)
+		ret = check_ack_failure(adreno_dev, &pending_ack);
+
+done:
+	gen7_hwsched_active_count_put(adreno_dev);
+	del_waiter(hfi, &pending_ack);
+
+	return ret;
+}
+
+void gen7_hwsched_context_detach(struct adreno_context *drawctxt)
+{
+	struct kgsl_context *context = &drawctxt->base;
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret = 0;
+
+	mutex_lock(&device->mutex);
+
+	ret = send_context_unregister_hfi(adreno_dev, context,
+		drawctxt->internal_timestamp);
+
+	if (!ret) {
+		kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+			drawctxt->timestamp);
+
+		kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+			drawctxt->timestamp);
+
+		adreno_profile_process_results(adreno_dev);
+	}
+
+	context->gmu_registered = false;
+
+	mutex_unlock(&device->mutex);
+}
+
+u32 gen7_hwsched_preempt_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (device->state != KGSL_STATE_ACTIVE)
+		return 0;
+
+	return gen7_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_PREEMPT_COUNT);
+}
+
+void gen7_hwsched_context_destroy(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	if (!adreno_hwsched_context_queue_enabled(adreno_dev))
+		return;
+
+	if (drawctxt->gmu_context_queue.gmuaddr)
+		gen7_free_gmu_block(to_gen7_gmu(adreno_dev), &drawctxt->gmu_context_queue);
+
+	if (drawctxt->gmu_hw_fence_queue.gmuaddr)
+		gen7_free_gmu_block(to_gen7_gmu(adreno_dev), &drawctxt->gmu_hw_fence_queue);
+}
+
+static int register_global_ctxt(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct hfi_register_ctxt_cmd rcmd = {0};
+	struct hfi_context_pointers_cmd pcmd = {0};
+	int ret;
+
+	if (hwsched->global_ctxt_gmu_registered)
+		return 0;
+
+	if (adreno_hwsched_context_queue_enabled(adreno_dev) && !hwsched->global_ctxtq.hostptr) {
+		struct gmu_context_queue_header *hdr;
+
+		ret = gen7_alloc_gmu_kernel_block(to_gen7_gmu(adreno_dev), &hwsched->global_ctxtq,
+			SZ_4K, GMU_NONCACHED_KERNEL, IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV);
+		if (ret) {
+			memset(&hwsched->global_ctxtq, 0x0, sizeof(hwsched->global_ctxtq));
+			return ret;
+		}
+
+		hdr = hwsched->global_ctxtq.hostptr;
+		hdr->start_addr = hwsched->global_ctxtq.gmuaddr + sizeof(*hdr);
+		hdr->queue_size = (hwsched->global_ctxtq.size - sizeof(*hdr)) >> 2;
+	}
+
+	ret = CMD_MSG_HDR(rcmd, H2F_MSG_REGISTER_CONTEXT);
+	if (ret)
+		return ret;
+
+	rcmd.ctxt_id = KGSL_GLOBAL_CTXT_ID;
+	rcmd.flags = (KGSL_CONTEXT_PRIORITY_HIGH << KGSL_CONTEXT_PRIORITY_SHIFT);
+
+	ret = gen7_hfi_send_cmd_async(adreno_dev, &rcmd, sizeof(rcmd));
+	if (ret)
+		return ret;
+
+	ret = CMD_MSG_HDR(pcmd, H2F_MSG_CONTEXT_POINTERS);
+	if (ret)
+		return ret;
+
+	pcmd.ctxt_id = KGSL_GLOBAL_CTXT_ID;
+	pcmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, KGSL_GLOBAL_CTXT_ID, soptimestamp);
+	pcmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, KGSL_GLOBAL_CTXT_ID, eoptimestamp);
+
+	if (adreno_hwsched_context_queue_enabled(adreno_dev))
+		pcmd.gmu_context_queue_addr = hwsched->global_ctxtq.gmuaddr;
+
+	ret = gen7_hfi_send_cmd_async(adreno_dev, &pcmd, sizeof(pcmd));
+	if (!ret)
+		hwsched->global_ctxt_gmu_registered = true;
+
+	return ret;
+}
+
+static int submit_global_ctxt_cmd(struct adreno_device *adreno_dev, u64 gpuaddr, u32 size)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct {
+		struct hfi_submit_cmd submit_cmd;
+		struct hfi_issue_ib issue_ib;
+	} cmd = {0};
+	u32 seqnum, cmd_size = sizeof(cmd);
+	static u32 ts;
+	int ret;
+
+	cmd.submit_cmd.ctxt_id = KGSL_GLOBAL_CTXT_ID;
+	cmd.submit_cmd.ts = ++ts;
+	cmd.submit_cmd.numibs = 1;
+
+	cmd.issue_ib.addr = gpuaddr;
+	cmd.issue_ib.size = size;
+
+	seqnum = atomic_inc_return(&hwsched->submission_seqnum);
+	cmd.submit_cmd.hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD);
+	cmd.submit_cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.submit_cmd.hdr, seqnum, cmd_size >> 2);
+
+	if (adreno_hwsched_context_queue_enabled(adreno_dev))
+		ret = gen7_gmu_context_queue_write(adreno_dev,
+			  &hwsched->global_ctxtq, (u32 *)&cmd, cmd_size, NULL, NULL);
+	else
+		ret = gen7_hfi_dispatch_queue_write(adreno_dev, HFI_DSP_ID_0,
+			(u32 *)&cmd, cmd_size, NULL, NULL);
+
+	/* Interrupt the GMU so that it picks up the message */
+	if (!ret)
+		gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, DISPQ_IRQ_BIT(0));
+
+	return ret;
+}
+
+int gen7_hwsched_counter_inline_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 val, *cmds, count = 0;
+	int ret;
+
+	ret = register_global_ctxt(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = adreno_allocate_global(device, &hfi->perfctr_scratch,
+		PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "perfctr_scratch");
+	if (ret)
+		goto err;
+
+	if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)
+		gen7_perfcounter_update(adreno_dev, reg, false,
+				FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags);
+
+	cmds = hfi->perfctr_scratch->hostptr;
+
+	cmds[count++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+	cmds[count++] = cp_type4_packet(reg->select, 1);
+	cmds[count++] = countable;
+
+	ret = submit_global_ctxt_cmd(adreno_dev, hfi->perfctr_scratch->gpuaddr, count << 2);
+	if (ret)
+		goto err;
+
+	/* Wait till the register is programmed with the countable */
+	ret = kgsl_regmap_read_poll_timeout(&device->regmap, reg->select, val,
+				val == countable, 100, ADRENO_IDLE_TIMEOUT);
+	if (!ret) {
+		reg->value = 0;
+		return ret;
+	}
+
+err:
+	dev_err(device->dev, "Perfcounter %s/%u/%u start via commands failed\n",
+			group->name, counter, countable);
+
+	return ret;
+}
+
+int gen7_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev)
+{
+	struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev);
+	struct adreno_context *drawctxt = NULL;
+	u32 ts = 0;
+	int ret = 0;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
+		return 0;
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	drawctxt = hfi->hw_fence.defer_drawctxt;
+	ts = hfi->hw_fence.defer_ts;
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	if (!drawctxt)
+		goto done;
+
+	ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts);
+
+	kgsl_context_put(&drawctxt->base);
+	gen7_hwsched_active_count_put(adreno_dev);
+
+done:
+	_disable_hw_fence_throttle(adreno_dev, true);
+
+	return ret;
+}

+ 363 - 0
qcom/opensource/graphics-kernel/adreno_gen7_hwsched_hfi.h

@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_GEN7_HWSCHED_HFI_H_
+#define _ADRENO_GEN7_HWSCHED_HFI_H_
+
+/* Maximum number of IBs in a submission */
+#define HWSCHED_MAX_NUMIBS \
+	((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \
+		/ sizeof(struct hfi_issue_ib))
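As a rough illustration of how HWSCHED_MAX_NUMIBS falls out of the message layout, the standalone sketch below redoes the same arithmetic with placeholder sizes; HFI_MAX_MSG_SIZE and the two struct layouts used here are stand-ins, not the driver's real definitions.

/* Illustrative only: placeholder sizes, not the driver's real HFI definitions */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define HFI_MAX_MSG_SIZE_BYTES 4096		/* assumed message size for the example */

struct issue_ib_sketch {			/* stand-in for struct hfi_issue_ib */
	uint64_t addr;
	uint32_t size;
};

struct issue_cmd_sketch {			/* stand-in for struct hfi_issue_cmd_cmd */
	uint32_t hdr;
	uint32_t ctxt_id;
	uint32_t flags;
	uint32_t ts;
	struct issue_ib_sketch ibs[];		/* IB table fills the rest of the message */
};

int main(void)
{
	/* Space left after the fixed part of the command is carved into IB slots */
	size_t max_numibs = (HFI_MAX_MSG_SIZE_BYTES -
		offsetof(struct issue_cmd_sketch, ibs)) / sizeof(struct issue_ib_sketch);

	printf("max IBs per submission with these placeholder sizes: %zu\n", max_numibs);
	return 0;
}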
+
+/*
+ * This is used to put userspace threads to sleep when hardware fence unack count reaches a
+ * threshold. This bit is cleared in two scenarios:
+ * 1. If the hardware fence unack count drops to a desired threshold.
+ * 2. If there is a GMU/GPU fault, because we don't want the threads to keep sleeping through fault
+ *    recovery, which can easily take hundreds of milliseconds to complete.
+ */
+#define GEN7_HWSCHED_HW_FENCE_SLEEP_BIT	0x0
+
+/*
+ * This is used to avoid creating any more hardware fences until the hardware fence unack count
+ * drops to a desired threshold. This bit is required in cases where GEN7_HWSCHED_HW_FENCE_SLEEP_BIT
+ * will be cleared, but we still want to avoid creating any more hardware fences. For example, if
+ * hardware fence unack count reaches a maximum threshold, both GEN7_HWSCHED_HW_FENCE_SLEEP_BIT and
+ * GEN7_HWSCHED_HW_FENCE_MAX_BIT will be set. Say, a GMU/GPU fault happens and
+ * GEN7_HWSCHED_HW_FENCE_SLEEP_BIT will be cleared to wake up any sleeping threads. But,
+ * GEN7_HWSCHED_HW_FENCE_MAX_BIT will remain set to avoid creating any new hardware fences until
+ * recovery is complete and deferred drawctxt (if any) is handled.
+ */
+#define GEN7_HWSCHED_HW_FENCE_MAX_BIT	0x1
+
+/*
+ * This is used to avoid creating any more hardware fences until concurrent reset/recovery completes
+ */
+#define GEN7_HWSCHED_HW_FENCE_ABORT_BIT 0x2
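A minimal user-space sketch of how these three bits gate fence creation, mirroring the semantics described in the comments above (the helper is illustrative and is not the driver's actual decision path):

#include <stdbool.h>
#include <stdio.h>

#define HW_FENCE_SLEEP_BIT	0x0	/* same bit positions as the defines above */
#define HW_FENCE_MAX_BIT	0x1
#define HW_FENCE_ABORT_BIT	0x2

/*
 * Illustrative gate: creation of a new hardware fence is refused while the MAX
 * or ABORT bit is set; a set SLEEP bit means the calling thread should block
 * until the unack count drains below the threshold.
 */
static bool hw_fence_creation_allowed(unsigned long flags, bool *should_sleep)
{
	*should_sleep = !!(flags & (1UL << HW_FENCE_SLEEP_BIT));

	return !(flags & ((1UL << HW_FENCE_MAX_BIT) | (1UL << HW_FENCE_ABORT_BIT)));
}

int main(void)
{
	unsigned long flags = (1UL << HW_FENCE_SLEEP_BIT) | (1UL << HW_FENCE_MAX_BIT);
	bool should_sleep;
	bool allowed = hw_fence_creation_allowed(flags, &should_sleep);

	printf("allowed=%d should_sleep=%d\n", allowed, should_sleep);
	return 0;
}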
+
+struct gen7_hwsched_hfi {
+	struct hfi_mem_alloc_entry mem_alloc_table[32];
+	u32 mem_alloc_entries;
+	/** @irq_mask: Store the hfi interrupt mask */
+	u32 irq_mask;
+	/** @msglock: To protect the list of un-ACKed hfi packets */
+	rwlock_t msglock;
+	/** @msglist: List of un-ACKed hfi packets */
+	struct list_head msglist;
+	/** @f2h_task: Task for processing gmu fw to host packets */
+	struct task_struct *f2h_task;
+	/** @f2h_wq: Waitqueue for the f2h_task */
+	wait_queue_head_t f2h_wq;
+	/** @big_ib: GMU buffer to hold big IBs */
+	struct kgsl_memdesc *big_ib;
+	/** @big_ib_recurring: GMU buffer to hold big recurring IBs */
+	struct kgsl_memdesc *big_ib_recurring;
+	/** @perfctr_scratch: Buffer to hold perfcounter PM4 commands */
+	struct kgsl_memdesc *perfctr_scratch;
+	/** @msgq_mutex: Mutex for accessing the msgq */
+	struct mutex msgq_mutex;
+	struct {
+		/** @lock: Spinlock for managing hardware fences */
+		spinlock_t lock;
+		/**
+		 * @unack_count: Number of hardware fences sent to the GMU that have not yet been
+		 * acked by the GMU
+		 */
+		u32 unack_count;
+		/**
+		 * @unack_wq: Waitqueue to wait on until the number of unacked hardware fences
+		 * drops to a desired threshold
+		 */
+		wait_queue_head_t unack_wq;
+		/**
+		 * @defer_drawctxt: Drawctxt to send hardware fences from as soon as the unacked
+		 * hardware fence count drops to a desired threshold
+		 */
+		struct adreno_context *defer_drawctxt;
+		/**
+		 * @defer_ts: The timestamp of the hardware fence which got deferred
+		 */
+		u32 defer_ts;
+		/**
+		 * @flags: Flags to control the creation of new hardware fences
+		 */
+		unsigned long flags;
+		/** @seqnum: Sequence number for hardware fence packet header */
+		atomic_t seqnum;
+	} hw_fence;
+	/**
+	 * @hw_fence_timer: Timer to trigger a fault if the unacked hardware fence count doesn't
+	 * drop to a desired threshold in a given amount of time
+	 */
+	struct timer_list hw_fence_timer;
+	/**
+	 * @hw_fence_ws: Work struct that gets scheduled when hw_fence_timer expires
+	 */
+	struct work_struct hw_fence_ws;
+	/** @detached_hw_fence_list: List of hardware fences belonging to detached contexts */
+	struct list_head detached_hw_fence_list;
+	/** @defer_hw_fence_work: The work structure to send deferred hardware fences to GMU */
+	struct kthread_work defer_hw_fence_work;
+};
+
+struct kgsl_drawobj_cmd;
+
+/**
+ * gen7_hwsched_hfi_probe - Probe hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hwsched_hfi_probe(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_hfi_remove - Release hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ */
+void gen7_hwsched_hfi_remove(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_hfi_init - Initialize hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to initialize hfi resources
+ * once before the very first gmu boot
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hwsched_hfi_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_hfi_start - Start hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Send the various hfi packets before booting the gpu
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_hfi_stop - Stop the hfi resources
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function does the hfi cleanup when powering down the gmu
+ */
+void gen7_hwsched_hfi_stop(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_cp_init - Send CP_INIT via HFI
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to send CP INIT packet and bring
+ * GPU out of secure mode using hfi raw packets.
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hwsched_cp_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_counter_inline_enable - Configure a performance counter for a countable
+ * @adreno_dev: Adreno device to configure
+ * @group: Desired performance counter group
+ * @counter: Desired performance counter in the group
+ * @countable: Desired countable
+ *
+ * Physically set up a counter within a group with the desired countable.
+ *
+ * Return: 0 on success or negative error on failure.
+ */
+int gen7_hwsched_counter_inline_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable);
+
+/**
+ * gen7_hfi_send_cmd_async - Send an hfi packet
+ * @adreno_dev: Pointer to adreno device structure
+ * @data: Data to be sent in the hfi packet
+ * @size_bytes: Size of the packet in bytes
+ *
+ * Send data in the form of an HFI packet to gmu and wait for
+ * its ack asynchronously
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes);
+
+/**
+ * gen7_hwsched_submit_drawobj - Dispatch IBs to dispatch queues
+ * @adreno_dev: Pointer to adreno device structure
+ * @drawobj: The command draw object which needs to be submitted
+ *
+ * This function is used to register the context if needed and submit
+ * IBs to the hfi dispatch queues.
+ *
+ * Return: 0 on success and negative error on failure
+ */
+int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj *drawobj);
+
+/**
+ * gen7_hwsched_context_detach - Unregister a context with GMU
+ * @drawctxt: Pointer to the adreno context
+ *
+ * This function sends context unregister HFI and waits for the ack
+ * to ensure all submissions from this context have retired
+ */
+void gen7_hwsched_context_detach(struct adreno_context *drawctxt);
+
+/* Helper function to get to gen7 hwsched hfi device from adreno device */
+struct gen7_hwsched_hfi *to_gen7_hwsched_hfi(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_preempt_count_get - Get preemption count from GMU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * This function sends a GET_VALUE HFI packet to get the number of
+ * preemptions completed since last SLUMBER exit.
+ *
+ * Return: Preemption count
+ */
+u32 gen7_hwsched_preempt_count_get(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_lpac_cp_init - Send CP_INIT to LPAC via HFI
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to send CP INIT packet to LPAC and
+ * enable submission to LPAC queue.
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hwsched_lpac_cp_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_lpac_feature_ctrl - Send the lpac feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_lpac_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_context_destroy - Destroy any hwsched related resources during context destruction
+ * @adreno_dev: Pointer to adreno device
+ * @drawctxt: Pointer to the adreno context
+ *
+ * This function destroys any hwsched related resources when this context is destroyed
+ */
+void gen7_hwsched_context_destroy(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt);
+
+/**
+ * gen7_hwsched_hfi_get_value - Send GET_VALUE packet to GMU to get the value of a property
+ * @adreno_dev: Pointer to adreno device
+ * @prop: property to get from GMU
+ *
+ * This function sends a GET_VALUE HFI packet to query the value of a property
+ *
+ * Return: On success, return the value in the GMU response. On failure, return 0
+ */
+u32 gen7_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop);
+
+/**
+ * gen7_send_hw_fence_hfi_wait_ack - Send hardware fence info to GMU
+ * @adreno_dev: Pointer to adreno device
+ * @entry: Pointer to the adreno hardware fence entry
+ * @flags: Flags for this hardware fence
+ *
+ * Send the hardware fence info to the GMU and wait for the ack
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry, u64 flags);
+
+/**
+ * gen7_hwsched_create_hw_fence - Create a hardware fence
+ * @adreno_dev: Pointer to adreno device
+ * @kfence: Pointer to the kgsl fence
+ *
+ * Create a hardware fence, set up hardware fence info and send it to GMU if required
+ */
+void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev,
+	struct kgsl_sync_fence *kfence);
+
+/**
+ * gen7_hwsched_drain_context_hw_fences - Drain context's hardware fences via GMU
+ * @adreno_dev: Pointer to adreno device
+ * @drawctxt: Pointer to the adreno context which is to be flushed
+ *
+ * Trigger hardware fences that were never dispatched to GMU
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen7_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt);
+
+/**
+ * gen7_hwsched_check_context_inflight_hw_fences - Check whether all hardware fences
+ * from a context have been sent to the TxQueue or not
+ * @adreno_dev: Pointer to adreno device
+ * @drawctxt: Pointer to the adreno context which is to be flushed
+ *
+ * Check if all hardware fences from this context have been sent to the
+ * TxQueue. If not, log an error and return error code.
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen7_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt);
+
+/**
+ * gen7_remove_hw_fence_entry - Remove hardware fence entry
+ * @adreno_dev: pointer to the adreno device
+ * @entry: Pointer to the hardware fence entry
+ */
+void gen7_remove_hw_fence_entry(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry);
+
+/**
+ * gen7_trigger_hw_fence_cpu - Trigger hardware fence from cpu
+ * @adreno_dev: pointer to the adreno device
+ * @fence: hardware fence entry to be triggered
+ *
+ * Trigger the hardware fence by sending it to GMU's TxQueue and raise the
+ * interrupt from GMU to APPS
+ */
+void gen7_trigger_hw_fence_cpu(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *fence);
+
+/**
+ * gen7_hwsched_disable_hw_fence_throttle - Disable hardware fence throttling after reset
+ * @adreno_dev: pointer to the adreno device
+ *
+ * After device reset, clear hardware fence related data structures, send any hardware fences
+ * that were deferred prior to reset, and re-open the gates for hardware fence creation
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen7_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_process_msgq - Process msgq
+ * @adreno_dev: pointer to the adreno device
+ *
+ * This function grabs the msgq mutex and processes msgq for any outstanding hfi packets
+ */
+void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_boot_gpu - Send the command to boot GPU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * Send the hfi to boot the GPU and check the ack; in case of a failure,
+ * take a snapshot and capture registers of interest.
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen7_hwsched_boot_gpu(struct adreno_device *adreno_dev);
+
+#endif

+ 1216 - 0
qcom/opensource/graphics-kernel/adreno_gen7_perfcounter.c

@@ -0,0 +1,1216 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_gen7_hwsched_hfi.h"
+#include "adreno_perfcounter.h"
+#include "adreno_pm4types.h"
+#include "kgsl_device.h"
+
+/*
+ * For registers that do not get restored on power cycle, read the value and add
+ * the stored shadow value
+ */
+static u64 gen7_counter_read_norestore(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 hi, lo;
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	return ((((u64) hi) << 32) | lo) + reg->value;
+}
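A standalone sketch of the hi/lo combination used above: the two 32-bit halves are stitched into a 64-bit value and the saved shadow (reg->value) is added back so the counter appears continuous across a power cycle (the sample values are made up):

#include <stdint.h>
#include <stdio.h>

/* Combine a 32-bit high/low register pair and add the software shadow captured
 * before the last power cycle; mirrors gen7_counter_read_norestore() above.
 */
static uint64_t combine_norestore(uint32_t hi, uint32_t lo, uint64_t shadow)
{
	return ((((uint64_t)hi) << 32) | lo) + shadow;
}

int main(void)
{
	/* e.g. hi=0x1, lo=0x80000000 read from the counter, 0x10 counted before suspend */
	printf("0x%llx\n", (unsigned long long)combine_norestore(0x1, 0x80000000u, 0x10));
	return 0;
}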
+
+static int gen7_counter_br_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	int ret = 0;
+	u32 val = 0;
+
+	kgsl_regread(device, GEN7_CP_APERTURE_CNTL_HOST, &val);
+	kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BR));
+
+	ret = gen7_perfcounter_update(adreno_dev, reg, true,
+					FIELD_PREP(GENMASK(13, 12), PIPE_BR), group->flags);
+	kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val);
+
+	/* Ensure all writes are posted before accessing the piped register */
+	mb();
+
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
+
+static int gen7_counter_bv_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	int ret = 0;
+	u32 val = 0;
+
+	kgsl_regread(device, GEN7_CP_APERTURE_CNTL_HOST, &val);
+	kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(13, 12), PIPE_BV));
+
+	ret = gen7_perfcounter_update(adreno_dev, reg, true,
+					FIELD_PREP(GENMASK(13, 12), PIPE_BV), group->flags);
+	kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, val);
+
+	/* Ensure all writes are posted before accessing the piped register */
+	mb();
+
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
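The BR and BV variants above differ only in the pipe written into bits [13:12] of GEN7_CP_APERTURE_CNTL_HOST before the piped select register is touched. The sketch below redoes that field arithmetic in user space; the PIPE_* values are placeholders rather than the real gen7 enum.

#include <stdint.h>
#include <stdio.h>

/* User-space stand-ins for the kernel's GENMASK()/FIELD_PREP() helpers */
#define GENMASK_U32(h, l)	((~0u << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP_U32(mask, val) \
	(((uint32_t)(val) << __builtin_ctz(mask)) & (mask))

enum pipe_sel { PIPE_NONE = 0, PIPE_BR = 1, PIPE_BV = 2 };	/* placeholder values */

int main(void)
{
	uint32_t mask = GENMASK_U32(13, 12);	/* 0x3000: aperture pipe field */

	/* The enable paths write one of these into the aperture register, program
	 * the counter select, then restore the previous aperture value.
	 */
	printf("PIPE_BR aperture field: 0x%x\n", FIELD_PREP_U32(mask, PIPE_BR));
	printf("PIPE_BV aperture field: 0x%x\n", FIELD_PREP_U32(mask, PIPE_BV));
	return 0;
}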
+
+static int gen7_counter_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	int ret = 0;
+
+	ret = gen7_perfcounter_update(adreno_dev, reg, true,
+					FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags);
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
+
+static int gen7_hwsched_counter_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	if (KGSL_DEVICE(adreno_dev)->state != KGSL_STATE_ACTIVE)
+		return gen7_counter_enable(adreno_dev, group, counter, countable);
+
+	return gen7_hwsched_counter_inline_enable(adreno_dev, group, counter, countable);
+}
+
+/* This function is specific to sw-scheduler and not applicable for hw-scheduler */
+static int gen7_counter_inline_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0];
+	u32 cmds[3];
+	int ret;
+
+	/* Fallback when we reach here from GPU initialization sequence */
+	if (device->state != KGSL_STATE_ACTIVE)
+		return gen7_counter_enable(adreno_dev, group, counter,
+			countable);
+
+	gen7_perfcounter_update(adreno_dev, reg, false,
+				FIELD_PREP(GENMASK(13, 12), PIPE_NONE), group->flags);
+
+	cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+	cmds[1] = cp_type4_packet(reg->select, 1);
+	cmds[2] = countable;
+
+	/* submit to highest priority RB always */
+	ret = gen7_ringbuffer_addcmds(adreno_dev, rb, NULL,
+		F_NOTPROTECTED, cmds, 3, 0, NULL);
+	if (ret)
+		return ret;
+
+	/*
+	 * schedule dispatcher to make sure rb[0] is run, because
+	 * if the current RB is not rb[0] and gpu is idle then
+	 * rb[0] will not get scheduled to run
+	 */
+	if (adreno_dev->cur_rb != rb)
+		adreno_dispatcher_schedule(device);
+
+	/* wait for the above commands submitted to complete */
+	ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp,
+		ADRENO_IDLE_TIMEOUT);
+
+	if (ret) {
+		/*
+		 * If we were woken up because of cancelling rb events
+		 * either due to soft reset or adreno_stop, ignore the
+		 * error and return 0 here. The perfcounter is already
+		 * set up in software and it will be programmed in
+		 * hardware when we wake up or come up after soft reset,
+		 * by adreno_perfcounter_restore.
+		 */
+		if (ret == -EAGAIN)
+			ret = 0;
+		else
+			dev_err_ratelimited(device->dev,
+				     "Perfcounter %s/%u/%u start via commands failed %d\n",
+				     group->name, counter, countable, ret);
+	}
+
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
+
+static u64 gen7_counter_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 hi, lo;
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	/* These registers are restored on power resume */
+	return (((u64) hi) << 32) | lo;
+}
+
+static int gen7_counter_gbif_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	unsigned int shift = counter << 3;
+	unsigned int select = BIT(counter);
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, select);
+	kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, 0);
+
+	/* select the desired countable */
+	kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift);
+
+	/* enable counter */
+	kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_EN, select, select);
+
+	reg->value = 0;
+	return 0;
+}
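Each GBIF counter owns one byte lane of its select register plus one enable/clear bit (the power-counter variant simply uses bits 16 and up), so the register masks fall out of the counter index alone. A small standalone sketch of that arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	for (unsigned int counter = 0; counter < 4; counter++) {
		unsigned int shift = counter << 3;		/* byte lane in the select register */
		uint32_t sel_mask = 0xffu << shift;		/* bits rewritten by kgsl_regrmw() */
		uint32_t en_bit = 1u << counter;		/* perf counter enable/clear bit */
		uint32_t pwr_en_bit = 1u << (16 + counter);	/* power counter variant */

		printf("counter %u: select mask 0x%08x, enable 0x%08x, pwr enable 0x%08x\n",
			counter, sel_mask, en_bit, pwr_en_bit);
	}
	return 0;
}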
+
+static int gen7_counter_gbif_pwr_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	unsigned int shift = counter << 3;
+	unsigned int select = BIT(16 + counter);
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, select);
+	kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, 0);
+
+	/* select the desired countable */
+	kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift);
+
+	/* Enable the counter */
+	kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_EN, select, select);
+
+	reg->value = 0;
+	return 0;
+}
+
+static int gen7_counter_alwayson_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	return 0;
+}
+
+static u64 gen7_counter_alwayson_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter)
+{
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	return gpudev->read_alwayson(adreno_dev) + reg->value;
+}
+
+static void gen7_write_gmu_counter_enable(struct kgsl_device *device,
+		struct adreno_perfcount_register *reg, u32 bit, u32 countable)
+{
+	kgsl_regrmw(device, reg->select, 0xff << bit, countable << bit);
+}
+
+static int gen7_counter_gmu_xoclk_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	/*
+	 * Counters [0:3] are in select 0 at bit offsets 0, 8, 16 and 24
+	 * Counters [4:5] are in select 1 at bit offsets 0 and 8
+	 * Counters [6:9] are in select 2 at bit offsets 0, 8, 16 and 24
+	 * Counter [10] is in select 3 at bit offset 0
+	 */
+
+	if (counter == 4 || counter == 5)
+		counter -= 4;
+	else if (counter >= 6 && counter <= 9)
+		counter -= 6;
+	else if (counter == 10)
+		counter = 0;
+
+	gen7_write_gmu_counter_enable(device, reg, counter * 8, countable);
+
+	reg->value = 0;
+
+	kgsl_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
+
+	return 0;
+}
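The remapping above packs eleven XOCLK counters into four byte-wide select registers. Below is a standalone sketch of the counter-to-(select, bit-offset) mapping described in the comment; the select register numbering is taken from that comment, not from register definitions.

#include <stdio.h>

/*
 * Map an XOCLK counter index onto its select register and byte offset,
 * following the layout documented above:
 *   counters 0-3 -> select 0, offsets 0/8/16/24
 *   counters 4-5 -> select 1, offsets 0/8
 *   counters 6-9 -> select 2, offsets 0/8/16/24
 *   counter  10  -> select 3, offset 0
 */
static void xoclk_slot(unsigned int counter, unsigned int *select, unsigned int *bit)
{
	if (counter <= 3) {
		*select = 0;
		*bit = counter * 8;
	} else if (counter <= 5) {
		*select = 1;
		*bit = (counter - 4) * 8;
	} else if (counter <= 9) {
		*select = 2;
		*bit = (counter - 6) * 8;
	} else {
		*select = 3;
		*bit = 0;
	}
}

int main(void)
{
	for (unsigned int i = 0; i <= 10; i++) {
		unsigned int select, bit;

		xoclk_slot(i, &select, &bit);
		printf("counter %2u -> select %u, bit offset %2u\n", i, select, bit);
	}
	return 0;
}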
+
+static int gen7_counter_gmu_gmuclk_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	/*
+	 * The two counters are stuck into GMU_CX_GMU_POWER_COUNTER_SELECT_1
+	 * at bit offset 16 and 24
+	 * at bit offsets 16 and 24
+	gen7_write_gmu_counter_enable(device, reg,
+		16 + (counter * 8), countable);
+
+	kgsl_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static int gen7_counter_gmu_perf_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	/*
+	 * Counters [0:3] are in select 1 at bit offsets 0, 8, 16 and 24
+	 * Counters [4:5] are in select 2 at bit offsets 0 and 8
+	 */
+
+	if (counter >= 4)
+		counter -= 4;
+
+	gen7_write_gmu_counter_enable(device, reg, counter * 8, countable);
+
+	kgsl_regwrite(device, GEN7_GMU_CX_GMU_PERF_COUNTER_ENABLE, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static struct adreno_perfcount_register gen7_perfcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_0_LO,
+		GEN7_RBBM_PERFCTR_CP_0_HI, -1, GEN7_CP_PERFCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_1_LO,
+		GEN7_RBBM_PERFCTR_CP_1_HI, -1, GEN7_CP_PERFCTR_CP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_2_LO,
+		GEN7_RBBM_PERFCTR_CP_2_HI, -1, GEN7_CP_PERFCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_3_LO,
+		GEN7_RBBM_PERFCTR_CP_3_HI, -1, GEN7_CP_PERFCTR_CP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_4_LO,
+		GEN7_RBBM_PERFCTR_CP_4_HI, -1, GEN7_CP_PERFCTR_CP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_5_LO,
+		GEN7_RBBM_PERFCTR_CP_5_HI, -1, GEN7_CP_PERFCTR_CP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_6_LO,
+		GEN7_RBBM_PERFCTR_CP_6_HI, -1, GEN7_CP_PERFCTR_CP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_7_LO,
+		GEN7_RBBM_PERFCTR_CP_7_HI, -1, GEN7_CP_PERFCTR_CP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_8_LO,
+		GEN7_RBBM_PERFCTR_CP_8_HI, -1, GEN7_CP_PERFCTR_CP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_9_LO,
+		GEN7_RBBM_PERFCTR_CP_9_HI, -1, GEN7_CP_PERFCTR_CP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_10_LO,
+		GEN7_RBBM_PERFCTR_CP_10_HI, -1, GEN7_CP_PERFCTR_CP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_11_LO,
+		GEN7_RBBM_PERFCTR_CP_11_HI, -1, GEN7_CP_PERFCTR_CP_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_12_LO,
+		GEN7_RBBM_PERFCTR_CP_12_HI, -1, GEN7_CP_PERFCTR_CP_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_13_LO,
+		GEN7_RBBM_PERFCTR_CP_13_HI, -1, GEN7_CP_PERFCTR_CP_SEL_13 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_0_LO,
+		GEN7_RBBM_PERFCTR2_CP_0_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_1_LO,
+		GEN7_RBBM_PERFCTR2_CP_1_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_2_LO,
+		GEN7_RBBM_PERFCTR2_CP_2_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_3_LO,
+		GEN7_RBBM_PERFCTR2_CP_3_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_4_LO,
+		GEN7_RBBM_PERFCTR2_CP_4_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_5_LO,
+		GEN7_RBBM_PERFCTR2_CP_5_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_6_LO,
+		GEN7_RBBM_PERFCTR2_CP_6_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_6 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_rbbm[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_0_LO,
+		GEN7_RBBM_PERFCTR_RBBM_0_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_1_LO,
+		GEN7_RBBM_PERFCTR_RBBM_1_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_2_LO,
+		GEN7_RBBM_PERFCTR_RBBM_2_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_3_LO,
+		GEN7_RBBM_PERFCTR_RBBM_3_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_0_LO,
+		GEN7_RBBM_PERFCTR_PC_0_HI, -1, GEN7_PC_PERFCTR_PC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_1_LO,
+		GEN7_RBBM_PERFCTR_PC_1_HI, -1, GEN7_PC_PERFCTR_PC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_2_LO,
+		GEN7_RBBM_PERFCTR_PC_2_HI, -1, GEN7_PC_PERFCTR_PC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_3_LO,
+		GEN7_RBBM_PERFCTR_PC_3_HI, -1, GEN7_PC_PERFCTR_PC_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_4_LO,
+		GEN7_RBBM_PERFCTR_PC_4_HI, -1, GEN7_PC_PERFCTR_PC_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_5_LO,
+		GEN7_RBBM_PERFCTR_PC_5_HI, -1, GEN7_PC_PERFCTR_PC_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_6_LO,
+		GEN7_RBBM_PERFCTR_PC_6_HI, -1, GEN7_PC_PERFCTR_PC_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_7_LO,
+		GEN7_RBBM_PERFCTR_PC_7_HI, -1, GEN7_PC_PERFCTR_PC_SEL_7 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_0_LO,
+		GEN7_RBBM_PERFCTR_BV_PC_0_HI, -1, GEN7_PC_PERFCTR_PC_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_1_LO,
+		GEN7_RBBM_PERFCTR_BV_PC_1_HI, -1, GEN7_PC_PERFCTR_PC_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_2_LO,
+		GEN7_RBBM_PERFCTR_BV_PC_2_HI, -1, GEN7_PC_PERFCTR_PC_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_3_LO,
+		GEN7_RBBM_PERFCTR_BV_PC_3_HI, -1, GEN7_PC_PERFCTR_PC_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_4_LO,
+		GEN7_RBBM_PERFCTR_BV_PC_4_HI, -1, GEN7_PC_PERFCTR_PC_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_5_LO,
+		GEN7_RBBM_PERFCTR_BV_PC_5_HI, -1, GEN7_PC_PERFCTR_PC_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_6_LO,
+		GEN7_RBBM_PERFCTR_BV_PC_6_HI, -1, GEN7_PC_PERFCTR_PC_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_7_LO,
+		GEN7_RBBM_PERFCTR_BV_PC_7_HI, -1, GEN7_PC_PERFCTR_PC_SEL_15 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_0_LO,
+		GEN7_RBBM_PERFCTR_VFD_0_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_1_LO,
+		GEN7_RBBM_PERFCTR_VFD_1_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_2_LO,
+		GEN7_RBBM_PERFCTR_VFD_2_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_3_LO,
+		GEN7_RBBM_PERFCTR_VFD_3_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_4_LO,
+		GEN7_RBBM_PERFCTR_VFD_4_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_5_LO,
+		GEN7_RBBM_PERFCTR_VFD_5_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_6_LO,
+		GEN7_RBBM_PERFCTR_VFD_6_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_7_LO,
+		GEN7_RBBM_PERFCTR_VFD_7_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_7 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_0_LO,
+		GEN7_RBBM_PERFCTR_BV_VFD_0_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_1_LO,
+		GEN7_RBBM_PERFCTR_BV_VFD_1_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_2_LO,
+		GEN7_RBBM_PERFCTR_BV_VFD_2_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_3_LO,
+		GEN7_RBBM_PERFCTR_BV_VFD_3_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_4_LO,
+		GEN7_RBBM_PERFCTR_BV_VFD_4_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_5_LO,
+		GEN7_RBBM_PERFCTR_BV_VFD_5_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_6_LO,
+		GEN7_RBBM_PERFCTR_BV_VFD_6_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_7_LO,
+		GEN7_RBBM_PERFCTR_BV_VFD_7_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_15 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_0_LO,
+		GEN7_RBBM_PERFCTR_HLSQ_0_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_1_LO,
+		GEN7_RBBM_PERFCTR_HLSQ_1_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_2_LO,
+		GEN7_RBBM_PERFCTR_HLSQ_2_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_3_LO,
+		GEN7_RBBM_PERFCTR_HLSQ_3_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_4_LO,
+		GEN7_RBBM_PERFCTR_HLSQ_4_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_5_LO,
+		GEN7_RBBM_PERFCTR_HLSQ_5_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_5 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_HLSQ_0_LO,
+		GEN7_RBBM_PERFCTR2_HLSQ_0_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_HLSQ_1_LO,
+		GEN7_RBBM_PERFCTR2_HLSQ_1_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_HLSQ_2_LO,
+		GEN7_RBBM_PERFCTR2_HLSQ_2_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_HLSQ_3_LO,
+		GEN7_RBBM_PERFCTR2_HLSQ_3_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_HLSQ_4_LO,
+		GEN7_RBBM_PERFCTR2_HLSQ_4_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_HLSQ_5_LO,
+		GEN7_RBBM_PERFCTR2_HLSQ_5_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_5 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_0_LO,
+		GEN7_RBBM_PERFCTR_VPC_0_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_1_LO,
+		GEN7_RBBM_PERFCTR_VPC_1_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_2_LO,
+		GEN7_RBBM_PERFCTR_VPC_2_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_3_LO,
+		GEN7_RBBM_PERFCTR_VPC_3_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_4_LO,
+		GEN7_RBBM_PERFCTR_VPC_4_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_5_LO,
+		GEN7_RBBM_PERFCTR_VPC_5_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_5 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_0_LO,
+		GEN7_RBBM_PERFCTR_BV_VPC_0_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_1_LO,
+		GEN7_RBBM_PERFCTR_BV_VPC_1_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_2_LO,
+		GEN7_RBBM_PERFCTR_BV_VPC_2_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_3_LO,
+		GEN7_RBBM_PERFCTR_BV_VPC_3_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_4_LO,
+		GEN7_RBBM_PERFCTR_BV_VPC_4_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_5_LO,
+		GEN7_RBBM_PERFCTR_BV_VPC_5_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_11 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_ccu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_0_LO,
+		GEN7_RBBM_PERFCTR_CCU_0_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_1_LO,
+		GEN7_RBBM_PERFCTR_CCU_1_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_2_LO,
+		GEN7_RBBM_PERFCTR_CCU_2_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_3_LO,
+		GEN7_RBBM_PERFCTR_CCU_3_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_4_LO,
+		GEN7_RBBM_PERFCTR_CCU_4_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_4 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_0_LO,
+		GEN7_RBBM_PERFCTR_TSE_0_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_1_LO,
+		GEN7_RBBM_PERFCTR_TSE_1_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_2_LO,
+		GEN7_RBBM_PERFCTR_TSE_2_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_3_LO,
+		GEN7_RBBM_PERFCTR_TSE_3_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_TSE_0_LO,
+		GEN7_RBBM_PERFCTR_BV_TSE_0_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_TSE_1_LO,
+		GEN7_RBBM_PERFCTR_BV_TSE_1_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_TSE_2_LO,
+		GEN7_RBBM_PERFCTR_BV_TSE_2_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_TSE_3_LO,
+		GEN7_RBBM_PERFCTR_BV_TSE_3_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_0_LO,
+		GEN7_RBBM_PERFCTR_RAS_0_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_1_LO,
+		GEN7_RBBM_PERFCTR_RAS_1_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_2_LO,
+		GEN7_RBBM_PERFCTR_RAS_2_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_3_LO,
+		GEN7_RBBM_PERFCTR_RAS_3_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_RAS_0_LO,
+		GEN7_RBBM_PERFCTR_BV_RAS_0_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_RAS_1_LO,
+		GEN7_RBBM_PERFCTR_BV_RAS_1_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_RAS_2_LO,
+		GEN7_RBBM_PERFCTR_BV_RAS_2_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_RAS_3_LO,
+		GEN7_RBBM_PERFCTR_BV_RAS_3_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_0_LO,
+		GEN7_RBBM_PERFCTR_UCHE_0_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_1_LO,
+		GEN7_RBBM_PERFCTR_UCHE_1_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_2_LO,
+		GEN7_RBBM_PERFCTR_UCHE_2_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_3_LO,
+		GEN7_RBBM_PERFCTR_UCHE_3_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_4_LO,
+		GEN7_RBBM_PERFCTR_UCHE_4_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_5_LO,
+		GEN7_RBBM_PERFCTR_UCHE_5_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_6_LO,
+		GEN7_RBBM_PERFCTR_UCHE_6_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_7_LO,
+		GEN7_RBBM_PERFCTR_UCHE_7_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_8_LO,
+		GEN7_RBBM_PERFCTR_UCHE_8_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_9_LO,
+		GEN7_RBBM_PERFCTR_UCHE_9_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_10_LO,
+		GEN7_RBBM_PERFCTR_UCHE_10_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_11_LO,
+		GEN7_RBBM_PERFCTR_UCHE_11_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_11 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_gen7_2_0_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_0_LO,
+		GEN7_RBBM_PERFCTR_UCHE_0_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_1_LO,
+		GEN7_RBBM_PERFCTR_UCHE_1_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_2_LO,
+		GEN7_RBBM_PERFCTR_UCHE_2_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_3_LO,
+		GEN7_RBBM_PERFCTR_UCHE_3_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_4_LO,
+		GEN7_RBBM_PERFCTR_UCHE_4_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_5_LO,
+		GEN7_RBBM_PERFCTR_UCHE_5_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_6_LO,
+		GEN7_RBBM_PERFCTR_UCHE_6_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_7_LO,
+		GEN7_RBBM_PERFCTR_UCHE_7_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_8_LO,
+		GEN7_RBBM_PERFCTR_UCHE_8_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_9_LO,
+		GEN7_RBBM_PERFCTR_UCHE_9_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_10_LO,
+		GEN7_RBBM_PERFCTR_UCHE_10_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_11_LO,
+		GEN7_RBBM_PERFCTR_UCHE_11_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_12_LO,
+		GEN7_RBBM_PERFCTR_UCHE_12_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_13_LO,
+		GEN7_RBBM_PERFCTR_UCHE_13_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_14_LO,
+		GEN7_RBBM_PERFCTR_UCHE_14_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_15_LO,
+		GEN7_RBBM_PERFCTR_UCHE_15_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_15 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_16_LO,
+		GEN7_RBBM_PERFCTR_UCHE_16_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_16 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_17_LO,
+		GEN7_RBBM_PERFCTR_UCHE_17_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_17 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_18_LO,
+		GEN7_RBBM_PERFCTR_UCHE_18_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_18 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_19_LO,
+		GEN7_RBBM_PERFCTR_UCHE_19_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_19 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_20_LO,
+		GEN7_RBBM_PERFCTR_UCHE_20_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_20 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_21_LO,
+		GEN7_RBBM_PERFCTR_UCHE_21_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_21 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_22_LO,
+		GEN7_RBBM_PERFCTR_UCHE_22_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_22 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_23_LO,
+		GEN7_RBBM_PERFCTR_UCHE_23_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_23 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_gen7_9_0_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_0_LO,
+		GEN7_RBBM_PERFCTR_UCHE_0_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_1_LO,
+		GEN7_RBBM_PERFCTR_UCHE_1_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_2_LO,
+		GEN7_RBBM_PERFCTR_UCHE_2_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_3_LO,
+		GEN7_RBBM_PERFCTR_UCHE_3_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_4_LO,
+		GEN7_RBBM_PERFCTR_UCHE_4_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_5_LO,
+		GEN7_RBBM_PERFCTR_UCHE_5_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_6_LO,
+		GEN7_RBBM_PERFCTR_UCHE_6_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_7_LO,
+		GEN7_RBBM_PERFCTR_UCHE_7_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_8_LO,
+		GEN7_RBBM_PERFCTR_UCHE_8_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_9_LO,
+		GEN7_RBBM_PERFCTR_UCHE_9_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_10_LO,
+		GEN7_RBBM_PERFCTR_UCHE_10_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_11_LO,
+		GEN7_RBBM_PERFCTR_UCHE_11_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_12_LO,
+		GEN7_RBBM_PERFCTR_UCHE_12_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_13_LO,
+		GEN7_RBBM_PERFCTR_UCHE_13_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_14_LO,
+		GEN7_RBBM_PERFCTR_UCHE_14_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_15_LO,
+		GEN7_RBBM_PERFCTR_UCHE_15_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_16_LO,
+		GEN7_RBBM_PERFCTR_UCHE_16_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_17_LO,
+		GEN7_RBBM_PERFCTR_UCHE_17_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_18_LO,
+		GEN7_RBBM_PERFCTR_UCHE_18_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_19_LO,
+		GEN7_RBBM_PERFCTR_UCHE_19_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_20_LO,
+		GEN7_RBBM_PERFCTR_UCHE_20_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_21_LO,
+		GEN7_RBBM_PERFCTR_UCHE_21_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_22_LO,
+		GEN7_RBBM_PERFCTR_UCHE_22_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_23_LO,
+		GEN7_RBBM_PERFCTR_UCHE_23_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2_11 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_0_LO,
+		GEN7_RBBM_PERFCTR_TP_0_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_1_LO,
+		GEN7_RBBM_PERFCTR_TP_1_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_2_LO,
+		GEN7_RBBM_PERFCTR_TP_2_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_3_LO,
+		GEN7_RBBM_PERFCTR_TP_3_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_4_LO,
+		GEN7_RBBM_PERFCTR_TP_4_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_5_LO,
+		GEN7_RBBM_PERFCTR_TP_5_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_6_LO,
+		GEN7_RBBM_PERFCTR_TP_6_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_7_LO,
+		GEN7_RBBM_PERFCTR_TP_7_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_8_LO,
+		GEN7_RBBM_PERFCTR_TP_8_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_9_LO,
+		GEN7_RBBM_PERFCTR_TP_9_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_10_LO,
+		GEN7_RBBM_PERFCTR_TP_10_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_11_LO,
+		GEN7_RBBM_PERFCTR_TP_11_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_11 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_0_LO,
+		GEN7_RBBM_PERFCTR2_TP_0_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_1_LO,
+		GEN7_RBBM_PERFCTR2_TP_1_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_2_LO,
+		GEN7_RBBM_PERFCTR2_TP_2_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_3_LO,
+		GEN7_RBBM_PERFCTR2_TP_3_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_15 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_4_LO,
+		GEN7_RBBM_PERFCTR2_TP_4_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_16 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_5_LO,
+		GEN7_RBBM_PERFCTR2_TP_5_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_17 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_0_LO,
+		GEN7_RBBM_PERFCTR_SP_0_HI, -1, GEN7_SP_PERFCTR_SP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_1_LO,
+		GEN7_RBBM_PERFCTR_SP_1_HI, -1, GEN7_SP_PERFCTR_SP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_2_LO,
+		GEN7_RBBM_PERFCTR_SP_2_HI, -1, GEN7_SP_PERFCTR_SP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_3_LO,
+		GEN7_RBBM_PERFCTR_SP_3_HI, -1, GEN7_SP_PERFCTR_SP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_4_LO,
+		GEN7_RBBM_PERFCTR_SP_4_HI, -1, GEN7_SP_PERFCTR_SP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_5_LO,
+		GEN7_RBBM_PERFCTR_SP_5_HI, -1, GEN7_SP_PERFCTR_SP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_6_LO,
+		GEN7_RBBM_PERFCTR_SP_6_HI, -1, GEN7_SP_PERFCTR_SP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_7_LO,
+		GEN7_RBBM_PERFCTR_SP_7_HI, -1, GEN7_SP_PERFCTR_SP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_8_LO,
+		GEN7_RBBM_PERFCTR_SP_8_HI, -1, GEN7_SP_PERFCTR_SP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_9_LO,
+		GEN7_RBBM_PERFCTR_SP_9_HI, -1, GEN7_SP_PERFCTR_SP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_10_LO,
+		GEN7_RBBM_PERFCTR_SP_10_HI, -1, GEN7_SP_PERFCTR_SP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_11_LO,
+		GEN7_RBBM_PERFCTR_SP_11_HI, -1, GEN7_SP_PERFCTR_SP_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_12_LO,
+		GEN7_RBBM_PERFCTR_SP_12_HI, -1, GEN7_SP_PERFCTR_SP_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_13_LO,
+		GEN7_RBBM_PERFCTR_SP_13_HI, -1, GEN7_SP_PERFCTR_SP_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_14_LO,
+		GEN7_RBBM_PERFCTR_SP_14_HI, -1, GEN7_SP_PERFCTR_SP_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_15_LO,
+		GEN7_RBBM_PERFCTR_SP_15_HI, -1, GEN7_SP_PERFCTR_SP_SEL_15 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_16_LO,
+		GEN7_RBBM_PERFCTR_SP_16_HI, -1, GEN7_SP_PERFCTR_SP_SEL_16 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_17_LO,
+		GEN7_RBBM_PERFCTR_SP_17_HI, -1, GEN7_SP_PERFCTR_SP_SEL_17 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_18_LO,
+		GEN7_RBBM_PERFCTR_SP_18_HI, -1, GEN7_SP_PERFCTR_SP_SEL_18 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_19_LO,
+		GEN7_RBBM_PERFCTR_SP_19_HI, -1, GEN7_SP_PERFCTR_SP_SEL_19 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_20_LO,
+		GEN7_RBBM_PERFCTR_SP_20_HI, -1, GEN7_SP_PERFCTR_SP_SEL_20 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_21_LO,
+		GEN7_RBBM_PERFCTR_SP_21_HI, -1, GEN7_SP_PERFCTR_SP_SEL_21 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_22_LO,
+		GEN7_RBBM_PERFCTR_SP_22_HI, -1, GEN7_SP_PERFCTR_SP_SEL_22 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_23_LO,
+		GEN7_RBBM_PERFCTR_SP_23_HI, -1, GEN7_SP_PERFCTR_SP_SEL_23 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_0_LO,
+		GEN7_RBBM_PERFCTR2_SP_0_HI, -1, GEN7_SP_PERFCTR_SP_SEL_24 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_1_LO,
+		GEN7_RBBM_PERFCTR2_SP_1_HI, -1, GEN7_SP_PERFCTR_SP_SEL_25 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_2_LO,
+		GEN7_RBBM_PERFCTR2_SP_2_HI, -1, GEN7_SP_PERFCTR_SP_SEL_26 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_3_LO,
+		GEN7_RBBM_PERFCTR2_SP_3_HI, -1, GEN7_SP_PERFCTR_SP_SEL_27 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_4_LO,
+		GEN7_RBBM_PERFCTR2_SP_4_HI, -1, GEN7_SP_PERFCTR_SP_SEL_28 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_5_LO,
+		GEN7_RBBM_PERFCTR2_SP_5_HI, -1, GEN7_SP_PERFCTR_SP_SEL_29 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_6_LO,
+		GEN7_RBBM_PERFCTR2_SP_6_HI, -1, GEN7_SP_PERFCTR_SP_SEL_30 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_7_LO,
+		GEN7_RBBM_PERFCTR2_SP_7_HI, -1, GEN7_SP_PERFCTR_SP_SEL_31 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_8_LO,
+		GEN7_RBBM_PERFCTR2_SP_8_HI, -1, GEN7_SP_PERFCTR_SP_SEL_32 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_9_LO,
+		GEN7_RBBM_PERFCTR2_SP_9_HI, -1, GEN7_SP_PERFCTR_SP_SEL_33 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_10_LO,
+		GEN7_RBBM_PERFCTR2_SP_10_HI, -1, GEN7_SP_PERFCTR_SP_SEL_34 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_11_LO,
+		GEN7_RBBM_PERFCTR2_SP_11_HI, -1, GEN7_SP_PERFCTR_SP_SEL_35 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_0_LO,
+		GEN7_RBBM_PERFCTR_RB_0_HI, -1, GEN7_RB_PERFCTR_RB_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_1_LO,
+		GEN7_RBBM_PERFCTR_RB_1_HI, -1, GEN7_RB_PERFCTR_RB_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_2_LO,
+		GEN7_RBBM_PERFCTR_RB_2_HI, -1, GEN7_RB_PERFCTR_RB_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_3_LO,
+		GEN7_RBBM_PERFCTR_RB_3_HI, -1, GEN7_RB_PERFCTR_RB_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_4_LO,
+		GEN7_RBBM_PERFCTR_RB_4_HI, -1, GEN7_RB_PERFCTR_RB_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_5_LO,
+		GEN7_RBBM_PERFCTR_RB_5_HI, -1, GEN7_RB_PERFCTR_RB_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_6_LO,
+		GEN7_RBBM_PERFCTR_RB_6_HI, -1, GEN7_RB_PERFCTR_RB_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_7_LO,
+		GEN7_RBBM_PERFCTR_RB_7_HI, -1, GEN7_RB_PERFCTR_RB_SEL_7 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_vsc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VSC_0_LO,
+		GEN7_RBBM_PERFCTR_VSC_0_HI, -1, GEN7_VSC_PERFCTR_VSC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VSC_1_LO,
+		GEN7_RBBM_PERFCTR_VSC_1_HI, -1, GEN7_VSC_PERFCTR_VSC_SEL_1 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_lrz[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_0_LO,
+		GEN7_RBBM_PERFCTR_LRZ_0_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_1_LO,
+		GEN7_RBBM_PERFCTR_LRZ_1_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_2_LO,
+		GEN7_RBBM_PERFCTR_LRZ_2_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_3_LO,
+		GEN7_RBBM_PERFCTR_LRZ_3_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_lrz[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_LRZ_0_LO,
+		GEN7_RBBM_PERFCTR_BV_LRZ_0_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_LRZ_1_LO,
+		GEN7_RBBM_PERFCTR_BV_LRZ_1_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_LRZ_2_LO,
+		GEN7_RBBM_PERFCTR_BV_LRZ_2_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_LRZ_3_LO,
+		GEN7_RBBM_PERFCTR_BV_LRZ_3_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_cmp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_0_LO,
+		GEN7_RBBM_PERFCTR_CMP_0_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_1_LO,
+		GEN7_RBBM_PERFCTR_CMP_1_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_2_LO,
+		GEN7_RBBM_PERFCTR_CMP_2_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_3_LO,
+		GEN7_RBBM_PERFCTR_CMP_3_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_ufc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_0_LO,
+		GEN7_RBBM_PERFCTR_UFC_0_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_1_LO,
+		GEN7_RBBM_PERFCTR_UFC_1_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_2_LO,
+		GEN7_RBBM_PERFCTR_UFC_2_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_3_LO,
+		GEN7_RBBM_PERFCTR_UFC_3_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_bv_ufc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_UFC_0_LO,
+		GEN7_RBBM_PERFCTR2_UFC_0_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_UFC_1_LO,
+		GEN7_RBBM_PERFCTR2_UFC_1_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_5 },
+};
+
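+/*
+ * All four GBIF counters share GEN7_GBIF_PERF_CNT_SEL as their select
+ * register; the enable callback programs each counter's countable into
+ * its own field of that shared register.
+ */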
+static struct adreno_perfcount_register gen7_perfcounters_gbif[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW0,
+		GEN7_GBIF_PERF_CNT_HIGH0, -1, GEN7_GBIF_PERF_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW1,
+		GEN7_GBIF_PERF_CNT_HIGH1, -1, GEN7_GBIF_PERF_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW2,
+		GEN7_GBIF_PERF_CNT_HIGH2, -1, GEN7_GBIF_PERF_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW3,
+		GEN7_GBIF_PERF_CNT_HIGH3, -1, GEN7_GBIF_PERF_CNT_SEL },
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_gbif_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PWR_CNT_LOW0,
+		GEN7_GBIF_PWR_CNT_HIGH0, -1, GEN7_GBIF_PERF_PWR_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PWR_CNT_LOW1,
+		GEN7_GBIF_PWR_CNT_HIGH1, -1, GEN7_GBIF_PERF_PWR_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PWR_CNT_LOW2,
+		GEN7_GBIF_PWR_CNT_HIGH2, -1, GEN7_GBIF_PERF_PWR_CNT_SEL },
+};
+
+#define GMU_COUNTER(lo, hi, sel) \
+	{ .countable = KGSL_PERFCOUNTER_NOT_USED, \
+	  .offset = lo, .offset_hi = hi, .select = sel }
+
+#define GMU_COUNTER_RESERVED(lo, hi, sel) \
+	{ .countable = KGSL_PERFCOUNTER_BROKEN, \
+	  .offset = lo, .offset_hi = hi, .select = sel }
+
+static struct adreno_perfcount_register gen7_perfcounters_gmu_xoclk[] = {
+	/*
+	 * COUNTER_XOCLK_0 and COUNTER_XOCLK_4 are used for the GPU
+	 * busy and ifpc count. Mark them as reserved to ensure they
+	 * are not re-used.
+	 */
+	GMU_COUNTER_RESERVED(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0),
+	GMU_COUNTER_RESERVED(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_10_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_10_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_3),
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_gmu_gmuclk[] = {
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_L,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_H,
+		GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1),
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_gmu_perf[] = {
+	GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_0_L,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_0_H,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_1_L,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_1_H,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_2_L,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_2_H,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_3_L,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_3_H,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_4_L,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_4_H,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_1),
+	GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_5_L,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_5_H,
+		GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_1),
+};
+
+static struct adreno_perfcount_register gen7_perfcounters_alwayson[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_CP_ALWAYS_ON_COUNTER_LO,
+		GEN7_CP_ALWAYS_ON_COUNTER_HI, -1 },
+};
+
+/*
+ * The ADRENO_PERFCOUNTER_GROUP_RESTORE flag is set by default because
+ * most perfcounter groups need to be restored as part of preemption
+ * and IFPC. Groups that are not restored across preemption/IFPC should
+ * be defined with the GEN7_PERFCOUNTER_GROUP_FLAGS macro instead.
+ */
+
+#define GEN7_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags, \
+		enable, read) \
+	[KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \
+	ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags, \
+	enable, read }
+
+#define GEN7_PERFCOUNTER_GROUP(offset, name, enable, read) \
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, offset, name, \
+	ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read)
+
+#define GEN7_REGULAR_PERFCOUNTER_GROUP(offset, name) \
+	GEN7_PERFCOUNTER_GROUP(offset, name, \
+		gen7_counter_enable, gen7_counter_read)
+
+#define GEN7_BV_PERFCOUNTER_GROUP(offset, name, enable, read) \
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, BV_##offset, bv_##name, \
+	ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read)
+
+#define GEN7_BV_REGULAR_PERFCOUNTER_GROUP(offset, name) \
+	GEN7_BV_PERFCOUNTER_GROUP(offset, name, \
+		gen7_counter_enable, gen7_counter_read)
+
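+/*
+ * The BV group macros map to the KGSL_PERFCOUNTER_GROUP_BV_* indices
+ * and the bv_-prefixed register tables above, which count work done by
+ * the binning (BV) pipe separately from the render (BR) pipe.
+ */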
+static const struct adreno_perfcount_group gen7_hwsched_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	GEN7_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, RBBM, rbbm, 0,
+		gen7_counter_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(PC, pc, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(VFD, vfd, gen7_hwsched_counter_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
+	GEN7_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(RAS, ras, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(LRZ, lrz, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(UCHE, gen7_2_0_uche),
+	GEN7_PERFCOUNTER_GROUP(TP, tp, gen7_hwsched_counter_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(SP, sp, gen7_hwsched_counter_enable, gen7_counter_read),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(RB, rb),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF, gbif, 0,
+		gen7_counter_gbif_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF_PWR, gbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		gen7_counter_gbif_pwr_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		gen7_counter_alwayson_enable, gen7_counter_alwayson_read),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_XOCLK, gmu_xoclk, 0,
+		gen7_counter_gmu_xoclk_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_GMUCLK, gmu_gmuclk, 0,
+		gen7_counter_gmu_gmuclk_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_PERF, gmu_perf, 0,
+		gen7_counter_gmu_perf_enable, gen7_counter_read_norestore),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	GEN7_BV_PERFCOUNTER_GROUP(PC, pc, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
+	GEN7_BV_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc),
+	GEN7_BV_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_PERFCOUNTER_GROUP(RAS, ras, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_PERFCOUNTER_GROUP(LRZ, lrz, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_bv_enable, gen7_counter_read),
+};
+
+static const struct adreno_perfcount_group gen7_9_0_hwsched_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	GEN7_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, RBBM, rbbm, 0,
+		gen7_counter_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(PC, pc, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(VFD, vfd, gen7_hwsched_counter_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
+	GEN7_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(RAS, ras, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(LRZ, lrz, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(UCHE, gen7_9_0_uche),
+	GEN7_PERFCOUNTER_GROUP(TP, tp, gen7_hwsched_counter_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(SP, sp, gen7_hwsched_counter_enable, gen7_counter_read),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(RB, rb),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF, gbif, 0,
+		gen7_counter_gbif_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF_PWR, gbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		gen7_counter_gbif_pwr_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		gen7_counter_alwayson_enable, gen7_counter_alwayson_read),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_XOCLK, gmu_xoclk, 0,
+		gen7_counter_gmu_xoclk_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_GMUCLK, gmu_gmuclk, 0,
+		gen7_counter_gmu_gmuclk_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_PERF, gmu_perf, 0,
+		gen7_counter_gmu_perf_enable, gen7_counter_read_norestore),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	GEN7_BV_PERFCOUNTER_GROUP(PC, pc, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
+	GEN7_BV_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc),
+	GEN7_BV_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_PERFCOUNTER_GROUP(RAS, ras, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_PERFCOUNTER_GROUP(LRZ, lrz, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_bv_enable, gen7_counter_read),
+};
+
+static const struct adreno_perfcount_group gen7_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	GEN7_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, RBBM, rbbm, 0,
+		gen7_counter_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(PC, pc, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(VFD, vfd, gen7_counter_inline_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
+	GEN7_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(RAS, ras, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(LRZ, lrz, gen7_counter_br_enable, gen7_counter_read),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
+	GEN7_PERFCOUNTER_GROUP(TP, tp, gen7_counter_inline_enable, gen7_counter_read),
+	GEN7_PERFCOUNTER_GROUP(SP, sp, gen7_counter_inline_enable, gen7_counter_read),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(RB, rb),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF, gbif, 0,
+		gen7_counter_gbif_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF_PWR, gbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		gen7_counter_gbif_pwr_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		gen7_counter_alwayson_enable, gen7_counter_alwayson_read),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_XOCLK, gmu_xoclk, 0,
+		gen7_counter_gmu_xoclk_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_GMUCLK, gmu_gmuclk, 0,
+		gen7_counter_gmu_gmuclk_enable, gen7_counter_read_norestore),
+	GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_PERF, gmu_perf, 0,
+		gen7_counter_gmu_perf_enable, gen7_counter_read_norestore),
+	GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	GEN7_BV_PERFCOUNTER_GROUP(PC, pc, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
+	GEN7_BV_PERFCOUNTER_GROUP(VPC, vpc, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp),
+	GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc),
+	GEN7_BV_PERFCOUNTER_GROUP(TSE, tse, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_PERFCOUNTER_GROUP(RAS, ras, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_PERFCOUNTER_GROUP(LRZ, lrz, gen7_counter_bv_enable, gen7_counter_read),
+	GEN7_BV_PERFCOUNTER_GROUP(HLSQ, hlsq, gen7_counter_bv_enable, gen7_counter_read),
+};
+
+const struct adreno_perfcounters adreno_gen7_perfcounters = {
+	gen7_perfcounter_groups,
+	ARRAY_SIZE(gen7_perfcounter_groups),
+};
+
+const struct adreno_perfcounters adreno_gen7_hwsched_perfcounters = {
+	gen7_hwsched_perfcounter_groups,
+	ARRAY_SIZE(gen7_hwsched_perfcounter_groups),
+};
+
+const struct adreno_perfcounters adreno_gen7_9_0_hwsched_perfcounters = {
+	gen7_9_0_hwsched_perfcounter_groups,
+	ARRAY_SIZE(gen7_9_0_hwsched_perfcounter_groups),
+};

+ 802 - 0
qcom/opensource/graphics-kernel/adreno_gen7_preempt.c

@@ -0,0 +1,802 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+
+#define PREEMPT_RECORD(_field) \
+		offsetof(struct gen7_cp_preemption_record, _field)
+
+#define PREEMPT_SMMU_RECORD(_field) \
+		offsetof(struct gen7_cp_smmu_info, _field)
+
+static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer,
+	bool atomic)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+
+	if (!atomic) {
+		/*
+		 * The inline wptr update may have been skipped because a
+		 * preemption was in flight at submit time. We are in
+		 * dispatcher context now, so apply it.
+		 */
+		if (rb->skip_inline_wptr) {
+			ret = gen7_fenced_write(adreno_dev,
+				GEN7_CP_RB_WPTR, rb->wptr,
+				FENCE_STATUS_WRITEDROPPED0_MASK);
+
+			reset_timer = true;
+			rb->skip_inline_wptr = false;
+		}
+	} else {
+		unsigned int wptr;
+
+		kgsl_regread(device, GEN7_CP_RB_WPTR, &wptr);
+		if (wptr != rb->wptr) {
+			kgsl_regwrite(device, GEN7_CP_RB_WPTR, rb->wptr);
+			reset_timer = true;
+		}
+	}
+
+	if (reset_timer)
+		rb->dispatch_q.expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+	if (!atomic) {
+		/* If WPTR update fails, set the fault and trigger recovery */
+		if (ret) {
+			gmu_core_fault_snapshot(device);
+			adreno_dispatcher_fault(adreno_dev,
+				ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+		}
+	}
+}
+
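+/*
+ * Vote to keep the GPU from power collapsing (IFPC) while the
+ * preemption sequence is programming the context switch registers.
+ */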
+static void _power_collapse_set(struct adreno_device *adreno_dev, bool val)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	gmu_core_regwrite(device,
+			 GEN7_GMU_PWR_COL_PREEMPT_KEEPALIVE, (val ? 1 : 0));
+}
+
+static void _gen7_preemption_done(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * In the very unlikely case that the power is off, do nothing - the
+	 * state will be reset on power up and everybody will be happy
+	 */
+
+	if (!kgsl_state_is_awake(device))
+		return;
+
+	kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status);
+
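+	/*
+	 * Bit 0 of CP_CONTEXT_SWITCH_CNTL stays set while the CP is still
+	 * working on a preemption, so a set bit here means the switch
+	 * never completed.
+	 */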
+	if (status & 0x1) {
+		dev_err(device->dev,
+			"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
+			status, adreno_dev->cur_rb->id,
+			adreno_get_rptr(adreno_dev->cur_rb),
+			adreno_dev->cur_rb->wptr,
+			adreno_dev->next_rb->id,
+			adreno_get_rptr(adreno_dev->next_rb),
+			adreno_dev->next_rb->wptr);
+
+		/* Set a fault and restart */
+		adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+
+		return;
+	}
+
+	adreno_dev->preempt.count++;
+
+	del_timer_sync(&adreno_dev->preempt.timer);
+
+	kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		status, 0);
+
+	/* Clean up all the bits */
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr for the new command queue */
+	_update_wptr(adreno_dev, true, false);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	/* Clear the preempt state */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+}
+
+static void _gen7_preemption_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * If the power is on check the preemption status one more time - if it
+	 * was successful then just transition to the complete state
+	 */
+	if (kgsl_state_is_awake(device)) {
+		kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status);
+
+		if (!(status & 0x1)) {
+			adreno_set_preempt_state(adreno_dev,
+				ADRENO_PREEMPT_COMPLETE);
+
+			adreno_dispatcher_schedule(device);
+			return;
+		}
+	}
+
+	dev_err(device->dev,
+		"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
+		adreno_dev->cur_rb->id,
+		adreno_get_rptr(adreno_dev->cur_rb),
+		adreno_dev->cur_rb->wptr,
+		adreno_dev->next_rb->id,
+		adreno_get_rptr(adreno_dev->next_rb),
+		adreno_dev->next_rb->wptr);
+
+	adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+}
+
+static void _gen7_preemption_worker(struct work_struct *work)
+{
+	struct adreno_preemption *preempt = container_of(work,
+		struct adreno_preemption, work);
+	struct adreno_device *adreno_dev = container_of(preempt,
+		struct adreno_device, preempt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Need to take the mutex to make sure that the power stays on */
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
+		_gen7_preemption_fault(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+/* Find the highest priority active ringbuffer */
+static struct adreno_ringbuffer *gen7_next_ringbuffer(
+		struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb;
+	unsigned long flags;
+	unsigned int i;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		bool empty;
+
+		spin_lock_irqsave(&rb->preempt_lock, flags);
+		empty = adreno_rb_empty(rb);
+		spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+		if (!empty)
+			return rb;
+	}
+
+	return NULL;
+}
+
+void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic)
+{
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_ringbuffer *next;
+	u64 ttbr0, gpuaddr;
+	u32 contextidr, cntl;
+	unsigned long flags;
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+
+	/* Put ourselves into a possible trigger state */
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
+		return;
+
+	/* Get the next ringbuffer to preempt in */
+	next = gen7_next_ringbuffer(adreno_dev);
+
+	/*
+	 * Nothing to do if every ringbuffer is empty or if the current
+	 * ringbuffer is the only active one
+	 */
+	if (next == NULL || next == adreno_dev->cur_rb) {
+		/*
+		 * Update any critical things that might have been skipped while
+		 * we were looking for a new ringbuffer
+		 */
+
+		if (next != NULL) {
+			_update_wptr(adreno_dev, false, atomic);
+
+			mod_timer(&adreno_dev->dispatcher.timer,
+				adreno_dev->cur_rb->dispatch_q.expires);
+		}
+
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+		return;
+	}
+
+	/* Turn off the dispatcher timer */
+	del_timer(&adreno_dev->dispatcher.timer);
+
+	/*
+	 * This is the most critical section - we need to take care not to race
+	 * until we have programmed the CP for the switch
+	 */
+
+	spin_lock_irqsave(&next->preempt_lock, flags);
+
+	/* Get the pagetable from the pagetable info. */
+	kgsl_sharedmem_readq(device->scratch, &ttbr0,
+		SCRATCH_RB_OFFSET(next->id, ttbr0));
+	kgsl_sharedmem_readl(device->scratch, &contextidr,
+		SCRATCH_RB_OFFSET(next->id, contextidr));
+
+	kgsl_sharedmem_writel(next->preemption_desc,
+		PREEMPT_RECORD(wptr), next->wptr);
+
+	spin_unlock_irqrestore(&next->preempt_lock, flags);
+
+	/* And write it to the smmu info */
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		kgsl_sharedmem_writeq(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(context_idr), contextidr);
+	}
+
+	kgsl_sharedmem_readq(preempt->scratch, &gpuaddr,
+		next->id * sizeof(u64));
+
+	/*
+	 * Set a keepalive bit before the first preemption register write.
+	 * This is required since while each individual write to the context
+	 * switch registers will wake the GPU from collapse, it will not in
+	 * itself cause GPU activity. Thus, the GPU could technically be
+	 * re-collapsed between subsequent register writes leading to a
+	 * prolonged preemption sequence. The keepalive bit prevents any
+	 * further power collapse while it is set.
+	 * It is more efficient to use a keepalive+wake-on-fence approach here
+	 * rather than an OOB. Both keepalive and the fence are effectively
+	 * free when the GPU is already powered on, whereas an OOB requires an
+	 * unconditional handshake with the GMU.
+	 */
+	_power_collapse_set(adreno_dev, true);
+
+	/*
+	 * Fenced writes on this path will make sure the GPU is woken up
+	 * in case it was power collapsed by the GMU.
+	 */
+	if (gen7_fenced_write(adreno_dev,
+		GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO,
+		lower_32_bits(next->preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	/*
+	 * The fenced writes above bring the GMU out of IFPC if it was
+	 * power collapsed, but they do not guarantee that the GMU
+	 * firmware has actually reached the ACTIVE state, i.e. that the
+	 * wake-up from IFPC is complete. Wait for the GMU to move to
+	 * ACTIVE before triggering preemption; this is required to make
+	 * sure the CP doesn't interrupt the GMU during wake-up from IFPC.
+	 */
+	if (!atomic && gmu_core_dev_wait_for_active_transition(device))
+		goto err;
+
+	if (gen7_fenced_write(adreno_dev,
+		GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI,
+		upper_32_bits(next->preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (gen7_fenced_write(adreno_dev,
+		GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO,
+		lower_32_bits(next->secure_preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (gen7_fenced_write(adreno_dev,
+		GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI,
+		upper_32_bits(next->secure_preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (gen7_fenced_write(adreno_dev,
+		GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO,
+		lower_32_bits(gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (gen7_fenced_write(adreno_dev,
+		GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI,
+		upper_32_bits(gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	adreno_dev->next_rb = next;
+
+	/* Start the timer to detect a stuck preemption */
+	mod_timer(&adreno_dev->preempt.timer,
+		jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
+
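+	/*
+	 * Build CP_CONTEXT_SWITCH_CNTL: bit 0 triggers the switch and
+	 * bits [7:6] carry the preemption level; bits 8 and 9 are set
+	 * below for GMEM save/restore and skip save/restore respectively.
+	 */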
+	cntl = (preempt->preempt_level << 6) | 0x01;
+
+	/* Skip save/restore during L1 preemption */
+	if (preempt->skipsaverestore)
+		cntl |= (1 << 9);
+
+	/* Enable GMEM save/restore across preemption */
+	if (preempt->usesgmem)
+		cntl |= (1 << 8);
+
+	trace_adreno_preempt_trigger(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		cntl, 0);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
+
+	if (gen7_core->qos_value)
+		kgsl_sharedmem_writel(preempt->scratch,
+			PREEMPT_SCRATCH_OFFSET(QOS_VALUE_IDX),
+			gen7_core->qos_value[next->id]);
+
+	/* Trigger the preemption */
+	if (gen7_fenced_write(adreno_dev, GEN7_CP_CONTEXT_SWITCH_CNTL, cntl,
+					FENCE_STATUS_WRITEDROPPED1_MASK)) {
+		adreno_dev->next_rb = NULL;
+		del_timer(&adreno_dev->preempt.timer);
+		goto err;
+	}
+
+	return;
+err:
+	/* If fenced write fails, take inline snapshot and trigger recovery */
+	if (!in_interrupt()) {
+		gmu_core_fault_snapshot(device);
+		adreno_dispatcher_fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+	} else {
+		adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+	/* Clear the keepalive so power collapse can resume */
+	_power_collapse_set(adreno_dev, false);
+
+}
+
+void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
+		return;
+
+	kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status);
+
+	if (status & 0x1) {
+		dev_err(KGSL_DEVICE(adreno_dev)->dev,
+			"preempt interrupt with non-zero status: %X\n",
+			status);
+
+		/*
+		 * Under the assumption that this is a race between the
+		 * interrupt and the register, schedule the worker to clean up.
+		 * If the status still hasn't resolved itself by the time we get
+		 * there then we have to assume something bad happened
+		 */
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
+		adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+		return;
+	}
+
+	adreno_dev->preempt.count++;
+
+	/*
+	 * We can now safely clear the preemption keepalive bit, allowing
+	 * power collapse to resume its regular activity.
+	 */
+	_power_collapse_set(adreno_dev, false);
+
+	del_timer(&adreno_dev->preempt.timer);
+
+	kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		status, 0);
+
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr if it changed while preemption was ongoing */
+	_update_wptr(adreno_dev, true, true);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	gen7_preemption_trigger(adreno_dev, true);
+}
+
+void gen7_preemption_prepare_postamble(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	u32 *postamble, count = 0;
+
+	/*
+	 * First 28 dwords of the device scratch buffer are used to store shadow rb data.
+	 * Reserve 15 dwords in the device scratch buffer from SCRATCH_POSTAMBLE_OFFSET for
+	 * KMD postamble pm4 packets. This should be in *device->scratch* so that userspace
+	 * cannot access it.
+	 */
+	postamble = device->scratch->hostptr + SCRATCH_POSTAMBLE_OFFSET;
+
+	/*
+	 * Reserve 4 dwords in the scratch buffer for the dynamic QOS control
+	 * feature. To ensure the QOS value is applied for the first
+	 * preemption, this packet is also sent during bootup.
+	 */
+	if (gen7_core->qos_value) {
+		postamble[count++] = cp_type7_packet(CP_MEM_TO_REG, 3);
+		postamble[count++] = GEN7_RBBM_GBIF_CLIENT_QOS_CNTL;
+		postamble[count++] = lower_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX));
+		postamble[count++] = upper_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX));
+	}
+
+	/*
+	 * Since postambles are not preserved across slumber, necessary packets
+	 * must be sent to GPU before first submission.
+	 *
+	 * If a packet needs to be sent before first submission, add it above this.
+	 */
+	preempt->postamble_bootup_len = count;
+
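+	/*
+	 * When the perfcounter feature is disabled, have the postamble
+	 * re-initialize the perfcounter SRAM and wait for the init to
+	 * complete so that counter state does not leak across preemption.
+	 */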
+	/* Reserve 11 dwords in the device scratch buffer to clear perfcounters */
+	if (!adreno_dev->perfcounter) {
+		postamble[count++] = cp_type7_packet(CP_REG_RMW, 3);
+		postamble[count++] = GEN7_RBBM_PERFCTR_SRAM_INIT_CMD;
+		postamble[count++] = 0x0;
+		postamble[count++] = 0x1;
+
+		postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
+		postamble[count++] = 0x3;
+		postamble[count++] = GEN7_RBBM_PERFCTR_SRAM_INIT_STATUS;
+		postamble[count++] = 0x0;
+		postamble[count++] = 0x1;
+		postamble[count++] = 0x1;
+		postamble[count++] = 0x0;
+	}
+
+	preempt->postamble_len = count;
+}
+
+void gen7_preemption_schedule(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
+		_gen7_preemption_done(adreno_dev);
+
+	gen7_preemption_trigger(adreno_dev, false);
+
+	mutex_unlock(&device->mutex);
+}
+
+u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 *cmds)
+{
+	u32 *cmds_orig = cmds;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags))
+		goto done;
+
+	*cmds++ = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	*cmds++ = CP_SET_THREAD_BR;
+
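+	/*
+	 * Program four pseudo registers (three dwords each: type plus a
+	 * 64-bit address) that describe the preemption save areas for
+	 * this ringbuffer.
+	 */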
+	*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12);
+
+	/* NULL SMMU_INFO buffer - we track in KMD */
+	*cmds++ = SET_PSEUDO_SMMU_INFO;
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
+
+	*cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr);
+
+	*cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			rb->secure_preemption_desc->gpuaddr);
+
+	/*
+	 * This address does not need to be programmed again when preemption
+	 * is actually triggered: the CP stores the address given here in the
+	 * CP_SET_PSEUDO_REGISTER payload into the context record and thus
+	 * knows from where to restore the saved perfcounters for the new
+	 * ringbuffer.
+	 */
+	*cmds++ = SET_PSEUDO_COUNTER;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			rb->perfcounter_save_restore_desc->gpuaddr);
+
+done:
+	if (drawctxt) {
+		struct adreno_ringbuffer *rb = drawctxt->rb;
+		u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id);
+		u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
+
+		*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
+		cmds += cp_gpuaddr(adreno_dev, cmds, dest);
+		*cmds++ = lower_32_bits(gpuaddr);
+		*cmds++ = upper_32_bits(gpuaddr);
+
+		if (adreno_dev->preempt.postamble_len) {
+			u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev));
+
+			*cmds++ = cp_type7_packet(CP_SET_AMBLE, 3);
+			*cmds++ = lower_32_bits(kmd_postamble_addr);
+			*cmds++ = upper_32_bits(kmd_postamble_addr);
+			*cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE)
+				| (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len));
+		}
+	}
+
+	return (unsigned int) (cmds - cmds_orig);
+}
+
+u32 gen7_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+		u32 *cmds)
+{
+	u32 index = 0;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	if (adreno_dev->cur_rb) {
+		u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, adreno_dev->cur_rb->id);
+
+		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 4);
+		cmds[index++] = lower_32_bits(dest);
+		cmds[index++] = upper_32_bits(dest);
+		cmds[index++] = 0;
+		cmds[index++] = 0;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BOTH;
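+	/* Mark a yield point so the CP can act on a pending preemption here */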
+	cmds[index++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	cmds[index++] = 0;
+	cmds[index++] = 0;
+	cmds[index++] = 1;
+	cmds[index++] = 0;
+
+	return index;
+}
+
+void gen7_preemption_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_ringbuffer *rb;
+	unsigned int i;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	/* Force the state to be clear */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		/* smmu_info is allocated and mapped in gen7_preemption_iommu_init */
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(magic), GEN7_CP_SMMU_INFO_MAGIC_REF);
+		kgsl_sharedmem_writeq(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
+
+		/* The CP doesn't use the asid record, so poison it */
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(asid), 0xdecafbad);
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(context_idr), 0);
+
+		kgsl_regwrite(device, GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+			lower_32_bits(iommu->smmu_info->gpuaddr));
+
+		kgsl_regwrite(device, GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+			upper_32_bits(iommu->smmu_info->gpuaddr));
+	}
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		kgsl_sharedmem_writel(rb->preemption_desc,
+			PREEMPT_RECORD(rptr), 0);
+		kgsl_sharedmem_writel(rb->preemption_desc,
+			PREEMPT_RECORD(wptr), 0);
+
+		adreno_ringbuffer_set_pagetable(device, rb,
+			device->mmu.defaultpagetable);
+
+		clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags);
+	}
+}
+
+static void reset_rb_preempt_record(struct adreno_device *adreno_dev,
+	struct adreno_ringbuffer *rb)
+{
+	memset(rb->preemption_desc->hostptr, 0x0, rb->preemption_desc->size);
+
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(magic), GEN7_CP_CTXRECORD_MAGIC_REF);
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(cntl), GEN7_CP_RB_CNTL_DEFAULT);
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(
+		KGSL_DEVICE(adreno_dev), rb->id, rptr));
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr);
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(bv_rptr_addr), SCRATCH_RB_GPU_ADDR(
+		KGSL_DEVICE(adreno_dev), rb->id, bv_rptr));
+}
+
+void gen7_reset_preempt_records(struct adreno_device *adreno_dev)
+{
+	int i;
+	struct adreno_ringbuffer *rb;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		reset_rb_preempt_record(adreno_dev, rb);
+	}
+}
+
+static int gen7_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
+	struct adreno_ringbuffer *rb)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES;
+	int ret;
+
+	if (gen7_core->ctxt_record_size)
+		ctxt_record_size = gen7_core->ctxt_record_size;
+
+	ret = adreno_allocate_global(device, &rb->preemption_desc,
+		ctxt_record_size, SZ_16K, 0,
+		KGSL_MEMDESC_PRIVILEGED, "preemption_desc");
+	if (ret)
+		return ret;
+
+	ret = adreno_allocate_global(device, &rb->secure_preemption_desc,
+		ctxt_record_size, 0,
+		KGSL_MEMFLAGS_SECURE, KGSL_MEMDESC_PRIVILEGED,
+		"secure_preemption_desc");
+	if (ret)
+		return ret;
+
+	ret = adreno_allocate_global(device, &rb->perfcounter_save_restore_desc,
+		GEN7_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0, 0,
+		KGSL_MEMDESC_PRIVILEGED,
+		"perfcounter_save_restore_desc");
+	if (ret)
+		return ret;
+
+	reset_rb_preempt_record(adreno_dev, rb);
+
+	return 0;
+}
+
+int gen7_preemption_init(struct adreno_device *adreno_dev)
+{
+	u32 flags = ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? KGSL_MEMDESC_PRIVILEGED : 0;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	struct adreno_ringbuffer *rb;
+	int ret;
+	unsigned int i;
+
+	/* We are dependent on IOMMU to make preemption go on the CP side */
+	if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU) {
+		ret = -ENODEV;
+		goto done;
+	}
+
+	INIT_WORK(&preempt->work, _gen7_preemption_worker);
+
+	/* Allocate mem for storing preemption switch record */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		ret = gen7_preemption_ringbuffer_init(adreno_dev, rb);
+		if (ret)
+			goto done;
+	}
+
+	ret = adreno_allocate_global(device, &preempt->scratch, PAGE_SIZE,
+			0, 0, flags, "preempt_scratch");
+	if (ret)
+		goto done;
+
+	/* Allocate mem for storing preemption smmu record */
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		ret = adreno_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, 0,
+			KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
+			"smmu_info");
+		if (ret)
+			goto done;
+	}
+
+	return 0;
+done:
+	clear_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+	return ret;
+}
+
+int gen7_preemption_context_init(struct kgsl_context *context)
+{
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u64 flags = 0;
+
+	if (!adreno_preemption_feature_set(adreno_dev))
+		return 0;
+
+	if (context->flags & KGSL_CONTEXT_SECURE)
+		flags |= KGSL_MEMFLAGS_SECURE;
+
+	if (is_compat_task())
+		flags |= KGSL_MEMFLAGS_FORCE_32BIT;
+
+	/*
+	 * gpumem_alloc_entry takes an extra refcount. Put it only when
+	 * destroying the context to keep the context record valid
+	 */
+	context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv,
+			GEN7_CP_CTXRECORD_USER_RESTORE_SIZE, flags);
+	if (IS_ERR(context->user_ctxt_record)) {
+		int ret = PTR_ERR(context->user_ctxt_record);
+
+		context->user_ctxt_record = NULL;
+		return ret;
+	}
+
+	return 0;
+}

+ 647 - 0
qcom/opensource/graphics-kernel/adreno_gen7_ringbuffer.c

@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "adreno_pm4types.h"
+#include "adreno_ringbuffer.h"
+#include "adreno_trace.h"
+#include "kgsl_trace.h"
+
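+/* Concurrent binning (the BV pipe) is not used for secure contexts */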
+static bool is_concurrent_binning(struct adreno_context *drawctxt)
+{
+	if (!drawctxt)
+		return false;
+
+	return !(drawctxt->base.flags & KGSL_CONTEXT_SECURE);
+}
+
+static int gen7_rb_pagetable_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		struct kgsl_pagetable *pagetable, u32 *cmds)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
+	int count = 0;
+	u32 id = drawctxt ? drawctxt->base.id : 0;
+
+	if (pagetable == device->mmu.defaultpagetable)
+		return 0;
+
+	/* CP switches the pagetable and flushes the Caches */
+	cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
+	cmds[count++] = lower_32_bits(ttbr0);
+	cmds[count++] = upper_32_bits(ttbr0);
+	cmds[count++] = id;
+
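+	/*
+	 * Keep the per-ringbuffer scratch copy of TTBR0 and the context id
+	 * up to date so the preemption code can read the pagetable that is
+	 * active on this ringbuffer from scratch memory.
+	 */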
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5);
+	cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
+				rb->id, ttbr0));
+	cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
+				rb->id, ttbr0));
+	cmds[count++] = lower_32_bits(ttbr0);
+	cmds[count++] = upper_32_bits(ttbr0);
+	cmds[count++] = id;
+
+	/*
+	 * Sync both threads after switching pagetables and enable BR only
+	 * to make sure BV doesn't race ahead while BR is still switching
+	 * pagetables.
+	 */
+	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;
+
+	return count;
+}
+
+static int gen7_rb_context_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_pagetable *pagetable =
+		adreno_drawctxt_get_pagetable(drawctxt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int count = 0;
+	u32 cmds[55];
+
+	/* Sync both threads */
+	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH;
+	/* Reset context state */
+	cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1);
+	cmds[count++] = CP_RESET_GLOBAL_LOCAL_TS | CP_CLEAR_BV_BR_COUNTER |
+			CP_CLEAR_RESOURCE_TABLE | CP_CLEAR_ON_CHIP_TS;
+	/*
+	 * Enable/disable concurrent binning for pagetable switch and
+	 * set the thread to BR since only BR can execute the pagetable
+	 * switch packets.
+	 */
+	/* Sync both threads and enable BR only */
+	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;
+
+	if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) {
+		/* Clear performance counters during context switches */
+		if (!adreno_dev->perfcounter) {
+			cmds[count++] = cp_type4_packet(GEN7_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
+			cmds[count++] = 0x1;
+		}
+
+		count += gen7_rb_pagetable_switch(adreno_dev, rb,
+			drawctxt, pagetable, &cmds[count]);
+
+		/* Wait for performance counter clear to finish */
+		if (!adreno_dev->perfcounter) {
+			cmds[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
+			cmds[count++] = 0x3;
+			cmds[count++] = GEN7_RBBM_PERFCTR_SRAM_INIT_STATUS;
+			cmds[count++] = 0x0;
+			cmds[count++] = 0x1;
+			cmds[count++] = 0x1;
+			cmds[count++] = 0x0;
+		}
+	} else {
+		struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+
+		u32 offset = GEN7_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d;
+
+		/*
+		 * Set the CONTEXTIDR register to the current context id so we
+		 * can use it in pagefault debugging. Unlike TTBR0 we don't
+		 * need any special sequence or locking to change it
+		 */
+		cmds[count++] = cp_type4_packet(offset, 1);
+		cmds[count++] = drawctxt->base.id;
+	}
+
+	cmds[count++] = cp_type7_packet(CP_NOP, 1);
+	cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
+				current_context));
+	cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
+				current_context));
+	cmds[count++] = drawctxt->base.id;
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+		KGSL_MEMSTORE_GLOBAL, current_context));
+	cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+		KGSL_MEMSTORE_GLOBAL, current_context));
+	cmds[count++] = drawctxt->base.id;
+
+	cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1);
+	cmds[count++] = 0x31;
+
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
+
+		cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3);
+		cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR;
+		cmds[count++] = lower_32_bits(gpuaddr);
+		cmds[count++] = upper_32_bits(gpuaddr);
+	}
+
+	return gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
+			cmds, count, 0, NULL);
+}
+
+#define RB_SOPTIMESTAMP(device, rb) \
+	MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
+#define CTXT_SOPTIMESTAMP(device, drawctxt) \
+	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
+
+#define RB_EOPTIMESTAMP(device, rb) \
+	MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
+#define CTXT_EOPTIMESTAMP(device, drawctxt) \
+	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
+
+int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+	unsigned long flags;
+
+	adreno_get_submit_time(adreno_dev, rb, time);
+	adreno_profile_submit_time(time);
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+		if (adreno_dev->cur_rb == rb) {
+			kgsl_pwrscale_busy(device);
+			ret = gen7_fenced_write(adreno_dev,
+				GEN7_CP_RB_WPTR, rb->_wptr,
+				FENCE_STATUS_WRITEDROPPED0_MASK);
+			rb->skip_inline_wptr = false;
+		}
+	} else {
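+		/*
+		 * A preemption is in flight; don't touch the hardware wptr
+		 * now. _update_wptr() will apply it once the switch to the
+		 * new ringbuffer completes.
+		 */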
+		if (adreno_dev->cur_rb == rb)
+			rb->skip_inline_wptr = true;
+	}
+
+	rb->wptr = rb->_wptr;
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+	if (ret) {
+		/*
+		 * If WPTR update fails, take inline snapshot and trigger
+		 * recovery.
+		 */
+		gmu_core_fault_snapshot(device);
+		adreno_dispatcher_fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+	}
+
+	return ret;
+}
+
+int gen7_ringbuffer_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int i, ret;
+
+	ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE,
+			0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
+			"scratch");
+	if (ret)
+		return ret;
+
+	adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
+
+	if (!adreno_preemption_feature_set(adreno_dev)) {
+		adreno_dev->num_ringbuffers = 1;
+		return adreno_ringbuffer_setup(adreno_dev,
+				&adreno_dev->ringbuffers[0], 0);
+	}
+
+	adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);
+
+	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
+		ret = adreno_ringbuffer_setup(adreno_dev,
+				&adreno_dev->ringbuffers[i], i);
+		if (ret)
+			return ret;
+	}
+
+	timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);
+	gen7_preemption_init(adreno_dev);
+	return 0;
+}
+
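+/* Headroom (in dwords) for the packets added around each submission */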
+#define GEN7_SUBMIT_MAX 104
+
+int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 flags, u32 *in, u32 dwords, u32 timestamp,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 size = GEN7_SUBMIT_MAX + dwords;
+	u32 *cmds, index = 0;
+	u64 profile_gpuaddr;
+	u32 profile_dwords;
+
+	if (adreno_drawctxt_detached(drawctxt))
+		return -ENOENT;
+
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return -EPROTO;
+
+	rb->timestamp++;
+
+	if (drawctxt)
+		drawctxt->internal_timestamp = rb->timestamp;
+
+	/* All submissions are run with protected mode off due to APRIV */
+	flags &= ~F_NOTPROTECTED;
+
+	cmds = adreno_ringbuffer_allocspace(rb, size);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	/* Identify the start of a command */
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
+
+	/* This is 25 dwords when drawctxt is not NULL and the perfcounter needs to be zapped */
+	index += gen7_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
+		&cmds[index]);
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BOTH;
+
+	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+	cmds[index++] = 0x101; /* IFPC disable */
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BR;
+
+	profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
+		drawctxt, &profile_dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = upper_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	if (drawctxt) {
+		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
+		cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
+	cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
+	cmds[index++] = rb->timestamp;
+
+	if (IS_SECURE(flags)) {
+		/* Sync BV and BR if entering secure mode */
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SYNC_THREADS | CP_CONCURRENT_BIN_DISABLE;
+		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		cmds[index++] = 1;
+	}
+
+	memcpy(&cmds[index], in, dwords << 2);
+	index += dwords;
+
+	profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
+		drawctxt, &dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = upper_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy))
+		cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);
+
+	if (is_concurrent_binning(drawctxt)) {
+		u64 addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_ts);
+
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SET_THREAD_BV;
+
+		/*
+		 * Make sure the timestamp is committed once BV pipe is
+		 * completely done with this submission.
+		 */
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_CLEAN | BIT(27);
+		cmds[index++] = lower_32_bits(addr);
+		cmds[index++] = upper_32_bits(addr);
+		cmds[index++] = rb->timestamp;
+
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SET_THREAD_BR;
+
+		/*
+		 * This makes sure that BR doesn't race ahead and commit
+		 * timestamp to memstore while BV is still processing
+		 * this submission.
+		 */
+		cmds[index++] = cp_type7_packet(CP_WAIT_TIMESTAMP, 4);
+		cmds[index++] = 0;
+		cmds[index++] = lower_32_bits(addr);
+		cmds[index++] = upper_32_bits(addr);
+		cmds[index++] = rb->timestamp;
+	}
+
+	/*
+	 * If this is an internal command, just write the ringbuffer timestamp,
+	 * otherwise, write both
+	 */
+	if (!drawctxt) {
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	} else {
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
+		cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_CLEAN | BIT(27);
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	}
+
+	if (IS_WFI(flags))
+		cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+
+	if (IS_SECURE(flags)) {
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_CONCURRENT_BIN_DISABLE;
+		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		cmds[index++] = 0;
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SYNC_THREADS;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BOTH;
+
+	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+	cmds[index++] = 0x100; /* IFPC enable */
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BR;
+
+	/* 10 dwords */
+	index += gen7_preemption_post_ibsubmit(adreno_dev, &cmds[index]);
+
+	/* Adjust the write pointer for the number of dwords we actually wrote */
+	rb->_wptr -= (size - index);
+
+	return gen7_ringbuffer_submit(rb, time);
+}
+
+static u32 gen7_get_alwayson_counter(u32 *cmds, u64 gpuaddr)
+{
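+	/*
+	 * CP_REG_TO_MEM: read two consecutive dwords (LO/HI of the always-on
+	 * counter) and store them as a single 64-bit value at gpuaddr
+	 */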
+	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
+	cmds[1] = GEN7_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18);
+	cmds[2] = lower_32_bits(gpuaddr);
+	cmds[3] = upper_32_bits(gpuaddr);
+
+	return 4;
+}
+
+static u32 gen7_get_alwayson_context(u32 *cmds, u64 gpuaddr)
+{
+	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
+	cmds[1] = GEN7_CP_ALWAYS_ON_CONTEXT_LO | (1 << 30) | (2 << 18);
+	cmds[2] = lower_32_bits(gpuaddr);
+	cmds[3] = upper_32_bits(gpuaddr);
+
+	return 4;
+}
+
+#define PROFILE_IB_DWORDS 4
+#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
+
+static u64 gen7_get_user_profiling_ib(struct adreno_ringbuffer *rb,
+		struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds)
+{
+	u32 offset, *ib, dwords;
+
+	if (IS_ERR(rb->profile_desc))
+		return 0;
+
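+	/* Each profiling slot holds one 4-dword CP_REG_TO_MEM packet */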
+	offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
+	ib = rb->profile_desc->hostptr + offset;
+	dwords = gen7_get_alwayson_counter(ib,
+		cmdobj->profiling_buffer_gpuaddr + target_offset);
+
+	cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+	cmds[1] = lower_32_bits(rb->profile_desc->gpuaddr + offset);
+	cmds[2] = upper_32_bits(rb->profile_desc->gpuaddr + offset);
+	cmds[3] = dwords;
+
+	rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
+
+	return 4;
+}
+
+static int gen7_drawctxt_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (rb->drawctxt_active == drawctxt)
+		return 0;
+
+	if (kgsl_context_detached(&drawctxt->base))
+		return -ENOENT;
+
+	if (!_kgsl_context_get(&drawctxt->base))
+		return -ENOENT;
+
+	ret = gen7_rb_context_switch(adreno_dev, rb, drawctxt);
+	if (ret) {
+		kgsl_context_put(&drawctxt->base);
+		return ret;
+	}
+
+	trace_adreno_drawctxt_switch(rb, drawctxt);
+
+	/* Release the current drawctxt as soon as the new one is switched */
+	adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
+		rb, rb->timestamp);
+
+	rb->drawctxt_active = drawctxt;
+	return 0;
+}
+
+
+#define GEN7_USER_PROFILE_IB(rb, cmdobj, cmds, field) \
+	gen7_get_user_profiling_ib((rb), (cmdobj), \
+		offsetof(struct kgsl_drawobj_profiling_buffer, field), \
+		(cmds))
+
+#define GEN7_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
+	gen7_get_alwayson_counter((cmds), \
+		(dev)->profile_buffer->gpuaddr + \
+			ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
+				field))
+
+#define GEN7_KERNEL_PROFILE_CONTEXT(dev, cmdobj, cmds, field) \
+	gen7_get_alwayson_context((cmds), \
+		(dev)->profile_buffer->gpuaddr + \
+			ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
+				field))
+
+#define GEN7_COMMAND_DWORDS 60
+
+int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	struct adreno_ringbuffer *rb = drawctxt->rb;
+	int ret = 0, numibs = 0, index = 0;
+	u32 *cmds;
+
+	/* Count the number of IBs (if we are not skipping) */
+	if (!IS_SKIP(flags)) {
+		struct list_head *tmp;
+
+		list_for_each(tmp, &cmdobj->cmdlist)
+			numibs++;
+	}
+
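+	/*
+	 * Each IB needs at most 5 dwords: a 4-dword CP_INDIRECT_BUFFER_PFE
+	 * packet plus an optional 1-dword NOP header used to skip it
+	 */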
+	cmds = kvmalloc((GEN7_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
+	if (!cmds) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = START_IB_IDENTIFIER;
+
+	/* Kernel profiling: 8 dwords */
+	if (IS_KERNEL_PROFILE(flags)) {
+		index += GEN7_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
+			started);
+		index += GEN7_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
+			ctx_start);
+	}
+
+	/* User profiling: 4 dwords */
+	if (IS_USER_PROFILE(flags))
+		index += GEN7_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
+			gpu_ticks_submitted);
+
+	if (is_concurrent_binning(drawctxt)) {
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SET_THREAD_BOTH;
+	}
+	if (numibs) {
+		struct kgsl_memobj_node *ib;
+
+		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+		cmds[index++] = 0x00d; /* IB1LIST start */
+
+		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
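+			/*
+			 * A CP_NOP with a payload count of 4 makes the CP treat
+			 * the following 4-dword indirect buffer packet as NOP
+			 * payload, effectively skipping the IB
+			 */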
+			if (ib->priv & MEMOBJ_SKIP ||
+				(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE &&
+				 !IS_PREAMBLE(flags)))
+				cmds[index++] = cp_type7_packet(CP_NOP, 4);
+
+			cmds[index++] =
+				cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+			cmds[index++] = lower_32_bits(ib->gpuaddr);
+			cmds[index++] = upper_32_bits(ib->gpuaddr);
+
+			/* Double check that IB_PRIV is never set */
+			cmds[index++] = (ib->size >> 2) & 0xfffff;
+		}
+
+		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+		cmds[index++] = 0x00e; /* IB1LIST end */
+	}
+
+	if (is_concurrent_binning(drawctxt)) {
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SET_THREAD_BR;
+	}
+	/* CCU invalidate depth */
+	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
+	cmds[index++] = 24;
+
+	/* CCU invalidate color */
+	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
+	cmds[index++] = 25;
+
+	/* 8 dwords */
+	if (IS_KERNEL_PROFILE(flags)) {
+		index += GEN7_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
+			retired);
+		index += GEN7_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
+			ctx_end);
+	}
+
+	/* 4 dwords */
+	if (IS_USER_PROFILE(flags))
+		index += GEN7_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
+			gpu_ticks_retired);
+
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = END_IB_IDENTIFIER;
+
+	ret = gen7_drawctxt_switch(adreno_dev, rb, drawctxt);
+
+	/*
+	 * In the unlikely event of an error in the drawctxt switch,
+	 * treat it like a hang
+	 */
+	if (ret) {
+		/*
+		 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
+		 * the upper layers know how to handle it
+		 */
+		if (ret != -ENOSPC && ret != -ENOENT)
+			dev_err(device->dev,
+				"Unable to switch draw context: %d\n", ret);
+		goto done;
+	}
+
+	adreno_drawobj_set_constraint(device, drawobj);
+
+	ret = gen7_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
+		flags, cmds, index, drawobj->timestamp, time);
+
+done:
+	trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
+		drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
+
+	kvfree(cmds);
+	return ret;
+}

+ 519 - 0
qcom/opensource/graphics-kernel/adreno_gen7_rpmh.c

@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <soc/qcom/cmd-db.h>
+#include <soc/qcom/tcs.h>
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "kgsl_bus.h"
+#include "kgsl_device.h"
+
+struct rpmh_arc_vals {
+	u32 num;
+	const u16 *val;
+};
+
+struct bcm {
+	const char *name;
+	u32 buswidth;
+	u32 channels;
+	u32 unit;
+	u16 width;
+	u8 vcd;
+	bool fixed;
+};
+
+struct bcm_data {
+	__le32 unit;
+	__le16 width;
+	u8 vcd;
+	u8 reserved;
+};
+
+struct rpmh_bw_votes {
+	u32 wait_bitmask;
+	u32 num_cmds;
+	u32 *addrs;
+	u32 num_levels;
+	u32 **cmds;
+};
+
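+/*
+ * Pack one DCVS vote: primary rail index in bits [7:0], secondary rail index
+ * in bits [15:8] and the voltage level in bits [31:16]
+ */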
+#define ARC_VOTE_SET(pri, sec, vlvl) \
+	(FIELD_PREP(GENMASK(31, 16), vlvl) | \
+	 FIELD_PREP(GENMASK(15, 8), sec) | \
+	 FIELD_PREP(GENMASK(7, 0), pri))
+
+static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id)
+{
+	size_t len = 0;
+
+	arc->val = cmd_db_read_aux_data(res_id, &len);
+
+	/*
+	 * cmd_db_read_aux_data() gives us a zero-padded table of
+	 * size len that contains the arc values. To determine the
+	 * number of arc values, we loop through the table and count
+	 * them until we get to the end of the buffer or hit the
+	 * zero padding.
+	 */
+	for (arc->num = 1; arc->num < (len >> 1); arc->num++) {
+		if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0)
+			break;
+	}
+
+	return 0;
+}
+
+static int setup_volt_dependency_tbl(u32 *votes,
+		struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
+		u16 *vlvl, unsigned int num_entries)
+{
+	int i, j, k;
+	uint16_t cur_vlvl;
+	bool found_match;
+
+	/* i tracks current KGSL GPU frequency table entry
+	 * j tracks secondary rail voltage table entry
+	 * k tracks primary rail voltage table entry
+	 */
+	for (i = 0; i < num_entries; i++) {
+		found_match = false;
+
+		/* Look for a primary rail voltage that matches a VLVL level */
+		for (k = 0; k < pri_rail->num; k++) {
+			if (pri_rail->val[k] >= vlvl[i]) {
+				cur_vlvl = pri_rail->val[k];
+				found_match = true;
+				break;
+			}
+		}
+
+		/* If we did not find a matching VLVL level then abort */
+		if (!found_match)
+			return -EINVAL;
+
+		/*
+		 * Look for a secondary rail index whose VLVL value
+		 * is greater than or equal to the VLVL value of the
+		 * corresponding index of the primary rail
+		 */
+		for (j = 0; j < sec_rail->num; j++) {
+			if (sec_rail->val[j] >= cur_vlvl ||
+					j + 1 == sec_rail->num)
+				break;
+		}
+
+		if (j == sec_rail->num)
+			j = 0;
+
+		votes[i] = ARC_VOTE_SET(k, j, cur_vlvl);
+	}
+
+	return 0;
+}
+
+/* Generate a set of bandwidth votes for the list of BCMs */
+static void tcs_cmd_data(struct bcm *bcms, int count,
+		u32 ab, u32 ib, u32 *data, u32 perfmode_vote, bool set_perfmode)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		bool valid = true;
+		bool commit = false;
+		u64 avg, peak, x, y;
+
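+		/* Commit on the last BCM within each VCD group */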
+		if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd)
+			commit = true;
+
+		if (bcms[i].fixed) {
+			if (!ab && !ib)
+				data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0);
+			else
+				data[i] = BCM_TCS_CMD(commit, true, 0x0,
+							set_perfmode ? perfmode_vote : 0x0);
+			continue;
+		}
+
+		/* Multiply the bandwidth by the width of the connection */
+		avg = ((u64) ab) * bcms[i].width;
+
+		/* And then divide by the total width */
+		do_div(avg, bcms[i].buswidth);
+
+		peak = ((u64) ib) * bcms[i].width;
+		do_div(peak, bcms[i].buswidth);
+
+		/* Input bandwidth value is in KBps */
+		x = avg * 1000ULL;
+		do_div(x, bcms[i].unit);
+
+		/* Input bandwidth value is in KBps */
+		y = peak * 1000ULL;
+		do_div(y, bcms[i].unit);
+
+		/*
+		 * If a bandwidth value was specified but the calculation ends
+		 * rounding down to zero, set a minimum level
+		 */
+		if (ab && x == 0)
+			x = 1;
+
+		if (ib && y == 0)
+			y = 1;
+
+		x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK);
+		y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK);
+
+		if (!x && !y)
+			valid = false;
+
+		data[i] = BCM_TCS_CMD(commit, valid, x, y);
+	}
+}
+
+static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes)
+{
+	int i;
+
+	if (!votes)
+		return;
+
+	for (i = 0; votes->cmds && i < votes->num_levels; i++)
+		kfree(votes->cmds[i]);
+
+	kfree(votes->cmds);
+	kfree(votes->addrs);
+	kfree(votes);
+}
+
+/* Build the votes table from the specified bandwidth levels */
+static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms,
+		int bcm_count, u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl)
+{
+	struct rpmh_bw_votes *votes;
+	bool set_perfmode;
+	int i;
+
+	votes = kzalloc(sizeof(*votes), GFP_KERNEL);
+	if (!votes)
+		return ERR_PTR(-ENOMEM);
+
+	votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL);
+	if (!votes->addrs) {
+		free_rpmh_bw_votes(votes);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL);
+	if (!votes->cmds) {
+		free_rpmh_bw_votes(votes);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	votes->num_cmds = bcm_count;
+	votes->num_levels = levels_count;
+
+	/* Get the cmd-db information for each BCM */
+	for (i = 0; i < bcm_count; i++) {
+		size_t l;
+		const struct bcm_data *data;
+
+		data = cmd_db_read_aux_data(bcms[i].name, &l);
+
+		votes->addrs[i] = cmd_db_read_addr(bcms[i].name);
+
+		bcms[i].unit = le32_to_cpu(data->unit);
+		bcms[i].width = le16_to_cpu(data->width);
+		bcms[i].vcd = data->vcd;
+	}
+
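+	/* Set a wait bit for the last BCM in each VCD group (the commit points) */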
+	for (i = 0; i < bcm_count; i++) {
+		if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd)
+			votes->wait_bitmask |= (1 << i);
+	}
+
+	for (i = 0; i < levels_count; i++) {
+		votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL);
+		if (!votes->cmds[i]) {
+			free_rpmh_bw_votes(votes);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		set_perfmode = (i >= perfmode_lvl);
+		tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i],
+								perfmode_vote, set_perfmode);
+	}
+
+	return votes;
+}
+
+/*
+ * setup_cx_arc_votes - Build the gmu cx voting table
+ * @gmu: Pointer to gmu device
+ * @pri_rail: Pointer to primary power rail vlvl table
+ * @sec_rail: Pointer to secondary/dependent power rail vlvl table
+ *
+ * This function initializes the cx votes for all gmu frequencies
+ * for gmu dcvs.
+ */
+static int setup_cx_arc_votes(struct gen7_gmu_device *gmu,
+	struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail)
+{
+	/* Hardcoded values of GMU CX voltage levels */
+	u16 gmu_cx_vlvl[MAX_CX_LEVELS];
+	u32 cx_votes[MAX_CX_LEVELS];
+	struct gen7_dcvs_table *table = &gmu->dcvs_table;
+	u32 *freqs = gmu->freqs;
+	u32 *vlvls = gmu->vlvls;
+	int ret, i;
+
+	gmu_cx_vlvl[0] = 0;
+	gmu_cx_vlvl[1] = vlvls[0];
+	gmu_cx_vlvl[2] = vlvls[1];
+
+	table->gmu_level_num = 3;
+
+	table->cx_votes[0].freq = 0;
+	table->cx_votes[1].freq = freqs[0] / 1000;
+	table->cx_votes[2].freq = freqs[1] / 1000;
+
+	ret = setup_volt_dependency_tbl(cx_votes, pri_rail,
+			sec_rail, gmu_cx_vlvl, table->gmu_level_num);
+	if (!ret) {
+		for (i = 0; i < table->gmu_level_num; i++)
+			table->cx_votes[i].vote = cx_votes[i];
+	}
+
+	return ret;
+}
+
+static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl)
+{
+	u32 i;
+
+	/*
+	 * This means that the Gx level doesn't have a dependency on Cx level.
+	 * Return the same value to disable cx voting at GMU.
+	 */
+	if (vlvl == 0xffffffff) {
+		*hlvl = vlvl;
+		return 0;
+	}
+
+	for (i = 0; i < cx_rail->num; i++) {
+		if (cx_rail->val[i] >= vlvl) {
+			*hlvl = i;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * setup_gx_arc_votes - Build the gpu dcvs voting table
+ * @adreno_dev: Pointer to the adreno device
+ * @pri_rail: Pointer to primary power rail vlvl table
+ * @sec_rail: Pointer to secondary/dependent power rail vlvl table
+ * @cx_rail: Pointer to the cx power rail vlvl table
+ *
+ * This function initializes the gx votes for all gpu frequencies
+ * for gpu dcvs.
+ */
+static int setup_gx_arc_votes(struct adreno_device *adreno_dev,
+	struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
+	struct rpmh_arc_vals *cx_rail)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen7_dcvs_table *table = &gmu->dcvs_table;
+	u32 index;
+	u16 vlvl_tbl[MAX_GX_LEVELS];
+	u32 gx_votes[MAX_GX_LEVELS];
+	int ret, i;
+
+	if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) {
+		dev_err(device->dev,
+			"Defined more GPU DCVS levels than RPMh can support\n");
+		return -ERANGE;
+	}
+
+	/* Add the zero powerlevel for the perf table */
+	table->gpu_level_num = pwr->num_pwrlevels + 1;
+
+	memset(vlvl_tbl, 0, sizeof(vlvl_tbl));
+
+	table->gx_votes[0].freq = 0;
+	table->gx_votes[0].cx_vote = 0;
+	/* Disable cx vote in gmu dcvs table if it is not supported in DT */
+	if (pwr->pwrlevels[0].cx_level == 0xffffffff)
+		table->gx_votes[0].cx_vote = 0xffffffff;
+
+	/* GMU power levels are in ascending order */
+	for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) {
+		u32 cx_vlvl = pwr->pwrlevels[i].cx_level;
+
+		vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level;
+		table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000;
+
+		ret = to_cx_hlvl(cx_rail, cx_vlvl,
+				&table->gx_votes[index].cx_vote);
+		if (ret) {
+			dev_err(device->dev, "Unsupported cx corner: %u\n",
+					cx_vlvl);
+			return ret;
+		}
+	}
+
+	ret = setup_volt_dependency_tbl(gx_votes, pri_rail,
+			sec_rail, vlvl_tbl, table->gpu_level_num);
+	if (!ret) {
+		for (i = 0; i < table->gpu_level_num; i++)
+			table->gx_votes[i].vote = gx_votes[i];
+	}
+
+	return ret;
+}
+
+static int build_dcvs_table(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	struct rpmh_arc_vals gx_arc, cx_arc, mx_arc;
+	int ret;
+
+	ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl");
+	if (ret)
+		return ret;
+
+	ret = rpmh_arc_cmds(&cx_arc, "cx.lvl");
+	if (ret)
+		return ret;
+
+	ret = rpmh_arc_cmds(&mx_arc, "mx.lvl");
+	if (ret)
+		return ret;
+
+	ret = setup_cx_arc_votes(gmu, &cx_arc, &mx_arc);
+	if (ret)
+		return ret;
+
+	return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc);
+}
+
+/*
+ * List of Bus Control Modules (BCMs) that need to be configured for the GPU
+ * to access DDR. For each bus level we will generate a vote for each BCM.
+ */
+static struct bcm gen7_ddr_bcms[] = {
+	{ .name = "SH0", .buswidth = 16 },
+	{ .name = "MC0", .buswidth = 4 },
+	{ .name = "ACV", .fixed = true },
+};
+
+/* Same as above, but for the CNOC BCMs */
+static struct bcm gen7_cnoc_bcms[] = {
+	{ .name = "CN0", .buswidth = 4 },
+};
+
+static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd,
+		struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc)
+{
+	u32 i, j;
+
+	cmd->bw_level_num = ddr->num_levels;
+	cmd->ddr_cmds_num = ddr->num_cmds;
+	cmd->ddr_wait_bitmask = ddr->wait_bitmask;
+
+	for (i = 0; i < ddr->num_cmds; i++)
+		cmd->ddr_cmd_addrs[i] = ddr->addrs[i];
+
+	for (i = 0; i < ddr->num_levels; i++)
+		for (j = 0; j < ddr->num_cmds; j++)
+			cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j];
+
+	if (!cnoc)
+		return;
+
+	cmd->cnoc_cmds_num = cnoc->num_cmds;
+	cmd->cnoc_wait_bitmask = cnoc->wait_bitmask;
+
+	for (i = 0; i < cnoc->num_cmds; i++)
+		cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i];
+
+	for (i = 0; i < cnoc->num_levels; i++)
+		for (j = 0; j < cnoc->num_cmds; j++)
+			cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j];
+}
+
+static int build_bw_table(struct adreno_device *adreno_dev)
+{
+	struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
+	const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct rpmh_bw_votes *ddr, *cnoc = NULL;
+	u32 perfmode_vote = gen7_core->acv_perfmode_vote;
+	u32 perfmode_lvl = perfmode_vote ? kgsl_pwrctrl_get_acv_perfmode_lvl(device,
+					gen7_core->acv_perfmode_ddr_freq) : 1;
+	u32 *cnoc_table;
+	u32 count;
+	int ret;
+
+	/* If perfmode vote is not defined, use default value as 0x8 */
+	if (!perfmode_vote)
+		perfmode_vote = BIT(3);
+
+	ddr = build_rpmh_bw_votes(gen7_ddr_bcms, ARRAY_SIZE(gen7_ddr_bcms),
+		pwr->ddr_table, pwr->ddr_table_count, perfmode_vote, perfmode_lvl);
+	if (IS_ERR(ddr))
+		return PTR_ERR(ddr);
+
+	cnoc_table = kgsl_bus_get_table(device->pdev, "qcom,bus-table-cnoc",
+		&count);
+
+	if (count > 0)
+		cnoc = build_rpmh_bw_votes(gen7_cnoc_bcms,
+			ARRAY_SIZE(gen7_cnoc_bcms), cnoc_table, count, 0, 0);
+
+	kfree(cnoc_table);
+
+	if (IS_ERR(cnoc)) {
+		free_rpmh_bw_votes(ddr);
+		return PTR_ERR(cnoc);
+	}
+
+	ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL);
+	if (ret)
+		return ret;
+
+	build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc);
+
+	free_rpmh_bw_votes(ddr);
+	free_rpmh_bw_votes(cnoc);
+
+	return 0;
+}
+
+int gen7_build_rpmh_tables(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = build_dcvs_table(adreno_dev);
+	if (ret) {
+		dev_err(adreno_dev->dev.dev, "Failed to build dcvs table\n");
+		return ret;
+	}
+
+	ret = build_bw_table(adreno_dev);
+	if (ret)
+		dev_err(adreno_dev->dev.dev, "Failed to build bw table\n");
+
+	return ret;
+}

+ 1811 - 0
qcom/opensource/graphics-kernel/adreno_gen7_snapshot.c

@@ -0,0 +1,1811 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_snapshot.h"
+#include "adreno_gen7_0_0_snapshot.h"
+#include "adreno_gen7_2_0_snapshot.h"
+#include "adreno_gen7_9_0_snapshot.h"
+#include "adreno_gen7_11_0_snapshot.h"
+
+static struct kgsl_memdesc *gen7_capturescript;
+static struct kgsl_memdesc *gen7_crashdump_registers;
+static u32 *gen7_cd_reg_end;
+static const struct gen7_snapshot_block_list *gen7_snapshot_block_list;
+static bool gen7_crashdump_timedout;
+
+/* Starting kernel virtual address for QDSS TMC register block */
+static void __iomem *tmc_virt;
+
+const struct gen7_snapshot_block_list gen7_0_0_snapshot_block_list = {
+	.pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers,
+	.debugbus_blocks = gen7_0_0_debugbus_blocks,
+	.debugbus_blocks_len = ARRAY_SIZE(gen7_0_0_debugbus_blocks),
+	.gbif_debugbus_blocks = gen7_gbif_debugbus_blocks,
+	.gbif_debugbus_blocks_len = ARRAY_SIZE(gen7_gbif_debugbus_blocks),
+	.cx_debugbus_blocks = gen7_cx_dbgc_debugbus_blocks,
+	.cx_debugbus_blocks_len = ARRAY_SIZE(gen7_cx_dbgc_debugbus_blocks),
+	.external_core_regs = gen7_0_0_external_core_regs,
+	.num_external_core_regs = ARRAY_SIZE(gen7_0_0_external_core_regs),
+	.gmu_regs = gen7_0_0_gmu_registers,
+	.gmu_gx_regs = gen7_0_0_gmugx_registers,
+	.rscc_regs = gen7_0_0_rscc_registers,
+	.reg_list = gen7_0_0_reg_list,
+	.cx_misc_regs = gen7_0_0_cx_misc_registers,
+	.shader_blocks = gen7_0_0_shader_blocks,
+	.num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks),
+	.clusters = gen7_0_0_clusters,
+	.num_clusters = ARRAY_SIZE(gen7_0_0_clusters),
+	.sptp_clusters = gen7_0_0_sptp_clusters,
+	.num_sptp_clusters = ARRAY_SIZE(gen7_0_0_sptp_clusters),
+	.post_crashdumper_regs = gen7_0_0_post_crashdumper_registers,
+	.index_registers = gen7_cp_indexed_reg_list,
+	.index_registers_len = ARRAY_SIZE(gen7_cp_indexed_reg_list),
+};
+
+const struct gen7_snapshot_block_list gen7_2_0_snapshot_block_list = {
+	.pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers,
+	.debugbus_blocks = gen7_2_0_debugbus_blocks,
+	.debugbus_blocks_len = ARRAY_SIZE(gen7_2_0_debugbus_blocks),
+	.gbif_debugbus_blocks = gen7_gbif_debugbus_blocks,
+	.gbif_debugbus_blocks_len = ARRAY_SIZE(gen7_gbif_debugbus_blocks),
+	.cx_debugbus_blocks = gen7_cx_dbgc_debugbus_blocks,
+	.cx_debugbus_blocks_len = ARRAY_SIZE(gen7_cx_dbgc_debugbus_blocks),
+	.external_core_regs = gen7_2_0_external_core_regs,
+	.num_external_core_regs = ARRAY_SIZE(gen7_2_0_external_core_regs),
+	.gmu_regs = gen7_2_0_gmu_registers,
+	.gmu_gx_regs = gen7_2_0_gmugx_registers,
+	.rscc_regs = gen7_2_0_rscc_registers,
+	.reg_list = gen7_2_0_reg_list,
+	.cx_misc_regs = gen7_0_0_cx_misc_registers,
+	.shader_blocks = gen7_2_0_shader_blocks,
+	.num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks),
+	.clusters = gen7_2_0_clusters,
+	.num_clusters = ARRAY_SIZE(gen7_2_0_clusters),
+	.sptp_clusters = gen7_2_0_sptp_clusters,
+	.num_sptp_clusters = ARRAY_SIZE(gen7_2_0_sptp_clusters),
+	.post_crashdumper_regs = gen7_0_0_post_crashdumper_registers,
+	.index_registers = gen7_cp_indexed_reg_list,
+	.index_registers_len = ARRAY_SIZE(gen7_cp_indexed_reg_list),
+};
+
+const struct gen7_snapshot_block_list gen7_9_0_snapshot_block_list = {
+	.pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers,
+	.debugbus_blocks = gen7_9_0_debugbus_blocks,
+	.debugbus_blocks_len = ARRAY_SIZE(gen7_9_0_debugbus_blocks),
+	.gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks,
+	.gbif_debugbus_blocks_len = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks),
+	.cx_debugbus_blocks = gen7_9_0_cx_debugbus_blocks,
+	.cx_debugbus_blocks_len = ARRAY_SIZE(gen7_9_0_cx_debugbus_blocks),
+	.external_core_regs = gen7_9_0_external_core_regs,
+	.num_external_core_regs = ARRAY_SIZE(gen7_9_0_external_core_regs),
+	.gmu_regs = gen7_9_0_gmu_registers,
+	.gmu_gx_regs = gen7_9_0_gmugx_registers,
+	.rscc_regs = gen7_9_0_rscc_registers,
+	.reg_list = gen7_9_0_reg_list,
+	.cx_misc_regs = gen7_9_0_cx_misc_registers,
+	.shader_blocks = gen7_9_0_shader_blocks,
+	.num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks),
+	.clusters = gen7_9_0_clusters,
+	.num_clusters = ARRAY_SIZE(gen7_9_0_clusters),
+	.sptp_clusters = gen7_9_0_sptp_clusters,
+	.num_sptp_clusters = ARRAY_SIZE(gen7_9_0_sptp_clusters),
+	.post_crashdumper_regs = gen7_0_0_post_crashdumper_registers,
+	.index_registers = gen7_9_0_cp_indexed_reg_list,
+	.index_registers_len = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list),
+};
+
+const struct gen7_snapshot_block_list gen7_11_0_snapshot_block_list = {
+	.pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers,
+	.debugbus_blocks = gen7_11_0_debugbus_blocks,
+	.debugbus_blocks_len = ARRAY_SIZE(gen7_11_0_debugbus_blocks),
+	.gbif_debugbus_blocks = gen7_11_0_gbif_debugbus_blocks,
+	.gbif_debugbus_blocks_len = ARRAY_SIZE(gen7_11_0_gbif_debugbus_blocks),
+	.cx_debugbus_blocks = gen7_11_0_cx_debugbus_blocks,
+	.cx_debugbus_blocks_len = ARRAY_SIZE(gen7_11_0_cx_debugbus_blocks),
+	.external_core_regs = gen7_11_0_external_core_regs,
+	.num_external_core_regs = ARRAY_SIZE(gen7_11_0_external_core_regs),
+	.gmu_regs = gen7_11_0_gmu_registers,
+	.gmu_gx_regs = gen7_11_0_gmugx_registers,
+	.rscc_regs = gen7_11_0_rscc_registers,
+	.reg_list = gen7_11_0_reg_list,
+	.cx_misc_regs = gen7_11_0_cx_misc_registers,
+	.shader_blocks = gen7_11_0_shader_blocks,
+	.num_shader_blocks = ARRAY_SIZE(gen7_11_0_shader_blocks),
+	.clusters = gen7_11_0_clusters,
+	.num_clusters = ARRAY_SIZE(gen7_11_0_clusters),
+	.sptp_clusters = gen7_11_0_sptp_clusters,
+	.num_sptp_clusters = ARRAY_SIZE(gen7_11_0_sptp_clusters),
+	.post_crashdumper_regs = gen7_0_0_post_crashdumper_registers,
+	.index_registers = gen7_11_0_cp_indexed_reg_list,
+	.index_registers_len = ARRAY_SIZE(gen7_11_0_cp_indexed_reg_list),
+};
+
+#define GEN7_SP_READ_SEL_VAL(_location, _pipe, _statetype, _usptp, _sptp) \
+				(FIELD_PREP(GENMASK(19, 18), _location) | \
+				 FIELD_PREP(GENMASK(17, 16), _pipe) | \
+				 FIELD_PREP(GENMASK(15, 8), _statetype) | \
+				 FIELD_PREP(GENMASK(7, 4), _usptp) | \
+				 FIELD_PREP(GENMASK(3, 0), _sptp))
+
+#define GEN7_CP_APERTURE_REG_VAL(_pipe, _cluster, _context) \
+			(FIELD_PREP(GENMASK(13, 12), _pipe) | \
+			 FIELD_PREP(GENMASK(10, 8), _cluster) | \
+			 FIELD_PREP(GENMASK(5, 4), _context))
+
+#define GEN7_DEBUGBUS_SECTION_SIZE (sizeof(struct kgsl_snapshot_debugbus) \
+			+ (GEN7_DEBUGBUS_BLOCK_SIZE << 3))
+
+#define CD_REG_END 0xaaaaaaaa
+
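+/*
+ * Crashdumper script entries are pairs of 64-bit words: the first carries the
+ * data (or the target GPU address for a read) and the second encodes the
+ * register offset plus control bits. Each helper returns the number of 64-bit
+ * words it emitted.
+ */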
+static int CD_WRITE(u64 *ptr, u32 offset, u64 val)
+{
+	ptr[0] = val;
+	ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | BIT(21) | BIT(0);
+
+	return 2;
+}
+
+static int CD_READ(u64 *ptr, u32 offset, u32 size, u64 target)
+{
+	ptr[0] = target;
+	ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | size;
+
+	return 2;
+}
+
+static void CD_FINISH(u64 *ptr, u32 offset)
+{
+	gen7_cd_reg_end = gen7_crashdump_registers->hostptr + offset;
+	*gen7_cd_reg_end = CD_REG_END;
+	ptr[0] = gen7_crashdump_registers->gpuaddr + offset;
+	ptr[1] = FIELD_PREP(GENMASK(63, 44), GEN7_CP_CRASH_DUMP_STATUS) | BIT(0);
+	ptr[2] = 0;
+	ptr[3] = 0;
+}
+
+static bool CD_SCRIPT_CHECK(struct kgsl_device *device)
+{
+	return (adreno_smmu_is_stalled(ADRENO_DEVICE(device)) || (!device->snapshot_crashdumper) ||
+		IS_ERR_OR_NULL(gen7_capturescript) ||
+		IS_ERR_OR_NULL(gen7_crashdump_registers) ||
+		gen7_crashdump_timedout);
+}
+
+static bool _gen7_do_crashdump(struct kgsl_device *device)
+{
+	unsigned int reg = 0;
+	ktime_t timeout;
+
+	if (CD_SCRIPT_CHECK(device))
+		return false;
+
+	kgsl_regwrite(device, GEN7_CP_CRASH_SCRIPT_BASE_LO,
+			lower_32_bits(gen7_capturescript->gpuaddr));
+	kgsl_regwrite(device, GEN7_CP_CRASH_SCRIPT_BASE_HI,
+			upper_32_bits(gen7_capturescript->gpuaddr));
+	kgsl_regwrite(device, GEN7_CP_CRASH_DUMP_CNTL, 1);
+
+	timeout = ktime_add_ms(ktime_get(), CP_CRASH_DUMPER_TIMEOUT);
+
+	if (!device->snapshot_atomic)
+		might_sleep();
+	for (;;) {
+		/* make sure we're reading the latest value */
+		rmb();
+		if ((*gen7_cd_reg_end) != CD_REG_END)
+			break;
+		if (ktime_compare(ktime_get(), timeout) > 0)
+			break;
+		/* Wait 1msec to avoid unnecessary looping */
+		if (!device->snapshot_atomic)
+			usleep_range(100, 1000);
+	}
+
+	kgsl_regread(device, GEN7_CP_CRASH_DUMP_STATUS, &reg);
+
+	/*
+	 * Writing to the GEN7_CP_CRASH_DUMP_CNTL also resets the
+	 * GEN7_CP_CRASH_DUMP_STATUS. Make sure the read above is
+	 * complete before we change the value
+	 */
+	rmb();
+
+	kgsl_regwrite(device, GEN7_CP_CRASH_DUMP_CNTL, 0);
+
+	if (WARN(!(reg & 0x2), "Crashdumper timed out\n")) {
+		/*
+		 * Gen7 crash dumper script is broken down into multiple chunks
+		 * and script will be invoked multiple times to capture snapshot
+		 * of different sections of GPU. If crashdumper fails once, it is
+		 * highly likely it will fail subsequently as well. Hence update
+		 * gen7_crashdump_timedout variable to avoid running crashdumper
+		 * after it fails once.
+		 */
+		gen7_crashdump_timedout = true;
+		return false;
+	}
+
+	return true;
+}
+
+static size_t gen7_legacy_snapshot_registers(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct gen7_reg_list *regs = priv;
+
+	if (regs->sel)
+		kgsl_regwrite(device, regs->sel->host_reg, regs->sel->val);
+
+	return adreno_snapshot_registers_v2(device, buf, remain, (void *)regs->regs);
+}
+
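+/*
+ * Non-legacy path: the crashdumper has already copied the register values
+ * into the scratch buffer, so just repack them into the snapshot section.
+ */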
+static size_t gen7_snapshot_registers(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct gen7_reg_list *regs = (struct gen7_reg_list *)priv;
+	const u32 *ptr = regs->regs;
+	unsigned int *data = (unsigned int *)buf;
+	unsigned int *src;
+	unsigned int size = adreno_snapshot_regs_count(ptr) * 4;
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	src = gen7_crashdump_registers->hostptr + regs->offset;
+
+	for (ptr = regs->regs; ptr[0] != UINT_MAX; ptr += 2) {
+		unsigned int cnt = REG_COUNT(ptr);
+
+		if (cnt == 1)
+			*data++ = BIT(31) | ptr[0];
+		else {
+			*data++ = ptr[0];
+			*data++ = cnt;
+		}
+		memcpy(data, src, cnt << 2);
+		data += cnt;
+		src += cnt;
+	}
+
+	/* Return the size of the section */
+	return size;
+}
+
+static size_t gen7_legacy_snapshot_shader(struct kgsl_device *device,
+				u8 *buf, size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_shader_v2 *header =
+		(struct kgsl_snapshot_shader_v2 *) buf;
+	struct gen7_shader_block_info *info = (struct gen7_shader_block_info *) priv;
+	struct gen7_shader_block *block = info->block;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int read_sel;
+	int i;
+
+	if (remain < (sizeof(*header) + (block->size << 2))) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	/*
+	 * If crashdumper times out, accessing some readback states from
+	 * AHB path might fail. Hence, skip SP_INST_TAG and SP_INST_DATA*
+	 * state types during snapshot dump in legacy flow.
+	 */
+	if (adreno_is_gen7_0_0(adreno_dev) || adreno_is_gen7_0_1(adreno_dev) ||
+		adreno_is_gen7_4_0(adreno_dev)) {
+		if (block->statetype == SP_INST_TAG ||
+			block->statetype == SP_INST_DATA ||
+			block->statetype == SP_INST_DATA_1 ||
+			block->statetype == SP_INST_DATA_2)
+			return 0;
+	}
+
+	header->type = block->statetype;
+	header->index = info->sp_id;
+	header->size = block->size;
+	header->usptp = info->usptp;
+	header->location = block->location;
+	header->pipe_id = block->pipeid;
+
+	read_sel = GEN7_SP_READ_SEL_VAL(block->location, block->pipeid,
+				block->statetype, info->usptp, info->sp_id);
+
+	kgsl_regwrite(device, GEN7_SP_READ_SEL, read_sel);
+
+	/*
+	 * An explicit barrier is needed so that reads do not happen before
+	 * the register write.
+	 */
+	mb();
+
+	for (i = 0; i < block->size; i++)
+		data[i] = kgsl_regmap_read(&device->regmap, GEN7_SP_AHB_READ_APERTURE + i);
+
+	return (sizeof(*header) + (block->size << 2));
+}
+
+static size_t gen7_snapshot_shader_memory(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_shader_v2 *header =
+		(struct kgsl_snapshot_shader_v2 *) buf;
+	struct gen7_shader_block_info *info = (struct gen7_shader_block_info *) priv;
+	struct gen7_shader_block *block = info->block;
+	unsigned int *data = (unsigned int *) (buf + sizeof(*header));
+
+	if (remain < (sizeof(*header) + (block->size << 2))) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	header->type = block->statetype;
+	header->index = info->sp_id;
+	header->size = block->size;
+	header->usptp = info->usptp;
+	header->location = block->location;
+	header->pipe_id = block->pipeid;
+
+	memcpy(data, gen7_crashdump_registers->hostptr + info->offset,
+			(block->size << 2));
+
+	return (sizeof(*header) + (block->size << 2));
+}
+
+static void qdss_regwrite(void __iomem *regbase, u32 offsetbytes, u32 value)
+{
+	void __iomem *reg;
+
+	reg = regbase + offsetbytes;
+
+	/* Ensure previous write is committed */
+	wmb();
+	__raw_writel(value, reg);
+}
+
+static u32 qdss_regread(void __iomem *regbase, u32 offsetbytes)
+{
+	void __iomem *reg;
+	u32 val;
+
+	reg = regbase + offsetbytes;
+	val = __raw_readl(reg);
+
+	/* Make sure memory is updated before next access */
+	rmb();
+	return val;
+}
+
+static size_t gen7_snapshot_trace_buffer_gfx_trace(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	u32 start_idx = 0, status = 0, count = 0, wrap_count = 0, write_ptr = 0;
+	struct kgsl_snapshot_trace_buffer *header =
+			(struct kgsl_snapshot_trace_buffer *) buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct gen7_trace_buffer_info *info =
+				(struct gen7_trace_buffer_info *) priv;
+
+	if (remain < SZ_2K + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "TRACE 2K BUFFER");
+		return 0;
+	}
+
+	memcpy(header->ping_blk, info->ping_blk, sizeof(header->ping_blk));
+	memcpy(header->ping_idx, info->ping_idx, sizeof(header->ping_idx));
+	header->granularity = info->granularity;
+	header->segment = info->segment;
+	header->dbgc_ctrl = info->dbgc_ctrl;
+
+	/* Read the status of trace buffer to determine if it's full or empty */
+	kgsl_regread(device, GEN7_DBGC_TRACE_BUFFER_STATUS, &status);
+
+	/*
+	 * wrap_count and write_ptr are both part of status:
+	 * if status is 0, wrap_count and write_ptr are 0 and the buffer is empty;
+	 * if status is non-zero and wrap_count is 0, read a partial buffer;
+	 * if wrap_count is non-zero, read the entire 2k buffer.
+	 * Always read the oldest data available.
+	 */
+
+	/* if status is 0 then buffer is empty */
+	if (!status) {
+		header->size = 0;
+		return sizeof(*header);
+	}
+
+	/* Number of times the circular buffer has wrapped around */
+	wrap_count = FIELD_GET(GENMASK(31, 12), status);
+	write_ptr = FIELD_GET(GENMASK(8, 0), status);
+
+	/* Read partial buffer starting from 0 */
+	if (!wrap_count) {
+		/* No of dwords to read : (write ptr - 0) of indexed register */
+		count = write_ptr;
+		header->size = count << 2;
+		start_idx = 0;
+	} else {
+		/* Read entire 2k buffer starting from write ptr */
+		start_idx = write_ptr + 1;
+		count = SZ_512;
+		header->size = SZ_2K;
+	}
+
+	kgsl_regmap_read_indexed_interleaved(&device->regmap,
+		GEN7_DBGC_DBG_TRACE_BUFFER_RD_ADDR, GEN7_DBGC_DBG_TRACE_BUFFER_RD_DATA, data,
+			start_idx, count);
+
+	return (sizeof(*header) + header->size);
+}
+
+static size_t gen7_snapshot_trace_buffer_etb(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	u32 read_ptr, count, write_ptr, val, idx = 0;
+	struct kgsl_snapshot_trace_buffer *header = (struct kgsl_snapshot_trace_buffer *) buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct gen7_trace_buffer_info *info = (struct gen7_trace_buffer_info *) priv;
+
+	/* Unlock ETB buffer */
+	qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_LAR, 0xC5ACCE55);
+
+	/* Make sure unlock goes through before proceeding further */
+	mb();
+
+	/* Flush the QDSS pipeline to ensure completion of pending write to buffer */
+	val = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_FFCR);
+	qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_FFCR, val | 0x40);
+
+	/* Make sure pipeline is flushed before we get read and write pointers */
+	mb();
+
+	/* Disable ETB */
+	qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_CTRL, 0);
+
+	/* Set to circular mode */
+	qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_MODE, 0);
+
+	/* Ensure buffer is set to circular mode before accessing it */
+	mb();
+
+	/* Size of buffer is specified in register TMC_RSZ */
+	count = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RSZ) << 2;
+	read_ptr = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RRP);
+	write_ptr = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RWP);
+
+	/* If the ETB buffer is full read_ptr equals write_ptr, else write_ptr leads read_ptr */
+	count = (read_ptr == write_ptr) ? count : (write_ptr - read_ptr);
+
+	if (remain < count + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "ETB BUFFER");
+		return 0;
+	}
+
+	/*
+	 * The read pointer is 4-byte aligned and the write pointer is 2-byte
+	 * aligned. We read 4 bytes of data per iteration below, so align the
+	 * count down to 4 bytes.
+	 */
+	count = ALIGN_DOWN(count, 4);
+
+	header->size = count;
+	header->dbgc_ctrl = info->dbgc_ctrl;
+	memcpy(header->ping_blk, info->ping_blk, sizeof(header->ping_blk));
+	memcpy(header->ping_idx, info->ping_idx, sizeof(header->ping_idx));
+	header->granularity = info->granularity;
+	header->segment = info->segment;
+
+	while (count != 0) {
+		/* This indexed register auto increments index as we read */
+		data[idx++] = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RRD);
+		count = count - 4;
+	}
+
+	return (sizeof(*header) + header->size);
+}
+
+static void gen7_snapshot_trace_buffer(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	u32 val_tmc_ctrl = 0, val_etr_ctrl = 0, val_etr1_ctrl = 0;
+	u32 i = 0, sel_gx = 0, sel_cx = 0, val_gx = 0, val_cx = 0, val = 0;
+	struct gen7_trace_buffer_info info;
+	struct resource *res1, *res2;
+	struct clk *clk;
+	int ret;
+	void __iomem *etr_virt;
+
+	/*
+	 * Data can be collected from either CX_DBGC or DBGC; the two are mutually exclusive.
+	 * Read the necessary select registers and determine the source of data.
+	 * This loop reads SEL_A to SEL_D of both CX_DBGC and DBGC and accordingly
+	 * updates the header information of trace buffer section.
+	 */
+	for (i = 0; i < TRACE_BUF_NUM_SIG; i++) {
+		kgsl_regread(device, GEN7_DBGC_CFG_DBGBUS_SEL_A + i, &sel_gx);
+		kgsl_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A + i, &sel_cx);
+		val_gx |= sel_gx;
+		val_cx |= sel_cx;
+		info.ping_idx[i] = FIELD_GET(GENMASK(7, 0), (sel_gx | sel_cx));
+		info.ping_blk[i] = FIELD_GET(GENMASK(24, 16), (sel_gx | sel_cx));
+	}
+
+	/* Zero the header if not programmed to export any buffer */
+	if (!val_gx && !val_cx) {
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER,
+			snapshot, NULL, &info);
+		return;
+	}
+
+	/* Enable APB clock to read data from trace buffer */
+	clk = clk_get(&device->pdev->dev, "apb_pclk");
+
+	if (IS_ERR(clk)) {
+		dev_err(device->dev, "Unable to get QDSS clock\n");
+		return;
+	}
+
+	ret = clk_prepare_enable(clk);
+
+	if (ret) {
+		dev_err(device->dev, "QDSS Clock enable error: %d\n", ret);
+		clk_put(clk);
+		return;
+	}
+
+	res1 = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "qdss_etr");
+	res2 = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "qdss_tmc");
+
+	if (!res1 || !res2)
+		goto err_clk_put;
+
+	etr_virt = ioremap(res1->start, resource_size(res1));
+	tmc_virt = ioremap(res2->start, resource_size(res2));
+
+	if (!etr_virt || !tmc_virt)
+		goto err_unmap;
+
+	/*
+	 * Update header information based on source of data, read necessary CNTLT registers
+	 * for granularity and segment information.
+	 */
+	if (val_gx) {
+		info.dbgc_ctrl = GX_DBGC;
+		kgsl_regread(device, GEN7_DBGC_CFG_DBGBUS_CNTLT, &val);
+	} else {
+		info.dbgc_ctrl = CX_DBGC;
+		kgsl_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_CNTLT, &val);
+	}
+
+	info.granularity = FIELD_GET(GENMASK(14, 12), val);
+	info.segment = FIELD_GET(GENMASK(31, 28), val);
+
+	val_tmc_ctrl = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_CTRL);
+
+	/*
+	 * If TMC CTRL is 0 and val_cx is non-zero, dump an empty buffer.
+	 * If TMC CTRL is 0 and val_gx is non-zero, dump the 2k gfx buffer.
+	 * The 2k buffer is not present for CX blocks.
+	 * If both ETRs' CTRL is 0, dump the ETB QDSS buffer and disable QDSS.
+	 * If either ETR's CTRL is 1, disable QDSS, dumping the ETB buffer to DDR.
+	 */
+	if (!val_tmc_ctrl) {
+		if (val_gx)
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER,
+				snapshot, gen7_snapshot_trace_buffer_gfx_trace, &info);
+		else
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER,
+					snapshot, NULL, &info);
+	} else {
+		val_etr_ctrl = qdss_regread(etr_virt, QDSS_AOSS_APB_ETR_CTRL);
+		val_etr1_ctrl = qdss_regread(etr_virt, QDSS_AOSS_APB_ETR1_CTRL);
+		if (!val_etr_ctrl && !val_etr1_ctrl)
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER,
+				snapshot, gen7_snapshot_trace_buffer_etb, &info);
+		qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_CTRL, 0);
+	}
+
+err_unmap:
+	iounmap(tmc_virt);
+	iounmap(etr_virt);
+
+err_clk_put:
+	clk_disable_unprepare(clk);
+	clk_put(clk);
+}
+
+static void gen7_snapshot_shader(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	unsigned int i;
+	struct gen7_shader_block_info info;
+	u64 *ptr;
+	u32 offset = 0;
+	struct gen7_shader_block *shader_blocks = gen7_snapshot_block_list->shader_blocks;
+	size_t num_shader_blocks = gen7_snapshot_block_list->num_shader_blocks;
+	unsigned int sp;
+	unsigned int usptp;
+	size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain,
+		void *priv) = gen7_legacy_snapshot_shader;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (adreno_is_gen7_0_x_family(adreno_dev))
+		kgsl_regrmw(device, GEN7_SP_DBG_CNTL, GENMASK(1, 0), 3);
+
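+	/* If the crashdumper cannot be used, fall back to AHB reads for every block */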
+	if (CD_SCRIPT_CHECK(device)) {
+		for (i = 0; i < num_shader_blocks; i++) {
+			struct gen7_shader_block *block = &shader_blocks[i];
+
+			for (sp = 0; sp < block->num_sps; sp++) {
+				for (usptp = 0; usptp < block->num_usptps; usptp++) {
+					info.block = block;
+					info.sp_id = sp;
+					info.usptp = usptp;
+					info.offset = offset;
+					offset += block->size << 2;
+
+					/* Shader working/shadow memory */
+					kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_SHADER_V2,
+						snapshot, func, &info);
+				}
+			}
+		}
+
+		goto done;
+	}
+
+	for (i = 0; i < num_shader_blocks; i++) {
+		struct gen7_shader_block *block = &shader_blocks[i];
+
+		/* Build the crash script */
+		ptr = gen7_capturescript->hostptr;
+		offset = 0;
+
+		for (sp = 0; sp < block->num_sps; sp++) {
+			for (usptp = 0; usptp < block->num_usptps; usptp++) {
+				/* Program the aperture */
+				ptr += CD_WRITE(ptr, GEN7_SP_READ_SEL,
+					GEN7_SP_READ_SEL_VAL(block->location, block->pipeid,
+						block->statetype, usptp, sp));
+
+				/* Read all the data in one chunk */
+				ptr += CD_READ(ptr, GEN7_SP_AHB_READ_APERTURE, block->size,
+					gen7_crashdump_registers->gpuaddr + offset);
+				offset += block->size << 2;
+			}
+		}
+		/* Marker for end of script */
+		CD_FINISH(ptr, offset);
+
+		/* Try to run the crash dumper */
+		func = gen7_legacy_snapshot_shader;
+		if (_gen7_do_crashdump(device))
+			func = gen7_snapshot_shader_memory;
+
+		offset = 0;
+		for (sp = 0; sp < block->num_sps; sp++) {
+			for (usptp = 0; usptp < block->num_usptps; usptp++) {
+				info.block = block;
+				info.sp_id = sp;
+				info.usptp = usptp;
+				info.offset = offset;
+				offset += block->size << 2;
+
+				/* Shader working/shadow memory */
+				kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_SHADER_V2,
+					snapshot, func, &info);
+			}
+		}
+	}
+
+done:
+	if (adreno_is_gen7_0_x_family(adreno_dev))
+		kgsl_regrmw(device, GEN7_SP_DBG_CNTL, GENMASK(1, 0), 0x0);
+}
+
+static void gen7_snapshot_mempool(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	/* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */
+	kgsl_regrmw(device, GEN7_CP_CHICKEN_DBG, 0x4, 0x4);
+	kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x4);
+
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		GEN7_CP_MEM_POOL_DBG_ADDR, GEN7_CP_MEM_POOL_DBG_DATA,
+		0, 0x2200);
+
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		GEN7_CP_BV_MEM_POOL_DBG_ADDR, GEN7_CP_BV_MEM_POOL_DBG_DATA,
+		0, 0x2200);
+
+	kgsl_regrmw(device, GEN7_CP_CHICKEN_DBG, 0x4, 0x0);
+	kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x0);
+}
+
+static unsigned int gen7_read_dbgahb(struct kgsl_device *device,
+				unsigned int regbase, unsigned int reg)
+{
+	unsigned int val;
+
+	kgsl_regread(device, (GEN7_SP_AHB_READ_APERTURE + reg - regbase), &val);
+	return val;
+}
+
+static size_t gen7_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device,
+				u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs_v2 *header =
+				(struct kgsl_snapshot_mvc_regs_v2 *)buf;
+	struct gen7_sptp_cluster_registers *cluster =
+			(struct gen7_sptp_cluster_registers *)priv;
+	const u32 *ptr = cluster->regs;
+	unsigned int read_sel;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int j;
+	unsigned int size = adreno_snapshot_regs_count(ptr) * 4;
+
+	if (remain < (sizeof(*header) + size)) {
+		SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = cluster->context_id;
+	header->cluster_id = cluster->cluster_id;
+	header->pipe_id = cluster->pipe_id;
+	header->location_id = cluster->location_id;
+
+	read_sel = GEN7_SP_READ_SEL_VAL(cluster->location_id, cluster->pipe_id,
+			cluster->statetype, 0, 0);
+
+	kgsl_regwrite(device, GEN7_SP_READ_SEL, read_sel);
+
+	/*
+	 * An explicit barrier is needed so that reads do not happen before
+	 * the register write.
+	 */
+	mb();
+
+	for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) {
+		unsigned int count = REG_COUNT(ptr);
+
+		if (count == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = ptr[0] | (1 << 31);
+			*data++ = ptr[1];
+		}
+		for (j = ptr[0]; j <= ptr[1]; j++)
+			*data++ = gen7_read_dbgahb(device, cluster->regbase, j);
+	}
+
+	return (size + sizeof(*header));
+}
+
+static size_t gen7_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs_v2 *header =
+				(struct kgsl_snapshot_mvc_regs_v2 *)buf;
+	struct gen7_sptp_cluster_registers *cluster =
+				(struct gen7_sptp_cluster_registers *)priv;
+	const u32 *ptr = cluster->regs;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int *src;
+	unsigned int size = adreno_snapshot_regs_count(ptr) * 4;
+
+	if (remain < (sizeof(*header) + size)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = cluster->context_id;
+	header->cluster_id = cluster->cluster_id;
+	header->pipe_id = cluster->pipe_id;
+	header->location_id = cluster->location_id;
+
+	src = gen7_crashdump_registers->hostptr + cluster->offset;
+
+	for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) {
+		unsigned int cnt = REG_COUNT(ptr);
+
+		if (cnt == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = ptr[0] | (1 << 31);
+			*data++ = ptr[1];
+		}
+		memcpy(data, src, cnt << 2);
+		data += cnt;
+		src += cnt;
+	}
+
+	return (size + sizeof(*header));
+}
+
+static void gen7_snapshot_dbgahb_regs(struct kgsl_device *device,
+			struct kgsl_snapshot *snapshot)
+{
+	int i;
+	u64 *ptr, offset = 0;
+	unsigned int count;
+	struct gen7_sptp_cluster_registers *sptp_clusters = gen7_snapshot_block_list->sptp_clusters;
+	size_t num_sptp_clusters = gen7_snapshot_block_list->num_sptp_clusters;
+	size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain,
+		void *priv) = gen7_legacy_snapshot_cluster_dbgahb;
+
+	if (CD_SCRIPT_CHECK(device)) {
+		for (i = 0; i < num_sptp_clusters; i++)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func,
+				&sptp_clusters[i]);
+		return;
+	}
+
+	/* Build the crash script */
+	ptr = gen7_capturescript->hostptr;
+
+	for (i = 0; i < num_sptp_clusters; i++) {
+		struct gen7_sptp_cluster_registers *cluster = &sptp_clusters[i];
+		const u32 *regs = cluster->regs;
+
+		cluster->offset = offset;
+
+		/* Program the aperture */
+		ptr += CD_WRITE(ptr, GEN7_SP_READ_SEL, GEN7_SP_READ_SEL_VAL
+			(cluster->location_id, cluster->pipe_id, cluster->statetype, 0, 0));
+
+		for (; regs[0] != UINT_MAX; regs += 2) {
+			count = REG_COUNT(regs);
+			ptr += CD_READ(ptr, (GEN7_SP_AHB_READ_APERTURE +
+				regs[0] - cluster->regbase), count,
+				(gen7_crashdump_registers->gpuaddr + offset));
+
+			offset += count * sizeof(unsigned int);
+		}
+	}
+	/* Marker for end of script */
+	CD_FINISH(ptr, offset);
+
+	/* Try to run the crash dumper */
+	if (_gen7_do_crashdump(device))
+		func = gen7_snapshot_cluster_dbgahb;
+
+	/* Capture the registers in snapshot */
+	for (i = 0; i < num_sptp_clusters; i++)
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, &sptp_clusters[i]);
+}
+
+static size_t gen7_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs_v2 *header =
+					(struct kgsl_snapshot_mvc_regs_v2 *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	struct gen7_cluster_registers *cluster =
+			(struct gen7_cluster_registers *)priv;
+	const u32 *ptr = cluster->regs;
+	unsigned int j;
+	unsigned int size = adreno_snapshot_regs_count(ptr) * 4;
+
+	if (remain < (sizeof(*header) + size)) {
+		SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
+	header->cluster_id = cluster->cluster_id;
+	header->pipe_id = cluster->pipe_id;
+	header->location_id = UINT_MAX;
+
+	/*
+	 * Set the AHB control for the Host to read from the
+	 * cluster/context for this iteration.
+	 */
+	kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, GEN7_CP_APERTURE_REG_VAL
+			(cluster->pipe_id, cluster->cluster_id, cluster->context_id));
+
+	if (cluster->sel)
+		kgsl_regwrite(device, cluster->sel->host_reg, cluster->sel->val);
+
+	for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) {
+		unsigned int count = REG_COUNT(ptr);
+
+		if (count == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = ptr[0] | (1 << 31);
+			*data++ = ptr[1];
+		}
+		for (j = ptr[0]; j <= ptr[1]; j++) {
+			kgsl_regread(device, j, data);
+			data++;
+		}
+	}
+
+	return (size + sizeof(*header));
+}
+
+static size_t gen7_snapshot_mvc(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs_v2 *header =
+				(struct kgsl_snapshot_mvc_regs_v2 *)buf;
+	struct gen7_cluster_registers *cluster =
+			(struct gen7_cluster_registers *)priv;
+	const u32 *ptr = cluster->regs;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int *src;
+	unsigned int cnt;
+	unsigned int size = adreno_snapshot_regs_count(ptr) * 4;
+
+	if (remain < (sizeof(*header) + size)) {
+		SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
+	header->cluster_id = cluster->cluster_id;
+	header->pipe_id = cluster->pipe_id;
+	header->location_id = UINT_MAX;
+
+	src = gen7_crashdump_registers->hostptr + cluster->offset;
+
+	for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) {
+		cnt = REG_COUNT(ptr);
+
+		if (cnt == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = ptr[0] | (1 << 31);
+			*data++ = ptr[1];
+		}
+		memcpy(data, src, cnt << 2);
+		src += cnt;
+		data += cnt;
+	}
+
+	return (size + sizeof(*header));
+
+}
+
+static void gen7_snapshot_mvc_regs(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	int i;
+	u64 *ptr, offset = 0;
+	unsigned int count;
+	struct gen7_cluster_registers *clusters = gen7_snapshot_block_list->clusters;
+	size_t num_clusters = gen7_snapshot_block_list->num_clusters;
+	size_t (*func)(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv) = gen7_legacy_snapshot_mvc;
+
+	if (CD_SCRIPT_CHECK(device)) {
+		for (i = 0; i < num_clusters; i++)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, &clusters[i]);
+		return;
+	}
+
+	/* Build the crash script */
+	ptr = gen7_capturescript->hostptr;
+
+	for (i = 0; i < num_clusters; i++) {
+		struct gen7_cluster_registers *cluster = &clusters[i];
+		const u32 *regs = cluster->regs;
+
+		cluster->offset = offset;
+		ptr += CD_WRITE(ptr, GEN7_CP_APERTURE_CNTL_CD, GEN7_CP_APERTURE_REG_VAL
+			(cluster->pipe_id, cluster->cluster_id, cluster->context_id));
+
+		if (cluster->sel)
+			ptr += CD_WRITE(ptr, cluster->sel->cd_reg, cluster->sel->val);
+
+		for (; regs[0] != UINT_MAX; regs += 2) {
+			count = REG_COUNT(regs);
+
+			ptr += CD_READ(ptr, regs[0],
+				count, (gen7_crashdump_registers->gpuaddr + offset));
+
+			offset += count * sizeof(unsigned int);
+		}
+	}
+
+	/* Marker for end of script */
+	CD_FINISH(ptr, offset);
+
+	/* Try to run the crash dumper */
+	if (_gen7_do_crashdump(device))
+		func = gen7_snapshot_mvc;
+
+	for (i = 0; i < num_clusters; i++)
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, &clusters[i]);
+}
+
+/* gen7_dbgc_debug_bus_read() - Read data from trace bus */
+static void gen7_dbgc_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg;
+
+	reg = FIELD_PREP(GENMASK(7, 0), index) |
+		FIELD_PREP(GENMASK(24, 16), block_id);
+
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to ensure there is enough time for the
+	 * correct data to be funneled into the trace buffer
+	 */
+	udelay(1);
+
+	kgsl_regread(device, GEN7_DBGC_CFG_DBGBUS_TRACE_BUF2, val);
+	val++;
+	kgsl_regread(device, GEN7_DBGC_CFG_DBGBUS_TRACE_BUF1, val);
+}
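
As a worked example of the select encoding above (values chosen for illustration): reading index 5 of DEBUGBUS_RBBM (block id 3, from the gen7_debugbus_ids enum in adreno_gen7_snapshot.h) programs reg = FIELD_PREP(GENMASK(7, 0), 5) | FIELD_PREP(GENMASK(24, 16), 3) = 0x00030005 into all four DBGBUS_SEL registers; each read then returns two dwords, one from TRACE_BUF2 and one from TRACE_BUF1.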
+
+/* gen7_snapshot_dbgc_debugbus_block() - Capture debug data for a gpu block */
+static size_t gen7_snapshot_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+
+	if (remain < GEN7_DEBUGBUS_SECTION_SIZE) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->count = GEN7_DEBUGBUS_BLOCK_SIZE * 2;
+
+	for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++)
+		gen7_dbgc_debug_bus_read(device, *block, i, &data[i*2]);
+
+	return GEN7_DEBUGBUS_SECTION_SIZE;
+}
+
+static u32 gen7_dbgc_side_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index)
+{
+	u32 val;
+	unsigned int reg = FIELD_PREP(GENMASK(7, 0), index) |
+			FIELD_PREP(GENMASK(24, 16), block_id);
+
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to ensure there is enough time for the
+	 * correct data to be funneled into the trace buffer
+	 */
+	udelay(1);
+
+	val = kgsl_regmap_read(&device->regmap, GEN7_DBGC_CFG_DBGBUS_OVER);
+
+	return FIELD_GET(GENMASK(27, 24), val);
+}
+
+static size_t gen7_snapshot_dbgc_side_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_side_debugbus *header =
+		(struct kgsl_snapshot_side_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size = (GEN7_DEBUGBUS_BLOCK_SIZE * sizeof(unsigned int)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->size = GEN7_DEBUGBUS_BLOCK_SIZE;
+	header->valid_data = 0x4;
+
+	for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++)
+		data[i] = gen7_dbgc_side_debug_bus_read(device, *block, i);
+
+	return size;
+}
+
+/* gen7_cx_debug_bus_read() - Read data from the CX trace bus */
+static void gen7_cx_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg;
+
+	reg = FIELD_PREP(GENMASK(7, 0), index) |
+		FIELD_PREP(GENMASK(24, 16), block_id);
+
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to ensure there is enough time for the
+	 * correct data to be funneled into the trace buffer
+	 */
+	udelay(1);
+
+	kgsl_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF2, val);
+	val++;
+	kgsl_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF1, val);
+}
+
+/*
+ * gen7_snapshot_cx_dbgc_debugbus_block() - Capture debug data for a gpu
+ * block from the CX DBGC block
+ */
+static size_t gen7_snapshot_cx_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+
+	if (remain < GEN7_DEBUGBUS_SECTION_SIZE) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->count = GEN7_DEBUGBUS_BLOCK_SIZE * 2;
+
+	for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++)
+		gen7_cx_debug_bus_read(device, *block, i, &data[i*2]);
+
+	return GEN7_DEBUGBUS_SECTION_SIZE;
+}
+
+/* gen7_cx_side_debug_bus_read() - Read side data from the CX trace bus */
+static void gen7_cx_side_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg = FIELD_PREP(GENMASK(7, 0), index) |
+			FIELD_PREP(GENMASK(24, 16), block_id);
+
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to ensure there is enough time for the
+	 * correct data to be funneled into the trace buffer
+	 */
+	udelay(1);
+
+	kgsl_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_OVER, &reg);
+	*val = FIELD_GET(GENMASK(27, 24), reg);
+}
+
+/*
+ * gen7_snapshot_cx_side_dbgc_debugbus_block() - Capture side debug data for
+ * a gpu block from the CX DBGC block
+ */
+static size_t gen7_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_side_debugbus *header =
+		(struct kgsl_snapshot_side_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size = (GEN7_DEBUGBUS_BLOCK_SIZE * sizeof(unsigned int)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->size = GEN7_DEBUGBUS_BLOCK_SIZE;
+	header->valid_data = 0x4;
+
+	for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++)
+		gen7_cx_side_debug_bus_read(device, *block, i, &data[i]);
+
+	return size;
+}
+
+/* gen7_snapshot_debugbus() - Capture debug bus data */
+static void gen7_snapshot_debugbus(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	int i;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_CNTLT,
+			FIELD_PREP(GENMASK(31, 28), 0xf));
+
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_CNTLM,
+			FIELD_PREP(GENMASK(27, 24), 0xf));
+
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_0, 0);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_1, 0);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_2, 0);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_3, 0);
+
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_BYTEL_0,
+			FIELD_PREP(GENMASK(3, 0), 0x0) |
+			FIELD_PREP(GENMASK(7, 4), 0x1) |
+			FIELD_PREP(GENMASK(11, 8), 0x2) |
+			FIELD_PREP(GENMASK(15, 12), 0x3) |
+			FIELD_PREP(GENMASK(19, 16), 0x4) |
+			FIELD_PREP(GENMASK(23, 20), 0x5) |
+			FIELD_PREP(GENMASK(27, 24), 0x6) |
+			FIELD_PREP(GENMASK(31, 28), 0x7));
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_BYTEL_1,
+			FIELD_PREP(GENMASK(3, 0), 0x8) |
+			FIELD_PREP(GENMASK(7, 4), 0x9) |
+			FIELD_PREP(GENMASK(11, 8), 0xa) |
+			FIELD_PREP(GENMASK(15, 12), 0xb) |
+			FIELD_PREP(GENMASK(19, 16), 0xc) |
+			FIELD_PREP(GENMASK(23, 20), 0xd) |
+			FIELD_PREP(GENMASK(27, 24), 0xe) |
+			FIELD_PREP(GENMASK(31, 28), 0xf));
+
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_0, 0);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_1, 0);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_2, 0);
+	kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_3, 0);
+
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_CNTLT,
+			FIELD_PREP(GENMASK(31, 28), 0xf));
+
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_CNTLM,
+			FIELD_PREP(GENMASK(27, 24), 0xf));
+
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_0,
+			FIELD_PREP(GENMASK(3, 0), 0x0) |
+			FIELD_PREP(GENMASK(7, 4), 0x1) |
+			FIELD_PREP(GENMASK(11, 8), 0x2) |
+			FIELD_PREP(GENMASK(15, 12), 0x3) |
+			FIELD_PREP(GENMASK(19, 16), 0x4) |
+			FIELD_PREP(GENMASK(23, 20), 0x5) |
+			FIELD_PREP(GENMASK(27, 24), 0x6) |
+			FIELD_PREP(GENMASK(31, 28), 0x7));
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_1,
+			FIELD_PREP(GENMASK(3, 0), 0x8) |
+			FIELD_PREP(GENMASK(7, 4), 0x9) |
+			FIELD_PREP(GENMASK(11, 8), 0xa) |
+			FIELD_PREP(GENMASK(15, 12), 0xb) |
+			FIELD_PREP(GENMASK(19, 16), 0xc) |
+			FIELD_PREP(GENMASK(23, 20), 0xd) |
+			FIELD_PREP(GENMASK(27, 24), 0xe) |
+			FIELD_PREP(GENMASK(31, 28), 0xf));
+
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
+	kgsl_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
+
+	for (i = 0; i < gen7_snapshot_block_list->debugbus_blocks_len; i++) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+			snapshot, gen7_snapshot_dbgc_debugbus_block,
+			(void *) &gen7_snapshot_block_list->debugbus_blocks[i]);
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS,
+			snapshot, gen7_snapshot_dbgc_side_debugbus_block,
+			(void *) &gen7_snapshot_block_list->debugbus_blocks[i]);
+	}
+
+	for (i = 0; i < gen7_snapshot_block_list->gbif_debugbus_blocks_len; i++) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+			snapshot, gen7_snapshot_dbgc_debugbus_block,
+			(void *) &gen7_snapshot_block_list->gbif_debugbus_blocks[i]);
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS,
+			snapshot, gen7_snapshot_dbgc_side_debugbus_block,
+			(void *) &gen7_snapshot_block_list->gbif_debugbus_blocks[i]);
+	}
+
+	/* Dump the CX debugbus data if the block exists */
+	if (kgsl_regmap_valid_offset(&device->regmap, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A)) {
+		for (i = 0; i < gen7_snapshot_block_list->cx_debugbus_blocks_len; i++) {
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, gen7_snapshot_cx_dbgc_debugbus_block,
+				(void *) &gen7_snapshot_block_list->cx_debugbus_blocks[i]);
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS,
+				snapshot, gen7_snapshot_cx_side_dbgc_debugbus_block,
+				(void *) &gen7_snapshot_block_list->cx_debugbus_blocks[i]);
+		}
+	}
+}
+
+/* gen7_snapshot_sqe() - Dump SQE data in snapshot */
+static size_t gen7_snapshot_sqe(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+
+	if (remain < DEBUG_SECTION_SZ(GEN7_SQE_FW_SNAPSHOT_DWORDS)) {
+		SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG");
+		return 0;
+	}
+
+	/* Dump the SQE firmware version */
+	header->type = SNAPSHOT_DEBUG_SQE_VERSION;
+	header->size = GEN7_SQE_FW_SNAPSHOT_DWORDS;
+	memcpy(data, fw->memdesc->hostptr, (GEN7_SQE_FW_SNAPSHOT_DWORDS * sizeof(u32)));
+
+	return DEBUG_SECTION_SZ(GEN7_SQE_FW_SNAPSHOT_DWORDS);
+}
+
+/* gen7_snapshot_aqe() - Dump AQE data in snapshot */
+static size_t gen7_snapshot_aqe(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	u32 *data = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_AQE))
+		return 0;
+
+	if (remain < DEBUG_SECTION_SZ(1)) {
+		SNAPSHOT_ERR_NOMEM(device, "AQE VERSION DEBUG");
+		return 0;
+	}
+
+	/* Dump the AQE firmware version */
+	header->type = SNAPSHOT_DEBUG_AQE_VERSION;
+	header->size = 1;
+	*data = fw->version;
+
+	return DEBUG_SECTION_SZ(1);
+}
+
+/* Snapshot the preemption related buffers */
+static size_t snapshot_preemption_record(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+	u8 *ptr = buf + sizeof(*header);
+	const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device));
+	u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES;
+
+	if (gpucore->ctxt_record_size)
+		ctxt_record_size = gpucore->ctxt_record_size;
+
+	ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size);
+
+	if (remain < (ctxt_record_size + sizeof(*header))) {
+		SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD");
+		return 0;
+	}
+
+	header->size = ctxt_record_size >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, ctxt_record_size);
+
+	return ctxt_record_size + sizeof(*header);
+}
+
+static void gen7_reglist_snapshot(struct kgsl_device *device,
+					struct kgsl_snapshot *snapshot)
+{
+	u64 *ptr, offset = 0;
+	int i;
+	u32 r;
+	struct gen7_reg_list *reg_list = gen7_snapshot_block_list->reg_list;
+	size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain,
+		void *priv) = gen7_legacy_snapshot_registers;
+
+	if (CD_SCRIPT_CHECK(device)) {
+		for (i = 0; reg_list[i].regs; i++)
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+				snapshot, func, &reg_list[i]);
+		return;
+	}
+
+	/* Build the crash script */
+	ptr = (u64 *)gen7_capturescript->hostptr;
+
+	for (i = 0; reg_list[i].regs; i++) {
+		struct gen7_reg_list *regs = &reg_list[i];
+		const u32 *regs_ptr = regs->regs;
+
+		regs->offset = offset;
+
+		/* Program the SEL_CNTL_CD register appropriately */
+		if (regs->sel)
+			ptr += CD_WRITE(ptr, regs->sel->cd_reg, regs->sel->val);
+
+		for (; regs_ptr[0] != UINT_MAX; regs_ptr += 2) {
+			r = REG_COUNT(regs_ptr);
+			ptr += CD_READ(ptr, regs_ptr[0], r,
+				(gen7_crashdump_registers->gpuaddr + offset));
+			offset += r * sizeof(u32);
+		}
+	}
+
+	/* Marker for end of script */
+	CD_FINISH(ptr, offset);
+
+	/* Try to run the crash dumper */
+	if (_gen7_do_crashdump(device))
+		func = gen7_snapshot_registers;
+
+	for (i = 0; reg_list[i].regs; i++)
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+			snapshot, func, &reg_list[i]);
+}
+
+static size_t gen7_snapshot_cx_misc_registers(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	const u32 *ptr = (u32 *)priv;
+	u32 *src, *data = (unsigned int *)buf;
+	size_t size = adreno_snapshot_regs_count(ptr) * sizeof(u32);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "CX_MISC REGISTERS");
+		return 0;
+	}
+
+	src = gen7_crashdump_registers->hostptr;
+
+	for (; ptr[0] != UINT_MAX; ptr += 2) {
+		u32 cnt = REG_COUNT(ptr);
+
+		if (cnt == 1)
+			*data++ = BIT(31) | ptr[0];
+		else {
+			*data++ = ptr[0];
+			*data++ = cnt;
+		}
+		memcpy(data, src, cnt << 2);
+		data += cnt;
+		src += cnt;
+	}
+
+	/* Return the size of the section */
+	return size;
+}
+
+static void gen7_cx_misc_regs_snapshot(struct kgsl_device *device,
+					struct kgsl_snapshot *snapshot)
+{
+	u64 *ptr, offset = 0;
+	const u32 *regs_ptr = (const u32 *)gen7_snapshot_block_list->cx_misc_regs;
+
+	if (CD_SCRIPT_CHECK(device) || !adreno_gx_is_on(ADRENO_DEVICE(device)))
+		goto done;
+
+	/* Build the crash script */
+	ptr = (u64 *)gen7_capturescript->hostptr;
+
+	for (; regs_ptr[0] != UINT_MAX; regs_ptr += 2) {
+		u32 r = REG_COUNT(regs_ptr);
+
+		ptr += CD_READ(ptr, regs_ptr[0], r,
+			(gen7_crashdump_registers->gpuaddr + offset));
+		offset += r * sizeof(u32);
+	}
+
+	/* Marker for end of script */
+	CD_FINISH(ptr, offset);
+
+	/* Try to run the crash dumper */
+	if (_gen7_do_crashdump(device)) {
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+			snapshot, gen7_snapshot_cx_misc_registers,
+			(void *)gen7_snapshot_block_list->cx_misc_regs);
+		return;
+	}
+
+done:
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+		snapshot, adreno_snapshot_cx_misc_registers,
+		(void *)gen7_snapshot_block_list->cx_misc_regs);
+}
+
+static void gen7_snapshot_br_roq(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	unsigned int roq_size;
+
+	/*
+	 * The CP ROQ dump unit is 4 dwords. The number of units is stored
+	 * in CP_ROQ_THRESHOLDS_2[31:20], which is not accessible to the
+	 * host. Program GEN7_CP_SQE_UCODE_DBG_ADDR with offset 0x70d3
+	 * (at which CP stores the ROQ values) and read the value of
+	 * CP_ROQ_THRESHOLDS_2 from GEN7_CP_SQE_UCODE_DBG_DATA
+	 */
+	kgsl_regwrite(device, GEN7_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
+	kgsl_regread(device, GEN7_CP_SQE_UCODE_DBG_DATA, &roq_size);
+	roq_size = roq_size >> 20;
+	kgsl_snapshot_indexed_registers(device, snapshot,
+			GEN7_CP_ROQ_DBG_ADDR, GEN7_CP_ROQ_DBG_DATA, 0, (roq_size << 2));
+}
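
As a worked example of the arithmetic above (the readback value is purely illustrative): if GEN7_CP_SQE_UCODE_DBG_DATA returns 0x08000000, then roq_size = 0x08000000 >> 20 = 0x80 (128 ROQ units), and since each unit is 4 dwords the indexed dump in gen7_snapshot_br_roq() covers roq_size << 2 = 512 dwords.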
+
+static void gen7_snapshot_bv_roq(struct kgsl_device *device,
+			struct kgsl_snapshot *snapshot)
+{
+	unsigned int roq_size;
+
+	/*
+	 * The CP ROQ dump unit is 4 dwords. The number of units is stored
+	 * in CP_BV_ROQ_THRESHOLDS_2[31:20], which is not accessible to the
+	 * host. Program GEN7_CP_BV_SQE_UCODE_DBG_ADDR with offset 0x70d3
+	 * (at which CP stores the ROQ values) and read the value of
+	 * CP_BV_ROQ_THRESHOLDS_2 from GEN7_CP_BV_SQE_UCODE_DBG_DATA
+	 */
+	kgsl_regwrite(device, GEN7_CP_BV_SQE_UCODE_DBG_ADDR, 0x70d3);
+	kgsl_regread(device, GEN7_CP_BV_SQE_UCODE_DBG_DATA, &roq_size);
+	roq_size = roq_size >> 20;
+	kgsl_snapshot_indexed_registers(device, snapshot,
+			GEN7_CP_BV_ROQ_DBG_ADDR, GEN7_CP_BV_ROQ_DBG_DATA, 0, (roq_size << 2));
+}
+
+static void gen7_snapshot_lpac_roq(struct kgsl_device *device,
+			struct kgsl_snapshot *snapshot)
+{
+	unsigned int roq_size;
+
+	/*
+	 * The CP ROQ dump unit is 4 dwords. The number of units is stored
+	 * in CP_LPAC_ROQ_THRESHOLDS_2[31:20], which is not accessible to the
+	 * host. Program GEN7_CP_SQE_AC_UCODE_DBG_ADDR with offset 0x70d3
+	 * (at which CP stores the ROQ values) and read the value of
+	 * CP_LPAC_ROQ_THRESHOLDS_2 from GEN7_CP_SQE_AC_UCODE_DBG_DATA
+	 */
+	kgsl_regwrite(device, GEN7_CP_SQE_AC_UCODE_DBG_ADDR, 0x70d3);
+	kgsl_regread(device, GEN7_CP_SQE_AC_UCODE_DBG_DATA, &roq_size);
+	roq_size = roq_size >> 20;
+	kgsl_snapshot_indexed_registers(device, snapshot,
+			GEN7_CP_LPAC_ROQ_DBG_ADDR, GEN7_CP_LPAC_ROQ_DBG_DATA, 0, (roq_size << 2));
+}
+
+void gen7_snapshot_external_core_regs(struct kgsl_device *device,
+			struct kgsl_snapshot *snapshot)
+{
+	size_t i;
+	const u32 **external_core_regs;
+	unsigned int num_external_core_regs;
+	const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device));
+
+	gen7_snapshot_block_list = gpucore->gen7_snapshot_block_list;
+	external_core_regs = gen7_snapshot_block_list->external_core_regs;
+	num_external_core_regs = gen7_snapshot_block_list->num_external_core_regs;
+
+	for (i = 0; i < num_external_core_regs; i++) {
+		const u32 *regs = external_core_regs[i];
+
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+			snapshot, adreno_snapshot_registers_v2,
+			(void *) regs);
+	}
+}
+
+/*
+ * gen7_snapshot() - GEN7 GPU snapshot function
+ * @adreno_dev: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the GEN7 specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+void gen7_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb;
+	unsigned int i;
+	u32 hi, lo, cgc = 0, cgc1 = 0, cgc2 = 0;
+	const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device));
+	int is_current_rt;
+
+	gen7_crashdump_timedout = false;
+	gen7_snapshot_block_list = gpucore->gen7_snapshot_block_list;
+
+	/* External registers are dumped at the beginning of the GMU snapshot */
+	if (!gmu_core_isenabled(device))
+		gen7_snapshot_external_core_regs(device, snapshot);
+
+	gen7_snapshot_trace_buffer(device, snapshot);
+
+	/*
+	 * Dump debugbus data here so that it is captured for both the
+	 * GMU and GPU snapshots. Debugbus data can be accessed even if
+	 * the gx headswitch is off; in that case, data for gx blocks
+	 * reads back as 0x5c00bd00. Disable clock gating for SP and TP
+	 * to capture debugbus data.
+	 */
+	if (!adreno_is_gen7_9_x(adreno_dev) && device->ftbl->is_hwcg_on(device)) {
+		kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL2_SP0, &cgc);
+		kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL_TP0, &cgc1);
+		kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL3_TP0, &cgc2);
+		kgsl_regrmw(device, GEN7_RBBM_CLOCK_CNTL2_SP0, GENMASK(22, 20), 0);
+		kgsl_regrmw(device, GEN7_RBBM_CLOCK_CNTL_TP0, GENMASK(2, 0), 0);
+		kgsl_regrmw(device, GEN7_RBBM_CLOCK_CNTL3_TP0, GENMASK(14, 12), 0);
+	}
+
+	gen7_snapshot_debugbus(adreno_dev, snapshot);
+
+	/* Restore the value of the clockgating registers */
+	if (!adreno_is_gen7_9_x(adreno_dev) && device->ftbl->is_hwcg_on(device)) {
+		kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL2_SP0, cgc);
+		kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL_TP0, cgc1);
+		kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL3_TP0, cgc2);
+	}
+
+	gen7_cx_misc_regs_snapshot(device, snapshot);
+
+	/* SQE Firmware */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, gen7_snapshot_sqe, NULL);
+
+	/* AQE Firmware */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, gen7_snapshot_aqe, NULL);
+
+	if (!adreno_gx_is_on(adreno_dev))
+		return;
+
+	is_current_rt = rt_task(current);
+
+	if (is_current_rt)
+		sched_set_normal(current, 0);
+
+	kgsl_regread(device, GEN7_CP_IB1_BASE, &lo);
+	kgsl_regread(device, GEN7_CP_IB1_BASE_HI, &hi);
+
+	snapshot->ib1base = (((u64) hi) << 32) | lo;
+
+	kgsl_regread(device, GEN7_CP_IB2_BASE, &lo);
+	kgsl_regread(device, GEN7_CP_IB2_BASE_HI, &hi);
+
+	snapshot->ib2base = (((u64) hi) << 32) | lo;
+
+	kgsl_regread(device, GEN7_CP_IB1_REM_SIZE, &snapshot->ib1size);
+	kgsl_regread(device, GEN7_CP_IB2_REM_SIZE, &snapshot->ib2size);
+
+	kgsl_regread(device, GEN7_CP_LPAC_IB1_BASE, &lo);
+	kgsl_regread(device, GEN7_CP_LPAC_IB1_BASE_HI, &hi);
+
+	snapshot->ib1base_lpac = (((u64) hi) << 32) | lo;
+
+	kgsl_regread(device, GEN7_CP_LPAC_IB2_BASE, &lo);
+	kgsl_regread(device, GEN7_CP_LPAC_IB2_BASE_HI, &hi);
+
+	snapshot->ib2base_lpac = (((u64) hi) << 32) | lo;
+
+	kgsl_regread(device, GEN7_CP_LPAC_IB1_REM_SIZE, &snapshot->ib1size_lpac);
+	kgsl_regread(device, GEN7_CP_LPAC_IB2_REM_SIZE, &snapshot->ib2size_lpac);
+
+	/* Assert the isStatic bit before triggering snapshot */
+	kgsl_regwrite(device, GEN7_RBBM_SNAPSHOT_STATUS, 0x1);
+
+	/* Dump the registers which get affected by crash dumper trigger */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+		snapshot, adreno_snapshot_registers_v2,
+		(void *) gen7_snapshot_block_list->pre_crashdumper_regs);
+
+	gen7_reglist_snapshot(device, snapshot);
+
+	/*
+	 * Save and reprogram this register before capturing the resource
+	 * table to work around a CGC issue
+	 */
+	if (device->ftbl->is_hwcg_on(device)) {
+		kgsl_regread(device, GEN7_RBBM_CLOCK_MODE_CP, &cgc);
+		kgsl_regrmw(device, GEN7_RBBM_CLOCK_MODE_CP, 0x7, 0);
+	}
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		GEN7_CP_RESOURCE_TABLE_DBG_ADDR, GEN7_CP_RESOURCE_TABLE_DBG_DATA,
+		0, 0x4100);
+
+	/* Reprogram the register back to the original stored value */
+	if (device->ftbl->is_hwcg_on(device))
+		kgsl_regwrite(device, GEN7_RBBM_CLOCK_MODE_CP, cgc);
+
+	for (i = 0; i < gen7_snapshot_block_list->index_registers_len; i++)
+		kgsl_snapshot_indexed_registers(device, snapshot,
+			gen7_snapshot_block_list->index_registers[i].addr,
+			gen7_snapshot_block_list->index_registers[i].data, 0,
+			gen7_snapshot_block_list->index_registers[i].size);
+
+	if (!adreno_is_gen7_9_x(adreno_dev)) {
+		gen7_snapshot_br_roq(device, snapshot);
+
+		gen7_snapshot_bv_roq(device, snapshot);
+
+		gen7_snapshot_lpac_roq(device, snapshot);
+	}
+
+	/* Mempool debug data */
+	gen7_snapshot_mempool(device, snapshot);
+
+	/* MVC register section */
+	gen7_snapshot_mvc_regs(device, snapshot);
+
+	/* registers dumped through DBG AHB */
+	gen7_snapshot_dbgahb_regs(device, snapshot);
+
+	/* Shader memory */
+	gen7_snapshot_shader(device, snapshot);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+		snapshot, adreno_snapshot_registers_v2,
+		(void *) gen7_snapshot_block_list->post_crashdumper_regs);
+
+	kgsl_regwrite(device, GEN7_RBBM_SNAPSHOT_STATUS, 0x0);
+
+	/* Preemption record */
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, snapshot_preemption_record,
+				rb->preemption_desc);
+		}
+	}
+	if (is_current_rt)
+		sched_set_fifo(current);
+}
+
+void gen7_crashdump_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (IS_ERR_OR_NULL(gen7_capturescript))
+		gen7_capturescript = kgsl_allocate_global(device,
+			3 * PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY,
+			KGSL_MEMDESC_PRIVILEGED, "capturescript");
+
+	if (IS_ERR(gen7_capturescript))
+		return;
+
+	if (IS_ERR_OR_NULL(gen7_crashdump_registers))
+		gen7_crashdump_registers = kgsl_allocate_global(device,
+			25 * PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED,
+			"capturescript_regs");
+
+	if (IS_ERR(gen7_crashdump_registers))
+		return;
+}

+ 383 - 0
qcom/opensource/graphics-kernel/adreno_gen7_snapshot.h

@@ -0,0 +1,383 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_GEN7_SNAPSHOT_H
+#define __ADRENO_GEN7_SNAPSHOT_H
+
+#include "adreno.h"
+#include "adreno_gen7.h"
+#include "kgsl_regmap.h"
+
+#define CLUSTER_NONE 0
+#define CLUSTER_FE 1
+#define CLUSTER_SP_VS 2
+#define CLUSTER_PC_VS 3
+#define CLUSTER_GRAS 4
+#define CLUSTER_SP_PS 5
+#define CLUSTER_VPC_PS 6
+#define CLUSTER_PS 7
+
+#define HLSQ_STATE 0
+#define HLSQ_DP 1
+#define SP_TOP 2
+#define USPTP 3
+#define HLSQ_DP_STR 4
+
+#define STATE_NON_CONTEXT 0
+#define STATE_TOGGLE_CTXT 1
+#define STATE_FORCE_CTXT_0 2
+#define STATE_FORCE_CTXT_1 3
+
+#define GEN7_DEBUGBUS_BLOCK_SIZE 0x100
+
+/* Number of dwords to dump in the snapshot for the CP SQE */
+#define GEN7_SQE_FW_SNAPSHOT_DWORDS 5
+
+struct gen7_sel_reg {
+	unsigned int host_reg;
+	unsigned int cd_reg;
+	unsigned int val;
+};
+
+struct gen7_sptp_cluster_registers {
+	/* cluster_id: Cluster identifier */
+	int cluster_id;
+	/* statetype: SP block state type for the cluster */
+	int statetype;
+	/* pipe_id: Pipe identifier */
+	int pipe_id;
+	/* context_id: Context identifier */
+	int context_id;
+	/* location_id: Location identifier */
+	int location_id;
+	/* regs: Pointer to the list of register pairs to read */
+	const u32 *regs;
+	/* regbase: Dword offset of the register block in the GPU register space */
+	unsigned int regbase;
+	/* offset: Internal variable used to track the crashdump state */
+	unsigned int offset;
+};
+
+struct gen7_shader_block {
+	/* statetype: Type identifier for the block */
+	u32 statetype;
+	/* size: Size of the block (in dwords) */
+	u32 size;
+	/* num_sps: The number of SPs to dump */
+	u32 num_sps;
+	/* num_usptps: The number of USPTPs to dump */
+	u32 num_usptps;
+	/* pipeid: Pipe identifier for the block data */
+	u32 pipeid;
+	/* location: Location identifier for the block data */
+	u32 location;
+	/* offset: The offset in the snapshot dump */
+	u64 offset;
+};
+
+struct gen7_shader_block_info {
+	struct gen7_shader_block *block;
+	unsigned int sp_id;
+	unsigned int usptp;
+	u32 bank;
+	u64 offset;
+};
+
+struct gen7_reg_list {
+	const u32 *regs;
+	const struct gen7_sel_reg *sel;
+	u64 offset;
+};
+
+struct gen7_cp_indexed_reg {
+	u32 addr;
+	u32 data;
+	u32 size;
+};
+
+struct gen7_cluster_registers {
+	/* cluster_id: Cluster identifier */
+	int cluster_id;
+	/* pipe_id: Pipe Identifier */
+	int pipe_id;
+	/* context_id: One of the STATE_* values that identifies the context to dump */
+	int context_id;
+	/* regs: Pointer to an array of register pairs */
+	const u32 *regs;
+	/* sel: Pointer to a selector register to write before reading */
+	const struct gen7_sel_reg *sel;
+	/* offset: Internal variable to track the state of the crashdump */
+	unsigned int offset;
+};
+
+struct gen7_snapshot_block_list {
+	/* pre_crashdumper_regs : Registers which need to be dumped before CD runs */
+	const u32 *pre_crashdumper_regs;
+	/* debugbus_blocks : List of debugbus blocks */
+	const u32 *debugbus_blocks;
+	/* debugbus_blocks_len : Length of the debugbus list */
+	size_t debugbus_blocks_len;
+	/* gbif_debugbus_blocks : List of GBIF debugbus blocks */
+	const u32 *gbif_debugbus_blocks;
+	/* gbif_debugbus_blocks_len : Length of GBIF debugbus list */
+	size_t gbif_debugbus_blocks_len;
+	/* cx_debugbus_blocks : List of CX debugbus blocks */
+	const u32 *cx_debugbus_blocks;
+	/* cx_debugbus_blocks_len : Length of the CX debugbus list */
+	size_t cx_debugbus_blocks_len;
+	/* external_core_regs : List of external core registers */
+	const u32 **external_core_regs;
+	/* num_external_core_regs : length of external core registers list */
+	size_t num_external_core_regs;
+	/* gmu_regs : List of GMU registers */
+	const u32 *gmu_regs;
+	/* gmu_gx_regs : List of GMU GX registers */
+	const u32 *gmu_gx_regs;
+	/* rscc_regs : List of RSCC registers */
+	const u32 *rscc_regs;
+	/* reg_list : List of GPU internal registers */
+	struct gen7_reg_list *reg_list;
+	/* cx_misc_regs : List of CX misc registers */
+	const u32 *cx_misc_regs;
+	/* shader_blocks : List of GPU shader memory */
+	struct gen7_shader_block *shader_blocks;
+	/* num_shader_blocks : Length of the shader memory list */
+	size_t num_shader_blocks;
+	/* cluster_registers : List of GPU cluster registers */
+	struct gen7_cluster_registers *clusters;
+	/* num_clusters : Length of GPU cluster registers list */
+	size_t num_clusters;
+	/* sptp_clusters : List of GPU SPTP cluster registers */
+	struct gen7_sptp_cluster_registers *sptp_clusters;
+	/* num_sptp_clusters : Length of GPU SPTP cluster registers list */
+	size_t num_sptp_clusters;
+	/* post_crashdumper_regs : Registers which need to be dumped after CD runs */
+	const u32 *post_crashdumper_regs;
+	/* index_registers : List of CP indexed registers */
+	struct gen7_cp_indexed_reg *index_registers;
+	/* index_registers_len : Length of the CP indexed registers list */
+	size_t index_registers_len;
+};
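
A trimmed, hypothetical sketch of how a GEN7 target might instantiate this structure; the array names and register offsets below are made up, and real targets supply much larger tables that fill in every member they support:

    /* Made-up pre-crashdumper range list, UINT_MAX terminated */
    static const u32 gen7_x_y_pre_crashdumper_regs[] = {
        0x00100, 0x00103,
        UINT_MAX, UINT_MAX,
    };

    /* Debugbus block ids come from the gen7_debugbus_ids enum below */
    static const u32 gen7_x_y_debugbus_blocks[] = {
        DEBUGBUS_CP_0_0,
        DEBUGBUS_RBBM,
    };

    static struct gen7_snapshot_block_list gen7_x_y_snapshot_block_list = {
        .pre_crashdumper_regs = gen7_x_y_pre_crashdumper_regs,
        .debugbus_blocks = gen7_x_y_debugbus_blocks,
        .debugbus_blocks_len = ARRAY_SIZE(gen7_x_y_debugbus_blocks),
    };

The gpucore then points its gen7_snapshot_block_list member at such an instance, which is what gen7_snapshot() and gen7_snapshot_external_core_regs() pick up at runtime.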
+
+struct gen7_trace_buffer_info {
+	u16 dbgc_ctrl;
+	u16 segment;
+	u16 granularity;
+	u16 ping_blk[TRACE_BUF_NUM_SIG];
+	u16 ping_idx[TRACE_BUF_NUM_SIG];
+};
+
+enum gen7_debugbus_ids {
+	DEBUGBUS_CP_0_0           = 1,
+	DEBUGBUS_CP_0_1           = 2,
+	DEBUGBUS_RBBM             = 3,
+	DEBUGBUS_GBIF_GX          = 5,
+	DEBUGBUS_GBIF_CX          = 6,
+	DEBUGBUS_HLSQ             = 7,
+	DEBUGBUS_UCHE_0           = 9,
+	DEBUGBUS_UCHE_1           = 10,
+	DEBUGBUS_TESS_BR          = 13,
+	DEBUGBUS_TESS_BV          = 14,
+	DEBUGBUS_PC_BR            = 17,
+	DEBUGBUS_PC_BV            = 18,
+	DEBUGBUS_VFDP_BR          = 21,
+	DEBUGBUS_VFDP_BV          = 22,
+	DEBUGBUS_VPC_BR           = 25,
+	DEBUGBUS_VPC_BV           = 26,
+	DEBUGBUS_TSE_BR           = 29,
+	DEBUGBUS_TSE_BV           = 30,
+	DEBUGBUS_RAS_BR           = 33,
+	DEBUGBUS_RAS_BV           = 34,
+	DEBUGBUS_VSC              = 37,
+	DEBUGBUS_COM_0            = 39,
+	DEBUGBUS_LRZ_BR           = 43,
+	DEBUGBUS_LRZ_BV           = 44,
+	DEBUGBUS_UFC_0            = 47,
+	DEBUGBUS_UFC_1            = 48,
+	DEBUGBUS_GMU_GX           = 55,
+	DEBUGBUS_DBGC             = 59,
+	DEBUGBUS_CX               = 60,
+	DEBUGBUS_GMU_CX           = 61,
+	DEBUGBUS_GPC_BR           = 62,
+	DEBUGBUS_GPC_BV           = 63,
+	DEBUGBUS_LARC             = 66,
+	DEBUGBUS_HLSQ_SPTP        = 68,
+	DEBUGBUS_RB_0             = 70,
+	DEBUGBUS_RB_1             = 71,
+	DEBUGBUS_RB_2             = 72,
+	DEBUGBUS_RB_3             = 73,
+	DEBUGBUS_RB_4             = 74,
+	DEBUGBUS_RB_5             = 75,
+	DEBUGBUS_UCHE_WRAPPER     = 102,
+	DEBUGBUS_CCU_0            = 106,
+	DEBUGBUS_CCU_1            = 107,
+	DEBUGBUS_CCU_2            = 108,
+	DEBUGBUS_CCU_3            = 109,
+	DEBUGBUS_CCU_4            = 110,
+	DEBUGBUS_CCU_5            = 111,
+	DEBUGBUS_VFD_BR_0         = 138,
+	DEBUGBUS_VFD_BR_1         = 139,
+	DEBUGBUS_VFD_BR_2         = 140,
+	DEBUGBUS_VFD_BR_3         = 141,
+	DEBUGBUS_VFD_BR_4         = 142,
+	DEBUGBUS_VFD_BR_5         = 143,
+	DEBUGBUS_VFD_BR_6         = 144,
+	DEBUGBUS_VFD_BR_7         = 145,
+	DEBUGBUS_VFD_BV_0         = 202,
+	DEBUGBUS_VFD_BV_1         = 203,
+	DEBUGBUS_VFD_BV_2         = 204,
+	DEBUGBUS_VFD_BV_3         = 205,
+	DEBUGBUS_USP_0            = 234,
+	DEBUGBUS_USP_1            = 235,
+	DEBUGBUS_USP_2            = 236,
+	DEBUGBUS_USP_3            = 237,
+	DEBUGBUS_USP_4            = 238,
+	DEBUGBUS_USP_5            = 239,
+	DEBUGBUS_TP_0             = 266,
+	DEBUGBUS_TP_1             = 267,
+	DEBUGBUS_TP_2             = 268,
+	DEBUGBUS_TP_3             = 269,
+	DEBUGBUS_TP_4             = 270,
+	DEBUGBUS_TP_5             = 271,
+	DEBUGBUS_TP_6             = 272,
+	DEBUGBUS_TP_7             = 273,
+	DEBUGBUS_TP_8             = 274,
+	DEBUGBUS_TP_9             = 275,
+	DEBUGBUS_TP_10            = 276,
+	DEBUGBUS_TP_11            = 277,
+	DEBUGBUS_USPTP_0          = 330,
+	DEBUGBUS_USPTP_1          = 331,
+	DEBUGBUS_USPTP_2          = 332,
+	DEBUGBUS_USPTP_3          = 333,
+	DEBUGBUS_USPTP_4          = 334,
+	DEBUGBUS_USPTP_5          = 335,
+	DEBUGBUS_USPTP_6          = 336,
+	DEBUGBUS_USPTP_7          = 337,
+	DEBUGBUS_USPTP_8          = 338,
+	DEBUGBUS_USPTP_9          = 339,
+	DEBUGBUS_USPTP_10         = 340,
+	DEBUGBUS_USPTP_11         = 341,
+	DEBUGBUS_CCHE_0           = 396,
+	DEBUGBUS_CCHE_1           = 397,
+	DEBUGBUS_CCHE_2           = 398,
+	DEBUGBUS_VPC_DSTR_0       = 408,
+	DEBUGBUS_VPC_DSTR_1       = 409,
+	DEBUGBUS_VPC_DSTR_2       = 410,
+	DEBUGBUS_HLSQ_DP_STR_0    = 411,
+	DEBUGBUS_HLSQ_DP_STR_1    = 412,
+	DEBUGBUS_HLSQ_DP_STR_2    = 413,
+	DEBUGBUS_HLSQ_DP_STR_3    = 414,
+	DEBUGBUS_HLSQ_DP_STR_4    = 415,
+	DEBUGBUS_HLSQ_DP_STR_5    = 416,
+	DEBUGBUS_UFC_DSTR_0       = 443,
+	DEBUGBUS_UFC_DSTR_1       = 444,
+	DEBUGBUS_UFC_DSTR_2       = 445,
+	DEBUGBUS_CGC_SUBCORE      = 446,
+	DEBUGBUS_CGC_CORE         = 447,
+};
+
+static const u32 gen7_gbif_debugbus_blocks[] = {
+	DEBUGBUS_GBIF_CX,
+	DEBUGBUS_GBIF_GX,
+};
+
+static const u32 gen7_cx_dbgc_debugbus_blocks[] = {
+	DEBUGBUS_GMU_CX,
+	DEBUGBUS_CX,
+	DEBUGBUS_GBIF_CX,
+};
+
+enum gen7_statetype_ids {
+	TP0_NCTX_REG                   = 0,
+	TP0_CTX0_3D_CVS_REG            = 1,
+	TP0_CTX0_3D_CPS_REG            = 2,
+	TP0_CTX1_3D_CVS_REG            = 3,
+	TP0_CTX1_3D_CPS_REG            = 4,
+	TP0_CTX2_3D_CPS_REG            = 5,
+	TP0_CTX3_3D_CPS_REG            = 6,
+	TP0_TMO_DATA                   = 9,
+	TP0_SMO_DATA                   = 10,
+	TP0_MIPMAP_BASE_DATA           = 11,
+	SP_NCTX_REG                    = 32,
+	SP_CTX0_3D_CVS_REG             = 33,
+	SP_CTX0_3D_CPS_REG             = 34,
+	SP_CTX1_3D_CVS_REG             = 35,
+	SP_CTX1_3D_CPS_REG             = 36,
+	SP_CTX2_3D_CPS_REG             = 37,
+	SP_CTX3_3D_CPS_REG             = 38,
+	SP_INST_DATA                   = 39,
+	SP_INST_DATA_1                 = 40,
+	SP_LB_0_DATA                   = 41,
+	SP_LB_1_DATA                   = 42,
+	SP_LB_2_DATA                   = 43,
+	SP_LB_3_DATA                   = 44,
+	SP_LB_4_DATA                   = 45,
+	SP_LB_5_DATA                   = 46,
+	SP_LB_6_DATA                   = 47,
+	SP_LB_7_DATA                   = 48,
+	SP_CB_RAM                      = 49,
+	SP_LB_13_DATA                  = 50,
+	SP_LB_14_DATA                  = 51,
+	SP_INST_TAG                    = 52,
+	SP_INST_DATA_2                 = 53,
+	SP_TMO_TAG                     = 54,
+	SP_SMO_TAG                     = 55,
+	SP_STATE_DATA                  = 56,
+	SP_HWAVE_RAM                   = 57,
+	SP_L0_INST_BUF                 = 58,
+	SP_LB_8_DATA                   = 59,
+	SP_LB_9_DATA                   = 60,
+	SP_LB_10_DATA                  = 61,
+	SP_LB_11_DATA                  = 62,
+	SP_LB_12_DATA                  = 63,
+	HLSQ_DATAPATH_DSTR_META        = 64,
+	HLSQ_L2STC_TAG_RAM             = 67,
+	HLSQ_L2STC_INFO_CMD            = 68,
+	HLSQ_CVS_BE_CTXT_BUF_RAM_TAG   = 69,
+	HLSQ_CPS_BE_CTXT_BUF_RAM_TAG   = 70,
+	HLSQ_GFX_CVS_BE_CTXT_BUF_RAM   = 71,
+	HLSQ_GFX_CPS_BE_CTXT_BUF_RAM   = 72,
+	HLSQ_CHUNK_CVS_RAM             = 73,
+	HLSQ_CHUNK_CPS_RAM             = 74,
+	HLSQ_CHUNK_CVS_RAM_TAG         = 75,
+	HLSQ_CHUNK_CPS_RAM_TAG         = 76,
+	HLSQ_ICB_CVS_CB_BASE_TAG       = 77,
+	HLSQ_ICB_CPS_CB_BASE_TAG       = 78,
+	HLSQ_CVS_MISC_RAM              = 79,
+	HLSQ_CPS_MISC_RAM              = 80,
+	HLSQ_CPS_MISC_RAM_1            = 81,
+	HLSQ_INST_RAM                  = 82,
+	HLSQ_GFX_CVS_CONST_RAM         = 83,
+	HLSQ_GFX_CPS_CONST_RAM         = 84,
+	HLSQ_CVS_MISC_RAM_TAG          = 85,
+	HLSQ_CPS_MISC_RAM_TAG          = 86,
+	HLSQ_INST_RAM_TAG              = 87,
+	HLSQ_GFX_CVS_CONST_RAM_TAG     = 88,
+	HLSQ_GFX_CPS_CONST_RAM_TAG     = 89,
+	HLSQ_GFX_LOCAL_MISC_RAM        = 90,
+	HLSQ_GFX_LOCAL_MISC_RAM_TAG    = 91,
+	HLSQ_INST_RAM_1                = 92,
+	HLSQ_STPROC_META               = 93,
+	HLSQ_BV_BE_META                = 94,
+	HLSQ_INST_RAM_2                = 95,
+	HLSQ_DATAPATH_META             = 96,
+	HLSQ_FRONTEND_META             = 97,
+	HLSQ_INDIRECT_META             = 98,
+	HLSQ_BACKEND_META              = 99,
+};
+
+static struct gen7_cp_indexed_reg gen7_cp_indexed_reg_list[] = {
+	{ GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x40},
+	{ GEN7_CP_DRAW_STATE_ADDR, GEN7_CP_DRAW_STATE_DATA, 0x100},
+	{ GEN7_CP_SQE_UCODE_DBG_ADDR, GEN7_CP_SQE_UCODE_DBG_DATA, 0x8000},
+	{ GEN7_CP_BV_SQE_STAT_ADDR, GEN7_CP_BV_SQE_STAT_DATA, 0x40},
+	{ GEN7_CP_BV_DRAW_STATE_ADDR, GEN7_CP_BV_DRAW_STATE_DATA, 0x100},
+	{ GEN7_CP_BV_SQE_UCODE_DBG_ADDR, GEN7_CP_BV_SQE_UCODE_DBG_DATA, 0x8000},
+	{ GEN7_CP_SQE_AC_STAT_ADDR, GEN7_CP_SQE_AC_STAT_DATA, 0x40},
+	{ GEN7_CP_LPAC_DRAW_STATE_ADDR, GEN7_CP_LPAC_DRAW_STATE_DATA, 0x100},
+	{ GEN7_CP_SQE_AC_UCODE_DBG_ADDR, GEN7_CP_SQE_AC_UCODE_DBG_DATA, 0x8000},
+	{ GEN7_CP_LPAC_FIFO_DBG_ADDR, GEN7_CP_LPAC_FIFO_DBG_DATA, 0x40},
+};
+#endif /* __ADRENO_GEN7_SNAPSHOT_H */

+ 2777 - 0
qcom/opensource/graphics-kernel/adreno_gen8.c

@@ -0,0 +1,2777 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+#include <soc/qcom/of_common.h>
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "adreno_gen8_hwsched.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+#include "kgsl_pwrscale.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+/* IFPC & Preemption static powerup restore list */
+static const u32 gen8_3_0_pwrup_reglist[] = {
+	GEN8_UCHE_MODE_CNTL,
+	GEN8_UCHE_VARB_IDLE_TIMEOUT,
+	GEN8_UCHE_GBIF_GX_CONFIG,
+	GEN8_UCHE_CACHE_WAYS,
+	GEN8_UCHE_CCHE_MODE_CNTL,
+	GEN8_UCHE_CCHE_CACHE_WAYS,
+	GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO,
+	GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI,
+	GEN8_UCHE_WRITE_THRU_BASE_LO,
+	GEN8_UCHE_WRITE_THRU_BASE_HI,
+	GEN8_UCHE_TRAP_BASE_LO,
+	GEN8_UCHE_TRAP_BASE_HI,
+	GEN8_UCHE_CLIENT_PF,
+	GEN8_VSC_BIN_SIZE,
+	GEN8_RB_CMP_NC_MODE_CNTL,
+	GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP,
+	GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO,
+	GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI,
+	GEN8_SP_READ_SEL,
+};
+
+/* IFPC only static powerup restore list */
+static const u32 gen8_3_0_ifpc_pwrup_reglist[] = {
+	GEN8_RBBM_NC_MODE_CNTL,
+	GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL,
+	GEN8_RBBM_SLICE_NC_MODE_CNTL,
+	GEN8_SP_NC_MODE_CNTL,
+	GEN8_SP_CHICKEN_BITS_2,
+	GEN8_SP_CHICKEN_BITS_3,
+	GEN8_SP_PERFCTR_SHADER_MASK,
+	GEN8_TPL1_NC_MODE_CNTL,
+	GEN8_TPL1_DBG_ECO_CNTL,
+	GEN8_TPL1_DBG_ECO_CNTL1,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_1,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_2,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_3,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_4,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_5,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_6,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_7,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_8,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_9,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_10,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_11,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_12,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_13,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_14,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_15,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18,
+	GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_19,
+	GEN8_CP_PROTECT_REG_GLOBAL,
+	GEN8_CP_PROTECT_REG_GLOBAL + 1,
+	GEN8_CP_PROTECT_REG_GLOBAL + 2,
+	GEN8_CP_PROTECT_REG_GLOBAL + 3,
+	GEN8_CP_PROTECT_REG_GLOBAL + 4,
+	GEN8_CP_PROTECT_REG_GLOBAL + 5,
+	GEN8_CP_PROTECT_REG_GLOBAL + 6,
+	GEN8_CP_PROTECT_REG_GLOBAL + 7,
+	GEN8_CP_PROTECT_REG_GLOBAL + 8,
+	GEN8_CP_PROTECT_REG_GLOBAL + 9,
+	GEN8_CP_PROTECT_REG_GLOBAL + 10,
+	GEN8_CP_PROTECT_REG_GLOBAL + 11,
+	GEN8_CP_PROTECT_REG_GLOBAL + 12,
+	GEN8_CP_PROTECT_REG_GLOBAL + 13,
+	GEN8_CP_PROTECT_REG_GLOBAL + 14,
+	GEN8_CP_PROTECT_REG_GLOBAL + 15,
+	GEN8_CP_PROTECT_REG_GLOBAL + 16,
+	GEN8_CP_PROTECT_REG_GLOBAL + 17,
+	GEN8_CP_PROTECT_REG_GLOBAL + 18,
+	GEN8_CP_PROTECT_REG_GLOBAL + 19,
+	GEN8_CP_PROTECT_REG_GLOBAL + 20,
+	GEN8_CP_PROTECT_REG_GLOBAL + 21,
+	GEN8_CP_PROTECT_REG_GLOBAL + 22,
+	GEN8_CP_PROTECT_REG_GLOBAL + 23,
+	GEN8_CP_PROTECT_REG_GLOBAL + 24,
+	GEN8_CP_PROTECT_REG_GLOBAL + 25,
+	GEN8_CP_PROTECT_REG_GLOBAL + 26,
+	GEN8_CP_PROTECT_REG_GLOBAL + 27,
+	GEN8_CP_PROTECT_REG_GLOBAL + 28,
+	GEN8_CP_PROTECT_REG_GLOBAL + 29,
+	GEN8_CP_PROTECT_REG_GLOBAL + 30,
+	GEN8_CP_PROTECT_REG_GLOBAL + 31,
+	GEN8_CP_PROTECT_REG_GLOBAL + 32,
+	GEN8_CP_PROTECT_REG_GLOBAL + 33,
+	GEN8_CP_PROTECT_REG_GLOBAL + 34,
+	GEN8_CP_PROTECT_REG_GLOBAL + 35,
+	GEN8_CP_PROTECT_REG_GLOBAL + 36,
+	GEN8_CP_PROTECT_REG_GLOBAL + 37,
+	GEN8_CP_PROTECT_REG_GLOBAL + 38,
+	GEN8_CP_PROTECT_REG_GLOBAL + 39,
+	GEN8_CP_PROTECT_REG_GLOBAL + 40,
+	GEN8_CP_PROTECT_REG_GLOBAL + 41,
+	GEN8_CP_PROTECT_REG_GLOBAL + 42,
+	GEN8_CP_PROTECT_REG_GLOBAL + 43,
+	GEN8_CP_PROTECT_REG_GLOBAL + 44,
+	GEN8_CP_PROTECT_REG_GLOBAL + 45,
+	GEN8_CP_PROTECT_REG_GLOBAL + 63,
+};
+
+static const struct gen8_pwrup_extlist gen8_3_0_pwrup_extlist[] = {
+	{ GEN8_CP_PROTECT_CNTL_PIPE, BIT(PIPE_BR) | BIT(PIPE_BV) },
+	{ GEN8_CP_PROTECT_REG_PIPE + 15, BIT(PIPE_BR) | BIT(PIPE_BV) },
+	{ GEN8_GRAS_TSEFE_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_GRAS_NC_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_GRAS_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_RB_CCU_CNTL, BIT(PIPE_BR)},
+	{ GEN8_RB_CCU_NC_MODE_CNTL, BIT(PIPE_BR)},
+	{ GEN8_RB_CMP_NC_MODE_CNTL, BIT(PIPE_BR)},
+	{ GEN8_RB_RESOLVE_PREFETCH_CNTL, BIT(PIPE_BR)},
+	{ GEN8_RB_CMP_DBG_ECO_CNTL, BIT(PIPE_BR)},
+	{ GEN8_RB_GC_GMEM_PROTECT, BIT(PIPE_BR)},
+	{ GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, BIT(PIPE_BR)},
+	{ GEN8_VPC_FLATSHADE_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_PC_CHICKEN_BITS_1, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_PC_CHICKEN_BITS_2, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_PC_CHICKEN_BITS_3, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_PC_CHICKEN_BITS_4, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_PC_AUTO_VERTEX_STRIDE, BIT(PIPE_BR) | BIT(PIPE_BV)},
+	{ GEN8_PC_VIS_STREAM_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)},
+	{ GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, BIT(PIPE_BR) | BIT(PIPE_BV)},
+	{ GEN8_VFD_CB_BV_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_VFD_CB_BR_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)},
+	{ GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)},
+};
+
+struct gen8_nonctxt_overrides gen8_nc_overrides[] = {
+	{ GEN8_UCHE_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 0, },
+	{ GEN8_UCHE_CACHE_WAYS, BIT(PIPE_NONE), 0, 0, 0, },
+	{ GEN8_UCHE_CLIENT_PF, BIT(PIPE_NONE), 0, 0, 0, },
+	{ GEN8_UCHE_DBG_ECO_CNTL_0, BIT(PIPE_NONE), 0, 0, 2, },
+	{ GEN8_UCHE_HW_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 2, },
+	{ GEN8_UCHE_CCHE_HW_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 2, },
+	{ GEN8_GRAS_NC_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_GRAS_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_RB_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 3, },
+	{ GEN8_RB_CCU_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 3, },
+	{ GEN8_RB_CCU_CNTL, BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_RB_CCU_NC_MODE_CNTL, BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_RB_SLICE_UFC_PREFETCH_CNTL, BIT(PIPE_BR), 0, 0, 3, },
+	{ GEN8_RB_SLICE_UFC_DBG_CNTL, BIT(PIPE_BR), 0, 0, 3, },
+	{ GEN8_RB_CMP_NC_MODE_CNTL, BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_RB_RESOLVE_PREFETCH_CNTL, BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_RB_CMP_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_RB_UFC_DBG_CNTL, BIT(PIPE_BR), 0, 0, 3, },
+	{ GEN8_PC_CHICKEN_BITS_1, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_PC_CHICKEN_BITS_2, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_PC_CHICKEN_BITS_3, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_PC_CHICKEN_BITS_4, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_PC_CHICKEN_BITS_5, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, },
+	{ GEN8_PC_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 3, },
+	{ GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_VFD_CB_BV_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR),  0, 0, 0, },
+	{ GEN8_VFD_CB_BR_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR),  0, 0, 0, },
+	{ GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_VPC_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, },
+	{ GEN8_VPC_DBG_ECO_CNTL_1, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, },
+	{ GEN8_VPC_DBG_ECO_CNTL_2, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 1, },
+	{ GEN8_VPC_DBG_ECO_CNTL_3, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, },
+	{ GEN8_VPC_FLATSHADE_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, },
+	{ GEN8_SP_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 1, },
+	{ GEN8_SP_NC_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 0, },
+	{ GEN8_SP_CHICKEN_BITS, BIT(PIPE_NONE), 0, 0, 1, },
+	{ GEN8_SP_NC_MODE_CNTL_2, BIT(PIPE_NONE), 0, 0, 1, },
+	{ GEN8_SP_CHICKEN_BITS_1, BIT(PIPE_NONE), 0, 0, 0, },
+	{ GEN8_SP_CHICKEN_BITS_2, BIT(PIPE_NONE), 0, 0, 0, },
+	{ GEN8_SP_CHICKEN_BITS_3, BIT(PIPE_NONE), 0, 0, 0, },
+	{ GEN8_SP_CHICKEN_BITS_4, BIT(PIPE_NONE), 0, 0, 1, },
+	{ GEN8_SP_DISPATCH_CNTL, BIT(PIPE_NONE), 0, 0, 1, },
+	{ GEN8_SP_HLSQ_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 1, },
+	{ GEN8_SP_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 1, },
+	{ GEN8_TPL1_NC_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 1, },
+	{ GEN8_TPL1_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 0, },
+	{ GEN8_TPL1_DBG_ECO_CNTL1, BIT(PIPE_NONE), 0, 0, 0, },
+	{ 0 }
+};
+
+static int acd_calibrate_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 debug_val = (u32) val;
+	int ret;
+
+	mutex_lock(&device->mutex);
+	ret = adreno_active_count_get(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_DBG,
+				F_PWR_ACD_CALIBRATE, debug_val);
+	if (!ret)
+		gmu->acd_debug_val = debug_val;
+
+	adreno_active_count_put(adreno_dev);
+err:
+	mutex_unlock(&device->mutex);
+	return ret;
+}
+
+static int acd_calibrate_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	*val = (u64) gmu->acd_debug_val;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(acd_cal_fops, acd_calibrate_get, acd_calibrate_set, "%llu\n");
+
+static ssize_t nc_override_get(struct file *filep,
+		char __user *user_buf, size_t len, loff_t *off)
+{
+	struct kgsl_device *device = (struct kgsl_device *) filep->private_data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+	struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides;
+	u32 i, max_size = PAGE_SIZE;
+	char *buf, *pos;
+	ssize_t size = 0;
+
+	if (!gen8_dev->nc_overrides_enabled || !nc_overrides)
+		return 0;
+
+	buf = kzalloc(max_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	pos = buf;
+
+	mutex_lock(&gen8_dev->nc_mutex);
+	/* Copy all overrides that have been set from the list into the buffer */
+	for (i = 0; nc_overrides[i].offset; i++) {
+		if (nc_overrides[i].set) {
+			len = scnprintf(pos, max_size, "0x%x:0x%8.8x\n",
+					nc_overrides[i].offset, nc_overrides[i].val);
+			/* If we run out of space len will be zero */
+			if (len == 0)
+				break;
+			max_size -= len;
+			pos += len;
+		}
+	}
+	mutex_unlock(&gen8_dev->nc_mutex);
+
+	size = simple_read_from_buffer(user_buf, len, off, buf, pos - buf);
+
+	kfree(buf);
+	return size;
+}
+
+static void nc_override_cb(struct adreno_device *adreno_dev, void *priv)
+{
+	struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev);
+
+	gen8_dev->nc_overrides_enabled = true;
+	/* Force to update and make new patched reglist */
+	adreno_dev->patch_reglist = false;
+}
+
+static ssize_t nc_override_set(struct file *filep,
+		const char __user *user_buf, size_t len, loff_t *off)
+{
+	struct kgsl_device *device = (struct kgsl_device *) filep->private_data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev);
+	struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides;
+	u32 i, offset, val;
+	int ret = -EINVAL;
+	ssize_t size = 0;
+	char *buf;
+
+	if (!nc_overrides)
+		return 0;
+
+	if ((len >= PAGE_SIZE) || (len == 0))
+		return -EINVAL;
+
+	buf = kzalloc(len + 1, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, user_buf, len)) {
+		ret = -EFAULT;
+		goto err;
+	}
+
+	/* For sanity and parsing, ensure it is null terminated */
+	buf[len] = '\0';
+
+	size = sscanf(buf, "0x%x:0x%x", &offset, &val);
+	if (size != 2)
+		goto err;
+
+	size = 0;
+
+	mutex_lock(&gen8_dev->nc_mutex);
+	for (i = 0; nc_overrides[i].offset; i++) {
+		if (nc_overrides[i].offset == offset) {
+			nc_overrides[i].val = val;
+			nc_overrides[i].set = true;
+			size = len;
+			break;
+		}
+	}
+	mutex_unlock(&gen8_dev->nc_mutex);
+
+	if (size > 0) {
+		ret = adreno_power_cycle(ADRENO_DEVICE(device), nc_override_cb, NULL);
+		if (!ret)
+			ret = size;
+	}
+
+err:
+	kfree(buf);
+	return ret;
+}
+
+static const struct file_operations nc_override_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = nc_override_get,
+	.write = nc_override_set,
+	.llseek = noop_llseek,
+};
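
A hedged user-space sketch of driving the nc_override node above. The debugfs path is an assumption (it depends on where the KGSL device creates its debugfs directory), and the offset/value pair is made up; a write only sticks if the offset matches an entry in gen8_nc_overrides:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* Assumed node path; adjust to the actual KGSL debugfs directory */
        const char *node = "/sys/kernel/debug/kgsl/kgsl-3d0/nc_override";
        char buf[32];
        int fd, len;

        fd = open(node, O_WRONLY);
        if (fd < 0) {
            perror("open");
            return 1;
        }

        /* One "0x<offset>:0x<value>" pair per write, as parsed by sscanf() above */
        len = snprintf(buf, sizeof(buf), "0x%x:0x%x", 0xe01, 0x1);
        if (write(fd, buf, len) != len)
            perror("write");

        close(fd);
        return 0;
    }

Reading the node back lists only the overrides that have been set, one "0x<offset>:0x<value>" pair per line, and each accepted write power-cycles the GPU so the patched register list is rebuilt with the override applied.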
+
+void gen8_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds)
+{
+	u32 i = 0, mask = 0;
+
+	/* Disable concurrent binning before sending CP init */
+	cmds[i++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[i++] = BIT(27);
+
+	/* Use multiple HW contexts */
+	mask |= BIT(0);
+
+	/* Enable error detection */
+	mask |= BIT(1);
+
+	/* Set default reset state */
+	mask |= BIT(3);
+
+	/* Disable save/restore of performance counters across preemption */
+	mask |= BIT(6);
+
+	/* Enable the register init list with the spinlock */
+	mask |= BIT(8);
+
+	cmds[i++] = cp_type7_packet(CP_ME_INIT, 7);
+
+	/* Enabled ordinal mask */
+	cmds[i++] = mask;
+	cmds[i++] = 0x00000003; /* Set number of HW contexts */
+	cmds[i++] = 0x20000000; /* Enable error detection */
+	cmds[i++] = 0x00000002; /* Operation mode mask */
+
+	/* Register initialization list with spinlock */
+	cmds[i++] = lower_32_bits(adreno_dev->pwrup_reglist->gpuaddr);
+	cmds[i++] = upper_32_bits(adreno_dev->pwrup_reglist->gpuaddr);
+	/*
+	 * Gen8 targets with concurrent binning are expected to have a dynamic
+	 * power-up list made of triplets that include the pipe id.
+	 * Bit 31 of POWER_UP_REGISTER_LIST_LENGTH is reused here to let CP
+	 * know whether the power-up list contains triplets. If
+	 * REGISTER_INIT_LIST_WITH_SPINLOCK is set and bit 31 below is set,
+	 * CP expects a dynamic list with triplets.
+	 */
+	cmds[i++] = BIT(31);
+}
+
+int gen8_fenced_write(struct adreno_device *adreno_dev, u32 offset,
+		u32 value, u32 mask)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u32 status, i;
+	u64 ts1, ts2;
+
+	kgsl_regwrite(device, offset, value);
+	ts1 = gpudev->read_alwayson(adreno_dev);
+	for (i = 0; i < GMU_CORE_LONG_WAKEUP_RETRY_LIMIT; i++) {
+		/*
+		 * Make sure the previous register write is posted before
+		 * checking the fence status
+		 */
+		mb();
+
+		gmu_core_regread(device, GEN8_GMUAO_AHB_FENCE_STATUS, &status);
+
+		/*
+		 * If !writedropped0/1, then the write to the fenced
+		 * register was successful
+		 */
+		if (!(status & mask))
+			break;
+
+		/* Wait a small amount of time before trying again */
+		udelay(GMU_CORE_WAKEUP_DELAY_US);
+
+		/* Try to write the fenced register again */
+		kgsl_regwrite(device, offset, value);
+	}
+
+	if (i < GMU_CORE_SHORT_WAKEUP_RETRY_LIMIT)
+		return 0;
+
+	if (i == GMU_CORE_LONG_WAKEUP_RETRY_LIMIT) {
+		ts2 = gpudev->read_alwayson(adreno_dev);
+		dev_err(device->dev,
+				"Timed out waiting %d usecs to write fenced register 0x%x, timestamps: %llx %llx\n",
+				i * GMU_CORE_WAKEUP_DELAY_US, offset, ts1, ts2);
+		return -ETIMEDOUT;
+	}
+
+	dev_info(device->dev,
+		"Waited %d usecs to write fenced register 0x%x\n",
+		i * GMU_CORE_WAKEUP_DELAY_US, offset);
+
+	return 0;
+}
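
A minimal usage sketch of the helper above. GEN8_CP_RB_WPTR_GC and the BIT(0) "writedropped0" mask are illustrative assumptions, not definitions taken from this driver; callers pass whichever writedropped bit of GEN8_GMUAO_AHB_FENCE_STATUS guards the register they are poking:

    /* Hypothetical helper: update a ring-buffer write pointer via the fence */
    static int gen8_example_update_wptr(struct adreno_device *adreno_dev, u32 wptr)
    {
        /* gen8_fenced_write() retries until the GMU stops reporting the write as dropped */
        return gen8_fenced_write(adreno_dev, GEN8_CP_RB_WPTR_GC, wptr, BIT(0));
    }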
+
+int gen8_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	u64 freq = gen8_core->gmu_hub_clk_freq;
+
+	adreno_dev->highest_bank_bit = gen8_core->highest_bank_bit;
+	adreno_dev->gmu_hub_clk_freq = freq ? freq : 150000000;
+	adreno_dev->ahb_timeout_val = adreno_get_ahb_timeout_val(adreno_dev,
+			gen8_core->noc_timeout_us);
+	adreno_dev->bcl_data = gen8_core->bcl_data;
+
+	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
+			ADRENO_COOP_RESET);
+
+	/* If the memory type is DDR 4, override the existing configuration */
+	if (of_fdt_get_ddrtype() == 0x7)
+		adreno_dev->highest_bank_bit = 14;
+
+	gen8_crashdump_init(adreno_dev);
+
+	gen8_dev->nc_overrides = gen8_nc_overrides;
+	mutex_init(&gen8_dev->nc_mutex);
+
+	/* Debugfs node for noncontext registers override */
+	debugfs_create_file("nc_override", 0644, device->d_debugfs, device, &nc_override_fops);
+
+	return adreno_allocate_global(device, &adreno_dev->pwrup_reglist,
+		PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED,
+		"powerup_register_list");
+}
+
+#define CX_TIMER_INIT_SAMPLES 16
+void gen8_cx_timer_init(struct adreno_device *adreno_dev)
+{
+	u64 seed_val, tmr, skew = 0;
+	int i;
+	unsigned long flags;
+
+	/* Set it up during first boot or after suspend resume */
+	if (test_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv))
+		return;
+
+	/* Disable irqs to get accurate timings */
+	local_irq_save(flags);
+
+	/* Calculate the overhead of timer reads and register writes */
+	for (i = 0; i < CX_TIMER_INIT_SAMPLES; i++) {
+		u64 tmr1, tmr2, tmr3;
+
+		/* Measure time for two reads of the CPU timer */
+		tmr1 = arch_timer_read_counter();
+		tmr2 = arch_timer_read_counter();
+
+		/* Write to the register and time it */
+		adreno_cx_misc_regwrite(adreno_dev,
+					GEN8_GPU_CX_MISC_AO_COUNTER_LO,
+					lower_32_bits(tmr2));
+		adreno_cx_misc_regwrite(adreno_dev,
+					GEN8_GPU_CX_MISC_AO_COUNTER_HI,
+					upper_32_bits(tmr2));
+
+		/* Barrier to make sure the write completes before timing it */
+		mb();
+		tmr3 = arch_timer_read_counter();
+
+		/* Calculate difference between register write and CPU timer */
+		skew += (tmr3 - tmr2) - (tmr2 - tmr1);
+	}
+
+	local_irq_restore(flags);
+
+	/* Get the average over all our readings, to the closest integer */
+	skew = (skew + CX_TIMER_INIT_SAMPLES / 2) / CX_TIMER_INIT_SAMPLES;
+
+	local_irq_save(flags);
+	tmr = arch_timer_read_counter();
+
+	seed_val = tmr + skew;
+
+	/* Seed the GPU CX counter with the adjusted timer */
+	adreno_cx_misc_regwrite(adreno_dev,
+			GEN8_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(seed_val));
+	adreno_cx_misc_regwrite(adreno_dev,
+			GEN8_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(seed_val));
+
+	local_irq_restore(flags);
+
+	set_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv);
+}
+
+void gen8_get_gpu_feature_info(struct adreno_device *adreno_dev)
+{
+	u32 feature_fuse = 0;
+
+	/* Get HW feature soft fuse value */
+	adreno_cx_misc_regread(adreno_dev, GEN8_GPU_CX_MISC_SW_FUSE_VALUE,
+			       &feature_fuse);
+
+	adreno_dev->fastblend_enabled = feature_fuse & BIT(GEN8_FASTBLEND_SW_FUSE);
+	adreno_dev->raytracing_enabled = feature_fuse & BIT(GEN8_RAYTRACING_SW_FUSE);
+
+	/* Enable LPAC only if both the SW feature and the HW fuse support it */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
+		adreno_dev->lpac_enabled = feature_fuse & BIT(GEN8_LPAC_SW_FUSE);
+
+	adreno_dev->feature_fuse = feature_fuse;
+}
+
+void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id,
+		u32 slice_id, u32 use_slice_id)
+{
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+	u32 aperture_val = (FIELD_PREP(GENMASK(15, 12), pipe_id) |
+			    FIELD_PREP(GENMASK(18, 16), slice_id) |
+			    FIELD_PREP(GENMASK(23, 23), use_slice_id));
+
+	/* Check if we already set the aperture */
+	if (gen8_dev->aperture == aperture_val)
+		return;
+
+	kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_CP_APERTURE_CNTL_HOST, aperture_val);
+
+	/* Make sure the aperture write goes through before reading the registers */
+	mb();
+
+	gen8_dev->aperture = aperture_val;
+}
+
+void gen8_regread64_aperture(struct kgsl_device *device,
+	u32 offsetwords_lo, u32 offsetwords_hi, u64 *value, u32 pipe,
+	u32 slice_id, u32 use_slice_id)
+{
+	u32 val_lo = 0, val_hi = 0;
+
+	gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id);
+
+	val_lo = kgsl_regmap_read(&device->regmap, offsetwords_lo);
+	val_hi = kgsl_regmap_read(&device->regmap, offsetwords_hi);
+
+	*value = (((u64)val_hi << 32) | val_lo);
+}
+
+void gen8_regread_aperture(struct kgsl_device *device,
+	u32 offsetwords, u32 *value, u32 pipe, u32 slice_id, u32 use_slice_id)
+{
+	gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id);
+
+	*value = kgsl_regmap_read(&device->regmap, offsetwords);
+}
+
+static inline void gen8_regwrite_aperture(struct kgsl_device *device,
+	u32 offsetwords, u32 value, u32 pipe, u32 slice_id, u32 use_slice_id)
+{
+	gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id);
+
+	kgsl_regmap_write(&device->regmap, value, offsetwords);
+}
+
+#define GEN8_CP_PROTECT_DEFAULT (FIELD_PREP(GENMASK(31, 16), 0xffff) | BIT(0) | BIT(1) | BIT(3))
+static void gen8_protect_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	const struct gen8_protected_regs *regs = gen8_core->protected_regs;
+	u32 count = 0;
+	int i;
+
+	/*
+	 * Enable access protection to privileged registers, fault on an access
+	 * protect violation and select the last span to protect from the start
+	 * address all the way to the end of the register address space
+	 */
+	gen8_regwrite_aperture(device, GEN8_CP_PROTECT_CNTL_PIPE,
+			       GEN8_CP_PROTECT_DEFAULT, PIPE_BR, 0, 0);
+	gen8_regwrite_aperture(device, GEN8_CP_PROTECT_CNTL_PIPE,
+			       GEN8_CP_PROTECT_DEFAULT, PIPE_BV, 0, 0);
+	if (adreno_dev->lpac_enabled)
+		gen8_regwrite_aperture(device, GEN8_CP_PROTECT_CNTL_PIPE,
+			       GEN8_CP_PROTECT_DEFAULT, PIPE_LPAC, 0, 0);
+
+	/* Clear aperture register */
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+
+	/* Program each register defined by the core definition */
+	for (i = 0; regs[i].reg; i++) {
+		/*
+		 * This is the offset of the end register as counted from the
+		 * start, i.e. # of registers in the range - 1
+		 */
+		count = regs[i].end - regs[i].start;
+
+		kgsl_regwrite(device, regs[i].reg,
+				FIELD_PREP(GENMASK(17, 0), regs[i].start) |
+				FIELD_PREP(GENMASK(30, 18), count) |
+				FIELD_PREP(BIT(31), regs[i].noaccess));
+	}
+
+	/*
+	 * Last span setting is only being applied to the last pipe specific
+	 * register. Hence duplicate the last span from protect reg into the
+	 * BR, BV and LPAC protect reg pipe 15.
+	 */
+	i--;
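+	/* 'i' now indexes the last entry programmed in the loop above */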
+	gen8_regwrite_aperture(device, GEN8_CP_PROTECT_REG_PIPE + 15,
+			       FIELD_PREP(GENMASK(17, 0), regs[i].start) |
+			       FIELD_PREP(GENMASK(30, 18), count) |
+			       FIELD_PREP(BIT(31), regs[i].noaccess),
+			       PIPE_BR, 0, 0);
+
+	gen8_regwrite_aperture(device, GEN8_CP_PROTECT_REG_PIPE + 15,
+			       FIELD_PREP(GENMASK(17, 0), regs[i].start) |
+			       FIELD_PREP(GENMASK(30, 18), count) |
+			       FIELD_PREP(BIT(31), regs[i].noaccess),
+			       PIPE_BV, 0, 0);
+
+	if (adreno_dev->lpac_enabled)
+		gen8_regwrite_aperture(device, GEN8_CP_PROTECT_REG_PIPE + 15,
+				       FIELD_PREP(GENMASK(17, 0), regs[i].start) |
+				       FIELD_PREP(GENMASK(30, 18), count) |
+				       FIELD_PREP(BIT(31), regs[i].noaccess),
+				       PIPE_LPAC, 0, 0);
+
+	/* Clear aperture register */
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+}
+
+static void gen8_nonctxt_regconfig(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	const struct gen8_nonctxt_regs *regs = gen8_core->nonctxt_regs;
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+	u32 i, pipe_id;
+	unsigned long pipe;
+
+	/* Program non context registers for all pipes */
+	for (pipe_id = PIPE_NONE; pipe_id <= PIPE_AQE1; pipe_id++) {
+
+		if ((pipe_id == PIPE_LPAC) && !ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
+			continue;
+		else if (((pipe_id == PIPE_AQE0) || (pipe_id == PIPE_AQE1)) &&
+			 !ADRENO_FEATURE(adreno_dev, ADRENO_AQE))
+			continue;
+
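+		/* The noncontext register list is terminated by a zero offset entry */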
+		for (i = 0; regs[i].offset; i++) {
+			pipe = (unsigned long)regs[i].pipelines;
+			if (test_bit(pipe_id, &pipe))
+				gen8_regwrite_aperture(device, regs[i].offset,
+					regs[i].val, pipe_id, 0, 0);
+		}
+	}
+
+	/* Program non context registers overrides for all pipes */
+	if (gen8_dev->nc_overrides_enabled) {
+		struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides;
+
+		mutex_lock(&gen8_dev->nc_mutex);
+		for (pipe_id = PIPE_NONE; pipe_id <= PIPE_AQE1; pipe_id++) {
+
+			if ((pipe_id == PIPE_LPAC) && !ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
+				continue;
+			else if (((pipe_id == PIPE_AQE0) || (pipe_id == PIPE_AQE1)) &&
+				 !ADRENO_FEATURE(adreno_dev, ADRENO_AQE))
+				continue;
+
+			for (i = 0; nc_overrides[i].offset; i++) {
+				if (!nc_overrides[i].set)
+					continue;
+
+				pipe = (unsigned long)nc_overrides[i].pipelines;
+				if (test_bit(pipe_id, &pipe))
+					gen8_regwrite_aperture(device, nc_overrides[i].offset,
+							nc_overrides[i].val, pipe_id, 0, 0);
+			}
+		}
+		mutex_unlock(&gen8_dev->nc_mutex);
+	}
+
+	/* Clear aperture register */
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+}
+
+#define RBBM_CLOCK_CNTL_ON 0x8aa8aa82
+
+static void gen8_hwcg_set(struct adreno_device *adreno_dev, bool on)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	u32 value;
+	int i;
+
+	/* Increase clock keep-on hysteresis from 5 cycles to 8 cycles */
+	if (!adreno_is_gen8_3_0(adreno_dev) && on)
+		kgsl_regwrite(device, GEN8_RBBM_CGC_0_PC, 0x00000702);
+
+	if (!adreno_dev->hwcg_enabled)
+		on = false;
+
+	for (i = 0; i < gen8_core->ao_hwcg_count; i++)
+		gmu_core_regwrite(device, gen8_core->ao_hwcg[i].offset,
+			on ? gen8_core->ao_hwcg[i].val : 0);
+
+	kgsl_regwrite(device, GEN8_RBBM_CLOCK_CNTL_GLOBAL, 1);
+	kgsl_regwrite(device, GEN8_RBBM_CGC_GLOBAL_LOAD_CMD, on ? 1 : 0);
+
+	if (on) {
+		u32 retry = 3;
+
+		kgsl_regwrite(device, GEN8_RBBM_CGC_P2S_TRIG_CMD, 1);
+		/* Poll for the TXDONE:BIT(0) status */
+		do {
+			/* Wait for a small amount of time for the TXDONE status */
+			udelay(1);
+			kgsl_regread(device, GEN8_RBBM_CGC_P2S_STATUS, &value);
+		} while (!(value & BIT(0)) && --retry);
+
+		if (!(value & BIT(0))) {
+			dev_err(device->dev, "RBBM_CGC_P2S_STATUS:TXDONE Poll failed\n");
+			kgsl_device_snapshot(device, NULL, NULL, false);
+			return;
+		}
+		kgsl_regwrite(device, GEN8_RBBM_CLOCK_CNTL_GLOBAL, 0);
+	}
+}
+
+static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+	struct adreno_reglist_list reglist[3];
+	void *ptr = adreno_dev->pwrup_reglist->hostptr;
+	struct cpu_gpu_lock *lock = ptr;
+	u32 items = 0, i, j, pipe_id;
+	u32 *dest = ptr + sizeof(*lock);
+	struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides;
+
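+	/*
+	 * The shared buffer begins with the cpu_gpu_lock header; the register
+	 * pairs and triplets are written immediately after it.
+	 */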
+	/* Static IFPC restore only registers */
+	reglist[items].regs = gen8_3_0_ifpc_pwrup_reglist;
+	reglist[items].count = ARRAY_SIZE(gen8_3_0_ifpc_pwrup_reglist);
+	lock->ifpc_list_len = reglist[items].count;
+	items++;
+
+	/* Static IFPC + preemption registers */
+	reglist[items].regs = gen8_3_0_pwrup_reglist;
+	reglist[items].count = ARRAY_SIZE(gen8_3_0_pwrup_reglist);
+	lock->preemption_list_len = reglist[items].count;
+	items++;
+
+	/*
+	 * For each entry in each of the lists, write the offset and the current
+	 * register value into the GPU buffer
+	 */
+	for (i = 0; i < items; i++) {
+		const u32 *r = reglist[i].regs;
+
+		for (j = 0; j < reglist[i].count; j++) {
+			*dest++ = r[j];
+			kgsl_regread(device, r[j], dest++);
+		}
+
+		mutex_lock(&gen8_dev->nc_mutex);
+		for (j = 0; nc_overrides[j].offset; j++) {
+			unsigned long pipe = (unsigned long)nc_overrides[j].pipelines;
+
+			if (!(test_bit(PIPE_NONE, &pipe) && nc_overrides[j].set &&
+				nc_overrides[j].list_type))
+				continue;
+
+			if ((reglist[i].regs == gen8_3_0_ifpc_pwrup_reglist) &&
+				(nc_overrides[j].list_type == 1)) {
+				*dest++ = nc_overrides[j].offset;
+				kgsl_regread(device, nc_overrides[j].offset, dest++);
+				lock->ifpc_list_len++;
+			} else if ((reglist[i].regs == gen8_3_0_pwrup_reglist) &&
+				(nc_overrides[j].list_type == 2)) {
+				*dest++ = nc_overrides[j].offset;
+				kgsl_regread(device, nc_overrides[j].offset, dest++);
+				lock->preemption_list_len++;
+			}
+		}
+		mutex_unlock(&gen8_dev->nc_mutex);
+
+	}
+
+	/*
+	 * The overall register list is composed of
+	 * 1. Static IFPC-only registers
+	 * 2. Static IFPC + preemption registers
+	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
+	 *
+	 * The first two lists are static. Their sizes are stored as the
+	 * number of pairs in ifpc_list_len and preemption_list_len
+	 * respectively. With concurrent binning, some of the perfcounter
+	 * registers are virtualized, so CP needs to know the pipe id to
+	 * program the aperture in order to restore them. Thus, the third list
+	 * is a dynamic list of triplets
+	 * (<aperture, shifted 12 bits> <address> <data>), and its length is
+	 * stored as the number of triplets in dynamic_list_len.
+	 *
+	 * Starting with Gen8, some of the registers that are initialized statically
+	 * by the kernel are pipe-specific. Because only the dynamic list is able to
+	 * support specifying a pipe ID, these registers are bundled along with any
+	 * dynamic entries such as perf counter selects into a single dynamic list.
+	 */
+
+	gen8_dev->ext_pwrup_list_len = 0;
+
+	/*
+	 * Write external pipe specific regs (<aperture> <address> <value> - triplets)
+	 * offset and the current value into GPU buffer
+	 */
+	for (pipe_id = PIPE_BR; pipe_id <= PIPE_LPAC; pipe_id++) {
+		for (i = 0; i < ARRAY_SIZE(gen8_3_0_pwrup_extlist); i++) {
+			unsigned long pipe = (unsigned long)gen8_3_0_pwrup_extlist[i].pipelines;
+
+			if (!test_bit(pipe_id, &pipe))
+				continue;
+
+			*dest++ = FIELD_PREP(GENMASK(15, 12), pipe_id);
+			*dest++ = gen8_3_0_pwrup_extlist[i].offset;
+			gen8_regread_aperture(device, gen8_3_0_pwrup_extlist[i].offset,
+					dest++, pipe_id, 0, 0);
+			gen8_dev->ext_pwrup_list_len++;
+		}
+	}
+
+	/*
+	 * Write noncontext override pipe specific regs (<aperture> <address> <value> - triplets)
+	 * offset and the current value into GPU buffer
+	 */
+	mutex_lock(&gen8_dev->nc_mutex);
+	for (pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) {
+		for (i = 0; nc_overrides[i].offset; i++) {
+			unsigned long pipe = (unsigned long)nc_overrides[i].pipelines;
+
+			if (!(test_bit(pipe_id, &pipe) && nc_overrides[i].set &&
+				nc_overrides[i].list_type))
+				continue;
+
+			*dest++ = FIELD_PREP(GENMASK(15, 12), pipe_id);
+			*dest++ = nc_overrides[i].offset;
+			gen8_regread_aperture(device, nc_overrides[i].offset,
+					dest++, pipe_id, 0, 0);
+			gen8_dev->ext_pwrup_list_len++;
+		}
+	}
+	mutex_unlock(&gen8_dev->nc_mutex);
+
+	/* Clear aperture register */
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+
+	lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len;
+}
+
+/* _llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks */
+static void _llc_configure_gpu_scid(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 gpu_scid;
+
+	if (IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice) ||
+		!adreno_dev->gpu_llc_slice_enable)
+		return;
+
+	if (llcc_slice_activate(adreno_dev->gpu_llc_slice))
+		return;
+
+	gpu_scid = llcc_get_slice_id(adreno_dev->gpu_llc_slice);
+
+	/* 5 blocks at 6 bits per block */
+	kgsl_regwrite(device, GEN8_GBIF_SCACHE_CNTL1,
+			FIELD_PREP(GENMASK(29, 24), gpu_scid) |
+			FIELD_PREP(GENMASK(23, 18), gpu_scid) |
+			FIELD_PREP(GENMASK(17, 12), gpu_scid) |
+			FIELD_PREP(GENMASK(11, 6), gpu_scid) |
+			FIELD_PREP(GENMASK(5, 0), gpu_scid));
+
+	kgsl_regwrite(device, GEN8_GBIF_SCACHE_CNTL0,
+			FIELD_PREP(GENMASK(15, 10), gpu_scid) |
+			FIELD_PREP(GENMASK(21, 16), gpu_scid) |
+			FIELD_PREP(GENMASK(27, 22), gpu_scid) | BIT(8));
+}
+
+static void _llc_gpuhtw_slice_activate(struct adreno_device *adreno_dev)
+{
+	if (IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice) ||
+		!adreno_dev->gpuhtw_llc_slice_enable)
+		return;
+
+	llcc_slice_activate(adreno_dev->gpuhtw_llc_slice);
+}
+
+static void _set_secvid(struct kgsl_device *device)
+{
+	kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_CNTL, 0x0);
+	kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
+		lower_32_bits(KGSL_IOMMU_SECURE_BASE32));
+	kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_TRUSTED_BASE_HI,
+		upper_32_bits(KGSL_IOMMU_SECURE_BASE32));
+	kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_TRUSTED_SIZE,
+		FIELD_PREP(GENMASK(31, 12),
+		(KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K)));
+}
+
+/* Set UCHE_TRAP_BASE to a page below the top of the memory space */
+#define GEN8_UCHE_TRAP_BASE 0x1FFFFFFFFF000ULL
+
+static u64 gen8_get_uche_trap_base(void)
+{
+	return GEN8_UCHE_TRAP_BASE;
+}
+
+/*
+ * All Gen8 targets support marking certain transactions as always privileged
+ * which allows us to mark more memory as privileged without having to
+ * explicitly set the APRIV bit. Choose the following transactions to be
+ * privileged by default:
+ * CDWRITE     [6:6] - Crashdumper writes
+ * CDREAD      [5:5] - Crashdumper reads
+ * RBRPWB      [3:3] - RPTR shadow writes
+ * RBPRIVLEVEL [2:2] - Memory accesses from PM4 packets in the ringbuffer
+ * RBFETCH     [1:1] - Ringbuffer reads
+ * ICACHE      [0:0] - Instruction cache fetches
+ */
+
+#define GEN8_APRIV_DEFAULT (BIT(3) | BIT(2) | BIT(1) | BIT(0))
+/* Add crashdumper permissions for the BR APRIV */
+#define GEN8_BR_APRIV_DEFAULT (GEN8_APRIV_DEFAULT | BIT(6) | BIT(5))
+
+static const struct kgsl_regmap_list gen8_3_0_bicubic_regs[] = {
+	/* GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_0 default and recommended values are the same */
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_1,  0x3fe05ff4 },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_2,  0x3fa0ebee },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_3,  0x3f5193ed },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_4,  0x3f0243f0 },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_5,  0x00000000 },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_6,  0x3fd093e8 },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_7,  0x3f4133dc },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_8,  0x3ea1dfdb },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_9,  0x3e0283e0 },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_10, 0x0000ac2b },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_11, 0x0000f01d },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_12, 0x00114412 },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_13, 0x0021980a },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_14, 0x0051ec05 },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_15, 0x0000380e },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16, 0x3ff09001 },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, 0x3fc10bfa },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, 0x3f9193f7 },
+	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_19, 0x3f7227f7 },
+};
+
+void gen8_enable_ahb_timeout_detection(struct adreno_device *adreno_dev)
+{
+	u32 val;
+
+	if (!adreno_dev->ahb_timeout_val)
+		return;
+
+	val = (ADRENO_AHB_CNTL_DEFAULT | FIELD_PREP(GENMASK(4, 0),
+			adreno_dev->ahb_timeout_val));
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_AON_CNTL, val);
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_GMU_CNTL, val);
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_CP_CNTL, val);
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL, val);
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_HOST_CNTL, val);
+}
+
+#define MIN_HBB 13
+int gen8_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	u32 mal, mode = 0, mode2 = 0, rgb565_predicator = 0, amsbc = 0;
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+	/*
+	 * HBB values 13 to 16 are represented by an HBB LSB of 0 to 3.
+	 * Any HBB value beyond 16 also requires programming the HBB MSB.
+	 * The default highest bank bit is 14, hence set the default HBB LSB
+	 * to "1" and MSB to "0".
+	 */
+	u32 hbb_lo = 1, hbb_hi = 0, hbb = 1;
+	struct cpu_gpu_lock *pwrup_lock = adreno_dev->pwrup_reglist->hostptr;
+	u64 uche_trap_base = gen8_get_uche_trap_base();
+	u32 rgba8888_lossless = 0, fp16compoptdis = 0;
+
+	/* Reset aperture fields to go through first aperture write check */
+	gen8_dev->aperture = UINT_MAX;
+
+	/* Make all blocks contribute to the GPU BUSY perf counter */
+	kgsl_regwrite(device, GEN8_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
+
+	kgsl_regwrite(device, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO,
+			lower_32_bits(adreno_dev->uche_gmem_base));
+	kgsl_regwrite(device, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI,
+			upper_32_bits(adreno_dev->uche_gmem_base));
+	kgsl_regwrite(device, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO,
+			lower_32_bits(adreno_dev->uche_gmem_base));
+	kgsl_regwrite(device, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI,
+			upper_32_bits(adreno_dev->uche_gmem_base));
+
+	if (adreno_dev->lpac_enabled) {
+		gen8_regwrite_aperture(device, GEN8_RB_LPAC_GMEM_PROTECT,
+			0x0c000000, PIPE_BR, 0, 0);
+
+		/* Clear aperture register  */
+		gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+
+		kgsl_regwrite(device, GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_LO,
+				lower_32_bits(adreno_dev->uche_gmem_base));
+		kgsl_regwrite(device, GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_HI,
+				upper_32_bits(adreno_dev->uche_gmem_base));
+		kgsl_regwrite(device, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_LO,
+				lower_32_bits(adreno_dev->uche_gmem_base));
+		kgsl_regwrite(device, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_HI,
+				upper_32_bits(adreno_dev->uche_gmem_base));
+	}
+
+	/*
+	 * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively
+	 * disabling L2 bypass
+	 */
+	kgsl_regwrite(device, GEN8_UCHE_TRAP_BASE_LO, lower_32_bits(uche_trap_base));
+	kgsl_regwrite(device, GEN8_UCHE_TRAP_BASE_HI, upper_32_bits(uche_trap_base));
+	kgsl_regwrite(device, GEN8_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(uche_trap_base));
+	kgsl_regwrite(device, GEN8_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(uche_trap_base));
+
+	/*
+	 * CP takes care of the restore during IFPC exit. We need to restore at slumber
+	 * boundary as well
+	 */
+	if (pwrup_lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len > 0) {
+		kgsl_regwrite(device, GEN8_RBBM_PERFCTR_CNTL, 0x1);
+		kgsl_regwrite(device, GEN8_RBBM_SLICE_PERFCTR_CNTL, 0x1);
+	}
+
+	/* Turn on the IFPC counter (countable 4 on XOCLK4) */
+	kgsl_regwrite(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1,
+			FIELD_PREP(GENMASK(7, 0), 0x4));
+
+	/* Turn on counter to count total time spent in BCL throttle */
+	if (adreno_dev->bcl_enabled)
+		kgsl_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1, GENMASK(15, 8),
+				FIELD_PREP(GENMASK(15, 8), 0x26));
+
+	if (of_property_read_u32(device->pdev->dev.of_node, "qcom,min-access-length", &mal))
+		mal = 32;
+
+	of_property_read_u32(device->pdev->dev.of_node, "qcom,ubwc-mode", &mode);
+
+	switch (mode) {
+	case KGSL_UBWC_5_0:
+		amsbc = 1;
+		rgb565_predicator = 1;
+		mode2 = 4;
+		break;
+	case KGSL_UBWC_4_0:
+		amsbc = 1;
+		rgb565_predicator = 1;
+		fp16compoptdis = 1;
+		rgba8888_lossless = 1;
+		mode2 = 2;
+		break;
+	case KGSL_UBWC_3_0:
+		amsbc = 1;
+		mode2 = 1;
+		break;
+	default:
+		break;
+	}
+
+	if (!WARN_ON(!adreno_dev->highest_bank_bit)) {
+		hbb = adreno_dev->highest_bank_bit - MIN_HBB;
+		hbb_lo = hbb & 3;
+		hbb_hi = (hbb >> 2) & 1;
+	}
+
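+	/* A minimum access length of 64 bytes is encoded as 1, the 32-byte default as 0 */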
+	mal = (mal == 64) ? 1 : 0;
+
+	gen8_regwrite_aperture(device, GEN8_GRAS_NC_MODE_CNTL,
+			       FIELD_PREP(GENMASK(8, 5), hbb), PIPE_BV, 0, 0);
+	gen8_regwrite_aperture(device, GEN8_GRAS_NC_MODE_CNTL,
+			       FIELD_PREP(GENMASK(8, 5), hbb), PIPE_BR, 0, 0);
+	gen8_regwrite_aperture(device, GEN8_RB_CCU_NC_MODE_CNTL,
+			       FIELD_PREP(GENMASK(3, 3), hbb_hi) |
+			       FIELD_PREP(GENMASK(2, 1), hbb_lo),
+			       PIPE_BR, 0, 0);
+	gen8_regwrite_aperture(device, GEN8_RB_CMP_NC_MODE_CNTL,
+			       FIELD_PREP(GENMASK(17, 15), mode2) |
+			       FIELD_PREP(GENMASK(4, 4), rgba8888_lossless) |
+			       FIELD_PREP(GENMASK(3, 3), fp16compoptdis) |
+			       FIELD_PREP(GENMASK(2, 2), rgb565_predicator) |
+			       FIELD_PREP(GENMASK(1, 1), amsbc) |
+			       FIELD_PREP(GENMASK(0, 0), mal),
+			       PIPE_BR, 0, 0);
+
+	/* Clear aperture register  */
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+
+	kgsl_regwrite(device, GEN8_SP_NC_MODE_CNTL,
+		      FIELD_PREP(GENMASK(11, 10), hbb_hi) |
+		      FIELD_PREP(GENMASK(5, 4), 2) |
+		      FIELD_PREP(GENMASK(3, 3), mal) |
+		      FIELD_PREP(GENMASK(2, 1), hbb_lo));
+
+	kgsl_regwrite(device, GEN8_TPL1_NC_MODE_CNTL,
+		      FIELD_PREP(GENMASK(4, 4), hbb_hi) |
+		      FIELD_PREP(GENMASK(3, 3), mal) |
+		      FIELD_PREP(GENMASK(2, 1), hbb_lo));
+
+	/* Configure TP bicubic registers */
+	kgsl_regmap_multi_write(&device->regmap, gen8_3_0_bicubic_regs,
+				ARRAY_SIZE(gen8_3_0_bicubic_regs));
+
+	/* Program noncontext registers */
+	gen8_nonctxt_regconfig(adreno_dev);
+
+	/* Enable hardware hang detection */
+	kgsl_regwrite(device, GEN8_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) |
+			FIELD_PREP(GENMASK(27, 0), gen8_core->hang_detect_cycles));
+	kgsl_regwrite(device, GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));
+
+	kgsl_regwrite(device, GEN8_UCHE_CLIENT_PF, BIT(7) |
+			FIELD_PREP(GENMASK(6, 0), adreno_dev->uche_client_pf));
+
+	/* Enable the GMEM save/restore feature for preemption */
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		gen8_regwrite_aperture(device,
+				GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE,
+				0x1, PIPE_BR, 0, 0);
+		/* Clear aperture register  */
+		gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+	}
+
+	/* Enable GMU power counter 0 to count GPU busy */
+	kgsl_regwrite(device, GEN8_GMUAO_GPU_CX_BUSY_MASK, 0xff000000);
+	kgsl_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0, 0xFF, 0x20);
+	kgsl_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0x1);
+
+	gen8_protect_init(adreno_dev);
+
+	/* Configure LLCC */
+	_llc_configure_gpu_scid(adreno_dev);
+	_llc_gpuhtw_slice_activate(adreno_dev);
+
+	gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE,
+				GEN8_BR_APRIV_DEFAULT, PIPE_BR, 0, 0);
+	gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE,
+				GEN8_APRIV_DEFAULT, PIPE_BV, 0, 0);
+
+	if (adreno_dev->lpac_enabled)
+		gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE,
+					GEN8_APRIV_DEFAULT, PIPE_LPAC, 0, 0);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) {
+		gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE,
+					GEN8_APRIV_DEFAULT, PIPE_AQE0, 0, 0);
+		gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE,
+					GEN8_APRIV_DEFAULT, PIPE_AQE1, 0, 0);
+	}
+
+	/* Clear aperture register  */
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+
+	_set_secvid(device);
+
+	/*
+	 * Enable hardware clock gating here to prevent any register access
+	 * issue due to internal clock gating.
+	 */
+	gen8_hwcg_set(adreno_dev, true);
+
+	/*
+	 * All registers must be written before this point so that we don't
+	 * miss any register programming when we patch the power up register
+	 * list.
+	 */
+	if (!adreno_dev->patch_reglist &&
+		(adreno_dev->pwrup_reglist->gpuaddr != 0)) {
+		gen8_patch_pwrup_reglist(adreno_dev);
+		adreno_dev->patch_reglist = true;
+	}
+
+	return 0;
+}
+
+/* Offsets into the MX/CX mapped register regions */
+#define GEN8_RDPM_MX_OFFSET 0xf00
+#define GEN8_RDPM_CX_OFFSET 0xf14
+
+void gen8_rdpm_mx_freq_update(struct gen8_gmu_device *gmu, u32 freq)
+{
+	if (gmu->rdpm_mx_virt) {
+		writel_relaxed(freq/1000, (gmu->rdpm_mx_virt + GEN8_RDPM_MX_OFFSET));
+
+		/*
+		 * ensure previous writes post before this one,
+		 * i.e. act like normal writel()
+		 */
+		wmb();
+	}
+}
+
+void gen8_rdpm_cx_freq_update(struct gen8_gmu_device *gmu, u32 freq)
+{
+	if (gmu->rdpm_cx_virt) {
+		writel_relaxed(freq/1000, (gmu->rdpm_cx_virt + GEN8_RDPM_CX_OFFSET));
+
+		/*
+		 * ensure previous writes post before this one,
+		 * i.e. act like normal writel()
+		 */
+		wmb();
+	}
+}
+
+int gen8_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 gpu_req = GPU_ALWAYS_EN_REQ;
+	int ret;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		gpu_req |= GPU_BCL_EN_REQ;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_CLX))
+		gpu_req |= GPU_CLX_EN_REQ;
+
+	gpu_req |= GPU_TSENSE_EN_REQ;
+
+	ret = kgsl_scm_gpu_init_regs(&device->pdev->dev, gpu_req);
+
+	/*
+	 * For targets that support this scm call to program the BCL id, enable BCL.
+	 * For other targets, BCL is enabled after the first GMU boot.
+	 */
+	if (!ret && ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		adreno_dev->bcl_enabled = true;
+
+	/* If programming TZ CLX was successful, then program KMD owned CLX regs */
+	if (!ret && ADRENO_FEATURE(adreno_dev, ADRENO_CLX))
+		adreno_dev->clx_enabled = true;
+
+	/*
+	 * If the scm call returned EOPNOTSUPP, either we are on a kernel
+	 * version older than 6.1 where the scm call is not supported, or we
+	 * sent an empty request. Ignore the error in such cases.
+	 */
+	return (ret == -EOPNOTSUPP) ? 0 : ret;
+}
+
+void gen8_spin_idle_debug(struct adreno_device *adreno_dev,
+				const char *str)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	u32 rptr, wptr, status, intstatus, global_status;
+
+	dev_err(device->dev, str);
+
+	kgsl_regread(device, GEN8_CP_RB_RPTR_BR, &rptr);
+	kgsl_regread(device, GEN8_CP_RB_WPTR_GC, &wptr);
+
+	kgsl_regread(device, GEN8_RBBM_STATUS, &status);
+	kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &intstatus);
+	kgsl_regread(device, GEN8_CP_INTERRUPT_STATUS_GLOBAL, &global_status);
+
+	dev_err(device->dev,
+		"rb=%d pos=%X/%X rbbm_status=%8.8X int_0_status=%8.8X global_status=%8.8X\n",
+		adreno_dev->cur_rb ? adreno_dev->cur_rb->id : -1, rptr, wptr,
+		status, intstatus, global_status);
+
+	kgsl_device_snapshot(device, NULL, NULL, false);
+}
+
+/*
+ * gen8_send_cp_init() - Initialize ringbuffer
+ * @adreno_dev: Pointer to adreno device
+ * @rb: Pointer to the ringbuffer of device
+ *
+ * Submit commands for ME initialization.
+ */
+static int gen8_send_cp_init(struct adreno_device *adreno_dev,
+			 struct adreno_ringbuffer *rb)
+{
+	u32 *cmds;
+	int ret;
+
+	cmds = adreno_ringbuffer_allocspace(rb, GEN8_CP_INIT_DWORDS);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	gen8_cp_init_cmds(adreno_dev, cmds);
+
+	ret = gen8_ringbuffer_submit(rb, NULL);
+	if (ret)
+		return ret;
+
+	ret = adreno_spin_idle(adreno_dev, 2000);
+	if (ret) {
+		gen8_spin_idle_debug(adreno_dev,
+				     "CP initialization failed to idle\n");
+		rb->wptr = 0;
+		rb->_wptr = 0;
+	}
+
+	return ret;
+}
+
+static int gen8_post_start(struct adreno_device *adreno_dev)
+{
+	int ret;
+	u32 *cmds;
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	u64 kmd_postamble_addr;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev));
+	gen8_preemption_prepare_postamble(adreno_dev);
+
+	cmds = adreno_ringbuffer_allocspace(rb,
+			(preempt->postamble_bootup_len ? 16 : 12));
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
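+	/* Point CP at the preemption save/restore buffers for this ringbuffer */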
+	*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6);
+	*cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR;
+	*cmds++ = lower_32_bits(rb->preemption_desc->gpuaddr);
+	*cmds++ = upper_32_bits(rb->preemption_desc->gpuaddr);
+
+	*cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR;
+	*cmds++ = lower_32_bits(rb->secure_preemption_desc->gpuaddr);
+	*cmds++ = upper_32_bits(rb->secure_preemption_desc->gpuaddr);
+
+	if (preempt->postamble_bootup_len) {
+		*cmds++ = cp_type7_packet(CP_SET_AMBLE, 3);
+		*cmds++ = lower_32_bits(kmd_postamble_addr);
+		*cmds++ = upper_32_bits(kmd_postamble_addr);
+		*cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE)
+			| (FIELD_PREP(GENMASK(19, 0),
+				adreno_dev->preempt.postamble_bootup_len));
+	}
+
+	*cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	*cmds++ = 0;
+	*cmds++ = 0;
+	*cmds++ = 0;
+	/* generate interrupt on preemption completion */
+	*cmds++ = 0;
+
+	ret = gen8_ringbuffer_submit(rb, NULL);
+	if (!ret) {
+		ret = adreno_spin_idle(adreno_dev, 2000);
+		if (ret)
+			gen8_spin_idle_debug(adreno_dev,
+				"hw preemption initialization failed to idle\n");
+	}
+
+	return ret;
+}
+
+int gen8_rb_start(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb;
+	u64 addr;
+	int ret, i;
+	u32 *cmds;
+
+	/* Clear all the ringbuffers */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE);
+		kgsl_sharedmem_writel(device->scratch,
+			SCRATCH_RB_OFFSET(rb->id, rptr), 0);
+		kgsl_sharedmem_writel(device->scratch,
+			SCRATCH_RB_OFFSET(rb->id, bv_rptr), 0);
+
+		rb->wptr = 0;
+		rb->_wptr = 0;
+		rb->wptr_preempt_end = UINT_MAX;
+	}
+
+	gen8_preemption_start(adreno_dev);
+
+	/* Set up the current ringbuffer */
+	rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+
+	addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr);
+	kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_LO_BR, lower_32_bits(addr));
+	kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_HI_BR, upper_32_bits(addr));
+
+	addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_rptr);
+	kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_LO_BV, lower_32_bits(addr));
+	kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_HI_BV, upper_32_bits(addr));
+
+	kgsl_regwrite(device, GEN8_CP_RB_CNTL_GC, GEN8_CP_RB_CNTL_DEFAULT);
+
+	kgsl_regwrite(device, GEN8_CP_RB_BASE_LO_GC,
+		lower_32_bits(rb->buffer_desc->gpuaddr));
+	kgsl_regwrite(device, GEN8_CP_RB_BASE_HI_GC,
+		upper_32_bits(rb->buffer_desc->gpuaddr));
+
+	/* Program the ucode base for CP */
+	kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_LO,
+		lower_32_bits(fw->memdesc->gpuaddr));
+	kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_HI,
+		upper_32_bits(fw->memdesc->gpuaddr));
+
+	/* Clear the SQE_HALT to start the CP engine */
+	kgsl_regwrite(device, GEN8_CP_SQE_CNTL, 1);
+
+	ret = gen8_send_cp_init(adreno_dev, rb);
+	if (ret)
+		return ret;
+
+	ret = adreno_zap_shader_load(adreno_dev, gen8_core->zap_name);
+	if (ret)
+		return ret;
+
+	/*
+	 * Take the GPU out of secure mode. Try the zap shader if it is loaded,
+	 * otherwise just try to write directly to the secure control register
+	 */
+	if (!adreno_dev->zap_loaded)
+		kgsl_regwrite(device, GEN8_RBBM_SECVID_TRUST_CNTL, 0);
+	else {
+		cmds = adreno_ringbuffer_allocspace(rb, 2);
+		if (IS_ERR(cmds))
+			return PTR_ERR(cmds);
+
+		*cmds++ = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		*cmds++ = 0;
+
+		ret = gen8_ringbuffer_submit(rb, NULL);
+		if (!ret) {
+			ret = adreno_spin_idle(adreno_dev, 2000);
+			if (ret) {
+				gen8_spin_idle_debug(adreno_dev,
+					"Switch to unsecure failed to idle\n");
+				return ret;
+			}
+		}
+	}
+
+	return gen8_post_start(adreno_dev);
+}
+
+/*
+ * gen8_gpu_keepalive() - GMU reg write to request GPU stays on
+ * @adreno_dev: Pointer to the adreno device that has the GMU
+ * @state: State to set: true is ON, false is OFF
+ */
+static void gen8_gpu_keepalive(struct adreno_device *adreno_dev,
+		bool state)
+{
+	gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
+			GEN8_GMUCX_PWR_COL_KEEPALIVE, state);
+}
+
+bool gen8_hw_isidle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 reg;
+
+	gmu_core_regread(device, GEN8_GMUAO_GPU_CX_BUSY_STATUS, &reg);
+
+	/* Bit 23 is GPUBUSYIGNAHB */
+	return (reg & BIT(23)) ? false : true;
+}
+
+int gen8_microcode_read(struct adreno_device *adreno_dev)
+{
+	struct adreno_firmware *sqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+
+	return adreno_get_firmware(adreno_dev, gen8_core->sqefw_name, sqe_fw);
+}
+
+/* CP Interrupt bits */
+#define GEN8_CP_GLOBAL_INT_HWFAULTBR 0
+#define GEN8_CP_GLOBAL_INT_HWFAULTBV 1
+#define GEN8_CP_GLOBAL_INT_HWFAULTLPAC 2
+#define GEN8_CP_GLOBAL_INT_HWFAULTAQE0 3
+#define GEN8_CP_GLOBAL_INT_HWFAULTAQE1 4
+#define GEN8_CP_GLOBAL_INT_HWFAULTDDEBR 5
+#define GEN8_CP_GLOBAL_INT_HWFAULTDDEBV 6
+#define GEN8_CP_GLOBAL_INT_SWFAULTBR 16
+#define GEN8_CP_GLOBAL_INT_SWFAULTBV 17
+#define GEN8_CP_GLOBAL_INT_SWFAULTLPAC 18
+#define GEN8_CP_GLOBAL_INT_SWFAULTAQE0 19
+#define GEN8_CP_GLOBAL_INT_SWFAULTAQE1 20
+#define GEN8_CP_GLOBAL_INT_SWFAULTDDEBR 21
+#define GEN8_CP_GLOBAL_INT_SWFAULTDDEBV 22
+
+/* CP HW Fault status bits */
+#define CP_HW_RBFAULT 0
+#define CP_HW_IB1FAULT 1
+#define CP_HW_IB2FAULT 2
+#define CP_HW_IB3FAULT 3
+#define CP_HW_SDSFAULT 4
+#define CP_HW_MRBFAULT 5
+#define CP_HW_VSDFAULT 6
+#define CP_HW_SQEREADBRUSTOVF 8
+#define CP_HW_EVENTENGINEOVF 9
+#define CP_HW_UCODEERROR 10
+
+/* CP SW Fault status bits */
+#define CP_SW_CSFRBWRAP 0
+#define CP_SW_CSFIB1WRAP 1
+#define CP_SW_CSFIB2WRAP 2
+#define CP_SW_CSFIB3WRAP 3
+#define CP_SW_SDSWRAP 4
+#define CP_SW_MRBWRAP 5
+#define CP_SW_VSDWRAP 6
+#define CP_SW_OPCODEERROR 8
+#define CP_SW_VSDPARITYERROR 9
+#define CP_SW_REGISTERPROTECTIONERROR 10
+#define CP_SW_ILLEGALINSTRUCTION 11
+#define CP_SW_SMMUFAULT 12
+#define CP_SW_VBIFRESPCLIENT 13
+#define CP_SW_VBIFRESPTYPE 19
+#define CP_SW_VBIFRESPREAD 21
+#define CP_SW_VBIFRESP 22
+#define CP_SW_RTWROVF 23
+#define CP_SW_LRZRTWROVF 24
+#define CP_SW_LRZRTREFCNTOVF 25
+#define CP_SW_LRZRTCLRRESMISS 26
+
+static void gen8_get_cp_hwfault_status(struct adreno_device *adreno_dev, u32 status)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 hw_status;
+	u32 pipe_id = PIPE_NONE;
+	const char * const table[] = {
+			[CP_HW_RBFAULT] = "RBFAULT",
+			[CP_HW_IB1FAULT] = "IB1FAULT",
+			[CP_HW_IB2FAULT] = "IB2FAULT",
+			[CP_HW_IB3FAULT] = "IB3FAULT",
+			[CP_HW_SDSFAULT] = "SDSFAULT",
+			[CP_HW_MRBFAULT] = "MRGFAULT",
+			[CP_HW_VSDFAULT] = "VSDFAULT",
+			[CP_HW_SQEREADBRUSTOVF] = "SQEREADBRUSTOVF",
+			[CP_HW_EVENTENGINEOVF] = "EVENTENGINEOVF",
+			[CP_HW_UCODEERROR] = "UCODEERROR",
+	};
+
+	switch (status) {
+	case BIT(GEN8_CP_GLOBAL_INT_HWFAULTBR):
+		pipe_id = PIPE_BR;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_HWFAULTBV):
+		pipe_id = PIPE_BV;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_HWFAULTLPAC):
+		pipe_id = PIPE_LPAC;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_HWFAULTAQE0):
+		pipe_id = PIPE_AQE0;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_HWFAULTAQE1):
+		pipe_id = PIPE_AQE1;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_HWFAULTDDEBR):
+		pipe_id = PIPE_DDE_BR;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_HWFAULTDDEBV):
+		pipe_id = PIPE_DDE_BV;
+		break;
+	}
+
+	gen8_regread_aperture(device, GEN8_CP_HW_FAULT_STATUS_PIPE, &hw_status,
+		pipe_id, 0, 0);
+	/* Clear aperture register */
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+
+	dev_crit_ratelimited(device->dev, "CP HW Fault pipe_id:%u %s\n", pipe_id,
+			hw_status < ARRAY_SIZE(table) ? table[hw_status] : "UNKNOWN");
+}
+
+static void gen8_get_cp_swfault_status(struct adreno_device *adreno_dev, u32 status)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 sw_status, status1;
+	u32 opcode, pipe_id = PIPE_NONE;
+	const char * const table[] = {
+		[CP_SW_CSFRBWRAP] = "CSFRBWRAP",
+		[CP_SW_CSFIB1WRAP] = "CSFIB1WRAP",
+		[CP_SW_CSFIB2WRAP] = "CSFIB2WRAP",
+		[CP_SW_CSFIB3WRAP] = "CSFIB3WRAP",
+		[CP_SW_SDSWRAP] = "SDSWRAP",
+		[CP_SW_MRBWRAP] = "MRBWRAP",
+		[CP_SW_VSDWRAP] = "VSDWRAP",
+		[CP_SW_OPCODEERROR] = "OPCODEERROR",
+		[CP_SW_VSDPARITYERROR] = "VSDPARITYERROR",
+		[CP_SW_REGISTERPROTECTIONERROR] = "REGISTERPROTECTIONERROR",
+		[CP_SW_ILLEGALINSTRUCTION] = "ILLEGALINSTRUCTION",
+		[CP_SW_SMMUFAULT] = "SMMUFAULT",
+		[CP_SW_VBIFRESPCLIENT] = "VBIFRESPCLIENT",
+		[CP_SW_VBIFRESPTYPE] = "VBIFRESPTYPE",
+		[CP_SW_VBIFRESPREAD] = "VBIFRESPREAD",
+		[CP_SW_VBIFRESP] = "VBIFRESP",
+		[CP_SW_RTWROVF] = "RTWROVF",
+		[CP_SW_LRZRTWROVF] = "LRZRTWROVF",
+		[CP_SW_LRZRTREFCNTOVF] = "LRZRTREFCNTOVF",
+		[CP_SW_LRZRTCLRRESMISS] = "LRZRTCLRRESMISS",
+	};
+
+	switch (status) {
+	case BIT(GEN8_CP_GLOBAL_INT_SWFAULTBR):
+		pipe_id = PIPE_BR;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_SWFAULTBV):
+		pipe_id = PIPE_BV;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_SWFAULTLPAC):
+		pipe_id = PIPE_LPAC;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_SWFAULTAQE0):
+		pipe_id = PIPE_AQE0;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_SWFAULTAQE1):
+		pipe_id = PIPE_AQE1;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_SWFAULTDDEBR):
+		pipe_id = PIPE_DDE_BR;
+		break;
+	case BIT(GEN8_CP_GLOBAL_INT_SWFAULTDDEBV):
+		pipe_id = PIPE_DDE_BV;
+		break;
+	}
+
+	gen8_regread_aperture(device, GEN8_CP_INTERRUPT_STATUS_PIPE, &sw_status,
+			      pipe_id, 0, 0);
+
+	dev_crit_ratelimited(device->dev, "CP SW Fault pipe_id: %u %s\n", pipe_id,
+			sw_status < ARRAY_SIZE(table) ? table[sw_status] : "UNKNOWN");
+
+	if (sw_status & BIT(CP_SW_OPCODEERROR)) {
+		gen8_regwrite_aperture(device, GEN8_CP_SQE_STAT_ADDR_PIPE, 1,
+				pipe_id, 0, 0);
+		gen8_regread_aperture(device, GEN8_CP_SQE_STAT_DATA_PIPE, &opcode,
+				pipe_id, 0, 0);
+		dev_crit_ratelimited(device->dev,
+			"CP opcode error interrupt | opcode=0x%8.8x\n", opcode);
+	}
+
+	if (sw_status & BIT(CP_SW_REGISTERPROTECTIONERROR)) {
+		gen8_regread_aperture(device, GEN8_CP_PROTECT_STATUS_PIPE, &status1,
+			pipe_id, 0, 0);
+		dev_crit_ratelimited(device->dev,
+			"CP | Protected mode error | %s | addr=%lx | status=%x\n",
+			FIELD_GET(GENMASK(20, 20), status1) ? "READ" : "WRITE",
+			FIELD_GET(GENMASK(17, 0), status1), status1);
+	}
+
+	/* Clear aperture register */
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+}
+
+static void gen8_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 global_status;
+	u32 hw_fault, sw_fault;
+
+	kgsl_regread(device, GEN8_CP_INTERRUPT_STATUS_GLOBAL, &global_status);
+
+	dev_crit_ratelimited(device->dev, "CP fault int_status_global=0x%x\n", global_status);
+
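+	/* HW fault sources sit in bits [6:0], SW fault sources in bits [22:16] */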
+	hw_fault = FIELD_GET(GENMASK(6, 0), global_status);
+	sw_fault = FIELD_GET(GENMASK(22, 16), global_status);
+
+	if (hw_fault)
+		gen8_get_cp_hwfault_status(adreno_dev, hw_fault);
+	else if (sw_fault)
+		gen8_get_cp_swfault_status(adreno_dev, sw_fault);
+}
+
+static void gen8_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	switch (bit) {
+	case GEN8_INT_AHBERROR:
+		{
+		u32 err_details_0, err_details_1;
+
+		kgsl_regread(device, GEN8_CP_RL_ERROR_DETAILS_0, &err_details_0);
+		kgsl_regread(device, GEN8_CP_RL_ERROR_DETAILS_1, &err_details_1);
+		dev_crit_ratelimited(device->dev,
+			"CP: AHB bus error, CP_RL_ERROR_DETAILS_0:0x%x CP_RL_ERROR_DETAILS_1:0x%x\n",
+			err_details_0, err_details_1);
+		break;
+		}
+	case GEN8_INT_ATBASYNCFIFOOVERFLOW:
+		dev_crit_ratelimited(device->dev, "RBBM: ATB ASYNC overflow\n");
+		break;
+	case GEN8_INT_ATBBUSOVERFLOW:
+		dev_crit_ratelimited(device->dev, "RBBM: ATB bus overflow\n");
+		break;
+	case GEN8_INT_OUTOFBOUNDACCESS:
+		dev_crit_ratelimited(device->dev, "UCHE: Out of bounds access\n");
+		break;
+	case GEN8_INT_UCHETRAPINTERRUPT:
+		dev_crit_ratelimited(device->dev, "UCHE: Trap interrupt\n");
+		break;
+	case GEN8_INT_TSBWRITEERROR:
+		{
+		u32 lo, hi;
+
+		kgsl_regread(device, GEN8_RBBM_SECVID_TSB_STATUS_LO, &lo);
+		kgsl_regread(device, GEN8_RBBM_SECVID_TSB_STATUS_HI, &hi);
+
+		dev_crit_ratelimited(device->dev, "TSB: Write error interrupt: Address: 0x%lx MID: %lu\n",
+			FIELD_GET(GENMASK(16, 0), hi) << 32 | lo,
+			FIELD_GET(GENMASK(31, 23), hi));
+		break;
+		}
+	default:
+		dev_crit_ratelimited(device->dev, "Unknown interrupt %d\n", bit);
+	}
+}
+
+static const char *const uche_client[] = {
+	"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
+	"BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
+	"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
+	"BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
+	"STCHE",
+};
+
+static const char *const uche_lpac_client[] = {
+	"-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC"
+};
+
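+/* Placeholder value read back from GEN8_UCHE_CLIENT_PF while the GPU is in IFPC */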
+#define SCOOBYDOO 0x5c00bd00
+
+static const char *gen8_fault_block_uche(struct kgsl_device *device,
+		char *str, int size, bool lpac)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 uche_client_id = adreno_dev->uche_client_pf;
+	const char *uche_client_str, *fault_block;
+
+	/*
+	 * The SMMU driver takes a vote on the CX gdsc before calling the kgsl
+	 * pagefault handler. If there is contention for device mutex in this
+	 * path and the dispatcher fault handler is holding this lock, trying
+	 * to turn off CX gdsc will fail during the reset. So to avoid blocking
+	 * here, try to lock device mutex and return if it fails.
+	 */
+	if (!mutex_trylock(&device->mutex))
+		goto regread_fail;
+
+	if (!kgsl_state_is_awake(device)) {
+		mutex_unlock(&device->mutex);
+		goto regread_fail;
+	}
+
+	kgsl_regread(device, GEN8_UCHE_CLIENT_PF, &uche_client_id);
+	mutex_unlock(&device->mutex);
+
+	/* Ignore the value if the gpu is in IFPC */
+	if (uche_client_id == SCOOBYDOO) {
+		uche_client_id = adreno_dev->uche_client_pf;
+		goto regread_fail;
+	}
+
+	/* UCHE client id mask is bits [6:0] */
+	uche_client_id &= GENMASK(6, 0);
+
+regread_fail:
+	if (lpac) {
+		fault_block = "UCHE_LPAC";
+		if (uche_client_id >= ARRAY_SIZE(uche_lpac_client))
+			goto fail;
+		uche_client_str = uche_lpac_client[uche_client_id];
+	} else {
+		fault_block = "UCHE";
+		if (uche_client_id >= ARRAY_SIZE(uche_client))
+			goto fail;
+		uche_client_str = uche_client[uche_client_id];
+	}
+
+	snprintf(str, size, "%s: %s", fault_block, uche_client_str);
+	return str;
+
+fail:
+	snprintf(str, size, "%s: Unknown (client_id: %u)",
+			fault_block, uche_client_id);
+	return str;
+}
+
+static const char *gen8_iommu_fault_block(struct kgsl_device *device,
+		u32 fsynr1)
+{
+	u32 mid = fsynr1 & 0xff;
+	static char str[36];
+
+	switch (mid) {
+	case 0x0:
+		return "CP";
+	case 0x1:
+		return "UCHE: Unknown";
+	case 0x2:
+		return "UCHE_LPAC: Unknown";
+	case 0x3:
+		return gen8_fault_block_uche(device, str, sizeof(str), false);
+	case 0x4:
+		return "CCU";
+	case 0x5:
+		return "Flag cache";
+	case 0x6:
+		return "PREFETCH";
+	case 0x7:
+		return "GMU";
+	case 0x8:
+		return gen8_fault_block_uche(device, str, sizeof(str), true);
+	case 0x9:
+		return "UCHE_HPAC";
+	}
+
+	snprintf(str, sizeof(str), "Unknown (mid: %u)", mid);
+	return str;
+}
+
+static void gen8_cp_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (adreno_is_preemption_enabled(adreno_dev))
+		gen8_preemption_trigger(adreno_dev, true);
+
+	adreno_dispatcher_schedule(device);
+}
+
+/*
+ * gen8_gpc_err_int_callback() - ISR for GPC error interrupts
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+static void gen8_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/*
+	 * A GPC error is typically the result of a SW programming mistake.
+	 * Force a GPU fault for this interrupt so that we can debug it
+	 * with the help of a register dump.
+	 */
+
+	dev_crit(device->dev, "RBBM: GPC error\n");
+	adreno_irqctrl(adreno_dev, 0);
+
+	/* Trigger a fault in the dispatcher - this will effect a restart */
+	adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT);
+}
+
+/*
+ * gen8_swfuse_violation_callback() - ISR for software fuse violation interrupt
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+static void gen8_swfuse_violation_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	/*
+	 * A SWFUSEVIOLATION error is typically the result of enabling a
+	 * software feature which is not supported by the hardware. The
+	 * following feature violations are reported:
+	 * 1) FASTBLEND (BIT:0): No fault, RB will send the workload to the
+	 *    legacy blender HW pipeline.
+	 * 2) LPAC (BIT:1): Fault
+	 * 3) RAYTRACING (BIT:2): Fault
+	 */
+	kgsl_regread(device, GEN8_RBBM_SW_FUSE_INT_STATUS, &status);
+
+	/*
+	 * RBBM_INT_CLEAR_CMD will not clear SWFUSEVIOLATION interrupt. Hence
+	 * do explicit swfuse irq clear.
+	 */
+	kgsl_regwrite(device, GEN8_RBBM_SW_FUSE_INT_MASK, 0);
+
+	dev_crit_ratelimited(device->dev,
+		"RBBM: SW Feature Fuse violation status=0x%8.8x\n", status);
+
+	/* Trigger a fault in the dispatcher for LPAC and RAYTRACING violation */
+	if (status & GENMASK(GEN8_RAYTRACING_SW_FUSE, GEN8_LPAC_SW_FUSE)) {
+		adreno_irqctrl(adreno_dev, 0);
+		adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT);
+	}
+}
+
+static const struct adreno_irq_funcs gen8_irq_funcs[32] = {
+	ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */
+	ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 1 - RBBM_AHB_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 2 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 3 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 4 - CPIPCINT0 */
+	ADRENO_IRQ_CALLBACK(NULL), /* 5 - CPIPCINT1 */
+	ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 6 - ATBASYNCOVERFLOW */
+	ADRENO_IRQ_CALLBACK(gen8_gpc_err_int_callback), /* 7 - GPC_ERR */
+	ADRENO_IRQ_CALLBACK(gen8_preemption_callback),/* 8 - CP_SW */
+	ADRENO_IRQ_CALLBACK(gen8_cp_hw_err_callback), /* 9 - CP_HW_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 10 - CP_CCU_FLUSH_DEPTH_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 11 - CP_CCU_FLUSH_COLOR_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_CCU_RESOLVE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 13 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 14 - UNUSED */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */
+	ADRENO_IRQ_CALLBACK(NULL), /* 16 - CP_RB_INT_LPAC*/
+	ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 18 - UNUSED */
+	ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNUSED */
+	ADRENO_IRQ_CALLBACK(gen8_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 21 - CP_CACHE_TS_LPAC */
+	ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */
+	ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), /* 23 - MISHANGDETECT */
+	ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 24 - UCHE_OOB_ACCESS */
+	ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 25 - UCHE_TRAP_INTR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */
+	ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */
+	ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 28 - TSBWRITEERROR */
+	ADRENO_IRQ_CALLBACK(gen8_swfuse_violation_callback), /* 29 - SWFUSEVIOLATION */
+	ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */
+	ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */
+};
+
+/*
+ * If the AHB fence is not in ALLOW mode when we receive an RBBM
+ * interrupt, something went wrong. This means that we cannot proceed
+ * since the IRQ status and clear registers are not accessible.
+ * This is usually harmless because the GMU will abort power collapse
+ * and change the fence back to ALLOW. Poll so that this can happen.
+ */
+static int gen8_irq_poll_fence(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u32 status, fence, fence_retries = 0;
+	u64 a, b, c;
+
+	a = gpudev->read_alwayson(adreno_dev);
+
+	kgsl_regread(device, GEN8_GMUAO_AHB_FENCE_CTRL, &fence);
+
+	while (fence != 0) {
+		b = gpudev->read_alwayson(adreno_dev);
+
+		/* Wait for a small amount of time before trying again */
+		udelay(1);
+		kgsl_regread(device, GEN8_GMUAO_AHB_FENCE_CTRL, &fence);
+
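+		/* Give up after 100 retries (roughly 100us with the 1us delay below) */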
+		if (fence_retries == 100 && fence != 0) {
+			c = gpudev->read_alwayson(adreno_dev);
+
+			kgsl_regread(device, GEN8_GMUAO_RBBM_INT_UNMASKED_STATUS_SHADOW,
+				&status);
+
+			dev_crit_ratelimited(device->dev,
+				"status=0x%x Unmasked status=0x%x Mask=0x%x timestamps: %llx %llx %llx\n",
+					status & adreno_dev->irq_mask, status,
+					adreno_dev->irq_mask, a, b, c);
+			return -ETIMEDOUT;
+		}
+
+		fence_retries++;
+	}
+
+	return 0;
+}
+
+static irqreturn_t gen8_irq_handler(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	irqreturn_t ret = IRQ_NONE;
+	u32 status;
+
+	/*
+	 * GPU can power down once the INT_0_STATUS is read below.
+	 * But there still might be some register reads required so
+	 * force the GMU/GPU into KEEPALIVE mode until done with the ISR.
+	 */
+	gen8_gpu_keepalive(adreno_dev, true);
+
+	if (gen8_irq_poll_fence(adreno_dev)) {
+		adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
+		goto done;
+	}
+
+	kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &status);
+
+	kgsl_regwrite(device, GEN8_RBBM_INT_CLEAR_CMD, status);
+
+	ret = adreno_irq_callbacks(adreno_dev, gen8_irq_funcs, status);
+
+	trace_kgsl_gen8_irq_status(adreno_dev, status);
+
+done:
+	/* If hard fault, then let snapshot turn off the keepalive */
+	if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT))
+		gen8_gpu_keepalive(adreno_dev, false);
+
+	return ret;
+}
+
+static irqreturn_t gen8_cx_host_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 status;
+
+	adreno_cx_misc_regread(adreno_dev, GEN8_GPU_CX_MISC_INT_0_STATUS, &status);
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, status);
+
+	if (status & BIT(GEN8_CX_MISC_GPU_CC_IRQ))
+		KGSL_PWRCTRL_LOG_FREQLIM(device);
+
+	if (status & ~GEN8_CX_MISC_INT_MASK)
+		dev_err_ratelimited(device->dev, "Unhandled CX MISC interrupts 0x%lx\n",
+			status & ~GEN8_CX_MISC_INT_MASK);
+
+	return IRQ_HANDLED;
+}
+
+int gen8_probe_common(struct platform_device *pdev,
+	struct adreno_device *adreno_dev, u32 chipid,
+	const struct adreno_gpu_core *gpucore)
+{
+	const struct adreno_gpudev *gpudev = gpucore->gpudev;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = container_of(gpucore,
+			struct adreno_gen8_core, base);
+	int ret;
+
+	adreno_dev->gpucore = gpucore;
+	adreno_dev->chipid = chipid;
+
+	adreno_reg_offset_init(gpudev->reg_offsets);
+
+	adreno_dev->hwcg_enabled = true;
+	adreno_dev->uche_client_pf = 1;
+
+	kgsl_pwrscale_fast_bus_hint(gen8_core->fast_bus_hint);
+	device->pwrctrl.cx_cfg_gdsc_offset = GEN8_GPU_CC_CX_CFG_GDSCR;
+
+	device->pwrctrl.rt_bus_hint = gen8_core->rt_bus_hint;
+
+	device->cx_host_irq_num = kgsl_request_irq_optional(pdev,
+		"cx_host_irq", gen8_cx_host_irq_handler, device);
+
+	ret = adreno_device_probe(pdev, adreno_dev);
+	if (ret)
+		return ret;
+
+	if (adreno_preemption_feature_set(adreno_dev)) {
+		adreno_dev->preempt.preempt_level = gen8_core->preempt_level;
+		adreno_dev->preempt.skipsaverestore = true;
+		adreno_dev->preempt.usesgmem = true;
+		set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+	}
+
+	/* debugfs node for ACD calibration */
+	debugfs_create_file("acd_calibrate", 0644, device->d_debugfs, device, &acd_cal_fops);
+
+	/* Dump additional AQE 16KB data on top of default 128KB(64(BR)+64(BV)) */
+	device->snapshot_ctxt_record_size = ADRENO_FEATURE(adreno_dev, ADRENO_AQE) ?
+			(GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES + SZ_16K) :
+			GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES;
+
+	return 0;
+}
+
+/* Register offset defines for Gen8, in order of enum adreno_regs */
+static u32 gen8_register_offsets[ADRENO_REG_REGISTER_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, GEN8_CP_RB_BASE_LO_GC),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, GEN8_CP_RB_BASE_HI_GC),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, GEN8_CP_RB_RPTR_BR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, GEN8_CP_RB_WPTR_GC),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, GEN8_CP_SQE_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, GEN8_CP_IB1_BASE_LO_PIPE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, GEN8_CP_IB1_BASE_HI_PIPE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, GEN8_CP_IB1_REM_SIZE_PIPE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, GEN8_CP_IB2_BASE_LO_PIPE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, GEN8_CP_IB2_BASE_HI_PIPE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, GEN8_CP_IB2_REM_SIZE_PIPE),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, GEN8_RBBM_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, GEN8_RBBM_INT_0_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, GEN8_RBBM_SW_RESET_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK,
+			GEN8_GMUAO_AO_HOST_INTERRUPT_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_GMU_GMU2HOST_INTR_MASK,
+			GEN8_GMUCX_GMU2HOST_INTR_MASK),
+};
+
+static u32 _get_pipeid(u32 groupid)
+{
+	switch (groupid) {
+	case KGSL_PERFCOUNTER_GROUP_BV_PC:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_BV_VFD:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_BV_VPC:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_BV_TSE:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_BV_RAS:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_BV_LRZ:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_BV_HLSQ:
+		return PIPE_BV;
+	case KGSL_PERFCOUNTER_GROUP_PC:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_VFD:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_HLSQ:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_VPC:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_CCU:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_CMP:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_TSE:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_RAS:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_LRZ:
+		fallthrough;
+	case KGSL_PERFCOUNTER_GROUP_RB:
+		return PIPE_BR;
+	default:
+		return PIPE_NONE;
+	}
+}
+
+int gen8_perfcounter_remove(struct adreno_device *adreno_dev,
+			    struct adreno_perfcount_register *reg, u32 groupid)
+{
+	const struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev);
+	const struct adreno_perfcount_group *group;
+	void *ptr = adreno_dev->pwrup_reglist->hostptr;
+	struct cpu_gpu_lock *lock = ptr;
+	u32 offset = ((lock->ifpc_list_len + lock->preemption_list_len) * 2) +
+			(gen8_dev->ext_pwrup_list_len * 3);
+	int i, last_offset, num_removed, start_offset = -1;
+	u32 *data = ptr + sizeof(*lock), pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid));
+	u16 perfcntr_list_len = lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len;
+
+	if (!perfcntr_list_len)
+		return -EINVAL;
+
+	group = &(counters->groups[groupid]);
+
+	if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)) {
+		if (perfcntr_list_len != 2)
+			return 0;
+
+		if (kgsl_hwlock(lock)) {
+			kgsl_hwunlock(lock);
+			return -EBUSY;
+		}
+		goto disable_perfcounter;
+	}
+
+	last_offset = offset + (perfcntr_list_len * 3);
+
+	/* Look for the perfcounter to remove in the list */
+	for (i = 0; i < perfcntr_list_len - 2; i++) {
+		if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) {
+			start_offset = offset;
+			break;
+		}
+		offset += 3;
+	}
+
+	if (start_offset == -1)
+		return -ENOENT;
+
+	for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++)
+		offset += 3;
+
+	if (kgsl_hwlock(lock)) {
+		kgsl_hwunlock(lock);
+		return -EBUSY;
+	}
+
+	/* Let offset point to the first entry that is going to be retained */
+	offset += 3;
+
+	memcpy(&data[start_offset], &data[offset], (last_offset - offset) * sizeof(u32));
+
+	memset(&data[start_offset + (last_offset - offset)], 0,
+			(offset - start_offset) * sizeof(u32));
+
+	num_removed = offset - start_offset;
+	do_div(num_removed, 3);
+	lock->dynamic_list_len -= num_removed;
+
+disable_perfcounter:
+	/*
+	 * If only the two perfcounter control entries remain in the dynamic list and
+	 * no_restore_count is 0, remove the perfcounter controls from the list as well.
+	 */
+	if (perfcntr_list_len == 2 && !adreno_dev->no_restore_count) {
+		memset(&data[offset], 0, 6 * sizeof(u32));
+		lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len;
+	}
+
+	kgsl_hwunlock(lock);
+	return 0;
+}
+
+int gen8_perfcounter_update(struct adreno_device *adreno_dev,
+	struct adreno_perfcount_register *reg, bool update_reg, u32 pipe, unsigned long flags)
+{
+	struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev);
+	void *ptr = adreno_dev->pwrup_reglist->hostptr;
+	struct cpu_gpu_lock *lock = ptr;
+	u32 offset = ((lock->ifpc_list_len + lock->preemption_list_len) * 2) +
+			(gen8_dev->ext_pwrup_list_len * 3);
+	u32 *data = ptr + sizeof(*lock);
+	int i, start_offset = -1;
+	u16 perfcntr_list_len = lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len;
+
+	if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) {
+		for (i = 0; i < perfcntr_list_len - 2; i++) {
+			if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) {
+				start_offset = offset;
+				break;
+			}
+
+			offset += 3;
+		}
+	} else if (perfcntr_list_len) {
+		goto update;
+	}
+
+	if (kgsl_hwlock(lock)) {
+		kgsl_hwunlock(lock);
+		return -EBUSY;
+	}
+
+	/*
+	 * If the perfcounter select register is already present in reglist
+	 * update it, otherwise append the <aperture, select register, value>
+	 * triplet to the end of the list.
+	 */
+	if (start_offset != -1) {
+		data[offset + 2] = reg->countable;
+		for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++) {
+			offset += 3;
+			data[offset + 2] = reg->countable;
+		}
+		kgsl_hwunlock(lock);
+		goto update;
+	}
+
+	/* Initialize the lock->dynamic_list_len to account for perfcounter controls */
+	if (!perfcntr_list_len)
+		lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len + 2;
+
+	/*
+	 * For all targets the perfcounter control entries (GEN8_RBBM_PERFCTR_CNTL and
+	 * GEN8_RBBM_SLICE_PERFCTR_CNTL) need to be the last entries, so overwrite the
+	 * existing control entries and add them back at the end.
+	 */
+	if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) {
+		data[offset++] = pipe;
+		data[offset++] = reg->select;
+		data[offset++] = reg->countable;
+		lock->dynamic_list_len++;
+
+		for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++) {
+			data[offset++] = pipe;
+			data[offset++] = reg->reg_dependency[i];
+			data[offset++] = reg->countable;
+			lock->dynamic_list_len++;
+		}
+	}
+
+	data[offset++] = FIELD_PREP(GENMASK(15, 12), PIPE_NONE);
+	data[offset++] = GEN8_RBBM_PERFCTR_CNTL;
+	data[offset++] = 1;
+
+	data[offset++] = FIELD_PREP(GENMASK(15, 12), PIPE_NONE);
+	data[offset++] = GEN8_RBBM_SLICE_PERFCTR_CNTL;
+	data[offset++] = 1;
+
+	kgsl_hwunlock(lock);
+
+update:
+	if (update_reg) {
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		kgsl_regwrite(device, reg->select, reg->countable);
+
+		for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++)
+			kgsl_regwrite(device, reg->reg_dependency[i], reg->countable);
+	}
+
+	return 0;
+}
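
For reference, the dword offset arithmetic used by gen8_perfcounter_remove() and gen8_perfcounter_update() above assumes the shared powerup reglist is laid out as the cpu_gpu_lock header, then (ifpc_list_len + preemption_list_len) two-dword <offset, value> pairs, then ext_pwrup_list_len three-dword <pipe, offset, value> triplets, and finally the dynamic perfcounter triplets. A minimal standalone sketch of that indexing (the helper name and parameter types are illustrative, not from the driver):

    /* Dword index, relative to the payload that follows the cpu_gpu_lock
     * header, of the first dynamic perfcounter triplet.
     */
    static unsigned int first_dynamic_triplet(unsigned int ifpc_list_len,
                                              unsigned int preemption_list_len,
                                              unsigned int ext_pwrup_list_len)
    {
        return (ifpc_list_len + preemption_list_len) * 2 +
               ext_pwrup_list_len * 3;
    }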
+
+static u64 gen8_read_alwayson(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 lo = 0, hi = 0, tmp = 0;
+
+	/* Always use the GMU AO counter when doing an AHB read */
+	gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_HI, &hi);
+	gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_LO, &lo);
+
+	/* Check for overflow */
+	gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_HI, &tmp);
+
+	if (hi != tmp) {
+		gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_LO,
+				&lo);
+		hi = tmp;
+	}
+
+	return (((u64) hi) << 32) | lo;
+}
+
+static int gen8_lpac_store(struct adreno_device *adreno_dev, bool enable)
+{
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
+		return -EINVAL;
+
+	if (!(adreno_dev->feature_fuse & BIT(GEN8_LPAC_SW_FUSE)) ||
+		(adreno_dev->lpac_enabled == enable))
+		return 0;
+
+	/* Power down the GPU before changing the lpac setting */
+	return adreno_power_cycle_bool(adreno_dev, &adreno_dev->lpac_enabled, enable);
+}
+
+static void gen8_remove(struct adreno_device *adreno_dev)
+{
+	if (adreno_preemption_feature_set(adreno_dev))
+		del_timer(&adreno_dev->preempt.timer);
+}
+
+static void gen8_read_bus_stats(struct kgsl_device *device,
+		struct kgsl_power_stats *stats,
+		struct adreno_busy_data *busy)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u64 ram_cycles, starved_ram;
+
+	ram_cycles = counter_delta(device, adreno_dev->ram_cycles_lo,
+		&busy->bif_ram_cycles);
+
+	starved_ram = counter_delta(device, adreno_dev->starved_ram_lo,
+		&busy->bif_starved_ram);
+
+	ram_cycles += counter_delta(device,
+		adreno_dev->ram_cycles_lo_ch1_read,
+		&busy->bif_ram_cycles_read_ch1);
+
+	ram_cycles += counter_delta(device,
+		adreno_dev->ram_cycles_lo_ch0_write,
+		&busy->bif_ram_cycles_write_ch0);
+
+	ram_cycles += counter_delta(device,
+		adreno_dev->ram_cycles_lo_ch1_write,
+		&busy->bif_ram_cycles_write_ch1);
+
+	starved_ram += counter_delta(device,
+		adreno_dev->starved_ram_lo_ch1,
+		&busy->bif_starved_ram_ch1);
+
+	stats->ram_time = ram_cycles;
+	stats->ram_wait = starved_ram;
+}
+
+static void gen8_power_stats(struct adreno_device *adreno_dev,
+		struct kgsl_power_stats *stats)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_busy_data *busy = &adreno_dev->busy_data;
+	u64 gpu_busy;
+
+	/* Set the GPU busy counter for frequency scaling */
+	gpu_busy = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_0,
+		&busy->gpu_busy);
+
+	stats->busy_time = gpu_busy * 10;
+	do_div(stats->busy_time, 192);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) {
+		u32 ifpc = counter_delta(device,
+			GEN8_GMUCX_POWER_COUNTER_XOCLK_L_4,
+			&busy->num_ifpc);
+
+		adreno_dev->ifpc_count += ifpc;
+		if (ifpc > 0)
+			trace_adreno_ifpc_count(adreno_dev->ifpc_count);
+	}
+
+	if (device->pwrctrl.bus_control)
+		gen8_read_bus_stats(device, stats, busy);
+
+	if (adreno_dev->bcl_enabled) {
+		u32 a, b, c, bcl_throttle;
+
+		a = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_1,
+			&busy->throttle_cycles[0]);
+
+		b = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_2,
+			&busy->throttle_cycles[1]);
+
+		c = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_3,
+			&busy->throttle_cycles[2]);
+
+		if (a || b || c)
+			trace_kgsl_bcl_clock_throttling(a, b, c);
+
+		bcl_throttle = counter_delta(device,
+					GEN8_GMUCX_POWER_COUNTER_XOCLK_L_5, &busy->bcl_throttle);
+		/*
+		 * bcl_throttle counts the throttled duration in XO clock cycles. Convert it
+		 * to microseconds by dividing by the XO frequency, which is 19.2 MHz.
+		 */
+		adreno_dev->bcl_throttle_time_us += ((bcl_throttle * 10) / 192);
+	}
+}
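
The *10/192 scaling used for busy_time and for bcl_throttle_time_us above converts counts of the 19.2 MHz XO clock into microseconds (1 us corresponds to 19.2 cycles, so us = cycles * 10 / 192). A standalone sketch of the same arithmetic (function and variable names are illustrative, not from the driver):

    #include <stdint.h>

    /* Convert cycles of the 19.2 MHz always-on (XO) clock to microseconds:
     * us = cycles / 19.2 = (cycles * 10) / 192.
     */
    static uint64_t xo_cycles_to_us(uint64_t cycles)
    {
        return (cycles * 10) / 192;
    }

    /* Example: 19200 XO cycles -> 1000 us (1 ms). */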
+
+static int gen8_setproperty(struct kgsl_device_private *dev_priv,
+		u32 type, void __user *value, u32 sizebytes)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 enable;
+
+	if (type != KGSL_PROP_PWRCTRL)
+		return -ENODEV;
+
+	if (sizebytes != sizeof(enable))
+		return -EINVAL;
+
+	if (copy_from_user(&enable, value, sizeof(enable)))
+		return -EFAULT;
+
+	mutex_lock(&device->mutex);
+
+	if (enable) {
+		clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags);
+
+		kgsl_pwrscale_enable(device);
+	} else {
+		set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags);
+
+		if (!adreno_active_count_get(adreno_dev))
+			adreno_active_count_put(adreno_dev);
+
+		kgsl_pwrscale_disable(device, true);
+	}
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static void gen8_set_isdb_breakpoint_registers(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct clk *clk;
+	int ret;
+
+	if (!device->set_isdb_breakpoint || device->ftbl->is_hwcg_on(device)
+			|| device->qdss_gfx_virt == NULL || !device->force_panic)
+		return;
+
+	clk = clk_get(&device->pdev->dev, "apb_pclk");
+
+	if (IS_ERR(clk)) {
+		dev_err(device->dev, "Unable to get QDSS clock\n");
+		goto err;
+	}
+
+	ret = clk_prepare_enable(clk);
+
+	if (ret) {
+		dev_err(device->dev, "QDSS Clock enable error: %d\n", ret);
+		clk_put(clk);
+		goto err;
+	}
+
+	/* Issue break command for SPs */
+	isdb_write(device->qdss_gfx_virt, 0x0000);
+	isdb_write(device->qdss_gfx_virt, 0x1000);
+	isdb_write(device->qdss_gfx_virt, 0x2000);
+	isdb_write(device->qdss_gfx_virt, 0x3000);
+	isdb_write(device->qdss_gfx_virt, 0x4000);
+	isdb_write(device->qdss_gfx_virt, 0x5000);
+	isdb_write(device->qdss_gfx_virt, 0x6000);
+	isdb_write(device->qdss_gfx_virt, 0x7000);
+	isdb_write(device->qdss_gfx_virt, 0x8000);
+	isdb_write(device->qdss_gfx_virt, 0x9000);
+	isdb_write(device->qdss_gfx_virt, 0xa000);
+	isdb_write(device->qdss_gfx_virt, 0xb000);
+
+	clk_disable_unprepare(clk);
+	clk_put(clk);
+
+	return;
+
+err:
+	/* Do not force kernel panic if isdb writes did not go through */
+	device->force_panic = false;
+}
+
+static void gen8_swfuse_irqctrl(struct adreno_device *adreno_dev, bool state)
+{
+	kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_SW_FUSE_INT_MASK,
+		state ? GEN8_SW_FUSE_INT_MASK : 0);
+}
+
+static void gen8_lpac_fault_header(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_context *drawctxt;
+	u32 status = 0, rptr = 0, wptr = 0, ib1sz = 0, ib2sz = 0, ib3sz = 0;
+	u64 ib1base = 0, ib2base = 0, ib3base = 0;
+	bool gx_on = adreno_gx_is_on(adreno_dev);
+
+	drawctxt = ADRENO_CONTEXT(drawobj->context);
+	drawobj->context->last_faulted_cmd_ts = drawobj->timestamp;
+	drawobj->context->total_fault_count++;
+
+	pr_context(device, drawobj->context,
+		   "LPAC ctx %u ctx_type %s ts %u policy %lX dispatch_queue=%d\n",
+		   drawobj->context->id, kgsl_context_type(drawctxt->type),
+		   drawobj->timestamp, CMDOBJ(drawobj)->fault_recovery,
+		   drawobj->context->gmu_dispatch_queue);
+
+	pr_context(device, drawobj->context, "lpac cmdline: %s\n",
+		   drawctxt->base.proc_priv->cmdline);
+
+	if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on)
+		goto done;
+
+	kgsl_regread(device, GEN8_RBBM_LPAC_STATUS, &status);
+	kgsl_regread(device, GEN8_CP_RB_RPTR_LPAC, &rptr);
+	kgsl_regread(device, GEN8_CP_RB_WPTR_LPAC, &wptr);
+	gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE,
+			GEN8_CP_IB1_BASE_HI_PIPE, &ib1base, PIPE_LPAC, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &ib1sz, PIPE_LPAC, 0, 0);
+	gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE,
+			GEN8_CP_IB2_BASE_HI_PIPE, &ib2base, PIPE_LPAC, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &ib2sz, PIPE_LPAC, 0, 0);
+	gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE,
+			GEN8_CP_IB3_BASE_HI_PIPE, &ib3base, PIPE_LPAC, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz, PIPE_LPAC, 0, 0);
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+
+	pr_context(device, drawobj->context,
+		   "LPAC: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
+		   status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, ib3base, ib3sz);
+
+done:
+	trace_adreno_gpu_fault(drawobj->context->id, drawobj->timestamp, status,
+		rptr, wptr, ib1base, ib1sz, ib2base, ib2sz,
+		adreno_get_level(drawobj->context));
+
+}
+
+static void gen8_fault_header(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_context *drawctxt;
+	u32 status = 0, rptr = 0, wptr = 0, ib1sz = 0, ib2sz = 0, ib3sz, rptr_bv = 0;
+	u32 ib1sz_bv = 0, ib2sz_bv = 0, ib3sz_bv, gfx_status, gfx_br_status, gfx_bv_status;
+	u64 ib1base = 0, ib2base = 0, ib3base, ib1base_bv = 0, ib2base_bv, ib3base_bv;
+	u32 ctxt_id = 0, ts = 0;
+	int rb_id = -1;
+	bool gx_on = adreno_gx_is_on(adreno_dev);
+
+	if (drawobj) {
+		drawctxt = ADRENO_CONTEXT(drawobj->context);
+		drawobj->context->last_faulted_cmd_ts = drawobj->timestamp;
+		drawobj->context->total_fault_count++;
+		ctxt_id = drawobj->context->id;
+		ts = drawobj->timestamp;
+		rb_id = adreno_get_level(drawobj->context);
+
+		pr_context(device, drawobj->context, "ctx %u ctx_type %s ts %u policy %lX\n",
+			   drawobj->context->id, kgsl_context_type(drawctxt->type),
+			   drawobj->timestamp, CMDOBJ(drawobj)->fault_recovery);
+
+		pr_context(device, drawobj->context, "cmdline: %s\n",
+			   drawctxt->base.proc_priv->cmdline);
+	}
+
+	if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on)
+		goto done;
+
+	kgsl_regread(device, GEN8_RBBM_STATUS, &status);
+	kgsl_regread(device, GEN8_RBBM_GFX_STATUS, &gfx_status);
+	kgsl_regread(device, GEN8_RBBM_GFX_BV_STATUS, &gfx_bv_status);
+	kgsl_regread(device, GEN8_RBBM_GFX_BR_STATUS, &gfx_br_status);
+	kgsl_regread(device, GEN8_CP_RB_RPTR_BR, &rptr);
+	kgsl_regread(device, GEN8_CP_RB_WPTR_GC, &wptr);
+	kgsl_regread(device, GEN8_CP_RB_RPTR_BV, &rptr_bv);
+	gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE,
+			GEN8_CP_IB1_BASE_HI_PIPE, &ib1base, PIPE_BR, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &ib1sz, PIPE_BR, 0, 0);
+	gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE,
+			GEN8_CP_IB2_BASE_HI_PIPE, &ib2base, PIPE_BR, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &ib2sz, PIPE_BR, 0, 0);
+	gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE,
+			GEN8_CP_IB3_BASE_HI_PIPE, &ib3base, PIPE_BR, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz, PIPE_BR, 0, 0);
+	gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE,
+			GEN8_CP_IB1_BASE_HI_PIPE, &ib1base_bv, PIPE_BV, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &ib1sz_bv, PIPE_BV, 0, 0);
+	gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE,
+			GEN8_CP_IB2_BASE_HI_PIPE, &ib2base_bv, PIPE_BV, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &ib2sz_bv, PIPE_BV, 0, 0);
+	gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE,
+			GEN8_CP_IB3_BASE_HI_PIPE, &ib3base_bv, PIPE_BV, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz_bv, PIPE_BV, 0, 0);
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+
+	dev_err(device->dev,
+		"status %8.8X gfx_status %8.8X gfx_br_status %8.8X gfx_bv_status %8.8X\n",
+		status, gfx_status, gfx_br_status, gfx_bv_status);
+
+	dev_err(device->dev,
+		"BR: rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
+		rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, ib3base, ib3sz);
+
+	dev_err(device->dev,
+		"BV: rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
+		rptr_bv, wptr, ib1base_bv, ib1sz_bv, ib2base_bv, ib2sz_bv, ib3base_bv, ib3sz_bv);
+
+done:
+	trace_adreno_gpu_fault(ctxt_id, ts, status,
+		rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, rb_id);
+}
+
+const struct gen8_gpudev adreno_gen8_hwsched_gpudev = {
+	.base = {
+		.reg_offsets = gen8_register_offsets,
+		.probe = gen8_hwsched_probe,
+		.snapshot = gen8_hwsched_snapshot,
+		.irq_handler = gen8_irq_handler,
+		.iommu_fault_block = gen8_iommu_fault_block,
+		.preemption_context_init = gen8_preemption_context_init,
+		.context_detach = gen8_hwsched_context_detach,
+		.read_alwayson = gen8_read_alwayson,
+		.reset = gen8_hwsched_reset_replay,
+		.power_ops = &gen8_hwsched_power_ops,
+		.power_stats = gen8_power_stats,
+		.setproperty = gen8_setproperty,
+		.hw_isidle = gen8_hw_isidle,
+		.add_to_va_minidump = gen8_hwsched_add_to_minidump,
+		.gx_is_on = gen8_gmu_gx_is_on,
+		.send_recurring_cmdobj = gen8_hwsched_send_recurring_cmdobj,
+		.perfcounter_remove = gen8_perfcounter_remove,
+		.set_isdb_breakpoint_registers = gen8_set_isdb_breakpoint_registers,
+		.context_destroy = gen8_hwsched_context_destroy,
+		.lpac_store = gen8_lpac_store,
+		.get_uche_trap_base = gen8_get_uche_trap_base,
+		.fault_header = gen8_fault_header,
+		.lpac_fault_header = gen8_lpac_fault_header,
+	},
+	.hfi_probe = gen8_hwsched_hfi_probe,
+	.hfi_remove = gen8_hwsched_hfi_remove,
+	.handle_watchdog = gen8_hwsched_handle_watchdog,
+};
+
+const struct gen8_gpudev adreno_gen8_gmu_gpudev = {
+	.base = {
+		.reg_offsets = gen8_register_offsets,
+		.probe = gen8_gmu_device_probe,
+		.snapshot = gen8_gmu_snapshot,
+		.irq_handler = gen8_irq_handler,
+		.rb_start = gen8_rb_start,
+		.gpu_keepalive = gen8_gpu_keepalive,
+		.hw_isidle = gen8_hw_isidle,
+		.iommu_fault_block = gen8_iommu_fault_block,
+		.reset = gen8_gmu_reset,
+		.preemption_schedule = gen8_preemption_schedule,
+		.preemption_context_init = gen8_preemption_context_init,
+		.read_alwayson = gen8_read_alwayson,
+		.power_ops = &gen8_gmu_power_ops,
+		.remove = gen8_remove,
+		.ringbuffer_submitcmd = gen8_ringbuffer_submitcmd,
+		.power_stats = gen8_power_stats,
+		.setproperty = gen8_setproperty,
+		.add_to_va_minidump = gen8_gmu_add_to_minidump,
+		.gx_is_on = gen8_gmu_gx_is_on,
+		.perfcounter_remove = gen8_perfcounter_remove,
+		.set_isdb_breakpoint_registers = gen8_set_isdb_breakpoint_registers,
+		.swfuse_irqctrl = gen8_swfuse_irqctrl,
+		.get_uche_trap_base = gen8_get_uche_trap_base,
+		.fault_header = gen8_fault_header,
+	},
+	.hfi_probe = gen8_gmu_hfi_probe,
+	.handle_watchdog = gen8_gmu_handle_watchdog,
+};

+ 615 - 0
qcom/opensource/graphics-kernel/adreno_gen8.h

@@ -0,0 +1,615 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_GEN8_H_
+#define _ADRENO_GEN8_H_
+
+#include <linux/delay.h>
+
+#include "adreno_gen8_gmu.h"
+#include "gen8_reg.h"
+
+/* Forward struct declaration */
+struct gen8_snapshot_block_list;
+
+extern const struct adreno_power_ops gen8_gmu_power_ops;
+extern const struct adreno_power_ops gen8_hwsched_power_ops;
+extern const struct adreno_perfcounters adreno_gen8_perfcounters;
+
+struct gen8_gpudev {
+	struct adreno_gpudev base;
+	int (*hfi_probe)(struct adreno_device *adreno_dev);
+	void (*hfi_remove)(struct adreno_device *adreno_dev);
+	void (*handle_watchdog)(struct adreno_device *adreno_dev);
+};
+
+extern const struct gen8_gpudev adreno_gen8_gmu_gpudev;
+extern const struct gen8_gpudev adreno_gen8_hwsched_gpudev;
+
+struct gen8_nonctxt_overrides {
+	/** offset: Dword offset of the register to write */
+	u32 offset;
+	/** pipelines: Pipelines to write */
+	u32 pipelines;
+	/** val: Value to be written to the register */
+	u32 val;
+	/** set: True for user override request */
+	bool set;
+	/**
+	 * list_type: 0 if the register is already present in any of the existing static pwrup lists
+	 *            1 if the register fits into the IFPC-only static pwrup list
+	 *            2 if the register fits into the IFPC + preemption static list
+	 *            3 if the register fits into the external powerup list
+	 */
+	u32 list_type;
+};
+
+/**
+ * struct gen8_device - Container for the gen8_device
+ */
+struct gen8_device {
+	/** @gmu: Container for the gen8 GMU device */
+	struct gen8_gmu_device gmu;
+	/** @adreno_dev: Container for the generic adreno device */
+	struct adreno_device adreno_dev;
+	/** @aperture: The last value that the host aperture register was programmed to */
+	u32 aperture;
+	/** @ext_pwrup_list_len: External pwrup reglist length */
+	u16 ext_pwrup_list_len;
+	/**
+	 * @nc_overrides: Noncontext registers overrides whitelist if defined,
+	 * @nc_overrides: Non-context register overrides whitelist; if defined,
+	 * it must be null terminated
+	struct gen8_nonctxt_overrides *nc_overrides;
+	/** @nc_mutex: Mutex to protect nc_overrides updates */
+	struct mutex nc_mutex;
+	/** @nc_overrides_enabled: Set through debugfs path when any override is enabled */
+	bool nc_overrides_enabled;
+};
+
+/**
+ * struct gen8_pwrup_extlist - container for a powerup external reglist
+ */
+struct gen8_pwrup_extlist {
+	/** offset: Dword offset of the register to write */
+	u32 offset;
+	/** pipelines: pipelines to write */
+	u32 pipelines;
+};
+
+/**
+ * struct gen8_protected_regs - container for a protect register span
+ */
+struct gen8_protected_regs {
+	/** @reg: Physical protected mode register to write to */
+	u32 reg;
+	/** @start: Dword offset of the starting register in the range */
+	u32 start;
+	/** @end: Dword offset of the ending register in the range (inclusive) */
+	u32 end;
+	/**
+	 * @noaccess: 1 if the register should not be accessible from
+	 * userspace, 0 if it can be read (but not written)
+	 */
+	u32 noaccess;
+};
+
+/**
+ * struct gen8_nonctxt_regs - Container for non context registers span
+ */
+struct gen8_nonctxt_regs {
+	/** @offset: Dword offset of the register to write */
+	u32 offset;
+	/** @val: Value to write */
+	u32 val;
+	/** @pipelines: pipelines to write */
+	u32 pipelines;
+};
+
+/**
+ * struct adreno_gen8_core - gen8 specific GPU core definitions
+ */
+struct adreno_gen8_core {
+	/** @base: Container for the generic GPU definitions */
+	struct adreno_gpu_core base;
+	/** @gmu_fw_version: Minimum firmware version required to support this core */
+	u32 gmu_fw_version;
+	/** @sqefw_name: Name of the SQE microcode file */
+	const char *sqefw_name;
+	/** @aqefw_name: Name of the AQE microcode file */
+	const char *aqefw_name;
+	/** @gmufw_name: Name of the GMU firmware file */
+	const char *gmufw_name;
+	/** @zap_name: Name of the CPZ zap file */
+	const char *zap_name;
+	/** @ao_hwcg: List of registers and values to write for HWCG in AO block */
+	const struct kgsl_regmap_list *ao_hwcg;
+	/** @ao_hwcg_count: Number of registers in @ao_hwcg */
+	u32 ao_hwcg_count;
+	/** @gbif: List of registers and values to write for GBIF */
+	const struct kgsl_regmap_list *gbif;
+	/** @gbif_count: Number of registers in @gbif */
+	u32 gbif_count;
+	/** @hang_detect_cycles: Hang detect counter timeout value */
+	u32 hang_detect_cycles;
+	/** @protected_regs: Array of protected registers for the target */
+	const struct gen8_protected_regs *protected_regs;
+	/** @nonctxt_regs: Array of non context register list */
+	const struct gen8_nonctxt_regs *nonctxt_regs;
+	/** @ctxt_record_size: Size of the preemption record in bytes */
+	u64 ctxt_record_size;
+	/** @highest_bank_bit: Highest bank bit value */
+	u32 highest_bank_bit;
+	/** @gen8_snapshot_block_list: Device-specific blocks dumped in the snapshot */
+	const struct gen8_snapshot_block_list *gen8_snapshot_block_list;
+	/** @gmu_hub_clk_freq: GMU hub interface clock frequency */
+	u64 gmu_hub_clk_freq;
+	/**
+	 * @bcl_data: Bit 0 contains the response type for BCL alarms and bits 1:21 control the SID
+	 * values used to configure throttle levels for BCL alarm levels 0-2. If the SID values are
+	 * not set, GMU firmware uses default throttle levels.
+	 */
+	u32 bcl_data;
+	/** @preempt_level: Preemption level valid ranges [0 to 2] */
+	u32 preempt_level;
+	/** @qos_value: GPU qos value to set for each RB. */
+	const u32 *qos_value;
+	/**
+	 * @acv_perfmode_ddr_freq: Vote perfmode when DDR frequency >= acv_perfmode_ddr_freq.
+	 * If not specified, vote perfmode for highest DDR level only.
+	 */
+	u32 acv_perfmode_ddr_freq;
+	/** @rt_bus_hint: IB level hint for real time clients i.e. RB-0 */
+	const u32 rt_bus_hint;
+	/** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */
+	bool fast_bus_hint;
+	/** @noc_timeout_us: GPU config NOC port timeout in usec */
+	u32 noc_timeout_us;
+};
+
+/**
+ * struct gen8_cp_preemption_record - CP context record for
+ * preemption.
+ * @magic: (00) Value at this offset must be equal to
+ * GEN8_CP_CTXRECORD_MAGIC_REF.
+ * @info: (04) Type of record. Written non-zero (usually) by CP.
+ * We must set this to zero for all ringbuffers.
+ * @errno: (08) Error code. Initialize this to GEN8_CP_CTXRECORD_ERROR_NONE.
+ * CP will update to another value if a preemption error occurs.
+ * @data: (12) DATA field in YIELD and SET_MARKER packets.
+ * Written by CP when switching out. Not used on switch-in. Initialized to 0.
+ * @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this.
+ * @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this.
+ * @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this.
+ * @_pad28: (28) Reserved/padding.
+ * @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
+ * @rbase: (40) RB_BASE_LO|HI saved and restored.
+ * @counter: (48) Pointer to preemption counter.
+ * @bv_rptr_addr: (56) BV_RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
+ */
+struct gen8_cp_preemption_record {
+	u32 magic;
+	u32 info;
+	u32 errno;
+	u32 data;
+	u32 cntl;
+	u32 rptr;
+	u32 wptr;
+	u32 _pad28;
+	u64 rptr_addr;
+	u64 rbase;
+	u64 counter;
+	u64 bv_rptr_addr;
+};
+
+/**
+ * struct gen8_cp_smmu_info - CP preemption SMMU info.
+ * @magic: (00) The value at this offset must be equal to
+ * GEN8_CP_SMMU_INFO_MAGIC_REF
+ * @_pad4: (04) Reserved/padding
+ * @ttbr0: (08) Base address of the page table for the incoming context
+ * @asid: (16) Address Space IDentifier (ASID) of the incoming context
+ * @context_idr: (20) Context Identification Register value
+ * @context_bank: (24) Which Context Bank in SMMU to update
+ */
+struct gen8_cp_smmu_info {
+	u32 magic;
+	u32 _pad4;
+	u64 ttbr0;
+	u32 asid;
+	u32 context_idr;
+	u32 context_bank;
+};
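
The byte offsets quoted in the kernel-doc for the two structures above can be sanity-checked at compile time. A hedged sketch, assuming the kernel's single-argument static_assert() and offsetof() from <linux/build_bug.h> and <linux/stddef.h> (these checks are not part of the original header):

    static_assert(offsetof(struct gen8_cp_preemption_record, rptr_addr) == 32);
    static_assert(offsetof(struct gen8_cp_preemption_record, rbase) == 40);
    static_assert(offsetof(struct gen8_cp_preemption_record, counter) == 48);
    static_assert(offsetof(struct gen8_cp_preemption_record, bv_rptr_addr) == 56);
    static_assert(offsetof(struct gen8_cp_smmu_info, ttbr0) == 8);
    static_assert(offsetof(struct gen8_cp_smmu_info, asid) == 16);
    static_assert(offsetof(struct gen8_cp_smmu_info, context_bank) == 24);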
+
+#define GEN8_CP_SMMU_INFO_MAGIC_REF		0x241350d5UL
+
+#define GEN8_CP_CTXRECORD_MAGIC_REF		0xae399d6eUL
+/* Size of each CP preemption record */
+#define GEN8_CP_CTXRECORD_SIZE_IN_BYTES		(13536 * SZ_1K)
+/* Size of preemption record to be dumped in snapshot */
+#define GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES	(128 * 1024)
+/* Size of the user context record block (in bytes) */
+#define GEN8_CP_CTXRECORD_USER_RESTORE_SIZE	(192 * 1024)
+/* Size of the performance counter save/restore block (in bytes) */
+#define GEN8_CP_PERFCOUNTER_SAVE_RESTORE_SIZE	(4 * 1024)
+
+#define GEN8_CP_RB_CNTL_DEFAULT \
+	(FIELD_PREP(GENMASK(7, 0), ilog2(KGSL_RB_DWORDS >> 1)) | \
+	 FIELD_PREP(GENMASK(12, 8), ilog2(4)))
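
As a worked example of the macro above, assuming KGSL_RB_DWORDS is 8192 (a 32 KB ring; the real value is defined elsewhere in the driver and this number is only an assumption for illustration):

    /* ilog2(8192 >> 1) = 12 fills bits 7:0  -> 0x0000000c
     * ilog2(4)         = 2  fills bits 12:8 -> 0x00000200
     * GEN8_CP_RB_CNTL_DEFAULT would then evaluate to 0x0000020c.
     */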
+
+/* Size of the CP_INIT pm4 stream in dwords */
+#define GEN8_CP_INIT_DWORDS 10
+
+#define GEN8_INT_MASK \
+	((1 << GEN8_INT_AHBERROR) |			\
+	 (1 << GEN8_INT_ATBASYNCFIFOOVERFLOW) |		\
+	 (1 << GEN8_INT_GPCERROR) |			\
+	 (1 << GEN8_INT_SWINTERRUPT) |			\
+	 (1 << GEN8_INT_HWERROR) |			\
+	 (1 << GEN8_INT_PM4CPINTERRUPT) |		\
+	 (1 << GEN8_INT_RB_DONE_TS) |			\
+	 (1 << GEN8_INT_CACHE_CLEAN_TS) |		\
+	 (1 << GEN8_INT_ATBBUSOVERFLOW) |		\
+	 (1 << GEN8_INT_HANGDETECTINTERRUPT) |		\
+	 (1 << GEN8_INT_OUTOFBOUNDACCESS) |		\
+	 (1 << GEN8_INT_UCHETRAPINTERRUPT) |		\
+	 (1 << GEN8_INT_TSBWRITEERROR) |		\
+	 (1 << GEN8_INT_SWFUSEVIOLATION))
+
+#define GEN8_HWSCHED_INT_MASK \
+	((1 << GEN8_INT_AHBERROR) |			\
+	 (1 << GEN8_INT_ATBASYNCFIFOOVERFLOW) |		\
+	 (1 << GEN8_INT_ATBBUSOVERFLOW) |		\
+	 (1 << GEN8_INT_OUTOFBOUNDACCESS) |		\
+	 (1 << GEN8_INT_UCHETRAPINTERRUPT) |		\
+	 (1 << GEN8_INT_TSBWRITEERROR))
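
Both masks above are plain bitmasks over the GEN8_INT_* bit positions, so an individual source can be tested against a raw status word with BIT(). A minimal sketch (the helper is illustrative, not from the driver):

    /* True if the hardware-error source is set in a raw interrupt status word */
    static bool example_is_hw_error(u32 int_status)
    {
        return int_status & BIT(GEN8_INT_HWERROR);
    }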
+
+/* GEN8 CX MISC interrupt bits */
+#define GEN8_CX_MISC_GPU_CC_IRQ	31
+
+#define GEN8_CX_MISC_INT_MASK	BIT(GEN8_CX_MISC_GPU_CC_IRQ)
+
+/**
+ * to_gen8_core - return the gen8 specific GPU core struct
+ * @adreno_dev: An Adreno GPU device handle
+ *
+ * Returns:
+ * A pointer to the gen8 specific GPU core struct
+ */
+static inline const struct adreno_gen8_core *
+to_gen8_core(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpu_core *core = adreno_dev->gpucore;
+
+	return container_of(core, struct adreno_gen8_core, base);
+}
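
A minimal usage sketch for the accessor above (the wrapper function is hypothetical; hang_detect_cycles is one of the fields of struct adreno_gen8_core defined earlier in this header):

    static u32 example_hang_detect_cycles(struct adreno_device *adreno_dev)
    {
        const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);

        /* Read a gen8-specific tunable through the container struct */
        return gen8_core->hang_detect_cycles;
    }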
+
+/* Preemption functions */
+void gen8_preemption_trigger(struct adreno_device *adreno_dev, bool atomic);
+void gen8_preemption_schedule(struct adreno_device *adreno_dev);
+void gen8_preemption_start(struct adreno_device *adreno_dev);
+int gen8_preemption_init(struct adreno_device *adreno_dev);
+
+u32 gen8_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+		u32 *cmds);
+u32 gen8_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 *cmds);
+
+u32 gen8_set_marker(u32 *cmds, enum adreno_cp_marker_type type);
+
+void gen8_preemption_callback(struct adreno_device *adreno_dev, int bit);
+
+int gen8_preemption_context_init(struct kgsl_context *context);
+
+void gen8_preemption_context_destroy(struct kgsl_context *context);
+
+void gen8_preemption_prepare_postamble(struct adreno_device *adreno_dev);
+
+void gen8_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+void gen8_crashdump_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_snapshot_external_core_regs - Dump external registers into snapshot
+ * @device: Pointer to KGSL device
+ * @snapshot: Pointer to the snapshot
+ *
+ * Dump external core registers like GPUCC, CPR into GPU snapshot.
+ */
+void gen8_snapshot_external_core_regs(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot);
+
+/**
+ * gen8_enable_ahb_timeout_detection - Program AHB control registers
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Program AHB control registers to enable AHB timeout detection.
+ *
+ */
+void gen8_enable_ahb_timeout_detection(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_start - Program gen8 registers
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function does all gen8 register programming every
+ * time we boot the gpu
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen8_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_init - Initialize gen8 resources
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function does gen8 specific one time initialization
+ * and is invoked when the very first client opens a
+ * kgsl instance
+ *
+ * Return: Zero on success and negative error on failure
+ */
+int gen8_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_cx_timer_init - Initialize the CX timer on Gen8 devices
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Synchronize the GPU CX timer (if we have one) with the CPU timer
+ */
+void gen8_cx_timer_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_get_gpu_feature_info - Get hardware supported feature info
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Get HW supported feature info and update software feature configuration
+ */
+void gen8_get_gpu_feature_info(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_rb_start - Gen8 specific ringbuffer setup
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function does gen8 specific ringbuffer setup and
+ * attempts to submit CP INIT and bring GPU out of secure mode
+ *
+ * Return: Zero on success and negative error on failure
+ */
+int gen8_rb_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_microcode_read - Get the cp microcode from the filesystem
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * This function gets the firmware from the filesystem and sets up
+ * the microcode global buffer
+ *
+ * Return: Zero on success and negative error on failure
+ */
+int gen8_microcode_read(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_probe_common - Probe common gen8 resources
+ * @pdev: Pointer to the platform device
+ * @adreno_dev: Pointer to the adreno device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore structure
+ *
+ * This function sets up the gen8 resources common across all
+ * gen8 targets
+ */
+int gen8_probe_common(struct platform_device *pdev,
+	struct adreno_device *adreno_dev, u32 chipid,
+	const struct adreno_gpu_core *gpucore);
+
+/**
+ * gen8_hw_isidle - Check whether gen8 gpu is idle or not
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Return: True if gpu is idle, otherwise false
+ */
+bool gen8_hw_isidle(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_spin_idle_debug - Debug logging used when gpu fails to idle
+ * @adreno_dev: An Adreno GPU handle
+ * @str: Message to include in the log output
+ *
+ * This function logs interesting registers and triggers a snapshot
+ */
+void gen8_spin_idle_debug(struct adreno_device *adreno_dev,
+	const char *str);
+
+/**
+ * gen8_perfcounter_update - Update the IFPC perfcounter list
+ * @adreno_dev: An Adreno GPU handle
+ * @reg: Perfcounter reg struct to add/remove to the list
+ * @update_reg: true if the perfcounter needs to be programmed by the CPU
+ * @pipe: pipe id for CP aperture control
+ * @flags: Flags set for requested perfcounter group
+ *
+ * Return: 0 on success or -EBUSY if the lock couldn't be taken
+ */
+int gen8_perfcounter_update(struct adreno_device *adreno_dev,
+	struct adreno_perfcount_register *reg, bool update_reg, u32 pipe,
+	unsigned long flags);
+
+/**
+ * gen8_ringbuffer_init - Initialize the ringbuffers
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Initialize the ringbuffer(s) for gen8.
+ * Return: 0 on success or negative on failure
+ */
+int gen8_ringbuffer_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_ringbuffer_submitcmd - Submit a user command to the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ * @cmdobj: Pointer to a user command object
+ * @flags: Internal submit flags
+ * @time: Optional pointer to an adreno_submit_time container
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen8_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time);
+
+/**
+ * gen8_ringbuffer_submit - Submit a command to the ringbuffer
+ * @rb: Ringbuffer pointer
+ * @time: Optional pointer to an adreno_submit_time container
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time);
+
+/**
+ * gen8_fenced_write - Write to a fenced register
+ * @adreno_dev: An Adreno GPU handle
+ * @offset: Register offset
+ * @value: Value to write
+ * @mask: Expected FENCE_STATUS for successful write
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen8_fenced_write(struct adreno_device *adreno_dev, u32 offset,
+		u32 value, u32 mask);
+
+/**
+ * gen8_ringbuffer_addcmds - Wrap and submit commands to the ringbuffer
+ * @adreno_dev: An Adreno GPU handle
+ * @rb: Ringbuffer pointer
+ * @drawctxt: Draw context submitting the commands
+ * @flags: Submission flags
+ * @in: Input buffer to write to ringbuffer
+ * @dwords: Dword length of @in
+ * @timestamp: Draw context timestamp for the submission
+ * @time: Optional pointer to an adreno_submit_time container
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen8_ringbuffer_addcmds(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 flags, u32 *in, u32 dwords, u32 timestamp,
+		struct adreno_submit_time *time);
+
+/**
+ * gen8_cp_init_cmds - Create the CP_INIT commands
+ * @adreno_dev: An Adreno GPU handle
+ * @cmd: Buffer to write the CP_INIT commands into
+ */
+void gen8_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds);
+
+/**
+ * gen8_gmu_hfi_probe - Probe Gen8 HFI specific data
+ * @adreno_dev: An Adreno GPU handle
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen8_gmu_hfi_probe(struct adreno_device *adreno_dev);
+
+static inline const struct gen8_gpudev *
+to_gen8_gpudev(const struct adreno_gpudev *gpudev)
+{
+	return container_of(gpudev, struct gen8_gpudev, base);
+}
+
+/**
+ * gen8_reset_preempt_records - Reset the preemption buffers
+ * @adreno_dev: Handle to the adreno device
+ *
+ * Reset the preemption records at the time of hard reset
+ */
+void gen8_reset_preempt_records(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_rdpm_mx_freq_update - Update the mx frequency
+ * @gmu: An Adreno GMU handle
+ * @freq: Frequency in KHz
+ *
+ * This function communicates GPU mx frequency (in MHz) changes to rdpm.
+ */
+void gen8_rdpm_mx_freq_update(struct gen8_gmu_device *gmu, u32 freq);
+
+/**
+ * gen8_rdpm_cx_freq_update - Update the cx frequency
+ * @gmu: An Adreno GMU handle
+ * @freq: Frequency in KHz
+ *
+ * This function communicates GPU cx frequency (in MHz) changes to rdpm.
+ */
+void gen8_rdpm_cx_freq_update(struct gen8_gmu_device *gmu, u32 freq);
+
+/**
+ * gen8_scm_gpu_init_cx_regs - Program gpu regs for feature support
+ * @adreno_dev: Handle to the adreno device
+ *
+ * Program GPU registers for feature support. The SCM call for this
+ * is available from kernel version 6.0 onwards.
+ *
+ * Return: 0 on success or negative on failure
+ */
+int gen8_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_legacy_snapshot_registers - Dump registers for GPU/GMU
+ * @device: Handle to the KGSL device
+ * @buf: Target buffer to copy the data
+ * @remain: Buffer size remaining for dump
+ * @priv: Private data to dump the registers
+ *
+ * Return: Size of the section
+ */
+size_t gen8_legacy_snapshot_registers(struct kgsl_device *device,
+		 u8 *buf, size_t remain, void *priv);
+
+/**
+ * gen8_regread64_aperture - Read 64 bit register values
+ * @device: Handle to the KGSL device
+ * @offsetwords_lo: Lower 32 bit address to read
+ * @offsetwords_hi: Higher 32 bit address to read
+ * @value: The value of register at offsetwords
+ * @pipe: Pipe for which the register is to be read
+ * @slice_id: Slice for which the register is to be read
+ * @use_slice_id: Set if the value to be read is from a sliced register
+ *
+ * This function reads the 64 bit value for registers
+ */
+void gen8_regread64_aperture(struct kgsl_device *device,
+	u32 offsetwords_lo, u32 offsetwords_hi, u64 *value, u32 pipe,
+	u32 slice_id, u32 use_slice_id);
+
+/**
+ * gen8_regread_aperture - Read 32 bit register values
+ * @device: Handle to the KGSL device
+ * @offsetwords: 32 bit address to read
+ * @value: The value of register at offsetwords
+ * @pipe: Pipe for which the register is to be read
+ * @slice_id: Slice for which the register is to be read
+ * @use_slice_id: Set if the value to be read is from a sliced register
+ *
+ * This function reads the 32 bit value for registers
+ */
+void gen8_regread_aperture(struct kgsl_device *device,
+	u32 offsetwords, u32 *value, u32 pipe, u32 slice_id, u32 use_slice_id);
+
+/**
+ * gen8_host_aperture_set - Program CP aperture register
+ * @adreno_dev: Handle to the adreno device
+ * @pipe_id: Pipe for which the register is to be set
+ * @slice_id: Slice for which the register is to be set
+ * @use_slice_id: Set if the register being accessed is sliced
+ *
+ * This function programs CP aperture register
+ */
+void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id,
+		u32 slice_id, u32 use_slice_id);
+#endif

+ 2108 - 0
qcom/opensource/graphics-kernel/adreno_gen8_3_0_snapshot.h

@@ -0,0 +1,2108 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __ADRENO_GEN8_3_0_SNAPSHOT_H
+#define __ADRENO_GEN8_3_0_SNAPSHOT_H
+
+#include "adreno_gen8_snapshot.h"
+
+static const u32 gen8_3_0_debugbus_blocks[] = {
+	DEBUGBUS_GMU_GX_GC_US_I_0,
+	DEBUGBUS_DBGC_GC_US_I_0,
+	DEBUGBUS_RBBM_GC_US_I_0,
+	DEBUGBUS_LARC_GC_US_I_0,
+	DEBUGBUS_COM_GC_US_I_0,
+	DEBUGBUS_HLSQ_GC_US_I_0,
+	DEBUGBUS_CGC_GC_US_I_0,
+	DEBUGBUS_VSC_GC_US_I_0_0,
+	DEBUGBUS_VSC_GC_US_I_0_1,
+	DEBUGBUS_UFC_GC_US_I_0,
+	DEBUGBUS_UFC_GC_US_I_1,
+	DEBUGBUS_CP_GC_US_I_0_0,
+	DEBUGBUS_CP_GC_US_I_0_1,
+	DEBUGBUS_CP_GC_US_I_0_2,
+	DEBUGBUS_PC_BR_US_I_0,
+	DEBUGBUS_PC_BV_US_I_0,
+	DEBUGBUS_GPC_BR_US_I_0,
+	DEBUGBUS_GPC_BV_US_I_0,
+	DEBUGBUS_VPC_BR_US_I_0,
+	DEBUGBUS_VPC_BV_US_I_0,
+	DEBUGBUS_UCHE_WRAPPER_GC_US_I_0,
+	DEBUGBUS_UCHE_GC_US_I_0,
+	DEBUGBUS_CP_GC_S_0_I_0,
+	DEBUGBUS_PC_BR_S_0_I_0,
+	DEBUGBUS_PC_BV_S_0_I_0,
+	DEBUGBUS_TESS_GC_S_0_I_0,
+	DEBUGBUS_TSEBE_GC_S_0_I_0,
+	DEBUGBUS_RAS_GC_S_0_I_0,
+	DEBUGBUS_LRZ_BR_S_0_I_0,
+	DEBUGBUS_LRZ_BV_S_0_I_0,
+	DEBUGBUS_VFDP_GC_S_0_I_0,
+	DEBUGBUS_GPC_BR_S_0_I_0,
+	DEBUGBUS_GPC_BV_S_0_I_0,
+	DEBUGBUS_VPCFE_BR_S_0_I_0,
+	DEBUGBUS_VPCFE_BV_S_0_I_0,
+	DEBUGBUS_VPCBE_BR_S_0_I_0,
+	DEBUGBUS_VPCBE_BV_S_0_I_0,
+	DEBUGBUS_DBGC_GC_S_0_I_0,
+	DEBUGBUS_LARC_GC_S_0_I_0,
+	DEBUGBUS_RBBM_GC_S_0_I_0,
+	DEBUGBUS_CCRE_GC_S_0_I_0,
+	DEBUGBUS_CGC_GC_S_0_I_0,
+	DEBUGBUS_GMU_GC_S_0_I_0,
+	DEBUGBUS_SLICE_GC_S_0_I_0,
+	DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0,
+	DEBUGBUS_USP_GC_S_0_I_0,
+	DEBUGBUS_USPTP_GC_S_0_I_0,
+	DEBUGBUS_USPTP_GC_S_0_I_1,
+	DEBUGBUS_TP_GC_S_0_I_0,
+	DEBUGBUS_TP_GC_S_0_I_1,
+	DEBUGBUS_RB_GC_S_0_I_0,
+	DEBUGBUS_CCU_GC_S_0_I_0,
+	DEBUGBUS_HLSQ_GC_S_0_I_0,
+	DEBUGBUS_VFD_GC_S_0_I_0,
+};
+
+static struct gen8_shader_block gen8_3_0_shader_blocks[] = {
+	{ TP0_TMO_DATA, 0x0200, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ TP0_SMO_DATA, 0x0080, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ TP0_MIPMAP_BASE_DATA, 0x0080, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_INST_DATA_3, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_INST_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_INST_DATA_1, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_0_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_1_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_2_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_3_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_4_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_5_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_6_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_7_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_CB_RAM, 0x0390, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_13_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_14_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_INST_TAG, 0x0100, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_INST_DATA_2, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_TMO_TAG, 0x0080, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_SMO_TAG, 0x0080, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_STATE_DATA, 0x0040, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_HWAVE_RAM, 0x0100, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_L0_INST_BUF, 0x0080, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_8_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_9_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_10_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_11_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ SP_LB_12_DATA, 0x0800, 1, 2, PIPE_BR, USPTP, MAX_PHYSICAL_SLICES, 1},
+	{ HLSQ_DATAPATH_DSTR_META, 0x0170, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_DATAPATH_DSTR_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_DESC_REMAP_META, 0x0018, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_DESC_REMAP_META, 0x000C, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_DESC_REMAP_META, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_SLICE_TOP_META, 0x0048, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_SLICE_TOP_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_L2STC_TAG_RAM, 0x0200, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_L2STC_INFO_CMD, 0x0474, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x0400, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CHUNK_CVS_RAM, 0x01C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_CHUNK_CPS_RAM, 0x0300, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CHUNK_CVS_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_CHUNK_CPS_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_ICB_CVS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_ICB_CPS_CB_BASE_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CVS_MISC_RAM, 0x0540, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_CPS_MISC_RAM, 0x0640, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CPS_MISC_RAM, 0x00B0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_CPS_MISC_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_INST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_INST_RAM, 0x0200, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CVS_CONST_RAM, 0x0800, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CPS_CONST_RAM, 0x0800, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CVS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_CPS_MISC_RAM_TAG, 0x0050, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_CPS_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_INST_RAM_TAG, 0x0014, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_INST_RAM_TAG, 0x0010, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_INST_RAM_TAG, 0x0004, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CVS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0040, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_CPS_CONST_RAM_TAG, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_LOCAL_MISC_RAM, 0x03C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_LOCAL_MISC_RAM, 0x0280, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_LOCAL_MISC_RAM, 0x0050, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_GFX_LOCAL_MISC_RAM_TAG, 0x0008, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_INST_RAM_1, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_STPROC_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_SLICE_BACKEND_META, 0x00C0, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_INST_RAM_2, 0x0800, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_DATAPATH_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_FRONTEND_META, 0x0080, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+	{ HLSQ_INDIRECT_META, 0x0010, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BR, HLSQ_STATE, 1, 1},
+	{ HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_BV, HLSQ_STATE, 1, 1},
+	{ HLSQ_BACKEND_META, 0x0020, 1, 1, PIPE_LPAC, HLSQ_STATE, 1, 1},
+};
+
+/*
+ * Block   : ['AHB_PRECD']
+ * pairs   : 1 (Regs:2)
+ */
+static const u32 gen8_3_0_ahb_precd_gpu_registers[] = {
+	 0x00012, 0x00013,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_ahb_precd_gpu_registers), 8));
+
+/*
+ * Block   : ['AHB_PRECD']
+ * REGION  : SLICE
+ * pairs   : 1 (Regs:3)
+ */
+static const u32 gen8_3_0_ahb_precd_gpu_slice_slice_registers[] = {
+	 0x00580, 0x00582,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_ahb_precd_gpu_slice_slice_registers), 8));
+
+/*
+ * Block   : ['AHB_SECURE']
+ * pairs   : 3 (Regs:7)
+ */
+static const u32 gen8_3_0_ahb_secure_gpu_registers[] = {
+	 0x0f400, 0x0f400, 0x0f800, 0x0f803, 0x0fc00, 0x0fc01,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_ahb_secure_gpu_registers), 8));
+
+/*
+ * Block   : ['GBIF']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 8 (Regs:59)
+ */
+static const u32 gen8_3_0_gbif_registers[] = {
+	 0x03c00, 0x03c0b, 0x03c40, 0x03c42, 0x03c45, 0x03c47, 0x03c49, 0x03c4e,
+	 0x03c50, 0x03c57, 0x03cc0, 0x03cc4, 0x03cc6, 0x03cd5, 0x03ce0, 0x03ce5,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gbif_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'GRAS', 'PC']
+ * Block   : ['RBBM', 'RDVM', 'UCHE']
+ * Block   : ['VFD', 'VPC', 'VSC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 120 (Regs:1161)
+ */
+static const u32 gen8_3_0_gpu_registers[] = {
+	 0x00000, 0x00000, 0x00002, 0x00002, 0x00008, 0x0000d, 0x00010, 0x00013,
+	 0x00015, 0x00016, 0x00018, 0x00018, 0x0001a, 0x0001a, 0x0001c, 0x0001c,
+	 0x00028, 0x0002b, 0x0002d, 0x00039, 0x00040, 0x00053, 0x00062, 0x00066,
+	 0x00069, 0x0006e, 0x00071, 0x00072, 0x00074, 0x00074, 0x00076, 0x0007c,
+	 0x0007f, 0x0009a, 0x0009d, 0x000af, 0x000b2, 0x000d4, 0x000d7, 0x000e2,
+	 0x000e5, 0x000e6, 0x000e9, 0x000f1, 0x000f4, 0x000f6, 0x000f9, 0x00108,
+	 0x0010b, 0x0010e, 0x00111, 0x00111, 0x00114, 0x0011c, 0x0011f, 0x00121,
+	 0x00125, 0x00125, 0x00127, 0x00127, 0x00129, 0x00129, 0x0012b, 0x00131,
+	 0x00134, 0x00138, 0x0013a, 0x0013a, 0x0013c, 0x0013f, 0x00142, 0x00150,
+	 0x00153, 0x00155, 0x00158, 0x00159, 0x0015c, 0x0015c, 0x00166, 0x00179,
+	 0x0019e, 0x001a3, 0x001b0, 0x002c9, 0x002e2, 0x0036b, 0x00380, 0x0039b,
+	 0x003a4, 0x003ab, 0x003b4, 0x003c5, 0x003ce, 0x003cf, 0x003e0, 0x003e0,
+	 0x003f0, 0x003f0, 0x00440, 0x00444, 0x00460, 0x00460, 0x00c02, 0x00c04,
+	 0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, 0x00df0, 0x00df4,
+	 0x00e01, 0x00e04, 0x00e06, 0x00e09, 0x00e0e, 0x00e13, 0x00e15, 0x00e16,
+	 0x00e20, 0x00e37, 0x0ec00, 0x0ec01, 0x0ec05, 0x0ec05, 0x0ec07, 0x0ec07,
+	 0x0ec0a, 0x0ec0a, 0x0ec12, 0x0ec12, 0x0ec26, 0x0ec28, 0x0ec2b, 0x0ec2d,
+	 0x0ec2f, 0x0ec2f, 0x0ec40, 0x0ec41, 0x0ec45, 0x0ec45, 0x0ec47, 0x0ec47,
+	 0x0ec4a, 0x0ec4a, 0x0ec52, 0x0ec52, 0x0ec66, 0x0ec68, 0x0ec6b, 0x0ec6d,
+	 0x0ec6f, 0x0ec6f, 0x0ec80, 0x0ec81, 0x0ec85, 0x0ec85, 0x0ec87, 0x0ec87,
+	 0x0ec8a, 0x0ec8a, 0x0ec92, 0x0ec92, 0x0eca6, 0x0eca8, 0x0ecab, 0x0ecad,
+	 0x0ecaf, 0x0ecaf, 0x0ecc0, 0x0ecc1, 0x0ecc5, 0x0ecc5, 0x0ecc7, 0x0ecc7,
+	 0x0ecca, 0x0ecca, 0x0ecd2, 0x0ecd2, 0x0ece6, 0x0ece8, 0x0eceb, 0x0eced,
+	 0x0ecef, 0x0ecef, 0x0ed00, 0x0ed01, 0x0ed05, 0x0ed05, 0x0ed07, 0x0ed07,
+	 0x0ed0a, 0x0ed0a, 0x0ed12, 0x0ed12, 0x0ed26, 0x0ed28, 0x0ed2b, 0x0ed2d,
+	 0x0ed2f, 0x0ed2f, 0x0ed40, 0x0ed41, 0x0ed45, 0x0ed45, 0x0ed47, 0x0ed47,
+	 0x0ed4a, 0x0ed4a, 0x0ed52, 0x0ed52, 0x0ed66, 0x0ed68, 0x0ed6b, 0x0ed6d,
+	 0x0ed6f, 0x0ed6f, 0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85, 0x0ed87, 0x0ed87,
+	 0x0ed8a, 0x0ed8a, 0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8, 0x0edab, 0x0edad,
+	 0x0edaf, 0x0edaf, UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gpu_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'GRAS', 'PC']
+ * Block   : ['RBBM', 'RDVM', 'UCHE']
+ * Block   : ['VFD', 'VPC', 'VSC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 12 (Regs:89)
+ */
+static const u32 gen8_3_0_gpu_slice_registers[] = {
+	 0x00500, 0x00500, 0x00580, 0x00584, 0x00586, 0x0058b, 0x0058f, 0x00599,
+	 0x005a0, 0x005b3, 0x005c0, 0x005c0, 0x005c2, 0x005c6, 0x005e0, 0x005e3,
+	 0x005ec, 0x005ec, 0x00f01, 0x00f02, 0x00f04, 0x00f0c, 0x00f20, 0x00f37,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gpu_slice_registers), 8));
+
+/*
+ * Block   : ['GMUAO', 'GMUCX', 'GMUCX_RAM']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 160 (Regs:616)
+ */
+static const u32 gen8_3_0_gmu_registers[] = {
+	 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403,
+	 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03,
+	 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403,
+	 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03,
+	 0x1f400, 0x1f40b, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507,
+	 0x1f509, 0x1f50b, 0x1f700, 0x1f701, 0x1f704, 0x1f706, 0x1f708, 0x1f709,
+	 0x1f70c, 0x1f70d, 0x1f710, 0x1f711, 0x1f713, 0x1f716, 0x1f718, 0x1f71d,
+	 0x1f720, 0x1f725, 0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f750, 0x1f75a,
+	 0x1f75c, 0x1f75c, 0x1f780, 0x1f781, 0x1f784, 0x1f78b, 0x1f790, 0x1f797,
+	 0x1f7a0, 0x1f7a7, 0x1f7b0, 0x1f7b7, 0x1f7e0, 0x1f7e1, 0x1f7e4, 0x1f7e5,
+	 0x1f7e8, 0x1f7e9, 0x1f7ec, 0x1f7ed, 0x1f800, 0x1f804, 0x1f807, 0x1f808,
+	 0x1f80b, 0x1f80c, 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817,
+	 0x1f819, 0x1f81c, 0x1f824, 0x1f830, 0x1f840, 0x1f842, 0x1f848, 0x1f848,
+	 0x1f84c, 0x1f84c, 0x1f850, 0x1f850, 0x1f858, 0x1f859, 0x1f868, 0x1f869,
+	 0x1f878, 0x1f883, 0x1f930, 0x1f931, 0x1f934, 0x1f935, 0x1f938, 0x1f939,
+	 0x1f93c, 0x1f93d, 0x1f940, 0x1f941, 0x1f943, 0x1f943, 0x1f948, 0x1f94a,
+	 0x1f94f, 0x1f951, 0x1f954, 0x1f955, 0x1f95d, 0x1f95d, 0x1f962, 0x1f96b,
+	 0x1f970, 0x1f970, 0x1f97c, 0x1f97e, 0x1f980, 0x1f981, 0x1f984, 0x1f986,
+	 0x1f992, 0x1f993, 0x1f996, 0x1f99e, 0x1f9c0, 0x1f9cf, 0x1f9f0, 0x1f9f1,
+	 0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03, 0x1fc00, 0x1fc01,
+	 0x1fc04, 0x1fc07, 0x1fc10, 0x1fc10, 0x1fc14, 0x1fc14, 0x1fc18, 0x1fc19,
+	 0x1fc20, 0x1fc20, 0x1fc24, 0x1fc26, 0x1fc30, 0x1fc33, 0x1fc38, 0x1fc3b,
+	 0x1fc40, 0x1fc49, 0x1fc50, 0x1fc59, 0x1fc60, 0x1fc7f, 0x1fca0, 0x1fcef,
+	 0x20000, 0x20007, 0x20010, 0x20015, 0x20018, 0x2001a, 0x2001c, 0x2001d,
+	 0x20020, 0x20021, 0x20024, 0x20025, 0x2002a, 0x2002c, 0x20030, 0x20031,
+	 0x20034, 0x20036, 0x20080, 0x20087, 0x20300, 0x20301, 0x20304, 0x20305,
+	 0x20308, 0x2030c, 0x20310, 0x20314, 0x20318, 0x2031a, 0x20320, 0x20322,
+	 0x20324, 0x20326, 0x20328, 0x2032a, 0x20330, 0x20333, 0x20338, 0x20338,
+	 0x20340, 0x20350, 0x20354, 0x2035b, 0x20360, 0x20367, 0x20370, 0x20377,
+	 0x23801, 0x23801, 0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807,
+	 0x23809, 0x23809, 0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f,
+	 0x23811, 0x23811, 0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817,
+	 0x23819, 0x23819, 0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820,
+	 0x23822, 0x23822, 0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828,
+	 0x2382a, 0x2382a, 0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830,
+	 0x23832, 0x23832, 0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838,
+	 0x2383a, 0x2383a, 0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847,
+	 0x23b00, 0x23b01, 0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13,
+	 0x23b15, 0x23b16, 0x23b28, 0x23b28, 0x23b30, 0x23b30,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gmu_registers), 8));
+
+/*
+ * Block   : ['GMUGX']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 17 (Regs:170)
+ */
+static const u32 gen8_3_0_gmugx_registers[] = {
+	 0x0dc00, 0x0dc0d, 0x0dc10, 0x0dc11, 0x0dc13, 0x0dc15, 0x0dc18, 0x0dc1a,
+	 0x0dc1c, 0x0dc23, 0x0dc26, 0x0dc2b, 0x0dc2e, 0x0dc2f, 0x0dc40, 0x0dc42,
+	 0x0dc60, 0x0dc7f, 0x0dc88, 0x0dc90, 0x0dc98, 0x0dc99, 0x0dca0, 0x0dcbf,
+	 0x0dcc8, 0x0dcd0, 0x0dcd8, 0x0dcd9, 0x0dce0, 0x0dcff, 0x0dd08, 0x0dd10,
+	 0x0dd18, 0x0dd19,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gmugx_registers), 8));
+
+/*
+ * Block   : ['GMUGX']
+ * REGION  : SLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 19 (Regs:224)
+ */
+static const u32 gen8_3_0_gmugx_slice_registers[] = {
+	 0x0e400, 0x0e401, 0x0e404, 0x0e404, 0x0e406, 0x0e408, 0x0e40a, 0x0e40a,
+	 0x0e40e, 0x0e42f, 0x0e438, 0x0e440, 0x0e448, 0x0e449, 0x0e490, 0x0e4af,
+	 0x0e4b8, 0x0e4c0, 0x0e4c8, 0x0e4c9, 0x0e4d0, 0x0e4ef, 0x0e4f8, 0x0e500,
+	 0x0e508, 0x0e509, 0x0e510, 0x0e52f, 0x0e538, 0x0e540, 0x0e548, 0x0e549,
+	 0x0e590, 0x0e5af, 0x0e5b8, 0x0e5c0, 0x0e5c8, 0x0e5c9,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gmugx_slice_registers), 8));
+
+/*
+ * Block   : ['CX_MISC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 7 (Regs:58)
+ */
+static const u32 gen8_3_0_cx_misc_registers[] = {
+	 0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27828, 0x2782a,
+	 0x27832, 0x27857, 0x27880, 0x27883, 0x27c00, 0x27c01,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cx_misc_registers), 8));
+
+/*
+ * Block   : ['DBGC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 21 (Regs:152)
+ */
+static const u32 gen8_3_0_dbgc_registers[] = {
+	 0x00600, 0x0061c, 0x0061e, 0x0063d, 0x00640, 0x00644, 0x00650, 0x00655,
+	 0x00660, 0x00660, 0x00662, 0x00668, 0x0066a, 0x0066a, 0x00680, 0x00685,
+	 0x00700, 0x00704, 0x00707, 0x0070a, 0x0070f, 0x00716, 0x00720, 0x00724,
+	 0x00730, 0x00732, 0x00740, 0x00740, 0x00742, 0x0074a, 0x00750, 0x00755,
+	 0x00759, 0x0075c, 0x00760, 0x00763, 0x00770, 0x00770, 0x00780, 0x0078d,
+	 0x00790, 0x00790,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_dbgc_registers), 8));
+
+/*
+ * Block   : ['DBGC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 2 (Regs:61)
+ */
+static const u32 gen8_3_0_dbgc_slice_registers[] = {
+	 0x007a0, 0x007d5, 0x007e0, 0x007e6,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_dbgc_slice_registers), 8));
+
+/*
+ * Block   : ['CX_DBGC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 6 (Regs:75)
+ */
+static const u32 gen8_3_0_cx_dbgc_registers[] = {
+	 0x18400, 0x1841c, 0x1841e, 0x1843d, 0x18440, 0x18444, 0x18450, 0x18455,
+	 0x1846a, 0x1846a, 0x18580, 0x18581,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cx_dbgc_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : UNSLICE
+ * Pipeline: CP_PIPE_NONE
+ * Cluster : CLUSTER_NONE
+ * pairs   : 16 (Regs:300)
+ */
+static const u32 gen8_3_0_cp_cp_pipe_none_registers[] = {
+	 0x00800, 0x0080a, 0x00813, 0x0081e, 0x00820, 0x0082d, 0x00838, 0x0083e,
+	 0x00840, 0x00847, 0x0084b, 0x0084c, 0x00850, 0x0088f, 0x008b5, 0x008b6,
+	 0x008c0, 0x008cb, 0x008d0, 0x008e4, 0x008e7, 0x008ee, 0x008fa, 0x008fb,
+	 0x00928, 0x00929, 0x00958, 0x0095b, 0x00980, 0x009ff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_none_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : UNSLICE
+ * Pipeline: CP_PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 8 (Regs:96)
+ */
+static const u32 gen8_3_0_cp_cp_pipe_br_registers[] = {
+	 0x00830, 0x00837, 0x0084d, 0x0084f, 0x008a0, 0x008b4, 0x008b7, 0x008bb,
+	 0x008f0, 0x008f9, 0x00900, 0x0091e, 0x00920, 0x00926, 0x00930, 0x0093a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_br_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : SLICE
+ * Pipeline: CP_PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 4 (Regs:23)
+ */
+static const u32 gen8_3_0_cp_slice_cp_pipe_br_registers[] = {
+	 0x00b00, 0x00b0c, 0x00b10, 0x00b10, 0x00b80, 0x00b84, 0x00b90, 0x00b93,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_slice_cp_pipe_br_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : UNSLICE
+ * Pipeline: CP_PIPE_BV
+ * Cluster : CLUSTER_NONE
+ * pairs   : 9 (Regs:72)
+ */
+static const u32 gen8_3_0_cp_cp_pipe_bv_registers[] = {
+	 0x00830, 0x00835, 0x0084d, 0x0084f, 0x008b0, 0x008b4, 0x008b7, 0x008bb,
+	 0x008f0, 0x008f9, 0x00900, 0x00913, 0x00918, 0x0091d, 0x00920, 0x00925,
+	 0x00930, 0x0093a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_bv_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : SLICE
+ * Pipeline: CP_PIPE_BV
+ * Cluster : CLUSTER_NONE
+ * pairs   : 4 (Regs:23)
+ */
+static const u32 gen8_3_0_cp_slice_cp_pipe_bv_registers[] = {
+	 0x00b00, 0x00b0c, 0x00b10, 0x00b10, 0x00b80, 0x00b84, 0x00b90, 0x00b93,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_slice_cp_pipe_bv_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : UNSLICE
+ * Pipeline: CP_PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * pairs   : 3 (Regs:13)
+ */
+static const u32 gen8_3_0_cp_cp_pipe_lpac_registers[] = {
+	 0x00830, 0x00837, 0x008b3, 0x008b4, 0x008b7, 0x008b9,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_lpac_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : UNSLICE
+ * Pipeline: CP_PIPE_AQE0
+ * Cluster : CLUSTER_NONE
+ * pairs   : 2 (Regs:5)
+ */
+static const u32 gen8_3_0_cp_cp_pipe_aqe0_registers[] = {
+	 0x008b3, 0x008b4, 0x008b7, 0x008b9,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_aqe0_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : UNSLICE
+ * Pipeline: CP_PIPE_AQE1
+ * Cluster : CLUSTER_NONE
+ * pairs   : 2 (Regs:5)
+ */
+static const u32 gen8_3_0_cp_cp_pipe_aqe1_registers[] = {
+	 0x008b3, 0x008b4, 0x008b7, 0x008b9,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_aqe1_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : UNSLICE
+ * Pipeline: CP_PIPE_DDE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 3 (Regs:7)
+ */
+static const u32 gen8_3_0_cp_cp_pipe_dde_br_registers[] = {
+	 0x008b3, 0x008b4, 0x008b7, 0x008b9, 0x008fe, 0x008ff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_dde_br_registers), 8));
+
+/*
+ * Block   : ['CP']
+ * REGION  : UNSLICE
+ * Pipeline: CP_PIPE_DDE_BV
+ * Cluster : CLUSTER_NONE
+ * pairs   : 3 (Regs:7)
+ */
+static const u32 gen8_3_0_cp_cp_pipe_dde_bv_registers[] = {
+	 0x008b3, 0x008b4, 0x008b7, 0x008b9, 0x008fe, 0x008ff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_cp_cp_pipe_dde_bv_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO']
+ * Block   : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM']
+ * Block   : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 11 (Regs:1124)
+ */
+static const u32 gen8_3_0_non_context_pipe_br_registers[] = {
+	 0x09600, 0x09605, 0x09610, 0x09617, 0x09620, 0x09627, 0x09670, 0x0967b,
+	 0x09e00, 0x09e04, 0x09e06, 0x09e15, 0x09e17, 0x09e23, 0x09e30, 0x09e3f,
+	 0x09e50, 0x09e59, 0x09e60, 0x09e65, 0x0d200, 0x0d5ff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_pipe_br_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO']
+ * Block   : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM']
+ * Block   : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 21 (Regs:300)
+ */
+static const u32 gen8_3_0_non_context_slice_pipe_br_registers[] = {
+	 0x08600, 0x08602, 0x08610, 0x08613, 0x08700, 0x08703, 0x08710, 0x08713,
+	 0x08720, 0x08723, 0x08730, 0x08733, 0x08740, 0x08744, 0x09680, 0x09681,
+	 0x09690, 0x0969b, 0x09740, 0x09745, 0x09750, 0x0975b, 0x09770, 0x097ef,
+	 0x09f00, 0x09f0f, 0x09f20, 0x09f23, 0x09f30, 0x09f31, 0x0a600, 0x0a600,
+	 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a632, 0x0a638, 0x0a63c,
+	 0x0a640, 0x0a67f,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_slice_pipe_br_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO']
+ * Block   : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM']
+ * Block   : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_NONE
+ * pairs   : 11 (Regs:1124)
+ */
+static const u32 gen8_3_0_non_context_pipe_bv_registers[] = {
+	 0x09600, 0x09605, 0x09610, 0x09617, 0x09620, 0x09627, 0x09670, 0x0967b,
+	 0x09e00, 0x09e04, 0x09e06, 0x09e15, 0x09e17, 0x09e23, 0x09e30, 0x09e3f,
+	 0x09e50, 0x09e59, 0x09e60, 0x09e65, 0x0d200, 0x0d5ff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_pipe_bv_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO']
+ * Block   : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM']
+ * Block   : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_NONE
+ * pairs   : 21 (Regs:300)
+ */
+static const u32 gen8_3_0_non_context_slice_pipe_bv_registers[] = {
+	 0x08600, 0x08602, 0x08610, 0x08613, 0x08700, 0x08703, 0x08710, 0x08713,
+	 0x08720, 0x08723, 0x08730, 0x08733, 0x08740, 0x08744, 0x09680, 0x09681,
+	 0x09690, 0x0969b, 0x09740, 0x09745, 0x09750, 0x0975b, 0x09770, 0x097ef,
+	 0x09f00, 0x09f0f, 0x09f20, 0x09f23, 0x09f30, 0x09f31, 0x0a600, 0x0a600,
+	 0x0a603, 0x0a603, 0x0a610, 0x0a61f, 0x0a630, 0x0a632, 0x0a638, 0x0a63c,
+	 0x0a640, 0x0a67f,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_slice_pipe_bv_registers), 8));
+
+/*
+ * Block   : ['BROADCAST', 'CX_DBGC', 'CX_MISC', 'DBGC', 'GBIF', 'GMUAO']
+ * Block   : ['GMUCX', 'GMUGX', 'GRAS', 'PC', 'RBBM']
+ * Block   : ['RDVM', 'UCHE', 'VFD', 'VPC', 'VSC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_NONE
+ * pairs   : 1 (Regs:1)
+ */
+static const u32 gen8_3_0_non_context_pipe_lpac_registers[] = {
+	 0x00e14, 0x00e14,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_pipe_lpac_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 4 (Regs:26)
+ */
+static const u32 gen8_3_0_non_context_rb_pipe_br_rbp_registers[] = {
+	 0x08f00, 0x08f07, 0x08f10, 0x08f15, 0x08f20, 0x08f29, 0x08f30, 0x08f31,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_rb_pipe_br_rbp_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 5 (Regs:32)
+ */
+static const u32 gen8_3_0_non_context_rb_slice_pipe_br_rac_registers[] = {
+	 0x08e09, 0x08e0b, 0x08e10, 0x08e17, 0x08e51, 0x08e5a, 0x08e69, 0x08e6f,
+	 0x08ea0, 0x08ea3,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_rb_slice_pipe_br_rac_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_NONE
+ * pairs   : 9 (Regs:28)
+ */
+static const u32 gen8_3_0_non_context_rb_slice_pipe_br_rbp_registers[] = {
+	 0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e08, 0x08e0c, 0x08e0c,
+	 0x08e18, 0x08e1c, 0x08e3b, 0x08e40, 0x08e50, 0x08e50, 0x08e5d, 0x08e5d,
+	 0x08e77, 0x08e7f,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_rb_slice_pipe_br_rbp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * Cluster : CLUSTER_NONE
+ * Location: HLSQ_STATE
+ * pairs   : 8 (Regs:34)
+ */
+static const u32 gen8_3_0_non_context_sp_pipe_none_hlsq_state_registers[] = {
+	 0x0ae05, 0x0ae05, 0x0ae10, 0x0ae13, 0x0ae15, 0x0ae16, 0x0ae52, 0x0ae52,
+	 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae6e, 0x0ae70, 0x0ae75, 0x0aec0, 0x0aec5,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_sp_pipe_none_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * Cluster : CLUSTER_NONE
+ * Location: SP_TOP
+ * pairs   : 6 (Regs:60)
+ */
+static const u32 gen8_3_0_non_context_sp_pipe_none_sp_top_registers[] = {
+	 0x0ae00, 0x0ae0c, 0x0ae0f, 0x0ae0f, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f,
+	 0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_sp_pipe_none_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 9 (Regs:64)
+ */
+static const u32 gen8_3_0_non_context_sp_pipe_none_usptp_registers[] = {
+	 0x0ae00, 0x0ae0c, 0x0ae0f, 0x0ae0f, 0x0ae17, 0x0ae19, 0x0ae30, 0x0ae32,
+	 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3b, 0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52,
+	 0x0ae80, 0x0aea3,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_sp_pipe_none_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * Cluster : CLUSTER_NONE
+ * Location: HLSQ_DP_STR
+ * pairs   : 5 (Regs:18)
+ */
+static const u32 gen8_3_0_non_context_sp_pipe_none_hlsq_dp_str_registers[] = {
+	 0x0ae05, 0x0ae05, 0x0ae60, 0x0ae65, 0x0ae6b, 0x0ae6c, 0x0ae73, 0x0ae75,
+	 0x0aec0, 0x0aec5,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_sp_pipe_none_hlsq_dp_str_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * Cluster : CLUSTER_NONE
+ * Location: USPTP
+ * pairs   : 5 (Regs:48)
+ */
+static const u32 gen8_3_0_non_context_tpl1_pipe_none_usptp_registers[] = {
+	 0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b606, 0x0b61e,
+	 0x0b620, 0x0b633,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_non_context_tpl1_pipe_none_usptp_registers), 8));
+
+/*
+ * Block   : ['GRAS']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_VPC_VS
+ * pairs   : 9 (Regs:238)
+ */
+static const u32 gen8_3_0_gras_slice_pipe_br_cluster_vpc_vs_registers[] = {
+	 0x08200, 0x08213, 0x08220, 0x08225, 0x08228, 0x0822d, 0x08230, 0x0823b,
+	 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7, 0x082d0, 0x0832f,
+	 0x08500, 0x08508,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gras_slice_pipe_br_cluster_vpc_vs_registers), 8));
+
+/*
+ * Block   : ['GRAS']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_GRAS
+ * pairs   : 14 (Regs:332)
+ */
+static const u32 gen8_3_0_gras_slice_pipe_br_cluster_gras_registers[] = {
+	 0x08080, 0x08080, 0x08086, 0x08092, 0x080c0, 0x080df, 0x08101, 0x08110,
+	 0x08130, 0x0814f, 0x08200, 0x08213, 0x08220, 0x08225, 0x08228, 0x0822d,
+	 0x08230, 0x0823b, 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7,
+	 0x082d0, 0x0832f, 0x08500, 0x08508,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gras_slice_pipe_br_cluster_gras_registers), 8));
+
+/*
+ * Block   : ['GRAS']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_VPC_VS
+ * pairs   : 9 (Regs:238)
+ */
+static const u32 gen8_3_0_gras_slice_pipe_bv_cluster_vpc_vs_registers[] = {
+	 0x08200, 0x08213, 0x08220, 0x08225, 0x08228, 0x0822d, 0x08230, 0x0823b,
+	 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7, 0x082d0, 0x0832f,
+	 0x08500, 0x08508,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gras_slice_pipe_bv_cluster_vpc_vs_registers), 8));
+
+/*
+ * Block   : ['GRAS']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_GRAS
+ * pairs   : 14 (Regs:332)
+ */
+static const u32 gen8_3_0_gras_slice_pipe_bv_cluster_gras_registers[] = {
+	 0x08080, 0x08080, 0x08086, 0x08092, 0x080c0, 0x080df, 0x08101, 0x08110,
+	 0x08130, 0x0814f, 0x08200, 0x08213, 0x08220, 0x08225, 0x08228, 0x0822d,
+	 0x08230, 0x0823b, 0x08240, 0x0825f, 0x08270, 0x0828f, 0x0829f, 0x082b7,
+	 0x082d0, 0x0832f, 0x08500, 0x08508,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gras_slice_pipe_bv_cluster_gras_registers), 8));
+
+/*
+ * Block   : ['PC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE_US
+ * pairs   : 6 (Regs:35)
+ */
+static const u32 gen8_3_0_pc_pipe_br_cluster_fe_us_registers[] = {
+	 0x09805, 0x09807, 0x0980b, 0x0980b, 0x09812, 0x09817, 0x0981a, 0x0981b,
+	 0x09b00, 0x09b0d, 0x09b10, 0x09b18,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_pc_pipe_br_cluster_fe_us_registers), 8));
+
+/*
+ * Block   : ['PC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE_S
+ * pairs   : 2 (Regs:23)
+ */
+static const u32 gen8_3_0_pc_slice_pipe_br_cluster_fe_s_registers[] = {
+	 0x09b00, 0x09b0d, 0x09b10, 0x09b18,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_pc_slice_pipe_br_cluster_fe_s_registers), 8));
+
+/*
+ * Block   : ['PC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE_US
+ * pairs   : 6 (Regs:35)
+ */
+static const u32 gen8_3_0_pc_pipe_bv_cluster_fe_us_registers[] = {
+	 0x09805, 0x09807, 0x0980b, 0x0980b, 0x09812, 0x09817, 0x0981a, 0x0981b,
+	 0x09b00, 0x09b0d, 0x09b10, 0x09b18,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_pc_pipe_bv_cluster_fe_us_registers), 8));
+
+/*
+ * Block   : ['PC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE_S
+ * pairs   : 2 (Regs:23)
+ */
+static const u32 gen8_3_0_pc_slice_pipe_bv_cluster_fe_s_registers[] = {
+	 0x09b00, 0x09b0d, 0x09b10, 0x09b18,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_pc_slice_pipe_bv_cluster_fe_s_registers), 8));
+
+/*
+ * Block   : ['VFD']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE_S
+ * pairs   : 2 (Regs:236)
+ */
+static const u32 gen8_3_0_vfd_slice_pipe_br_cluster_fe_s_registers[] = {
+	 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vfd_slice_pipe_br_cluster_fe_s_registers), 8));
+
+/*
+ * Block   : ['VFD']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE_S
+ * pairs   : 2 (Regs:236)
+ */
+static const u32 gen8_3_0_vfd_slice_pipe_bv_cluster_fe_s_registers[] = {
+	 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vfd_slice_pipe_bv_cluster_fe_s_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_FE_S
+ * pairs   : 1 (Regs:27)
+ */
+static const u32 gen8_3_0_vpc_slice_pipe_br_cluster_fe_s_registers[] = {
+	 0x09300, 0x0931a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vpc_slice_pipe_br_cluster_fe_s_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_VPC_VS
+ * pairs   : 2 (Regs:29)
+ */
+static const u32 gen8_3_0_vpc_slice_pipe_br_cluster_vpc_vs_registers[] = {
+	 0x090c0, 0x090c1, 0x09300, 0x0931a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vpc_slice_pipe_br_cluster_vpc_vs_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_VPC_US
+ * pairs   : 3 (Regs:58)
+ */
+static const u32 gen8_3_0_vpc_pipe_br_cluster_vpc_us_registers[] = {
+	 0x09180, 0x09180, 0x09182, 0x0919f, 0x09300, 0x0931a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vpc_pipe_br_cluster_vpc_us_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_VPC_PS
+ * pairs   : 4 (Regs:52)
+ */
+static const u32 gen8_3_0_vpc_slice_pipe_br_cluster_vpc_ps_registers[] = {
+	 0x09240, 0x0924f, 0x09252, 0x09255, 0x09278, 0x0927c, 0x09300, 0x0931a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vpc_slice_pipe_br_cluster_vpc_ps_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_FE_S
+ * pairs   : 1 (Regs:27)
+ */
+static const u32 gen8_3_0_vpc_slice_pipe_bv_cluster_fe_s_registers[] = {
+	 0x09300, 0x0931a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vpc_slice_pipe_bv_cluster_fe_s_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_VPC_VS
+ * pairs   : 2 (Regs:29)
+ */
+static const u32 gen8_3_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers[] = {
+	 0x090c0, 0x090c1, 0x09300, 0x0931a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_VPC_US
+ * pairs   : 3 (Regs:58)
+ */
+static const u32 gen8_3_0_vpc_pipe_bv_cluster_vpc_us_registers[] = {
+	 0x09180, 0x09180, 0x09182, 0x0919f, 0x09300, 0x0931a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vpc_pipe_bv_cluster_vpc_us_registers), 8));
+
+/*
+ * Block   : ['VPC']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_VPC_PS
+ * pairs   : 4 (Regs:52)
+ */
+static const u32 gen8_3_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers[] = {
+	 0x09240, 0x0924f, 0x09252, 0x09255, 0x09278, 0x0927c, 0x09300, 0x0931a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_PS
+ * pairs   : 36 (Regs:151)
+ */
+static const u32 gen8_3_0_rb_slice_pipe_br_cluster_ps_rac_registers[] = {
+	 0x08802, 0x08802, 0x08804, 0x0880a, 0x0880e, 0x08811, 0x08813, 0x08814,
+	 0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829,
+	 0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839,
+	 0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849,
+	 0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859,
+	 0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876,
+	 0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891,
+	 0x08898, 0x08899, 0x088b0, 0x088cf, 0x088e5, 0x088e5, 0x088f4, 0x088f5,
+	 0x08930, 0x08937, 0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_rb_slice_pipe_br_cluster_ps_rac_registers), 8));
+
+/*
+ * Block   : ['RB']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_PS
+ * pairs   : 36 (Regs:104)
+ */
+static const u32 gen8_3_0_rb_slice_pipe_br_cluster_ps_rbp_registers[] = {
+	 0x08800, 0x08801, 0x08803, 0x08803, 0x0880b, 0x0880d, 0x08812, 0x08812,
+	 0x08815, 0x08816, 0x08820, 0x08820, 0x08822, 0x08822, 0x08827, 0x08828,
+	 0x0882a, 0x0882a, 0x0882f, 0x08830, 0x08832, 0x08832, 0x08837, 0x08838,
+	 0x0883a, 0x0883a, 0x0883f, 0x08840, 0x08842, 0x08842, 0x08847, 0x08848,
+	 0x0884a, 0x0884a, 0x0884f, 0x08850, 0x08852, 0x08852, 0x08857, 0x08858,
+	 0x0885a, 0x0885a, 0x0885f, 0x0885f, 0x08865, 0x08865, 0x08871, 0x08872,
+	 0x08877, 0x08877, 0x08880, 0x08881, 0x08886, 0x08886, 0x08890, 0x08890,
+	 0x088d0, 0x088e4, 0x088e6, 0x088e6, 0x088e8, 0x088ea, 0x088f0, 0x088f1,
+	 0x08900, 0x0891a, 0x08927, 0x08928, 0x08c17, 0x08c17, 0x08c20, 0x08c25,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_rb_slice_pipe_br_cluster_ps_rbp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 29 (Regs:114)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers[] = {
+	 0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
+	 0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
+	 0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
+	 0x0a89a, 0x0a89d, 0x0a8b0, 0x0a8bb, 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977,
+	 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05, 0x0ab09, 0x0ab09, 0x0ab23, 0x0ab23,
+	 0x0abd0, 0x0abff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 2 (Regs:34)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers[] = {
+	 0x0a8a0, 0x0a8af, 0x0ab0a, 0x0ab1b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 1 (Regs:160)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers[] = {
+	 0x0ab30, 0x0abcf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: SP_TOP
+ * pairs   : 23 (Regs:39)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers[] = {
+	 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a826, 0x0a826,
+	 0x0a82d, 0x0a82d, 0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c,
+	 0x0a83e, 0x0a83e, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d, 0x0a862, 0x0a864,
+	 0x0a866, 0x0a866, 0x0a868, 0x0a868, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e,
+	 0x0a893, 0x0a895, 0x0a897, 0x0a897, 0x0a899, 0x0a899, 0x0ab00, 0x0ab00,
+	 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab09, 0x0ab09,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: SP_TOP
+ * pairs   : 2 (Regs:34)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers[] = {
+	 0x0a8a0, 0x0a8af, 0x0ab0a, 0x0ab1b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 15 (Regs:145)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers[] = {
+	 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
+	 0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
+	 0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
+	 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977, 0x0ab00, 0x0ab07,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 1 (Regs:160)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_usptp_shared_const_registers[] = {
+	 0x0ab30, 0x0abcf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(
+	gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_usptp_shared_const_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 21 (Regs:88)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers[] = {
+	 0x0a980, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7, 0x0a9aa, 0x0a9aa,
+	 0x0a9af, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
+	 0x0a9c4, 0x0a9c4, 0x0a9c6, 0x0a9c6, 0x0a9cd, 0x0a9cd, 0x0a9fa, 0x0a9fc,
+	 0x0aa00, 0x0aa00, 0x0aa0d, 0x0aa12, 0x0aa30, 0x0aa31, 0x0aaf2, 0x0aaf3,
+	 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05, 0x0ab09, 0x0ab09, 0x0ab23, 0x0ab23,
+	 0x0abd0, 0x0abff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 2 (Regs:44)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers[] = {
+	 0x0a9e0, 0x0a9f9, 0x0ab0a, 0x0ab1b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 2 (Regs:320)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers[] = {
+	 0x0aa40, 0x0aadf, 0x0ab30, 0x0abcf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_DP
+ * pairs   : 2 (Regs:13)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers[] = {
+	 0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: SP_TOP
+ * pairs   : 17 (Regs:34)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers[] = {
+	 0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8,
+	 0x0a9aa, 0x0a9ab, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5,
+	 0x0a9ba, 0x0a9bc, 0x0a9be, 0x0a9be, 0x0a9c5, 0x0a9c5, 0x0a9ce, 0x0a9ce,
+	 0x0aa00, 0x0aa03, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05,
+	 0x0ab09, 0x0ab09,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: SP_TOP
+ * pairs   : 2 (Regs:44)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers[] = {
+	 0x0a9e0, 0x0a9f9, 0x0ab0a, 0x0ab1b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 15 (Regs:89)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers[] = {
+	 0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae,
+	 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3,
+	 0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9ce, 0x0a9d0, 0x0a9d3, 0x0aa01, 0x0aa0c,
+	 0x0aa30, 0x0aa31, 0x0aaf2, 0x0aaf3, 0x0ab00, 0x0ab07,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 2 (Regs:320)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_usptp_shared_const_registers[] = {
+	 0x0aa40, 0x0aadf, 0x0ab30, 0x0abcf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_usptp_shared_const_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 28 (Regs:112)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers[] = {
+	 0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
+	 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
+	 0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
+	 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
+	 0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
+	 0x0a89a, 0x0a89d, 0x0a8b0, 0x0a8bb, 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977,
+	 0x0ab00, 0x0ab02, 0x0ab09, 0x0ab09, 0x0ab23, 0x0ab23, 0x0abd0, 0x0abff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 2 (Regs:34)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers[] = {
+	 0x0a8a0, 0x0a8af, 0x0ab0a, 0x0ab1b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: HLSQ_STATE
+ * pairs   : 1 (Regs:160)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers[] = {
+	 0x0ab30, 0x0abcf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: SP_TOP
+ * pairs   : 22 (Regs:37)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers[] = {
+	 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a826, 0x0a826,
+	 0x0a82d, 0x0a82d, 0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c,
+	 0x0a83e, 0x0a83e, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d, 0x0a862, 0x0a864,
+	 0x0a866, 0x0a866, 0x0a868, 0x0a868, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e,
+	 0x0a893, 0x0a895, 0x0a897, 0x0a897, 0x0a899, 0x0a899, 0x0ab00, 0x0ab00,
+	 0x0ab02, 0x0ab02, 0x0ab09, 0x0ab09,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: SP_TOP
+ * pairs   : 2 (Regs:34)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers[] = {
+	 0x0a8a0, 0x0a8af, 0x0ab0a, 0x0ab1b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 16 (Regs:142)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers[] = {
+	 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
+	 0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
+	 0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
+	 0x0a8c0, 0x0a8c3, 0x0a974, 0x0a977, 0x0ab00, 0x0ab02, 0x0ab06, 0x0ab07,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 1 (Regs:160)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_shared_const_registers[] = {
+	 0x0ab30, 0x0abcf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_shared_const_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 14 (Regs:73)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_registers[] = {
+	 0x0a9b0, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
+	 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9fa, 0x0a9fc, 0x0aa00, 0x0aa00,
+	 0x0aa10, 0x0aa12, 0x0aa31, 0x0aa35, 0x0aaf3, 0x0aaf3, 0x0ab00, 0x0ab01,
+	 0x0ab23, 0x0ab23, 0x0abd0, 0x0abff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 2 (Regs:22)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_cctx_registers[] = {
+	 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9f9,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_cctx_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_STATE
+ * pairs   : 2 (Regs:320)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_shared_const_registers[] = {
+	 0x0aa40, 0x0aadf, 0x0ab30, 0x0abcf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_shared_const_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: HLSQ_DP
+ * pairs   : 2 (Regs:13)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_dp_registers[] = {
+	 0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_dp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: SP_TOP
+ * pairs   : 8 (Regs:13)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_registers[] = {
+	 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9be, 0x0a9be,
+	 0x0a9c5, 0x0a9c5, 0x0a9ce, 0x0a9ce, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: SP_TOP
+ * pairs   : 2 (Regs:22)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_cctx_registers[] = {
+	 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9f9,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_cctx_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 11 (Regs:26)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_registers[] = {
+	 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3,
+	 0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9ce, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31,
+	 0x0aaf3, 0x0aaf3, 0x0ab00, 0x0ab01, 0x0ab06, 0x0ab06,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_registers), 8));
+
+/*
+ * Block   : ['SP']
+ * REGION  : SLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 2 (Regs:320)
+ */
+static const u32 gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_shared_const_registers[] = {
+	 0x0aa40, 0x0aadf, 0x0ab30, 0x0abcf,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(
+	sizeof(gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_shared_const_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 3 (Regs:7)
+ */
+static const u32 gen8_3_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers[] = {
+	 0x0b300, 0x0b304, 0x0b307, 0x0b307, 0x0b309, 0x0b309,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BR
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 6 (Regs:41)
+ */
+static const u32 gen8_3_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers[] = {
+	 0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d7, 0x0b300, 0x0b304,
+	 0x0b307, 0x0b307, 0x0b309, 0x0b309,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * REGION  : SLICE
+ * Pipeline: PIPE_BV
+ * Cluster : CLUSTER_SP_VS
+ * Location: USPTP
+ * pairs   : 3 (Regs:7)
+ */
+static const u32 gen8_3_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers[] = {
+	 0x0b300, 0x0b304, 0x0b307, 0x0b307, 0x0b309, 0x0b309,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers), 8));
+
+/*
+ * Block   : ['TPL1']
+ * REGION  : SLICE
+ * Pipeline: PIPE_LPAC
+ * Cluster : CLUSTER_SP_PS
+ * Location: USPTP
+ * pairs   : 5 (Regs:7)
+ */
+static const u32 gen8_3_0_tpl1_slice_pipe_lpac_cluster_sp_ps_usptp_registers[] = {
+	 0x0b180, 0x0b181, 0x0b300, 0x0b301, 0x0b304, 0x0b304, 0x0b307, 0x0b307,
+	 0x0b309, 0x0b309,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_tpl1_slice_pipe_lpac_cluster_sp_ps_usptp_registers), 8));
+
+static const struct sel_reg gen8_3_0_rb_rac_sel = {
+	.host_reg = GEN8_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN8_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0,
+};
+
+static const struct sel_reg gen8_3_0_rb_rbp_sel = {
+	.host_reg = GEN8_RB_SUB_BLOCK_SEL_CNTL_HOST,
+	.cd_reg = GEN8_RB_SUB_BLOCK_SEL_CNTL_CD,
+	.val = 0x9,
+};
+
+static struct gen8_cluster_registers gen8_3_0_cp_clusters[] = {
+	{ CLUSTER_NONE, UNSLICE, PIPE_NONE, STATE_NON_CONTEXT,
+		gen8_3_0_cp_cp_pipe_none_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT,
+		gen8_3_0_cp_cp_pipe_br_registers,  },
+	{ CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT,
+		gen8_3_0_cp_slice_cp_pipe_br_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_BV, STATE_NON_CONTEXT,
+		gen8_3_0_cp_cp_pipe_bv_registers,  },
+	{ CLUSTER_NONE, SLICE, PIPE_BV, STATE_NON_CONTEXT,
+		gen8_3_0_cp_slice_cp_pipe_bv_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_LPAC, STATE_NON_CONTEXT,
+		gen8_3_0_cp_cp_pipe_lpac_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_AQE0, STATE_NON_CONTEXT,
+		gen8_3_0_cp_cp_pipe_aqe0_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_AQE1, STATE_NON_CONTEXT,
+		gen8_3_0_cp_cp_pipe_aqe1_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_DDE_BR, STATE_NON_CONTEXT,
+		gen8_3_0_cp_cp_pipe_dde_br_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_DDE_BV, STATE_NON_CONTEXT,
+		gen8_3_0_cp_cp_pipe_dde_bv_registers,  },
+};
+
+static struct gen8_cluster_registers gen8_3_0_mvc_clusters[] = {
+	{ CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT,
+		gen8_3_0_non_context_pipe_br_registers,  },
+	{ CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT,
+		gen8_3_0_non_context_slice_pipe_br_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_BV, STATE_NON_CONTEXT,
+		gen8_3_0_non_context_pipe_bv_registers,  },
+	{ CLUSTER_NONE, SLICE, PIPE_BV, STATE_NON_CONTEXT,
+		gen8_3_0_non_context_slice_pipe_bv_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_LPAC, STATE_NON_CONTEXT,
+		gen8_3_0_non_context_pipe_lpac_registers,  },
+	{ CLUSTER_NONE, UNSLICE, PIPE_BR, STATE_NON_CONTEXT,
+		gen8_3_0_non_context_rb_pipe_br_rbp_registers, &gen8_3_0_rb_rbp_sel, },
+	{ CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT,
+		gen8_3_0_non_context_rb_slice_pipe_br_rac_registers, &gen8_3_0_rb_rac_sel, },
+	{ CLUSTER_NONE, SLICE, PIPE_BR, STATE_NON_CONTEXT,
+		gen8_3_0_non_context_rb_slice_pipe_br_rbp_registers, &gen8_3_0_rb_rbp_sel, },
+	{ CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_3_0_rb_rac_sel, },
+	{ CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_rb_slice_pipe_br_cluster_ps_rac_registers, &gen8_3_0_rb_rac_sel, },
+	{ CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_3_0_rb_rbp_sel, },
+	{ CLUSTER_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_rb_slice_pipe_br_cluster_ps_rbp_registers, &gen8_3_0_rb_rbp_sel, },
+	{ CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_gras_slice_pipe_br_cluster_vpc_vs_registers,  },
+	{ CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_gras_slice_pipe_br_cluster_vpc_vs_registers,  },
+	{ CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_gras_slice_pipe_br_cluster_gras_registers,  },
+	{ CLUSTER_GRAS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_gras_slice_pipe_br_cluster_gras_registers,  },
+	{ CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen8_3_0_gras_slice_pipe_bv_cluster_vpc_vs_registers,  },
+	{ CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen8_3_0_gras_slice_pipe_bv_cluster_vpc_vs_registers,  },
+	{ CLUSTER_GRAS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen8_3_0_gras_slice_pipe_bv_cluster_gras_registers,  },
+	{ CLUSTER_GRAS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen8_3_0_gras_slice_pipe_bv_cluster_gras_registers,  },
+	{ CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_pc_pipe_br_cluster_fe_us_registers,  },
+	{ CLUSTER_FE_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_pc_pipe_br_cluster_fe_us_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_pc_slice_pipe_br_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_pc_slice_pipe_br_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen8_3_0_pc_pipe_bv_cluster_fe_us_registers,  },
+	{ CLUSTER_FE_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen8_3_0_pc_pipe_bv_cluster_fe_us_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen8_3_0_pc_slice_pipe_bv_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen8_3_0_pc_slice_pipe_bv_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_vfd_slice_pipe_br_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_vfd_slice_pipe_br_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen8_3_0_vfd_slice_pipe_bv_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen8_3_0_vfd_slice_pipe_bv_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_vpc_slice_pipe_br_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_vpc_slice_pipe_br_cluster_fe_s_registers,  },
+	{ CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_vpc_slice_pipe_br_cluster_vpc_vs_registers,  },
+	{ CLUSTER_VPC_VS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_vpc_slice_pipe_br_cluster_vpc_vs_registers,  },
+	{ CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_vpc_pipe_br_cluster_vpc_us_registers,  },
+	{ CLUSTER_VPC_US, UNSLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_vpc_pipe_br_cluster_vpc_us_registers,  },
+	{ CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_0,
+		gen8_3_0_vpc_slice_pipe_br_cluster_vpc_ps_registers,  },
+	{ CLUSTER_VPC_PS, SLICE, PIPE_BR, STATE_FORCE_CTXT_1,
+		gen8_3_0_vpc_slice_pipe_br_cluster_vpc_ps_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen8_3_0_vpc_slice_pipe_bv_cluster_fe_s_registers,  },
+	{ CLUSTER_FE_S, SLICE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen8_3_0_vpc_slice_pipe_bv_cluster_fe_s_registers,  },
+	{ CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen8_3_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers,  },
+	{ CLUSTER_VPC_VS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen8_3_0_vpc_slice_pipe_bv_cluster_vpc_vs_registers,  },
+	{ CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen8_3_0_vpc_pipe_bv_cluster_vpc_us_registers,  },
+	{ CLUSTER_VPC_US, UNSLICE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen8_3_0_vpc_pipe_bv_cluster_vpc_us_registers,  },
+	{ CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_0,
+		gen8_3_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers,  },
+	{ CLUSTER_VPC_PS, SLICE, PIPE_BV, STATE_FORCE_CTXT_1,
+		gen8_3_0_vpc_slice_pipe_bv_cluster_vpc_ps_registers,  },
+};
+
+static struct gen8_sptp_cluster_registers gen8_3_0_sptp_clusters[] = {
+	{ CLUSTER_NONE, UNSLICE, 1, 2, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen8_3_0_non_context_sp_pipe_none_hlsq_state_registers, 0xae00},
+	{ CLUSTER_NONE, UNSLICE, 1, 2, SP_NCTX_REG, PIPE_BR, 0, SP_TOP,
+		gen8_3_0_non_context_sp_pipe_none_sp_top_registers, 0xae00},
+	{ CLUSTER_NONE, UNSLICE, 1, 2, SP_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen8_3_0_non_context_sp_pipe_none_usptp_registers, 0xae00},
+	{ CLUSTER_NONE, UNSLICE, 1, 2, SP_NCTX_REG, PIPE_BR, 0, HLSQ_DP_STR,
+		gen8_3_0_non_context_sp_pipe_none_hlsq_dp_str_registers, 0xae00},
+	{ CLUSTER_NONE, UNSLICE, 1, 2, TP0_NCTX_REG, PIPE_BR, 0, USPTP,
+		gen8_3_0_non_context_tpl1_pipe_none_usptp_registers, 0xb600},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_usptp_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_sp_top_cctx_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_cctx_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_hlsq_state_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_sp_top_cctx_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen8_3_0_sp_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_usptp_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_state_shared_const_registers,
+		0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP,
+		gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP,
+		gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP,
+		gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_sp_top_cctx_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen8_3_0_sp_slice_pipe_lpac_cluster_sp_ps_usptp_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_cctx_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_state_shared_const_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_hlsq_dp_registers, 0xa800},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP,
+		gen8_3_0_sp_slice_pipe_br_cluster_sp_ps_sp_top_cctx_registers, 0xa800},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
+		gen8_3_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
+		gen8_3_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
+		gen8_3_0_tpl1_slice_pipe_br_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_VS, SLICE, 1, 2, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen8_3_0_tpl1_slice_pipe_bv_cluster_sp_vs_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
+		gen8_3_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
+		gen8_3_0_tpl1_slice_pipe_lpac_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
+		gen8_3_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
+		gen8_3_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+	{ CLUSTER_SP_PS, SLICE, 1, 2, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
+		gen8_3_0_tpl1_slice_pipe_br_cluster_sp_ps_usptp_registers, 0xb000},
+};
+
+/*
+ * Before dumping the CP MVC:
+ * Program CP_APERTURE_CNTL_* with pipeID={CP_PIPE},
+ * then dump the corresponding {Register_PIPE}.
+ */
+static struct gen8_cp_indexed_reg gen8_3_0_cp_indexed_reg_list[] = {
+	{ GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_BR, 0x00040},
+	{ GEN8_CP_SQE_STAT_ADDR_PIPE, GEN8_CP_SQE_STAT_DATA_PIPE, UNSLICE, PIPE_BV, 0x00040},
+	{ GEN8_CP_DRAW_STATE_ADDR_PIPE, GEN8_CP_DRAW_STATE_DATA_PIPE, UNSLICE, PIPE_BR, 0x00200},
+	{ GEN8_CP_DRAW_STATE_ADDR_PIPE, GEN8_CP_DRAW_STATE_DATA_PIPE, UNSLICE, PIPE_BV, 0x00200},
+	{ GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_BR, 0x00800},
+	{ GEN8_CP_ROQ_DBG_ADDR_PIPE, GEN8_CP_ROQ_DBG_DATA_PIPE, UNSLICE, PIPE_BV, 0x00800},
+	{ GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE,
+	UNSLICE, PIPE_BR, 0x08000},
+	{ GEN8_CP_SQE_UCODE_DBG_ADDR_PIPE, GEN8_CP_SQE_UCODE_DBG_DATA_PIPE,
+	UNSLICE, PIPE_BV, 0x08000},
+	{ GEN8_CP_RESOURCE_TABLE_DBG_ADDR_BV, GEN8_CP_RESOURCE_TABLE_DBG_DATA_BV,
+	UNSLICE, PIPE_NONE, 0x04100},
+	{ GEN8_CP_FIFO_DBG_ADDR_DDE_PIPE, GEN8_CP_FIFO_DBG_DATA_DDE_PIPE,
+	UNSLICE, PIPE_DDE_BR, 0x01100},
+	{ GEN8_CP_FIFO_DBG_ADDR_DDE_PIPE, GEN8_CP_FIFO_DBG_DATA_DDE_PIPE,
+	UNSLICE, PIPE_DDE_BV, 0x01100},
+};
+
+/*
+ * Before dumping the CP Mempool over the CP_*_MEM_POOL_DBG_ADDR/DATA
+ * indexed register pair, it must be stabilized:
+ * for p in [CP_PIPE_BR, CP_PIPE_BV]:
+ *   Program CP_APERTURE_CNTL_* with pipeID={p} sliceID={MAX_UINT}
+ *   Program CP_CHICKEN_DBG_PIPE[crashStabilizeMVC] bit = 1.
+ *   Dump CP_MEM_POOL_DBG_ADDR_PIPE for pipe=p
+ *   Program CP_CHICKEN_DBG_PIPE[crashStabilizeMVC] bit = 0.
+ *
+ * The same sequence applies to CP_SLICE_MEM_POOL_DBG_ADDR_PIPE
+ * (an illustrative sketch follows the register list below):
+ * for p in [CP_PIPE_BR, CP_PIPE_BV]:
+ *   for s in [0,1,2]:
+ *     Program CP_APERTURE_CNTL_* with pipeID={p} sliceID={s}
+ *     Program CP_CHICKEN_DBG_PIPE[crashStabilizeMVC] bit = 1.
+ *     Program CP_SLICE_CHICKEN_DBG[crashStabilizeMVC] bit = 1.
+ *     Dump CP_SLICE_MEM_POOL_DBG_ADDR_PIPE for pipe=p, sliceID=s
+ *     Program CP_CHICKEN_DBG_PIPE[crashStabilizeMVC] bit = 0.
+ *     Program CP_SLICE_CHICKEN_DBG[crashStabilizeMVC] bit = 0.
+ */
+
+static struct gen8_cp_indexed_reg gen8_3_0_cp_mempool_reg_list[] = {
+	{ GEN8_CP_MEM_POOL_DBG_ADDR_PIPE, GEN8_CP_MEM_POOL_DBG_DATA_PIPE,
+	UNSLICE, PIPE_BR, 0x02400},
+	{ GEN8_CP_MEM_POOL_DBG_ADDR_PIPE, GEN8_CP_MEM_POOL_DBG_DATA_PIPE,
+	UNSLICE, PIPE_BV, 0x02400},
+	{ GEN8_CP_SLICE_MEM_POOL_DBG_ADDR_PIPE, GEN8_CP_SLICE_MEM_POOL_DBG_DATA_PIPE,
+	SLICE, PIPE_BR, 0x02400},
+	{ GEN8_CP_SLICE_MEM_POOL_DBG_ADDR_PIPE, GEN8_CP_SLICE_MEM_POOL_DBG_DATA_PIPE,
+	SLICE, PIPE_BV, 0x02400},
+};
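/*
 * Illustrative sketch (not part of this patch) of the stabilize-and-dump
 * sequence described above, for one entry of the slice mempool list. The
 * gen8_cp_indexed_reg field names (addr, data, pipe_id, size), the
 * GEN8_CP_CHICKEN_DBG_PIPE / GEN8_CP_SLICE_CHICKEN_DBG macro names, the
 * crashStabilizeMVC bit position (assumed BIT(0)) and the
 * gen8_host_aperture_set() argument order are all assumptions made for
 * illustration only; the real dump path lives in the gen8 snapshot code.
 */
static void __maybe_unused sketch_dump_slice_mempool(struct kgsl_device *device,
		struct adreno_device *adreno_dev,
		const struct gen8_cp_indexed_reg *reg, u32 slice, u32 *out)
{
	u32 i;

	/* Select the pipe and slice aperture before touching CP debug registers */
	gen8_host_aperture_set(adreno_dev, reg->pipe_id, slice, 0);

	/* Assert crashStabilizeMVC in both chicken registers */
	kgsl_regwrite(device, GEN8_CP_CHICKEN_DBG_PIPE, BIT(0));
	kgsl_regwrite(device, GEN8_CP_SLICE_CHICKEN_DBG, BIT(0));

	/* Walk the ADDR/DATA indexed pair one dword at a time */
	for (i = 0; i < reg->size; i++) {
		kgsl_regwrite(device, reg->addr, i);
		kgsl_regread(device, reg->data, &out[i]);
	}

	/* De-assert the stabilize bits and restore the default aperture */
	kgsl_regwrite(device, GEN8_CP_CHICKEN_DBG_PIPE, 0);
	kgsl_regwrite(device, GEN8_CP_SLICE_CHICKEN_DBG, 0);
	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
}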
+
+/*
+ * This is a temporary table that lists register pointers
+ * not yet included in the MVC and SPTP structs.
+ */
+static struct gen8_reg_list gen8_3_0_reg_list[] = {
+	{ UNSLICE, gen8_3_0_gpu_registers },
+	{ SLICE, gen8_3_0_gpu_slice_registers },
+	{ UNSLICE, gen8_3_0_dbgc_registers },
+	{ SLICE, gen8_3_0_dbgc_slice_registers },
+	{ UNSLICE, gen8_3_0_cx_dbgc_registers },
+	{ UNSLICE, NULL},
+};
+
+static struct gen8_reg_list gen8_3_0_ahb_registers[] = {
+	{ UNSLICE, gen8_3_0_gbif_registers },
+	{ UNSLICE, gen8_3_0_ahb_precd_gpu_registers },
+	{ SLICE, gen8_3_0_ahb_precd_gpu_slice_slice_registers },
+	{ UNSLICE, gen8_3_0_ahb_secure_gpu_registers },
+};
+
+/*
+ * Block   : ['GDPM_LKG']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 9 (Regs:26)
+ */
+static const u32 gen8_3_0_gdpm_lkg_registers[] = {
+	 0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50,
+	 0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gdpm_lkg_registers), 8));
+
+/*
+ * Block   : ['GPU_CC_AHB2PHY_BROADCAST_SWMAN']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 1 (Regs:256)
+ */
+static const u32 gen8_3_0_gpu_cc_ahb2phy_broadcast_swman_registers[] = {
+	 0x24c00, 0x24cff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gpu_cc_ahb2phy_broadcast_swman_registers), 8));
+
+/*
+ * Block   : ['GPU_CC_AHB2PHY_SWMAN']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 1 (Regs:6)
+ */
+static const u32 gen8_3_0_gpu_cc_ahb2phy_swman_registers[] = {
+	 0x24800, 0x24805,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gpu_cc_ahb2phy_swman_registers), 8));
+
+/*
+ * Block   : ['GPU_CC_GPU_CC_REG']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 28 (Regs:130)
+ */
+static const u32 gen8_3_0_gpu_cc_gpu_cc_reg_registers[] = {
+	 0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004,
+	 0x26400, 0x26406, 0x26415, 0x2641d, 0x2641f, 0x26437, 0x26439, 0x2643b,
+	 0x2643d, 0x2643f, 0x26443, 0x26444, 0x26478, 0x2647a, 0x26489, 0x2648a,
+	 0x2649c, 0x2649e, 0x264a0, 0x264a1, 0x264c5, 0x264c7, 0x264e8, 0x264ea,
+	 0x264f9, 0x264fc, 0x2650b, 0x2650b, 0x2651c, 0x2651e, 0x26540, 0x2654b,
+	 0x26554, 0x26556, 0x26558, 0x2655c, 0x2655e, 0x2655f, 0x26563, 0x26563,
+	 0x2656d, 0x26573, 0x26576, 0x26576, 0x26578, 0x2657a,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gpu_cc_gpu_cc_reg_registers), 8));
+
+/*
+ * Block   : ['GPU_CC_PLL0_CM_PLL_LUCID_OLE']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 1 (Regs:16)
+ */
+static const u32 gen8_3_0_gpu_cc_pll0_cm_pll_lucid_ole_registers[] = {
+	 0x24000, 0x2400f,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gpu_cc_pll0_cm_pll_lucid_ole_registers), 8));
+
+/*
+ * Block   : ['ACD_ACD']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 10 (Regs:53)
+ */
+static const u32 gen8_3_0_acd_acd_mnd_registers[] = {
+	 0x1a400, 0x1a416, 0x1a420, 0x1a42d, 0x1a430, 0x1a431, 0x1a435, 0x1a435,
+	 0x1a437, 0x1a437, 0x1a43a, 0x1a43a, 0x1a442, 0x1a442, 0x1a456, 0x1a458,
+	 0x1a45b, 0x1a45d, 0x1a45f, 0x1a462,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_acd_acd_mnd_registers), 8));
+
+/*
+ * Block   : ['GX_CLKCTL_AHB2PHY_BROADCAST_SWMAN']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 1 (Regs:256)
+ */
+static const u32 gen8_3_0_gx_clkctl_ahb2phy_broadcast_swman_registers[] = {
+	 0x19c00, 0x19cff,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gx_clkctl_ahb2phy_broadcast_swman_registers), 8));
+
+/*
+ * Block   : ['GX_CLKCTL_AHB2PHY_SWMAN']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 1 (Regs:6)
+ */
+static const u32 gen8_3_0_gx_clkctl_ahb2phy_swman_registers[] = {
+	 0x19800, 0x19805,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gx_clkctl_ahb2phy_swman_registers), 8));
+
+/*
+ * Block   : ['GX_CLKCTL_GX_CLKCTL_REG']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 12 (Regs:82)
+ */
+static const u32 gen8_3_0_gx_clkctl_gx_clkctl_reg_registers[] = {
+	 0x1a000, 0x1a004, 0x1a008, 0x1a012, 0x1a014, 0x1a014, 0x1a017, 0x1a017,
+	 0x1a019, 0x1a019, 0x1a022, 0x1a022, 0x1a024, 0x1a029, 0x1a03f, 0x1a05d,
+	 0x1a060, 0x1a063, 0x1a065, 0x1a066, 0x1a068, 0x1a076, 0x1a078, 0x1a07b,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gx_clkctl_gx_clkctl_reg_registers), 8));
+
+/*
+ * Block   : ['GX_CLKCTL_PLL0_CM_PLL_LUCID_OLE']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 1 (Regs:16)
+ */
+static const u32 gen8_3_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers[] = {
+	 0x19000, 0x1900f,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers), 8));
+
+/*
+ * Block   : ['RSCC_RSC']
+ * REGION  : UNSLICE
+ * Pipeline: PIPE_NONE
+ * pairs   : 99 (Regs:598)
+ */
+static const u32 gen8_3_0_rscc_rsc_registers[] = {
+	 0x14000, 0x14034, 0x14036, 0x14036, 0x14040, 0x14042, 0x14044, 0x14045,
+	 0x14047, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c, 0x14091, 0x14094,
+	 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, 0x14100, 0x14104,
+	 0x14114, 0x14119, 0x14124, 0x14132, 0x14154, 0x1416b, 0x14340, 0x14341,
+	 0x14344, 0x14344, 0x14346, 0x1437c, 0x143f0, 0x143f8, 0x143fa, 0x143fe,
+	 0x14400, 0x14404, 0x14406, 0x1440a, 0x1440c, 0x14410, 0x14412, 0x14416,
+	 0x14418, 0x1441c, 0x1441e, 0x14422, 0x14424, 0x14424, 0x14498, 0x144a0,
+	 0x144a2, 0x144a6, 0x144a8, 0x144ac, 0x144ae, 0x144b2, 0x144b4, 0x144b8,
+	 0x144ba, 0x144be, 0x144c0, 0x144c4, 0x144c6, 0x144ca, 0x144cc, 0x144cc,
+	 0x14540, 0x14548, 0x1454a, 0x1454e, 0x14550, 0x14554, 0x14556, 0x1455a,
+	 0x1455c, 0x14560, 0x14562, 0x14566, 0x14568, 0x1456c, 0x1456e, 0x14572,
+	 0x14574, 0x14574, 0x145e8, 0x145f0, 0x145f2, 0x145f6, 0x145f8, 0x145fc,
+	 0x145fe, 0x14602, 0x14604, 0x14608, 0x1460a, 0x1460e, 0x14610, 0x14614,
+	 0x14616, 0x1461a, 0x1461c, 0x1461c, 0x14690, 0x14698, 0x1469a, 0x1469e,
+	 0x146a0, 0x146a4, 0x146a6, 0x146aa, 0x146ac, 0x146b0, 0x146b2, 0x146b6,
+	 0x146b8, 0x146bc, 0x146be, 0x146c2, 0x146c4, 0x146c4, 0x14738, 0x14740,
+	 0x14742, 0x14746, 0x14748, 0x1474c, 0x1474e, 0x14752, 0x14754, 0x14758,
+	 0x1475a, 0x1475e, 0x14760, 0x14764, 0x14766, 0x1476a, 0x1476c, 0x1476c,
+	 0x147e0, 0x147e8, 0x147ea, 0x147ee, 0x147f0, 0x147f4, 0x147f6, 0x147fa,
+	 0x147fc, 0x14800, 0x14802, 0x14806, 0x14808, 0x1480c, 0x1480e, 0x14812,
+	 0x14814, 0x14814, 0x14888, 0x14890, 0x14892, 0x14896, 0x14898, 0x1489c,
+	 0x1489e, 0x148a2, 0x148a4, 0x148a8, 0x148aa, 0x148ae, 0x148b0, 0x148b4,
+	 0x148b6, 0x148ba, 0x148bc, 0x148bc, 0x14930, 0x14938, 0x1493a, 0x1493e,
+	 0x14940, 0x14944, 0x14946, 0x1494a, 0x1494c, 0x14950, 0x14952, 0x14956,
+	 0x14958, 0x1495c, 0x1495e, 0x14962, 0x14964, 0x14964,
+	 UINT_MAX, UINT_MAX,
+};
+static_assert(IS_ALIGNED(sizeof(gen8_3_0_rscc_rsc_registers), 8));
+
+static const u32 *gen8_3_0_external_core_regs[] = {
+	gen8_3_0_gdpm_lkg_registers,
+	gen8_3_0_gpu_cc_ahb2phy_broadcast_swman_registers,
+	gen8_3_0_gpu_cc_ahb2phy_swman_registers,
+	gen8_3_0_gpu_cc_gpu_cc_reg_registers,
+	gen8_3_0_gpu_cc_pll0_cm_pll_lucid_ole_registers,
+};
+
+static struct gen8_reg_list gen8_3_0_gmu_gx_regs[] = {
+	{ UNSLICE, gen8_3_0_gmugx_registers },
+	{ UNSLICE, gen8_3_0_gx_clkctl_ahb2phy_broadcast_swman_registers },
+	{ UNSLICE, gen8_3_0_gx_clkctl_ahb2phy_swman_registers },
+	{ UNSLICE, gen8_3_0_gx_clkctl_pll0_cm_pll_lucid_ole_registers },
+	{ UNSLICE, gen8_3_0_gx_clkctl_gx_clkctl_reg_registers },
+	{ UNSLICE, gen8_3_0_acd_acd_mnd_registers },
+	{ SLICE, gen8_3_0_gmugx_registers },
+};
+
+#endif /*_ADRENO_GEN8_3_0_SNAPSHOT_H */

+ 3332 - 0
qcom/opensource/graphics-kernel/adreno_gen8_gmu.c

@@ -0,0 +1,3332 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <dt-bindings/regulator/qcom,rpmh-regulator-levels.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/dma-map-ops.h>
+#include <linux/firmware.h>
+#include <linux/interconnect.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/of_platform.h>
+#include <linux/qcom-iommu-util.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+#include <linux/sysfs.h>
+#include <soc/qcom/cmd-db.h>
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "adreno_trace.h"
+#include "kgsl_bus.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+static struct gmu_vma_entry gen8_gmu_vma[] = {
+	[GMU_ITCM] = {
+			.start = 0x00000000,
+			.size = SZ_16K,
+		},
+	[GMU_CACHE] = {
+			.start = SZ_16K,
+			.size = (SZ_16M - SZ_16K),
+			.next_va = SZ_16K,
+		},
+	[GMU_DTCM] = {
+			.start = SZ_256M + SZ_16K,
+			.size = SZ_16K,
+		},
+	[GMU_DCACHE] = {
+			.start = 0x0,
+			.size = 0x0,
+		},
+	[GMU_NONCACHED_KERNEL] = {
+			.start = 0x60000000,
+			.size = SZ_512M,
+			.next_va = 0x60000000,
+		},
+	[GMU_NONCACHED_KERNEL_EXTENDED] = {
+			.start = 0xc0000000,
+			.size = SZ_512M,
+			.next_va = 0xc0000000,
+		},
+};
+
+static ssize_t log_stream_enable_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, log_kobj);
+	bool val;
+	int ret;
+
+	ret = kstrtobool(buf, &val);
+	if (ret)
+		return ret;
+
+	gmu->log_stream_enable = val;
+	adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t log_stream_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, log_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->log_stream_enable);
+}
+
+static ssize_t log_group_mask_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, log_kobj);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	gmu->log_group_mask = val;
+	adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t log_group_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, log_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->log_group_mask);
+}
+
+static struct kobj_attribute log_stream_enable_attr =
+	__ATTR(log_stream_enable, 0644, log_stream_enable_show, log_stream_enable_store);
+
+static struct kobj_attribute log_group_mask_attr =
+	__ATTR(log_group_mask, 0644, log_group_mask_show, log_group_mask_store);
+
+static struct attribute *log_attrs[] = {
+	&log_stream_enable_attr.attr,
+	&log_group_mask_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(log);
+
+static struct kobj_type log_kobj_type = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = log_groups,
+};
+
+static ssize_t stats_enable_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj);
+	bool val;
+	int ret;
+
+	ret = kstrtobool(buf, &val);
+	if (ret)
+		return ret;
+
+	gmu->stats_enable = val;
+	adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t stats_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->stats_enable);
+}
+
+static ssize_t stats_mask_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	gmu->stats_mask = val;
+	adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t stats_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->stats_mask);
+}
+
+static ssize_t stats_interval_store(struct kobject *kobj,
+	struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	gmu->stats_interval = val;
+	adreno_mark_for_coldboot(gen8_gmu_to_adreno(gmu));
+	return count;
+}
+
+static ssize_t stats_interval_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct gen8_gmu_device *gmu = container_of(kobj, struct gen8_gmu_device, stats_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->stats_interval);
+}
+
+static struct kobj_attribute stats_enable_attr =
+	__ATTR(stats_enable, 0644, stats_enable_show, stats_enable_store);
+
+static struct kobj_attribute stats_mask_attr =
+	__ATTR(stats_mask, 0644, stats_mask_show, stats_mask_store);
+
+static struct kobj_attribute stats_interval_attr =
+	__ATTR(stats_interval, 0644, stats_interval_show, stats_interval_store);
+
+static struct attribute *stats_attrs[] = {
+	&stats_enable_attr.attr,
+	&stats_mask_attr.attr,
+	&stats_interval_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(stats);
+
+static struct kobj_type stats_kobj_type = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = stats_groups,
+};
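/*
 * Minimal sketch (not part of this patch) of how the log/stats kobjects
 * defined above would typically be registered. The parent kobject and the
 * "log"/"stats" directory names are assumptions; the actual registration
 * is done elsewhere in the GMU probe path.
 */
static int __maybe_unused sketch_register_gmu_sysfs(struct gen8_gmu_device *gmu,
		struct kobject *parent)
{
	int ret;

	/* Exposes log_stream_enable and log_group_mask under <parent>/log */
	ret = kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, parent, "log");
	if (ret)
		return ret;

	/* Exposes stats_enable, stats_mask and stats_interval under <parent>/stats */
	return kobject_init_and_add(&gmu->stats_kobj, &stats_kobj_type, parent, "stats");
}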
+
+static int gen8_timed_poll_check_rscc(struct gen8_gmu_device *gmu,
+		u32 offset, u32 expected_ret,
+		u32 timeout, u32 mask)
+{
+	u32 value;
+
+	return readl_poll_timeout(gmu->rscc_virt + (offset << 2), value,
+		(value & mask) == expected_ret, 100, timeout * 1000);
+}
+
+struct gen8_gmu_device *to_gen8_gmu(struct adreno_device *adreno_dev)
+{
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+
+	return &gen8_dev->gmu;
+}
+
+struct adreno_device *gen8_gmu_to_adreno(struct gen8_gmu_device *gmu)
+{
+	struct gen8_device *gen8_dev =
+			container_of(gmu, struct gen8_device, gmu);
+
+	return &gen8_dev->adreno_dev;
+}
+
+/* Configure and enable GMU low power mode */
+static void gen8_gmu_power_config(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Disable GMU WB/RB buffer and caches at boot */
+	gmu_core_regwrite(device, GEN8_GMUCX_SYS_BUS_CONFIG, 0x1);
+	gmu_core_regwrite(device, GEN8_GMUCX_ICACHE_CONFIG, 0x1);
+	gmu_core_regwrite(device, GEN8_GMUCX_DCACHE_CONFIG, 0x1);
+}
+
+static void gmu_ao_sync_event(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned long flags;
+	u64 ticks;
+
+	/*
+	 * Get the GMU always on ticks and log it in a trace message. This
+	 * will be used to map GMU ticks to ftrace time. Do this in atomic
+	 * context to ensure nothing happens between reading the always
+	 * on ticks and doing the trace.
+	 */
+
+	local_irq_save(flags);
+
+	ticks = gpudev->read_alwayson(adreno_dev);
+
+	trace_gmu_ao_sync(ticks);
+
+	local_irq_restore(flags);
+}
+
+int gen8_gmu_device_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	gmu_core_reset_trace_header(&gmu->trace);
+
+	gmu_ao_sync_event(adreno_dev);
+
+	/* Bring GMU out of reset */
+	gmu_core_regwrite(device, GEN8_GMUCX_CM3_SYSRESET, 0);
+
+	/* Make sure the write is posted before moving ahead */
+	wmb();
+
+	if (gmu_core_timed_poll_check(device, GEN8_GMUCX_CM3_FW_INIT_RESULT,
+			BIT(8), 100, GENMASK(8, 0))) {
+		dev_err(&gmu->pdev->dev, "GMU failed to come out of reset\n");
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/*
+ * gen8_gmu_hfi_start() - Write registers and start HFI.
+ * @device: Pointer to KGSL device
+ */
+int gen8_gmu_hfi_start(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	gmu_core_regwrite(device, GEN8_GMUCX_HFI_CTRL_INIT, 1);
+
+	if (gmu_core_timed_poll_check(device, GEN8_GMUCX_HFI_CTRL_STATUS,
+			BIT(0), 100, BIT(0))) {
+		dev_err(&gmu->pdev->dev, "GMU HFI init failed\n");
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+int gen8_rscc_wakeup_sequence(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct device *dev = &gmu->pdev->dev;
+
+	/* Skip wakeup sequence if we didn't do the sleep sequence */
+	if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags))
+		return 0;
+
+	/* RSC wake sequence */
+	gmu_core_regwrite(device, GEN8_GMUAO_RSCC_CONTROL_REQ, BIT(1));
+
+	/* Write request before polling */
+	wmb();
+
+	if (gmu_core_timed_poll_check(device, GEN8_GMUAO_RSCC_CONTROL_ACK,
+				BIT(1), 100, BIT(1))) {
+		dev_err(dev, "Failed to do GPU RSC power on\n");
+		return -ETIMEDOUT;
+	}
+
+	if (gen8_timed_poll_check_rscc(gmu, GEN8_RSCC_SEQ_BUSY_DRV0,
+				0x0, 100, UINT_MAX)) {
+		dev_err(dev, "GPU RSC sequence stuck in waking up GPU\n");
+		return -ETIMEDOUT;
+	}
+
+	gmu_core_regwrite(device, GEN8_GMUAO_RSCC_CONTROL_REQ, 0);
+
+	clear_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags);
+
+	return 0;
+}
+
+int gen8_rscc_sleep_sequence(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return 0;
+
+	if (test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags))
+		return 0;
+
+	gmu_core_regwrite(device, GEN8_GMUCX_CM3_SYSRESET, 1);
+	/* Make sure M3 is in reset before going on */
+	wmb();
+
+	gmu_core_regread(device, GEN8_GMUCX_GENERAL_9, &gmu->log_wptr_retention);
+
+	gmu_core_regwrite(device, GEN8_GMUAO_RSCC_CONTROL_REQ, BIT(0));
+	/* Make sure the request completes before continuing */
+	wmb();
+
+	ret = gen8_timed_poll_check_rscc(gmu, GEN8_GPU_RSCC_RSC_STATUS0_DRV0,
+			BIT(16), 100, BIT(16));
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "GPU RSC power off fail\n");
+		return -ETIMEDOUT;
+	}
+
+	gmu_core_regwrite(device, GEN8_GMUAO_RSCC_CONTROL_REQ, 0);
+
+	set_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags);
+
+	return 0;
+}
+
+static struct kgsl_memdesc *find_gmu_memdesc(struct gen8_gmu_device *gmu,
+	u32 addr, u32 size)
+{
+	int i;
+
+	for (i = 0; i < gmu->global_entries; i++) {
+		struct kgsl_memdesc *md = &gmu->gmu_globals[i];
+
+		if ((addr >= md->gmuaddr) &&
+				(((addr + size) <= (md->gmuaddr + md->size))))
+			return md;
+	}
+
+	return NULL;
+}
+
+static int find_vma_block(struct gen8_gmu_device *gmu, u32 addr, u32 size)
+{
+	int i;
+
+	for (i = 0; i < GMU_MEM_TYPE_MAX; i++) {
+		struct gmu_vma_entry *vma = &gmu->vma[i];
+
+		if ((addr >= vma->start) &&
+			((addr + size) <= (vma->start + vma->size)))
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+static void load_tcm(struct adreno_device *adreno_dev, const u8 *src,
+	u32 tcm_start, u32 base, const struct gmu_block_header *blk)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 tcm_offset = tcm_start + ((blk->addr - base)/sizeof(u32));
+
+	kgsl_regmap_bulk_write(&device->regmap, tcm_offset, src,
+		blk->size >> 2);
+}
+
+int gen8_gmu_load_fw(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	const u8 *fw = (const u8 *)gmu->fw_image->data;
+
+	while (fw < gmu->fw_image->data + gmu->fw_image->size) {
+		const struct gmu_block_header *blk =
+					(const struct gmu_block_header *)fw;
+		int id;
+
+		fw += sizeof(*blk);
+
+		/* Don't deal with zero size blocks */
+		if (blk->size == 0)
+			continue;
+
+		id = find_vma_block(gmu, blk->addr, blk->size);
+
+		if (id < 0) {
+			dev_err(&gmu->pdev->dev,
+				"Unknown block in GMU FW addr:0x%x size:0x%x\n",
+				blk->addr, blk->size);
+			return -EINVAL;
+		}
+
+		if (id == GMU_ITCM) {
+			load_tcm(adreno_dev, fw,
+				GEN8_GMU_CM3_ITCM_START,
+				gmu->vma[GMU_ITCM].start, blk);
+		} else if (id == GMU_DTCM) {
+			load_tcm(adreno_dev, fw,
+				GEN8_GMU_CM3_DTCM_START,
+				gmu->vma[GMU_DTCM].start, blk);
+		} else {
+			struct kgsl_memdesc *md =
+				find_gmu_memdesc(gmu, blk->addr, blk->size);
+
+			if (!md) {
+				dev_err(&gmu->pdev->dev,
+					"No backing memory for GMU FW block addr:0x%x size:0x%x\n",
+					blk->addr, blk->size);
+				return -EINVAL;
+			}
+
+			memcpy(md->hostptr + (blk->addr - md->gmuaddr), fw,
+				blk->size);
+		}
+
+		fw += blk->size;
+	}
+
+	/* Proceed only after the FW is written */
+	wmb();
+	return 0;
+}
+
+static const char *oob_to_str(enum oob_request req)
+{
+	switch (req) {
+	case oob_gpu:
+		return "oob_gpu";
+	case oob_perfcntr:
+		return "oob_perfcntr";
+	case oob_boot_slumber:
+		return "oob_boot_slumber";
+	case oob_dcvs:
+		return "oob_dcvs";
+	default:
+		return "unknown";
+	}
+}
+
+static void trigger_reset_recovery(struct adreno_device *adreno_dev,
+	enum oob_request req)
+{
+	/*
+	 * Trigger recovery for perfcounter oob only since only
+	 * perfcounter oob can happen alongside an actively rendering gpu.
+	 */
+	if (req != oob_perfcntr)
+		return;
+
+	if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault)
+		adreno_dev->dispatch_ops->fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+}
+
+int gen8_gmu_oob_set(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+	int set, check;
+
+	if (req == oob_perfcntr && gmu->num_oob_perfcntr++)
+		return 0;
+
+	if (req >= oob_boot_slumber) {
+		dev_err(&gmu->pdev->dev,
+			"Unsupported OOB request %s\n",
+			oob_to_str(req));
+		return -EINVAL;
+	}
+
+	set = BIT(30 - req * 2);
+	check = BIT(31 - req);
+
+	gmu_core_regwrite(device, GEN8_GMUCX_HOST2GMU_INTR_SET, set);
+
+	if (gmu_core_timed_poll_check(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, check,
+				100, check)) {
+		if (req == oob_perfcntr)
+			gmu->num_oob_perfcntr--;
+		gmu_core_fault_snapshot(device);
+		ret = -ETIMEDOUT;
+		WARN(1, "OOB request %s timed out\n", oob_to_str(req));
+		trigger_reset_recovery(adreno_dev, req);
+	}
+
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, check);
+
+	trace_kgsl_gmu_oob_set(set);
+	return ret;
+}
+
+void gen8_gmu_oob_clear(struct kgsl_device *device,
+		enum oob_request req)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int clear = BIT(31 - req * 2);
+
+	if (req == oob_perfcntr && --gmu->num_oob_perfcntr)
+		return;
+
+	if (req >= oob_boot_slumber) {
+		dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n",
+				oob_to_str(req));
+		return;
+	}
+
+	gmu_core_regwrite(device, GEN8_GMUCX_HOST2GMU_INTR_SET, clear);
+	trace_kgsl_gmu_oob_clear(clear);
+}
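/*
 * Worked example of the OOB bit mapping used in the two functions above
 * (illustration only, assuming oob_gpu == 0 and oob_perfcntr == 1):
 *
 *   request        set = BIT(30 - req * 2)   ack = BIT(31 - req)   clear = BIT(31 - req * 2)
 *   oob_gpu        BIT(30)                   BIT(31)               BIT(31)
 *   oob_perfcntr   BIT(28)                   BIT(30)               BIT(29)
 *
 * The set and clear bits are written to GEN8_GMUCX_HOST2GMU_INTR_SET, while
 * the ack bit is polled in GEN8_GMUCX_GMU2HOST_INTR_INFO.
 */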
+
+void gen8_gmu_irq_enable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hfi *hfi = &gmu->hfi;
+
+	/* Clear pending IRQs and Unmask needed IRQs */
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, UINT_MAX);
+	gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_CLR, UINT_MAX);
+
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_MASK,
+			(u32)~HFI_IRQ_MASK);
+	gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK,
+			(u32)~GMU_AO_INT_MASK);
+
+	/* Enable all IRQs on host */
+	enable_irq(hfi->irq);
+	enable_irq(gmu->irq);
+
+	if (device->cx_host_irq_num <= 0)
+		return;
+
+	/* Clear pending IRQs, unmask needed interrupts and enable CX host IRQ */
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, UINT_MAX);
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_0_MASK, GEN8_CX_MISC_INT_MASK);
+	enable_irq(device->cx_host_irq_num);
+}
+
+void gen8_gmu_irq_disable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hfi *hfi = &gmu->hfi;
+
+	/* Disable all IRQs on host */
+	disable_irq(gmu->irq);
+	disable_irq(hfi->irq);
+
+	/* Mask all IRQs and clear pending IRQs */
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_MASK, UINT_MAX);
+	gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, UINT_MAX);
+
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, UINT_MAX);
+	gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_CLR, UINT_MAX);
+
+	if (device->cx_host_irq_num <= 0)
+		return;
+
+	/* Disable CX host IRQ, mask all interrupts and clear pending IRQs */
+	disable_irq(device->cx_host_irq_num);
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_0_MASK, UINT_MAX);
+	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, UINT_MAX);
+}
+
+static int gen8_gmu_hfi_start_msg(struct adreno_device *adreno_dev)
+{
+	struct hfi_start_cmd req;
+	int ret;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_START);
+	if (ret)
+		return ret;
+
+	return gen8_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+}
+
+static u32 gen8_rscc_tcsm_drv0_status_reglist[] = {
+	GEN8_RSCC_TCS0_DRV0_STATUS,
+	GEN8_RSCC_TCS1_DRV0_STATUS,
+	GEN8_RSCC_TCS2_DRV0_STATUS,
+	GEN8_RSCC_TCS3_DRV0_STATUS,
+	GEN8_RSCC_TCS4_DRV0_STATUS,
+	GEN8_RSCC_TCS5_DRV0_STATUS,
+	GEN8_RSCC_TCS6_DRV0_STATUS,
+	GEN8_RSCC_TCS7_DRV0_STATUS,
+	GEN8_RSCC_TCS8_DRV0_STATUS,
+	GEN8_RSCC_TCS9_DRV0_STATUS,
+};
+
+static int gen8_complete_rpmh_votes(struct gen8_gmu_device *gmu,
+		u32 timeout)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(gen8_rscc_tcsm_drv0_status_reglist); i++)
+		ret |= gen8_timed_poll_check_rscc(gmu,
+			gen8_rscc_tcsm_drv0_status_reglist[i], BIT(0), timeout,
+			BIT(0));
+
+	if (ret)
+		dev_err(&gmu->pdev->dev, "RPMH votes timedout: %d\n", ret);
+
+	return ret;
+}
+
+#define GX_GDSC_POWER_OFF	BIT(0)
+#define GX_CLK_OFF		BIT(1)
+#define is_on(val)		(!((val) & (GX_GDSC_POWER_OFF | GX_CLK_OFF)))
+
+bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev)
+{
+	u32 val;
+
+	gmu_core_regread(KGSL_DEVICE(adreno_dev),
+			GEN8_GMUCX_GFX_PWR_CLK_STATUS, &val);
+	return is_on(val);
+}
+
+bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device)
+{
+	u32 val;
+
+	gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, &val);
+	return val == GPU_HW_ACTIVE;
+}
+
+static const char *idle_level_name(int level)
+{
+	if (level == GPU_HW_ACTIVE)
+		return "GPU_HW_ACTIVE";
+	else if (level == GPU_HW_IFPC)
+		return "GPU_HW_IFPC";
+
+	return "(Unknown)";
+}
+
+int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u32 reg, reg1, reg2, reg3, reg4;
+	unsigned long t;
+	u64 ts1, ts2;
+
+	ts1 = gpudev->read_alwayson(adreno_dev);
+
+	t = jiffies + msecs_to_jiffies(100);
+	do {
+		gmu_core_regread(device,
+			GEN8_GMUCX_RPMH_POWER_STATE, &reg);
+		gmu_core_regread(device, GEN8_GMUCX_GFX_PWR_CLK_STATUS, &reg1);
+
+		/*
+		 * Check that we are at lowest level. If lowest level is IFPC
+		 * double check that GFX clock is off.
+		 */
+		if (gmu->idle_level == reg)
+			if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1)))
+				return 0;
+
+		/* Back off for up to 100us to reduce unnecessary AHB bus traffic */
+		usleep_range(10, 100);
+	} while (!time_after(jiffies, t));
+
+	/* Check one last time */
+	gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, &reg);
+	gmu_core_regread(device, GEN8_GMUCX_GFX_PWR_CLK_STATUS, &reg1);
+
+	/*
+	 * Check that we are at lowest level. If lowest level is IFPC
+	 * double check that GFX clock is off.
+	 */
+	if (gmu->idle_level == reg)
+		if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1)))
+			return 0;
+
+	ts2 = gpudev->read_alwayson(adreno_dev);
+
+	/* Collect abort data to help with debugging */
+	gmu_core_regread(device, GEN8_GMUAO_GPU_CX_BUSY_STATUS, &reg2);
+	gmu_core_regread(device, GEN8_GMUAO_RBBM_INT_UNMASKED_STATUS_SHADOW, &reg3);
+	gmu_core_regread(device, GEN8_GMUCX_PWR_COL_KEEPALIVE, &reg4);
+
+	dev_err(&gmu->pdev->dev,
+		"----------------------[ GMU error ]----------------------\n");
+	dev_err(&gmu->pdev->dev, "Timeout waiting for lowest idle level %s\n",
+		idle_level_name(gmu->idle_level));
+	dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1);
+	dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", ts2-ts1);
+	dev_err(&gmu->pdev->dev, "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1);
+	dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2);
+	dev_err(&gmu->pdev->dev, "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", reg3, reg4);
+
+	/* Access GX registers only when GX is ON */
+	if (is_on(reg1)) {
+		gen8_regread_aperture(device, GEN8_CP_PIPE_STATUS_PIPE, &reg, PIPE_BV, 0, 0);
+		gen8_regread_aperture(device, GEN8_CP_PIPE_STATUS_PIPE, &reg1, PIPE_BR, 0, 0);
+		/* Clear aperture register */
+		gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+		kgsl_regread(device, GEN8_CP_CP2GMU_STATUS, &reg2);
+		kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &reg3);
+
+		dev_err(&gmu->pdev->dev, "GEN8_CP_PIPE_STATUS_PIPE BV:%x BR:%x\n", reg, reg1);
+		dev_err(&gmu->pdev->dev, "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", reg2, reg3);
+	}
+
+	WARN_ON(1);
+	gmu_core_fault_snapshot(device);
+	return -ETIMEDOUT;
+}
+
+/* Bitmask for GPU idle status check */
+#define CXGXCPUBUSYIGNAHB	BIT(30)
+int gen8_gmu_wait_for_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u32 status2;
+	u64 ts1;
+
+	ts1 = gpudev->read_alwayson(adreno_dev);
+	if (gmu_core_timed_poll_check(device, GEN8_GMUAO_GPU_CX_BUSY_STATUS,
+			0, 100, CXGXCPUBUSYIGNAHB)) {
+		gmu_core_regread(device,
+				GEN8_GMUAO_GPU_CX_BUSY_STATUS2, &status2);
+		dev_err(&gmu->pdev->dev,
+				"GMU not idling: status2=0x%x %llx %llx\n",
+				status2, ts1,
+				gpudev->read_alwayson(adreno_dev));
+		gmu_core_fault_snapshot(device);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+int gen8_gmu_version_info(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+
+	/* GMU version info is at a fixed offset in the DTCM */
+	gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xff8,
+			&gmu->ver.core);
+	gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xff9,
+			&gmu->ver.core_dev);
+	gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xffa,
+			&gmu->ver.pwr);
+	gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xffb,
+			&gmu->ver.pwr_dev);
+	gmu_core_regread(device, GEN8_GMU_CM3_DTCM_START + 0xffc,
+			&gmu->ver.hfi);
+
+	/* Check if gmu fw version on device is compatible with kgsl driver */
+	if (gmu->ver.core < gen8_core->gmu_fw_version) {
+		dev_err_once(&gmu->pdev->dev,
+			     "GMU FW version 0x%x error (expected 0x%x)\n",
+			     gmu->ver.core, gen8_core->gmu_fw_version);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int gen8_gmu_itcm_shadow(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 i, *dest;
+
+	if (gmu->itcm_shadow)
+		return 0;
+
+	gmu->itcm_shadow = vzalloc(gmu->vma[GMU_ITCM].size);
+	if (!gmu->itcm_shadow)
+		return -ENOMEM;
+
+	dest = (u32 *)gmu->itcm_shadow;
+
+	for (i = 0; i < (gmu->vma[GMU_ITCM].size >> 2); i++)
+		gmu_core_regread(KGSL_DEVICE(adreno_dev),
+			GEN8_GMU_CM3_ITCM_START + i, dest++);
+
+	return 0;
+}
+
+void gen8_gmu_register_config(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 val;
+
+	/* Clear any previously set cm3 fault */
+	atomic_set(&gmu->cm3_fault, 0);
+
+	/* Init the power state register before GMU turns on GX */
+	gmu_core_regwrite(device, GEN8_GMUCX_RPMH_POWER_STATE, 0xF);
+
+	/* Vote veto for FAL10 */
+	gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1);
+	gmu_core_regwrite(device, GEN8_GMUCX_CX_FAL_INTF, 0x1);
+
+	/* Clear init result to make sure we are getting fresh value */
+	gmu_core_regwrite(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, 0);
+	gmu_core_regwrite(device, GEN8_GMUCX_CM3_BOOT_CONFIG, 0x2);
+
+	gmu_core_regwrite(device, GEN8_GMUCX_HFI_QTBL_ADDR,
+			gmu->hfi.hfi_mem->gmuaddr);
+	gmu_core_regwrite(device, GEN8_GMUCX_HFI_QTBL_INFO, 1);
+
+	gmu_core_regwrite(device, GEN8_GMUAO_AHB_FENCE_RANGE_0, BIT(31) |
+			FIELD_PREP(GENMASK(30, 18), 0x32) |
+			FIELD_PREP(GENMASK(17, 0), 0x8a0));
+
+	/*
+	 * Make sure the CM3 state is at its reset value. Snapshot toggles the
+	 * NMI bit, and if we boot the GMU with the NMI bit set it will jump
+	 * straight into the NMI handler without executing the __main code.
+	 */
+	gmu_core_regwrite(device, GEN8_GMUCX_CM3_CFG, 0x4052);
+
+	/* Set up GBIF registers from the GPU core definition */
+	kgsl_regmap_multi_write(&device->regmap, gen8_core->gbif,
+		gen8_core->gbif_count);
+
+	/*
+	 * We may have asserted GBIF halt as part of the reset sequence, and it
+	 * may not get cleared if the GDSC was not reset. So clear it before
+	 * attempting GMU boot.
+	 */
+	kgsl_regwrite(device, GEN8_GBIF_HALT, BIT(3));
+
+	/* Set vrb address before starting GMU */
+	if (!IS_ERR_OR_NULL(gmu->vrb))
+		gmu_core_regwrite(device, GEN8_GMUCX_GENERAL_11, gmu->vrb->gmuaddr);
+
+	/* Set the log wptr index */
+	gmu_core_regwrite(device, GEN8_GMUCX_GENERAL_9,
+			gmu->log_wptr_retention);
+
+	/* Pass chipid to GMU FW, must happen before starting GMU */
+	gmu_core_regwrite(device, GEN8_GMUCX_GENERAL_10,
+			ADRENO_GMU_REV(ADRENO_GPUREV(adreno_dev)));
+
+	/* Log size is encoded in (number of 4K units - 1) */
+	val = (gmu->gmu_log->gmuaddr & GENMASK(31, 12)) |
+		((GMU_LOG_SIZE/SZ_4K - 1) & GENMASK(7, 0));
+	gmu_core_regwrite(device, GEN8_GMUCX_GENERAL_8, val);
+
+	/* Configure power control and bring the GMU out of reset */
+	gen8_gmu_power_config(adreno_dev);
+
+	/*
+	 * Enable BCL throttling -
+	 * XOCLK1: countable: 0x13 (25% throttle)
+	 * XOCLK2: countable: 0x17 (58% throttle)
+	 * XOCLK3: countable: 0x19 (75% throttle)
+	 * POWER_CONTROL_SELECT_0 controls counters 0 - 3, each selector
+	 * is 8 bits wide.
+	 */
+	if (adreno_dev->bcl_enabled)
+		gmu_core_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0,
+			0xffffff00, FIELD_PREP(GENMASK(31, 24), 0x19) |
+			FIELD_PREP(GENMASK(23, 16), 0x17) |
+			FIELD_PREP(GENMASK(15, 8), 0x13));
+
+}
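/*
 * Worked example of the GENERAL_8 log encoding above (illustration only,
 * assuming GMU_LOG_SIZE == SZ_16K): for a log buffer at GMU VA 0x60010000,
 * val = (0x60010000 & GENMASK(31, 12)) | ((SZ_16K / SZ_4K - 1) & GENMASK(7, 0))
 *     = 0x60010000 | 0x3 = 0x60010003
 * i.e. the upper bits carry the 4K-aligned log address and the low byte
 * carries (number of 4K units - 1).
 */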
+
+static struct gmu_vma_node *find_va(struct gmu_vma_entry *vma, u32 addr, u32 size)
+{
+	struct rb_node *node = vma->vma_root.rb_node;
+
+	while (node != NULL) {
+		struct gmu_vma_node *data = rb_entry(node, struct gmu_vma_node, node);
+
+		if (addr + size <= data->va)
+			node = node->rb_left;
+		else if (addr >= data->va + data->size)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+/* Return true if VMA supports dynamic allocations */
+static bool vma_is_dynamic(int vma_id)
+{
+	/* Dynamic allocations are done in the GMU_NONCACHED_KERNEL space */
+	return vma_id == GMU_NONCACHED_KERNEL;
+}
+
+static int insert_va(struct gmu_vma_entry *vma, u32 addr, u32 size)
+{
+	struct rb_node **node, *parent = NULL;
+	struct gmu_vma_node *new = kzalloc(sizeof(*new), GFP_NOWAIT);
+
+	if (new == NULL)
+		return -ENOMEM;
+
+	new->va = addr;
+	new->size = size;
+
+	node = &vma->vma_root.rb_node;
+	while (*node != NULL) {
+		struct gmu_vma_node *this;
+
+		parent = *node;
+		this = rb_entry(parent, struct gmu_vma_node, node);
+
+		if (addr + size <= this->va)
+			node = &parent->rb_left;
+		else if (addr >= this->va + this->size)
+			node = &parent->rb_right;
+		else {
+			kfree(new);
+			return -EEXIST;
+		}
+	}
+
+	/* Add new node and rebalance tree */
+	rb_link_node(&new->node, parent, node);
+	rb_insert_color(&new->node, &vma->vma_root);
+
+	return 0;
+}
+
+static u32 find_unmapped_va(struct gmu_vma_entry *vma, u32 size, u32 va_align)
+{
+	struct rb_node *node = rb_first(&vma->vma_root);
+	u32 cur = vma->start;
+	bool found = false;
+
+	cur = ALIGN(cur, va_align);
+
+	while (node) {
+		struct gmu_vma_node *data = rb_entry(node, struct gmu_vma_node, node);
+
+		if (cur + size <= data->va) {
+			found = true;
+			break;
+		}
+
+		cur = ALIGN(data->va + data->size, va_align);
+		node = rb_next(node);
+	}
+
+	/* Do we have space after the last node? */
+	if (!found && (cur + size <= vma->start + vma->size))
+		found = true;
+	return found ? cur : 0;
+}
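/*
 * Worked example of the first-fit search above (illustration only): with
 * vma->start == 0x60000000, 4K alignment, and existing nodes at
 * [0x60000000, +0x1000] and [0x60003000, +0x1000], a request of size
 * 0x2000 advances past the first node to 0x60001000, which fits before
 * the second node, so 0x60001000 is returned.
 */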
+
+static int _map_gmu_dynamic(struct gen8_gmu_device *gmu,
+	struct kgsl_memdesc *md,
+	u32 addr, u32 vma_id, int attrs, u32 align)
+{
+	int ret;
+	struct gmu_vma_entry *vma = &gmu->vma[vma_id];
+	struct gmu_vma_node *vma_node = NULL;
+	u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align));
+
+	spin_lock(&vma->lock);
+	if (!addr) {
+		/*
+		 * We will end up with a hole (GMU VA range not backed by physical mapping) if
+		 * the aligned size is greater than the size of the physical mapping
+		 */
+		addr = find_unmapped_va(vma, size, hfi_get_gmu_va_alignment(align));
+		if (addr == 0) {
+			spin_unlock(&vma->lock);
+			dev_err(&gmu->pdev->dev,
+				"Insufficient VA space size: %x\n", size);
+			return -ENOMEM;
+		}
+	}
+
+	ret = insert_va(vma, addr, size);
+	spin_unlock(&vma->lock);
+	if (ret < 0) {
+		dev_err(&gmu->pdev->dev,
+			"Could not insert va: %x size %x\n", addr, size);
+		return ret;
+	}
+
+	ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs);
+	if (!ret) {
+		md->gmuaddr = addr;
+		return 0;
+	}
+
+	/* Failed to map to GMU */
+	dev_err(&gmu->pdev->dev,
+		"Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n",
+		addr, md->size, ret);
+
+	spin_lock(&vma->lock);
+	vma_node = find_va(vma, md->gmuaddr, size);
+	if (vma_node)
+		rb_erase(&vma_node->node, &vma->vma_root);
+	spin_unlock(&vma->lock);
+	kfree(vma_node);
+
+	return ret;
+}
+
+static int _map_gmu_static(struct gen8_gmu_device *gmu,
+	struct kgsl_memdesc *md,
+	u32 addr, u32 vma_id, int attrs, u32 align)
+{
+	int ret;
+	struct gmu_vma_entry *vma = &gmu->vma[vma_id];
+	u32 size = ALIGN(md->size, hfi_get_gmu_sz_alignment(align));
+
+	if (!addr)
+		addr = ALIGN(vma->next_va, hfi_get_gmu_va_alignment(align));
+
+	ret = gmu_core_map_memdesc(gmu->domain, md, addr, attrs);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n",
+			addr, md->size, ret);
+		return ret;
+	}
+	md->gmuaddr = addr;
+	/*
+	 * We will end up with a hole (GMU VA range not backed by physical mapping) if the aligned
+	 * size is greater than the size of the physical mapping
+	 */
+	vma->next_va = md->gmuaddr + size;
+	return 0;
+}
+
+static int _map_gmu(struct gen8_gmu_device *gmu,
+	struct kgsl_memdesc *md,
+	u32 addr, u32 vma_id, int attrs, u32 align)
+{
+	return vma_is_dynamic(vma_id) ?
+			_map_gmu_dynamic(gmu, md, addr, vma_id, attrs, align) :
+			_map_gmu_static(gmu, md, addr, vma_id, attrs, align);
+}
+
+int gen8_gmu_import_buffer(struct gen8_gmu_device *gmu, u32 vma_id,
+				struct kgsl_memdesc *md, u32 attrs, u32 align)
+{
+	return _map_gmu(gmu, md, 0, vma_id, attrs, align);
+}
+
+struct kgsl_memdesc *gen8_reserve_gmu_kernel_block(struct gen8_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, u32 align)
+{
+	int ret;
+	struct kgsl_memdesc *md;
+	struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu));
+	int attrs = IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV;
+
+	if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals))
+		return ERR_PTR(-ENOMEM);
+
+	md = &gmu->gmu_globals[gmu->global_entries];
+
+	ret = kgsl_allocate_kernel(device, md, size, 0, KGSL_MEMDESC_SYSMEM);
+	if (ret) {
+		memset(md, 0x0, sizeof(*md));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = _map_gmu(gmu, md, addr, vma_id, attrs, align);
+	if (ret) {
+		kgsl_sharedmem_free(md);
+		memset(md, 0x0, sizeof(*md));
+		return ERR_PTR(ret);
+	}
+
+	gmu->global_entries++;
+
+	return md;
+}
+
+struct kgsl_memdesc *gen8_reserve_gmu_kernel_block_fixed(struct gen8_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 align)
+{
+	int ret;
+	struct kgsl_memdesc *md;
+	struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu));
+
+	if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals))
+		return ERR_PTR(-ENOMEM);
+
+	md = &gmu->gmu_globals[gmu->global_entries];
+
+	ret = kgsl_memdesc_init_fixed(device, gmu->pdev, resource, md);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = _map_gmu(gmu, md, addr, vma_id, attrs, align);
+
+	sg_free_table(md->sgt);
+	kfree(md->sgt);
+	md->sgt = NULL;
+
+	if (!ret)
+		gmu->global_entries++;
+	else {
+		dev_err(&gmu->pdev->dev,
+			"Unable to map GMU kernel block: addr:0x%08x size:0x%llx :%d\n",
+			addr, md->size, ret);
+		memset(md, 0x0, sizeof(*md));
+		md = ERR_PTR(ret);
+	}
+	return md;
+}
+
+int gen8_alloc_gmu_kernel_block(struct gen8_gmu_device *gmu,
+	struct kgsl_memdesc *md, u32 size, u32 vma_id, int attrs)
+{
+	int ret;
+	struct kgsl_device *device = KGSL_DEVICE(gen8_gmu_to_adreno(gmu));
+
+	ret = kgsl_allocate_kernel(device, md, size, 0, KGSL_MEMDESC_SYSMEM);
+	if (ret)
+		return ret;
+
+	ret = _map_gmu(gmu, md, 0, vma_id, attrs, 0);
+	if (ret)
+		kgsl_sharedmem_free(md);
+
+	return ret;
+}
+
+void gen8_free_gmu_block(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md)
+{
+	int vma_id = find_vma_block(gmu, md->gmuaddr, md->size);
+	struct gmu_vma_entry *vma;
+	struct gmu_vma_node *vma_node;
+
+	if ((vma_id < 0) || !vma_is_dynamic(vma_id))
+		return;
+
+	vma = &gmu->vma[vma_id];
+
+	/*
+	 * Do not remove the vma node if we failed to unmap the entire buffer. This is because the
+	 * iommu driver considers remapping an already mapped iova as fatal.
+	 */
+	if (md->size != iommu_unmap(gmu->domain, md->gmuaddr, md->size))
+		goto free;
+
+	spin_lock(&vma->lock);
+	vma_node = find_va(vma, md->gmuaddr, md->size);
+	if (vma_node)
+		rb_erase(&vma_node->node, &vma->vma_root);
+	spin_unlock(&vma->lock);
+	kfree(vma_node);
+free:
+	kgsl_sharedmem_free(md);
+}
+
+static int gen8_gmu_process_prealloc(struct gen8_gmu_device *gmu,
+	struct gmu_block_header *blk)
+{
+	struct kgsl_memdesc *md;
+
+	int id = find_vma_block(gmu, blk->addr, blk->value);
+
+	if (id < 0) {
+		dev_err(&gmu->pdev->dev,
+			"Invalid prealloc block addr: 0x%x value:%d\n",
+			blk->addr, blk->value);
+		return id;
+	}
+
+	/* Nothing to do for TCM blocks or user uncached */
+	if (id == GMU_ITCM || id == GMU_DTCM || id == GMU_NONCACHED_USER)
+		return 0;
+
+	/* Check if the block is already allocated */
+	md = find_gmu_memdesc(gmu, blk->addr, blk->value);
+	if (md != NULL)
+		return 0;
+
+	md = gen8_reserve_gmu_kernel_block(gmu, blk->addr, blk->value, id, 0);
+
+	return PTR_ERR_OR_ZERO(md);
+}
+
+int gen8_gmu_parse_fw(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	struct gmu_block_header *blk;
+	int ret, offset = 0;
+	const char *gmufw_name = gen8_core->gmufw_name;
+
+	/*
+	 * If the GMU firmware is already saved and verified, there is nothing
+	 * new to fetch. Skip only request_firmware() but still run the
+	 * preallocation pass below, so that if a previous call loaded the
+	 * firmware but failed preallocation, the next open call does not
+	 * return early without completing the preallocations.
+	 */
+	if (!gmu->fw_image) {
+
+		if (gen8_core->gmufw_name == NULL)
+			return -EINVAL;
+
+		ret = request_firmware(&gmu->fw_image, gmufw_name,
+				&gmu->pdev->dev);
+		if (ret) {
+			dev_err(&gmu->pdev->dev, "request_firmware (%s) failed: %d\n",
+					gmufw_name, ret);
+			return ret;
+		}
+	}
+
+	/*
+	 * Zero payload fw blocks contain metadata and are
+	 * guaranteed to precede fw load data. Parse the
+	 * metadata blocks.
+	 */
+	while (offset < gmu->fw_image->size) {
+		blk = (struct gmu_block_header *)&gmu->fw_image->data[offset];
+
+		if (offset + sizeof(*blk) > gmu->fw_image->size) {
+			dev_err(&gmu->pdev->dev, "Invalid FW Block\n");
+			return -EINVAL;
+		}
+
+		/* Done with the zero-length metadata blocks, so stop parsing */
+		if (blk->size)
+			break;
+
+		offset += sizeof(*blk);
+
+		if (blk->type == GMU_BLK_TYPE_PREALLOC_REQ ||
+			blk->type == GMU_BLK_TYPE_PREALLOC_PERSIST_REQ) {
+			ret = gen8_gmu_process_prealloc(gmu, blk);
+
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+int gen8_gmu_memory_init(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	/* GMU master log */
+	if (IS_ERR_OR_NULL(gmu->gmu_log))
+		gmu->gmu_log = gen8_reserve_gmu_kernel_block(gmu, 0,
+				GMU_LOG_SIZE, GMU_NONCACHED_KERNEL, 0);
+
+	return PTR_ERR_OR_ZERO(gmu->gmu_log);
+}
+
+static int gen8_gmu_init(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = gen8_gmu_parse_fw(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_gmu_memory_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	return gen8_hfi_init(adreno_dev);
+}
+
+static void _do_gbif_halt(struct kgsl_device *device, u32 reg, u32 ack_reg,
+	u32 mask, const char *client)
+{
+	u32 ack;
+	unsigned long t;
+
+	kgsl_regwrite(device, reg, mask);
+
+	t = jiffies + msecs_to_jiffies(100);
+	do {
+		kgsl_regread(device, ack_reg, &ack);
+		if ((ack & mask) == mask)
+			return;
+
+		/*
+		 * If we are attempting recovery in case of stall-on-fault
+		 * then the halt sequence will not complete as long as SMMU
+		 * is stalled.
+		 */
+		kgsl_mmu_pagefault_resume(&device->mmu, false);
+
+		usleep_range(10, 100);
+	} while (!time_after(jiffies, t));
+
+	/* Check one last time */
+	kgsl_mmu_pagefault_resume(&device->mmu, false);
+
+	kgsl_regread(device, ack_reg, &ack);
+	if ((ack & mask) == mask)
+		return;
+
+	dev_err(device->dev, "%s GBIF halt timed out\n", client);
+}
+
+static void gen8_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev)
+{
+	int ret = 0;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	/* Disconnecting the GPU from the bus is not needed if the CX GDSC goes off later */
+
+	/*
+	 * GEMNOC can enter power collapse state during GPU power down sequence.
+	 * This could abort CX GDSC collapse. Assert Qactive to avoid this.
+	 */
+	gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1);
+
+	/* Check no outstanding RPMh voting */
+	gen8_complete_rpmh_votes(gmu, 1);
+
+	/* Clear the WRITEDROPPED fields and set fence to allow mode */
+	gmu_core_regwrite(device, GEN8_GMUAO_AHB_FENCE_STATUS_CLR, 0x7);
+	gmu_core_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0);
+
+	/* Make sure above writes are committed before we proceed to recovery */
+	wmb();
+
+	gmu_core_regwrite(device, GEN8_GMUCX_CM3_SYSRESET, 1);
+
+	/* Halt GX traffic */
+	if (gen8_gmu_gx_is_on(adreno_dev))
+		_do_gbif_halt(device, GEN8_RBBM_GBIF_HALT,
+				GEN8_RBBM_GBIF_HALT_ACK,
+				GEN8_GBIF_GX_HALT_MASK,
+				"GX");
+
+	/* Halt CX traffic */
+	_do_gbif_halt(device, GEN8_GBIF_HALT, GEN8_GBIF_HALT_ACK,
+			GEN8_GBIF_ARB_HALT_MASK, "CX");
+
+	if (gen8_gmu_gx_is_on(adreno_dev))
+		kgsl_regwrite(device, GEN8_RBBM_SW_RESET_CMD, 0x1);
+
+	/* Allow the software reset to complete */
+	udelay(100);
+
+	/*
+	 * This is based on the assumption that GMU is the only one controlling
+	 * the GX HS. This code path is the only client voting for GX through
+	 * the regulator interface.
+	 */
+	if (pwr->gx_gdsc) {
+		if (gen8_gmu_gx_is_on(adreno_dev)) {
+			/*
+			 * Switch GX GDSC control from GMU to CPU and force a
+			 * non-zero reference count in the clk driver so the
+			 * next disable call turns off the GDSC.
+			 */
+			ret = regulator_enable(pwr->gx_gdsc);
+			if (ret)
+				dev_err(&gmu->pdev->dev,
+					"suspend fail: gx enable %d\n", ret);
+
+			ret = regulator_disable(pwr->gx_gdsc);
+			if (ret)
+				dev_err(&gmu->pdev->dev,
+					"suspend fail: gx disable %d\n", ret);
+
+			if (gen8_gmu_gx_is_on(adreno_dev))
+				dev_err(&gmu->pdev->dev,
+					"gx is stuck on\n");
+		}
+	}
+}
+
+/*
+ * gen8_gmu_notify_slumber() - initiate request to GMU to prepare to slumber
+ * @device: Pointer to KGSL device
+ */
+static int gen8_gmu_notify_slumber(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int bus_level = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq;
+	int perf_idx = gmu->dcvs_table.gpu_level_num -
+			pwr->default_pwrlevel - 1;
+	struct hfi_prep_slumber_cmd req = {
+		.freq = perf_idx,
+		.bw = bus_level,
+	};
+	int ret;
+
+	req.bw |= gen8_bus_ab_quantize(adreno_dev, 0);
+
+	/* Disable the power counter so that the GMU is not busy */
+	gmu_core_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0);
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER);
+	if (ret)
+		return ret;
+
+	ret = gen8_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+
+	/* Make sure the fence is in ALLOW mode */
+	gmu_core_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0);
+
+	/*
+	 * GEMNOC can enter power collapse state during GPU power down sequence.
+	 * This could abort CX GDSC collapse. Assert Qactive to avoid this.
+	 */
+	gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1);
+
+	return ret;
+}
+
+void gen8_gmu_suspend(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	gen8_gmu_pwrctrl_suspend(adreno_dev);
+
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen8_rdpm_cx_freq_update(gmu, 0);
+
+	dev_err(&gmu->pdev->dev, "Suspended GMU\n");
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+}
+
+static int gen8_gmu_dcvs_set(struct adreno_device *adreno_dev,
+		int gpu_pwrlevel, int bus_level, u32 ab)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_dcvs_table *table = &gmu->dcvs_table;
+	struct hfi_gx_bw_perf_vote_cmd req = {
+		.ack_type = DCVS_ACK_BLOCK,
+		.freq = INVALID_DCVS_IDX,
+		.bw = INVALID_DCVS_IDX,
+	};
+	int ret = 0;
+
+	if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags))
+		return 0;
+
+	/* Do not allow a GPU clock vote at the XO (lowest) level through the GMU */
+	if ((gpu_pwrlevel != INVALID_DCVS_IDX) &&
+			(gpu_pwrlevel >= table->gpu_level_num - 1))
+		return -EINVAL;
+
+	if (gpu_pwrlevel < table->gpu_level_num - 1)
+		req.freq = table->gpu_level_num - gpu_pwrlevel - 1;
+
+	if (bus_level < pwr->ddr_table_count && bus_level > 0)
+		req.bw = bus_level;
+
+	req.bw |=  gen8_bus_ab_quantize(adreno_dev, ab);
+
+	/* GMU will vote for slumber levels through the sleep sequence */
+	if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_BW_VOTE))
+		return 0;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE);
+	if (ret)
+		return ret;
+
+	ret = gen8_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
+	if (ret) {
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Failed to set GPU perf idx %d, bw idx %d\n",
+			req.freq, req.bw);
+
+		/*
+		 * If this was a DCVS request alongside an active GPU, request
+		 * dispatcher-based reset and recovery.
+		 */
+		if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+			adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT |
+				ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+	}
+
+	if (req.freq != INVALID_DCVS_IDX)
+		gen8_rdpm_mx_freq_update(gmu,
+			gmu->dcvs_table.gx_votes[req.freq].freq);
+
+	return ret;
+}
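/*
 * Worked example of the index translation above (illustration only,
 * assuming pwrlevel 0 is the fastest KGSL level and gpu_level_num == 8):
 * gpu_pwrlevel 0 maps to req.freq = 8 - 0 - 1 = 7 and gpu_pwrlevel 6 maps
 * to req.freq = 1, i.e. the GMU DCVS table is indexed in the opposite
 * order; pwrlevel 7 (XO) is rejected by the check above.
 */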
+
+static int gen8_gmu_clock_set(struct adreno_device *adreno_dev, u32 pwrlevel)
+{
+	return gen8_gmu_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX, INVALID_AB_VALUE);
+}
+
+static int gen8_gmu_ifpc_store(struct kgsl_device *device,
+		u32 val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 requested_idle_level;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_IFPC))
+		return -EINVAL;
+
+	if (val)
+		requested_idle_level = GPU_HW_IFPC;
+	else
+		requested_idle_level = GPU_HW_ACTIVE;
+
+	if (gmu->idle_level == requested_idle_level)
+		return 0;
+
+	/* Power down the GPU before changing the idle level */
+	return adreno_power_cycle_u32(adreno_dev, &gmu->idle_level,
+		requested_idle_level);
+}
+
+static u32 gen8_gmu_ifpc_isenabled(struct kgsl_device *device)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(ADRENO_DEVICE(device));
+
+	return gmu->idle_level == GPU_HW_IFPC;
+}
+
+/* Send an NMI to the GMU */
+void gen8_gmu_send_nmi(struct kgsl_device *device, bool force)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 result;
+
+	/*
+	 * Do not send an NMI if the SMMU is stalled because the GMU will not
+	 * be able to save the CM3 state to DDR.
+	 */
+	if (gen8_gmu_gx_is_on(adreno_dev) && adreno_smmu_is_stalled(adreno_dev)) {
+		dev_err(&gmu->pdev->dev,
+			"Skipping NMI because SMMU is stalled\n");
+		return;
+	}
+
+	if (force)
+		goto nmi;
+
+	/*
+	 * We should not send an NMI if a CM3 fault was reported, because we
+	 * don't want to overwrite the critical CM3 state captured by the GMU
+	 * before it sent the CM3 fault interrupt. Also don't send an NMI if a
+	 * GMU reset is already active; we could have hit a GMU assert and an
+	 * NMI might already have been triggered.
+	 */
+
+	/* make sure we're reading the latest cm3_fault */
+	smp_rmb();
+
+	if (atomic_read(&gmu->cm3_fault))
+		return;
+
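+	/*
+	 * A nonzero fault/reset status here means the GMU is already handling
+	 * an assert on its own; skip the NMI (see comment above).
+	 */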
+	gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &result);
+
+	if (result & 0xE00)
+		return;
+
+nmi:
+	/* Mask so there's no interrupt caused by NMI */
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_MASK, UINT_MAX);
+
+	/* Make sure the interrupt is masked before causing it */
+	wmb();
+
+	/* This will cause the GMU to save its internal state to DDR */
+	gmu_core_regrmw(device, GEN8_GMUCX_CM3_CFG, BIT(9), BIT(9));
+
+	/* Make sure the NMI is invoked before we proceed */
+	wmb();
+
+	/* Wait for the NMI to be handled */
+	udelay(200);
+}
+
+static void gen8_gmu_cooperative_reset(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 result;
+
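+	/*
+	 * Turn off the GMU watchdog and trigger graceful death via the
+	 * host-to-GMU interrupt (bit 17).
+	 */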
+	gmu_core_regwrite(device, GEN8_GMUCX_WDOG_CTRL, 0);
+	gmu_core_regwrite(device, GEN8_GMUCX_HOST2GMU_INTR_SET, BIT(17));
+
+	/*
+	 * After triggering graceful death wait for snapshot ready
+	 * indication from GMU.
+	 */
+	if (!gmu_core_timed_poll_check(device, GEN8_GMUCX_CM3_FW_INIT_RESULT,
+				0x800, 2, 0x800))
+		return;
+
+	gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &result);
+	dev_err(&gmu->pdev->dev,
+		"GMU cooperative reset timed out 0x%x\n", result);
+	/*
+	 * If we don't get a snapshot-ready indication from the GMU, trigger an
+	 * NMI. If that also times out, just continue with the reset.
+	 */
+	gen8_gmu_send_nmi(device, true);
+
+	gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &result);
+	if ((result & 0x800) != 0x800)
+		dev_err(&gmu->pdev->dev,
+			"GMU cooperative reset NMI timed out 0x%x\n", result);
+}
+
+static int gen8_gmu_wait_for_active_transition(struct kgsl_device *device)
+{
+	u32 reg;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(ADRENO_DEVICE(device));
+
+	if (gmu_core_timed_poll_check(device, GEN8_GMUCX_RPMH_POWER_STATE,
+			GPU_HW_ACTIVE, 100, GENMASK(3, 0))) {
+		gmu_core_regread(device, GEN8_GMUCX_RPMH_POWER_STATE, &reg);
+		dev_err(&gmu->pdev->dev,
+			"GMU failed to move to ACTIVE state, Current state: 0x%x\n",
+			reg);
+
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static bool gen8_gmu_scales_bandwidth(struct kgsl_device *device)
+{
+	return true;
+}
+
+void gen8_gmu_handle_watchdog(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 mask;
+
+	/* Temporarily mask the watchdog interrupt to prevent a storm */
+	gmu_core_regread(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK, &mask);
+	gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK,
+			(mask | GMU_INT_WDOG_BITE));
+
+	gen8_gmu_send_nmi(device, false);
+
+	dev_err_ratelimited(&gmu->pdev->dev,
+			"GMU watchdog expired interrupt received\n");
+}
+
+static irqreturn_t gen8_gmu_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	const struct gen8_gpudev *gen8_gpudev =
+		to_gen8_gpudev(ADRENO_GPU_DEVICE(adreno_dev));
+	u32 status = 0;
+
+	gmu_core_regread(device, GEN8_GMUAO_AO_HOST_INTERRUPT_STATUS, &status);
+	gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_CLR, status);
+
+	if (status & GMU_INT_HOST_AHB_BUS_ERR)
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"AHB bus error interrupt received\n");
+
+	if (status & GMU_INT_WDOG_BITE)
+		gen8_gpudev->handle_watchdog(adreno_dev);
+
+	if (status & GMU_INT_FENCE_ERR) {
+		u32 fence_status;
+
+		gmu_core_regread(device, GEN8_GMUAO_AHB_FENCE_STATUS,
+			&fence_status);
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"FENCE error interrupt received %x\n", fence_status);
+	}
+
+	if (status & ~GMU_AO_INT_MASK)
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"Unhandled GMU interrupts 0x%x\n",
+				status & ~GMU_AO_INT_MASK);
+
+	return IRQ_HANDLED;
+}
+
+void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag)
+{
+	char msg_buf[36];
+	u32 size;
+	int ret;
+
+	if (IS_ERR_OR_NULL(gmu->qmp))
+		return;
+
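+	/*
+	 * Format the ACD enable/disable message for AOP; the length passed to
+	 * qmp_send() is padded to a 4-byte boundary.
+	 */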
+	size = scnprintf(msg_buf, sizeof(msg_buf),
+			"{class: gpu, res: acd, val: %d}", flag);
+
+	ret = qmp_send(gmu->qmp, msg_buf, ALIGN((size + 1), SZ_4));
+	if (ret < 0)
+		dev_err(&gmu->pdev->dev,
+			"AOP qmp send message failed: %d\n", ret);
+}
+
+int gen8_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	gen8_rdpm_cx_freq_update(gmu, gmu->freqs[level] / 1000);
+
+	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk",
+			gmu->freqs[level]);
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n",
+			gmu->freqs[level], ret);
+		return ret;
+	}
+
+	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk",
+			adreno_dev->gmu_hub_clk_freq);
+	if (ret && ret != -ENODEV) {
+		dev_err(&gmu->pdev->dev, "Unable to set the HUB clock\n");
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(gmu->num_clks, gmu->clks);
+	if (ret) {
+		dev_err(&gmu->pdev->dev, "Cannot enable GMU clocks\n");
+		return ret;
+	}
+
+	device->state = KGSL_STATE_AWARE;
+
+	return 0;
+}
+
+static int gen8_gmu_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int level, ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	gen8_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = gen8_gmu_enable_clks(adreno_dev, 0);
+	if (ret)
+		goto gdsc_off;
+
+	/*
+	 * Enable AHB timeout detection to catch any register access that takes
+	 * too long, before the NOC timeout gets detected. Enable this logic
+	 * before any register access, which means right after enabling clocks.
+	 */
+	gen8_enable_ahb_timeout_detection(adreno_dev);
+
+	/* Initialize the CX timer */
+	gen8_cx_timer_init(adreno_dev);
+
+	ret = gen8_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen8_gmu_version_info(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen8_gmu_itcm_shadow(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen8_scm_gpu_init_cx_regs(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	gen8_gmu_register_config(adreno_dev);
+
+	gen8_gmu_irq_enable(adreno_dev);
+
+	/* Vote for minimal DDR BW for GMU to init */
+	level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min;
+	icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level]));
+
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
+	ret = gen8_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_gmu_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	gen8_get_gpu_feature_info(adreno_dev);
+
+	ret = gen8_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	if (gen8_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE, 0) == 1 &&
+		!WARN_ONCE(!adreno_dev->gpucore->num_ddr_channels,
+			"Number of DDR channels is not specified in gpu core")) {
+		adreno_dev->gmu_ab = true;
+		set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv);
+	}
+
+	icc_set_bw(pwr->icc_path, 0, 0);
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	gen8_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		gen8_gmu_suspend(adreno_dev);
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen8_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
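+/*
+ * Boot path for subsequent power ups: unlike gen8_gmu_first_boot(), the
+ * firmware has already been parsed and the one-time setup is done, so this
+ * only runs the RSCC wakeup sequence, reloads the firmware into TCM and
+ * restarts HFI.
+ */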
+static int gen8_gmu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = gen8_gmu_enable_clks(adreno_dev, 0);
+	if (ret)
+		goto gdsc_off;
+
+	/*
+	 * Enable AHB timeout detection to catch any register access that takes
+	 * too long, before the NOC timeout gets detected. Enable this logic
+	 * before any register access, which means right after enabling clocks.
+	 */
+	gen8_enable_ahb_timeout_detection(adreno_dev);
+
+	/* Initialize the CX timer */
+	gen8_cx_timer_init(adreno_dev);
+
+	ret = gen8_rscc_wakeup_sequence(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen8_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	gen8_gmu_register_config(adreno_dev);
+
+	gen8_gmu_irq_enable(adreno_dev);
+
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
+	ret = gen8_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_gmu_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	gen8_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		gen8_gmu_suspend(adreno_dev);
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen8_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+static void set_acd(struct adreno_device *adreno_dev, void *priv)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	adreno_dev->acd_enabled = *((bool *)priv);
+	gen8_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);
+}
+
+static int gen8_gmu_acd_set(struct kgsl_device *device, bool val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (IS_ERR_OR_NULL(gmu->qmp))
+		return -EINVAL;
+
+	/* Don't do any unneeded work if ACD is already in the correct state */
+	if (adreno_dev->acd_enabled == val)
+		return 0;
+
+	/* Power cycle the GPU for changes to take effect */
+	return adreno_power_cycle(adreno_dev, set_acd, &val);
+}
+
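+/*
+ * adreno_dev->bcl_data packs the BCL response type in bit 0 and three 7-bit
+ * SID fields in the bits above it.
+ */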
+#define BCL_RESP_TYPE_MASK   BIT(0)
+#define BCL_SID0_MASK        GENMASK(7, 1)
+#define BCL_SID1_MASK        GENMASK(14, 8)
+#define BCL_SID2_MASK        GENMASK(21, 15)
+
+static int gen8_bcl_sid_set(struct kgsl_device *device, u32 sid_id, u64 sid_val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u32 bcl_data, val = (u32) sid_val;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL) ||
+		!FIELD_GET(BCL_RESP_TYPE_MASK, adreno_dev->bcl_data))
+		return -EINVAL;
+
+	switch (sid_id) {
+	case 0:
+		adreno_dev->bcl_data &= ~BCL_SID0_MASK;
+		bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID0_MASK, val);
+		break;
+	case 1:
+		adreno_dev->bcl_data &= ~BCL_SID1_MASK;
+		bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID1_MASK, val);
+		break;
+	case 2:
+		adreno_dev->bcl_data &= ~BCL_SID2_MASK;
+		bcl_data = adreno_dev->bcl_data | FIELD_PREP(BCL_SID2_MASK, val);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return adreno_power_cycle_u32(adreno_dev, &adreno_dev->bcl_data, bcl_data);
+}
+
+static u64 gen8_bcl_sid_get(struct kgsl_device *device, u32 sid_id)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL) ||
+	    !FIELD_GET(BCL_RESP_TYPE_MASK, adreno_dev->bcl_data))
+		return 0;
+
+	switch (sid_id) {
+	case 0:
+		return ((u64) FIELD_GET(BCL_SID0_MASK, adreno_dev->bcl_data));
+	case 1:
+		return ((u64) FIELD_GET(BCL_SID1_MASK, adreno_dev->bcl_data));
+	case 2:
+		return ((u64) FIELD_GET(BCL_SID2_MASK, adreno_dev->bcl_data));
+	default:
+		return 0;
+	}
+}
+
+static void gen8_send_tlb_hint(struct kgsl_device *device, bool val)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (!gmu->domain)
+		return;
+
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+	qcom_skip_tlb_management(&gmu->pdev->dev, val);
+#endif
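+	/* Once TLB management is handed back, flush the entire GMU IOTLB */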
+	if (!val)
+		iommu_flush_iotlb_all(gmu->domain);
+}
+
+static const struct gmu_dev_ops gen8_gmudev = {
+	.oob_set = gen8_gmu_oob_set,
+	.oob_clear = gen8_gmu_oob_clear,
+	.ifpc_store = gen8_gmu_ifpc_store,
+	.ifpc_isenabled = gen8_gmu_ifpc_isenabled,
+	.cooperative_reset = gen8_gmu_cooperative_reset,
+	.wait_for_active_transition = gen8_gmu_wait_for_active_transition,
+	.scales_bandwidth = gen8_gmu_scales_bandwidth,
+	.acd_set = gen8_gmu_acd_set,
+	.bcl_sid_set = gen8_bcl_sid_set,
+	.bcl_sid_get = gen8_bcl_sid_get,
+	.send_nmi = gen8_gmu_send_nmi,
+	.send_tlb_hint = gen8_send_tlb_hint,
+};
+
+static int gen8_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel,
+	u32 ab)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int ret = 0;
+
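+	/*
+	 * Suppress redundant votes: an unchanged bus level or AB value is
+	 * replaced with the INVALID marker, and if neither changed the HFI
+	 * round trip is skipped entirely.
+	 */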
+	if (buslevel == pwr->cur_buslevel)
+		buslevel = INVALID_DCVS_IDX;
+
+	if ((ab == pwr->cur_ab) || (ab == 0))
+		ab = INVALID_AB_VALUE;
+
+	if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX))
+		return 0;
+
+	ret = gen8_gmu_dcvs_set(adreno_dev, INVALID_DCVS_IDX,
+			buslevel, ab);
+	if (ret)
+		return ret;
+
+	if (buslevel != INVALID_DCVS_IDX)
+		pwr->cur_buslevel = buslevel;
+
+	if (ab != INVALID_AB_VALUE) {
+		if (!adreno_dev->gmu_ab)
+			icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0);
+		pwr->cur_ab = ab;
+	}
+
+	trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab);
+	return ret;
+}
+
+u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab)
+{
+	u16 vote = 0;
+	u32 max_bw, max_ab;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	if (!adreno_dev->gmu_ab || (ab == INVALID_AB_VALUE))
+		return (FIELD_PREP(GENMASK(31, 16), INVALID_AB_VALUE));
+
+	/*
+	 * max ddr bandwidth (KBps) = max bw in KBps per channel * number of channels
+	 * max ab (MBps) = max ddr bandwidth (KBps) / 1000
+	 */
+	max_bw = pwr->ddr_table[pwr->ddr_table_count - 1] * adreno_dev->gpucore->num_ddr_channels;
+	max_ab = max_bw / 1000;
+
+	/*
+	 * If the requested AB is higher than the theoretical max bandwidth, set
+	 * the AB vote to the maximum allowable quantized AB value.
+	 *
+	 * The power FW supports a 16-bit AB BW level, so the entire vote-able BW
+	 * range is quantized into a 16-bit space and the quantized value is used
+	 * to vote for AB through the GMU:
+	 *
+	 * quantized_vote = (ab vote (KBps) * 2^16) / max ddr bandwidth (KBps)
+	 */
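+	/*
+	 * Worked example with illustrative numbers only: if max_bw were
+	 * 8,000,000 KBps, an ab request of 2000 MBps would quantize to
+	 * (2000 * 1000 * 65536) / 8,000,000 = 16384.
+	 */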
+	if (ab >= max_ab)
+		vote = MAX_AB_VALUE;
+	else
+		vote = (u16)(((u64)ab * 1000 * (1 << 16)) / max_bw);
+
+	/*
+	 * Vote will be calculated as 0 for smaller AB values.
+	 * Set a minimum non-zero vote in such cases.
+	 */
+	if (ab && !vote)
+		vote = 0x1;
+
+	/*
+	 * Set the AB enable mask and the AB vote. req.bw is a 32-bit value laid
+	 * out as 0xABABENIB: this return value fills the upper 16 bits with the
+	 * quantized AB vote and sets the EN field, which marks the AB vote as
+	 * valid.
+	 */
+	return (FIELD_PREP(GENMASK(31, 16), vote) | FIELD_PREP(GENMASK(15, 8), 1));
+}
+
+static void gen8_free_gmu_globals(struct gen8_gmu_device *gmu)
+{
+	int i;
+
+	for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) {
+		struct kgsl_memdesc *md = &gmu->gmu_globals[i];
+
+		if (!md->gmuaddr)
+			continue;
+
+		iommu_unmap(gmu->domain, md->gmuaddr, md->size);
+
+		if (md->priv & KGSL_MEMDESC_SYSMEM)
+			kgsl_sharedmem_free(md);
+
+		memset(md, 0, sizeof(*md));
+	}
+
+	if (gmu->domain) {
+		iommu_detach_device(gmu->domain, &gmu->pdev->dev);
+		iommu_domain_free(gmu->domain);
+		gmu->domain = NULL;
+	}
+
+	gmu->global_entries = 0;
+}
+
+static int gen8_gmu_qmp_aoss_init(struct adreno_device *adreno_dev,
+		struct gen8_gmu_device *gmu)
+{
+	gmu->qmp = qmp_get(&gmu->pdev->dev);
+	if (IS_ERR(gmu->qmp))
+		return PTR_ERR(gmu->qmp);
+
+	adreno_dev->acd_enabled = true;
+	return 0;
+}
+
+static void gen8_gmu_acd_probe(struct kgsl_device *device,
+		struct gen8_gmu_device *gmu, struct device_node *node)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct kgsl_pwrlevel *pwrlevel =
+			&pwr->pwrlevels[pwr->num_pwrlevels - 1];
+	struct hfi_acd_table_cmd *cmd = &gmu->hfi.acd_table;
+	int ret, i, cmd_idx = 0;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_ACD))
+		return;
+
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ACD_TBL, HFI_MSG_CMD);
+
+	cmd->version = 1;
+	cmd->stride = 1;
+	cmd->enable_by_level = 0;
+
+	/*
+	 * Iterate through each gpu power level and generate a mask for GMU
+	 * firmware for ACD enabled levels and store the corresponding control
+	 * register configurations to the acd_table structure.
+	 */
+	for (i = 0; i < pwr->num_pwrlevels; i++) {
+		if (pwrlevel->acd_level) {
+			cmd->enable_by_level |= (1 << (i + 1));
+			cmd->data[cmd_idx++] = pwrlevel->acd_level;
+		}
+		pwrlevel--;
+	}
+
+	if (!cmd->enable_by_level)
+		return;
+
+	cmd->num_levels = cmd_idx;
+
+	ret = gen8_gmu_qmp_aoss_init(adreno_dev, gmu);
+	if (ret)
+		dev_err(&gmu->pdev->dev,
+			"AOP qmp init failed: %d\n", ret);
+}
+
+static int gen8_gmu_reg_probe(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	ret = kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu", NULL, NULL);
+
+	if (ret)
+		dev_err(&gmu->pdev->dev, "Unable to map the GMU registers\n");
+	/*
+	 * gmu_ao_blk_dec1 and gmu_ao_blk_dec2 are contiguous and contained within the gmu region
+	 * mapped above. gmu_ao_blk_dec0 is not within the gmu region and is mapped separately.
+	 */
+	kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu_ao_blk_dec0", NULL, NULL);
+
+	return ret;
+}
+
+static int gen8_gmu_clk_probe(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret, i;
+	int tbl_size;
+	int num_freqs;
+	int offset;
+
+	ret = devm_clk_bulk_get_all(&gmu->pdev->dev, &gmu->clks);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Voting for apb_pclk will enable power and clocks required for
+	 * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled,
+	 * QDSS is essentially unusable. Hence, if QDSS cannot be used,
+	 * don't vote for this clock.
+	 */
+	if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) {
+		for (i = 0; i < ret; i++) {
+			if (!strcmp(gmu->clks[i].id, "apb_pclk")) {
+				gmu->clks[i].clk = NULL;
+				break;
+			}
+		}
+	}
+
+	gmu->num_clks = ret;
+
+	/* Read the optional list of GMU frequencies */
+	if (of_get_property(gmu->pdev->dev.of_node,
+		"qcom,gmu-freq-table", &tbl_size) == NULL)
+		goto default_gmu_freq;
+
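+	/* The qcom,gmu-freq-table property is a flat list of <frequency, rpmh-voltage-level> pairs */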
+	num_freqs = (tbl_size / sizeof(u32)) / 2;
+	if (num_freqs != ARRAY_SIZE(gmu->freqs))
+		goto default_gmu_freq;
+
+	for (i = 0; i < num_freqs; i++) {
+		offset = i * 2;
+		ret = of_property_read_u32_index(gmu->pdev->dev.of_node,
+			"qcom,gmu-freq-table", offset, &gmu->freqs[i]);
+		if (ret)
+			goto default_gmu_freq;
+		ret = of_property_read_u32_index(gmu->pdev->dev.of_node,
+			"qcom,gmu-freq-table", offset + 1, &gmu->vlvls[i]);
+		if (ret)
+			goto default_gmu_freq;
+	}
+	return 0;
+
+default_gmu_freq:
+	/* The GMU frequency table is missing or invalid. Go with a default */
+	gmu->freqs[0] = GMU_FREQ_MIN;
+	gmu->vlvls[0] = RPMH_REGULATOR_LEVEL_LOW_SVS;
+	gmu->freqs[1] = GMU_FREQ_MAX;
+	gmu->vlvls[1] = RPMH_REGULATOR_LEVEL_SVS;
+
+	return 0;
+}
+
+static void gen8_gmu_rdpm_probe(struct gen8_gmu_device *gmu,
+		struct kgsl_device *device)
+{
+	struct resource *res;
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_cx");
+	if (res)
+		gmu->rdpm_cx_virt = devm_ioremap(&device->pdev->dev,
+				res->start, resource_size(res));
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_mx");
+	if (res)
+		gmu->rdpm_mx_virt = devm_ioremap(&device->pdev->dev,
+				res->start, resource_size(res));
+}
+
+void gen8_gmu_remove(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (!IS_ERR_OR_NULL(gmu->qmp))
+		qmp_put(gmu->qmp);
+
+	adreno_dev->acd_enabled = false;
+
+	if (gmu->fw_image)
+		release_firmware(gmu->fw_image);
+
+	gen8_free_gmu_globals(gmu);
+
+	vfree(gmu->itcm_shadow);
+	kobject_put(&gmu->log_kobj);
+	kobject_put(&gmu->stats_kobj);
+}
+
+static int gen8_gmu_iommu_fault_handler(struct iommu_domain *domain,
+		struct device *dev, unsigned long addr, int flags, void *token)
+{
+	char *fault_type = "unknown";
+
+	if (flags & IOMMU_FAULT_TRANSLATION)
+		fault_type = "translation";
+	else if (flags & IOMMU_FAULT_PERMISSION)
+		fault_type = "permission";
+	else if (flags & IOMMU_FAULT_EXTERNAL)
+		fault_type = "external";
+	else if (flags & IOMMU_FAULT_TRANSACTION_STALLED)
+		fault_type = "transaction stalled";
+
+	dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n",
+			addr,
+			(flags & IOMMU_FAULT_WRITE) ? "write" : "read",
+			fault_type);
+
+	return 0;
+}
+
+static int gen8_gmu_iommu_init(struct gen8_gmu_device *gmu)
+{
+	int ret;
+
+	gmu->domain = iommu_domain_alloc(&platform_bus_type);
+	if (gmu->domain == NULL) {
+		dev_err(&gmu->pdev->dev, "Unable to allocate GMU IOMMU domain\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Disable stall on fault for the GMU context bank.
+	 * This sets SCTLR.CFCFG = 0.
+	 * Also note that the smmu driver sets SCTLR.HUPCF = 0 by default.
+	 */
+	qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL);
+
+	ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev);
+	if (!ret) {
+		iommu_set_fault_handler(gmu->domain,
+			gen8_gmu_iommu_fault_handler, gmu);
+		return 0;
+	}
+
+	dev_err(&gmu->pdev->dev,
+		"Unable to attach GMU IOMMU domain: %d\n", ret);
+	iommu_domain_free(gmu->domain);
+	gmu->domain = NULL;
+
+	return ret;
+}
+
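+/*
+ * Both hysteresis values below appear to be expressed in 19.2 MHz always-on
+ * counter ticks: 0x1680 = 5760 ticks ~= 300usec, 0x0F00 = 3840 ticks ~= 200usec.
+ */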
+/* Default IFPC timer (300usec) value */
+#define GEN8_GMU_LONG_IFPC_HYST	FIELD_PREP(GENMASK(15, 0), 0x1680)
+
+/* Minimum IFPC timer (200usec) allowed to override default value */
+#define GEN8_GMU_LONG_IFPC_HYST_FLOOR	FIELD_PREP(GENMASK(15, 0), 0x0F00)
+
+int gen8_gmu_probe(struct kgsl_device *device,
+		struct platform_device *pdev)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	int ret, i;
+
+	gmu->pdev = pdev;
+
+	dma_set_coherent_mask(&gmu->pdev->dev, DMA_BIT_MASK(64));
+	gmu->pdev->dev.dma_mask = &gmu->pdev->dev.coherent_dma_mask;
+	set_dma_ops(&gmu->pdev->dev, NULL);
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+						"rscc");
+	if (res) {
+		gmu->rscc_virt = devm_ioremap(&device->pdev->dev, res->start,
+						resource_size(res));
+		if (!gmu->rscc_virt) {
+			dev_err(&gmu->pdev->dev, "rscc ioremap failed\n");
+			return -ENOMEM;
+		}
+	}
+
+	/* Setup any rdpm register ranges */
+	gen8_gmu_rdpm_probe(gmu, device);
+
+	/* Set up GMU regulators */
+	ret = kgsl_pwrctrl_probe_regulators(device, pdev);
+	if (ret)
+		return ret;
+
+	ret = gen8_gmu_clk_probe(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* Set up GMU IOMMU and shared memory with GMU */
+	ret = gen8_gmu_iommu_init(gmu);
+	if (ret)
+		goto error;
+
+	gmu->vma = gen8_gmu_vma;
+	for (i = 0; i < ARRAY_SIZE(gen8_gmu_vma); i++) {
+		struct gmu_vma_entry *vma = &gen8_gmu_vma[i];
+
+		vma->vma_root = RB_ROOT;
+		spin_lock_init(&vma->lock);
+	}
+
+	/* Map and reserve GMU CSRs registers */
+	ret = gen8_gmu_reg_probe(adreno_dev);
+	if (ret)
+		goto error;
+
+	/* Populates RPMh configurations */
+	ret = gen8_build_rpmh_tables(adreno_dev);
+	if (ret)
+		goto error;
+
+	/* Set up GMU idle state */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) {
+		gmu->idle_level = GPU_HW_IFPC;
+		adreno_dev->ifpc_hyst = GEN8_GMU_LONG_IFPC_HYST;
+		adreno_dev->ifpc_hyst_floor = GEN8_GMU_LONG_IFPC_HYST_FLOOR;
+	} else {
+		gmu->idle_level = GPU_HW_ACTIVE;
+	}
+
+	gen8_gmu_acd_probe(device, gmu, pdev->dev.of_node);
+
+	set_bit(GMU_ENABLED, &device->gmu_core.flags);
+
+	device->gmu_core.dev_ops = &gen8_gmudev;
+
+	/* Set default GMU attributes */
+	gmu->log_stream_enable = false;
+	gmu->log_group_mask = 0x3;
+
+	/* Initialize to zero to detect trace packet loss */
+	gmu->trace.seq_num = 0;
+
+	/* Disabled by default */
+	gmu->stats_enable = false;
+	/* Set default to CM3 busy cycles countable */
+	gmu->stats_mask = BIT(GEN8_GMU_CM3_BUSY_CYCLES);
+	/* Interval is in 50 us units. Set default sampling frequency to 4x50 us */
+	gmu->stats_interval = HFI_FEATURE_GMU_STATS_INTERVAL;
+
+	/* GMU sysfs nodes setup */
+	(void) kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, &dev->kobj, "log");
+	(void) kobject_init_and_add(&gmu->stats_kobj, &stats_kobj_type, &dev->kobj, "stats");
+
+	of_property_read_u32(gmu->pdev->dev.of_node, "qcom,gmu-perf-ddr-bw",
+		&gmu->perf_ddr_bw);
+
+	spin_lock_init(&gmu->hfi.cmdq_lock);
+
+	gmu->irq = kgsl_request_irq(gmu->pdev, "gmu",
+		gen8_gmu_irq_handler, device);
+
+	if (gmu->irq >= 0)
+		return 0;
+
+	ret = gmu->irq;
+
+error:
+	gen8_gmu_remove(device);
+	return ret;
+}
+
+static void gen8_gmu_active_count_put(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (WARN(atomic_read(&device->active_cnt) == 0,
+		"Unbalanced get/put calls to KGSL active count\n"))
+		return;
+
+	if (atomic_dec_and_test(&device->active_cnt)) {
+		kgsl_pwrscale_update_stats(device);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	wake_up(&device->active_cnt_wq);
+}
+
+int gen8_halt_gbif(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/* Halt new client requests */
+	kgsl_regwrite(device, GEN8_GBIF_HALT, GEN8_GBIF_CLIENT_HALT_MASK);
+	ret = adreno_wait_for_halt_ack(device,
+		GEN8_GBIF_HALT_ACK, GEN8_GBIF_CLIENT_HALT_MASK);
+
+	/* Halt all AXI requests */
+	kgsl_regwrite(device, GEN8_GBIF_HALT, GEN8_GBIF_ARB_HALT_MASK);
+	ret = adreno_wait_for_halt_ack(device,
+		GEN8_GBIF_HALT_ACK, GEN8_GBIF_ARB_HALT_MASK);
+
+	/* De-assert the halts */
+	kgsl_regwrite(device, GEN8_GBIF_HALT, 0x0);
+
+	return ret;
+}
+
+static int gen8_gmu_power_off(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+
+	if (device->gmu_fault)
+		goto error;
+
+	/* Wait for the lowest idle level we requested */
+	ret = gen8_gmu_wait_for_lowest_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen8_complete_rpmh_votes(gmu, 2);
+	if (ret)
+		goto error;
+
+	ret = gen8_gmu_notify_slumber(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen8_gmu_wait_for_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen8_rscc_sleep_sequence(adreno_dev);
+	if (ret)
+		goto error;
+
+	gen8_rdpm_mx_freq_update(gmu, 0);
+
+	/* Now that we are done with GMU and GPU, clear the GBIF */
+	ret = gen8_halt_gbif(adreno_dev);
+	if (ret)
+		goto error;
+
+	gen8_gmu_irq_disable(adreno_dev);
+
+	gen8_hfi_stop(adreno_dev);
+
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen8_rdpm_cx_freq_update(gmu, 0);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+
+	return 0;
+
+error:
+	gen8_gmu_irq_disable(adreno_dev);
+	gen8_hfi_stop(adreno_dev);
+	gen8_gmu_suspend(adreno_dev);
+
+	return ret;
+}
+
+void gen8_enable_gpu_irq(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_pwrctrl_irq(device, true);
+
+	adreno_irqctrl(adreno_dev, 1);
+}
+
+void gen8_disable_gpu_irq(struct adreno_device *adreno_dev)
+{
+	kgsl_pwrctrl_irq(KGSL_DEVICE(adreno_dev), false);
+
+	if (gen8_gmu_gx_is_on(adreno_dev))
+		adreno_irqctrl(adreno_dev, 0);
+}
+
+static int gen8_gpu_boot(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		goto err;
+
+	ret = gen8_gmu_oob_set(device, oob_gpu);
+	if (ret)
+		goto oob_clear;
+
+	ret = gen8_gmu_hfi_start_msg(adreno_dev);
+	if (ret)
+		goto oob_clear;
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data));
+
+	gen8_start(adreno_dev);
+
+	if (gen8_core->qos_value && adreno_is_preemption_enabled(adreno_dev))
+		kgsl_regwrite(device, GEN8_RBBM_GBIF_CLIENT_QOS_CNTL,
+			gen8_core->qos_value[adreno_dev->cur_rb->id]);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	adreno_perfcounter_start(adreno_dev);
+
+	/* Clear FSR here in case it is set from a previous pagefault */
+	kgsl_mmu_clear_fsr(&device->mmu);
+
+	gen8_enable_gpu_irq(adreno_dev);
+
+	ret = gen8_rb_start(adreno_dev);
+	if (ret) {
+		gen8_disable_gpu_irq(adreno_dev);
+		goto oob_clear;
+	}
+
+	/*
+	 * At this point it is safe to assume that we recovered. Setting
+	 * this field allows us to take a new snapshot for the next failure
+	 * if we are prioritizing the first unrecoverable snapshot.
+	 */
+	if (device->snapshot)
+		device->snapshot->recovered = true;
+
+	/* Start the dispatcher */
+	adreno_dispatcher_start(device);
+
+	device->reset_counter++;
+
+	gen8_gmu_oob_clear(device, oob_gpu);
+
+	return 0;
+
+oob_clear:
+	gen8_gmu_oob_clear(device, oob_gpu);
+
+err:
+	gen8_gmu_power_off(adreno_dev);
+
+	return ret;
+}
+
+static void gmu_idle_timer(struct timer_list *t)
+{
+	struct kgsl_device *device = container_of(t, struct kgsl_device,
+					idle_timer);
+
+	kgsl_schedule_work(&device->idle_check_ws);
+}
+
+static int gen8_boot(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (WARN_ON(test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen8_gmu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_start_idle_timer(device);
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return ret;
+}
+
+static int gen8_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) {
+		if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+			return gen8_boot(adreno_dev);
+
+		return 0;
+	}
+
+	ret = gen8_ringbuffer_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_gmu_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen8_gmu_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+
+	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
+						 ADRENO_COOP_RESET);
+
+	adreno_create_profile_buffer(adreno_dev);
+
+	set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags);
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	/*
+	 * BCL needs its Central Broadcast register to be programmed from TZ.
+	 * For kernel versions prior to 6.1, this programming happens only when
+	 * the zap shader firmware load is successful. Since the zap firmware
+	 * load can fail in the boot-up path, enable BCL only after the first
+	 * boot completes successfully, which ensures the Central Broadcast
+	 * register was programmed before BCL is enabled.
+	 */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		adreno_dev->bcl_enabled = true;
+
+	/*
+	 * There is a possible deadlock between kgsl firmware reading
+	 * (request_firmware) and devfreq update calls. During first boot, the
+	 * kgsl device mutex is held while request_firmware is called to read
+	 * firmware, and request_firmware internally takes the dev_pm_qos_mtx
+	 * lock. Devfreq updates triggered by thermal/bcl or devfreq sysfs take
+	 * the same dev_pm_qos_mtx lock first and then try to take the kgsl
+	 * device mutex as part of the get_dev_status/target calls. This
+	 * deadlocks when each thread is unable to acquire the mutex held by
+	 * the other. Enable devfreq updates now that we are done reading all
+	 * firmware files.
+	 */
+	device->pwrscale.devfreq_enabled = true;
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return 0;
+}
+
+static bool gen8_irq_pending(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &status);
+
+	/* Return busy if an interrupt is pending */
+	return ((status & adreno_dev->irq_mask) ||
+		atomic_read(&adreno_dev->pending_irq_refcnt));
+}
+
+static int gen8_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags));
+
+	adreno_suspend_context(device);
+
+	/*
+	 * adreno_suspend_context() unlocks the device mutex, which
+	 * could allow a concurrent thread to attempt SLUMBER sequence.
+	 * Hence, check the flags again before proceeding with SLUMBER.
+	 */
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER);
+
+	ret = gen8_gmu_oob_set(device, oob_gpu);
+	if (ret)
+		goto no_gx_power;
+
+	if (gen8_irq_pending(adreno_dev)) {
+		gen8_gmu_oob_clear(device, oob_gpu);
+		return -EBUSY;
+	}
+
+	kgsl_pwrscale_update_stats(device);
+
+	/* Save active coresight registers if applicable */
+	adreno_coresight_stop(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+no_gx_power:
+	gen8_gmu_oob_clear(device, oob_gpu);
+
+	kgsl_pwrctrl_irq(device, false);
+
+	gen8_gmu_power_off(adreno_dev);
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	adreno_dispatcher_stop(adreno_dev);
+
+	adreno_ringbuffer_stop(adreno_dev);
+
+	if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice))
+		llcc_slice_deactivate(adreno_dev->gpu_llc_slice);
+
+	if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice))
+		llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	del_timer_sync(&device->idle_timer);
+
+	kgsl_pwrscale_sleep(device);
+
+	kgsl_pwrctrl_clear_l3_vote(device);
+
+	/*
+	 * Reset the context records so that CP can start
+	 * at the correct read pointer for BV thread after
+	 * at the correct read pointer for the BV thread after
+	 */
+	gen8_reset_preempt_records(adreno_dev);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+
+	return ret;
+}
+
+static void gmu_idle_check(struct work_struct *work)
+{
+	struct kgsl_device *device = container_of(work,
+					struct kgsl_device, idle_check_ws);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	mutex_lock(&device->mutex);
+
+	if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags))
+		goto done;
+
+	if (atomic_read(&device->active_cnt) || time_is_after_jiffies(device->idle_jiffies)) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		goto done;
+
+	spin_lock(&device->submit_lock);
+
+	if (device->submit_now) {
+		spin_unlock(&device->submit_lock);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
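+	/* No new submission is pending; block inline submission while we attempt to power off */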
+	device->skip_inline_submit = true;
+	spin_unlock(&device->submit_lock);
+
+	ret = gen8_power_off(adreno_dev);
+	if (ret == -EBUSY) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+done:
+	mutex_unlock(&device->mutex);
+}
+
+static int gen8_gmu_first_open(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/*
+	 * Do the one-time setup that needs to happen when we attempt to
+	 * boot the gpu for the very first time.
+	 */
+	ret = gen8_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * A client that does a first_open but never closes the device
+	 * may prevent us from going back to SLUMBER. So trigger the idle
+	 * check by incrementing the active count and immediately releasing it.
+	 */
+	atomic_inc(&device->active_cnt);
+	gen8_gmu_active_count_put(adreno_dev);
+
+	return 0;
+}
+
+static int gen8_gmu_last_close(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return gen8_power_off(adreno_dev);
+
+	return 0;
+}
+
+static int gen8_gmu_active_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return -EINVAL;
+
+	if ((atomic_read(&device->active_cnt) == 0) &&
+		!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		ret = gen8_boot(adreno_dev);
+
+	if (ret == 0)
+		atomic_inc(&device->active_cnt);
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	return ret;
+}
+
+static int gen8_gmu_pm_suspend(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND);
+
+	/* Halt any new submissions */
+	reinit_completion(&device->halt_gate);
+
+	/* wait for active count so device can be put in slumber */
+	ret = kgsl_active_count_wait(device, 0, HZ);
+	if (ret) {
+		dev_err(device->dev,
+			"Timed out waiting for the active count\n");
+		goto err;
+	}
+
+	ret = adreno_idle(device);
+	if (ret)
+		goto err;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		gen8_power_off(adreno_dev);
+
+	set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+
+	adreno_get_gpu_halt(adreno_dev);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
+
+	return 0;
+err:
+	adreno_dispatcher_start(device);
+	return ret;
+}
+
+static void gen8_gmu_pm_resume(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags),
+		"resume invoked without a suspend\n"))
+		return;
+
+	adreno_put_gpu_halt(adreno_dev);
+
+	adreno_dispatcher_start(device);
+
+	clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+}
+
+static void gen8_gmu_touch_wakeup(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	/*
+	 * Do not wake up a suspended device, or one whose first boot sequence
+	 * has not yet completed.
+	 */
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) ||
+		!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		goto done;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen8_gmu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	ret = gen8_gpu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+done:
+	/*
+	 * When waking up from a touch event we want to stay active long enough
+	 * for the user to send a draw command. The default idle timer timeout
+	 * is shorter than we want, so push the idle timer out further for this
+	 * special case.
+	 */
+	mod_timer(&device->idle_timer, jiffies +
+			msecs_to_jiffies(adreno_wake_timeout));
+}
+
+const struct adreno_power_ops gen8_gmu_power_ops = {
+	.first_open = gen8_gmu_first_open,
+	.last_close = gen8_gmu_last_close,
+	.active_count_get = gen8_gmu_active_count_get,
+	.active_count_put = gen8_gmu_active_count_put,
+	.pm_suspend = gen8_gmu_pm_suspend,
+	.pm_resume = gen8_gmu_pm_resume,
+	.touch_wakeup = gen8_gmu_touch_wakeup,
+	.gpu_clock_set = gen8_gmu_clock_set,
+	.gpu_bus_set = gen8_gmu_bus_set,
+};
+
+int gen8_gmu_device_probe(struct platform_device *pdev,
+	u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	struct gen8_device *gen8_dev;
+	int ret;
+
+	gen8_dev = devm_kzalloc(&pdev->dev, sizeof(*gen8_dev),
+			GFP_KERNEL);
+	if (!gen8_dev)
+		return -ENOMEM;
+
+	adreno_dev = &gen8_dev->adreno_dev;
+
+	adreno_dev->irq_mask = GEN8_INT_MASK;
+
+	ret = gen8_probe_common(pdev, adreno_dev, chipid, gpucore);
+	if (ret)
+		return ret;
+
+	ret = adreno_dispatcher_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	INIT_WORK(&device->idle_check_ws, gmu_idle_check);
+
+	timer_setup(&device->idle_timer, gmu_idle_timer, 0);
+
+	return 0;
+}
+
+int gen8_gmu_reset(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	gen8_disable_gpu_irq(adreno_dev);
+
+	gen8_gmu_irq_disable(adreno_dev);
+
+	gen8_hfi_stop(adreno_dev);
+
+	/* Hard reset the gmu and gpu */
+	gen8_gmu_suspend(adreno_dev);
+
+	gen8_reset_preempt_records(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	/* Attempt to reboot the gmu and gpu */
+	return gen8_boot(adreno_dev);
+}
+
+int gen8_gmu_hfi_probe(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hfi *hfi = &gmu->hfi;
+
+	hfi->irq = kgsl_request_irq(gmu->pdev, "hfi",
+		gen8_hfi_irq_handler, KGSL_DEVICE(adreno_dev));
+
+	return hfi->irq < 0 ? hfi->irq : 0;
+}
+
+int gen8_gmu_add_to_minidump(struct adreno_device *adreno_dev)
+{
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+	int ret;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GEN8_DEVICE,
+			(void *)(gen8_dev), sizeof(struct gen8_device));
+	if (ret)
+		return ret;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GMU_LOG_ENTRY,
+			gen8_dev->gmu.gmu_log->hostptr, gen8_dev->gmu.gmu_log->size);
+	if (ret)
+		return ret;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HFIMEM_ENTRY,
+			gen8_dev->gmu.hfi.hfi_mem->hostptr, gen8_dev->gmu.hfi.hfi_mem->size);
+
+	return ret;
+}
+
+static int gen8_gmu_bind(struct device *dev, struct device *master, void *data)
+{
+	struct kgsl_device *device = dev_get_drvdata(master);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	const struct gen8_gpudev *gen8_gpudev = to_gen8_gpudev(gpudev);
+	int ret;
+
+	ret = gen8_gmu_probe(device, to_platform_device(dev));
+	if (ret)
+		return ret;
+
+	if (gen8_gpudev->hfi_probe) {
+		ret = gen8_gpudev->hfi_probe(adreno_dev);
+
+		if (ret) {
+			gen8_gmu_remove(device);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void gen8_gmu_unbind(struct device *dev, struct device *master,
+		void *data)
+{
+	struct kgsl_device *device = dev_get_drvdata(master);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	const struct gen8_gpudev *gen8_gpudev = to_gen8_gpudev(gpudev);
+
+	if (gen8_gpudev->hfi_remove)
+		gen8_gpudev->hfi_remove(adreno_dev);
+
+	gen8_gmu_remove(device);
+}
+
+static const struct component_ops gen8_gmu_component_ops = {
+	.bind = gen8_gmu_bind,
+	.unbind = gen8_gmu_unbind,
+};
+
+static int gen8_gmu_probe_dev(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &gen8_gmu_component_ops);
+}
+
+static int gen8_gmu_remove_dev(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &gen8_gmu_component_ops);
+	return 0;
+}
+
+static const struct of_device_id gen8_gmu_match_table[] = {
+	{ .compatible = "qcom,gen8-gmu" },
+	{ },
+};
+
+struct platform_driver gen8_gmu_driver = {
+	.probe = gen8_gmu_probe_dev,
+	.remove = gen8_gmu_remove_dev,
+	.driver = {
+		.name = "adreno-gen8-gmu",
+		.of_match_table = gen8_gmu_match_table,
+	},
+};

+ 505 - 0
qcom/opensource/graphics-kernel/adreno_gen8_gmu.h

@@ -0,0 +1,505 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __ADRENO_GEN8_GMU_H
+#define __ADRENO_GEN8_GMU_H
+
+#include <linux/soc/qcom/qcom_aoss.h>
+
+#include "adreno_gen8_hfi.h"
+#include "kgsl_gmu_core.h"
+
+struct gen8_dcvs_table {
+	u32 gpu_level_num;
+	u32 gmu_level_num;
+	struct opp_gx_desc gx_votes[MAX_GX_LEVELS];
+	struct opp_desc cx_votes[MAX_CX_LEVELS];
+};
+
+/**
+ * struct gen8_gmu_device - GMU device structure
+ * @ver: GMU Version information
+ * @irq: GMU interrupt number
+ * @fw_image: GMU FW image
+ * @hfi_mem: pointer to HFI shared memory
+ * @dump_mem: pointer to GMU debug dump memory
+ * @gmu_log: gmu event log memory
+ * @hfi: HFI controller
+ * @num_gpupwrlevels: number of GPU frequencies in the GPU freq table
+ * @num_bwlevel: number of GPU BW levels
+ * @num_cnocbwlevel: number of CNOC BW levels
+ * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling
+ * @clks: GPU subsystem clocks required for GMU functionality
+ * @wakeup_pwrlevel: GPU wake up power/DCVS level if different from the
+ *  default power level
+ * @idle_level: Minimal GPU idle power level
+ * @fault_count: GMU fault count
+ * @log_wptr_retention: Store the log wptr offset on slumber
+ */
+struct gen8_gmu_device {
+	struct {
+		u32 core;
+		u32 core_dev;
+		u32 pwr;
+		u32 pwr_dev;
+		u32 hfi;
+	} ver;
+	struct platform_device *pdev;
+	int irq;
+	const struct firmware *fw_image;
+	struct kgsl_memdesc *dump_mem;
+	struct kgsl_memdesc *gmu_log;
+	/** @vrb: GMU virtual register bank memory */
+	struct kgsl_memdesc *vrb;
+	/** @trace: gmu trace container */
+	struct kgsl_gmu_trace trace;
+	/** @gmu_init_scratch: Memory to store the initial HFI messages */
+	struct kgsl_memdesc *gmu_init_scratch;
+	/** @gpu_boot_scratch: Memory to store the bootup HFI messages */
+	struct kgsl_memdesc *gpu_boot_scratch;
+	struct gen8_hfi hfi;
+	/** @clks: GPU subsystem clocks required for GMU functionality */
+	struct clk_bulk_data *clks;
+	/** @num_clks: Number of entries in the @clks array */
+	int num_clks;
+	u32 idle_level;
+	/** @freqs: Array of GMU frequencies */
+	u32 freqs[GMU_MAX_PWRLEVELS];
+	/** @vlvls: Array of GMU voltage levels */
+	u32 vlvls[GMU_MAX_PWRLEVELS];
+	/** @qmp: aoss_qmp handle */
+	struct qmp *qmp;
+	/** @gmu_globals: Array to store gmu global buffers */
+	struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES];
+	/** @global_entries: To keep track of number of gmu buffers */
+	u32 global_entries;
+	struct gmu_vma_entry *vma;
+	u32 log_wptr_retention;
+	/** @cm3_fault: whether gmu received a cm3 fault interrupt */
+	atomic_t cm3_fault;
+	/**
+	 * @itcm_shadow: Copy of the itcm block in firmware binary used for
+	 * snapshot
+	 */
+	void *itcm_shadow;
+	/** @flags: Internal gmu flags */
+	unsigned long flags;
+	/** @rscc_virt: Pointer where RSCC block is mapped */
+	void __iomem *rscc_virt;
+	/** @domain: IOMMU domain for the kernel context */
+	struct iommu_domain *domain;
+	/** @log_stream_enable: GMU log streaming enable. Disabled by default */
+	bool log_stream_enable;
+	/** @log_group_mask: Allows overriding default GMU log group mask */
+	u32 log_group_mask;
+	struct kobject log_kobj;
+	/*
+	 * @perf_ddr_bw: The lowest ddr bandwidth that puts CX at a corner at
+	 * which GMU can run at higher frequency.
+	 */
+	u32 perf_ddr_bw;
+	/** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */
+	void __iomem *rdpm_cx_virt;
+	/** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */
+	void __iomem *rdpm_mx_virt;
+	/** @num_oob_perfcntr: Number of active oob_perfcntr requests */
+	u32 num_oob_perfcntr;
+	/** @acd_debug_val: DVM value to calibrate ACD for a level */
+	u32 acd_debug_val;
+	/** @stats_enable: GMU stats feature enable */
+	bool stats_enable;
+	/** @stats_mask: GMU performance countables to enable */
+	u32 stats_mask;
+	/** @stats_interval: GMU performance counters sampling interval */
+	u32 stats_interval;
+	/** @stats_kobj: kernel object for GMU stats directory in sysfs */
+	struct kobject stats_kobj;
+	/** @cp_init_hdr: raw command header for cp_init */
+	u32 cp_init_hdr;
+	/** @switch_to_unsec_hdr: raw command header for switch to unsecure packet */
+	u32 switch_to_unsec_hdr;
+	/** @dcvs_table: Table for gpu dcvs levels */
+	struct gen8_dcvs_table dcvs_table;
+};
+
+/* Helper function to get to gen8 gmu device from adreno device */
+struct gen8_gmu_device *to_gen8_gmu(struct adreno_device *adreno_dev);
+
+/* Helper function to get to adreno device from gen8 gmu device */
+struct adreno_device *gen8_gmu_to_adreno(struct gen8_gmu_device *gmu);
+
+/**
+ * gen8_reserve_gmu_kernel_block() - Allocate a global gmu buffer
+ * @gmu: Pointer to the gen8 gmu device
+ * @addr: Desired gmu virtual address
+ * @size: Size of the buffer in bytes
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @align: Alignment for the GMU VA and GMU mapping size
+ *
+ * This function allocates a global gmu buffer and maps it in
+ * the desired gmu vma
+ *
+ * Return: Pointer to the memory descriptor or error pointer on failure
+ */
+struct kgsl_memdesc *gen8_reserve_gmu_kernel_block(struct gen8_gmu_device *gmu,
+		u32 addr, u32 size, u32 vma_id, u32 align);
+
+/**
+ * gen8_reserve_gmu_kernel_block_fixed() - Map a physical resource address to gmu
+ * @gmu: Pointer to the gen8 gmu device
+ * @addr: Desired gmu virtual address
+ * @size: Size of the buffer in bytes
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @resource: Name of the resource to get the size and address to allocate
+ * @attrs: Attributes for the mapping
+ * @align: Alignment for the GMU VA and GMU mapping size
+ *
+ * This function maps the physical resource address to the desired gmu vma
+ *
+ * Return: Pointer to the memory descriptor or error pointer on failure
+ */
+struct kgsl_memdesc *gen8_reserve_gmu_kernel_block_fixed(struct gen8_gmu_device *gmu,
+	u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 align);
+
+/**
+ * gen8_alloc_gmu_kernel_block() - Allocate a gmu buffer
+ * @gmu: Pointer to the gen8 gmu device
+ * @md: Pointer to the memdesc
+ * @size: Size of the buffer in bytes
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @attrs: Attributes for the mapping
+ *
+ * This function allocates a buffer and maps it in the desired gmu vma
+ *
+ * Return: 0 on success or error code on failure
+ */
+int gen8_alloc_gmu_kernel_block(struct gen8_gmu_device *gmu,
+	struct kgsl_memdesc *md, u32 size, u32 vma_id, int attrs);
+
+/**
+ * gen8_gmu_import_buffer() - Import a gmu buffer
+ * @gmu: Pointer to the gen8 gmu device
+ * @vma_id: Target gmu vma where this buffer should be mapped
+ * @md: Pointer to the memdesc to be mapped
+ * @attrs: Attributes for the mapping
+ * @align: Alignment for the GMU VA and GMU mapping size
+ *
+ * This function imports and maps a buffer to a gmu vma
+ *
+ * Return: 0 on success or error code on failure
+ */
+int gen8_gmu_import_buffer(struct gen8_gmu_device *gmu, u32 vma_id,
+			struct kgsl_memdesc *md, u32 attrs, u32 align);
+
+/**
+ * gen8_free_gmu_block() - Free a gmu buffer
+ * @gmu: Pointer to the gen8 gmu device
+ * @md: Pointer to the memdesc that is to be freed
+ *
+ * This function frees a gmu block allocated by gen8_reserve_gmu_kernel_block()
+ */
+void gen8_free_gmu_block(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md);
+
+/**
+ * gen8_build_rpmh_tables - Build the rpmh tables
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function creates the gpu dcvs and bw tables
+ *
+ * Return: 0 on success and negative error on failure
+ */
+int gen8_build_rpmh_tables(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_gx_is_on - Check if GX is on
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function reads pwr status registers to check if GX
+ * is on or off
+ */
+bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_device_probe - GEN8 GMU device probe function
+ * @pdev: Pointer to the platform device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore
+ *
+ * The target specific probe function for gmu based gen8 targets.
+ */
+int gen8_gmu_device_probe(struct platform_device *pdev,
+		u32 chipid, const struct adreno_gpu_core *gpucore);
+
+/**
+ * gen8_gmu_reset - Reset and restart the gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_reset(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_enable_gpu_irq - Enable gpu interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen8_enable_gpu_irq(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_disable_gpu_irq - Disable gpu interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen8_disable_gpu_irq(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_snapshot - Take snapshot for gmu targets
+ * @adreno_dev: Pointer to the adreno device
+ * @snapshot: Pointer to the snapshot structure
+ *
+ * Send an NMI to gmu if we hit a gmu fault. Then take gmu
+ * snapshot and carry on with the rest of the gen8 snapshot
+ */
+void gen8_gmu_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+
+/**
+ * gen8_gmu_probe - Probe gen8 gmu resources
+ * @device: Pointer to the kgsl device
+ * @pdev: Pointer to the gmu platform device
+ *
+ * Probe the gmu and hfi resources
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_probe(struct kgsl_device *device,
+		struct platform_device *pdev);
+
+/**
+ * gen8_gmu_parse_fw - Parse the gmu fw binary
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_parse_fw(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_memory_init - Allocate gmu memory
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Allocates the gmu log buffer and others if needed.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_memory_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_aop_send_acd_state - Enable or disable acd feature in aop
+ * @gmu: Pointer to the gen8 gmu device
+ * @flag: Boolean to enable or disable acd in aop
+ *
+ * This function enables or disables gpu acd feature using qmp
+ */
+void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag);
+
+/**
+ * gen8_gmu_load_fw - Load gmu firmware
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Loads the gmu firmware binary into TCMs and memory
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_load_fw(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_device_start - Bring gmu out of reset
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_device_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_hfi_start - Indicate hfi start to gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_itcm_shadow - Create itcm shadow copy for snapshot
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_itcm_shadow(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_register_config - gmu register configuration
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Program gmu registers based on features
+ */
+void gen8_gmu_register_config(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_version_info - Get gmu firmware version
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_version_info(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_irq_enable - Enable gmu interrupts
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen8_gmu_irq_enable(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_irq_disable - Disable gmu interrupts
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen8_gmu_irq_disable(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_suspend - Hard reset the gpu and gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * In case we hit a gmu fault, hard reset the gpu and gmu
+ * to recover from the fault
+ */
+void gen8_gmu_suspend(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_oob_set - send gmu oob request
+ * @device: Pointer to the kgsl device
+ * @oob: Type of oob request as defined in enum oob_request
+ *
+ * Request gmu to keep gpu powered up till the oob is cleared
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_oob_set(struct kgsl_device *device, enum oob_request oob);
+
+/**
+ * gen8_gmu_oob_clear - clear an asserted oob request
+ * @device: Pointer to the kgsl device
+ * @oob: Type of oob request as defined in enum oob_request
+ *
+ * Clear a previously requested oob so that gmu can power
+ * collapse the gpu
+ */
+void gen8_gmu_oob_clear(struct kgsl_device *device, enum oob_request oob);
+
+/**
+ * gen8_gmu_wait_for_lowest_idle - wait for gmu to complete ifpc
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * If ifpc is enabled, wait for gmu to put gpu into ifpc.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_wait_for_idle - Wait for gmu to become idle
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_wait_for_idle(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_rscc_sleep_sequence - Trigger rscc sleep sequence
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_rscc_sleep_sequence(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_rscc_wakeup_sequence - Trigger rscc wakeup sequence
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_rscc_wakeup_sequence(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_halt_gbif - Halt CX and GX requests in GBIF
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Clear any pending GX or CX transactions in GBIF and
+ * deassert GBIF halt
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_halt_gbif(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_remove - Clean up gmu probed resources
+ * @device: Pointer to the kgsl device
+ */
+void gen8_gmu_remove(struct kgsl_device *device);
+
+/**
+ * gen8_gmu_enable_clks - Enable gmu clocks
+ * @adreno_dev: Pointer to the adreno device
+ * @level: GMU frequency level
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level);
+
+/**
+ * gen8_gmu_handle_watchdog - Handle watchdog interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen8_gmu_handle_watchdog(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_gmu_send_nmi - Send NMI to GMU
+ * @device: Pointer to the kgsl device
+ * @force: Boolean to forcefully send NMI irrespective of GMU state
+ */
+void gen8_gmu_send_nmi(struct kgsl_device *device, bool force);
+
+/**
+ * gen8_gmu_add_to_minidump - Register gen8_device with va minidump
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_gmu_add_to_minidump(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_snapshot_gmu_mem - Snapshot a GMU memory descriptor
+ * @device: Pointer to the kgsl device
+ * @buf: Destination snapshot buffer
+ * @remain: Remaining size of the snapshot buffer
+ * @priv: Opaque handle
+ *
+ * Return: Number of bytes written to snapshot buffer
+ */
+size_t gen8_snapshot_gmu_mem(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv);
+
+/**
+ * gen8_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU
+ * @adreno_dev: Handle to the adreno device
+ * @ab: ab request that needs to be scaled in MBps
+ *
+ * Return: The AB value that needs to be prefixed to the bandwidth vote in kbps
+ */
+u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab);
+
+/**
+ * gen8_gmu_rpmh_pwr_state_is_active - Check the state of GPU HW
+ * @device: Pointer to the kgsl device
+ *
+ * Return: true if the GPU HW is active, false otherwise
+ */
+bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device);
+#endif
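
As declared above, gen8_gmu_oob_set()/gen8_gmu_oob_clear() bracket work that needs the GPU to stay powered. A minimal caller-side sketch of that pattern, assuming an oob_gpu entry exists in enum oob_request (the helper name below is hypothetical):

static int example_read_gx_register(struct kgsl_device *device,
		u32 offsetwords, u32 *val)
{
	int ret;

	/* Ask the GMU to keep the GPU powered until the oob is cleared */
	ret = gen8_gmu_oob_set(device, oob_gpu);
	if (ret)
		return ret;

	/* GX registers are safe to access while the oob is held */
	kgsl_regread(device, offsetwords, val);

	/* Allow the GMU to power collapse the GPU again */
	gen8_gmu_oob_clear(device, oob_gpu);

	return 0;
}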

+ 310 - 0
qcom/opensource/graphics-kernel/adreno_gen8_gmu_snapshot.c

@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "adreno_gen8_gmu.h"
+#include "adreno_gen8_3_0_snapshot.h"
+#include "adreno_snapshot.h"
+#include "gen8_reg.h"
+#include "kgsl_device.h"
+
+size_t gen8_snapshot_gmu_mem(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_gmu_mem *mem_hdr =
+		(struct kgsl_snapshot_gmu_mem *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*mem_hdr));
+	struct gmu_mem_type_desc *desc = priv;
+
+	if (priv == NULL || desc->memdesc->hostptr == NULL)
+		return 0;
+
+	if (remain < desc->memdesc->size + sizeof(*mem_hdr)) {
+		dev_err(device->dev,
+			"snapshot: Not enough memory for the gmu section %d\n",
+			desc->type);
+		return 0;
+	}
+
+	mem_hdr->type = desc->type;
+	mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr;
+	mem_hdr->gmuaddr = desc->memdesc->gmuaddr;
+	mem_hdr->gpuaddr = 0;
+
+	/* The hw fence queues are mapped as iomem in the kernel */
+	if (desc->type == SNAPSHOT_GMU_MEM_HW_FENCE)
+		memcpy_fromio(data, desc->memdesc->hostptr, desc->memdesc->size);
+	else
+		memcpy(data, desc->memdesc->hostptr, desc->memdesc->size);
+
+	return desc->memdesc->size + sizeof(*mem_hdr);
+}
+
+static size_t gen8_gmu_snapshot_dtcm(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_gmu_mem *mem_hdr =
+		(struct kgsl_snapshot_gmu_mem *)buf;
+	struct gen8_gmu_device *gmu = (struct gen8_gmu_device *)priv;
+	u32 *data = (u32 *)(buf + sizeof(*mem_hdr));
+	u32 i;
+
+	if (remain < gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr)) {
+		SNAPSHOT_ERR_NOMEM(device, "GMU DTCM Memory");
+		return 0;
+	}
+
+	mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
+	mem_hdr->hostaddr = 0;
+	mem_hdr->gmuaddr = gmu->vma[GMU_DTCM].start;
+	mem_hdr->gpuaddr = 0;
+
+	/*
+	 * Reads of the GMU TCMs over the side-band debug controller
+	 * interface are supported on the gen8 family:
+	 * region [20]: Dump ITCM/DTCM. Select 1 for DTCM.
+	 * autoInc [31]: Autoincrement the address field after each
+	 * access to TCM_DBG_DATA
+	 */
+	kgsl_regwrite(device, GEN8_CX_DBGC_TCM_DBG_ADDR, BIT(20) | BIT(31));
+
+	for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++)
+		kgsl_regread(device, GEN8_CX_DBGC_TCM_DBG_DATA, data++);
+
+	return gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr);
+}
+
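
For reference, a standalone sketch of the TCM_DBG_ADDR encoding used above and the number of TCM_DBG_DATA reads it implies (the DTCM size is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

int main(void)
{
	/* Bit 20 selects DTCM as the region, bit 31 auto-increments the address */
	uint32_t dbg_addr = BIT(20) | BIT(31);
	/* Hypothetical 32 KB DTCM: one TCM_DBG_DATA read per dword */
	uint32_t dtcm_bytes = 32 * 1024;

	printf("TCM_DBG_ADDR = 0x%08x, data reads = %u\n",
			dbg_addr, dtcm_bytes >> 2);
	return 0;
}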
+static size_t gen8_gmu_snapshot_itcm(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_gmu_mem *mem_hdr =
+			(struct kgsl_snapshot_gmu_mem *)buf;
+	void *dest = buf + sizeof(*mem_hdr);
+	struct gen8_gmu_device *gmu = (struct gen8_gmu_device *)priv;
+
+	if (!gmu->itcm_shadow) {
+		dev_err(&gmu->pdev->dev, "No memory allocated for ITCM shadow capture\n");
+		return 0;
+	}
+
+	if (remain < gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr)) {
+		SNAPSHOT_ERR_NOMEM(device, "GMU ITCM Memory");
+		return 0;
+	}
+
+	mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
+	mem_hdr->hostaddr = 0;
+	mem_hdr->gmuaddr = gmu->vma[GMU_ITCM].start;
+	mem_hdr->gpuaddr = 0;
+
+	memcpy(dest, gmu->itcm_shadow, gmu->vma[GMU_ITCM].size);
+
+	return gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr);
+}
+
+static void gen8_gmu_snapshot_memories(struct kgsl_device *device,
+	struct gen8_gmu_device *gmu, struct kgsl_snapshot *snapshot)
+{
+	struct gmu_mem_type_desc desc;
+	struct kgsl_memdesc *md;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(gmu->gmu_globals); i++) {
+
+		md = &gmu->gmu_globals[i];
+		if (!md->size)
+			continue;
+
+		desc.memdesc = md;
+		if (md == gmu->hfi.hfi_mem)
+			desc.type = SNAPSHOT_GMU_MEM_HFI;
+		else if (md == gmu->gmu_log)
+			desc.type = SNAPSHOT_GMU_MEM_LOG;
+		else if (md == gmu->dump_mem)
+			desc.type = SNAPSHOT_GMU_MEM_DEBUG;
+		else if ((md == gmu->gmu_init_scratch) || (md == gmu->gpu_boot_scratch))
+			desc.type = SNAPSHOT_GMU_MEM_WARMBOOT;
+		else if (md == gmu->vrb)
+			desc.type = SNAPSHOT_GMU_MEM_VRB;
+		else if (md == gmu->trace.md)
+			desc.type = SNAPSHOT_GMU_MEM_TRACE;
+		else
+			desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
+
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+			snapshot, gen8_snapshot_gmu_mem, &desc);
+	}
+}
+
+struct kgsl_snapshot_gmu_version {
+	u32 type;
+	u32 value;
+};
+
+static size_t gen8_snapshot_gmu_version(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	u32 *data = (u32 *) (buf + sizeof(*header));
+	struct kgsl_snapshot_gmu_version *ver = priv;
+
+	if (remain < DEBUG_SECTION_SZ(1)) {
+		SNAPSHOT_ERR_NOMEM(device, "GMU Version");
+		return 0;
+	}
+
+	header->type = ver->type;
+	header->size = 1;
+
+	*data = ver->value;
+
+	return DEBUG_SECTION_SZ(1);
+}
+
+static void gen8_gmu_snapshot_versions(struct kgsl_device *device,
+		struct gen8_gmu_device *gmu,
+		struct kgsl_snapshot *snapshot)
+{
+	int i;
+
+	struct kgsl_snapshot_gmu_version gmu_vers[] = {
+		{ .type = SNAPSHOT_DEBUG_GMU_CORE_VERSION,
+			.value = gmu->ver.core, },
+		{ .type = SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION,
+			.value = gmu->ver.core_dev, },
+		{ .type = SNAPSHOT_DEBUG_GMU_PWR_VERSION,
+			.value = gmu->ver.pwr, },
+		{ .type = SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION,
+			.value = gmu->ver.pwr_dev, },
+		{ .type = SNAPSHOT_DEBUG_GMU_HFI_VERSION,
+			.value = gmu->ver.hfi, },
+	};
+
+	for (i = 0; i < ARRAY_SIZE(gmu_vers); i++)
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+				snapshot, gen8_snapshot_gmu_version,
+				&gmu_vers[i]);
+}
+
+#define RSCC_OFFSET_DWORDS 0x14000
+
+static size_t gen8_snapshot_rscc_registers(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	const u32 *regs = priv;
+	u32 *data = (u32 *)buf;
+	int count = 0, k;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	/* Figure out how many registers we are going to dump */
+	count = adreno_snapshot_regs_count(regs);
+
+	if (remain < (count * 4)) {
+		SNAPSHOT_ERR_NOMEM(device, "RSCC REGISTERS");
+		return 0;
+	}
+
+	for (regs = priv; regs[0] != UINT_MAX; regs += 2) {
+		u32 cnt = REG_COUNT(regs);
+
+		if (cnt == 1) {
+			*data++ = BIT(31) |  regs[0];
+			*data++ =  __raw_readl(gmu->rscc_virt +
+				((regs[0] - RSCC_OFFSET_DWORDS) << 2));
+			continue;
+		}
+		*data++ = regs[0];
+		*data++ = cnt;
+		for (k = regs[0]; k <= regs[1]; k++)
+			*data++ = __raw_readl(gmu->rscc_virt +
+				((k - RSCC_OFFSET_DWORDS) << 2));
+	}
+
+	/* Return the size of the section */
+	return (count * 4);
+}
+
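
A standalone sketch of how many dwords each {first, last} pair in the rscc register list contributes to the section above, assuming REG_COUNT() expands to last - first + 1 (the register addresses are illustrative):

#include <stdint.h>
#include <stdio.h>

/*
 * Single registers are stored as (BIT(31) | addr, value); ranges are
 * stored as (addr, count, value...), mirroring the loop above.
 */
static uint32_t rscc_pair_dwords(uint32_t first, uint32_t last)
{
	uint32_t cnt = last - first + 1;

	return (cnt == 1) ? 2 : 2 + cnt;
}

int main(void)
{
	printf("%u\n", rscc_pair_dwords(0x14000, 0x14000)); /* prints 2 */
	printf("%u\n", rscc_pair_dwords(0x14010, 0x14013)); /* prints 6 */
	return 0;
}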
+/*
+ * gen8_gmu_device_snapshot() - GEN8 GMU snapshot function
+ * @device: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the GEN8 GMU specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+static void gen8_gmu_device_snapshot(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device));
+	const struct gen8_snapshot_block_list *gen8_snapshot_block_list =
+						gpucore->gen8_snapshot_block_list;
+	u32 i, slice, j;
+	struct gen8_reg_list_info info = {0};
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+		snapshot, gen8_gmu_snapshot_itcm, gmu);
+
+	gen8_gmu_snapshot_versions(device, gmu, snapshot);
+
+	gen8_gmu_snapshot_memories(device, gmu, snapshot);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
+		gen8_snapshot_rscc_registers, (void *) gen8_snapshot_block_list->rscc_regs);
+
+	/* Capture GMU registers which are on CX domain and unsliced */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
+		adreno_snapshot_registers_v2,
+		(void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs);
+
+	if (!gen8_gmu_rpmh_pwr_state_is_active(device) ||
+		!gen8_gmu_gx_is_on(adreno_dev))
+		goto dtcm;
+
+	/* Set fence to ALLOW mode so registers can be read */
+	kgsl_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0);
+
+	/* Capture GMU registers which are on GX domain */
+	for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_gx_regs; i++) {
+		struct gen8_reg_list *regs = &gen8_snapshot_block_list->gmu_gx_regs[i];
+
+		slice = regs->slice_region ? MAX_PHYSICAL_SLICES : 1;
+		for (j = 0 ; j < slice; j++) {
+			info.regs = regs;
+			info.slice_id = SLICE_ID(regs->slice_region, j);
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot,
+				gen8_legacy_snapshot_registers, &info);
+		}
+	}
+
+dtcm:
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+		snapshot, gen8_gmu_snapshot_dtcm, gmu);
+}
+
+void gen8_gmu_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/*
+	 * Dump the external registers first so that GPUCC and other external
+	 * registers are present in the snapshot and the system state can be
+	 * analyzed even from a partial snapshot dump
+	 */
+	gen8_snapshot_external_core_regs(device, snapshot);
+
+	gen8_gmu_device_snapshot(device, snapshot);
+
+	gen8_snapshot(adreno_dev, snapshot);
+
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, UINT_MAX);
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_MASK, HFI_IRQ_MASK);
+}

+ 831 - 0
qcom/opensource/graphics-kernel/adreno_gen8_hfi.c

@@ -0,0 +1,831 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/nvmem-consumer.h>
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "adreno_gen8_gmu.h"
+#include "adreno_gen8_hfi.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+
+/* Below section is for all structures related to HFI queues */
+#define HFI_QUEUE_MAX HFI_QUEUE_DEFAULT_CNT
+
+/* Total header sizes + queue sizes + 16 for alignment */
+#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
+		(HFI_QUEUE_SIZE * HFI_QUEUE_MAX))
+
+#define HOST_QUEUE_START_ADDR(hfi_mem, i) \
+	((hfi_mem)->hostptr + HFI_QUEUE_OFFSET(i))
+
+struct gen8_hfi *to_gen8_hfi(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	return &gmu->hfi;
+}
+
+/* Sizes in the functions below are in units of dwords */
+int gen8_hfi_queue_read(struct gen8_gmu_device *gmu, u32 queue_idx,
+		u32 *output, u32 max_size)
+{
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	u32 *queue;
+	u32 msg_hdr;
+	u32 i, read;
+	u32 size;
+	int result = 0;
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+		return -EINVAL;
+
+	if (hdr->read_index == hdr->write_index)
+		return -ENODATA;
+
+	/* Clear the output data before populating */
+	memset(output, 0, max_size);
+
+	queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
+	msg_hdr = queue[hdr->read_index];
+	size = MSG_HDR_GET_SIZE(msg_hdr);
+
+	if (size > (max_size >> 2)) {
+		dev_err(&gmu->pdev->dev,
+		"HFI message too big: hdr:0x%x rd idx=%d\n",
+			msg_hdr, hdr->read_index);
+		result = -EMSGSIZE;
+		goto done;
+	}
+
+	read = hdr->read_index;
+
+	if (read < hdr->queue_size) {
+		for (i = 0; i < size && i < (max_size >> 2); i++) {
+			output[i] = queue[read];
+			read = (read + 1)%hdr->queue_size;
+		}
+		result = size;
+	} else {
+		/* In case FW messed up */
+		dev_err(&gmu->pdev->dev,
+			"Read index %d greater than queue size %d\n",
+			hdr->read_index, hdr->queue_size);
+		result = -ENODATA;
+	}
+
+	read = ALIGN(read, SZ_4) % hdr->queue_size;
+
+	hfi_update_read_idx(hdr, read);
+
+	/* For acks, trace the packet for which this ack was sent */
+	if (MSG_HDR_GET_TYPE(msg_hdr) == HFI_MSG_ACK)
+		trace_kgsl_hfi_receive(MSG_HDR_GET_ID(output[1]),
+			MSG_HDR_GET_SIZE(output[1]),
+			MSG_HDR_GET_SEQNUM(output[1]));
+	else
+		trace_kgsl_hfi_receive(MSG_HDR_GET_ID(msg_hdr),
+			MSG_HDR_GET_SIZE(msg_hdr), MSG_HDR_GET_SEQNUM(msg_hdr));
+
+done:
+	return result;
+}
+
+int gen8_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
+		u32 *msg, u32 size_bytes)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	u32 *queue;
+	u32 i, write_idx, read_idx, empty_space;
+	u32 size_dwords = size_bytes >> 2;
+	u32 align_size = ALIGN(size_dwords, SZ_4);
+	u32 id = MSG_HDR_GET_ID(*msg);
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
+		return -EINVAL;
+
+	queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);
+
+	write_idx = hdr->write_index;
+	read_idx = hdr->read_index;
+
+	empty_space = (write_idx >= read_idx) ?
+			(hdr->queue_size - (write_idx - read_idx))
+			: (read_idx - write_idx);
+
+	if (empty_space <= align_size)
+		return -ENOSPC;
+
+	for (i = 0; i < size_dwords; i++) {
+		queue[write_idx] = msg[i];
+		write_idx = (write_idx + 1) % hdr->queue_size;
+	}
+
+	/* Fill any unused space at the end of the packet with a cookie value */
+	for (; i < align_size; i++) {
+		queue[write_idx] = 0xfafafafa;
+		write_idx = (write_idx + 1) % hdr->queue_size;
+	}
+
+	trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
+
+	hfi_update_write_idx(&hdr->write_index, write_idx);
+
+	return 0;
+}
+
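
A standalone sketch of the circular-buffer free-space arithmetic used in gen8_hfi_queue_write() above (the queue size is illustrative):

#include <stdint.h>
#include <stdio.h>

/* Free dwords left between the write and read indices of a circular queue */
static uint32_t hfi_empty_space(uint32_t queue_size, uint32_t read_idx,
		uint32_t write_idx)
{
	return (write_idx >= read_idx) ?
		(queue_size - (write_idx - read_idx)) :
		(read_idx - write_idx);
}

int main(void)
{
	uint32_t qsize = 1024;	/* hypothetical hdr->queue_size in dwords */

	/* Writer ahead of reader: no wrap yet */
	printf("%u\n", hfi_empty_space(qsize, 10, 200));	/* prints 834 */
	/* Reader ahead of writer: the queue has wrapped */
	printf("%u\n", hfi_empty_space(qsize, 900, 100));	/* prints 800 */
	return 0;
}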
+int gen8_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hfi *hfi = &gmu->hfi;
+	int ret;
+
+	spin_lock(&hfi->cmdq_lock);
+
+	if (test_bit(MSG_HDR_GET_ID(msg[0]), hfi->wb_set_record_bitmask))
+		*msg = RECORD_MSG_HDR(*msg);
+
+	ret = gen8_hfi_queue_write(adreno_dev, HFI_CMD_ID, msg, size_bytes);
+
+	/*
+	 * Some messages, like the ACD table and perf table, are saved in memory, so
+	 * reset the header to make sure we do not send the record-enabled bit in case
+	 * the warmboot setting is changed from debugfs
+	 */
+	*msg = CLEAR_RECORD_MSG_HDR(*msg);
+	/*
+	 * Memory barrier to make sure packet and write index are written before
+	 * an interrupt is raised
+	 */
+	wmb();
+
+	/* Send interrupt to GMU to receive the message */
+	if (!ret)
+		gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
+			GEN8_GMUCX_HOST2GMU_INTR_SET, 0x1);
+
+	spin_unlock(&hfi->cmdq_lock);
+
+	return ret;
+}
+
+/* Sizes of the queue and message are in units of dwords */
+static void init_queues(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	int i;
+	struct hfi_queue_table *tbl;
+	struct hfi_queue_header *hdr;
+	struct {
+		u32 idx;
+		u32 pri;
+		u32 status;
+	} queue[HFI_QUEUE_MAX] = {
+		{ HFI_CMD_ID, HFI_CMD_PRI, HFI_QUEUE_STATUS_ENABLED },
+		{ HFI_MSG_ID, HFI_MSG_PRI, HFI_QUEUE_STATUS_ENABLED },
+		{ HFI_DBG_ID, HFI_DBG_PRI, HFI_QUEUE_STATUS_ENABLED },
+	};
+
+	/* Fill Table Header */
+	tbl = mem_addr->hostptr;
+	tbl->qtbl_hdr.version = 0;
+	tbl->qtbl_hdr.size = sizeof(struct hfi_queue_table) >> 2;
+	tbl->qtbl_hdr.qhdr0_offset = sizeof(struct hfi_queue_table_header) >> 2;
+	tbl->qtbl_hdr.qhdr_size = sizeof(struct hfi_queue_header) >> 2;
+	tbl->qtbl_hdr.num_q = HFI_QUEUE_MAX;
+	tbl->qtbl_hdr.num_active_q = HFI_QUEUE_MAX;
+
+	memset(&tbl->qhdr[0], 0, sizeof(tbl->qhdr));
+
+	/* Fill Individual Queue Headers */
+	for (i = 0; i < HFI_QUEUE_MAX; i++) {
+		hdr = &tbl->qhdr[i];
+		hdr->start_addr = GMU_QUEUE_START_ADDR(mem_addr->gmuaddr, i);
+		hdr->type = QUEUE_HDR_TYPE(queue[i].idx, queue[i].pri, 0, 0);
+		hdr->status = queue[i].status;
+		hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* convert to dwords */
+	}
+}
+
+int gen8_hfi_init(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hfi *hfi = &gmu->hfi;
+
+	/* Allocates & maps memory for HFI */
+	if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
+		hfi->hfi_mem = gen8_reserve_gmu_kernel_block(gmu, 0,
+				HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0);
+		if (!IS_ERR(hfi->hfi_mem))
+			init_queues(adreno_dev);
+	}
+
+	return PTR_ERR_OR_ZERO(hfi->hfi_mem);
+}
+
+int gen8_receive_ack_cmd(struct gen8_gmu_device *gmu, void *rcvd,
+	struct pending_cmd *ret_cmd)
+{
+	struct adreno_device *adreno_dev = gen8_gmu_to_adreno(gmu);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 *ack = rcvd;
+	u32 hdr = ack[0];
+	u32 req_hdr = ack[1];
+
+	if (ret_cmd == NULL)
+		return -EINVAL;
+
+	if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) {
+		memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2);
+		return 0;
+	}
+
+	/* Could not find the sender; log the waiter */
+	dev_err_ratelimited(&gmu->pdev->dev,
+		"HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n",
+		req_hdr, ret_cmd->sent_hdr);
+
+	gmu_core_fault_snapshot(device);
+
+	return -ENODEV;
+}
+
+static int poll_gmu_reg(struct adreno_device *adreno_dev,
+	u32 offsetdwords, u32 expected_val,
+	u32 mask, u32 timeout_ms)
+{
+	u32 val;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+	bool nmi = false;
+
+	while (time_is_after_jiffies(timeout)) {
+		gmu_core_regread(device, offsetdwords, &val);
+		if ((val & mask) == expected_val)
+			return 0;
+
+		/*
+		 * If the GMU firmware fails any assertion, an error message is
+		 * sent to KMD and an NMI is triggered. So check if the GMU is in
+		 * NMI and time out early. Bits [11:9] of
+		 * GEN8_GMUCX_CM3_FW_INIT_RESULT contain the GMU reset status. A
+		 * non-zero value here indicates that a GMU reset is active; the
+		 * NMI handler will eventually complete and the GMU will wait for
+		 * recovery.
+		 */
+		gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &val);
+		if (val & 0xE00) {
+			nmi = true;
+			break;
+		}
+
+		usleep_range(10, 100);
+	}
+
+	/* Check one last time */
+	gmu_core_regread(device, offsetdwords, &val);
+	if ((val & mask) == expected_val)
+		return 0;
+
+	dev_err(&gmu->pdev->dev,
+		"Reg poll %s: offset 0x%x, want 0x%x, got 0x%x\n",
+		nmi ? "abort" : "timeout", offsetdwords, expected_val,
+		val & mask);
+
+	return -ETIMEDOUT;
+}
+
+static int gen8_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev,
+	void *data, u32 size_bytes, struct pending_cmd *ret_cmd)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int rc;
+	u32 *cmd = data;
+	struct gen8_hfi *hfi = &gmu->hfi;
+	u32 seqnum = atomic_inc_return(&hfi->seqnum);
+
+	*cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2);
+	if (ret_cmd == NULL)
+		return gen8_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
+
+	ret_cmd->sent_hdr = cmd[0];
+
+	rc = gen8_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
+	if (rc)
+		return rc;
+
+	rc = poll_gmu_reg(adreno_dev, GEN8_GMUCX_GMU2HOST_INTR_INFO,
+		HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT);
+
+	if (rc) {
+		gmu_core_fault_snapshot(device);
+		dev_err(&gmu->pdev->dev,
+		"Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n",
+		cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd));
+		return rc;
+	}
+
+	/* Clear the interrupt */
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR,
+		HFI_IRQ_MSGQ_MASK);
+
+	rc = gen8_hfi_process_queue(gmu, HFI_MSG_ID, ret_cmd);
+
+	return rc;
+}
+
+int gen8_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes)
+{
+	struct pending_cmd ret_cmd;
+	int rc;
+
+	memset(&ret_cmd, 0, sizeof(ret_cmd));
+
+	rc = gen8_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, &ret_cmd);
+	if (rc)
+		return rc;
+
+	if (ret_cmd.results[2]) {
+		struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		gmu_core_fault_snapshot(device);
+		dev_err(&gmu->pdev->dev,
+				"HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n",
+				ret_cmd.results[1],
+				ret_cmd.results[2]);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int gen8_hfi_send_core_fw_start(struct adreno_device *adreno_dev)
+{
+	struct hfi_core_fw_start_cmd cmd = {
+		.handle = 0x0,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START);
+	if (ret)
+		return ret;
+
+	return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static const char *feature_to_string(u32 feature)
+{
+	if (feature == HFI_FEATURE_ACD)
+		return "ACD";
+
+	return "unknown";
+}
+
+/* Send an hfi message inline and handle GMU return-type errors */
+int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
+		struct pending_cmd *ret_cmd, u32 size_bytes)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int rc;
+
+	if (GMU_VER_MINOR(gmu->ver.hfi) <= 4)
+		return gen8_hfi_send_generic_req(adreno_dev, cmd, size_bytes);
+
+	rc = gen8_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, ret_cmd);
+	if (rc)
+		return rc;
+
+	switch (ret_cmd->results[3]) {
+	case GMU_SUCCESS:
+		rc = ret_cmd->results[2];
+		break;
+	case GMU_ERROR_NO_ENTRY:
+		/* Unique error so the caller can handle undefined HFI msgs */
+		rc = -ENOENT;
+		break;
+	case GMU_ERROR_TIMEOUT:
+		rc = -EINVAL;
+		break;
+	default:
+		gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev));
+		dev_err(&gmu->pdev->dev,
+			"HFI ACK: Req=0x%8.8X, Result=0x%8.8X Error:0x%8.8X\n",
+			ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]);
+		rc = -EINVAL;
+		break;
+	}
+
+	return rc;
+}
+
+int gen8_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
+	u32 feature, u32 enable, u32 data)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct pending_cmd ret_cmd = {0};
+	struct hfi_feature_ctrl_cmd cmd = {
+		.feature = feature,
+		.enable = enable,
+		.data = data,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_FEATURE_CTRL);
+	if (ret)
+		return ret;
+
+	ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
+	if (ret < 0)
+		dev_err(&gmu->pdev->dev,
+				"Unable to %s feature %s (%d)\n",
+				enable ? "enable" : "disable",
+				feature_to_string(feature),
+				feature);
+	return ret;
+}
+
+int gen8_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct pending_cmd ret_cmd = {0};
+	struct hfi_get_value_cmd cmd = {
+		.type = type,
+		.subtype = subtype,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE);
+	if (ret)
+		return ret;
+
+	ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
+	if (ret < 0)
+		dev_err(&gmu->pdev->dev,
+			"Unable to get HFI Value type: %d, subtype: %d, error = %d\n",
+			type, subtype, ret);
+
+	return ret;
+}
+
+int gen8_hfi_send_set_value(struct adreno_device *adreno_dev,
+		u32 type, u32 subtype, u32 data)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct pending_cmd ret_cmd = {0};
+	struct hfi_set_value_cmd cmd = {
+		.type = type,
+		.subtype = subtype,
+		.data = data,
+	};
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_SET_VALUE);
+	if (ret)
+		return ret;
+
+	ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
+	if (ret < 0)
+		dev_err(&gmu->pdev->dev,
+			"Unable to set HFI Value %d, %d to %d, error = %d\n",
+			type, subtype, data, ret);
+	return ret;
+}
+
+void adreno_gen8_receive_err_req(struct gen8_gmu_device *gmu, void *rcvd)
+{
+	struct hfi_err_cmd *cmd = rcvd;
+
+	dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n",
+			((cmd->error_code >> 16) & 0xffff),
+			(cmd->error_code & 0xffff),
+			(char *) cmd->data);
+}
+
+void adreno_gen8_receive_debug_req(struct gen8_gmu_device *gmu, void *rcvd)
+{
+	struct hfi_debug_cmd *cmd = rcvd;
+
+	dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n",
+			cmd->type, cmd->timestamp, cmd->data);
+}
+
+int gen8_hfi_process_queue(struct gen8_gmu_device *gmu,
+		u32 queue_idx, struct pending_cmd *ret_cmd)
+{
+	u32 rcvd[MAX_RCVD_SIZE];
+
+	while (gen8_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) {
+		/* ACK Handler */
+		if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
+			int ret = gen8_receive_ack_cmd(gmu, rcvd, ret_cmd);
+
+			if (ret)
+				return ret;
+			continue;
+		}
+
+		/* Request Handler */
+		switch (MSG_HDR_GET_ID(rcvd[0])) {
+		case F2H_MSG_ERR: /* No Reply */
+			adreno_gen8_receive_err_req(gmu, rcvd);
+			break;
+		case F2H_MSG_DEBUG: /* No Reply */
+			adreno_gen8_receive_debug_req(gmu, rcvd);
+			break;
+		default: /* No Reply */
+			dev_err(&gmu->pdev->dev,
+				"HFI request %d not supported\n",
+				MSG_HDR_GET_ID(rcvd[0]));
+			break;
+		}
+	}
+
+	return 0;
+}
+
+int gen8_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	if (!adreno_dev->bcl_enabled)
+		return 0;
+
+	/*
+	 * The gmu expects BCL data in the following format:
+	 * BIT[0] - response type
+	 * BIT[1:7] - Throttle level 1 (optional)
+	 * BIT[8:14] - Throttle level 2 (optional)
+	 * BIT[15:21] - Throttle level 3 (optional)
+	 */
+	return gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, adreno_dev->bcl_data);
+}
+
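
A standalone sketch of packing bcl_data in the bit layout described above (the response type and throttle levels are hypothetical values):

#include <stdint.h>
#include <stdio.h>

/* bit 0: response type, bits 1-7/8-14/15-21: throttle levels 1-3 */
static uint32_t pack_bcl_data(uint32_t resp, uint32_t lvl1, uint32_t lvl2,
		uint32_t lvl3)
{
	return (resp & 0x1) | ((lvl1 & 0x7f) << 1) |
		((lvl2 & 0x7f) << 8) | ((lvl3 & 0x7f) << 15);
}

int main(void)
{
	printf("0x%x\n", pack_bcl_data(1, 25, 50, 75));	/* prints 0x25b233 */
	return 0;
}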
+int gen8_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	int ret = 0;
+	struct hfi_clx_table_v2_cmd cmd = {0};
+
+	if (!adreno_dev->clx_enabled)
+		return 0;
+
+	/* Make sure the table is valid before enabling feature */
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_CLX_TBL);
+	if (ret)
+		return ret;
+
+	ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_CLX, 1, 0);
+	if (ret)
+		return ret;
+
+	cmd.version = FIELD_PREP(GENMASK(31, 16), 0x2) | FIELD_PREP(GENMASK(15, 0), 0x1);
+	/* cmd.domain[0] is never used but needed per hfi spec */
+	cmd.domain[1].data0 = FIELD_PREP(GENMASK(31, 29), 1) |
+				FIELD_PREP(GENMASK(28, 28), 1) |
+				FIELD_PREP(GENMASK(27, 22), 1) |
+				FIELD_PREP(GENMASK(21, 16), 40) |
+				FIELD_PREP(GENMASK(15, 0), 0);
+	cmd.domain[1].clxt = 0;
+	cmd.domain[1].clxh = 0;
+	cmd.domain[1].urgmode = 1;
+	cmd.domain[1].lkgen = 0;
+	cmd.domain[1].currbudget = 50;
+
+	return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
+#define EVENT_PWR_ACD_THROTTLE_PROF 44
+
+int gen8_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	if (adreno_dev->acd_enabled) {
+		ret = gen8_hfi_send_feature_ctrl(adreno_dev,
+			HFI_FEATURE_ACD, 1, 0);
+		if (ret)
+			return ret;
+
+		ret = gen8_hfi_send_generic_req(adreno_dev,
+				&gmu->hfi.acd_table, sizeof(gmu->hfi.acd_table));
+		if (ret)
+			return ret;
+
+		gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_LOG_EVENT_ON,
+				EVENT_PWR_ACD_THROTTLE_PROF, 0);
+	}
+
+	return 0;
+}
+
+int gen8_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (gmu->idle_level == GPU_HW_IFPC)
+		return gen8_hfi_send_feature_ctrl(adreno_dev,
+				HFI_FEATURE_IFPC, 1, adreno_dev->ifpc_hyst);
+	return 0;
+}
+
+static void reset_hfi_queues(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr;
+	u32 i;
+
+	/* Flush HFI queues */
+	for (i = 0; i < HFI_QUEUE_MAX; i++) {
+		hdr = &tbl->qhdr[i];
+		if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+			continue;
+
+		hdr->read_index = hdr->write_index;
+	}
+}
+
+/* Fill the entry and return the dword count written */
+static u32 _fill_table_entry(struct hfi_table_entry *entry, u32 count,
+		u32 stride_bytes, u32 *data)
+{
+	entry->count = count;
+	entry->stride = stride_bytes >> 2; /* entry->stride is in dwords */
+	memcpy(entry->data, data, stride_bytes * count);
+
+	/* Return total dword count of entry + data */
+	return (sizeof(*entry) >> 2) + (entry->count * entry->stride);
+}
+
+int gen8_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev)
+{
+	/*
+	 * Buffer to store either hfi_table_cmd or hfi_dcvstable_cmd.
+	 * Current max size for either is 165 dwords.
+	 */
+	static u32 cmd_buf[200];
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_dcvs_table *tbl = &gmu->dcvs_table;
+	int ret = 0;
+
+	/* Starting with GMU HFI Version 2.6.1, use H2F_MSG_TABLE */
+	if (gmu->ver.hfi >= HFI_VERSION(2, 6, 1)) {
+		struct hfi_table_cmd *cmd = (struct hfi_table_cmd *)&cmd_buf[0];
+		u32 dword_off;
+
+		/* Already setup, so just send cmd */
+		if (cmd->hdr)
+			return gen8_hfi_send_generic_req(adreno_dev, cmd,
+					MSG_HDR_GET_SIZE(cmd->hdr) << 2);
+
+		if (tbl->gpu_level_num > MAX_GX_LEVELS || tbl->gmu_level_num > MAX_CX_LEVELS)
+			return -EINVAL;
+
+		/* CMD starts with struct hfi_table_cmd data */
+		cmd->type = HFI_TABLE_GPU_PERF;
+		dword_off = sizeof(*cmd) >> 2;
+
+		/* Fill in the table entry and data starting at dword_off */
+		dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off],
+				tbl->gpu_level_num, sizeof(struct opp_gx_desc),
+				(u32 *)tbl->gx_votes);
+
+		/* Fill in the table entry and data starting at dword_off */
+		dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off],
+				tbl->gmu_level_num, sizeof(struct opp_desc),
+				(u32 *)tbl->cx_votes);
+
+		cmd->hdr = CREATE_MSG_HDR(H2F_MSG_TABLE, HFI_MSG_CMD);
+		cmd->hdr = MSG_HDR_SET_SIZE(cmd->hdr, dword_off);
+
+		ret = gen8_hfi_send_generic_req(adreno_dev, cmd, dword_off << 2);
+	} else {
+		struct hfi_dcvstable_cmd *cmd = (struct hfi_dcvstable_cmd *)&cmd_buf[0];
+
+		/* Already setup, so just send cmd */
+		if (cmd->hdr)
+			return gen8_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd));
+
+		if (tbl->gpu_level_num > MAX_GX_LEVELS_LEGACY || tbl->gmu_level_num > MAX_CX_LEVELS)
+			return -EINVAL;
+
+		ret = CMD_MSG_HDR(*cmd, H2F_MSG_PERF_TBL);
+		if (ret)
+			return ret;
+
+		cmd->gpu_level_num = tbl->gpu_level_num;
+		cmd->gmu_level_num = tbl->gmu_level_num;
+		memcpy(&cmd->gx_votes, tbl->gx_votes,
+				sizeof(struct opp_gx_desc) * cmd->gpu_level_num);
+		memcpy(&cmd->cx_votes, tbl->cx_votes,
+				sizeof(struct opp_desc) * cmd->gmu_level_num);
+
+		ret = gen8_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd));
+	}
+
+	return ret;
+}
+
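
A standalone sketch of the dword accounting behind _fill_table_entry() and the H2F_MSG_TABLE path above, assuming a two-dword hfi_table_entry header; the descriptor sizes and level counts are illustrative:

#include <stdint.h>
#include <stdio.h>

/* Header dwords plus count * stride payload dwords per table entry */
static uint32_t table_entry_dwords(uint32_t count, uint32_t stride_bytes)
{
	return 2 + count * (stride_bytes >> 2);
}

int main(void)
{
	/* e.g. 8 GX levels of 24-byte descriptors, 4 CX levels of 8 bytes */
	uint32_t payload = table_entry_dwords(8, 24) + table_entry_dwords(4, 8);

	printf("table payload dwords after the cmd header: %u\n", payload);
	return 0;
}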
+int gen8_hfi_start(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int result;
+
+	reset_hfi_queues(adreno_dev);
+
+	result = gen8_hfi_send_gpu_perf_table(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen8_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table,
+			sizeof(gmu->hfi.bw_table));
+	if (result)
+		goto err;
+
+	result = gen8_hfi_send_acd_feature_ctrl(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen8_hfi_send_bcl_feature_ctrl(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen8_hfi_send_clx_feature_ctrl(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen8_hfi_send_ifpc_feature_ctrl(adreno_dev);
+	if (result)
+		goto err;
+
+	result = gen8_hfi_send_core_fw_start(adreno_dev);
+	if (result)
+		goto err;
+
+	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	/* Request default DCVS level */
+	result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+	if (result)
+		goto err;
+
+	/* Request default BW vote */
+	result = kgsl_pwrctrl_axi(device, true);
+
+err:
+	if (result)
+		gen8_hfi_stop(adreno_dev);
+
+	return result;
+
+}
+
+void gen8_hfi_stop(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_pwrctrl_axi(device, false);
+
+	clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+}
+
+/* HFI interrupt handler */
+irqreturn_t gen8_hfi_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(ADRENO_DEVICE(device));
+	u32 status = 0;
+
+	gmu_core_regread(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, &status);
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, HFI_IRQ_MASK);
+
+	if (status & HFI_IRQ_DBGQ_MASK)
+		gen8_hfi_process_queue(gmu, HFI_DBG_ID, NULL);
+	if (status & HFI_IRQ_CM3_FAULT_MASK) {
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"GMU CM3 fault interrupt received\n");
+		atomic_set(&gmu->cm3_fault, 1);
+
+		/* make sure other CPUs see the update */
+		smp_wmb();
+	}
+	if (status & ~HFI_IRQ_MASK)
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"Unhandled HFI interrupts 0x%lx\n",
+				status & ~HFI_IRQ_MASK);
+
+	return IRQ_HANDLED;
+}

+ 235 - 0
qcom/opensource/graphics-kernel/adreno_gen8_hfi.h

@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __ADRENO_GEN8_HFI_H
+#define __ADRENO_GEN8_HFI_H
+
+#include "adreno_hfi.h"
+
+/**
+ * struct gen8_hfi - HFI control structure
+ */
+struct gen8_hfi {
+	/** @irq: HFI interrupt line */
+	int irq;
+	/** @seqnum: Atomic counter that is incremented for each message sent.
+	 *   The value of the counter is used as the sequence number for the HFI message.
+	 */
+	atomic_t seqnum;
+	/** @hfi_mem: Memory descriptor for the hfi memory */
+	struct kgsl_memdesc *hfi_mem;
+	/** @bw_table: HFI BW table buffer */
+	struct hfi_bwtable_cmd bw_table;
+	/** @acd_table: HFI table for ACD data */
+	struct hfi_acd_table_cmd acd_table;
+	/** @cmdq_lock: Spinlock for accessing the cmdq */
+	spinlock_t cmdq_lock;
+	/**
+	 * @wb_set_record_bitmask: Bitmask to enable or disable the recording
+	 * of messages in the GMU scratch.
+	 */
+	unsigned long wb_set_record_bitmask[BITS_TO_LONGS(HFI_MAX_ID)];
+};
+
+struct gen8_gmu_device;
+
+/* gen8_hfi_irq_handler - IRQ handler for HFI interrupts */
+irqreturn_t gen8_hfi_irq_handler(int irq, void *data);
+
+/**
+ * gen8_hfi_start - Send the various HFIs during device boot up
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_stop - Stop hfi communication during device power down
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen8_hfi_stop(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_init - Initialize hfi resources
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function allocates and sets up hfi queues
+ * when a process creates the very first kgsl instance
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_init(struct adreno_device *adreno_dev);
+
+/* Helper function to get to gen8 hfi struct from adreno device */
+struct gen8_hfi *to_gen8_hfi(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_queue_write - Write a command to hfi queue
+ * @adreno_dev: Pointer to the adreno device
+ * @queue_idx: destination queue id
+ * @msg: Data to be written to the queue
+ * @size_bytes: Size of the command in bytes
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
+		u32 *msg, u32 size_bytes);
+
+/**
+ * gen8_hfi_queue_read - Read data from hfi queue
+ * @gmu: Pointer to the gen8 gmu device
+ * @queue_idx: queue id to read from
+ * @output: Pointer to read the data into
+ * @max_size: Size of the output buffer in bytes
+ *
+ * Return: Number of dwords read on success or negative error on failure
+ */
+int gen8_hfi_queue_read(struct gen8_gmu_device *gmu, u32 queue_idx,
+		u32 *output, u32 max_size);
+
+/**
+ * gen8_receive_ack_cmd - Process ack type packets
+ * @gmu: Pointer to the gen8 gmu device
+ * @rcvd: Pointer to the data read from hfi queue
+ * @ret_cmd: Container for the hfi packet for which this ack is received
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_receive_ack_cmd(struct gen8_gmu_device *gmu, void *rcvd,
+		struct pending_cmd *ret_cmd);
+
+/**
+ * gen8_hfi_send_feature_ctrl - Enable gmu feature via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @feature: feature to be enabled or disabled
+ * @enable: Set 1 to enable or 0 to disable a feature
+ * @data: payload for the send feature hfi packet
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
+		u32 feature, u32 enable, u32 data);
+
+/**
+ * gen8_hfi_send_get_value - Send gmu get_values via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @type: GMU get_value type
+ * @subtype: GMU get_value subtype
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype);
+
+/**
+ * gen8_hfi_send_set_value - Send gmu set_values via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @type: GMU set_value type
+ * @subtype: GMU set_value subtype
+ * @data: Value to set
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_set_value(struct adreno_device *adreno_dev,
+		u32 type, u32 subtype, u32 data);
+
+/**
+ * gen8_hfi_send_core_fw_start - Send the core fw start hfi
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_core_fw_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_send_acd_feature_ctrl - Send the acd table and acd feature
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_send_generic_req - Send a generic hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ * @cmd: Pointer to the hfi packet header and data
+ * @size_bytes: Size of the packet in bytes
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes);
+
+/**
+ * gen8_hfi_send_generic_req_v5 - Send a generic hfi packet with additional error handling
+ * @adreno_dev: Pointer to the adreno device
+ * @cmd: Pointer to the hfi packet header and data
+ * @ret_cmd: Ack for the command we just sent
+ * @size_bytes: Size of the packet in bytes
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
+		struct pending_cmd *ret_cmd, u32 size_bytes);
+
+/**
+ * gen8_hfi_send_bcl_feature_ctrl - Send the bcl feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_send_clx_feature_ctrl - Send the clx feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_send_ifpc_feature_ctrl - Send the ifpc feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_send_gpu_perf_table - Send the gpu perf table hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_process_queue - Check hfi queue for messages from gmu
+ * @gmu: Pointer to the gen8 gmu device
+ * @queue_idx: queue id to be processed
+ * @ret_cmd: Container for data needed for waiting for the ack
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_process_queue(struct gen8_gmu_device *gmu,
+		u32 queue_idx, struct pending_cmd *ret_cmd);
+
+/**
+ * gen8_hfi_cmdq_write - Write a command to command queue
+ * @adreno_dev: Pointer to the adreno device
+ * @msg: Data to be written to the queue
+ * @size_bytes: Size of the command in bytes
+ *
+ * This function takes the cmdq lock before writing data to the queue
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes);
+void adreno_gen8_receive_err_req(struct gen8_gmu_device *gmu, void *rcvd);
+void adreno_gen8_receive_debug_req(struct gen8_gmu_device *gmu, void *rcvd);
+#endif

+ 1905 - 0
qcom/opensource/graphics-kernel/adreno_gen8_hwsched.c

@@ -0,0 +1,1905 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/interconnect.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "adreno_gen8_hwsched.h"
+#include "adreno_snapshot.h"
+#include "kgsl_bus.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+
+static void _wakeup_hw_fence_waiters(struct adreno_device *adreno_dev, u32 fault)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	bool lock = !in_interrupt();
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
+		return;
+
+	/*
+	 * We could be in interrupt context here, which means we need to use spin_lock_irqsave
+	 * (which disables interrupts) everywhere we take this lock. Instead of that, simply
+	 * avoid taking this lock if we are recording a fault from an interrupt handler.
+	 */
+	if (lock)
+		spin_lock(&hfi->hw_fence.lock);
+
+	clear_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags);
+
+	/* Avoid creating new hardware fences until recovery is complete */
+	set_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags);
+
+	if (!lock) {
+		/*
+		 * This barrier ensures that the above bitops complete before we wake up the waiters
+		 */
+		smp_wmb();
+	} else {
+		spin_unlock(&hfi->hw_fence.lock);
+	}
+
+	wake_up_all(&hfi->hw_fence.unack_wq);
+
+	del_timer_sync(&hfi->hw_fence_timer);
+}
+
+void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault)
+{
+	/*
+	 * Wake up any threads that may be sleeping waiting for the hardware fence unack count to
+	 * drop to a desired threshold.
+	 */
+	_wakeup_hw_fence_waiters(adreno_dev, fault);
+
+	adreno_hwsched_fault(adreno_dev, fault);
+}
+
+static size_t gen8_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv;
+
+	if (remain < rb->size + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "RB");
+		return 0;
+	}
+
+	header->start = 0;
+	header->end = rb->size >> 2;
+	header->rptr = 0;
+	header->rbsize = rb->size >> 2;
+	header->count = rb->size >> 2;
+	header->timestamp_queued = 0;
+	header->timestamp_retired = 0;
+	header->gpuaddr = rb->gpuaddr;
+	header->id = 0;
+
+	memcpy(data, rb->hostptr, rb->size);
+
+	return rb->size + sizeof(*header);
+}
+
+static void gen8_hwsched_snapshot_preemption_record(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset)
+{
+	struct kgsl_snapshot_section_header *section_header =
+		(struct kgsl_snapshot_section_header *)snapshot->ptr;
+	u8 *dest = snapshot->ptr + sizeof(*section_header);
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)dest;
+	u64 ctxt_record_size = max_t(u64, GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES,
+				device->snapshot_ctxt_record_size);
+	size_t section_size;
+
+	section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size;
+	if (snapshot->remain < section_size) {
+		SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD");
+		return;
+	}
+
+	section_header->magic = SNAPSHOT_SECTION_MAGIC;
+	section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2;
+	section_header->size = section_size;
+
+	header->size = ctxt_record_size >> 2;
+	header->gpuaddr = md->gpuaddr + offset;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	dest += sizeof(*header);
+
+	memcpy(dest, md->hostptr + offset, ctxt_record_size);
+
+	snapshot->ptr += section_header->size;
+	snapshot->remain -= section_header->size;
+	snapshot->size += section_header->size;
+}
+
+static void snapshot_preemption_records(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md)
+{
+	u64 ctxt_record_size = md->size;
+	u64 offset;
+
+	do_div(ctxt_record_size, KGSL_PRIORITY_MAX_RB_LEVELS);
+
+	/* All preemption records exist as a single mem alloc entry */
+	for (offset = 0; offset < md->size; offset += ctxt_record_size)
+		gen8_hwsched_snapshot_preemption_record(device, snapshot, md,
+			offset);
+}
+
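
A standalone sketch of how the loop above carves a single allocation into per-ringbuffer-level preemption records (the allocation size and level count are illustrative):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t md_size = 2 * 1024 * 1024;	/* hypothetical allocation size */
	uint64_t levels = 4;			/* hypothetical KGSL_PRIORITY_MAX_RB_LEVELS */
	uint64_t record_size = md_size / levels;
	uint64_t offset;

	/* One preemption record is snapshotted per ringbuffer level */
	for (offset = 0; offset < md_size; offset += record_size)
		printf("record at offset 0x%llx\n", (unsigned long long)offset);

	return 0;
}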
+static void *get_rb_hostptr(struct adreno_device *adreno_dev,
+	u64 gpuaddr, u32 size)
+{
+	struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev);
+	u64 offset;
+	u32 i;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md;
+
+		if (md && (gpuaddr >= md->gpuaddr) &&
+			((gpuaddr + size) <= (md->gpuaddr + md->size))) {
+			offset = gpuaddr - md->gpuaddr;
+			return md->hostptr + offset;
+		}
+	}
+
+	return NULL;
+}
+
+static u32 gen8_copy_gpu_global(void *out, void *in, u32 size)
+{
+	if (out && in) {
+		memcpy(out, in, size);
+		return size;
+	}
+
+	return 0;
+}
+
+static void adreno_hwsched_snapshot_rb_payload(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot, struct payload_section *payload)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_snapshot_section_header *section_header =
+		(struct kgsl_snapshot_section_header *)snapshot->ptr;
+	u8 *buf = snapshot->ptr + sizeof(*section_header);
+	struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	u32 size = gen8_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2;
+	u64 lo, hi, gpuaddr;
+	void *rb_hostptr;
+	char str[16];
+
+	lo = gen8_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO);
+	hi = gen8_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI);
+	gpuaddr = hi << 32 | lo;
+
+	/* Sanity check to make sure there is enough for the header */
+	if (snapshot->remain < sizeof(*section_header))
+		goto err;
+
+	rb_hostptr = get_rb_hostptr(adreno_dev, gpuaddr, size);
+
+	/* If the gpu address and size don't match any allocation, then abort */
+	if (((snapshot->remain - sizeof(*section_header)) <
+	    (size + sizeof(*header))) ||
+	    !gen8_copy_gpu_global(data, rb_hostptr, size))
+		goto err;
+
+	if (device->dump_all_ibs) {
+		u64 rbaddr, lpac_rbaddr;
+
+		kgsl_regread64(device, GEN8_CP_RB_BASE_LO_GC,
+			       GEN8_CP_RB_BASE_HI_GC, &rbaddr);
+		kgsl_regread64(device, GEN8_CP_RB_BASE_LO_LPAC,
+			       GEN8_CP_RB_BASE_HI_LPAC, &lpac_rbaddr);
+
+		/* Parse all IBs from current RB */
+		if ((rbaddr == gpuaddr) || (lpac_rbaddr == gpuaddr))
+			adreno_snapshot_dump_all_ibs(device, rb_hostptr, snapshot);
+	}
+
+	header->start = 0;
+	header->end = size >> 2;
+	header->rptr = gen8_hwsched_parse_payload(payload, KEY_RB_RPTR);
+	header->wptr = gen8_hwsched_parse_payload(payload, KEY_RB_WPTR);
+	header->rbsize = size >> 2;
+	header->count = size >> 2;
+	header->timestamp_queued = gen8_hwsched_parse_payload(payload,
+			KEY_RB_QUEUED_TS);
+	header->timestamp_retired = gen8_hwsched_parse_payload(payload,
+			KEY_RB_RETIRED_TS);
+	header->gpuaddr = gpuaddr;
+	header->id = gen8_hwsched_parse_payload(payload, KEY_RB_ID);
+
+	section_header->magic = SNAPSHOT_SECTION_MAGIC;
+	section_header->id = KGSL_SNAPSHOT_SECTION_RB_V2;
+	section_header->size = size + sizeof(*header) + sizeof(*section_header);
+
+	snapshot->ptr += section_header->size;
+	snapshot->remain -= section_header->size;
+	snapshot->size += section_header->size;
+
+	return;
+err:
+	snprintf(str, sizeof(str), "RB addr:0x%llx", gpuaddr);
+	SNAPSHOT_ERR_NOMEM(device, str);
+}
+
+static bool parse_payload_rb(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+	bool ret = false;
+
+	/* Skip if we didn't receive a context bad HFI */
+	if (!cmd->hdr)
+		return false;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			adreno_hwsched_snapshot_rb_payload(adreno_dev,
+							   snapshot, payload);
+			ret = true;
+		}
+
+		i += sizeof(*payload) + (payload->dwords << 2);
+	}
+
+	return ret;
+}
+
+static int snapshot_context_queue(int id, void *ptr, void *data)
+{
+	struct kgsl_snapshot *snapshot = data;
+	struct kgsl_context *context = ptr;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct gmu_mem_type_desc desc;
+
+	if (!context->gmu_registered)
+		return 0;
+
+	desc.memdesc = &drawctxt->gmu_context_queue;
+	desc.type = SNAPSHOT_GMU_MEM_CONTEXT_QUEUE;
+	kgsl_snapshot_add_section(context->device,
+		KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+		snapshot, gen8_snapshot_gmu_mem, &desc);
+
+	return 0;
+}
+
+/* Snapshot AQE buffer */
+static size_t snapshot_aqe_buffer(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+
+	u8 *ptr = buf + sizeof(*header);
+
+	if (IS_ERR_OR_NULL(memdesc) || memdesc->size == 0)
+		return 0;
+
+	if (remain < (memdesc->size + sizeof(*header))) {
+		SNAPSHOT_ERR_NOMEM(device, "AQE BUFFER");
+		return 0;
+	}
+
+	header->size = memdesc->size >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase = MMU_DEFAULT_TTBR0(device);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, memdesc->size);
+
+	return memdesc->size + sizeof(*header);
+}
+
+void gen8_hwsched_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev);
+	bool skip_memkind_rb = false;
+	u32 i;
+	bool parse_payload;
+
+	gen8_gmu_snapshot(adreno_dev, snapshot);
+
+	adreno_hwsched_parse_fault_cmdobj(adreno_dev, snapshot);
+
+	/*
+	 * First try to dump ringbuffers using context bad HFI payloads
+	 * because they have all the ringbuffer parameters. If ringbuffer
+	 * payloads are not present, fall back to dumping ringbuffers
+	 * based on MEMKIND_RB
+	 */
+	parse_payload = parse_payload_rb(adreno_dev, snapshot);
+
+	if (parse_payload)
+		skip_memkind_rb = true;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i];
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_RB && !skip_memkind_rb)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_RB_V2,
+				snapshot, gen8_hwsched_snapshot_rb,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_SCRATCH)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_PROFILE)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_CSW_SMMU_INFO)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE)
+			snapshot_preemption_records(device, snapshot,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_PREEMPT_SCRATCH)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, adreno_snapshot_global,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_AQE_BUFFER)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, snapshot_aqe_buffer,
+				entry->md);
+
+		if (entry->desc.mem_kind == HFI_MEMKIND_HW_FENCE) {
+			struct gmu_mem_type_desc desc;
+
+			desc.memdesc = entry->md;
+			desc.type = SNAPSHOT_GMU_MEM_HW_FENCE;
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
+				snapshot, gen8_snapshot_gmu_mem, &desc);
+		}
+
+	}
+
+	if (!adreno_hwsched_context_queue_enabled(adreno_dev))
+		return;
+
+	read_lock(&device->context_lock);
+	idr_for_each(&device->context_idr, snapshot_context_queue, snapshot);
+	read_unlock(&device->context_lock);
+}
+
+static int gmu_clock_set_rate(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	/* Switch to min GMU clock */
+	gen8_rdpm_cx_freq_update(gmu, gmu->freqs[0] / 1000);
+
+	ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk",
+			gmu->freqs[0]);
+	if (ret)
+		dev_err(&gmu->pdev->dev, "GMU clock:%d set failed:%d\n",
+			gmu->freqs[0], ret);
+
+	trace_kgsl_gmu_pwrlevel(gmu->freqs[0], gmu->freqs[GMU_MAX_PWRLEVELS - 1]);
+
+	return ret;
+}
+
+static int gen8_hwsched_gmu_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int level, ret = 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	gen8_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = gen8_gmu_enable_clks(adreno_dev, GMU_MAX_PWRLEVELS - 1);
+	if (ret)
+		goto gdsc_off;
+
+	/*
+	 * Enable AHB timeout detection to catch any register access that takes
+	 * too long, before the NOC timeout gets detected. Enable this logic
+	 * before any register access, which happens just after enabling clocks.
+	 */
+	gen8_enable_ahb_timeout_detection(adreno_dev);
+
+	/* Initialize the CX timer */
+	gen8_cx_timer_init(adreno_dev);
+
+	ret = gen8_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen8_gmu_itcm_shadow(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen8_scm_gpu_init_cx_regs(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	gen8_gmu_register_config(adreno_dev);
+
+	ret = gen8_gmu_version_info(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	gen8_gmu_irq_enable(adreno_dev);
+
+	/* Vote for minimal DDR BW for GMU to init */
+	level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min;
+
+	icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level]));
+
+	/* From this GMU FW version onwards, all RBBM interrupts are handled at the GMU */
+	if (gmu->ver.core >= GMU_VERSION(5, 01, 06))
+		adreno_irq_free(adreno_dev);
+
+	/* Clear any hwsched faults that might have been left over */
+	adreno_hwsched_clear_fault(adreno_dev);
+
+	ret = gen8_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	gen8_get_gpu_feature_info(adreno_dev);
+
+	ret = gen8_hwsched_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gmu_clock_set_rate(adreno_dev);
+	if (ret) {
+		gen8_hwsched_hfi_stop(adreno_dev);
+		goto err;
+	}
+
+	if (gen8_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_GMU_AB_VOTE) == 1 &&
+		!WARN_ONCE(!adreno_dev->gpucore->num_ddr_channels,
+			"Number of DDR channels is not specified in gpu core")) {
+		adreno_dev->gmu_ab = true;
+		set_bit(ADRENO_DEVICE_GMU_AB, &adreno_dev->priv);
+	}
+
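+	/* GMU init is done, so drop the minimal DDR bandwidth vote taken above */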
+	icc_set_bw(pwr->icc_path, 0, 0);
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+
+err:
+	gen8_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		gen8_gmu_suspend(adreno_dev);
+
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen8_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+static int gen8_hwsched_gmu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE);
+
+	ret = kgsl_pwrctrl_enable_cx_gdsc(device);
+	if (ret)
+		return ret;
+
+	ret = gen8_gmu_enable_clks(adreno_dev, GMU_MAX_PWRLEVELS - 1);
+	if (ret)
+		goto gdsc_off;
+
+	/*
+	 * Enable AHB timeout detection to catch any register access that takes
+	 * too long, before the NOC timeout gets detected. Enable this logic
+	 * before any register access, which happens just after enabling clocks.
+	 */
+	gen8_enable_ahb_timeout_detection(adreno_dev);
+
+	/* Initialize the CX timer */
+	gen8_cx_timer_init(adreno_dev);
+
+	ret = gen8_rscc_wakeup_sequence(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	ret = gen8_gmu_load_fw(adreno_dev);
+	if (ret)
+		goto clks_gdsc_off;
+
+	gen8_gmu_register_config(adreno_dev);
+
+	gen8_gmu_irq_enable(adreno_dev);
+
+	/* Clear any hwsched faults that might have been left over */
+	adreno_hwsched_clear_fault(adreno_dev);
+
+	ret = gen8_gmu_device_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_hwsched_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gmu_clock_set_rate(adreno_dev);
+	if (ret) {
+		gen8_hwsched_hfi_stop(adreno_dev);
+		goto err;
+	}
+
+	device->gmu_fault = false;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+
+	return 0;
+err:
+	gen8_gmu_irq_disable(adreno_dev);
+
+	if (device->gmu_fault) {
+		gen8_gmu_suspend(adreno_dev);
+
+		return ret;
+	}
+
+clks_gdsc_off:
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+gdsc_off:
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen8_rdpm_cx_freq_update(gmu, 0);
+
+	return ret;
+}
+
+void gen8_hwsched_active_count_put(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (WARN(atomic_read(&device->active_cnt) == 0,
+		"Unbalanced get/put calls to KGSL active count\n"))
+		return;
+
+	if (atomic_dec_and_test(&device->active_cnt)) {
+		kgsl_pwrscale_update_stats(device);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+	}
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	wake_up(&device->active_cnt_wq);
+}
+
+static int gen8_hwsched_notify_slumber(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct hfi_prep_slumber_cmd req;
+	int ret;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER);
+	if (ret)
+		return ret;
+
+	req.freq = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1;
+	req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq;
+
+	req.bw |= gen8_bus_ab_quantize(adreno_dev, 0);
+	/* Disable the power counter so that the GMU is not busy */
+	gmu_core_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0);
+
+	ret = gen8_hfi_send_cmd_async(adreno_dev, &req, sizeof(req));
+
+	/*
+	 * GEMNOC can enter power collapse state during GPU power down sequence.
+	 * This could abort CX GDSC collapse. Assert Qactive to avoid this.
+	 */
+	gmu_core_regwrite(device, GEN8_GMUCX_CX_FALNEXT_INTF, 0x1);
+
+	return ret;
+}
+
+static int gen8_hwsched_gmu_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	if (device->gmu_fault)
+		goto error;
+
+	/* Wait for the lowest idle level we requested */
+	ret = gen8_gmu_wait_for_lowest_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen8_hwsched_notify_slumber(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen8_gmu_wait_for_idle(adreno_dev);
+	if (ret)
+		goto error;
+
+	ret = gen8_rscc_sleep_sequence(adreno_dev);
+
+	gen8_rdpm_mx_freq_update(gmu, 0);
+
+	/* Now that we are done with GMU and GPU, clear the GBIF */
+	ret = gen8_halt_gbif(adreno_dev);
+
+	gen8_gmu_irq_disable(adreno_dev);
+
+	gen8_hwsched_hfi_stop(adreno_dev);
+
+	clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks);
+
+	kgsl_pwrctrl_disable_cx_gdsc(device);
+
+	gen8_rdpm_cx_freq_update(gmu, 0);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
+
+	return ret;
+
+error:
+	gen8_gmu_irq_disable(adreno_dev);
+	gen8_hwsched_hfi_stop(adreno_dev);
+	gen8_gmu_suspend(adreno_dev);
+
+	return ret;
+}
+
+static void gen8_hwsched_init_ucode_regs(struct adreno_device *adreno_dev)
+{
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Program the ucode base for CP */
+	kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_LO,
+		lower_32_bits(fw->memdesc->gpuaddr));
+	kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_HI,
+		upper_32_bits(fw->memdesc->gpuaddr));
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) {
+		fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE);
+
+		/* Program the ucode base for AQE0 (BV coprocessor) */
+		kgsl_regwrite(device, GEN8_CP_AQE_INSTR_BASE_LO_0,
+			lower_32_bits(fw->memdesc->gpuaddr));
+		kgsl_regwrite(device, GEN8_CP_AQE_INSTR_BASE_HI_0,
+			upper_32_bits(fw->memdesc->gpuaddr));
+
+		/* Program the ucode base for AQE1 (LPAC coprocessor) */
+		if (adreno_dev->lpac_enabled) {
+			kgsl_regwrite(device, GEN8_CP_AQE_INSTR_BASE_LO_1,
+				      lower_32_bits(fw->memdesc->gpuaddr));
+			kgsl_regwrite(device, GEN8_CP_AQE_INSTR_BASE_HI_1,
+				      upper_32_bits(fw->memdesc->gpuaddr));
+		}
+	}
+}
+
+static int gen8_hwsched_gpu_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = kgsl_mmu_start(device);
+	if (ret)
+		goto err;
+
+	ret = gen8_gmu_oob_set(device, oob_gpu);
+	if (ret)
+		goto err;
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data));
+
+	gen8_start(adreno_dev);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	adreno_perfcounter_start(adreno_dev);
+
+	/* Clear FSR here in case it is set from a previous pagefault */
+	kgsl_mmu_clear_fsr(&device->mmu);
+
+	gen8_enable_gpu_irq(adreno_dev);
+
+	gen8_hwsched_init_ucode_regs(adreno_dev);
+
+	ret = gen8_hwsched_boot_gpu(adreno_dev);
+	if (ret)
+		goto err;
+
+	/*
+	 * At this point it is safe to assume that we recovered. Setting
+	 * this field allows us to take a new snapshot for the next failure
+	 * if we are prioritizing the first unrecoverable snapshot.
+	 */
+	if (device->snapshot)
+		device->snapshot->recovered = true;
+
+	device->reset_counter++;
+
+	/*
+	 * If warmboot is enabled and a sysfs node was toggled, we will do a coldboot
+	 * on the subsequent slumber exit. Once that is done, clear this bit so that
+	 * the next boot can be a warmboot.
+	 */
+	clear_bit(ADRENO_DEVICE_FORCE_COLDBOOT, &adreno_dev->priv);
+err:
+	gen8_gmu_oob_clear(device, oob_gpu);
+
+	if (ret)
+		gen8_hwsched_gmu_power_off(adreno_dev);
+
+	return ret;
+}
+
+static void hwsched_idle_timer(struct timer_list *t)
+{
+	struct kgsl_device *device = container_of(t, struct kgsl_device,
+					idle_timer);
+
+	kgsl_schedule_work(&device->idle_check_ws);
+}
+
+static int gen8_gmu_warmboot_init(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_GMU_WARMBOOT))
+		return ret;
+
+	if (IS_ERR_OR_NULL(gmu->gmu_init_scratch)) {
+		gmu->gmu_init_scratch = gen8_reserve_gmu_kernel_block(gmu, 0,
+				SZ_4K, GMU_CACHE, 0);
+		ret = PTR_ERR_OR_ZERO(gmu->gmu_init_scratch);
+		if (ret)
+			return ret;
+	}
+
+	if (IS_ERR_OR_NULL(gmu->gpu_boot_scratch)) {
+		gmu->gpu_boot_scratch = gen8_reserve_gmu_kernel_block(gmu, 0,
+				SZ_4K, GMU_CACHE, 0);
+		ret = PTR_ERR_OR_ZERO(gmu->gpu_boot_scratch);
+	}
+
+	return ret;
+}
+
+static int gen8_hwsched_gmu_memory_init(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	/* GMU Virtual register bank */
+	if (IS_ERR_OR_NULL(gmu->vrb)) {
+		gmu->vrb = gen8_reserve_gmu_kernel_block(gmu, 0, GMU_VRB_SIZE,
+						GMU_NONCACHED_KERNEL, 0);
+
+		if (IS_ERR(gmu->vrb))
+			return PTR_ERR(gmu->vrb);
+
+		/* Populate size of the virtual register bank */
+		gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX,
+					gmu->vrb->size >> 2);
+	}
+
+	/* GMU trace log */
+	if (IS_ERR_OR_NULL(gmu->trace.md)) {
+		gmu->trace.md = gen8_reserve_gmu_kernel_block(gmu, 0,
+					GMU_TRACE_SIZE, GMU_NONCACHED_KERNEL, 0);
+
+		if (IS_ERR(gmu->trace.md))
+			return PTR_ERR(gmu->trace.md);
+
+		/* Pass trace buffer address to GMU through the VRB */
+		gmu_core_set_vrb_register(gmu->vrb->hostptr,
+					VRB_TRACE_BUFFER_ADDR_IDX,
+					gmu->trace.md->gmuaddr);
+
+		/* Initialize the GMU trace buffer header */
+		gmu_core_trace_header_init(&gmu->trace);
+	}
+
+	return 0;
+}
+
+static int gen8_hwsched_gmu_init(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = gen8_gmu_parse_fw(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_gmu_memory_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_gmu_warmboot_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_hwsched_gmu_memory_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	return gen8_hwsched_hfi_init(adreno_dev);
+}
+
+static void gen8_hwsched_touch_wakeup(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	/*
+	 * Do not wake up a suspended device, or a device whose first boot
+	 * sequence has not yet completed.
+	 */
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) ||
+		!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		goto done;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen8_hwsched_gmu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	ret = gen8_hwsched_gpu_boot(adreno_dev);
+	if (ret)
+		return;
+
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+done:
+	/*
+	 * When waking up from a touch event we want to stay active long enough
+	 * for the user to send a draw command. The default idle timer timeout
+	 * is shorter than we want so go ahead and push the idle timer out
+	 * further for this special case
+	 */
+	mod_timer(&device->idle_timer, jiffies +
+		msecs_to_jiffies(adreno_wake_timeout));
+}
+
+static int gen8_hwsched_boot(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	adreno_hwsched_start(adreno_dev);
+
+	ret = gen8_hwsched_gmu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_hwsched_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_start_idle_timer(device);
+	kgsl_pwrscale_wake(device);
+
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return ret;
+}
+
+static int gen8_aqe_microcode_read(struct adreno_device *adreno_dev)
+{
+	struct adreno_firmware *aqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_AQE))
+		return 0;
+
+	return adreno_get_firmware(adreno_dev, gen8_core->aqefw_name, aqe_fw);
+}
+
+static int gen8_hwsched_first_boot(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
+		return gen8_hwsched_boot(adreno_dev);
+
+	adreno_hwsched_start(adreno_dev);
+
+	ret = gen8_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_aqe_microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_hwsched_gmu_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
+
+	ret = gen8_hwsched_gmu_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = gen8_hwsched_gpu_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	adreno_get_bus_counters(adreno_dev);
+
+	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
+						 ADRENO_COOP_RESET);
+
+	set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags);
+	set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	/*
+	 * BCL needs the respective Central Broadcast register to
+	 * be programmed from TZ. This programming happens only
+	 * when the zap shader firmware load is successful. Zap firmware
+	 * load can fail in the boot up path, hence enable BCL only after we
+	 * successfully complete first boot, to ensure that the Central
+	 * Broadcast register was programmed before enabling BCL.
+	 */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
+		adreno_dev->bcl_enabled = true;
+
+	/*
+	 * There is a possible deadlock scenario during kgsl firmware reading
+	 * (request_firmware) and devfreq update calls. During first boot, the kgsl
+	 * device mutex is held and then request_firmware is called for reading
+	 * firmware. request_firmware internally takes the dev_pm_qos_mtx lock.
+	 * Whereas devfreq update calls triggered by thermal/bcl or the devfreq
+	 * sysfs interface first take the same dev_pm_qos_mtx lock and then try
+	 * to take the kgsl device mutex as part of get_dev_status/target calls.
+	 * This results in a deadlock when both threads are unable to acquire the
+	 * mutex held by the other thread. Enable devfreq updates now as we are
+	 * done reading all firmware files. (See the illustration after this
+	 * function.)
+	 */
+	device->pwrscale.devfreq_enabled = true;
+
+	device->pwrctrl.last_stat_updated = ktime_get();
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	return 0;
+}
+
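+/*
+ * Editor's note: a minimal sketch of the lock-order inversion described in the
+ * comment inside gen8_hwsched_first_boot() above (simplified, illustrative
+ * only):
+ *
+ *   first boot thread                     devfreq/thermal thread
+ *   -----------------                     ----------------------
+ *   mutex_lock(&device->mutex);           mutex_lock(&dev_pm_qos_mtx);
+ *   request_firmware()
+ *     -> mutex_lock(&dev_pm_qos_mtx);     get_dev_status()/target()
+ *        blocks on devfreq thread           -> mutex_lock(&device->mutex);
+ *                                              blocks on first boot thread
+ *
+ * Keeping pwrscale.devfreq_enabled false until every firmware file has been
+ * read keeps the right-hand column from running during first boot.
+ */
+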
+/**
+ * drain_ctx_hw_fences_cpu - Force trigger the hardware fences that
+ * were not sent to TxQueue by the GMU
+ */
+static void drain_ctx_hw_fences_cpu(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	spin_lock(&drawctxt->lock);
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		gen8_trigger_hw_fence_cpu(adreno_dev, entry);
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+	}
+	spin_unlock(&drawctxt->lock);
+}
+
+static void drain_hw_fences_cpu(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context;
+	int id;
+
+	read_lock(&device->context_lock);
+	idr_for_each_entry(&device->context_idr, context, id) {
+		if (context->gmu_registered)
+			drain_ctx_hw_fences_cpu(adreno_dev, ADRENO_CONTEXT(context));
+	}
+	read_unlock(&device->context_lock);
+}
+
+/**
+ * check_inflight_hw_fences - During SLUMBER entry, we must make sure all hardware fences across
+ * all registered contexts have been sent to TxQueue. If not, take a snapshot
+ */
+static int check_inflight_hw_fences(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context;
+	int id, ret = 0;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags))
+		return 0;
+
+	read_lock(&device->context_lock);
+	idr_for_each_entry(&device->context_idr, context, id) {
+
+		if (context->gmu_registered) {
+			ret = gen8_hwsched_check_context_inflight_hw_fences(adreno_dev,
+				ADRENO_CONTEXT(context));
+			if (ret)
+				break;
+		}
+	}
+	read_unlock(&device->context_lock);
+
+	if (ret)
+		gmu_core_fault_snapshot(device);
+
+	return ret;
+}
+
+static int gen8_hwsched_power_off(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+	bool drain_cpu = false;
+
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER);
+
+	ret = gen8_gmu_oob_set(device, oob_gpu);
+	if (ret) {
+		gen8_gmu_oob_clear(device, oob_gpu);
+		goto no_gx_power;
+	}
+
+	kgsl_pwrscale_update_stats(device);
+
+	/* Save active coresight registers if applicable */
+	adreno_coresight_stop(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+	gen8_gmu_oob_clear(device, oob_gpu);
+
+no_gx_power:
+	kgsl_pwrctrl_irq(device, false);
+
+	/* Make sure GMU has sent all hardware fences to TxQueue */
+	if (check_inflight_hw_fences(adreno_dev))
+		drain_cpu = true;
+
+	gen8_hwsched_gmu_power_off(adreno_dev);
+
+	/* Now that we are sure that GMU is powered off, drain pending fences */
+	if (drain_cpu)
+		drain_hw_fences_cpu(adreno_dev);
+
+	adreno_hwsched_unregister_contexts(adreno_dev);
+
+	if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice))
+		llcc_slice_deactivate(adreno_dev->gpu_llc_slice);
+
+	if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice))
+		llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	del_timer_sync(&device->idle_timer);
+
+	kgsl_pwrscale_sleep(device);
+
+	kgsl_pwrctrl_clear_l3_vote(device);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+
+	return ret;
+}
+
+static void check_hw_fence_unack_count(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 unack_count;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
+		return;
+
+	gen8_hwsched_process_msgq(adreno_dev);
+
+	spin_lock(&hfi->hw_fence.lock);
+	unack_count = hfi->hw_fence.unack_count;
+	spin_unlock(&hfi->hw_fence.lock);
+
+	if (!unack_count)
+		return;
+
+	dev_err(&gmu->pdev->dev, "hardware fence unack_count(%d) isn't zero before SLUMBER\n",
+		unack_count);
+	gmu_core_fault_snapshot(device);
+}
+
+static void hwsched_idle_check(struct work_struct *work)
+{
+	struct kgsl_device *device = container_of(work,
+					struct kgsl_device, idle_check_ws);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	mutex_lock(&device->mutex);
+
+	if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags))
+		goto done;
+
+	if (atomic_read(&device->active_cnt) || time_is_after_jiffies(device->idle_jiffies)) {
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	spin_lock(&device->submit_lock);
+	if (device->submit_now) {
+		spin_unlock(&device->submit_lock);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		goto done;
+	}
+
+	device->skip_inline_submit = true;
+	spin_unlock(&device->submit_lock);
+
+	if (!gen8_hw_isidle(adreno_dev)) {
+		dev_err(device->dev, "GPU isn't idle before SLUMBER\n");
+		gmu_core_fault_snapshot(device);
+	}
+
+	check_hw_fence_unack_count(adreno_dev);
+
+	gen8_hwsched_power_off(adreno_dev);
+
+done:
+	mutex_unlock(&device->mutex);
+}
+
+static int gen8_hwsched_first_open(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	/*
+	 * Do the one time settings that need to happen when we
+	 * attempt to boot the gpu the very first time
+	 */
+	ret = gen8_hwsched_first_boot(adreno_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * A client that does a first_open but never closes the device
+	 * may prevent us from going back to SLUMBER. So trigger the idle
+	 * check by incrementing the active count and immediately releasing it.
+	 */
+	atomic_inc(&device->active_cnt);
+	gen8_hwsched_active_count_put(adreno_dev);
+
+	return 0;
+}
+
+int gen8_hwsched_active_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return -EINVAL;
+
+	if ((atomic_read(&device->active_cnt) == 0))
+		ret = gen8_hwsched_boot(adreno_dev);
+
+	if (ret == 0)
+		atomic_inc(&device->active_cnt);
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	return ret;
+}
+
+static int gen8_hwsched_dcvs_set(struct adreno_device *adreno_dev,
+		int gpu_pwrlevel, int bus_level, u32 ab)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_dcvs_table *table = &gmu->dcvs_table;
+	struct hfi_gx_bw_perf_vote_cmd req = {
+		.ack_type = DCVS_ACK_BLOCK,
+		.freq = INVALID_DCVS_IDX,
+		.bw = INVALID_DCVS_IDX,
+	};
+	int ret;
+
+	if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags))
+		return 0;
+
+	/* Do not send a vote for XO or a lower GPU clock to the GMU */
+	if ((gpu_pwrlevel != INVALID_DCVS_IDX) &&
+			(gpu_pwrlevel >= table->gpu_level_num - 1)) {
+		dev_err(&gmu->pdev->dev, "Invalid gpu dcvs request: %d\n",
+			gpu_pwrlevel);
+		return -EINVAL;
+	}
+
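+	/*
+	 * Editor's note: KGSL power levels appear to be indexed fastest-first
+	 * while the GMU DCVS table is indexed the other way around, hence the
+	 * "gpu_level_num - gpu_pwrlevel - 1" translation below; index 0 (the
+	 * XO/slumber level) is rejected above and is only voted by the GMU
+	 * through its sleep sequence.
+	 */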
+	if (gpu_pwrlevel < table->gpu_level_num - 1)
+		req.freq = table->gpu_level_num - gpu_pwrlevel - 1;
+
+	if (bus_level < pwr->ddr_table_count && bus_level > 0)
+		req.bw = bus_level;
+
+	req.bw |= gen8_bus_ab_quantize(adreno_dev, ab);
+
+	/* GMU will vote for slumber levels through the sleep sequence */
+	if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_BW_VOTE))
+		return 0;
+
+	ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE);
+	if (ret)
+		return ret;
+
+	ret = gen8_hfi_send_cmd_async(adreno_dev, &req, sizeof(req));
+
+	if (ret) {
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Failed to set GPU perf idx %d, bw idx %d\n",
+			req.freq, req.bw);
+
+		/*
+		 * If this was a dcvs request alongside an active gpu, request
+		 * dispatcher-based reset and recovery.
+		 */
+		if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+			gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+
+	if (req.freq != INVALID_DCVS_IDX)
+		gen8_rdpm_mx_freq_update(gmu, gmu->dcvs_table.gx_votes[req.freq].freq);
+
+	return ret;
+}
+
+static int gen8_hwsched_clock_set(struct adreno_device *adreno_dev,
+	u32 pwrlevel)
+{
+	return gen8_hwsched_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX, INVALID_AB_VALUE);
+}
+
+static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	static unsigned long prev_freq;
+	unsigned long freq = gmu->freqs[0];
+
+	if (!gmu->perf_ddr_bw)
+		return;
+
+	/*
+	 * Scale the GMU if DDR is at a CX corner at which GMU can run at
+	 * a higher frequency
+	 */
+	if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw)
+		freq = gmu->freqs[GMU_MAX_PWRLEVELS - 1];
+
+	if (prev_freq == freq)
+		return;
+
+	if (kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", freq)) {
+		dev_err(&gmu->pdev->dev, "Unable to set the GMU clock to %ld\n",
+			freq);
+		return;
+	}
+
+	gen8_rdpm_cx_freq_update(gmu, freq / 1000);
+
+	trace_kgsl_gmu_pwrlevel(freq, prev_freq);
+
+	prev_freq = freq;
+}
+
+static int gen8_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel,
+	u32 ab)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int ret = 0;
+
+	if (buslevel == pwr->cur_buslevel)
+		buslevel = INVALID_DCVS_IDX;
+
+	if ((ab == pwr->cur_ab) || (ab == 0))
+		ab = INVALID_AB_VALUE;
+
+	if ((ab == INVALID_AB_VALUE) && (buslevel == INVALID_DCVS_IDX))
+		return 0;
+
+	ret = gen8_hwsched_dcvs_set(adreno_dev, INVALID_DCVS_IDX,
+			buslevel, ab);
+	if (ret)
+		return ret;
+
+	if (buslevel != INVALID_DCVS_IDX) {
+		scale_gmu_frequency(adreno_dev, buslevel);
+
+		pwr->cur_buslevel = buslevel;
+	}
+
+	if (ab != INVALID_AB_VALUE) {
+		if (!adreno_dev->gmu_ab)
+			icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0);
+		pwr->cur_ab = ab;
+	}
+
+	trace_kgsl_buslevel(device, pwr->active_pwrlevel, pwr->cur_buslevel, pwr->cur_ab);
+	return ret;
+}
+
+static int gen8_hwsched_pm_suspend(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret;
+
+	if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags))
+		return 0;
+
+	kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND);
+
+	/* Halt any new submissions */
+	reinit_completion(&device->halt_gate);
+
+	/*
+	 * Wait for the dispatcher to retire everything by waiting
+	 * for the active count to go to zero.
+	 */
+	ret = kgsl_active_count_wait(device, 0, msecs_to_jiffies(100));
+	if (ret) {
+		dev_err(device->dev, "Timed out waiting for the active count\n");
+		goto err;
+	}
+
+	ret = adreno_hwsched_idle(adreno_dev);
+	if (ret)
+		goto err;
+
+	gen8_hwsched_power_off(adreno_dev);
+
+	adreno_get_gpu_halt(adreno_dev);
+
+	set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
+
+	return 0;
+
+err:
+	adreno_hwsched_start(adreno_dev);
+
+	return ret;
+}
+
+static void gen8_hwsched_pm_resume(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags),
+		"resume invoked without a suspend\n"))
+		return;
+
+	adreno_put_gpu_halt(adreno_dev);
+
+	adreno_hwsched_start(adreno_dev);
+
+	clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
+}
+
+void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 mask;
+
+	/* Temporarily mask the watchdog interrupt to prevent a storm */
+	gmu_core_regread(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK,
+		&mask);
+	gmu_core_regwrite(device, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK,
+			(mask | GMU_INT_WDOG_BITE));
+
+	gen8_gmu_send_nmi(device, false);
+
+	dev_err_ratelimited(&gmu->pdev->dev,
+			"GMU watchdog expired interrupt received\n");
+
+	gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+static void gen8_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct pending_cmd *cmd = NULL;
+
+	read_lock(&hfi->msglock);
+
+	list_for_each_entry(cmd, &hfi->msglist, node) {
+		if (MSG_HDR_GET_ID(cmd->sent_hdr) == H2F_MSG_UNREGISTER_CONTEXT)
+			complete(&cmd->complete);
+	}
+
+	read_unlock(&hfi->msglock);
+}
+
+/**
+ * process_context_hw_fences_after_reset - This function processes all hardware fences that were
+ * sent to GMU prior to recovery. If a fence was not retired by the GPU and its context is still
+ * good, it is moved to the reset list.
+ */
+static void process_context_hw_fences_after_reset(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct list_head *reset_list)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	spin_lock(&drawctxt->lock);
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct adreno_context *drawctxt = entry->drawctxt;
+		struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+		bool retired = kgsl_check_timestamp(device, &drawctxt->base, (u32)entry->cmd.ts);
+
+		/* Delete the fences that GMU has sent to the TxQueue */
+		if (timestamp_cmp(hdr->out_fence_ts, (u32)entry->cmd.ts) >= 0) {
+			gen8_remove_hw_fence_entry(adreno_dev, entry);
+			continue;
+		}
+
+		/*
+		 * Force retire the fences if the corresponding submission is retired by GPU
+		 * or if the context has gone bad
+		 */
+		if (retired || kgsl_context_is_bad(&drawctxt->base))
+			entry->cmd.flags |= HW_FENCE_FLAG_SKIP_MEMSTORE;
+
+		list_add_tail(&entry->reset_node, reset_list);
+	}
+	spin_unlock(&drawctxt->lock);
+}
+
+/**
+ * process_inflight_hw_fences_after_reset - Send hardware fences from all contexts back to the GMU
+ * after fault recovery. We must wait for ack when sending each of these fences to GMU so as to
+ * avoid sending a large number of hardware fences in a short span of time.
+ */
+static int process_inflight_hw_fences_after_reset(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context = NULL;
+	int id, ret = 0;
+	struct list_head hw_fence_list;
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	/*
+	 * Since we need to wait for an ack from the GMU when sending each inflight fence back, we
+	 * cannot send them from within atomic context. Hence, walk each context's list of such
+	 * hardware fences and add them to this local list, then walk the local list to send them
+	 * all to the GMU.
+	 */
+	INIT_LIST_HEAD(&hw_fence_list);
+
+	read_lock(&device->context_lock);
+	idr_for_each_entry(&device->context_idr, context, id) {
+		process_context_hw_fences_after_reset(adreno_dev, ADRENO_CONTEXT(context),
+			&hw_fence_list);
+	}
+	read_unlock(&device->context_lock);
+
+	list_for_each_entry_safe(entry, tmp, &hw_fence_list, reset_node) {
+
+		/*
+		 * This is part of the reset sequence and any error in this path will be handled by
+		 * the caller.
+		 */
+		ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, 0);
+		if (ret)
+			break;
+
+		list_del_init(&entry->reset_node);
+	}
+
+	return ret;
+}
+
+/**
+ * process_detached_hw_fences_after_reset - Send fences that couldn't be sent to GMU when a context
+ * got detached. We must wait for ack when sending each of these fences to GMU so as to avoid
+ * sending a large number of hardware fences in a short span of time.
+ */
+static int process_detached_hw_fences_after_reset(struct adreno_device *adreno_dev)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct kgsl_context *context = NULL;
+	int ret = 0;
+
+	list_for_each_entry_safe(entry, tmp, &hfi->detached_hw_fence_list, node) {
+
+		/*
+		 * This is part of the reset sequence and any error in this path will be handled by
+		 * the caller.
+		 */
+		ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry,
+			HW_FENCE_FLAG_SKIP_MEMSTORE);
+		if (ret)
+			return ret;
+
+		context = &entry->drawctxt->base;
+
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+
+		kgsl_context_put(context);
+	}
+
+	return ret;
+}
+
+static int drain_guilty_context_hw_fences(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context = NULL;
+	struct adreno_context *guilty = NULL;
+	int id, ret = 0;
+
+	read_lock(&device->context_lock);
+	idr_for_each_entry(&device->context_idr, context, id) {
+		if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv) &&
+			_kgsl_context_get(context)) {
+			guilty = ADRENO_CONTEXT(context);
+			break;
+		}
+	}
+	read_unlock(&device->context_lock);
+
+	if (!guilty)
+		return 0;
+
+	/*
+	 * We don't need drawctxt spinlock to signal these fences since the only other place
+	 * which can retire these fences is the context detach path and device mutex
+	 * ensures mutual exclusion between recovery thread and detach thread.
+	 */
+	ret = gen8_hwsched_drain_context_hw_fences(adreno_dev, guilty);
+
+	kgsl_context_put(&guilty->base);
+
+	return ret;
+}
+
+static int handle_hw_fences_after_reset(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = drain_guilty_context_hw_fences(adreno_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * We must do this after adreno_hwsched_replay() so that context registration
+	 * is done before we re-send the un-retired hardware fences to the GMU
+	 */
+	ret = process_inflight_hw_fences_after_reset(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = process_detached_hw_fences_after_reset(adreno_dev);
+	if (ret)
+		return ret;
+
+	return gen8_hwsched_disable_hw_fence_throttle(adreno_dev);
+}
+
+int gen8_hwsched_reset_replay(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	int ret;
+
+	/*
+	 * Any pending context unregister packets will be lost
+	 * since we hard reset the GMU. This means any threads waiting
+	 * for context unregister hfi ack will timeout. Wake them
+	 * to avoid false positive ack timeout messages later.
+	 */
+	gen8_hwsched_drain_ctxt_unregister(adreno_dev);
+
+	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
+		return 0;
+
+	gen8_disable_gpu_irq(adreno_dev);
+
+	gen8_gmu_irq_disable(adreno_dev);
+
+	gen8_hwsched_hfi_stop(adreno_dev);
+
+	gen8_gmu_suspend(adreno_dev);
+
+	adreno_hwsched_unregister_contexts(adreno_dev);
+
+	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	/* Reset the unack count back to zero as we start afresh */
+	hfi->hw_fence.unack_count = 0;
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	/*
+	 * When we reset, we want to coldboot in case any scratch corruption
+	 * occurred before we faulted.
+	 */
+	adreno_mark_for_coldboot(adreno_dev);
+
+	ret = gen8_hwsched_boot(adreno_dev);
+	if (ret)
+		goto done;
+
+	adreno_hwsched_replay(adreno_dev);
+
+	ret = handle_hw_fences_after_reset(adreno_dev);
+done:
+	BUG_ON(ret);
+
+	return ret;
+}
+
+const struct adreno_power_ops gen8_hwsched_power_ops = {
+	.first_open = gen8_hwsched_first_open,
+	.last_close = gen8_hwsched_power_off,
+	.active_count_get = gen8_hwsched_active_count_get,
+	.active_count_put = gen8_hwsched_active_count_put,
+	.touch_wakeup = gen8_hwsched_touch_wakeup,
+	.pm_suspend = gen8_hwsched_pm_suspend,
+	.pm_resume = gen8_hwsched_pm_resume,
+	.gpu_clock_set = gen8_hwsched_clock_set,
+	.gpu_bus_set = gen8_hwsched_bus_set,
+};
+
+const struct adreno_hwsched_ops gen8_hwsched_ops = {
+	.submit_drawobj = gen8_hwsched_submit_drawobj,
+	.preempt_count = gen8_hwsched_preempt_count_get,
+	.create_hw_fence = gen8_hwsched_create_hw_fence,
+};
+
+int gen8_hwsched_probe(struct platform_device *pdev,
+		u32 chipid, const struct adreno_gpu_core *gpucore)
+{
+	struct adreno_device *adreno_dev;
+	struct kgsl_device *device;
+	struct gen8_hwsched_device *gen8_hwsched_dev;
+	int ret;
+
+	gen8_hwsched_dev = devm_kzalloc(&pdev->dev, sizeof(*gen8_hwsched_dev),
+				GFP_KERNEL);
+	if (!gen8_hwsched_dev)
+		return -ENOMEM;
+
+	adreno_dev = &gen8_hwsched_dev->gen8_dev.adreno_dev;
+
+	adreno_dev->hwsched_enabled = true;
+
+	adreno_dev->irq_mask = GEN8_HWSCHED_INT_MASK;
+
+	ret = gen8_probe_common(pdev, adreno_dev, chipid, gpucore);
+	if (ret)
+		return ret;
+
+	device = KGSL_DEVICE(adreno_dev);
+
+	INIT_WORK(&device->idle_check_ws, hwsched_idle_check);
+
+	timer_setup(&device->idle_timer, hwsched_idle_timer, 0);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
+		adreno_dev->lpac_enabled = true;
+
+	kgsl_mmu_set_feature(device, KGSL_MMU_PAGEFAULT_TERMINATE);
+
+	return adreno_hwsched_init(adreno_dev, &gen8_hwsched_ops);
+}
+
+int gen8_hwsched_add_to_minidump(struct adreno_device *adreno_dev)
+{
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+	struct gen8_hwsched_device *gen8_hwsched = container_of(gen8_dev,
+					struct gen8_hwsched_device, gen8_dev);
+	struct gen8_hwsched_hfi *hw_hfi = &gen8_hwsched->hwsched_hfi;
+	int ret, i;
+
+	ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HWSCHED_DEVICE,
+			(void *)(gen8_hwsched), sizeof(struct gen8_hwsched_device));
+	if (ret)
+		return ret;
+
+	if (!IS_ERR_OR_NULL(gen8_dev->gmu.gmu_log)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_LOG_ENTRY,
+					gen8_dev->gmu.gmu_log->hostptr,
+					gen8_dev->gmu.gmu_log->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(gen8_dev->gmu.hfi.hfi_mem)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_HFIMEM_ENTRY,
+					gen8_dev->gmu.hfi.hfi_mem->hostptr,
+					gen8_dev->gmu.hfi.hfi_mem->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(gen8_dev->gmu.vrb)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_VRB_ENTRY,
+					gen8_dev->gmu.vrb->hostptr,
+					gen8_dev->gmu.vrb->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(gen8_dev->gmu.trace.md)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_GMU_TRACE_ENTRY,
+					gen8_dev->gmu.trace.md->hostptr,
+					gen8_dev->gmu.trace.md->size);
+		if (ret)
+			return ret;
+	}
+
+	/* Dump HFI hwsched global mem alloc entries */
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i];
+		char hfi_minidump_str[MAX_VA_MINIDUMP_STR_LEN] = {0};
+		u32 rb_id = 0;
+
+		if (!hfi_get_minidump_string(entry->desc.mem_kind,
+						&hfi_minidump_str[0],
+						sizeof(hfi_minidump_str), &rb_id)) {
+			ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+						hfi_minidump_str,
+						entry->md->hostptr,
+						entry->md->size);
+			if (ret)
+				return ret;
+		}
+	}
+
+	if (!IS_ERR_OR_NULL(hw_hfi->big_ib)) {
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_HFI_BIG_IB_ENTRY,
+					hw_hfi->big_ib->hostptr,
+					hw_hfi->big_ib->size);
+		if (ret)
+			return ret;
+	}
+
+	if (!IS_ERR_OR_NULL(hw_hfi->big_ib_recurring))
+		ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev,
+					KGSL_HFI_BIG_IB_REC_ENTRY,
+					hw_hfi->big_ib_recurring->hostptr,
+					hw_hfi->big_ib_recurring->size);
+
+	return ret;
+}

+ 106 - 0
qcom/opensource/graphics-kernel/adreno_gen8_hwsched.h

@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_GEN8_HWSCHED_H_
+#define _ADRENO_GEN8_HWSCHED_H_
+
+#include "adreno_gen8_hwsched_hfi.h"
+
+/**
+ * struct gen8_hwsched_device - Container for the gen8 hwscheduling device
+ */
+struct gen8_hwsched_device {
+	/** @gen8_dev: Container for the gen8 device */
+	struct gen8_device gen8_dev;
+	/** @hwsched_hfi: Container for hwscheduling specific hfi resources */
+	struct gen8_hwsched_hfi hwsched_hfi;
+};
+
+/**
+ * gen8_hwsched_probe - Target specific probe for hwsched
+ * @pdev: Pointer to the platform device
+ * @chipid: Chipid of the target
+ * @gpucore: Pointer to the gpucore
+ *
+ * The target specific probe function for hwsched enabled gmu targets.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hwsched_probe(struct platform_device *pdev,
+		u32 chipid, const struct adreno_gpu_core *gpucore);
+
+/**
+ * gen8_hwsched_reset_replay - Restart the gmu and gpu and replay inflight cmdbatches
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hwsched_reset_replay(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_snapshot - take gen8 hwsched snapshot
+ * @adreno_dev: Pointer to the adreno device
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * Snapshot the faulty IB and then the rest of the gen8 GMU state
+ */
+void gen8_hwsched_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+
+/**
+ * gen8_hwsched_handle_watchdog - Handle watchdog interrupt
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_active_count_get - Increment the active count
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function increments the active count. If active count
+ * is 0, this function also powers up the device.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hwsched_active_count_get(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_active_count_put - Put back the active count
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function decrements the active count and sets the idle
+ * timer if the active count is zero.
+ */
+void gen8_hwsched_active_count_put(struct adreno_device *adreno_dev);
+
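+/*
+ * Example usage (editor's sketch, mirroring gen8_hwsched_first_open()):
+ * callers must hold device->mutex, and every successful get must be balanced
+ * by a put so the idle timer can be re-armed when the count drops to zero.
+ *
+ *	mutex_lock(&device->mutex);
+ *	ret = gen8_hwsched_active_count_get(adreno_dev);
+ *	if (!ret) {
+ *		... touch the hardware ...
+ *		gen8_hwsched_active_count_put(adreno_dev);
+ *	}
+ *	mutex_unlock(&device->mutex);
+ */
+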
+/**
+ * gen8_hwsched_add_to_minidump - Register hwsched_device with va minidump
+ * @adreno_dev: Pointer to the adreno device
+ */
+int gen8_hwsched_add_to_minidump(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_send_recurring_cmdobj - Dispatch IBs to GMU
+ * @adreno_dev: Pointer to adreno device structure
+ * @cmdobj: The command object which needs to be submitted
+ *
+ * This function is used to register the context if needed and submit
+ * recurring IBs to the GMU. Upon receiving an IPC interrupt, the GMU will
+ * submit recurring IBs to the GPU.
+ *
+ * Return: 0 on success and negative error on failure
+ */
+int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj);
+
+/**
+ * gen8_hwsched_fault - Set hwsched fault to request recovery
+ * @adreno_dev: A handle to adreno device
+ * @fault: The type of fault
+ */
+void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault);
+
+#endif

+ 3938 - 0
qcom/opensource/graphics-kernel/adreno_gen8_hwsched_hfi.c

@@ -0,0 +1,3938 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <dt-bindings/soc/qcom,ipcc.h>
+#include <linux/dma-fence-array.h>
+#include <linux/iommu.h>
+#include <linux/sched/clock.h>
+#include <soc/qcom/msm_performance.h>
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "adreno_gen8_hwsched.h"
+#include "adreno_hfi.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+#include "kgsl_device.h"
+#include "kgsl_eventlog.h"
+#include "kgsl_pwrctrl.h"
+#include "kgsl_trace.h"
+#include "kgsl_util.h"
+
+#if (KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE)
+#include <msm_hw_fence.h>
+#else
+#include <linux/soc/qcom/msm_hw_fence.h>
+#endif
+
+#define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT)
+
+#define DEFINE_QHDR(gmuaddr, id, prio) \
+	{\
+		.status = 1, \
+		.start_addr = GMU_QUEUE_START_ADDR(gmuaddr, id), \
+		.type = QUEUE_HDR_TYPE(id, prio, 0, 0), \
+		.queue_size = SZ_4K >> 2, \
+		.msg_size = 0, \
+		.unused0 = 0, \
+		.unused1 = 0, \
+		.unused2 = 0, \
+		.unused3 = 0, \
+		.unused4 = 0, \
+		.read_index = 0, \
+		.write_index = 0, \
+}
+
+static struct dq_info {
+	/** @max_dq: Maximum number of dispatch queues per RB level */
+	u32 max_dq;
+	/** @base_dq_id: Base dqid for level */
+	u32 base_dq_id;
+	/** @offset: Next dqid to use for roundrobin context assignment */
+	u32 offset;
+} gen8_hfi_dqs[KGSL_PRIORITY_MAX_RB_LEVELS] = {
+	{ 4, 0, }, /* RB0 */
+	{ 4, 4, }, /* RB1 */
+	{ 3, 8, }, /* RB2 */
+	{ 3, 11, }, /* RB3 */
+}, gen8_hfi_dqs_lpac[KGSL_PRIORITY_MAX_RB_LEVELS + 1] = {
+	{ 4, 0, }, /* RB0 */
+	{ 4, 4, }, /* RB1 */
+	{ 3, 8, }, /* RB2 */
+	{ 2, 11, }, /* RB3 */
+	{ 1, 13, }, /* RB LPAC */
+};
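+
+/*
+ * Editor's note: dispatch queue ids are assigned per ringbuffer level using
+ * base_dq_id plus a round-robin offset. For example, with the table above a
+ * context at RB1 priority gets one of dqids 4..7 (base_dq_id 4, max_dq 4),
+ * cycling back to 4 once offset wraps around.
+ */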
+
+struct pending_cmd gen8_hw_fence_ack;
+
+struct gen8_hwsched_hfi *to_gen8_hwsched_hfi(
+	struct adreno_device *adreno_dev)
+{
+	struct gen8_device *gen8_dev = container_of(adreno_dev,
+					struct gen8_device, adreno_dev);
+	struct gen8_hwsched_device *gen8_hwsched = container_of(gen8_dev,
+					struct gen8_hwsched_device, gen8_dev);
+
+	return &gen8_hwsched->hwsched_hfi;
+}
+
+int gen8_hfi_send_lpac_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	if (!adreno_dev->lpac_enabled)
+		return 0;
+
+	return gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_LPAC, 1, 0);
+}
+
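+/*
+ * Editor's note: add_waiter()/del_waiter() bracket a pending HFI command on
+ * hfi->msglist so that gen8_receive_ack_async() can match an incoming ack
+ * against sent_hdr and complete the corresponding waiter.
+ */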
+static void add_waiter(struct gen8_hwsched_hfi *hfi, u32 hdr,
+	struct pending_cmd *ack)
+{
+	memset(ack, 0x0, sizeof(*ack));
+
+	init_completion(&ack->complete);
+	write_lock_irq(&hfi->msglock);
+	list_add_tail(&ack->node, &hfi->msglist);
+	write_unlock_irq(&hfi->msglock);
+
+	ack->sent_hdr = hdr;
+}
+
+static void del_waiter(struct gen8_hwsched_hfi *hfi, struct pending_cmd *ack)
+{
+	write_lock_irq(&hfi->msglock);
+	list_del(&ack->node);
+	write_unlock_irq(&hfi->msglock);
+}
+
+static void gen8_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct pending_cmd *cmd = NULL;
+	u32 waiters[64], num_waiters = 0, i;
+	u32 *ack = rcvd;
+	u32 hdr = ack[0];
+	u32 req_hdr = ack[1];
+	u32 size_bytes = MSG_HDR_GET_SIZE(hdr) << 2;
+
+	if (size_bytes > sizeof(cmd->results))
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Ack result too big: %d Truncating to: %zu\n",
+			size_bytes, sizeof(cmd->results));
+
+	read_lock(&hfi->msglock);
+
+	list_for_each_entry(cmd, &hfi->msglist, node) {
+		if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) {
+			memcpy(cmd->results, ack,
+				min_t(u32, size_bytes,
+					sizeof(cmd->results)));
+			complete(&cmd->complete);
+			read_unlock(&hfi->msglock);
+			return;
+		}
+
+		if (num_waiters < ARRAY_SIZE(waiters))
+			waiters[num_waiters++] = cmd->sent_hdr;
+	}
+
+	read_unlock(&hfi->msglock);
+
+	/* Didn't find the sender, list the waiters */
+	dev_err_ratelimited(&gmu->pdev->dev,
+		"Unexpectedly got id %d seqnum %d. Total waiters: %d Top %d Waiters:\n",
+		MSG_HDR_GET_ID(req_hdr), MSG_HDR_GET_SEQNUM(req_hdr),
+		num_waiters, min_t(u32, num_waiters, 5));
+
+	for (i = 0; i < num_waiters && i < 5; i++)
+		dev_err_ratelimited(&gmu->pdev->dev,
+			" id %d seqnum %d\n",
+			MSG_HDR_GET_ID(waiters[i]),
+			MSG_HDR_GET_SEQNUM(waiters[i]));
+}
+
+/* This function is called while holding the drawctxt spinlock */
+void gen8_remove_hw_fence_entry(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct adreno_context *drawctxt = entry->drawctxt;
+
+	atomic_dec(&hwsched->hw_fence_count);
+	drawctxt->hw_fence_count--;
+
+	dma_fence_put(&entry->kfence->fence);
+	list_del_init(&entry->node);
+	kmem_cache_free(hwsched->hw_fence_cache, entry);
+}
+
+static void _retire_inflight_hw_fences(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
+		return;
+
+	spin_lock(&drawctxt->lock);
+
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+
+		/*
+		 * Since this list is sorted by timestamp, abort on the first fence that hasn't
+		 * yet been sent to TxQueue
+		 */
+		if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)
+			break;
+
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+	}
+	spin_unlock(&drawctxt->lock);
+}
+
+static void log_profiling_info(struct adreno_device *adreno_dev, u32 *rcvd)
+{
+	struct hfi_ts_retire_cmd *cmd = (struct hfi_ts_retire_cmd *)rcvd;
+	struct kgsl_context *context;
+	struct retire_info info = {0};
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	context = kgsl_context_get(device, cmd->ctxt_id);
+	if (context == NULL)
+		return;
+
+	info.timestamp = cmd->ts;
+	info.rb_id = adreno_get_level(context);
+	info.gmu_dispatch_queue = context->gmu_dispatch_queue;
+	info.submitted_to_rb = cmd->submitted_to_rb;
+	info.sop = cmd->sop;
+	info.eop = cmd->eop;
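+	/*
+	 * HFI versions with minor < 4 do not report the active time directly,
+	 * so approximate it as eop - sop below.
+	 */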
+	if (GMU_VER_MINOR(gmu->ver.hfi) < 4)
+		info.active = cmd->eop - cmd->sop;
+	else
+		info.active = cmd->active;
+	info.retired_on_gmu = cmd->retired_on_gmu;
+
+	/* protected GPU work must not be reported */
+	if (!(context->flags & KGSL_CONTEXT_SECURE))
+		kgsl_work_period_update(device, context->proc_priv->period,
+					     info.active);
+
+	trace_adreno_cmdbatch_retired(context, &info, 0, 0, 0);
+
+	log_kgsl_cmdbatch_retired_event(context->id, cmd->ts,
+		context->priority, 0, cmd->sop, cmd->eop);
+
+	_retire_inflight_hw_fences(adreno_dev, context);
+
+	kgsl_context_put(context);
+}
+
+u32 gen8_hwsched_parse_payload(struct payload_section *payload, u32 key)
+{
+	u32 i;
+
+	/* Each key-value pair is 2 dwords */
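+	/*
+	 * Illustrative example (editor's note, hypothetical values): for a
+	 * payload with data[] = { KEY_RB_ID, 1, KEY_RB_RPTR, 0x10, KEY_RB_WPTR,
+	 * 0x18 } and dwords = 6, looking up KEY_RB_WPTR returns 0x18, while a
+	 * key that is not present returns 0.
+	 */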
+	for (i = 0; i < payload->dwords; i += 2) {
+		if (payload->data[i] == key)
+			return payload->data[i + 1];
+	}
+
+	return 0;
+}
+
+struct syncobj_flags {
+	unsigned long mask;
+	const char *name;
+};
+
+static void _get_syncobj_string(char *str, u32 max_size, struct hfi_syncobj *syncobj, u32 index)
+{
+	u32 count = scnprintf(str, max_size, "syncobj[%d] ctxt_id:%llu seqno:%llu flags:", index,
+			syncobj->ctxt_id, syncobj->seq_no);
+	u32 i;
+	bool first = true;
+	static const struct syncobj_flags _flags[] = {
+		GMU_SYNCOBJ_FLAGS, { -1, NULL }};
+
+	for (i = 0; _flags[i].name; i++) {
+		if (!(syncobj->flags & _flags[i].mask))
+			continue;
+
+		if (first) {
+			count += scnprintf(str + count, max_size - count, "%s", _flags[i].name);
+			first = false;
+		} else {
+			count += scnprintf(str + count, max_size - count, "|%s", _flags[i].name);
+		}
+	}
+}
+
+static void log_syncobj(struct gen8_gmu_device *gmu, struct hfi_submit_syncobj *cmd)
+{
+	struct hfi_syncobj *syncobj = (struct hfi_syncobj *)&cmd[1];
+	char str[128];
+	u32 i = 0;
+
+	for (i = 0; i < cmd->num_syncobj; i++) {
+		_get_syncobj_string(str, sizeof(str), syncobj, i);
+		dev_err(&gmu->pdev->dev, "%s\n", str);
+		syncobj++;
+	}
+}
+
+static void find_timeout_syncobj(struct adreno_device *adreno_dev, u32 ctxt_id, u32 ts)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_context *context = NULL;
+	struct adreno_context *drawctxt;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gmu_context_queue_header *hdr;
+	struct hfi_submit_syncobj *cmd;
+	u32 *queue, i;
+	int ret;
+
+	/* We want to get the context even if it is detached */
+	read_lock(&device->context_lock);
+	context = idr_find(&device->context_idr, ctxt_id);
+	ret = _kgsl_context_get(context);
+	read_unlock(&device->context_lock);
+
+	if (!ret)
+		return;
+
+	drawctxt = ADRENO_CONTEXT(context);
+
+	hdr = drawctxt->gmu_context_queue.hostptr;
+	queue = (u32 *)(drawctxt->gmu_context_queue.hostptr + sizeof(*hdr));
+
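+	/*
+	 * Walk the GMU context queue ring from read_index to write_index,
+	 * stepping over each packet by the size encoded in its header, looking
+	 * for the ISSUE_SYNCOBJ packet that carries the timed-out timestamp.
+	 */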
+	for (i = hdr->read_index; i != hdr->write_index;) {
+		if (MSG_HDR_GET_ID(queue[i]) != H2F_MSG_ISSUE_SYNCOBJ) {
+			i = (i + MSG_HDR_GET_SIZE(queue[i])) % hdr->queue_size;
+			continue;
+		}
+
+		cmd = (struct hfi_submit_syncobj *)&queue[i];
+
+		if (cmd->timestamp == ts) {
+			log_syncobj(gmu, cmd);
+			break;
+		}
+		i = (i + MSG_HDR_GET_SIZE(queue[i])) % hdr->queue_size;
+	}
+
+	if (i == hdr->write_index)
+		dev_err(&gmu->pdev->dev, "Couldn't find unsignaled syncobj ctx:%d ts:%d\n",
+			ctxt_id, ts);
+
+	kgsl_context_put(context);
+}
+
+/* Look up a particular key's value for a given type of payload */
+static u32 gen8_hwsched_lookup_key_value(struct adreno_device *adreno_dev,
+	u32 type, u32 key)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == type)
+			return gen8_hwsched_parse_payload(payload, key);
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+static u32 get_payload_rb_key(struct adreno_device *adreno_dev,
+	u32 rb_id, u32 key)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return 0;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == PAYLOAD_RB) {
+			u32 id = gen8_hwsched_parse_payload(payload, KEY_RB_ID);
+
+			if (id == rb_id)
+				return gen8_hwsched_parse_payload(payload, key);
+		}
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+
+	return 0;
+}
+
+static bool log_gpu_fault(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct device *dev = &gmu->pdev->dev;
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+
+	/* Return false for non fatal errors */
+	if (adreno_hwsched_log_nonfatal_gpu_fault(adreno_dev, dev, cmd->error))
+		return false;
+
+	switch (cmd->error) {
+	case GMU_GPU_HW_HANG:
+		dev_crit_ratelimited(dev, "MISC: GPU hang detected\n");
+		break;
+	case GMU_GPU_SW_HANG:
+		dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %d\n",
+			cmd->gc.ctxt_id, cmd->gc.ts);
+		break;
+	case GMU_CP_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP opcode error interrupt | opcode=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_OPCODE_ERROR));
+		break;
+	case GMU_CP_PROTECTED_ERROR: {
+		u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP Illegal instruction error\n");
+		break;
+	case GMU_CP_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP ucode error interrupt\n");
+		break;
+	case GMU_CP_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP | Ringbuffer HW fault | status=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_HW_FAULT));
+		break;
+	case GMU_GPU_PREEMPT_TIMEOUT: {
+		u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr;
+
+		cur = gen8_hwsched_lookup_key_value(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID);
+		next = gen8_hwsched_lookup_key_value(adreno_dev,
+			PAYLOAD_PREEMPT_TIMEOUT,
+			KEY_PREEMPT_TIMEOUT_NEXT_RB_ID);
+		cur_rptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_RPTR);
+		cur_wptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_WPTR);
+		next_rptr = get_payload_rb_key(adreno_dev, next, KEY_RB_RPTR);
+		next_wptr = get_payload_rb_key(adreno_dev, next, KEY_RB_WPTR);
+
+		dev_crit_ratelimited(dev,
+			"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
+			cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr);
+		}
+		break;
+	case GMU_CP_GPC_ERROR:
+		dev_crit_ratelimited(dev, "RBBM: GPC error\n");
+		break;
+	case GMU_CP_BV_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP BV opcode error | opcode=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_BV_OPCODE_ERROR));
+		break;
+	case GMU_CP_BV_PROTECTED_ERROR: {
+		u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_BV_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_BV_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP BV | Ringbuffer HW fault | status=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_HW_FAULT));
+		break;
+	case GMU_CP_BV_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n");
+		break;
+	case GMU_CP_BV_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n");
+		break;
+	case GMU_CP_LPAC_OPCODE_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP LPAC opcode error | opcode=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+			KEY_CP_LPAC_OPCODE_ERROR));
+		break;
+	case GMU_CP_LPAC_PROTECTED_ERROR: {
+		u32 status = gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_LPAC_PROTECTED_ERROR);
+
+		dev_crit_ratelimited(dev,
+			"CP LPAC | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n",
+			status & (1 << 20) ? "READ" : "WRITE",
+			status & 0x3FFFF, status);
+		}
+		break;
+	case GMU_CP_LPAC_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev,
+			"CP LPAC | Ringbuffer HW fault | status=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_CP_LPAC_HW_FAULT));
+		break;
+	case GMU_CP_LPAC_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "CP LPAC Illegal instruction error\n");
+		break;
+	case GMU_CP_LPAC_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "CP LPAC ucode error interrupt\n");
+		break;
+	case GMU_GPU_LPAC_SW_HANG:
+		dev_crit_ratelimited(dev, "LPAC: gpu timeout ctx %d ts %d\n",
+			cmd->lpac.ctxt_id, cmd->lpac.ts);
+		break;
+	case GMU_GPU_SW_FUSE_VIOLATION:
+		dev_crit_ratelimited(dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+				KEY_SWFUSE_VIOLATION_FAULT));
+		break;
+	case GMU_GPU_AQE0_OPCODE_ERRROR:
+		dev_crit_ratelimited(dev, "AQE0 opcode error | opcode=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE0_OPCODE_ERROR));
+		break;
+	case GMU_GPU_AQE0_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "AQE0 ucode error interrupt\n");
+		break;
+	case GMU_GPU_AQE0_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev, "AQE0 HW fault | status=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE0_HW_FAULT));
+		break;
+	case GMU_GPU_AQE0_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "AQE0 Illegal instruction error\n");
+		break;
+	case GMU_GPU_AQE1_OPCODE_ERRROR:
+		dev_crit_ratelimited(dev, "AQE1 opcode error | opcode=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE1_OPCODE_ERROR));
+		break;
+	case GMU_GPU_AQE1_UCODE_ERROR:
+		dev_crit_ratelimited(dev, "AQE1 ucode error interrupt\n");
+		break;
+	case GMU_GPU_AQE1_HW_FAULT_ERROR:
+		dev_crit_ratelimited(dev, "AQE1 HW fault | status=0x%8.8x\n",
+			gen8_hwsched_lookup_key_value(adreno_dev,
+				PAYLOAD_FAULT_REGS, KEY_AQE1_HW_FAULT));
+		break;
+	case GMU_GPU_AQE1_ILLEGAL_INST_ERROR:
+		dev_crit_ratelimited(dev, "AQE1 Illegal instruction error\n");
+		break;
+	case GMU_SYNCOBJ_TIMEOUT_ERROR:
+		dev_crit_ratelimited(dev, "syncobj timeout ctx %d ts %u\n",
+			cmd->gc.ctxt_id, cmd->gc.ts);
+		find_timeout_syncobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts);
+		break;
+	case GMU_CP_UNKNOWN_ERROR:
+		fallthrough;
+	default:
+		dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n",
+			cmd->error);
+		break;
+	}
+
+	/* Return true for fatal errors to perform recovery sequence */
+	return true;
+}
+
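+/* Peek at the header of the next unread message in the given HFI queue without consuming it */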
+static u32 peek_next_header(struct gen8_gmu_device *gmu, uint32_t queue_idx)
+{
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	u32 *queue;
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+		return 0;
+
+	if (hdr->read_index == hdr->write_index)
+		return 0;
+
+	queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
+
+	return queue[hdr->read_index];
+}
+
+static void process_ctx_bad(struct adreno_device *adreno_dev)
+{
+	/* Non-fatal RBBM error interrupts don't go through reset and recovery */
+	if (!log_gpu_fault(adreno_dev)) {
+		memset(adreno_dev->hwsched.ctxt_bad, 0x0, HFI_MAX_MSG_SIZE);
+		return;
+	}
+
+	gen8_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT);
+}
+
+#define GET_QUERIED_FENCE_INDEX(x) (x / BITS_PER_SYNCOBJ_QUERY)
+#define GET_QUERIED_FENCE_BIT(x) (x % BITS_PER_SYNCOBJ_QUERY)
+
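+/* Check whether the GMU queried the fence at this index within the sync object */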
+static bool fence_is_queried(struct hfi_syncobj_query_cmd *cmd, u32 fence_index)
+{
+	u32 index = GET_QUERIED_FENCE_INDEX(fence_index);
+	u32 bit = GET_QUERIED_FENCE_BIT(fence_index);
+
+	return (cmd->queries[index].query_bitmask & BIT(bit));
+}
+
+static void set_fence_signal_bit(struct adreno_device *adreno_dev,
+	struct hfi_syncobj_query_cmd *reply, struct dma_fence *fence, u32 fence_index,
+	char *name)
+{
+	u32 index = GET_QUERIED_FENCE_INDEX(fence_index);
+	u32 bit = GET_QUERIED_FENCE_BIT(fence_index);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u64 flags = ADRENO_HW_FENCE_SW_STATUS_PENDING;
+	char value[32] = "unknown";
+
+	if (fence->ops->timeline_value_str)
+		fence->ops->timeline_value_str(fence, value, sizeof(value));
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+		dev_err(&gmu->pdev->dev,
+			"GMU is waiting for signaled fence(ctx:%llu seqno:%llu value:%s)\n",
+			fence->context, fence->seqno, value);
+		reply->queries[index].query_bitmask |= BIT(bit);
+		flags = ADRENO_HW_FENCE_SW_STATUS_SIGNALED;
+	}
+	trace_adreno_hw_fence_query(fence->context, fence->seqno, flags, name, value);
+}
+
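+/*
+ * Check each queried fence in the sync object and reply to the GMU with a bitmask of the fences
+ * that are already signaled.
+ */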
+static void gen8_syncobj_query_reply(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj, struct hfi_syncobj_query_cmd *cmd)
+{
+	struct hfi_syncobj_query_cmd reply = {0};
+	int i, j, fence_index = 0;
+	struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i];
+		struct kgsl_sync_fence_cb *kcb = event->handle;
+		struct dma_fence **fences;
+		struct dma_fence_array *array;
+		struct event_fence_info *info = event->priv;
+		u32 num_fences;
+
+		array = to_dma_fence_array(kcb->fence);
+		if (array != NULL) {
+			num_fences = array->num_fences;
+			fences = array->fences;
+		} else {
+			num_fences = 1;
+			fences = &kcb->fence;
+		}
+
+		for (j = 0; j < num_fences; j++, fence_index++) {
+			if (!fence_is_queried(cmd, fence_index))
+				continue;
+
+			set_fence_signal_bit(adreno_dev, &reply, fences[j], fence_index,
+				info ? info->fences[j].name : "unknown");
+		}
+	}
+
+	reply.hdr = CREATE_MSG_HDR(F2H_MSG_SYNCOBJ_QUERY, HFI_MSG_CMD);
+	reply.gmu_ctxt_id = cmd->gmu_ctxt_id;
+	reply.sync_obj_ts = cmd->sync_obj_ts;
+
+	trace_adreno_syncobj_query_reply(reply.gmu_ctxt_id, reply.sync_obj_ts,
+		gpudev->read_alwayson(adreno_dev));
+
+	gen8_hfi_send_cmd_async(adreno_dev, &reply, sizeof(reply));
+}
+
+struct syncobj_query_work {
+	/** @cmd: The query command to be processed */
+	struct hfi_syncobj_query_cmd cmd;
+	/** @context: kgsl context that is waiting for this sync object */
+	struct kgsl_context *context;
+	/** @work: The work structure to execute syncobj query reply */
+	struct kthread_work work;
+};
+
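+/*
+ * Look up the queried sync object in the hwsched command list and send the query reply. If it is
+ * missing and has not yet been retired by the GMU, take a snapshot and trigger recovery.
+ */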
+static void gen8_process_syncobj_query_work(struct kthread_work *work)
+{
+	struct syncobj_query_work *query_work = container_of(work,
+						struct syncobj_query_work, work);
+	struct hfi_syncobj_query_cmd *cmd = (struct hfi_syncobj_query_cmd *)&query_work->cmd;
+	struct kgsl_context *context = query_work->context;
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct cmd_list_obj *obj;
+	bool missing = true;
+
+	mutex_lock(&hwsched->mutex);
+	mutex_lock(&device->mutex);
+
+	list_for_each_entry(obj, &hwsched->cmd_list, node) {
+		struct kgsl_drawobj *drawobj = obj->drawobj;
+
+		if ((drawobj->type & SYNCOBJ_TYPE) == 0)
+			continue;
+
+		if ((drawobj->context->id == cmd->gmu_ctxt_id) &&
+			(drawobj->timestamp == cmd->sync_obj_ts)) {
+			gen8_syncobj_query_reply(adreno_dev, drawobj, cmd);
+			missing = false;
+			break;
+		}
+	}
+
+	if (missing) {
+		struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+		struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+		struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+
+		/*
+		 * If the sync object is not found, it can only mean that the sync object was
+		 * retired by the GMU in the meantime. However, if that is not the case, then
+		 * we have a problem.
+		 */
+		if (timestamp_cmp(cmd->sync_obj_ts, hdr->sync_obj_ts) > 0) {
+			dev_err(&gmu->pdev->dev, "Missing sync object ctx:%d ts:%d retired:%d\n",
+				context->id, cmd->sync_obj_ts, hdr->sync_obj_ts);
+			gmu_core_fault_snapshot(device);
+			gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+		}
+	}
+
+	mutex_unlock(&device->mutex);
+	mutex_unlock(&hwsched->mutex);
+
+	kgsl_context_put(context);
+	kfree(query_work);
+}
+
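+/*
+ * Take a reference on the context (even if it is detached) and queue a worker to service the
+ * GMU's sync object query.
+ */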
+static void gen8_trigger_syncobj_query(struct adreno_device *adreno_dev,
+	u32 *rcvd)
+{
+	struct syncobj_query_work *query_work;
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct hfi_syncobj_query_cmd *cmd = (struct hfi_syncobj_query_cmd *)rcvd;
+	struct kgsl_context *context = NULL;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret;
+
+	trace_adreno_syncobj_query(cmd->gmu_ctxt_id, cmd->sync_obj_ts,
+		gpudev->read_alwayson(adreno_dev));
+
+	/*
+	 * We need the context even if it is detached. Hence, we can't use kgsl_context_get here.
+	 * We must make sure that this context id doesn't get destroyed (to avoid re-use) until GMU
+	 * has ack'd the query reply.
+	 */
+	read_lock(&device->context_lock);
+	context = idr_find(&device->context_idr, cmd->gmu_ctxt_id);
+	ret = _kgsl_context_get(context);
+	read_unlock(&device->context_lock);
+
+	if (!ret)
+		return;
+
+	query_work = kzalloc(sizeof(*query_work), GFP_KERNEL);
+	if (!query_work) {
+		kgsl_context_put(context);
+		return;
+	}
+
+	kthread_init_work(&query_work->work, gen8_process_syncobj_query_work);
+	memcpy(&query_work->cmd, cmd, sizeof(*cmd));
+	query_work->context = context;
+
+	kthread_queue_work(hwsched->worker, &query_work->work);
+}
+
+/*
+ * This defines the maximum number of unack'd hardware fences that we allow. When this limit is
+ * reached, we put all threads that want to create a hardware fence to sleep until the unack'd
+ * hardware fence count drops to MIN_HW_FENCE_UNACK_COUNT
+ */
+#define MAX_HW_FENCE_UNACK_COUNT 20
+
+/*
+ * Once the unack'd hardware fence count drops to this value, wake up all the threads that want
+ * to create hardware fences
+ */
+#define MIN_HW_FENCE_UNACK_COUNT 10
+
+/*
+ * This is the maximum duration (in milliseconds) a thread that wants to create a hardware fence
+ * is put to sleep while we wait for the number of unack'd hardware fences to drop from
+ * MAX_HW_FENCE_UNACK_COUNT to MIN_HW_FENCE_UNACK_COUNT. If the count doesn't drop to the desired
+ * value, log an error and trigger snapshot and recovery.
+ */
+#define HW_FENCE_SLEEP_MS 200
+
+static void _enable_hw_fence_throttle(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+
+	set_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags);
+	set_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags);
+
+	/* Avoid submitting new work to gpu until the unack count drops to a desired threshold */
+	adreno_get_gpu_halt(adreno_dev);
+
+	mod_timer(&hfi->hw_fence_timer, jiffies + msecs_to_jiffies(HW_FENCE_SLEEP_MS));
+}
+
+static void _increment_hw_fence_unack_count(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+
+	if ((++hfi->hw_fence.unack_count) == MAX_HW_FENCE_UNACK_COUNT)
+		_enable_hw_fence_throttle(adreno_dev);
+}
+
+/**
+ * _send_hw_fence_no_ack - Send a hardware fence hfi packet to GMU without waiting for its ack.
+ * Increment the unack count on success
+ *
+ * Return: 0 on success or negative error on failure
+ */
+static int _send_hw_fence_no_ack(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	u32 seqnum;
+	int ret;
+
+	seqnum = atomic_inc_return(&hfi->hw_fence.seqnum);
+	entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2);
+
+	ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd));
+	if (!ret)
+		_increment_hw_fence_unack_count(adreno_dev);
+
+	return ret;
+}
+
+static struct adreno_hw_fence_entry *_get_deferred_hw_fence(struct adreno_context *drawctxt, u32 ts)
+{
+	struct adreno_hw_fence_entry *entry = NULL, *next, *deferred_hw_fence_entry = NULL;
+
+	spin_lock(&drawctxt->lock);
+	list_for_each_entry_safe(entry, next, &drawctxt->hw_fence_list, node) {
+
+		if (timestamp_cmp((u32)entry->cmd.ts, ts) > 0)
+			break;
+
+		/* We found a deferred hardware fence */
+		deferred_hw_fence_entry = entry;
+		break;
+	}
+	spin_unlock(&drawctxt->lock);
+
+	/*
+	 * This path executes in isolation from any paths that may release this entry. So, it is
+	 * safe to handle this entry outside of the drawctxt spinlock
+	 */
+	return deferred_hw_fence_entry;
+}
+
+static int _send_deferred_hw_fence(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct adreno_hw_fence_entry *entry, u32 ts)
+{
+	bool retired = kgsl_check_timestamp(KGSL_DEVICE(adreno_dev), &drawctxt->base, ts) ||
+				kgsl_context_is_bad(&drawctxt->base);
+	int ret = 0;
+	u32 flags = 0;
+
+	if (retired)
+		flags |= HW_FENCE_FLAG_SKIP_MEMSTORE;
+
+	ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry, flags);
+	if (ret)
+		return ret;
+
+	spin_lock(&drawctxt->lock);
+	if (!retired)
+		list_move_tail(&entry->node, &drawctxt->hw_fence_inflight_list);
+	else
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+	spin_unlock(&drawctxt->lock);
+
+	return 0;
+}
+
+/**
+ * process_hw_fence_deferred_ctxt - Send the hardware fences from the deferred drawctxt, which
+ * couldn't be sent earlier, to the GMU
+ */
+static int process_hw_fence_deferred_ctxt(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, u32 ts)
+{
+	struct adreno_hw_fence_entry *deferred_hw_fence_entry = NULL;
+	int ret = 0;
+
+	do {
+		deferred_hw_fence_entry = _get_deferred_hw_fence(drawctxt, ts);
+
+		if (!deferred_hw_fence_entry)
+			break;
+
+		ret = _send_deferred_hw_fence(adreno_dev, drawctxt, deferred_hw_fence_entry, ts);
+		if (ret)
+			break;
+
+	} while (deferred_hw_fence_entry != NULL);
+
+	return ret;
+}
+
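+/*
+ * Clear the hardware fence throttle state. If the maximum unack threshold had been hit, release
+ * the GPU halt and wake up the dispatcher and any threads waiting to create hardware fences.
+ */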
+static void _disable_hw_fence_throttle(struct adreno_device *adreno_dev, bool clear_abort_bit)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	bool max;
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	hfi->hw_fence.defer_drawctxt = NULL;
+	hfi->hw_fence.defer_ts = 0;
+	max = test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags);
+	if (max) {
+		clear_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags);
+		clear_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags);
+	}
+
+	if (clear_abort_bit)
+		clear_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags);
+	spin_unlock(&hfi->hw_fence.lock);
+
+	/* Wake up dispatcher and any sleeping threads that want to create hardware fences */
+	if (max) {
+		adreno_put_gpu_halt(adreno_dev);
+		adreno_hwsched_trigger(adreno_dev);
+		wake_up_all(&hfi->hw_fence.unack_wq);
+	}
+}
+
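+/*
+ * Worker to send the hardware fences that were deferred for a drawctxt. On failure, trigger
+ * fault recovery; otherwise drop the deferred context reference and disable throttling.
+ */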
+static void gen8_defer_hw_fence_work(struct kthread_work *work)
+{
+	struct gen8_hwsched_hfi *hfi = container_of(work,
+						struct gen8_hwsched_hfi, defer_hw_fence_work);
+	struct adreno_context *drawctxt = NULL;
+	struct kgsl_device *device;
+	struct adreno_device *adreno_dev;
+	u32 ts;
+	int ret;
+
+	spin_lock(&hfi->hw_fence.lock);
+	drawctxt = hfi->hw_fence.defer_drawctxt;
+	ts = hfi->hw_fence.defer_ts;
+	spin_unlock(&hfi->hw_fence.lock);
+
+	device = drawctxt->base.device;
+	adreno_dev = ADRENO_DEVICE(device);
+
+	/*
+	 * Grab the dispatcher and device mutex as we don't want to race with concurrent fault
+	 * recovery
+	 */
+	mutex_lock(&adreno_dev->hwsched.mutex);
+	mutex_lock(&device->mutex);
+
+	ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts);
+	if (ret) {
+		/* the deferred drawctxt will be handled post fault recovery */
+		gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+		goto unlock;
+	}
+
+	/*
+	 * Put back the context reference which was incremented when hw_fence.defer_drawctxt was set
+	 */
+	kgsl_context_put(&drawctxt->base);
+
+	gen8_hwsched_active_count_put(adreno_dev);
+
+	_disable_hw_fence_throttle(adreno_dev, false);
+
+unlock:
+	mutex_unlock(&device->mutex);
+	mutex_unlock(&adreno_dev->hwsched.mutex);
+}
+
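+/*
+ * Account for a hardware fence ack: complete the pending waiter if this ack is being waited on,
+ * otherwise decrement the unack count. Once the count drops back to the minimum threshold,
+ * disable throttling, handling any deferred drawctxt in a worker.
+ */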
+static void process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_hdr)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct adreno_context *drawctxt = NULL;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	/* If this ack is being waited on, we don't need to touch the unack count */
+	if (gen8_hw_fence_ack.sent_hdr &&
+	    CMP_HFI_ACK_HDR(gen8_hw_fence_ack.sent_hdr, received_hdr)) {
+		spin_unlock(&hfi->hw_fence.lock);
+		complete(&gen8_hw_fence_ack.complete);
+		return;
+	}
+
+	hfi->hw_fence.unack_count--;
+
+	/* The unack count should never be greater than MAX_HW_FENCE_UNACK_COUNT */
+	if (hfi->hw_fence.unack_count > MAX_HW_FENCE_UNACK_COUNT)
+		dev_err(&gmu->pdev->dev, "unexpected hardware fence unack count:%d\n",
+			hfi->hw_fence.unack_count);
+
+	if (!test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags) ||
+		(hfi->hw_fence.unack_count != MIN_HW_FENCE_UNACK_COUNT)) {
+		spin_unlock(&hfi->hw_fence.lock);
+		return;
+	}
+
+	drawctxt = hfi->hw_fence.defer_drawctxt;
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	del_timer_sync(&hfi->hw_fence_timer);
+
+	/*
+	 * We need to handle the deferred context in another thread so that we can unblock the f2h
+	 * daemon here as it will need to process the acks for the hardware fences belonging to the
+	 * deferred context
+	 */
+	if (drawctxt) {
+		kthread_init_work(&hfi->defer_hw_fence_work, gen8_defer_hw_fence_work);
+		kthread_queue_work(adreno_dev->hwsched.worker, &hfi->defer_hw_fence_work);
+		return;
+	}
+
+	_disable_hw_fence_throttle(adreno_dev, false);
+}
+
+void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev);
+	u32 rcvd[MAX_RCVD_SIZE], next_hdr, type;
+
+	mutex_lock(&hw_hfi->msgq_mutex);
+
+	for (;;) {
+		next_hdr = peek_next_header(gmu, HFI_MSG_ID);
+
+		if (!next_hdr)
+			break;
+
+		if (MSG_HDR_GET_TYPE(next_hdr) == HFI_MSG_ACK)
+			type = HFI_MSG_ACK;
+		else
+			type = MSG_HDR_GET_ID(next_hdr);
+
+		if (type != F2H_MSG_CONTEXT_BAD)
+			gen8_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd));
+
+		switch (type) {
+		case HFI_MSG_ACK:
+			/*
+			 * We are assuming that there is only one outstanding ack because the hfi
+			 * sending thread waits for completion while holding the device mutex
+			 * (except when we send H2F_MSG_HW_FENCE_INFO packets)
+			 */
+			if (MSG_HDR_GET_ID(rcvd[1]) == H2F_MSG_HW_FENCE_INFO)
+				process_hw_fence_ack(adreno_dev, rcvd[1]);
+			else
+				gen8_receive_ack_async(adreno_dev, rcvd);
+			break;
+		case F2H_MSG_CONTEXT_BAD:
+			gen8_hfi_queue_read(gmu, HFI_MSG_ID, (u32 *)adreno_dev->hwsched.ctxt_bad,
+						HFI_MAX_MSG_SIZE);
+			process_ctx_bad(adreno_dev);
+			break;
+		case F2H_MSG_TS_RETIRE:
+			log_profiling_info(adreno_dev, rcvd);
+			adreno_hwsched_trigger(adreno_dev);
+			break;
+		case F2H_MSG_SYNCOBJ_QUERY:
+			gen8_trigger_syncobj_query(adreno_dev, rcvd);
+			break;
+		case F2H_MSG_GMU_CNTR_RELEASE: {
+			struct hfi_gmu_cntr_release_cmd *cmd =
+				(struct hfi_gmu_cntr_release_cmd *) rcvd;
+
+			adreno_perfcounter_put(adreno_dev,
+				cmd->group_id, cmd->countable, PERFCOUNTER_FLAG_KERNEL);
+
+			adreno_mark_for_coldboot(adreno_dev);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&hw_hfi->msgq_mutex);
+}
+
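+/* Trace each GMU log event (4 dwords apiece) between the start and stop indices of the log block */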
+static void process_log_block(struct adreno_device *adreno_dev, void *data)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct hfi_log_block *cmd = data;
+	u32 *log_event = gmu->gmu_log->hostptr;
+	u32 start, end;
+
+	start = cmd->start_index;
+	end = cmd->stop_index;
+
+	log_event += start * 4;
+	while (start != end) {
+		trace_gmu_event(log_event);
+		log_event += 4;
+		start++;
+	}
+}
+
+static void gen8_hwsched_process_dbgq(struct adreno_device *adreno_dev, bool limited)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 rcvd[MAX_RCVD_SIZE];
+	bool recovery = false;
+
+	while (gen8_hfi_queue_read(gmu, HFI_DBG_ID, rcvd, sizeof(rcvd)) > 0) {
+
+		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_ERR) {
+			adreno_gen8_receive_err_req(gmu, rcvd);
+			recovery = true;
+			break;
+		}
+
+		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_DEBUG)
+			adreno_gen8_receive_debug_req(gmu, rcvd);
+
+		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_LOG_BLOCK)
+			process_log_block(adreno_dev, rcvd);
+
+		/* Process one debug queue message and return so as not to delay msgq processing */
+		if (limited)
+			break;
+	}
+
+	if (!recovery)
+		return;
+
+	gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+/* HFI interrupt handler */
+static irqreturn_t gen8_hwsched_hfi_handler(int irq, void *data)
+{
+	struct adreno_device *adreno_dev = data;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status = 0;
+
+	/*
+	 * GEN8_GMUCX_GMU2HOST_INTR_INFO may have bits set that are not specified in hfi->irq_mask.
+	 * Read and clear only those irq bits that we are processing here.
+	 */
+	gmu_core_regread(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, &status);
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, status & hfi->irq_mask);
+
+	/*
+	 * If interrupts are not enabled on the HFI message queue,
+	 * the inline message processing loop will process it,
+	 * else, process it here.
+	 */
+	if (!(hfi->irq_mask & HFI_IRQ_MSGQ_MASK))
+		status &= ~HFI_IRQ_MSGQ_MASK;
+
+	if (status & (HFI_IRQ_MSGQ_MASK | HFI_IRQ_DBGQ_MASK)) {
+		wake_up_interruptible(&hfi->f2h_wq);
+		adreno_hwsched_trigger(adreno_dev);
+	}
+	if (status & HFI_IRQ_CM3_FAULT_MASK) {
+		atomic_set(&gmu->cm3_fault, 1);
+
+		/* make sure other CPUs see the update */
+		smp_wmb();
+
+		dev_err_ratelimited(&gmu->pdev->dev,
+				"GMU CM3 fault interrupt received\n");
+
+		gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+
+	/* Ignore OOB bits */
+	status &= GENMASK(31 - (oob_max - 1), 0);
+
+	if (status & ~hfi->irq_mask)
+		dev_err_ratelimited(&gmu->pdev->dev,
+			"Unhandled HFI interrupts 0x%x\n",
+			status & ~hfi->irq_mask);
+
+	return IRQ_HANDLED;
+}
+
+#define HFI_IRQ_MSGQ_MASK BIT(0)
+
+static int check_ack_failure(struct adreno_device *adreno_dev,
+	struct pending_cmd *ack)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (ack->results[2] != 0xffffffff)
+		return 0;
+
+	dev_err(&gmu->pdev->dev,
+		"ACK error: sender id %d seqnum %d\n",
+		MSG_HDR_GET_ID(ack->sent_hdr),
+		MSG_HDR_GET_SEQNUM(ack->sent_hdr));
+
+	return -EINVAL;
+}
+
+int gen8_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	u32 *cmd = data;
+	u32 seqnum;
+	int rc;
+	struct pending_cmd pending_ack;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	*cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2);
+
+	add_waiter(hfi, *cmd, &pending_ack);
+
+	rc = gen8_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
+	if (rc)
+		goto done;
+
+	rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack,
+		gen8_hwsched_process_msgq);
+	if (rc)
+		goto done;
+
+	rc = check_ack_failure(adreno_dev, &pending_ack);
+
+done:
+	del_waiter(hfi, &pending_ack);
+
+	return rc;
+}
+
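+/*
+ * Set up the HFI queue table: one command, one message and one debug queue, plus 14 dispatch
+ * queues spread across the ringbuffer priority levels.
+ */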
+static void init_queues(struct gen8_hfi *hfi)
+{
+	u32 gmuaddr = hfi->hfi_mem->gmuaddr;
+	struct hfi_queue_table hfi_table = {
+		.qtbl_hdr = {
+			.version = 0,
+			.size = sizeof(struct hfi_queue_table) >> 2,
+			.qhdr0_offset =
+				sizeof(struct hfi_queue_table_header) >> 2,
+			.qhdr_size = sizeof(struct hfi_queue_header) >> 2,
+			.num_q = HFI_QUEUE_MAX,
+			.num_active_q = HFI_QUEUE_MAX,
+		},
+		.qhdr = {
+			DEFINE_QHDR(gmuaddr, HFI_CMD_ID, 0),
+			DEFINE_QHDR(gmuaddr, HFI_MSG_ID, 0),
+			DEFINE_QHDR(gmuaddr, HFI_DBG_ID, 0),
+			/* 4 DQs for RB priority 0 */
+			DEFINE_QHDR(gmuaddr, 3, 0),
+			DEFINE_QHDR(gmuaddr, 4, 0),
+			DEFINE_QHDR(gmuaddr, 5, 0),
+			DEFINE_QHDR(gmuaddr, 6, 0),
+			/* 4 DQs for RB priority 1 */
+			DEFINE_QHDR(gmuaddr, 7, 1),
+			DEFINE_QHDR(gmuaddr, 8, 1),
+			DEFINE_QHDR(gmuaddr, 9, 1),
+			DEFINE_QHDR(gmuaddr, 10, 1),
+			/* 3 DQs for RB priority 2 */
+			DEFINE_QHDR(gmuaddr, 11, 2),
+			DEFINE_QHDR(gmuaddr, 12, 2),
+			DEFINE_QHDR(gmuaddr, 13, 2),
+			/* 2 DQs for RB priority 3 */
+			DEFINE_QHDR(gmuaddr, 14, 3),
+			DEFINE_QHDR(gmuaddr, 15, 3),
+			/* 1 DQ for LPAC RB priority 4 */
+			DEFINE_QHDR(gmuaddr, 16, 4),
+		},
+	};
+
+	memcpy(hfi->hfi_mem->hostptr, &hfi_table, sizeof(hfi_table));
+}
+
+/* Total header sizes + queue sizes + 16 for alignment */
+#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
+	(SZ_4K * HFI_QUEUE_MAX))
+
+static int hfi_f2h_main(void *arg);
+
+int gen8_hwsched_hfi_init(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev);
+
+	if (IS_ERR_OR_NULL(hw_hfi->big_ib)) {
+		hw_hfi->big_ib = gen8_reserve_gmu_kernel_block(
+				to_gen8_gmu(adreno_dev), 0,
+				HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib),
+				GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(hw_hfi->big_ib))
+			return PTR_ERR(hw_hfi->big_ib);
+	}
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR) &&
+			IS_ERR_OR_NULL(hw_hfi->big_ib_recurring)) {
+		hw_hfi->big_ib_recurring = gen8_reserve_gmu_kernel_block(
+				to_gen8_gmu(adreno_dev), 0,
+				HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib),
+				GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(hw_hfi->big_ib_recurring))
+			return PTR_ERR(hw_hfi->big_ib_recurring);
+	}
+
+	if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
+		hfi->hfi_mem = gen8_reserve_gmu_kernel_block(
+				to_gen8_gmu(adreno_dev),
+				0, HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0);
+		if (IS_ERR(hfi->hfi_mem))
+			return PTR_ERR(hfi->hfi_mem);
+		init_queues(hfi);
+	}
+
+	if (IS_ERR_OR_NULL(hw_hfi->f2h_task)) {
+		hw_hfi->f2h_task = kthread_run(hfi_f2h_main, adreno_dev, "gmu_f2h");
+		if (!IS_ERR(hw_hfi->f2h_task))
+			sched_set_fifo(hw_hfi->f2h_task);
+	}
+
+	return PTR_ERR_OR_ZERO(hw_hfi->f2h_task);
+}
+
+static int get_attrs(u32 flags)
+{
+	int attrs = IOMMU_READ;
+
+	if (flags & HFI_MEMFLAG_GMU_PRIV)
+		attrs |= IOMMU_PRIV;
+
+	if (flags & HFI_MEMFLAG_GMU_WRITEABLE)
+		attrs |= IOMMU_WRITE;
+
+	return attrs;
+}
+
+static int gmu_import_buffer(struct adreno_device *adreno_dev,
+	struct hfi_mem_alloc_entry *entry)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct hfi_mem_alloc_desc *desc = &entry->desc;
+	u32 vma_id = (desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ? GMU_CACHE : GMU_NONCACHED_KERNEL;
+
+	return gen8_gmu_import_buffer(gmu, vma_id, entry->md, get_attrs(desc->flags), desc->align);
+}
+
+static struct hfi_mem_alloc_entry *lookup_mem_alloc_table(
+	struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc)
+{
+	struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev);
+	int i;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i];
+
+		if ((entry->desc.mem_kind == desc->mem_kind) &&
+			(entry->desc.gmu_mem_handle == desc->gmu_mem_handle))
+			return entry;
+	}
+
+	return NULL;
+}
+
+static struct hfi_mem_alloc_entry *get_mem_alloc_entry(
+	struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct hfi_mem_alloc_entry *entry =
+		lookup_mem_alloc_table(adreno_dev, desc);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u64 flags = 0;
+	u32 priv = 0;
+	int ret;
+	const char *memkind_string = desc->mem_kind < HFI_MEMKIND_MAX ?
+			hfi_memkind_strings[desc->mem_kind] : "UNKNOWN";
+
+	if (entry)
+		return entry;
+
+	if (desc->mem_kind >= HFI_MEMKIND_MAX) {
+		dev_err(&gmu->pdev->dev, "Invalid mem kind: %d\n",
+			desc->mem_kind);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (hfi->mem_alloc_entries == ARRAY_SIZE(hfi->mem_alloc_table)) {
+		dev_err(&gmu->pdev->dev,
+			"Reached max mem alloc entries\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	entry = &hfi->mem_alloc_table[hfi->mem_alloc_entries];
+
+	memcpy(&entry->desc, desc, sizeof(*desc));
+
+	entry->desc.host_mem_handle = desc->gmu_mem_handle;
+
+	if (desc->flags & HFI_MEMFLAG_GFX_PRIV)
+		priv |= KGSL_MEMDESC_PRIVILEGED;
+
+	if (!(desc->flags & HFI_MEMFLAG_GFX_WRITEABLE))
+		flags |= KGSL_MEMFLAGS_GPUREADONLY;
+
+	if (desc->flags & HFI_MEMFLAG_GFX_SECURE)
+		flags |= KGSL_MEMFLAGS_SECURE;
+
+	if (!(desc->flags & HFI_MEMFLAG_GFX_ACC) &&
+		(desc->mem_kind != HFI_MEMKIND_HW_FENCE)) {
+		if (desc->mem_kind == HFI_MEMKIND_MMIO_IPC_CORE)
+			entry->md = gen8_reserve_gmu_kernel_block_fixed(gmu, 0,
+					desc->size,
+					(desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ?
+					GMU_CACHE : GMU_NONCACHED_KERNEL,
+					"qcom,ipc-core", get_attrs(desc->flags),
+					desc->align);
+		else
+			entry->md = gen8_reserve_gmu_kernel_block(gmu, 0,
+					desc->size,
+					(desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ?
+					GMU_CACHE : GMU_NONCACHED_KERNEL,
+					desc->align);
+
+		if (IS_ERR(entry->md)) {
+			int ret = PTR_ERR(entry->md);
+
+			memset(entry, 0, sizeof(*entry));
+			return ERR_PTR(ret);
+		}
+		entry->desc.size = entry->md->size;
+		entry->desc.gmu_addr = entry->md->gmuaddr;
+
+		goto done;
+	}
+
+	/*
+	 * Use pre-allocated memory descriptors to map the HFI_MEMKIND_HW_FENCE and
+	 * HFI_MEMKIND_MEMSTORE
+	 */
+	switch (desc->mem_kind) {
+	case HFI_MEMKIND_HW_FENCE:
+		entry->md = &adreno_dev->hwsched.hw_fence.memdesc;
+		break;
+	case HFI_MEMKIND_MEMSTORE:
+		entry->md = device->memstore;
+		break;
+	default:
+		entry->md = kgsl_allocate_global(device, desc->size, 0, flags,
+			priv, memkind_string);
+		break;
+	}
+	if (IS_ERR(entry->md)) {
+		int ret = PTR_ERR(entry->md);
+
+		memset(entry, 0, sizeof(*entry));
+		return ERR_PTR(ret);
+	}
+
+	entry->desc.size = entry->md->size;
+	entry->desc.gpu_addr = entry->md->gpuaddr;
+
+	if (!(desc->flags & HFI_MEMFLAG_GMU_ACC))
+		goto done;
+
+	/*
+	 * If gmu mapping fails, then we have to live with
+	 * leaking the gpu global buffer allocated above.
+	 */
+	ret = gmu_import_buffer(adreno_dev, entry);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"gpuaddr: 0x%llx size: %lld bytes lost\n",
+			entry->md->gpuaddr, entry->md->size);
+		memset(entry, 0, sizeof(*entry));
+		return ERR_PTR(ret);
+	}
+
+	entry->desc.gmu_addr = entry->md->gmuaddr;
+done:
+	hfi->mem_alloc_entries++;
+
+	return entry;
+}
+
+static int process_mem_alloc(struct adreno_device *adreno_dev,
+	struct hfi_mem_alloc_desc *mad)
+{
+	struct hfi_mem_alloc_entry *entry;
+
+	entry = get_mem_alloc_entry(adreno_dev, mad);
+	if (IS_ERR(entry))
+		return PTR_ERR(entry);
+
+	if (entry->md) {
+		mad->gpu_addr = entry->md->gpuaddr;
+		mad->gmu_addr = entry->md->gmuaddr;
+	}
+
+	/*
+	 * GMU uses the host_mem_handle to check if this memalloc was
+	 * successful
+	 */
+	mad->host_mem_handle = mad->gmu_mem_handle;
+
+	return 0;
+}
+
+static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd)
+{
+	struct hfi_mem_alloc_desc desc = {0};
+	struct hfi_mem_alloc_reply_cmd out = {0};
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 seqnum;
+	int ret;
+
+	hfi_get_mem_alloc_desc(rcvd, &desc);
+
+	ret = process_mem_alloc(adreno_dev, &desc);
+	if (ret)
+		return ret;
+
+	memcpy(&out.desc, &desc, sizeof(out.desc));
+
+	out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC);
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2);
+
+	out.req_hdr = *(u32 *)rcvd;
+
+	return gen8_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out));
+}
+
+static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd)
+{
+	struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd;
+	struct hfi_gmu_cntr_register_reply_cmd out = {0};
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 lo = 0, hi = 0, seqnum;
+
+	/*
+	 * Failure to allocate counter is not fatal. Sending lo = 0, hi = 0
+	 * indicates to GMU that counter allocation failed.
+	 */
+	adreno_perfcounter_get(adreno_dev,
+		in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL);
+
+	out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER);
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2);
+	out.req_hdr = in->hdr;
+	out.group_id = in->group_id;
+	out.countable = in->countable;
+	/* Fill in byte offset of counter */
+	out.cntr_lo = lo << 2;
+	out.cntr_hi = hi << 2;
+
+	return gen8_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out));
+}
+
+static int send_warmboot_start_msg(struct adreno_device *adreno_dev)
+{
+	int ret = 0;
+	struct hfi_start_cmd cmd;
+
+	if (!adreno_dev->warmboot_enabled)
+		return ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_START);
+	if (ret)
+		return ret;
+
+	cmd.hdr = RECORD_NOP_MSG_HDR(cmd.hdr);
+
+	return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
+}
+
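+/*
+ * Send H2F_MSG_START and poll the message queue, servicing any memory allocation and GMU counter
+ * register requests, until the start ack is received.
+ */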
+static int send_start_msg(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret, rc = 0;
+	struct hfi_start_cmd cmd;
+	u32 seqnum, rcvd[MAX_RCVD_SIZE];
+	struct pending_cmd pending_ack = {0};
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_START);
+	if (ret)
+		return ret;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+
+	pending_ack.sent_hdr = cmd.hdr;
+
+	rc = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (rc)
+		return rc;
+
+poll:
+	rc = gmu_core_timed_poll_check(device, GEN8_GMUCX_GMU2HOST_INTR_INFO,
+		HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK);
+
+	if (rc) {
+		dev_err(&gmu->pdev->dev,
+			"Timed out processing MSG_START seqnum: %d\n",
+			seqnum);
+		gmu_core_fault_snapshot(device);
+		return rc;
+	}
+
+	/* Clear the interrupt */
+	gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR,
+		HFI_IRQ_MSGQ_MASK);
+
+	if (gen8_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) {
+		dev_err(&gmu->pdev->dev, "MSG_START: no payload\n");
+		gmu_core_fault_snapshot(device);
+		return -EINVAL;
+	}
+
+	if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
+		rc = gen8_receive_ack_cmd(gmu, rcvd, &pending_ack);
+		if (rc)
+			return rc;
+
+		return check_ack_failure(adreno_dev, &pending_ack);
+	}
+
+	if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) {
+		rc = mem_alloc_reply(adreno_dev, rcvd);
+		if (rc)
+			return rc;
+
+		goto poll;
+	}
+
+	if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_GMU_CNTR_REGISTER) {
+		rc = gmu_cntr_register_reply(adreno_dev, rcvd);
+		if (rc)
+			return rc;
+		goto poll;
+	}
+
+	dev_err(&gmu->pdev->dev,
+		"MSG_START: unexpected response id:%d, type:%d\n",
+		MSG_HDR_GET_ID(rcvd[0]),
+		MSG_HDR_GET_TYPE(rcvd[0]));
+
+	gmu_core_fault_snapshot(device);
+
+	return rc;
+}
+
+static void reset_hfi_mem_records(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct kgsl_memdesc *md = NULL;
+	u32 i;
+
+	for (i = 0; i < hw_hfi->mem_alloc_entries; i++) {
+		struct hfi_mem_alloc_desc *desc = &hw_hfi->mem_alloc_table[i].desc;
+
+		if (desc->flags & HFI_MEMFLAG_HOST_INIT) {
+			md = hw_hfi->mem_alloc_table[i].md;
+			memset(md->hostptr, 0x0, md->size);
+		}
+	}
+}
+
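+/* Discard any unread messages by snapping each enabled queue's read index to its write index */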
+static void reset_hfi_queues(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
+	u32 i;
+
+	/* Flush HFI queues */
+	for (i = 0; i < HFI_QUEUE_MAX; i++) {
+		struct hfi_queue_header *hdr = &tbl->qhdr[i];
+
+		if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+			continue;
+
+		hdr->read_index = hdr->write_index;
+	}
+}
+
+void gen8_hwsched_hfi_stop(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+
+	hfi->irq_mask &= ~HFI_IRQ_MSGQ_MASK;
+
+	/*
+	 * In some corner cases, it is possible that the GMU puts TS_RETIRE
+	 * on the msgq after we have turned off GMU interrupts. Hence,
+	 * drain the queue one last time before we reset the HFI queues.
+	 */
+	gen8_hwsched_process_msgq(adreno_dev);
+
+	/* Drain the debug queue before we reset HFI queues */
+	gen8_hwsched_process_dbgq(adreno_dev, false);
+
+	kgsl_pwrctrl_axi(KGSL_DEVICE(adreno_dev), false);
+
+	clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	/*
+	 * Reset the HFI host-accessible memory records, as the GMU expects HFI
+	 * memory records to be cleared at bootup.
+	 */
+	reset_hfi_mem_records(adreno_dev);
+}
+
+static void gen8_hwsched_enable_async_hfi(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+
+	hfi->irq_mask |= HFI_IRQ_MSGQ_MASK;
+
+	gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN8_GMUCX_GMU2HOST_INTR_MASK,
+		(u32)~hfi->irq_mask);
+}
+
+static int enable_preemption(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 data;
+	int ret;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	/*
+	 * Bits [0:1] contains the preemption level
+	 * Bit 2 is to enable/disable gmem save/restore
+	 * Bit 3 is to enable/disable skipsaverestore
+	 */
+	data = FIELD_PREP(GENMASK(1, 0), adreno_dev->preempt.preempt_level) |
+			FIELD_PREP(BIT(2), adreno_dev->preempt.usesgmem) |
+			FIELD_PREP(BIT(3), adreno_dev->preempt.skipsaverestore);
+
+	ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_PREEMPTION, 1,
+			data);
+	if (ret)
+		return ret;
+
+	if (gen8_core->qos_value) {
+		int i;
+
+		for (i = 0; i < KGSL_PRIORITY_MAX_RB_LEVELS; i++) {
+			if (!gen8_core->qos_value[i])
+				continue;
+
+			gen8_hfi_send_set_value(adreno_dev,
+				HFI_VALUE_RB_GPU_QOS, i,
+				gen8_core->qos_value[i]);
+		}
+	}
+
+	if (device->pwrctrl.rt_bus_hint) {
+		ret = gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_RB_IB_RULE, 0,
+			device->pwrctrl.rt_bus_hint);
+		if (ret)
+			device->pwrctrl.rt_bus_hint = 0;
+	}
+
+	/*
+	 * Bits[3:0] contain the preemption timeout enable bit per ringbuffer
+	 * Bits[31:4] contain the timeout in ms
+	 */
+	return gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_BIN_TIME, 1,
+		FIELD_PREP(GENMASK(31, 4), ADRENO_PREEMPT_TIMEOUT) |
+		FIELD_PREP(GENMASK(3, 0), 0xf));
+}
+
+static int enable_gmu_stats(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 data;
+
+	if (!gmu->stats_enable)
+		return 0;
+
+	/*
+	 * Bits [23:0] contains the countables mask
+	 * Bits [31:24] is the sampling interval
+	 */
+	data = FIELD_PREP(GENMASK(23, 0), gmu->stats_mask) |
+		FIELD_PREP(GENMASK(31, 24), gmu->stats_interval);
+
+	return gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_GMU_STATS, 1, data);
+}
+
+static int gen8_hfi_send_perfcounter_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	/*
+	 * Perfcounter retention is disabled by default in GMU firmware.
+	 * In case perfcounter retention behaviour is overridden dynamically
+	 * via a sysfs setting, send this HFI feature with 'enable = 0' to
+	 * disable this feature in GMU firmware.
+	 */
+	if (adreno_dev->perfcounter)
+		return gen8_hfi_send_feature_ctrl(adreno_dev,
+				HFI_FEATURE_PERF_NORETAIN, 0, 0);
+
+	return 0;
+}
+
+u32 gen8_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop)
+{
+	struct hfi_get_value_cmd cmd;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct pending_cmd pending_ack;
+	u32 seqnum;
+	int rc;
+
+	rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE);
+	if (rc)
+		return 0;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+	cmd.type = prop;
+	cmd.subtype = 0;
+
+	add_waiter(hfi, cmd.hdr, &pending_ack);
+
+	rc = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (rc)
+		goto done;
+
+	rc = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, &pending_ack,
+		gen8_hwsched_process_msgq);
+
+done:
+	del_waiter(hfi, &pending_ack);
+
+	if (rc || (pending_ack.results[2] == UINT_MAX))
+		return 0;
+
+	return pending_ack.results[2];
+}
+
+static void _context_queue_enable(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (GMU_VER_MINOR(gmu->ver.hfi) >= 3) {
+		if (gen8_hfi_send_get_value(adreno_dev, HFI_VALUE_CONTEXT_QUEUE, 0) == 1)
+			set_bit(ADRENO_HWSCHED_CONTEXT_QUEUE, &adreno_dev->hwsched.flags);
+	}
+}
+
+static int gen8_hfi_send_hw_fence_feature_ctrl(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	int ret;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags))
+		return 0;
+
+	ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HW_FENCE, 1, 0);
+	if (ret && (ret == -ENOENT)) {
+		dev_err(&gmu->pdev->dev, "GMU doesn't support HW_FENCE feature\n");
+		adreno_hwsched_deregister_hw_fence(hwsched->hw_fence.handle);
+		return 0;
+	}
+
+	return ret;
+}
+
+static void gen8_spin_idle_debug_lpac(struct adreno_device *adreno_dev,
+				const char *str)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	u32 rptr, wptr, status, intstatus, global_status;
+	bool val = adreno_is_preemption_enabled(adreno_dev);
+
+	dev_err(device->dev, str);
+
+	kgsl_regread(device, GEN8_CP_RB_RPTR_LPAC, &rptr);
+	kgsl_regread(device, GEN8_CP_RB_WPTR_LPAC, &wptr);
+
+	kgsl_regread(device, GEN8_RBBM_STATUS, &status);
+	kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &intstatus);
+	kgsl_regread(device, GEN8_CP_INTERRUPT_STATUS_GLOBAL, &global_status);
+
+	dev_err(device->dev,
+		"LPAC rb=%d pos=%X/%X rbbm_status=%8.8X int_0_status=%8.8X global_status=%8.8X\n",
+		val ? KGSL_LPAC_RB_ID : 1, rptr, wptr,
+		status, intstatus, global_status);
+
+	kgsl_device_snapshot(device, NULL, NULL, false);
+}
+
+static bool gen8_hwsched_warmboot_possible(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	if (adreno_dev->warmboot_enabled && test_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags)
+		&& test_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags) &&
+		!test_bit(ADRENO_DEVICE_FORCE_COLDBOOT, &adreno_dev->priv))
+		return true;
+
+	return false;
+}
+
+static int gen8_hwsched_hfi_send_warmboot_cmd(struct adreno_device *adreno_dev,
+		struct kgsl_memdesc *desc, u32 flag, bool async, struct pending_cmd *ack)
+{
+	struct hfi_warmboot_scratch_cmd cmd = {0};
+	int ret;
+
+	if (!adreno_dev->warmboot_enabled)
+		return 0;
+
+	cmd.scratch_addr = desc->gmuaddr;
+	cmd.scratch_size = desc->size;
+	cmd.flags = flag;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_WARMBOOT_CMD);
+	if (ret)
+		return ret;
+
+	if (async)
+		return gen8_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
+
+	return gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, ack, sizeof(cmd));
+}
+
+static int gen8_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev,
+		struct pending_cmd *ret_cmd)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct hfi_warmboot_scratch_cmd cmd = {
+		.scratch_addr = gmu->gpu_boot_scratch->gmuaddr,
+		.scratch_size = gmu->gpu_boot_scratch->size,
+		.flags = HFI_WARMBOOT_EXEC_SCRATCH,
+	};
+	int ret = 0;
+	u32 seqnum;
+
+	if (!adreno_dev->warmboot_enabled)
+		return 0;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_WARMBOOT_CMD);
+	if (ret)
+		return ret;
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+	add_waiter(hfi, cmd.hdr, ret_cmd);
+
+	ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (ret)
+		goto err;
+
+	ret = adreno_hwsched_wait_ack_completion(adreno_dev, &gmu->pdev->dev, ret_cmd,
+		gen8_hwsched_process_msgq);
+err:
+	del_waiter(hfi, ret_cmd);
+
+	return ret;
+}
+
+static int gen8_hwsched_warmboot_gpu(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct pending_cmd ret_cmd = {0};
+	int ret = 0;
+
+	ret = gen8_hwsched_hfi_warmboot_gpu_cmd(adreno_dev, &ret_cmd);
+	if (!ret)
+		return ret;
+
+	if (MSG_HDR_GET_TYPE(ret_cmd.results[1]) != H2F_MSG_WARMBOOT_CMD)
+		goto err;
+
+	switch (MSG_HDR_GET_TYPE(ret_cmd.results[2])) {
+	case H2F_MSG_ISSUE_CMD_RAW: {
+		if (ret_cmd.results[2] == gmu->cp_init_hdr)
+			gen8_spin_idle_debug(adreno_dev,
+				"CP initialization failed to idle\n");
+		else if (ret_cmd.results[2] == gmu->switch_to_unsec_hdr)
+			gen8_spin_idle_debug(adreno_dev,
+				"Switch to unsecure failed to idle\n");
+		}
+		break;
+	case H2F_MSG_ISSUE_LPAC_CMD_RAW:
+		gen8_spin_idle_debug_lpac(adreno_dev,
+			"LPAC CP initialization failed to idle\n");
+		break;
+	}
+err:
+	/* Clear the bit on error so that in the next slumber exit we coldboot */
+	clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags);
+	gen8_disable_gpu_irq(adreno_dev);
+	return ret;
+}
+
+static int gen8_hwsched_coldboot_gpu(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev);
+	struct pending_cmd ack = {0};
+	int ret = 0;
+
+	ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gpu_boot_scratch,
+		 HFI_WARMBOOT_SET_SCRATCH, true, &ack);
+	if (ret)
+		goto done;
+
+	ret = gen8_hwsched_cp_init(adreno_dev);
+	if (ret)
+		goto done;
+
+	ret = gen8_hwsched_lpac_cp_init(adreno_dev);
+	if (ret)
+		goto done;
+
+	ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gpu_boot_scratch,
+		HFI_WARMBOOT_QUERY_SCRATCH, true, &ack);
+	if (ret)
+		goto done;
+
+	if (adreno_dev->warmboot_enabled)
+		set_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags);
+
+done:
+	/* Clear the bitmask so that we don't send the record bit with future HFI messages */
+	memset(hfi->wb_set_record_bitmask, 0x0, sizeof(hfi->wb_set_record_bitmask));
+
+	if (ret)
+		gen8_disable_gpu_irq(adreno_dev);
+
+	return ret;
+}
+
+int gen8_hwsched_boot_gpu(struct adreno_device *adreno_dev)
+{
+	/* If warmboot is possible, just send the warmboot command; otherwise coldboot */
+	if (gen8_hwsched_warmboot_possible(adreno_dev))
+		return gen8_hwsched_warmboot_gpu(adreno_dev);
+	else
+		return gen8_hwsched_coldboot_gpu(adreno_dev);
+}
+
+static int gen8_hwsched_setup_default_votes(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+
+	/* Request default DCVS level */
+	ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+	if (ret)
+		return ret;
+
+	/* Request default BW vote */
+	return kgsl_pwrctrl_axi(device, true);
+}
+
+int gen8_hwsched_warmboot_init_gmu(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct pending_cmd ack = {0};
+	int ret = 0;
+
+	ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch,
+		 HFI_WARMBOOT_EXEC_SCRATCH, false, &ack);
+	if (ret)
+		goto err;
+
+	gen8_hwsched_enable_async_hfi(adreno_dev);
+
+	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	ret = gen8_hwsched_setup_default_votes(adreno_dev);
+
+err:
+	if (ret) {
+		/* Clear the bit in case of an error so next boot will be coldboot */
+		clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags);
+		clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags);
+		gen8_hwsched_hfi_stop(adreno_dev);
+	}
+
+	return ret;
+}
+
+static void warmboot_init_message_record_bitmask(struct adreno_device *adreno_dev)
+{
+	struct gen8_hfi *hfi = to_gen8_hfi(adreno_dev);
+
+	if (!adreno_dev->warmboot_enabled)
+		return;
+
+	/* Set the record bit for all the messages */
+	memset(hfi->wb_set_record_bitmask, 0xFF, sizeof(hfi->wb_set_record_bitmask));
+
+	/* These messages should not be recorded */
+	clear_bit(H2F_MSG_WARMBOOT_CMD, hfi->wb_set_record_bitmask);
+	clear_bit(H2F_MSG_START, hfi->wb_set_record_bitmask);
+	clear_bit(H2F_MSG_GET_VALUE, hfi->wb_set_record_bitmask);
+	clear_bit(H2F_MSG_GX_BW_PERF_VOTE, hfi->wb_set_record_bitmask);
+}
+
+int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct pending_cmd ack = {0};
+	int ret;
+
+	reset_hfi_queues(adreno_dev);
+
+	ret = gen8_gmu_hfi_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	if (gen8_hwsched_warmboot_possible(adreno_dev))
+		return gen8_hwsched_warmboot_init_gmu(adreno_dev);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_GMU_WARMBOOT) &&
+		(!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))) {
+		if (gen8_hfi_send_get_value(adreno_dev, HFI_VALUE_GMU_WARMBOOT, 0) == 1)
+			adreno_dev->warmboot_enabled = true;
+	}
+
+	warmboot_init_message_record_bitmask(adreno_dev);
+
+	/* Clear these bits here and set them when we successfully record the scratch */
+	clear_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags);
+	clear_bit(GMU_PRIV_WARMBOOT_GPU_BOOT_DONE, &gmu->flags);
+
+	ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch,
+		HFI_WARMBOOT_SET_SCRATCH, false, &ack);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_gpu_perf_table(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table, sizeof(gmu->hfi.bw_table));
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_acd_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_bcl_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_clx_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_ifpc_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HWSCHED, 1, 0);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_KPROF, 1, 0);
+	if (ret)
+		goto err;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR)) {
+		ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_LSR,
+				1, 0);
+		if (ret)
+			goto err;
+	}
+
+	ret = gen8_hfi_send_perfcounter_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	/* Enable the long ib timeout detection */
+	if (adreno_long_ib_detect(adreno_dev)) {
+		ret = gen8_hfi_send_feature_ctrl(adreno_dev,
+			HFI_FEATURE_BAIL_OUT_TIMER, 1, 0);
+		if (ret)
+			goto err;
+	}
+
+	enable_gmu_stats(adreno_dev);
+
+	if (gmu->log_stream_enable)
+		gen8_hfi_send_set_value(adreno_dev,
+			HFI_VALUE_LOG_STREAM_ENABLE, 0, 1);
+
+	if (gmu->log_group_mask)
+		gen8_hfi_send_set_value(adreno_dev,
+			HFI_VALUE_LOG_GROUP, 0, gmu->log_group_mask);
+
+	ret = gen8_hfi_send_core_fw_start(adreno_dev);
+	if (ret)
+		goto err;
+
+	/*
+	 * HFI_VALUE_CONTEXT_QUEUE can only be queried after GMU has initialized some of the
+	 * required resources as part of handling gen8_hfi_send_core_fw_start()
+	 */
+	if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) {
+		_context_queue_enable(adreno_dev);
+		adreno_hwsched_register_hw_fence(adreno_dev);
+	}
+
+	ret = gen8_hfi_send_hw_fence_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = enable_preemption(adreno_dev);
+	if (ret)
+		goto err;
+
+	ret = gen8_hfi_send_lpac_feature_ctrl(adreno_dev);
+	if (ret)
+		goto err;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) {
+		ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_AQE, 1, 0);
+		if (ret)
+			goto err;
+	}
+
+	ret = send_start_msg(adreno_dev);
+	if (ret)
+		goto err;
+
+	/*
+	 * Send this additional start message on cold boot if warmboot is enabled.
+	 * This message will be recorded, and on a warmboot it will trigger the
+	 * sequence to replay memory allocation requests and ECP task setup.
+	 */
+	ret = send_warmboot_start_msg(adreno_dev);
+	if (ret)
+		goto err;
+
+	gen8_hwsched_enable_async_hfi(adreno_dev);
+
+	set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
+
+	/* Send this message only on cold boot */
+	ret = gen8_hwsched_hfi_send_warmboot_cmd(adreno_dev, gmu->gmu_init_scratch,
+		HFI_WARMBOOT_QUERY_SCRATCH, true, &ack);
+	if (ret)
+		goto err;
+
+	if (adreno_dev->warmboot_enabled)
+		set_bit(GMU_PRIV_WARMBOOT_GMU_INIT_DONE, &gmu->flags);
+
+	ret = gen8_hwsched_setup_default_votes(adreno_dev);
+
+err:
+	if (ret)
+		gen8_hwsched_hfi_stop(adreno_dev);
+
+	return ret;
+}
+
+static int submit_raw_cmds(struct adreno_device *adreno_dev, void *cmds, u32 size_bytes,
+	const char *str)
+{
+	int ret;
+
+	ret = gen8_hfi_send_cmd_async(adreno_dev, cmds, size_bytes);
+	if (ret)
+		return ret;
+
+	ret = gmu_core_timed_poll_check(KGSL_DEVICE(adreno_dev),
+			GEN8_GMUAO_GPU_CX_BUSY_STATUS, 0, 200, BIT(23));
+	if (ret)
+		gen8_spin_idle_debug(adreno_dev, str);
+
+	return ret;
+}
+
+static int submit_lpac_raw_cmds(struct adreno_device *adreno_dev, void *cmds, u32 size_bytes,
+	const char *str)
+{
+	int ret;
+
+	ret = gen8_hfi_send_cmd_async(adreno_dev, cmds, size_bytes);
+	if (ret)
+		return ret;
+
+	ret = gmu_core_timed_poll_check(KGSL_DEVICE(adreno_dev),
+			GEN8_GMUAO_LPAC_BUSY_STATUS, 0, 200, BIT(23));
+	if (ret)
+		gen8_spin_idle_debug_lpac(adreno_dev, str);
+
+	return ret;
+}
+
+static int cp_init(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 cmds[GEN8_CP_INIT_DWORDS + 1];
+	int ret = 0;
+
+	cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD);
+
+	gen8_cp_init_cmds(adreno_dev, &cmds[1]);
+
+	ret = submit_raw_cmds(adreno_dev, cmds, sizeof(cmds),
+			"CP initialization failed to idle\n");
+
+	/* Save the header in case we need to debug a warmboot */
+	gmu->cp_init_hdr = cmds[0];
+
+	return ret;
+}
+
+static int send_switch_to_unsecure(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 cmds[3];
+	int ret = 0;
+
+	cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, HFI_MSG_CMD);
+
+	cmds[1] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+	cmds[2] = 0;
+
+	ret = submit_raw_cmds(adreno_dev, cmds, sizeof(cmds),
+			"Switch to unsecure failed to idle\n");
+
+	/* Save the header in case we need to debug a warmboot */
+	gmu->switch_to_unsec_hdr = cmds[0];
+
+	return ret;
+}
+
+int gen8_hwsched_cp_init(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	int ret;
+
+	ret = cp_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	ret = adreno_zap_shader_load(adreno_dev, gen8_core->zap_name);
+	if (ret)
+		return ret;
+
+	if (!adreno_dev->zap_loaded)
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev),
+			GEN8_RBBM_SECVID_TRUST_CNTL, 0x0);
+	else
+		ret = send_switch_to_unsecure(adreno_dev);
+
+	return ret;
+}
+
+int gen8_hwsched_lpac_cp_init(struct adreno_device *adreno_dev)
+{
+	u32 cmds[GEN8_CP_INIT_DWORDS + 1];
+
+	if (!adreno_dev->lpac_enabled)
+		return 0;
+
+	cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_LPAC_CMD_RAW, HFI_MSG_CMD);
+
+	gen8_cp_init_cmds(adreno_dev, &cmds[1]);
+
+	return submit_lpac_raw_cmds(adreno_dev, cmds, sizeof(cmds),
+			"LPAC CP initialization failed to idle\n");
+}
+
+static bool is_queue_empty(struct adreno_device *adreno_dev, u32 queue_idx)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
+	struct hfi_queue_table *tbl = mem_addr->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
+		return true;
+
+	if (hdr->read_index == hdr->write_index)
+		return true;
+
+	return false;
+}
+
+static int hfi_f2h_main(void *arg)
+{
+	struct adreno_device *adreno_dev = arg;
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() ||
+			/* If msgq irq is enabled and msgq has messages to process */
+			(((hfi->irq_mask & HFI_IRQ_MSGQ_MASK) &&
+			  !is_queue_empty(adreno_dev, HFI_MSG_ID)) ||
+			 /* Trace buffer has messages to process */
+			 !gmu_core_is_trace_empty(gmu->trace.md->hostptr) ||
+			 /* Dbgq has messages to process */
+			 !is_queue_empty(adreno_dev, HFI_DBG_ID)));
+
+		if (kthread_should_stop())
+			break;
+
+		gen8_hwsched_process_msgq(adreno_dev);
+		gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev),
+					&gmu->pdev->dev, &gmu->trace);
+		gen8_hwsched_process_dbgq(adreno_dev, true);
+	}
+
+	return 0;
+}
+
+static void gen8_hwsched_hw_fence_timeout(struct work_struct *work)
+{
+	struct gen8_hwsched_hfi *hfi = container_of(work, struct gen8_hwsched_hfi, hw_fence_ws);
+	struct gen8_hwsched_device *gen8_hw_dev = container_of(hfi, struct gen8_hwsched_device,
+						hwsched_hfi);
+	struct adreno_device *adreno_dev = &gen8_hw_dev->gen8_dev.adreno_dev;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 unack_count, ts;
+	struct adreno_context *drawctxt = NULL;
+	bool fault;
+
+	/* Check msgq one last time before recording a fault */
+	gen8_hwsched_process_msgq(adreno_dev);
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	unack_count = hfi->hw_fence.unack_count;
+
+	fault = test_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags) &&
+		test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags) &&
+		(unack_count > MIN_HW_FENCE_UNACK_COUNT);
+
+	drawctxt = hfi->hw_fence.defer_drawctxt;
+	ts = hfi->hw_fence.defer_ts;
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	if (!fault)
+		return;
+
+	dev_err(&gmu->pdev->dev, "Hardware fence unack(%d) timeout\n", unack_count);
+
+	if (drawctxt) {
+		struct kgsl_process_private *proc_priv = drawctxt->base.proc_priv;
+
+		dev_err(&gmu->pdev->dev,
+			"Hardware fence got deferred for ctx:%d ts:%d pid:%d proc:%s\n",
+			drawctxt->base.id, ts, pid_nr(proc_priv->pid), proc_priv->comm);
+	}
+	gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+static void gen8_hwsched_hw_fence_timer(struct timer_list *t)
+{
+	struct gen8_hwsched_hfi *hfi = from_timer(hfi, t, hw_fence_timer);
+
+	kgsl_schedule_work(&hfi->hw_fence_ws);
+}
+
+int gen8_hwsched_hfi_probe(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev);
+
+	gmu->hfi.irq = kgsl_request_irq(gmu->pdev, "hfi",
+		gen8_hwsched_hfi_handler, adreno_dev);
+
+	if (gmu->hfi.irq < 0)
+		return gmu->hfi.irq;
+
+	hw_hfi->irq_mask = HFI_IRQ_MASK;
+
+	rwlock_init(&hw_hfi->msglock);
+
+	INIT_LIST_HEAD(&hw_hfi->msglist);
+	INIT_LIST_HEAD(&hw_hfi->detached_hw_fence_list);
+
+	init_waitqueue_head(&hw_hfi->f2h_wq);
+	init_waitqueue_head(&hw_hfi->hw_fence.unack_wq);
+
+	spin_lock_init(&hw_hfi->hw_fence.lock);
+
+	mutex_init(&hw_hfi->msgq_mutex);
+
+	INIT_WORK(&hw_hfi->hw_fence_ws, gen8_hwsched_hw_fence_timeout);
+
+	timer_setup(&hw_hfi->hw_fence_timer, gen8_hwsched_hw_fence_timer, 0);
+
+	return 0;
+}
+
+void gen8_hwsched_hfi_remove(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev);
+
+	if (hw_hfi->f2h_task)
+		kthread_stop(hw_hfi->f2h_task);
+}
+
+static void gen8_add_profile_events(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time)
+{
+	unsigned long flags;
+	u64 time_in_s;
+	unsigned long time_in_ns;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct kgsl_context *context = drawobj->context;
+	struct submission_info info = {0};
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (!time)
+		return;
+
+	/*
+	 * Here we are attempting to create a mapping between the
+	 * GPU time domain (alwayson counter) and the CPU time domain
+	 * (local_clock) by sampling both values as close together as
+	 * possible. This is useful for many types of debugging and
+	 * profiling. In order to make this mapping as accurate as
+	 * possible, we must turn off interrupts to avoid running
+	 * interrupt handlers between the two samples.
+	 */
+
+	local_irq_save(flags);
+
+	/* Read always on registers */
+	time->ticks = gpudev->read_alwayson(adreno_dev);
+
+	/* Trace the GPU time to create a mapping to ftrace time */
+	trace_adreno_cmdbatch_sync(context->id, context->priority,
+		drawobj->timestamp, time->ticks);
+
+	/* Get the kernel clock for time since boot */
+	time->ktime = local_clock();
+
+	/* Get the timeofday for the wall time (for the user) */
+	ktime_get_real_ts64(&time->utime);
+
+	local_irq_restore(flags);
+
+	/* Return kernel clock time to the client if requested */
+	time_in_s = time->ktime;
+	time_in_ns = do_div(time_in_s, 1000000000);
+
+	info.inflight = hwsched->inflight;
+	info.rb_id = adreno_get_level(context);
+	info.gmu_dispatch_queue = context->gmu_dispatch_queue;
+
+	cmdobj->submit_ticks = time->ticks;
+
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_SUBMIT,
+		pid_nr(context->proc_priv->pid),
+		context->id, drawobj->timestamp,
+		!!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+	trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks,
+		(unsigned long) time_in_s, time_in_ns / 1000, 0);
+
+	log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp,
+			context->priority, drawobj->flags);
+}
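+
+/*
+ * Illustrative sketch (not part of the driver): one way the paired
+ * (alwayson ticks, local_clock) sample captured above could be used to map a
+ * later GPU tick value back onto CPU time. The helper name and the 19200000
+ * (19.2 MHz) tick rate are assumptions for the example only; the real
+ * alwayson counter frequency is target specific, and 64-bit overflow of the
+ * intermediate multiply is ignored here for brevity.
+ */
+static inline u64 example_ticks_to_ns(u64 ticks, u64 sample_ticks,
+		u64 sample_ktime)
+{
+	/* Tick delta since the paired sample, scaled to nanoseconds */
+	u64 delta_ns = (ticks - sample_ticks) * 1000000000ULL;
+
+	/* do_div() divides in place and returns the remainder, as used above */
+	do_div(delta_ns, 19200000);
+
+	return sample_ktime + delta_ns;
+}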
+
+static void init_gmu_context_queue(struct adreno_context *drawctxt)
+{
+	struct kgsl_memdesc *md = &drawctxt->gmu_context_queue;
+	struct gmu_context_queue_header *hdr = md->hostptr;
+
+	hdr->start_addr = md->gmuaddr + sizeof(*hdr);
+	hdr->queue_size = (md->size - sizeof(*hdr)) >> 2;
+	hdr->hw_fence_buffer_va = drawctxt->gmu_hw_fence_queue.gmuaddr;
+	hdr->hw_fence_buffer_size = drawctxt->gmu_hw_fence_queue.size;
+}
+
+static u32 get_dq_id(struct adreno_device *adreno_dev, struct kgsl_context *context)
+{
+	struct dq_info *info;
+	u32 next;
+	u32 priority = adreno_get_level(context);
+
+	if (adreno_dev->lpac_enabled)
+		info = &gen8_hfi_dqs_lpac[priority];
+	else
+		info = &gen8_hfi_dqs[priority];
+
+	next = info->base_dq_id + info->offset;
+
+	info->offset = (info->offset + 1) % info->max_dq;
+
+	return next;
+}
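+
+/*
+ * Example of the round-robin above, with purely illustrative numbers: if the
+ * dq_info for a priority level has base_dq_id = 4 and max_dq = 3, successive
+ * calls for contexts at that priority return dispatch queue ids 4, 5, 6, 4,
+ * 5, ... as offset wraps modulo max_dq.
+ */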
+
+static int allocate_context_queues(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	int ret = 0;
+
+	if (!adreno_hwsched_context_queue_enabled(adreno_dev))
+		return 0;
+
+	if (test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags) &&
+		!drawctxt->gmu_hw_fence_queue.gmuaddr) {
+		ret = gen8_alloc_gmu_kernel_block(
+			to_gen8_gmu(adreno_dev), &drawctxt->gmu_hw_fence_queue,
+			HW_FENCE_QUEUE_SIZE, GMU_NONCACHED_KERNEL,
+			IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV);
+		if (ret) {
+			memset(&drawctxt->gmu_hw_fence_queue, 0x0,
+				sizeof(drawctxt->gmu_hw_fence_queue));
+			return ret;
+		}
+	}
+
+	if (!drawctxt->gmu_context_queue.gmuaddr) {
+		ret = gen8_alloc_gmu_kernel_block(
+			to_gen8_gmu(adreno_dev), &drawctxt->gmu_context_queue,
+			SZ_4K, GMU_NONCACHED_KERNEL,
+			IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV);
+		if (ret) {
+			memset(&drawctxt->gmu_context_queue, 0x0,
+				sizeof(drawctxt->gmu_context_queue));
+			return ret;
+		}
+		init_gmu_context_queue(drawctxt);
+	}
+
+	return 0;
+}
+
+static int send_context_register(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct hfi_register_ctxt_cmd cmd;
+	struct kgsl_pagetable *pt = context->proc_priv->pagetable;
+	int ret, asid = kgsl_mmu_pagetable_get_asid(pt, context);
+
+	if (asid < 0)
+		return asid;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_REGISTER_CONTEXT);
+	if (ret)
+		return ret;
+
+	ret = allocate_context_queues(adreno_dev, drawctxt);
+	if (ret)
+		return ret;
+
+	cmd.ctxt_id = context->id;
+	cmd.flags = HFI_CTXT_FLAG_NOTIFY | context->flags;
+	/*
+	 * HLOS SMMU driver programs context bank to look up ASID from TTBR0 during a page
+	 * table walk. So the TLB entries are tagged with the ASID from TTBR0. TLBIASID
+	 * invalidates TLB entries whose ASID matches the value that was written to the
+	 * CBn_TLBIASID register. Set ASID along with PT address.
+	 */
+	cmd.pt_addr = kgsl_mmu_pagetable_get_ttbr0(pt) |
+		FIELD_PREP(GENMASK_ULL(63, KGSL_IOMMU_ASID_START_BIT), asid);
+	cmd.ctxt_idr = context->id;
+	cmd.ctxt_bank = kgsl_mmu_pagetable_get_context_bank(pt, context);
+
+	return gen8_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static int send_context_pointers(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct hfi_context_pointers_cmd cmd = {0};
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int ret;
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_CONTEXT_POINTERS);
+	if (ret)
+		return ret;
+
+	cmd.ctxt_id = context->id;
+	cmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, soptimestamp);
+	cmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, eoptimestamp);
+	if (context->user_ctxt_record)
+		cmd.user_ctxt_record_addr =
+			context->user_ctxt_record->memdesc.gpuaddr;
+
+	if (adreno_hwsched_context_queue_enabled(adreno_dev))
+		cmd.gmu_context_queue_addr = drawctxt->gmu_context_queue.gmuaddr;
+
+	return gen8_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
+}
+
+static int hfi_context_register(struct adreno_device *adreno_dev,
+	struct kgsl_context *context)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (context->gmu_registered)
+		return 0;
+
+	ret = send_context_register(adreno_dev, context);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to register context %u: %d\n",
+			context->id, ret);
+
+		if (device->gmu_fault)
+			gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+
+		return ret;
+	}
+
+	ret = send_context_pointers(adreno_dev, context);
+	if (ret) {
+		dev_err(&gmu->pdev->dev,
+			"Unable to register context %u pointers: %d\n",
+			context->id, ret);
+
+		if (device->gmu_fault)
+			gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+
+		return ret;
+	}
+
+	context->gmu_registered = true;
+	if (adreno_hwsched_context_queue_enabled(adreno_dev))
+		context->gmu_dispatch_queue = UINT_MAX;
+	else
+		context->gmu_dispatch_queue = get_dq_id(adreno_dev, context);
+
+	return 0;
+}
+
+static void populate_ibs(struct adreno_device *adreno_dev,
+	struct hfi_submit_cmd *cmd, struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct hfi_issue_ib *issue_ib;
+	struct kgsl_memobj_node *ib;
+
+	if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) {
+		struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+		struct kgsl_memdesc *big_ib;
+
+		if (test_bit(CMDOBJ_RECURRING_START, &cmdobj->priv))
+			big_ib = hfi->big_ib_recurring;
+		else
+			big_ib = hfi->big_ib;
+		/*
+		 * The dispatcher ensures that there is only one big IB inflight
+		 */
+		cmd->big_ib_gmu_va = big_ib->gmuaddr;
+		cmd->flags |= CMDBATCH_INDIRECT;
+		issue_ib = big_ib->hostptr;
+	} else {
+		issue_ib = (struct hfi_issue_ib *)&cmd[1];
+	}
+
+	list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+		issue_ib->addr = ib->gpuaddr;
+		issue_ib->size = ib->size;
+		issue_ib++;
+	}
+
+	cmd->numibs = cmdobj->numibs;
+}
+
+#define HFI_DSP_IRQ_BASE 2
+
+#define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE)
+
+int gen8_gmu_context_queue_write(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, u32 *msg, u32 size_bytes,
+	struct kgsl_drawobj *drawobj, struct adreno_submit_time *time)
+{
+	struct gmu_context_queue_header *hdr = drawctxt->gmu_context_queue.hostptr;
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u32 *queue = drawctxt->gmu_context_queue.hostptr + sizeof(*hdr);
+	u32 i, empty_space, write_idx = hdr->write_index, read_idx = hdr->read_index;
+	u32 size_dwords = size_bytes >> 2;
+	u32 align_size = ALIGN(size_dwords, SZ_4);
+	u32 id = MSG_HDR_GET_ID(*msg);
+	struct kgsl_drawobj_cmd *cmdobj = NULL;
+
+	empty_space = (write_idx >= read_idx) ?
+			(hdr->queue_size - (write_idx - read_idx))
+			: (read_idx - write_idx);
+
+	if (empty_space <= align_size)
+		return -ENOSPC;
+
+	if (!IS_ALIGNED(size_bytes, sizeof(u32)))
+		return -EINVAL;
+
+	for (i = 0; i < size_dwords; i++) {
+		queue[write_idx] = msg[i];
+		write_idx = (write_idx + 1) % hdr->queue_size;
+	}
+
+	/* Fill any unused space at the end of the write buffer with a cookie value */
+	for (; i < align_size; i++) {
+		queue[write_idx] = 0xfafafafa;
+		write_idx = (write_idx + 1) % hdr->queue_size;
+	}
+
+	/* Ensure packet is written out before proceeding */
+	wmb();
+
+	if (drawobj->type & SYNCOBJ_TYPE) {
+		struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+
+		trace_adreno_syncobj_submitted(drawobj->context->id, drawobj->timestamp,
+			syncobj->numsyncs, gpudev->read_alwayson(adreno_dev));
+		goto done;
+	}
+
+	cmdobj = CMDOBJ(drawobj);
+
+	gen8_add_profile_events(adreno_dev, cmdobj, time);
+
+	/*
+	 * Put the profiling information in the user profiling buffer.
+	 * The hfi_update_write_idx below has a wmb() before the actual
+	 * write index update to ensure that the GMU does not see the
+	 * packet before the profile data is written out.
+	 */
+	adreno_profile_submit_time(time);
+
+done:
+	trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
+
+	hfi_update_write_idx(&hdr->write_index, write_idx);
+
+	return 0;
+}
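+
+/*
+ * Worked example of the free-space check above, with illustrative numbers
+ * only: for queue_size = 1024 dwords, read_index = 100 and write_index = 900,
+ * empty_space = 1024 - (900 - 100) = 224. If the writer has wrapped so that
+ * write_index = 20 and read_index = 100, empty_space = 100 - 20 = 80. A packet
+ * is accepted only when its aligned size is strictly smaller than empty_space,
+ * so write_index can never catch up to read_index.
+ */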
+
+static u32 get_irq_bit(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj)
+{
+	if (!adreno_hwsched_context_queue_enabled(adreno_dev))
+		return drawobj->context->gmu_dispatch_queue;
+
+	if (adreno_is_preemption_enabled(adreno_dev))
+		return adreno_get_level(drawobj->context);
+
+	if (kgsl_context_is_lpac(drawobj->context))
+		return 1;
+
+	return 0;
+}
+
+static int add_gmu_waiter(struct adreno_device *adreno_dev,
+	struct dma_fence *fence)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = msm_hw_fence_wait_update(adreno_dev->hwsched.hw_fence.handle,
+			&fence, 1, true);
+
+	if (ret)
+		dev_err_ratelimited(device->dev,
+			"Failed to add GMU as waiter ret:%d fence ctx:%llu ts:%llu\n",
+			ret, fence->context, fence->seqno);
+
+	return ret;
+}
+
+static void populate_kgsl_fence(struct hfi_syncobj *obj,
+	struct dma_fence *fence)
+{
+	struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence;
+	struct kgsl_sync_timeline *ktimeline = kfence->parent;
+	unsigned long flags;
+
+	obj->flags |= BIT(GMU_SYNCOBJ_FLAG_KGSL_FENCE_BIT);
+
+	spin_lock_irqsave(&ktimeline->lock, flags);
+	/* If the context is going away or the dma fence is signaled, mark the fence as triggered */
+	if (!ktimeline->context || dma_fence_is_signaled_locked(fence)) {
+		obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT);
+		spin_unlock_irqrestore(&ktimeline->lock, flags);
+		return;
+	}
+	obj->ctxt_id = ktimeline->context->id;
+	spin_unlock_irqrestore(&ktimeline->lock, flags);
+
+	obj->seq_no =  kfence->timestamp;
+}
+
+static int _submit_hw_fence(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj, void *cmdbuf)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	int i, j;
+	u32 cmd_sizebytes, seqnum;
+	struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+	struct hfi_submit_syncobj *cmd;
+	struct hfi_syncobj *obj = NULL;
+
+	/* Add hfi_syncobj struct for sync object */
+	cmd_sizebytes = sizeof(*cmd) +
+			(sizeof(struct hfi_syncobj) *
+			syncobj->num_hw_fence);
+
+	if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE))
+		return -EMSGSIZE;
+
+	memset(cmdbuf, 0x0, cmd_sizebytes);
+	cmd = cmdbuf;
+	cmd->num_syncobj = syncobj->num_hw_fence;
+	obj = (struct hfi_syncobj *)&cmd[1];
+
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i];
+		struct kgsl_sync_fence_cb *kcb = event->handle;
+		struct dma_fence **fences;
+		struct dma_fence_array *array;
+		u32 num_fences;
+
+		if (!kcb)
+			return -EINVAL;
+
+		array = to_dma_fence_array(kcb->fence);
+		if (array != NULL) {
+			num_fences = array->num_fences;
+			fences = array->fences;
+		} else {
+			num_fences = 1;
+			fences = &kcb->fence;
+		}
+
+		for (j = 0; j < num_fences; j++) {
+
+			/*
+			 * If this sync object has a software only fence, make sure that it is
+			 * already signaled so that we can skip sending this fence to the GMU.
+			 */
+			if (!test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fences[j]->flags)) {
+				if (WARN(!dma_fence_is_signaled(fences[j]),
+					"sync object has unsignaled software fence"))
+					return -EINVAL;
+				continue;
+			}
+
+			if (is_kgsl_fence(fences[j])) {
+				populate_kgsl_fence(obj, fences[j]);
+			} else {
+				int ret = add_gmu_waiter(adreno_dev, fences[j]);
+
+				if (ret) {
+					syncobj->flags &= ~KGSL_SYNCOBJ_HW;
+					return ret;
+				}
+
+				if (test_bit(MSM_HW_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags) ||
+					test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags))
+					obj->flags |= BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT);
+
+				obj->ctxt_id = fences[j]->context;
+				obj->seq_no =  fences[j]->seqno;
+			}
+			trace_adreno_input_hw_fence(drawobj->context->id, obj->ctxt_id,
+				obj->seq_no, obj->flags, fences[j]->ops->get_timeline_name ?
+				fences[j]->ops->get_timeline_name(fences[j]) : "unknown");
+
+			obj++;
+		}
+	}
+
+	/*
+	 * Attach a timestamp to this SYNCOBJ to keep track of whether GMU has deemed it signaled
+	 * or not.
+	 */
+	drawobj->timestamp = ++drawctxt->syncobj_timestamp;
+	cmd->timestamp = drawobj->timestamp;
+
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_SYNCOBJ, HFI_MSG_CMD);
+	seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum);
+	cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2);
+
+	return gen8_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes,
+			drawobj, NULL);
+}
+
+int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	spin_lock(&drawctxt->lock);
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct gmu_context_queue_header *hdr =  drawctxt->gmu_context_queue.hostptr;
+
+		if (timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0) {
+			dev_err(&gmu->pdev->dev,
+				"detached ctx:%d has unsignaled fence ts:%d retired:%d\n",
+				drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts);
+			ret = -EINVAL;
+			break;
+		}
+
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+	}
+	spin_unlock(&drawctxt->lock);
+
+	return ret;
+}
+
+/**
+ * move_detached_context_hardware_fences - Move all pending hardware fences belonging to this
+ * context to the detached hardware fence list so as to send them to TxQueue after fault recovery.
+ * This is needed because this context may get destroyed before fault recovery gets executed.
+ */
+static void move_detached_context_hardware_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+
+	/* We don't need the drawctxt lock here because this context has already been detached */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct gmu_context_queue_header *hdr =  drawctxt->gmu_context_queue.hostptr;
+
+		if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) {
+			_kgsl_context_get(&drawctxt->base);
+			list_move_tail(&entry->node, &hfi->detached_hw_fence_list);
+			continue;
+		}
+
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	/* Also grab all the hardware fences which were never sent to GMU */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) {
+		_kgsl_context_get(&drawctxt->base);
+		list_move_tail(&entry->node, &hfi->detached_hw_fence_list);
+	}
+}
+
+/**
+ * check_detached_context_hardware_fences - When this context has been un-registered with the GMU,
+ * make sure all the hardware fences (that were sent to GMU) for this context have been sent to
+ * TxQueue. Also, send to GMU any hardware fences that were not yet dispatched. In case of an
+ * error, move the pending hardware fences to the detached hardware fence list, log the error,
+ * take a snapshot and trigger recovery.
+ */
+static int check_detached_context_hardware_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hw_fence_entry *entry, *tmp;
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	int ret = 0;
+
+	/* We don't need the drawctxt lock because this context has been detached */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_inflight_list, node) {
+		struct gmu_context_queue_header *hdr =  drawctxt->gmu_context_queue.hostptr;
+
+		if ((timestamp_cmp((u32)entry->cmd.ts, hdr->out_fence_ts) > 0)) {
+			dev_err(&gmu->pdev->dev,
+				"detached ctx:%d has unsignaled fence ts:%d retired:%d\n",
+				drawctxt->base.id, (u32)entry->cmd.ts, hdr->out_fence_ts);
+			ret = -EINVAL;
+			goto fault;
+		}
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	/* Send hardware fences (to TxQueue) that were not dispatched to GMU */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) {
+
+		ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry,
+			HW_FENCE_FLAG_SKIP_MEMSTORE);
+		if (ret)
+			goto fault;
+
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	return 0;
+
+fault:
+	move_detached_context_hardware_fences(adreno_dev, drawctxt);
+	gmu_core_fault_snapshot(device);
+	gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+
+	return ret;
+}
+
+static inline int setup_hw_fence_info_cmd(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry)
+{
+	struct kgsl_sync_fence *kfence = entry->kfence;
+	int ret;
+
+	ret = CMD_MSG_HDR(entry->cmd, H2F_MSG_HW_FENCE_INFO);
+	if (ret)
+		return ret;
+
+	entry->cmd.gmu_ctxt_id = entry->drawctxt->base.id;
+	entry->cmd.ctxt_id = kfence->fence.context;
+	entry->cmd.ts = kfence->fence.seqno;
+
+	entry->cmd.hash_index = kfence->hw_fence_index;
+
+	return 0;
+}
+
+/*
+ * gen8_send_hw_fence_hfi_wait_ack - This function is used in cases where multiple hardware fences
+ * are to be sent to GMU. Hence, we must send them one by one to avoid overwhelming the GMU with
+ * multiple fences in a short span of time.
+ */
+int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry, u64 flags)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+	u32 seqnum;
+
+	/* Device mutex is necessary to ensure only one hardware fence ack is being waited for */
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	init_completion(&gen8_hw_fence_ack.complete);
+
+	entry->cmd.flags |= flags;
+	seqnum = atomic_inc_return(&hfi->hw_fence.seqnum);
+
+	gen8_hw_fence_ack.sent_hdr = entry->cmd.hdr;
+
+	/*
+	 * We don't need to increment the unack count here as we are waiting for the ack for
+	 * this fence before sending another hardware fence.
+	 */
+	ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd));
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	if (!ret)
+		ret = adreno_hwsched_wait_ack_completion(adreno_dev,
+			&gmu->pdev->dev, &gen8_hw_fence_ack,
+			gen8_hwsched_process_msgq);
+
+	memset(&gen8_hw_fence_ack, 0x0, sizeof(gen8_hw_fence_ack));
+	return ret;
+}
+
+/**
+ * drawctxt_queue_hw_fence - Add a hardware fence to the draw context's hardware fence list and make
+ * sure the list remains sorted (with the fence with the largest timestamp at the end)
+ */
+static void drawctxt_queue_hw_fence(struct adreno_context *drawctxt,
+	struct adreno_hw_fence_entry *new)
+{
+	struct adreno_hw_fence_entry *entry = NULL;
+	u32 ts = (u32)new->cmd.ts;
+
+	/* Walk the list backwards to find the right spot for this fence */
+	list_for_each_entry_reverse(entry, &drawctxt->hw_fence_list, node) {
+		if (timestamp_cmp(ts, (u32)entry->cmd.ts) > 0)
+			break;
+	}
+
+	list_add(&new->node, &entry->node);
+}
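+
+/*
+ * Example of the sorted insert above (timestamps are illustrative): with
+ * fences for ts 5, 9 and 12 already queued, adding ts 10 walks back past 12,
+ * stops at 9 and links the new entry after it, so the list stays ordered as
+ * 5, 9, 10, 12.
+ */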
+
+#define DRAWCTXT_SLOT_AVAILABLE(count)  \
+	((count + 1) < (HW_FENCE_QUEUE_SIZE / sizeof(struct hfi_hw_fence_info)))
+
+/**
+ * allocate_hw_fence_entry - Allocate an entry to keep track of a hardware fence. This is freed
+ * when we know GMU has sent this fence to the TxQueue
+ */
+static struct adreno_hw_fence_entry *allocate_hw_fence_entry(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct kgsl_sync_fence *kfence)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct adreno_hw_fence_entry *entry;
+
+	if (!DRAWCTXT_SLOT_AVAILABLE(drawctxt->hw_fence_count))
+		return NULL;
+
+	entry = kmem_cache_zalloc(hwsched->hw_fence_cache, GFP_ATOMIC);
+	if (!entry)
+		return NULL;
+
+	entry->kfence = kfence;
+	entry->drawctxt = drawctxt;
+
+	if (setup_hw_fence_info_cmd(adreno_dev, entry)) {
+		kmem_cache_free(hwsched->hw_fence_cache, entry);
+		return NULL;
+	}
+
+	dma_fence_get(&kfence->fence);
+
+	drawctxt->hw_fence_count++;
+	atomic_inc(&hwsched->hw_fence_count);
+
+	INIT_LIST_HEAD(&entry->node);
+	INIT_LIST_HEAD(&entry->reset_node);
+	return entry;
+}
+
+static bool _hw_fence_end_sleep(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	bool ret;
+
+	spin_lock(&hfi->hw_fence.lock);
+	ret = !test_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags);
+	spin_unlock(&hfi->hw_fence.lock);
+
+	return ret;
+}
+
+/**
+ * _hw_fence_sleep() - Check if the thread needs to sleep until the hardware fence unack count
+ * drops to a desired threshold.
+ *
+ * Return: negative error code if the thread was woken up by a signal, or the context became bad in
+ * the meantime, or the hardware fence unack count hasn't yet dropped to a desired threshold, or
+ * if fault recovery is imminent.
+ * Otherwise, return 0.
+ */
+static int _hw_fence_sleep(struct adreno_device *adreno_dev, struct adreno_context *drawctxt)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	int ret = 0;
+
+	if (!test_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags))
+		return 0;
+
+	spin_unlock(&hfi->hw_fence.lock);
+	spin_unlock(&drawctxt->lock);
+
+	ret = wait_event_interruptible(hfi->hw_fence.unack_wq,
+		_hw_fence_end_sleep(adreno_dev));
+
+	spin_lock(&drawctxt->lock);
+	spin_lock(&hfi->hw_fence.lock);
+
+	/*
+	 * If the thread received a signal, or the context became bad in the meantime, or the limit
+	 * is still not settled, then return an error to avoid creating this hardware fence
+	 */
+	if ((ret == -ERESTARTSYS) || kgsl_context_is_bad(&drawctxt->base) ||
+		test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags))
+		return -EINVAL;
+
+	/*
+	 * If fault recovery is imminent then return error code to avoid creating new hardware
+	 * fences until recovery is complete
+	 */
+	if (test_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags))
+		return -EBUSY;
+
+	return ret;
+}
+
+void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev,
+	struct kgsl_sync_fence *kfence)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_sync_timeline *ktimeline = kfence->parent;
+	struct kgsl_context *context = ktimeline->context;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct adreno_hw_fence_entry *entry = NULL;
+	struct msm_hw_fence_create_params params = {0};
+	/* Only allow a single log in a second */
+	static DEFINE_RATELIMIT_STATE(_rs, HZ, 1);
+	struct gen8_hwsched_hfi *hw_hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	u32 retired = 0;
+	int ret = 0;
+	bool destroy_hw_fence = true;
+
+	params.fence = &kfence->fence;
+	params.handle = &kfence->hw_fence_index;
+	kfence->hw_fence_handle = adreno_dev->hwsched.hw_fence.handle;
+
+	ret = msm_hw_fence_create(kfence->hw_fence_handle, &params);
+	if ((ret || IS_ERR_OR_NULL(params.handle))) {
+		if (__ratelimit(&_rs))
+			dev_err(device->dev, "Failed to create ctx:%d ts:%d hardware fence:%d\n",
+				kfence->context_id, kfence->timestamp, ret);
+		return;
+	}
+
+	spin_lock(&drawctxt->lock);
+	spin_lock(&hw_hfi->hw_fence.lock);
+
+	/*
+	 * If we create a hardware fence and this context is going away, we may never dispatch
+	 * this fence to the GMU. Hence, avoid creating a hardware fence if context is going away.
+	 */
+	if (kgsl_context_is_bad(context))
+		goto done;
+
+	entry = allocate_hw_fence_entry(adreno_dev, drawctxt, kfence);
+	if (!entry)
+		goto done;
+
+	/* If recovery is imminent, then do not create a hardware fence */
+	if (test_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hw_hfi->hw_fence.flags)) {
+		destroy_hw_fence = true;
+		goto done;
+	}
+
+	ret = _hw_fence_sleep(adreno_dev, drawctxt);
+	if (ret)
+		goto done;
+
+	/*
+	 * If this ts hasn't been submitted yet, then store it in the drawctxt hardware fence
+	 * list and return. This fence will be sent to GMU when this ts is dispatched to GMU.
+	 */
+	if (timestamp_cmp(kfence->timestamp, drawctxt->internal_timestamp) > 0) {
+		drawctxt_queue_hw_fence(drawctxt, entry);
+		destroy_hw_fence = false;
+		goto done;
+	}
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &retired);
+
+	/*
+	 * Check if timestamp is retired. If we are in SLUMBER at this point, the timestamp is
+	 * guaranteed to be retired. This way, we don't need the device mutex to check the device
+	 * state explicitly.
+	 */
+	if (timestamp_cmp(retired, kfence->timestamp) >= 0) {
+		kgsl_sync_timeline_signal(ktimeline, kfence->timestamp);
+		goto done;
+	}
+
+	/*
+	 * If timestamp is not retired then GMU must already be powered up. This is because SLUMBER
+	 * thread has to wait for hardware fence spinlock to make sure the hardware fence unack
+	 * count is zero.
+	 */
+	ret = _send_hw_fence_no_ack(adreno_dev, entry);
+	if (ret) {
+		if (__ratelimit(&_rs))
+			dev_err(&gmu->pdev->dev, "Aborting hw fence for ctx:%d ts:%d ret:%d\n",
+				kfence->context_id, kfence->timestamp, ret);
+		goto done;
+	}
+
+	list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list);
+
+	destroy_hw_fence = false;
+
+done:
+	if (destroy_hw_fence) {
+		msm_hw_fence_destroy(kfence->hw_fence_handle, &kfence->fence);
+		if (entry)
+			gen8_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	spin_unlock(&hw_hfi->hw_fence.lock);
+	spin_unlock(&drawctxt->lock);
+}
+
+/**
+ * setup_hw_fence_deferred_ctxt - The hardware fence(s) from this context couldn't be sent to the
+ * GMU because the hardware fence unack count reached a threshold. Hence, set up this context such
+ * that these hardware fences are sent to the GMU when the unack count drops to a desired threshold.
+ */
+static void setup_hw_fence_deferred_ctxt(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, u32 ts)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+
+	if (!_kgsl_context_get(&drawctxt->base))
+		return;
+
+	hfi->hw_fence.defer_drawctxt = drawctxt;
+	hfi->hw_fence.defer_ts = ts;
+	/*
+	 * Increment the active count so that device doesn't get powered off until this fence has
+	 * been sent to GMU
+	 */
+	gen8_hwsched_active_count_get(adreno_dev);
+}
+
+/**
+ * process_hw_fence_queue - This function walks the draw context's list of hardware fences
+ * and sends the ones which have a timestamp less than or equal to the timestamp that just
+ * got submitted to the GMU.
+ */
+static void process_hw_fence_queue(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, u32 ts)
+{
+	struct adreno_hw_fence_entry *entry = NULL, *next;
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	int ret = 0;
+
+	/* This list is sorted with smallest timestamp at head and highest timestamp at tail */
+	list_for_each_entry_safe(entry, next, &drawctxt->hw_fence_list, node) {
+
+		if (timestamp_cmp((u32)entry->cmd.ts, ts) > 0)
+			return;
+
+		spin_lock(&hfi->hw_fence.lock);
+
+		if (test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags)) {
+			setup_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts);
+			spin_unlock(&hfi->hw_fence.lock);
+			return;
+		}
+
+		ret = _send_hw_fence_no_ack(adreno_dev, entry);
+
+		spin_unlock(&hfi->hw_fence.lock);
+
+		if (ret)
+			return;
+
+		/*
+		 * A fence that is sent to GMU must be added to the drawctxt->hw_fence_inflight_list
+		 * so that we can keep track of when GMU sends it to the TxQueue
+		 */
+		list_del_init(&entry->node);
+		list_add_tail(&entry->node, &drawctxt->hw_fence_inflight_list);
+	}
+}
+
+/* Sizes in the functions below are in units of dwords */
+static int gen8_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
+	u32 *msg, u32 size_bytes, struct kgsl_drawobj_cmd *cmdobj, struct adreno_submit_time *time)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
+	struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
+	u32 *queue;
+	u32 i, write, empty_space;
+	u32 size_dwords = size_bytes >> 2;
+	u32 align_size = ALIGN(size_dwords, SZ_4);
+	u32 id = MSG_HDR_GET_ID(*msg);
+
+	if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
+		return -EINVAL;
+
+	queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);
+
+	empty_space = (hdr->write_index >= hdr->read_index) ?
+			(hdr->queue_size - (hdr->write_index - hdr->read_index))
+			: (hdr->read_index - hdr->write_index);
+
+	if (empty_space <= align_size)
+		return -ENOSPC;
+
+	write = hdr->write_index;
+
+	for (i = 0; i < size_dwords; i++) {
+		queue[write] = msg[i];
+		write = (write + 1) % hdr->queue_size;
+	}
+
+	/* Fill any unused space at the end of the write buffer with a cookie value */
+	for (; i < align_size; i++) {
+		queue[write] = 0xfafafafa;
+		write = (write + 1) % hdr->queue_size;
+	}
+
+	/* Ensure packet is written out before proceeding */
+	wmb();
+
+	gen8_add_profile_events(adreno_dev, cmdobj, time);
+
+	/*
+	 * Put the profiling information in the user profiling buffer.
+	 * The hfi_update_write_idx below has a wmb() before the actual
+	 * write index update to ensure that the GMU does not see the
+	 * packet before the profile data is written out.
+	 */
+	adreno_profile_submit_time(time);
+
+	trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
+
+	hfi_update_write_idx(&hdr->write_index, write);
+
+	return 0;
+}
+
+int gen8_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj)
+{
+	int ret = 0;
+	u32 cmd_sizebytes, seqnum;
+	struct kgsl_drawobj_cmd *cmdobj = NULL;
+	struct hfi_submit_cmd *cmd;
+	struct adreno_submit_time time = {0};
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	static void *cmdbuf;
+
+	if (cmdbuf == NULL) {
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		cmdbuf = devm_kzalloc(&device->pdev->dev, HFI_MAX_MSG_SIZE,
+				GFP_KERNEL);
+		if (!cmdbuf)
+			return -ENOMEM;
+	}
+
+	ret = hfi_context_register(adreno_dev, drawobj->context);
+	if (ret)
+		return ret;
+
+	if ((drawobj->type & SYNCOBJ_TYPE) != 0)
+		return _submit_hw_fence(adreno_dev, drawobj, cmdbuf);
+
+	cmdobj = CMDOBJ(drawobj);
+
+	/*
+	 * If the MARKER object is retired, it doesn't need to be dispatched to GMU. Simply trigger
+	 * any pending fences that are less than/equal to this object's timestamp.
+	 */
+	if (test_bit(CMDOBJ_MARKER_EXPIRED, &cmdobj->priv)) {
+		spin_lock(&drawctxt->lock);
+		process_hw_fence_queue(adreno_dev, drawctxt, drawobj->timestamp);
+		spin_unlock(&drawctxt->lock);
+		return 0;
+	}
+
+	/* Add an issue_ib struct for each IB */
+	if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS ||
+		test_bit(CMDOBJ_SKIP, &cmdobj->priv))
+		cmd_sizebytes = sizeof(*cmd);
+	else
+		cmd_sizebytes = sizeof(*cmd) +
+			(sizeof(struct hfi_issue_ib) * cmdobj->numibs);
+
+	if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE))
+		return -EMSGSIZE;
+
+	memset(cmdbuf, 0x0, cmd_sizebytes);
+
+	cmd = cmdbuf;
+
+	cmd->ctxt_id = drawobj->context->id;
+	cmd->flags = HFI_CTXT_FLAG_NOTIFY;
+	if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME)
+		cmd->flags |= CMDBATCH_EOF;
+
+	cmd->ts = drawobj->timestamp;
+
+	if (test_bit(CMDOBJ_SKIP, &cmdobj->priv))
+		goto skipib;
+
+	populate_ibs(adreno_dev, cmd, cmdobj);
+
+	if ((drawobj->flags & KGSL_DRAWOBJ_PROFILING) &&
+		cmdobj->profiling_buf_entry) {
+
+		time.drawobj = drawobj;
+
+		cmd->profile_gpuaddr_lo =
+			lower_32_bits(cmdobj->profiling_buffer_gpuaddr);
+		cmd->profile_gpuaddr_hi =
+			upper_32_bits(cmdobj->profiling_buffer_gpuaddr);
+
+		/* Indicate to GMU to do user profiling for this submission */
+		cmd->flags |= CMDBATCH_PROFILING;
+	}
+
+skipib:
+	adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj);
+
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD);
+	seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum);
+	cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2);
+
+	if (adreno_hwsched_context_queue_enabled(adreno_dev))
+		ret = gen8_gmu_context_queue_write(adreno_dev,
+			drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, &time);
+	else
+		ret = gen8_hfi_dispatch_queue_write(adreno_dev,
+			HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue,
+			(u32 *)cmd, cmd_sizebytes, cmdobj, &time);
+	if (ret)
+		return ret;
+
+	/* Send interrupt to GMU to receive the message */
+	gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN8_GMUCX_HOST2GMU_INTR_SET,
+		DISPQ_IRQ_BIT(get_irq_bit(adreno_dev, drawobj)));
+
+	spin_lock(&drawctxt->lock);
+	process_hw_fence_queue(adreno_dev, drawctxt, drawobj->timestamp);
+	/*
+	 * We need to update the internal timestamp while holding the drawctxt lock since we have to
+	 * check it in the hardware fence creation path, where we are not taking the device mutex.
+	 */
+	drawctxt->internal_timestamp = drawobj->timestamp;
+	spin_unlock(&drawctxt->lock);
+
+	return 0;
+}
+
+int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct hfi_submit_cmd *cmd;
+	struct kgsl_memobj_node *ib;
+	u32 cmd_sizebytes;
+	int ret;
+	static bool active;
+
+	if (adreno_gpu_halt(adreno_dev) || adreno_hwsched_gpu_fault(adreno_dev))
+		return -EBUSY;
+
+	if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) {
+		cmdobj->numibs = 0;
+	} else {
+		list_for_each_entry(ib, &cmdobj->cmdlist, node)
+			cmdobj->numibs++;
+	}
+
+	if (cmdobj->numibs > HWSCHED_MAX_IBS)
+		return -EINVAL;
+
+	if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS)
+		cmd_sizebytes = sizeof(*cmd);
+	else
+		cmd_sizebytes = sizeof(*cmd) +
+			(sizeof(struct hfi_issue_ib) * cmdobj->numibs);
+
+	if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE))
+		return -EMSGSIZE;
+
+	cmd = kzalloc(cmd_sizebytes, GFP_KERNEL);
+	if (cmd == NULL)
+		return -ENOMEM;
+
+	if (test_bit(CMDOBJ_RECURRING_START, &cmdobj->priv)) {
+		if (!active) {
+			ret = adreno_active_count_get(adreno_dev);
+			if (ret) {
+				kfree(cmd);
+				return ret;
+			}
+			active = true;
+		}
+		cmd->flags |= CMDBATCH_RECURRING_START;
+		populate_ibs(adreno_dev, cmd, cmdobj);
+	} else
+		cmd->flags |= CMDBATCH_RECURRING_STOP;
+
+	cmd->ctxt_id = drawobj->context->id;
+
+	ret = hfi_context_register(adreno_dev, drawobj->context);
+	if (ret) {
+		adreno_active_count_put(adreno_dev);
+		active = false;
+		kfree(cmd);
+		return ret;
+	}
+
+	cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD);
+
+	ret = gen8_hfi_send_cmd_async(adreno_dev, cmd, sizeof(*cmd));
+
+	kfree(cmd);
+
+	if (ret) {
+		adreno_active_count_put(adreno_dev);
+		active = false;
+		return ret;
+	}
+
+	if (test_bit(CMDOBJ_RECURRING_STOP, &cmdobj->priv)) {
+		adreno_hwsched_retire_cmdobj(hwsched, hwsched->recurring_cmdobj);
+		del_timer_sync(&hwsched->lsr_timer);
+		hwsched->recurring_cmdobj = NULL;
+		if (active)
+			adreno_active_count_put(adreno_dev);
+		active = false;
+		return ret;
+	}
+
+	hwsched->recurring_cmdobj = cmdobj;
+	/* Start LSR timer for power stats collection */
+	mod_timer(&hwsched->lsr_timer, jiffies + msecs_to_jiffies(10));
+	return ret;
+}
+
+void gen8_trigger_hw_fence_cpu(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry)
+{
+	int ret = msm_hw_fence_update_txq(adreno_dev->hwsched.hw_fence.handle,
+			entry->cmd.hash_index, 0, 0);
+
+	if (ret) {
+		dev_err_ratelimited(adreno_dev->dev.dev,
+			"Failed to trigger hw fence via cpu: ctx:%d ts:%d ret:%d\n",
+			entry->drawctxt->base.id, (u32)entry->cmd.ts, ret);
+		return;
+	}
+
+	msm_hw_fence_trigger_signal(adreno_dev->hwsched.hw_fence.handle, IPCC_CLIENT_GPU,
+		IPCC_CLIENT_APSS, 0);
+}
+
+/* We don't want to unnecessarily wake the GMU to trigger hardware fences */
+static void drain_context_hw_fence_cpu(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) {
+
+		gen8_trigger_hw_fence_cpu(adreno_dev, entry);
+
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+	}
+}
+
+int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hw_fence_entry *entry, *tmp;
+	int ret = 0;
+
+	/* We don't need the drawctxt lock here as this context has already been invalidated */
+	list_for_each_entry_safe(entry, tmp, &drawctxt->hw_fence_list, node) {
+
+		/* Any error here is fatal */
+		ret = gen8_send_hw_fence_hfi_wait_ack(adreno_dev, entry,
+			HW_FENCE_FLAG_SKIP_MEMSTORE);
+		if (ret)
+			break;
+
+		gen8_remove_hw_fence_entry(adreno_dev, entry);
+	}
+
+	return ret;
+}
+
+static void trigger_context_unregister_fault(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev));
+
+	/* Make sure we send all fences from this context to the TxQueue after recovery */
+	move_detached_context_hardware_fences(adreno_dev, drawctxt);
+	gen8_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT);
+}
+
+static int send_context_unregister_hfi(struct adreno_device *adreno_dev,
+	struct kgsl_context *context, u32 ts)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct pending_cmd pending_ack;
+	struct hfi_unregister_ctxt_cmd cmd;
+	u32 seqnum;
+	int ret;
+
+	/* Only send HFI if device is not in SLUMBER */
+	if (!context->gmu_registered ||
+		!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) {
+		drain_context_hw_fence_cpu(adreno_dev, drawctxt);
+		return 0;
+	}
+
+	ret = CMD_MSG_HDR(cmd, H2F_MSG_UNREGISTER_CONTEXT);
+	if (ret)
+		return ret;
+
+	cmd.ctxt_id = context->id;
+	cmd.ts = ts;
+
+	/*
+	 * Although we know device is powered on, we can still enter SLUMBER
+	 * because the wait for ack below is done without holding the mutex. So
+	 * take an active count before releasing the mutex so as to avoid a
+	 * concurrent SLUMBER sequence while GMU is un-registering this context.
+	 */
+	ret = gen8_hwsched_active_count_get(adreno_dev);
+	if (ret) {
+		trigger_context_unregister_fault(adreno_dev, drawctxt);
+		return ret;
+	}
+
+	seqnum = atomic_inc_return(&gmu->hfi.seqnum);
+	cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2);
+	add_waiter(hfi, cmd.hdr, &pending_ack);
+
+	ret = gen8_hfi_cmdq_write(adreno_dev, (u32 *)&cmd, sizeof(cmd));
+	if (ret) {
+		trigger_context_unregister_fault(adreno_dev, drawctxt);
+		goto done;
+	}
+
+	ret = adreno_hwsched_ctxt_unregister_wait_completion(adreno_dev,
+		&gmu->pdev->dev, &pending_ack, gen8_hwsched_process_msgq, &cmd);
+	if (ret) {
+		trigger_context_unregister_fault(adreno_dev, drawctxt);
+		goto done;
+	}
+
+	ret = check_detached_context_hardware_fences(adreno_dev, drawctxt);
+	if (!ret)
+		ret = check_ack_failure(adreno_dev, &pending_ack);
+
+done:
+	gen8_hwsched_active_count_put(adreno_dev);
+	del_waiter(hfi, &pending_ack);
+
+	return ret;
+}
+
+void gen8_hwsched_context_detach(struct adreno_context *drawctxt)
+{
+	struct kgsl_context *context = &drawctxt->base;
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret = 0;
+
+	mutex_lock(&device->mutex);
+
+	ret = send_context_unregister_hfi(adreno_dev, context,
+		drawctxt->internal_timestamp);
+
+	if (!ret) {
+		kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+			drawctxt->timestamp);
+
+		kgsl_sharedmem_writel(device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+			drawctxt->timestamp);
+
+		adreno_profile_process_results(adreno_dev);
+	}
+
+	context->gmu_registered = false;
+
+	mutex_unlock(&device->mutex);
+}
+
+u32 gen8_hwsched_preempt_count_get(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (device->state != KGSL_STATE_ACTIVE)
+		return 0;
+
+	return gen8_hwsched_hfi_get_value(adreno_dev, HFI_VALUE_PREEMPT_COUNT);
+}
+
+void gen8_hwsched_context_destroy(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	if (!adreno_hwsched_context_queue_enabled(adreno_dev))
+		return;
+
+	if (drawctxt->gmu_context_queue.gmuaddr)
+		gen8_free_gmu_block(to_gen8_gmu(adreno_dev), &drawctxt->gmu_context_queue);
+
+	if (drawctxt->gmu_hw_fence_queue.gmuaddr)
+		gen8_free_gmu_block(to_gen8_gmu(adreno_dev), &drawctxt->gmu_hw_fence_queue);
+}
+
+int gen8_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev)
+{
+	struct gen8_hwsched_hfi *hfi = to_gen8_hwsched_hfi(adreno_dev);
+	struct adreno_context *drawctxt = NULL;
+	u32 ts = 0;
+	int ret = 0;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
+		return 0;
+
+	spin_lock(&hfi->hw_fence.lock);
+
+	drawctxt = hfi->hw_fence.defer_drawctxt;
+	ts = hfi->hw_fence.defer_ts;
+
+	spin_unlock(&hfi->hw_fence.lock);
+
+	if (!drawctxt)
+		goto done;
+
+	ret = process_hw_fence_deferred_ctxt(adreno_dev, drawctxt, ts);
+
+	kgsl_context_put(&drawctxt->base);
+	gen8_hwsched_active_count_put(adreno_dev);
+
+done:
+	_disable_hw_fence_throttle(adreno_dev, true);
+
+	return ret;
+}

+ 359 - 0
qcom/opensource/graphics-kernel/adreno_gen8_hwsched_hfi.h

@@ -0,0 +1,359 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_GEN8_HWSCHED_HFI_H_
+#define _ADRENO_GEN8_HWSCHED_HFI_H_
+
+/* Maximum number of IBs in a submission */
+#define HWSCHED_MAX_NUMIBS \
+	((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \
+		/ sizeof(struct hfi_issue_ib))
+
+/*
+ * This is used to put userspace threads to sleep when hardware fence unack count reaches a
+ * threshold. This bit is cleared in two scenarios:
+ * 1. If the hardware fence unack count drops to a desired threshold.
+ * 2. If there is a GMU/GPU fault, because we don't want the threads to keep sleeping through fault
+ *    recovery, which can easily take 100s of milliseconds to complete.
+ */
+#define GEN8_HWSCHED_HW_FENCE_SLEEP_BIT	0x0
+
+/*
+ * This is used to avoid creating any more hardware fences until the hardware fence unack count
+ * drops to a desired threshold. This bit is required in cases where GEN8_HWSCHED_HW_FENCE_SLEEP_BIT
+ * will be cleared, but we still want to avoid creating any more hardware fences. For example, if
+ * hardware fence unack count reaches a maximum threshold, both GEN8_HWSCHED_HW_FENCE_SLEEP_BIT and
+ * GEN8_HWSCHED_HW_FENCE_MAX_BIT will be set. If a GMU/GPU fault then happens,
+ * GEN8_HWSCHED_HW_FENCE_SLEEP_BIT will be cleared to wake up any sleeping threads, but
+ * GEN8_HWSCHED_HW_FENCE_MAX_BIT will remain set to avoid creating any new hardware fences until
+ * recovery is complete and deferred drawctxt (if any) is handled.
+ */
+#define GEN8_HWSCHED_HW_FENCE_MAX_BIT	0x1
+
+/*
+ * This is used to avoid creating any more hardware fences until concurrent reset/recovery completes
+ */
+#define GEN8_HWSCHED_HW_FENCE_ABORT_BIT 0x2
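+
+/*
+ * Illustrative summary of how these bits are intended to interact. The MIN and
+ * MAX thresholds named here are placeholders; the actual values and checks
+ * live in the hwsched hfi implementation:
+ *
+ *   unack_count reaches MAX   -> set SLEEP and MAX; submitters go to sleep
+ *   unack_count drops to MIN  -> clear SLEEP and MAX; wake up unack_wq
+ *   GMU/GPU fault             -> clear SLEEP so sleepers wake up, keep MAX set
+ *                                and set ABORT until recovery completes
+ */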
+
+struct gen8_hwsched_hfi {
+	struct hfi_mem_alloc_entry mem_alloc_table[32];
+	u32 mem_alloc_entries;
+	/** @irq_mask: Store the hfi interrupt mask */
+	u32 irq_mask;
+	/** @msglock: To protect the list of un-ACKed hfi packets */
+	rwlock_t msglock;
+	/** @msglist: List of un-ACKed hfi packets */
+	struct list_head msglist;
+	/** @f2h_task: Task for processing gmu fw to host packets */
+	struct task_struct *f2h_task;
+	/** @f2h_wq: Waitqueue for the f2h_task */
+	wait_queue_head_t f2h_wq;
+	/** @big_ib: GMU buffer to hold big IBs */
+	struct kgsl_memdesc *big_ib;
+	/** @big_ib_recurring: GMU buffer to hold big recurring IBs */
+	struct kgsl_memdesc *big_ib_recurring;
+	/** @msgq_mutex: Mutex for accessing the msgq */
+	struct mutex msgq_mutex;
+	struct {
+		/** @lock: Spinlock for managing hardware fences */
+		spinlock_t lock;
+		/**
+		 * @unack_count: Number of hardware fences sent to GMU that haven't yet been acked
+		 * by GMU
+		 */
+		u32 unack_count;
+		/**
+		 * @unack_wq: Waitqueue to wait on till number of unacked hardware fences drops to
+		 * a desired threshold
+		 */
+		wait_queue_head_t unack_wq;
+		/**
+		 * @defer_drawctxt: Drawctxt to send hardware fences from as soon as the unacked
+		 * hardware fence count drops to a desired threshold
+		 */
+		struct adreno_context *defer_drawctxt;
+		/**
+		 * @defer_ts: The timestamp of the hardware fence which got deferred
+		 */
+		u32 defer_ts;
+		/**
+		 * @flags: Flags to control the creation of new hardware fences
+		 */
+		unsigned long flags;
+		/** @seqnum: Sequence number for hardware fence packet header */
+		atomic_t seqnum;
+	} hw_fence;
+	/**
+	 * @hw_fence_timer: Timer to trigger a fault if unacked hardware fence count doesn't drop
+	 * to a desired threshold in a given amount of time
+	 */
+	struct timer_list hw_fence_timer;
+	/**
+	 * @hw_fence_ws: Work struct that gets scheduled when hw_fence_timer expires
+	 */
+	struct work_struct hw_fence_ws;
+	/** @detached_hw_fence_list: List of hardware fences belonging to detached contexts */
+	struct list_head detached_hw_fence_list;
+	/** @defer_hw_fence_work: The work structure to send deferred hardware fences to GMU */
+	struct kthread_work defer_hw_fence_work;
+};
+
+struct kgsl_drawobj_cmd;
+
+/**
+ * gen8_hwsched_hfi_probe - Probe hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hwsched_hfi_probe(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_hfi_remove - Release hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ */
+void gen8_hwsched_hfi_remove(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_hfi_init - Initialize hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to initialize hfi resources
+ * once before the very first gmu boot
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hwsched_hfi_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_hfi_start - Start hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Send the various hfi packets before booting the gpu
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_hfi_stop - Stop the hfi resources
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function does the hfi cleanup when powering down the gmu
+ */
+void gen8_hwsched_hfi_stop(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_cp_init - Send CP_INIT via HFI
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to send the CP INIT packet and bring
+ * the GPU out of secure mode using raw HFI packets.
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hwsched_cp_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_send_cmd_async - Send an hfi packet
+ * @adreno_dev: Pointer to adreno device structure
+ * @data: Data to be sent in the hfi packet
+ * @size_bytes: Size of the packet in bytes
+ *
+ * Send data in the form of an HFI packet to GMU and wait for
+ * its ack asynchronously
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes);
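+
+/*
+ * Typical usage, mirroring send_context_pointers() in the hwsched hfi .c file
+ * (shown here only as an illustration of the call pattern):
+ *
+ *	struct hfi_context_pointers_cmd cmd = {0};
+ *	int ret = CMD_MSG_HDR(cmd, H2F_MSG_CONTEXT_POINTERS);
+ *
+ *	if (!ret)
+ *		ret = gen8_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
+ */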
+
+/**
+ * gen8_hwsched_submit_drawobj - Dispatch IBs to dispatch queues
+ * @adreno_dev: Pointer to adreno device structure
+ * @drawobj: The command draw object which needs to be submitted
+ *
+ * This function is used to register the context if needed and submit
+ * IBs to the hfi dispatch queues.
+ *
+ * Return: 0 on success and negative error on failure
+ */
+int gen8_hwsched_submit_drawobj(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj *drawobj);
+
+/**
+ * gen8_hwsched_context_detach - Unregister a context with GMU
+ * @drawctxt: Pointer to the adreno context
+ *
+ * This function sends context unregister HFI and waits for the ack
+ * to ensure all submissions from this context have retired
+ */
+void gen8_hwsched_context_detach(struct adreno_context *drawctxt);
+
+/* Helper function to get to gen8 hwsched hfi device from adreno device */
+struct gen8_hwsched_hfi *to_gen8_hwsched_hfi(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_preempt_count_get - Get preemption count from GMU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * This function sends a GET_VALUE HFI packet to get the number of
+ * preemptions completed since last SLUMBER exit.
+ *
+ * Return: Preemption count
+ */
+u32 gen8_hwsched_preempt_count_get(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_parse_payload - Parse payload to look up a key
+ * @payload: Pointer to a payload section
+ * @key: The key whose value is to be looked up
+ *
+ * This function parses the payload data which is a sequence
+ * of key-value pairs.
+ *
+ * Return: The value of the key or 0 if key is not found
+ */
+u32 gen8_hwsched_parse_payload(struct payload_section *payload, u32 key);
+
+/**
+ * gen8_hwsched_lpac_cp_init - Send CP_INIT to LPAC via HFI
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to send CP INIT packet to LPAC and
+ * enable submission to LPAC queue.
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen8_hwsched_lpac_cp_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hfi_send_lpac_feature_ctrl - Send the lpac feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_hfi_send_lpac_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_context_destroy - Destroy any hwsched related resources during context destruction
+ * @adreno_dev: Pointer to adreno device
+ * @drawctxt: Pointer to the adreno context
+ *
+ * This function destroys any hwsched-related resources when this context is destroyed
+ */
+void gen8_hwsched_context_destroy(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt);
+
+/**
+ * gen8_hwsched_hfi_get_value - Send GET_VALUE packet to GMU to get the value of a property
+ * @adreno_dev: Pointer to adreno device
+ * @prop: property to get from GMU
+ *
+ * This function sends a GET_VALUE HFI packet to query the value of a property
+ *
+ * Return: On success, return the value in the GMU response. On failure, return 0
+ */
+u32 gen8_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop);
+
+/**
+ * gen8_send_hw_fence_hfi_wait_ack - Send hardware fence info to GMU
+ * @adreno_dev: Pointer to adreno device
+ * @entry: Pointer to the adreno hardware fence entry
+ * @flags: Flags for this hardware fence
+ *
+ * Send the hardware fence info to the GMU and wait for the ack
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry, u64 flags);
+
+/**
+ * gen8_hwsched_create_hw_fence - Create a hardware fence
+ * @adreno_dev: Pointer to adreno device
+ * @kfence: Pointer to the kgsl fence
+ *
+ * Create a hardware fence, set up hardware fence info and send it to GMU if required
+ */
+void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev,
+	struct kgsl_sync_fence *kfence);
+
+/**
+ * gen8_hwsched_drain_context_hw_fences - Drain context's hardware fences via GMU
+ * @adreno_dev: Pointer to adreno device
+ * @drawctxt: Pointer to the adreno context which is to be flushed
+ *
+ * Trigger hardware fences that were never dispatched to GMU
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt);
+
+/**
+ * gen8_hwsched_check_context_inflight_hw_fences - Check whether all hardware fences
+ * from a context have been sent to the TxQueue or not
+ * @adreno_dev: Pointer to adreno device
+ * @drawctxt: Pointer to the adreno context which is to be flushed
+ *
+ * Check if all hardware fences from this context have been sent to the
+ * TxQueue. If not, log an error and return an error code.
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt);
+
+/**
+ * gen8_remove_hw_fence_entry - Remove hardware fence entry
+ * @adreno_dev: pointer to the adreno device
+ * @entry: Pointer to the hardware fence entry
+ */
+void gen8_remove_hw_fence_entry(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *entry);
+
+/**
+ * gen8_trigger_hw_fence_cpu - Trigger hardware fence from cpu
+ * @adreno_dev: pointer to the adreno device
+ * @fence: hardware fence entry to be triggered
+ *
+ * Trigger the hardware fence by sending it to the GMU's TxQueue and
+ * raising the interrupt from the GMU to APPS
+ */
+void gen8_trigger_hw_fence_cpu(struct adreno_device *adreno_dev,
+	struct adreno_hw_fence_entry *fence);
+
+/**
+ * gen8_hwsched_disable_hw_fence_throttle - Disable hardware fence throttling after reset
+ * @adreno_dev: pointer to the adreno device
+ *
+ * After device reset, clear hardware fence related data structures, send any hardware
+ * fences that were deferred prior to reset, and re-open the gates for hardware fence creation
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen8_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_process_msgq - Process msgq
+ * @adreno_dev: pointer to the adreno device
+ *
+ * This function grabs the msgq mutex and processes the msgq for any outstanding HFI packets
+ */
+void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev);
+
+/**
+ * gen8_hwsched_boot_gpu - Send the command to boot GPU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * Send the HFI to boot the GPU and check the ack. In case of failure,
+ * take a snapshot and capture registers of interest.
+ *
+ * Return: Zero on success or negative error on failure
+ */
+int gen8_hwsched_boot_gpu(struct adreno_device *adreno_dev);
+
+#endif

+ 1135 - 0
qcom/opensource/graphics-kernel/adreno_gen8_perfcounter.c

@@ -0,0 +1,1135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "adreno_perfcounter.h"
+#include "adreno_pm4types.h"
+#include "kgsl_device.h"
+
+#define PERFCOUNTER_FLUSH_DONE_MASK BIT(0)
+
+static void gen8_rbbm_perfctr_flush(struct kgsl_device *device)
+{
+	u32 val;
+	int ret;
+
+	/*
+	 * Flush delta counters (both perf counters and pipe stats) present in
+	 * RBBM_S and RBBM_US to perf RAM logic to get the latest data.
+	 */
+	kgsl_regwrite(device, GEN8_RBBM_PERFCTR_FLUSH_HOST_CMD, BIT(0));
+	kgsl_regwrite(device, GEN8_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD, BIT(0));
+
+	ret = kgsl_regmap_read_poll_timeout(&device->regmap, GEN8_RBBM_PERFCTR_FLUSH_HOST_STATUS,
+		val, (val & PERFCOUNTER_FLUSH_DONE_MASK) == PERFCOUNTER_FLUSH_DONE_MASK,
+		100, 100 * 1000);
+
+	if (ret)
+		dev_err(device->dev, "Perfcounter flush timed out: status=0x%08x\n", val);
+}
+
+/*
+ * For registers that do not get restored on power cycle, read the value and add
+ * the stored shadow value
+ */
+static u64 gen8_counter_read_norestore(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group, u32 counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 hi, lo;
+
+	gen8_rbbm_perfctr_flush(device);
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	return ((((u64) hi) << 32) | lo) + reg->value;
+}
+
+static int gen8_counter_br_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	int ret = 0;
+	u32 val = 0;
+
+	kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val);
+	kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(15, 12), PIPE_BR));
+
+	ret = gen8_perfcounter_update(adreno_dev, reg, true,
+			FIELD_PREP(GENMASK(15, 12), PIPE_BR), group->flags);
+
+	kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val);
+
+	/* Ensure all writes are posted before reading the piped register */
+	mb();
+
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
+
+static int gen8_counter_bv_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	int ret = 0;
+	u32 val = 0;
+
+	kgsl_regread(device, GEN8_CP_APERTURE_CNTL_HOST, &val);
+	kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, FIELD_PREP(GENMASK(15, 12), PIPE_BV));
+
+	ret = gen8_perfcounter_update(adreno_dev, reg, true,
+				FIELD_PREP(GENMASK(15, 12), PIPE_BV), group->flags);
+
+	kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, val);
+
+	/* Ensure all writes are posted before reading the piped register */
+	mb();
+
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
+
+static int gen8_counter_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	int ret = 0;
+
+	ret = gen8_perfcounter_update(adreno_dev, reg, true,
+					FIELD_PREP(GENMASK(15, 12), PIPE_NONE), group->flags);
+	if (!ret)
+		reg->value = 0;
+
+	return ret;
+}
+
+static u64 gen8_counter_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group, u32 counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 hi, lo;
+
+	gen8_rbbm_perfctr_flush(device);
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	/* These registers are restored on power resume */
+	return (((u64) hi) << 32) | lo;
+}
+
+static int gen8_counter_gbif_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 shift = counter << 3;
+	u32 select = BIT(counter);
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_CLR, select, select);
+	kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_CLR, select, 0);
+
+	/* select the desired countable */
+	kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift);
+
+	/* enable counter */
+	kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_EN, select, select);
+
+	reg->value = 0;
+	return 0;
+}
+
+static int gen8_counter_gbif_pwr_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	u32 shift = counter << 3;
+	u32 select = BIT(16 + counter);
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	/*
+	 * Write 1, followed by 0 to CLR register for
+	 * clearing the counter
+	 */
+	kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_CLR, select, select);
+	kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_CLR, select, 0);
+
+	/* select the desired countable */
+	kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift);
+
+	/* Enable the counter */
+	kgsl_regrmw(device, GEN8_GBIF_PERF_PWR_CNT_EN, select, select);
+
+	reg->value = 0;
+	return 0;
+}
+
+static int gen8_counter_alwayson_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	return 0;
+}
+
+static u64 gen8_counter_alwayson_read(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group, u32 counter)
+{
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	return gpudev->read_alwayson(adreno_dev) + reg->value;
+}
+
+static void gen8_write_gmu_counter_enable(struct kgsl_device *device,
+		struct adreno_perfcount_register *reg, u32 bit, u32 countable)
+{
+	kgsl_regrmw(device, reg->select, 0xff << bit, countable << bit);
+}
+
+static int gen8_counter_gmu_pwr_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	/* Four counters can be programmed per select register */
+	int offset = counter % 4;
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	gen8_write_gmu_counter_enable(device, reg, offset << 3, countable);
+
+	kgsl_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static int gen8_counter_gmu_perf_enable(struct adreno_device *adreno_dev,
+		const struct adreno_perfcount_group *group,
+		u32 counter, u32 countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg = &group->regs[counter];
+	/* Four counters can be programmed per select register */
+	int offset = counter % 4;
+
+	if (countable > 0xff)
+		return -EINVAL;
+
+	gen8_write_gmu_counter_enable(device, reg, offset << 3, countable);
+
+	kgsl_regwrite(device, GEN8_GMUCX_PERF_COUNTER_ENABLE, 1);
+
+	reg->value = 0;
+	return 0;
+}
+
+static struct adreno_perfcount_register gen8_perfcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_0_LO,
+		GEN8_RBBM_PERFCTR_CP_0_HI, -1, GEN8_CP_PERFCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_1_LO,
+		GEN8_RBBM_PERFCTR_CP_1_HI, -1, GEN8_CP_PERFCTR_CP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_2_LO,
+		GEN8_RBBM_PERFCTR_CP_2_HI, -1, GEN8_CP_PERFCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_3_LO,
+		GEN8_RBBM_PERFCTR_CP_3_HI, -1, GEN8_CP_PERFCTR_CP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_4_LO,
+		GEN8_RBBM_PERFCTR_CP_4_HI, -1, GEN8_CP_PERFCTR_CP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_5_LO,
+		GEN8_RBBM_PERFCTR_CP_5_HI, -1, GEN8_CP_PERFCTR_CP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_6_LO,
+		GEN8_RBBM_PERFCTR_CP_6_HI, -1, GEN8_CP_PERFCTR_CP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_7_LO,
+		GEN8_RBBM_PERFCTR_CP_7_HI, -1, GEN8_CP_PERFCTR_CP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_8_LO,
+		GEN8_RBBM_PERFCTR_CP_8_HI, -1, GEN8_CP_PERFCTR_CP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_9_LO,
+		GEN8_RBBM_PERFCTR_CP_9_HI, -1, GEN8_CP_PERFCTR_CP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_10_LO,
+		GEN8_RBBM_PERFCTR_CP_10_HI, -1, GEN8_CP_PERFCTR_CP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_11_LO,
+		GEN8_RBBM_PERFCTR_CP_11_HI, -1, GEN8_CP_PERFCTR_CP_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_12_LO,
+		GEN8_RBBM_PERFCTR_CP_12_HI, -1, GEN8_CP_PERFCTR_CP_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CP_13_LO,
+		GEN8_RBBM_PERFCTR_CP_13_HI, -1, GEN8_CP_PERFCTR_CP_SEL_13 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_0_LO,
+		GEN8_RBBM_PERFCTR2_CP_0_HI, -1, GEN8_CP_PERFCTR_CP_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_1_LO,
+		GEN8_RBBM_PERFCTR2_CP_1_HI, -1, GEN8_CP_PERFCTR_CP_SEL_15 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_2_LO,
+		GEN8_RBBM_PERFCTR2_CP_2_HI, -1, GEN8_CP_PERFCTR_CP_SEL_16 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_3_LO,
+		GEN8_RBBM_PERFCTR2_CP_3_HI, -1, GEN8_CP_PERFCTR_CP_SEL_17 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_4_LO,
+		GEN8_RBBM_PERFCTR2_CP_4_HI, -1, GEN8_CP_PERFCTR_CP_SEL_18 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_5_LO,
+		GEN8_RBBM_PERFCTR2_CP_5_HI, -1, GEN8_CP_PERFCTR_CP_SEL_19 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_CP_6_LO,
+		GEN8_RBBM_PERFCTR2_CP_6_HI, -1, GEN8_CP_PERFCTR_CP_SEL_20 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_rbbm[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_0_LO,
+		GEN8_RBBM_PERFCTR_RBBM_0_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_0, 0,
+		{ GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_0 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_1_LO,
+		GEN8_RBBM_PERFCTR_RBBM_1_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_1, 0,
+		{ GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_0 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_2_LO,
+		GEN8_RBBM_PERFCTR_RBBM_2_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_2, 0,
+		{ GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_0 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RBBM_3_LO,
+		GEN8_RBBM_PERFCTR_RBBM_3_HI, -1, GEN8_RBBM_PERFCTR_RBBM_SEL_3, 0,
+		{ GEN8_RBBM_SLICE_PERFCTR_RBBM_SEL_0 } },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_0_LO,
+		GEN8_RBBM_PERFCTR_PC_0_HI, -1, GEN8_PC_PERFCTR_PC_SEL_0, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_0 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_1_LO,
+		GEN8_RBBM_PERFCTR_PC_1_HI, -1, GEN8_PC_PERFCTR_PC_SEL_1, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_1 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_2_LO,
+		GEN8_RBBM_PERFCTR_PC_2_HI, -1, GEN8_PC_PERFCTR_PC_SEL_2, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_2 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_3_LO,
+		GEN8_RBBM_PERFCTR_PC_3_HI, -1, GEN8_PC_PERFCTR_PC_SEL_3, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_3 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_4_LO,
+		GEN8_RBBM_PERFCTR_PC_4_HI, -1, GEN8_PC_PERFCTR_PC_SEL_4, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_4 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_5_LO,
+		GEN8_RBBM_PERFCTR_PC_5_HI, -1, GEN8_PC_PERFCTR_PC_SEL_5, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_5 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_6_LO,
+		GEN8_RBBM_PERFCTR_PC_6_HI, -1, GEN8_PC_PERFCTR_PC_SEL_6, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_6 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_PC_7_LO,
+		GEN8_RBBM_PERFCTR_PC_7_HI, -1, GEN8_PC_PERFCTR_PC_SEL_7, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_7 } },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_0_LO,
+		GEN8_RBBM_PERFCTR_BV_PC_0_HI, -1, GEN8_PC_PERFCTR_PC_SEL_8, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_8 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_1_LO,
+		GEN8_RBBM_PERFCTR_BV_PC_1_HI, -1, GEN8_PC_PERFCTR_PC_SEL_9, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_9 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_2_LO,
+		GEN8_RBBM_PERFCTR_BV_PC_2_HI, -1, GEN8_PC_PERFCTR_PC_SEL_10, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_10 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_3_LO,
+		GEN8_RBBM_PERFCTR_BV_PC_3_HI, -1, GEN8_PC_PERFCTR_PC_SEL_11, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_11 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_4_LO,
+		GEN8_RBBM_PERFCTR_BV_PC_4_HI, -1, GEN8_PC_PERFCTR_PC_SEL_12, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_12 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_5_LO,
+		GEN8_RBBM_PERFCTR_BV_PC_5_HI, -1, GEN8_PC_PERFCTR_PC_SEL_13, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_13 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_6_LO,
+		GEN8_RBBM_PERFCTR_BV_PC_6_HI, -1, GEN8_PC_PERFCTR_PC_SEL_14, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_14 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_PC_7_LO,
+		GEN8_RBBM_PERFCTR_BV_PC_7_HI, -1, GEN8_PC_PERFCTR_PC_SEL_15, 0,
+		{ GEN8_PC_SLICE_PERFCTR_PC_SEL_15 } },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_0_LO,
+		GEN8_RBBM_PERFCTR_VFD_0_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_1_LO,
+		GEN8_RBBM_PERFCTR_VFD_1_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_2_LO,
+		GEN8_RBBM_PERFCTR_VFD_2_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_3_LO,
+		GEN8_RBBM_PERFCTR_VFD_3_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_4_LO,
+		GEN8_RBBM_PERFCTR_VFD_4_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_5_LO,
+		GEN8_RBBM_PERFCTR_VFD_5_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_6_LO,
+		GEN8_RBBM_PERFCTR_VFD_6_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VFD_7_LO,
+		GEN8_RBBM_PERFCTR_VFD_7_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_7 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_0_LO,
+		GEN8_RBBM_PERFCTR_BV_VFD_0_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_1_LO,
+		GEN8_RBBM_PERFCTR_BV_VFD_1_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_2_LO,
+		GEN8_RBBM_PERFCTR_BV_VFD_2_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_3_LO,
+		GEN8_RBBM_PERFCTR_BV_VFD_3_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_4_LO,
+		GEN8_RBBM_PERFCTR_BV_VFD_4_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_5_LO,
+		GEN8_RBBM_PERFCTR_BV_VFD_5_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_6_LO,
+		GEN8_RBBM_PERFCTR_BV_VFD_6_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VFD_7_LO,
+		GEN8_RBBM_PERFCTR_BV_VFD_7_HI, -1, GEN8_VFD_PERFCTR_VFD_SEL_15 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_0_LO,
+		GEN8_RBBM_PERFCTR_HLSQ_0_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_0, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_0 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_1_LO,
+		GEN8_RBBM_PERFCTR_HLSQ_1_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_1, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_1 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_2_LO,
+		GEN8_RBBM_PERFCTR_HLSQ_2_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_2, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_2 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_3_LO,
+		GEN8_RBBM_PERFCTR_HLSQ_3_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_3, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_3 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_4_LO,
+		GEN8_RBBM_PERFCTR_HLSQ_4_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_4, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_4 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_HLSQ_5_LO,
+		GEN8_RBBM_PERFCTR_HLSQ_5_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_5, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_5 } },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_0_LO,
+		GEN8_RBBM_PERFCTR2_HLSQ_0_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_0, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_0 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_1_LO,
+		GEN8_RBBM_PERFCTR2_HLSQ_1_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_1, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_1 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_2_LO,
+		GEN8_RBBM_PERFCTR2_HLSQ_2_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_2, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_2 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_3_LO,
+		GEN8_RBBM_PERFCTR2_HLSQ_3_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_3, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_3 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_4_LO,
+		GEN8_RBBM_PERFCTR2_HLSQ_4_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_4, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_4 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_HLSQ_5_LO,
+		GEN8_RBBM_PERFCTR2_HLSQ_5_HI, -1, GEN8_SP_PERFCTR_HLSQ_SEL_5, 0,
+		{ GEN8_SP_PERFCTR_HLSQ_SEL_2_5 } },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_0_LO,
+		GEN8_RBBM_PERFCTR_VPC_0_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_0, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_0, GEN8_VPC_PERFCTR_VPC_SEL_2_0 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_1_LO,
+		GEN8_RBBM_PERFCTR_VPC_1_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_1, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_1, GEN8_VPC_PERFCTR_VPC_SEL_2_1 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_2_LO,
+		GEN8_RBBM_PERFCTR_VPC_2_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_2, GEN8_VPC_PERFCTR_VPC_SEL_2_2 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_3_LO,
+		GEN8_RBBM_PERFCTR_VPC_3_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_3, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_3, GEN8_VPC_PERFCTR_VPC_SEL_2_3 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_4_LO,
+		GEN8_RBBM_PERFCTR_VPC_4_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_4, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_4, GEN8_VPC_PERFCTR_VPC_SEL_2_4 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VPC_5_LO,
+		GEN8_RBBM_PERFCTR_VPC_5_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_5, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_5, GEN8_VPC_PERFCTR_VPC_SEL_2_5 } },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_0_LO,
+		GEN8_RBBM_PERFCTR_BV_VPC_0_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_6, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_6, GEN8_VPC_PERFCTR_VPC_SEL_2_6 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_1_LO,
+		GEN8_RBBM_PERFCTR_BV_VPC_1_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_7, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_7, GEN8_VPC_PERFCTR_VPC_SEL_2_7 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_2_LO,
+		GEN8_RBBM_PERFCTR_BV_VPC_2_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_8, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_8, GEN8_VPC_PERFCTR_VPC_SEL_2_8 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_3_LO,
+		GEN8_RBBM_PERFCTR_BV_VPC_3_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_9, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_9, GEN8_VPC_PERFCTR_VPC_SEL_2_9 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_4_LO,
+		GEN8_RBBM_PERFCTR_BV_VPC_4_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_10, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_10, GEN8_VPC_PERFCTR_VPC_SEL_2_10 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_VPC_5_LO,
+		GEN8_RBBM_PERFCTR_BV_VPC_5_HI, -1, GEN8_VPC_PERFCTR_VPC_SEL_2_11, 0,
+		{ GEN8_VPC_PERFCTR_VPC_SEL_1_11, GEN8_VPC_PERFCTR_VPC_SEL_2_11 } },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_ccu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_0_LO,
+		GEN8_RBBM_PERFCTR_CCU_0_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_1_LO,
+		GEN8_RBBM_PERFCTR_CCU_1_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_2_LO,
+		GEN8_RBBM_PERFCTR_CCU_2_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_3_LO,
+		GEN8_RBBM_PERFCTR_CCU_3_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CCU_4_LO,
+		GEN8_RBBM_PERFCTR_CCU_4_HI, -1, GEN8_RB_PERFCTR_CCU_SEL_4 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_0_LO,
+		GEN8_RBBM_PERFCTR_TSE_0_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_0, 0,
+		{ GEN8_GRAS_PERFCTR_TSEFE_SEL_0 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_1_LO,
+		GEN8_RBBM_PERFCTR_TSE_1_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_1, 0,
+		{ GEN8_GRAS_PERFCTR_TSEFE_SEL_1 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_2_LO,
+		GEN8_RBBM_PERFCTR_TSE_2_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_2, 0,
+		{ GEN8_GRAS_PERFCTR_TSEFE_SEL_2 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TSE_3_LO,
+		GEN8_RBBM_PERFCTR_TSE_3_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_3, 0,
+		{ GEN8_GRAS_PERFCTR_TSEFE_SEL_3 } },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_0_LO,
+		GEN8_RBBM_PERFCTR_BV_TSE_0_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_0, 0,
+		{ GEN8_GRAS_PERFCTR_TSEFE_SEL_0 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_1_LO,
+		GEN8_RBBM_PERFCTR_BV_TSE_1_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_1, 0,
+		{ GEN8_GRAS_PERFCTR_TSEFE_SEL_1 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_2_LO,
+		GEN8_RBBM_PERFCTR_BV_TSE_2_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_2, 0,
+		{ GEN8_GRAS_PERFCTR_TSEFE_SEL_2 } },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_TSE_3_LO,
+		GEN8_RBBM_PERFCTR_BV_TSE_3_HI, -1, GEN8_GRAS_PERFCTR_TSE_SEL_3, 0,
+		{ GEN8_GRAS_PERFCTR_TSEFE_SEL_3 } },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RAS_0_LO,
+		GEN8_RBBM_PERFCTR_RAS_0_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RAS_1_LO,
+		GEN8_RBBM_PERFCTR_RAS_1_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RAS_2_LO,
+		GEN8_RBBM_PERFCTR_RAS_2_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RAS_3_LO,
+		GEN8_RBBM_PERFCTR_RAS_3_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_RAS_0_LO,
+		GEN8_RBBM_PERFCTR_BV_RAS_0_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_RAS_1_LO,
+		GEN8_RBBM_PERFCTR_BV_RAS_1_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_RAS_2_LO,
+		GEN8_RBBM_PERFCTR_BV_RAS_2_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_RAS_3_LO,
+		GEN8_RBBM_PERFCTR_BV_RAS_3_HI, -1, GEN8_GRAS_PERFCTR_RAS_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_0_LO,
+		GEN8_RBBM_PERFCTR_UCHE_0_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_1_LO,
+		GEN8_RBBM_PERFCTR_UCHE_1_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_2_LO,
+		GEN8_RBBM_PERFCTR_UCHE_2_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_3_LO,
+		GEN8_RBBM_PERFCTR_UCHE_3_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_4_LO,
+		GEN8_RBBM_PERFCTR_UCHE_4_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_5_LO,
+		GEN8_RBBM_PERFCTR_UCHE_5_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_6_LO,
+		GEN8_RBBM_PERFCTR_UCHE_6_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_7_LO,
+		GEN8_RBBM_PERFCTR_UCHE_7_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_8_LO,
+		GEN8_RBBM_PERFCTR_UCHE_8_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_9_LO,
+		GEN8_RBBM_PERFCTR_UCHE_9_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_10_LO,
+		GEN8_RBBM_PERFCTR_UCHE_10_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_11_LO,
+		GEN8_RBBM_PERFCTR_UCHE_11_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_12_LO,
+		GEN8_RBBM_PERFCTR_UCHE_12_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_13_LO,
+		GEN8_RBBM_PERFCTR_UCHE_13_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_14_LO,
+		GEN8_RBBM_PERFCTR_UCHE_14_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_15_LO,
+		GEN8_RBBM_PERFCTR_UCHE_15_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_15 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_16_LO,
+		GEN8_RBBM_PERFCTR_UCHE_16_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_16 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_17_LO,
+		GEN8_RBBM_PERFCTR_UCHE_17_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_17 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_18_LO,
+		GEN8_RBBM_PERFCTR_UCHE_18_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_18 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_19_LO,
+		GEN8_RBBM_PERFCTR_UCHE_19_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_19 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_20_LO,
+		GEN8_RBBM_PERFCTR_UCHE_20_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_20 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_21_LO,
+		GEN8_RBBM_PERFCTR_UCHE_21_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_21 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_22_LO,
+		GEN8_RBBM_PERFCTR_UCHE_22_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_22 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UCHE_23_LO,
+		GEN8_RBBM_PERFCTR_UCHE_23_HI, -1, GEN8_UCHE_PERFCTR_UCHE_SEL_23 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_0_LO,
+		GEN8_RBBM_PERFCTR_TP_0_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_1_LO,
+		GEN8_RBBM_PERFCTR_TP_1_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_2_LO,
+		GEN8_RBBM_PERFCTR_TP_2_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_3_LO,
+		GEN8_RBBM_PERFCTR_TP_3_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_4_LO,
+		GEN8_RBBM_PERFCTR_TP_4_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_5_LO,
+		GEN8_RBBM_PERFCTR_TP_5_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_6_LO,
+		GEN8_RBBM_PERFCTR_TP_6_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_7_LO,
+		GEN8_RBBM_PERFCTR_TP_7_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_8_LO,
+		GEN8_RBBM_PERFCTR_TP_8_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_9_LO,
+		GEN8_RBBM_PERFCTR_TP_9_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_10_LO,
+		GEN8_RBBM_PERFCTR_TP_10_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_TP_11_LO,
+		GEN8_RBBM_PERFCTR_TP_11_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_11 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_0_LO,
+		GEN8_RBBM_PERFCTR2_TP_0_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_1_LO,
+		GEN8_RBBM_PERFCTR2_TP_1_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_2_LO,
+		GEN8_RBBM_PERFCTR2_TP_2_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_3_LO,
+		GEN8_RBBM_PERFCTR2_TP_3_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_15 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_4_LO,
+		GEN8_RBBM_PERFCTR2_TP_4_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_16 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_5_LO,
+		GEN8_RBBM_PERFCTR2_TP_5_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_17 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_6_LO,
+		GEN8_RBBM_PERFCTR2_TP_6_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_18 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_TP_7_LO,
+		GEN8_RBBM_PERFCTR2_TP_7_HI, -1, GEN8_TPL1_PERFCTR_TP_SEL_19 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_0_LO,
+		GEN8_RBBM_PERFCTR_SP_0_HI, -1, GEN8_SP_PERFCTR_SP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_1_LO,
+		GEN8_RBBM_PERFCTR_SP_1_HI, -1, GEN8_SP_PERFCTR_SP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_2_LO,
+		GEN8_RBBM_PERFCTR_SP_2_HI, -1, GEN8_SP_PERFCTR_SP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_3_LO,
+		GEN8_RBBM_PERFCTR_SP_3_HI, -1, GEN8_SP_PERFCTR_SP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_4_LO,
+		GEN8_RBBM_PERFCTR_SP_4_HI, -1, GEN8_SP_PERFCTR_SP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_5_LO,
+		GEN8_RBBM_PERFCTR_SP_5_HI, -1, GEN8_SP_PERFCTR_SP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_6_LO,
+		GEN8_RBBM_PERFCTR_SP_6_HI, -1, GEN8_SP_PERFCTR_SP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_7_LO,
+		GEN8_RBBM_PERFCTR_SP_7_HI, -1, GEN8_SP_PERFCTR_SP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_8_LO,
+		GEN8_RBBM_PERFCTR_SP_8_HI, -1, GEN8_SP_PERFCTR_SP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_9_LO,
+		GEN8_RBBM_PERFCTR_SP_9_HI, -1, GEN8_SP_PERFCTR_SP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_10_LO,
+		GEN8_RBBM_PERFCTR_SP_10_HI, -1, GEN8_SP_PERFCTR_SP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_11_LO,
+		GEN8_RBBM_PERFCTR_SP_11_HI, -1, GEN8_SP_PERFCTR_SP_SEL_11 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_12_LO,
+		GEN8_RBBM_PERFCTR_SP_12_HI, -1, GEN8_SP_PERFCTR_SP_SEL_12 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_13_LO,
+		GEN8_RBBM_PERFCTR_SP_13_HI, -1, GEN8_SP_PERFCTR_SP_SEL_13 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_14_LO,
+		GEN8_RBBM_PERFCTR_SP_14_HI, -1, GEN8_SP_PERFCTR_SP_SEL_14 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_15_LO,
+		GEN8_RBBM_PERFCTR_SP_15_HI, -1, GEN8_SP_PERFCTR_SP_SEL_15 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_16_LO,
+		GEN8_RBBM_PERFCTR_SP_16_HI, -1, GEN8_SP_PERFCTR_SP_SEL_16 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_17_LO,
+		GEN8_RBBM_PERFCTR_SP_17_HI, -1, GEN8_SP_PERFCTR_SP_SEL_17 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_18_LO,
+		GEN8_RBBM_PERFCTR_SP_18_HI, -1, GEN8_SP_PERFCTR_SP_SEL_18 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_19_LO,
+		GEN8_RBBM_PERFCTR_SP_19_HI, -1, GEN8_SP_PERFCTR_SP_SEL_19 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_20_LO,
+		GEN8_RBBM_PERFCTR_SP_20_HI, -1, GEN8_SP_PERFCTR_SP_SEL_20 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_21_LO,
+		GEN8_RBBM_PERFCTR_SP_21_HI, -1, GEN8_SP_PERFCTR_SP_SEL_21 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_22_LO,
+		GEN8_RBBM_PERFCTR_SP_22_HI, -1, GEN8_SP_PERFCTR_SP_SEL_22 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_SP_23_LO,
+		GEN8_RBBM_PERFCTR_SP_23_HI, -1, GEN8_SP_PERFCTR_SP_SEL_23 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_0_LO,
+		GEN8_RBBM_PERFCTR2_SP_0_HI, -1, GEN8_SP_PERFCTR_SP_SEL_24 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_1_LO,
+		GEN8_RBBM_PERFCTR2_SP_1_HI, -1, GEN8_SP_PERFCTR_SP_SEL_25 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_2_LO,
+		GEN8_RBBM_PERFCTR2_SP_2_HI, -1, GEN8_SP_PERFCTR_SP_SEL_26 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_3_LO,
+		GEN8_RBBM_PERFCTR2_SP_3_HI, -1, GEN8_SP_PERFCTR_SP_SEL_27 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_4_LO,
+		GEN8_RBBM_PERFCTR2_SP_4_HI, -1, GEN8_SP_PERFCTR_SP_SEL_28 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_5_LO,
+		GEN8_RBBM_PERFCTR2_SP_5_HI, -1, GEN8_SP_PERFCTR_SP_SEL_29 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_6_LO,
+		GEN8_RBBM_PERFCTR2_SP_6_HI, -1, GEN8_SP_PERFCTR_SP_SEL_30 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_7_LO,
+		GEN8_RBBM_PERFCTR2_SP_7_HI, -1, GEN8_SP_PERFCTR_SP_SEL_31 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_8_LO,
+		GEN8_RBBM_PERFCTR2_SP_8_HI, -1, GEN8_SP_PERFCTR_SP_SEL_32 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_9_LO,
+		GEN8_RBBM_PERFCTR2_SP_9_HI, -1, GEN8_SP_PERFCTR_SP_SEL_33 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_10_LO,
+		GEN8_RBBM_PERFCTR2_SP_10_HI, -1, GEN8_SP_PERFCTR_SP_SEL_34 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_SP_11_LO,
+		GEN8_RBBM_PERFCTR2_SP_11_HI, -1, GEN8_SP_PERFCTR_SP_SEL_35 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_0_LO,
+		GEN8_RBBM_PERFCTR_RB_0_HI, -1, GEN8_RB_PERFCTR_RB_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_1_LO,
+		GEN8_RBBM_PERFCTR_RB_1_HI, -1, GEN8_RB_PERFCTR_RB_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_2_LO,
+		GEN8_RBBM_PERFCTR_RB_2_HI, -1, GEN8_RB_PERFCTR_RB_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_3_LO,
+		GEN8_RBBM_PERFCTR_RB_3_HI, -1, GEN8_RB_PERFCTR_RB_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_4_LO,
+		GEN8_RBBM_PERFCTR_RB_4_HI, -1, GEN8_RB_PERFCTR_RB_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_5_LO,
+		GEN8_RBBM_PERFCTR_RB_5_HI, -1, GEN8_RB_PERFCTR_RB_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_6_LO,
+		GEN8_RBBM_PERFCTR_RB_6_HI, -1, GEN8_RB_PERFCTR_RB_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_RB_7_LO,
+		GEN8_RBBM_PERFCTR_RB_7_HI, -1, GEN8_RB_PERFCTR_RB_SEL_7 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_vsc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VSC_0_LO,
+		GEN8_RBBM_PERFCTR_VSC_0_HI, -1, GEN8_VSC_PERFCTR_VSC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_VSC_1_LO,
+		GEN8_RBBM_PERFCTR_VSC_1_HI, -1, GEN8_VSC_PERFCTR_VSC_SEL_1 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_lrz[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_LRZ_0_LO,
+		GEN8_RBBM_PERFCTR_LRZ_0_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_LRZ_1_LO,
+		GEN8_RBBM_PERFCTR_LRZ_1_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_LRZ_2_LO,
+		GEN8_RBBM_PERFCTR_LRZ_2_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_LRZ_3_LO,
+		GEN8_RBBM_PERFCTR_LRZ_3_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_lrz[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_LRZ_0_LO,
+		GEN8_RBBM_PERFCTR_BV_LRZ_0_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_LRZ_1_LO,
+		GEN8_RBBM_PERFCTR_BV_LRZ_1_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_LRZ_2_LO,
+		GEN8_RBBM_PERFCTR_BV_LRZ_2_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_BV_LRZ_3_LO,
+		GEN8_RBBM_PERFCTR_BV_LRZ_3_HI, -1, GEN8_GRAS_PERFCTR_LRZ_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_cmp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CMP_0_LO,
+		GEN8_RBBM_PERFCTR_CMP_0_HI, -1, GEN8_RB_PERFCTR_CMP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CMP_1_LO,
+		GEN8_RBBM_PERFCTR_CMP_1_HI, -1, GEN8_RB_PERFCTR_CMP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CMP_2_LO,
+		GEN8_RBBM_PERFCTR_CMP_2_HI, -1, GEN8_RB_PERFCTR_CMP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_CMP_3_LO,
+		GEN8_RBBM_PERFCTR_CMP_3_HI, -1, GEN8_RB_PERFCTR_CMP_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_ufc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UFC_0_LO,
+		GEN8_RBBM_PERFCTR_UFC_0_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UFC_1_LO,
+		GEN8_RBBM_PERFCTR_UFC_1_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UFC_2_LO,
+		GEN8_RBBM_PERFCTR_UFC_2_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR_UFC_3_LO,
+		GEN8_RBBM_PERFCTR_UFC_3_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_3 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_bv_ufc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_UFC_0_LO,
+		GEN8_RBBM_PERFCTR2_UFC_0_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_RBBM_PERFCTR2_UFC_1_LO,
+		GEN8_RBBM_PERFCTR2_UFC_1_HI, -1, GEN8_RB_PERFCTR_UFC_SEL_5 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_gbif[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_0,
+		GEN8_GBIF_PERF_CNT_HI_0, -1, GEN8_GBIF_PERF_CNT_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_1,
+		GEN8_GBIF_PERF_CNT_HI_1, -1, GEN8_GBIF_PERF_CNT_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_2,
+		GEN8_GBIF_PERF_CNT_HI_2, -1, GEN8_GBIF_PERF_CNT_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_3,
+		GEN8_GBIF_PERF_CNT_HI_3, -1, GEN8_GBIF_PERF_CNT_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_4,
+		GEN8_GBIF_PERF_CNT_HI_4, -1, GEN8_GBIF_PERF_CNT_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_5,
+		GEN8_GBIF_PERF_CNT_HI_5, -1, GEN8_GBIF_PERF_CNT_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_6,
+		GEN8_GBIF_PERF_CNT_HI_6, -1, GEN8_GBIF_PERF_CNT_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PERF_CNT_LO_7,
+		GEN8_GBIF_PERF_CNT_HI_7, -1, GEN8_GBIF_PERF_CNT_SEL_1 },
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_gbif_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PWR_CNT_LO_0,
+		GEN8_GBIF_PWR_CNT_HI_0, -1, GEN8_GBIF_PWR_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PWR_CNT_LO_1,
+		GEN8_GBIF_PWR_CNT_HI_1, -1, GEN8_GBIF_PWR_CNT_SEL },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_GBIF_PWR_CNT_LO_2,
+		GEN8_GBIF_PWR_CNT_HI_2, -1, GEN8_GBIF_PWR_CNT_SEL },
+};
+
+#define GMU_COUNTER(lo, hi, sel) \
+	{ .countable = KGSL_PERFCOUNTER_NOT_USED, \
+	  .offset = lo, .offset_hi = hi, .select = sel }
+
+#define GMU_COUNTER_RESERVED(lo, hi, sel) \
+	{ .countable = KGSL_PERFCOUNTER_BROKEN, \
+	  .offset = lo, .offset_hi = hi, .select = sel }
+
+static struct adreno_perfcount_register gen8_perfcounters_gmu_xoclk[] = {
+	/*
+	 * COUNTER_XOCLK_0 and COUNTER_XOCLK_4 are used for the GPU
+	 * busy and IFPC counts. Mark them as reserved to ensure they
+	 * are not re-used.
+	 */
+	GMU_COUNTER_RESERVED(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_0,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_0,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_1,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_1,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_2,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_2,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_3,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_3,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0),
+	GMU_COUNTER_RESERVED(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_4,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_4,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_5,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_5,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_6,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_6,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_7,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_7,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_8,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_8,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_9,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_9,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_10,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_10,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_11,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_11,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_2),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_12,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_12,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_13,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_13,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_14,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_14,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_15,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_15,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_3),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_16,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_16,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_17,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_17,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_18,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_18,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_19,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_19,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_4),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_20,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_20,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_21,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_21,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_22,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_22,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_23,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_23,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_5),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_24,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_24,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_25,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_25,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_26,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_26,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_27,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_27,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_6),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_28,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_28,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_29,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_29,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_30,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_30,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_31,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_31,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_7),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_32,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_32,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_33,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_33,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_34,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_34,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_35,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_35,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_8),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_36,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_36,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_37,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_37,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_38,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_38,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_XOCLK_L_39,
+		GEN8_GMUCX_POWER_COUNTER_XOCLK_H_39,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_9),
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_gmu_gmuclk[] = {
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_0,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_0,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_1,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_1,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_2,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_2,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_3,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_3,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_0),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_4,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_4,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_5,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_5,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_6,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_6,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_7,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_7,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_1),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_8,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_8,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_9,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_9,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_10,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_10,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_11,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_11,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_2),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_12,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_12,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_13,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_13,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_14,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_14,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3),
+	GMU_COUNTER(GEN8_GMUCX_POWER_COUNTER_GMUCLK_L_15,
+		GEN8_GMUCX_POWER_COUNTER_GMUCLK_H_15,
+		GEN8_GMUCX_POWER_COUNTER_SELECT_GMUCLK_3),
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_gmu_perf[] = {
+	GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_0,
+		GEN8_GMUCX_PERF_COUNTER_H_0,
+		GEN8_GMUCX_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_1,
+		GEN8_GMUCX_PERF_COUNTER_H_1,
+		GEN8_GMUCX_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_2,
+		GEN8_GMUCX_PERF_COUNTER_H_2,
+		GEN8_GMUCX_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_3,
+		GEN8_GMUCX_PERF_COUNTER_H_3,
+		GEN8_GMUCX_PERF_COUNTER_SELECT_0),
+	GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_4,
+		GEN8_GMUCX_PERF_COUNTER_H_4,
+		GEN8_GMUCX_PERF_COUNTER_SELECT_1),
+	GMU_COUNTER(GEN8_GMUCX_PERF_COUNTER_L_5,
+		GEN8_GMUCX_PERF_COUNTER_H_5,
+		GEN8_GMUCX_PERF_COUNTER_SELECT_1),
+};
+
+static struct adreno_perfcount_register gen8_perfcounters_alwayson[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN8_CP_ALWAYS_ON_COUNTER_LO,
+		GEN8_CP_ALWAYS_ON_COUNTER_HI, -1 },
+};
+
+/*
+ * ADRENO_PERFCOUNTER_GROUP_RESTORE flag is enabled by default
+ * because most of the perfcounter groups need to be restored
+ * as part of preemption and IFPC. Perfcounter groups that are
+ * not restored as part of preemption and IFPC should be defined
+ * using the GEN8_PERFCOUNTER_GROUP_FLAGS macro.
+ */
+
+#define GEN8_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags, \
+		enable, read) \
+	[KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \
+	ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags, \
+	enable, read }
+
+#define GEN8_PERFCOUNTER_GROUP(offset, name, enable, read) \
+	GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, offset, name, \
+	ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read)
+
+#define GEN8_REGULAR_PERFCOUNTER_GROUP(offset, name) \
+	GEN8_PERFCOUNTER_GROUP(offset, name, \
+		gen8_counter_enable, gen8_counter_read)
+
+#define GEN8_BV_PERFCOUNTER_GROUP(offset, name, enable, read) \
+	GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, BV_##offset, bv_##name, \
+	ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read)
+
+#define GEN8_BV_REGULAR_PERFCOUNTER_GROUP(offset, name) \
+	GEN8_BV_PERFCOUNTER_GROUP(offset, name, \
+		gen8_counter_enable, gen8_counter_read)
+
+static const struct adreno_perfcount_group gen8_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	GEN8_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, RBBM, rbbm, 0,
+		gen8_counter_enable, gen8_counter_read),
+	GEN8_PERFCOUNTER_GROUP(PC, pc, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_PERFCOUNTER_GROUP(VFD, vfd, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_PERFCOUNTER_GROUP(HLSQ, hlsq, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_PERFCOUNTER_GROUP(VPC, vpc, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_PERFCOUNTER_GROUP(CCU, ccu, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_PERFCOUNTER_GROUP(CMP, cmp, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_PERFCOUNTER_GROUP(TSE, tse, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_PERFCOUNTER_GROUP(RAS, ras, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_PERFCOUNTER_GROUP(LRZ, lrz, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
+	GEN8_REGULAR_PERFCOUNTER_GROUP(TP, tp),
+	GEN8_REGULAR_PERFCOUNTER_GROUP(SP, sp),
+	GEN8_PERFCOUNTER_GROUP(RB, rb, gen8_counter_br_enable, gen8_counter_read),
+	GEN8_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
+	GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, VBIF, gbif, 0,
+		gen8_counter_gbif_enable, gen8_counter_read_norestore),
+	GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, VBIF_PWR, gbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		gen8_counter_gbif_pwr_enable, gen8_counter_read_norestore),
+	GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED,
+		gen8_counter_alwayson_enable, gen8_counter_alwayson_read),
+	GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_XOCLK, gmu_xoclk, 0,
+		gen8_counter_gmu_pwr_enable, gen8_counter_read_norestore),
+	GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_GMUCLK, gmu_gmuclk, 0,
+		gen8_counter_gmu_pwr_enable, gen8_counter_read_norestore),
+	GEN8_PERFCOUNTER_GROUP_FLAGS(gen8, GMU_PERF, gmu_perf, 0,
+		gen8_counter_gmu_perf_enable, gen8_counter_read_norestore),
+	GEN8_REGULAR_PERFCOUNTER_GROUP(UFC, ufc),
+	GEN8_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp),
+	GEN8_BV_PERFCOUNTER_GROUP(PC, pc, gen8_counter_bv_enable, gen8_counter_read),
+	GEN8_BV_PERFCOUNTER_GROUP(VFD, vfd, gen8_counter_bv_enable, gen8_counter_read),
+	GEN8_BV_PERFCOUNTER_GROUP(VPC, vpc, gen8_counter_bv_enable, gen8_counter_read),
+	GEN8_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp),
+	GEN8_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp),
+	GEN8_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc),
+	GEN8_BV_PERFCOUNTER_GROUP(TSE, tse, gen8_counter_bv_enable, gen8_counter_read),
+	GEN8_BV_PERFCOUNTER_GROUP(RAS, ras, gen8_counter_bv_enable, gen8_counter_read),
+	GEN8_BV_PERFCOUNTER_GROUP(LRZ, lrz, gen8_counter_bv_enable, gen8_counter_read),
+	GEN8_BV_PERFCOUNTER_GROUP(HLSQ, hlsq, gen8_counter_bv_enable, gen8_counter_read),
+};
+
+const struct adreno_perfcounters adreno_gen8_perfcounters = {
+	gen8_perfcounter_groups,
+	ARRAY_SIZE(gen8_perfcounter_groups),
+};
+

+ 807 - 0
qcom/opensource/graphics-kernel/adreno_gen8_preempt.c

@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+
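+/*
+ * Byte offsets of fields within the CP preemption record and SMMU info
+ * structures; used with the kgsl_sharedmem_write*() calls below to update
+ * individual fields of the shared records in place.
+ */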
+#define PREEMPT_RECORD(_field) \
+		offsetof(struct gen8_cp_preemption_record, _field)
+
+#define PREEMPT_SMMU_RECORD(_field) \
+		offsetof(struct gen8_cp_smmu_info, _field)
+
+static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer,
+	bool atomic)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+
+	if (!atomic) {
+		/*
+		 * We might have skipped updating the wptr in case we are in
+		 * dispatcher context. Do it now.
+		 */
+		if (rb->skip_inline_wptr) {
+
+			ret = gen8_fenced_write(adreno_dev,
+				GEN8_CP_RB_WPTR_GC, rb->wptr,
+				FENCE_STATUS_WRITEDROPPED0_MASK);
+
+			reset_timer = true;
+			rb->skip_inline_wptr = false;
+		}
+	} else {
+		u32 wptr;
+
+		kgsl_regread(device, GEN8_CP_RB_WPTR_GC, &wptr);
+		if (wptr != rb->wptr) {
+			kgsl_regwrite(device, GEN8_CP_RB_WPTR_GC, rb->wptr);
+			reset_timer = true;
+		}
+	}
+
+	if (reset_timer)
+		rb->dispatch_q.expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+	if (!atomic) {
+		/* If WPTR update fails, set the fault and trigger recovery */
+		if (ret) {
+			gmu_core_fault_snapshot(device);
+			adreno_dispatcher_fault(adreno_dev,
+				ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+		}
+	}
+}
+
+static void _power_collapse_set(struct adreno_device *adreno_dev, bool val)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	gmu_core_regwrite(device,
+			GEN8_GMUCX_PWR_COL_PREEMPTION_KEEPALIVE, (val ? 1 : 0));
+}
+
+static void _gen8_preemption_done(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	/*
+	 * In the very unlikely case that the power is off, do nothing - the
+	 * state will be reset on power up and everybody will be happy
+	 */
+
+	if (!kgsl_state_is_awake(device))
+		return;
+
+	kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &status);
+
+	if (status & 0x1) {
+		dev_err(device->dev,
+			"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
+			status, adreno_dev->cur_rb->id,
+			adreno_get_rptr(adreno_dev->cur_rb),
+			adreno_dev->cur_rb->wptr,
+			adreno_dev->next_rb->id,
+			adreno_get_rptr(adreno_dev->next_rb),
+			adreno_dev->next_rb->wptr);
+
+		/* Set a fault and restart */
+		adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+
+		return;
+	}
+
+	adreno_dev->preempt.count++;
+
+	del_timer_sync(&adreno_dev->preempt.timer);
+
+	kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		status, 0);
+
+	/* Clean up all the bits */
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr for the new command queue */
+	_update_wptr(adreno_dev, true, false);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	/* Clear the preempt state */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+}
+
+static void _gen8_preemption_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	/*
+	 * If the power is on check the preemption status one more time - if it
+	 * was successful then just transition to the complete state
+	 */
+	if (kgsl_state_is_awake(device)) {
+		kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &status);
+
+		if (!(status & 0x1)) {
+			adreno_set_preempt_state(adreno_dev,
+				ADRENO_PREEMPT_COMPLETE);
+
+			adreno_dispatcher_schedule(device);
+			return;
+		}
+	}
+
+	dev_err(device->dev,
+		"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
+		adreno_dev->cur_rb->id,
+		adreno_get_rptr(adreno_dev->cur_rb),
+		adreno_dev->cur_rb->wptr,
+		adreno_dev->next_rb->id,
+		adreno_get_rptr(adreno_dev->next_rb),
+		adreno_dev->next_rb->wptr);
+
+	adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+}
+
+static void _gen8_preemption_worker(struct work_struct *work)
+{
+	struct adreno_preemption *preempt = container_of(work,
+		struct adreno_preemption, work);
+	struct adreno_device *adreno_dev = container_of(preempt,
+		struct adreno_device, preempt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Need to take the mutex to make sure that the power stays on */
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
+		_gen8_preemption_fault(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+/* Find the highest priority active ringbuffer */
+static struct adreno_ringbuffer *gen8_next_ringbuffer(
+		struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb;
+	unsigned long flags;
+	u32 i;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		bool empty;
+
+		spin_lock_irqsave(&rb->preempt_lock, flags);
+		empty = adreno_rb_empty(rb);
+		spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+		if (!empty)
+			return rb;
+	}
+
+	return NULL;
+}
+
+void gen8_preemption_trigger(struct adreno_device *adreno_dev, bool atomic)
+{
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_ringbuffer *next;
+	u64 ttbr0, gpuaddr;
+	u32 contextidr, cntl;
+	unsigned long flags;
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+
+	/* Put ourselves into a possible trigger state */
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
+		return;
+
+	/* Get the next ringbuffer to preempt in */
+	next = gen8_next_ringbuffer(adreno_dev);
+
+	/*
+	 * Nothing to do if every ringbuffer is empty or if the current
+	 * ringbuffer is the only active one
+	 */
+	if (next == NULL || next == adreno_dev->cur_rb) {
+		/*
+		 * Update any critical things that might have been skipped while
+		 * we were looking for a new ringbuffer
+		 */
+
+		if (next != NULL) {
+			_update_wptr(adreno_dev, false, atomic);
+
+			mod_timer(&adreno_dev->dispatcher.timer,
+				adreno_dev->cur_rb->dispatch_q.expires);
+		}
+
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+		return;
+	}
+
+	/* Turn off the dispatcher timer */
+	del_timer(&adreno_dev->dispatcher.timer);
+
+	/*
+	 * This is the most critical section - we need to take care not to race
+	 * until we have programmed the CP for the switch
+	 */
+
+	spin_lock_irqsave(&next->preempt_lock, flags);
+
+	/* Get the pagetable from the pagetable info. */
+	kgsl_sharedmem_readq(device->scratch, &ttbr0,
+		SCRATCH_RB_OFFSET(next->id, ttbr0));
+	kgsl_sharedmem_readl(device->scratch, &contextidr,
+		SCRATCH_RB_OFFSET(next->id, contextidr));
+
+	kgsl_sharedmem_writel(next->preemption_desc,
+		PREEMPT_RECORD(wptr), next->wptr);
+
+	spin_unlock_irqrestore(&next->preempt_lock, flags);
+
+	/* And write it to the smmu info */
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		kgsl_sharedmem_writeq(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(context_idr), contextidr);
+	}
+
+	kgsl_sharedmem_readq(preempt->scratch, &gpuaddr,
+		next->id * sizeof(u64));
+
+	/*
+	 * Set a keepalive bit before the first preemption register write.
+	 * This is required since while each individual write to the context
+	 * switch registers will wake the GPU from collapse, it will not in
+	 * itself cause GPU activity. Thus, the GPU could technically be
+	 * re-collapsed between subsequent register writes leading to a
+	 * prolonged preemption sequence. The keepalive bit prevents any
+	 * further power collapse while it is set.
+	 * It is more efficient to use a keepalive+wake-on-fence approach here
+	 * rather than an OOB. Both keepalive and the fence are effectively
+	 * free when the GPU is already powered on, whereas an OOB requires an
+	 * unconditional handshake with the GMU.
+	 */
+	_power_collapse_set(adreno_dev, true);
+
+	/*
+	 * Fenced writes on this path will make sure the GPU is woken up
+	 * in case it was power collapsed by the GMU.
+	 */
+	if (gen8_fenced_write(adreno_dev,
+		GEN8_CP_CONTEXT_SWITCH_PNSR_ADDR_LO,
+		lower_32_bits(next->preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	/*
+	 * The fenced writes above make sure the GMU comes out of IFPC
+	 * if it was in that state, but they don't guarantee that the
+	 * GMU firmware has actually moved to the ACTIVE state, i.e.
+	 * that the wake-up from IFPC is complete.
+	 * Wait for the GMU to move to the ACTIVE state before
+	 * triggering preemption. This is required to make sure the CP
+	 * doesn't interrupt the GMU during wake-up from IFPC.
+	 */
+	if (!atomic && gmu_core_dev_wait_for_active_transition(device))
+		goto err;
+
+	if (gen8_fenced_write(adreno_dev,
+		GEN8_CP_CONTEXT_SWITCH_PNSR_ADDR_HI,
+		upper_32_bits(next->preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (gen8_fenced_write(adreno_dev,
+		GEN8_CP_CONTEXT_SWITCH_PSR_ADDR_LO,
+		lower_32_bits(next->secure_preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (gen8_fenced_write(adreno_dev,
+		GEN8_CP_CONTEXT_SWITCH_PSR_ADDR_HI,
+		upper_32_bits(next->secure_preemption_desc->gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (gen8_fenced_write(adreno_dev,
+		GEN8_CP_CONTEXT_SWITCH_NPR_ADDR_LO,
+		lower_32_bits(gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	if (gen8_fenced_write(adreno_dev,
+		GEN8_CP_CONTEXT_SWITCH_NPR_ADDR_HI,
+		upper_32_bits(gpuaddr),
+		FENCE_STATUS_WRITEDROPPED1_MASK))
+		goto err;
+
+	adreno_dev->next_rb = next;
+
+	/* Start the timer to detect a stuck preemption */
+	mod_timer(&adreno_dev->preempt.timer,
+		jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
+
+	cntl = (preempt->preempt_level << 6) | 0x01;
+
+	/* Skip save/restore during L1 preemption */
+	if (preempt->skipsaverestore)
+		cntl |= (1 << 9);
+
+	/* Enable GMEM save/restore across preemption */
+	if (preempt->usesgmem)
+		cntl |= (1 << 8);
+
+	trace_adreno_preempt_trigger(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		cntl, 0);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
+
+	if (gen8_core->qos_value)
+		kgsl_sharedmem_writel(preempt->scratch,
+			PREEMPT_SCRATCH_OFFSET(QOS_VALUE_IDX),
+			gen8_core->qos_value[next->id]);
+
+	/* Trigger the preemption */
+	if (gen8_fenced_write(adreno_dev, GEN8_CP_CONTEXT_SWITCH_CNTL, cntl,
+					FENCE_STATUS_WRITEDROPPED1_MASK)) {
+		adreno_dev->next_rb = NULL;
+		del_timer(&adreno_dev->preempt.timer);
+		goto err;
+	}
+
+	return;
+err:
+	/* If fenced write fails, take inline snapshot and trigger recovery */
+	if (!in_interrupt()) {
+		gmu_core_fault_snapshot(device);
+		adreno_dispatcher_fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+	} else {
+		adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
+	}
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+	/* Clear the keep alive */
+	_power_collapse_set(adreno_dev, false);
+}
+
+void gen8_preemption_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 status;
+
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
+		return;
+
+	kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &status);
+
+	if (status & 0x1) {
+		dev_err(KGSL_DEVICE(adreno_dev)->dev,
+			"preempt interrupt with non-zero status: %X\n",
+			status);
+
+		/*
+		 * Under the assumption that this is a race between the
+		 * interrupt and the register, schedule the worker to clean up.
+		 * If the status still hasn't resolved itself by the time we get
+		 * there then we have to assume something bad happened
+		 */
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
+		adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+		return;
+	}
+
+	adreno_dev->preempt.count++;
+
+	/*
+	 * We can now safely clear the preemption keepalive bit, allowing
+	 * power collapse to resume its regular activity.
+	 */
+	_power_collapse_set(adreno_dev, false);
+
+	del_timer(&adreno_dev->preempt.timer);
+
+	kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
+		status, 0);
+
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr if it changed while preemption was ongoing */
+	_update_wptr(adreno_dev, true, true);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	gen8_preemption_trigger(adreno_dev, true);
+}
+
+void gen8_preemption_prepare_postamble(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	u32 *postamble, count = 0;
+
+	/*
+	 * First 28 dwords of the device scratch buffer are used to store shadow rb data.
+	 * Reserve 15 dwords in the device scratch buffer from SCRATCH_POSTAMBLE_OFFSET for
+	 * KMD postamble pm4 packets. This should be in *device->scratch* so that userspace
+	 * cannot access it.
+	 */
+	postamble = device->scratch->hostptr + SCRATCH_POSTAMBLE_OFFSET;
+
+	/*
+	 * Reserve 4 dwords in the scratch buffer for dynamic QOS control feature. To ensure QOS
+	 * value is updated for first preemption, send it during bootup
+	 */
+	if (gen8_core->qos_value) {
+		postamble[count++] = cp_type7_packet(CP_MEM_TO_REG, 3);
+		postamble[count++] = GEN8_RBBM_GBIF_CLIENT_QOS_CNTL;
+		postamble[count++] = lower_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX));
+		postamble[count++] = upper_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX));
+	}
+
+	/*
+	 * Since postambles are not preserved across slumber, necessary packets
+	 * must be sent to GPU before first submission.
+	 *
+	 * If a packet needs to be sent before first submission, add it above this.
+	 */
+	preempt->postamble_bootup_len = count;
+
+	/* Reserve 15 dwords in the device scratch buffer to clear perfcounters */
+	if (!adreno_dev->perfcounter) {
+		postamble[count++] = cp_type7_packet(CP_REG_RMW, 3);
+		postamble[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_CMD;
+		postamble[count++] = 0x0;
+		postamble[count++] = 0x1;
+		postamble[count++] = cp_type7_packet(CP_REG_RMW, 3);
+		postamble[count++] = GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD;
+		postamble[count++] = 0x0;
+		postamble[count++] = 0x1;
+
+		postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
+		postamble[count++] = 0x3;
+		postamble[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS;
+		postamble[count++] = 0x0;
+		postamble[count++] = 0x1;
+		postamble[count++] = 0x1;
+		postamble[count++] = 0x0;
+	}
+
+	preempt->postamble_len = count;
+}
+
+void gen8_preemption_schedule(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
+		_gen8_preemption_done(adreno_dev);
+
+	gen8_preemption_trigger(adreno_dev, false);
+
+	mutex_unlock(&device->mutex);
+}
+
+u32 gen8_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 *cmds)
+{
+	u32 *cmds_orig = cmds;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags))
+		goto done;
+
+	*cmds++ = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	*cmds++ = CP_SET_THREAD_BR;
+
+	*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12);
+
+	/* NULL SMMU_INFO buffer - we track in KMD */
+	*cmds++ = SET_PSEUDO_SMMU_INFO;
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
+
+	*cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr);
+
+	*cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			rb->secure_preemption_desc->gpuaddr);
+
+	/*
+	 * There is no need to specify this address again when we are about
+	 * to trigger preemption. The CP internally copies the address given
+	 * here in the CP_SET_PSEUDO_REGISTER payload into the context record
+	 * and thus knows where to restore the saved perfcounters from for
+	 * the new ringbuffer.
+	 */
+	*cmds++ = SET_PSEUDO_COUNTER;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			rb->perfcounter_save_restore_desc->gpuaddr);
+
+done:
+	if (drawctxt) {
+		struct adreno_ringbuffer *rb = drawctxt->rb;
+		u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id);
+		u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
+
+		*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
+		cmds += cp_gpuaddr(adreno_dev, cmds, dest);
+		*cmds++ = lower_32_bits(gpuaddr);
+		*cmds++ = upper_32_bits(gpuaddr);
+
+		if (adreno_dev->preempt.postamble_len) {
+			u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev));
+
+			*cmds++ = cp_type7_packet(CP_SET_AMBLE, 3);
+			*cmds++ = lower_32_bits(kmd_postamble_addr);
+			*cmds++ = upper_32_bits(kmd_postamble_addr);
+			*cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE)
+				| (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len));
+		}
+	}
+
+	return (u32) (cmds - cmds_orig);
+}
+
+u32 gen8_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+		u32 *cmds)
+{
+	u32 index = 0;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	if (adreno_dev->cur_rb) {
+		u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, adreno_dev->cur_rb->id);
+
+		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 4);
+		cmds[index++] = lower_32_bits(dest);
+		cmds[index++] = upper_32_bits(dest);
+		cmds[index++] = 0;
+		cmds[index++] = 0;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BOTH;
+	cmds[index++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	cmds[index++] = 0;
+	cmds[index++] = 0;
+	cmds[index++] = 1;
+	cmds[index++] = 0;
+
+	return index;
+}
+
+void gen8_preemption_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_ringbuffer *rb;
+	u32 i;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	/* Force the state to be clear */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		/* smmu_info is allocated and mapped in gen8_preemption_iommu_init */
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(magic), GEN8_CP_SMMU_INFO_MAGIC_REF);
+		kgsl_sharedmem_writeq(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
+
+		/* The CP doesn't use the asid record, so poison it */
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(asid), 0xdecafbad);
+		kgsl_sharedmem_writel(iommu->smmu_info,
+			PREEMPT_SMMU_RECORD(context_idr), 0);
+
+		kgsl_regwrite(device, GEN8_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+			lower_32_bits(iommu->smmu_info->gpuaddr));
+
+		kgsl_regwrite(device, GEN8_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+			upper_32_bits(iommu->smmu_info->gpuaddr));
+	}
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		kgsl_sharedmem_writel(rb->preemption_desc,
+			PREEMPT_RECORD(rptr), 0);
+		kgsl_sharedmem_writel(rb->preemption_desc,
+			PREEMPT_RECORD(wptr), 0);
+
+		adreno_ringbuffer_set_pagetable(device, rb,
+			device->mmu.defaultpagetable);
+
+		clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags);
+	}
+}
+
+static void reset_rb_preempt_record(struct adreno_device *adreno_dev,
+	struct adreno_ringbuffer *rb)
+{
+	memset(rb->preemption_desc->hostptr, 0x0, rb->preemption_desc->size);
+
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(magic), GEN8_CP_CTXRECORD_MAGIC_REF);
+	kgsl_sharedmem_writel(rb->preemption_desc,
+		PREEMPT_RECORD(cntl), GEN8_CP_RB_CNTL_DEFAULT);
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(
+		KGSL_DEVICE(adreno_dev), rb->id, rptr));
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr);
+	kgsl_sharedmem_writeq(rb->preemption_desc,
+		PREEMPT_RECORD(bv_rptr_addr), SCRATCH_RB_GPU_ADDR(
+		KGSL_DEVICE(adreno_dev), rb->id, bv_rptr));
+}
+
+void gen8_reset_preempt_records(struct adreno_device *adreno_dev)
+{
+	int i;
+	struct adreno_ringbuffer *rb;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		reset_rb_preempt_record(adreno_dev, rb);
+	}
+}
+
+static int gen8_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
+	struct adreno_ringbuffer *rb)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	u64 ctxt_record_size = GEN8_CP_CTXRECORD_SIZE_IN_BYTES;
+	int ret;
+
+	if (gen8_core->ctxt_record_size)
+		ctxt_record_size = gen8_core->ctxt_record_size;
+
+	ret = adreno_allocate_global(device, &rb->preemption_desc,
+		ctxt_record_size, SZ_16K, 0,
+		KGSL_MEMDESC_PRIVILEGED, "preemption_desc");
+	if (ret)
+		return ret;
+
+	ret = adreno_allocate_global(device, &rb->secure_preemption_desc,
+		ctxt_record_size, 0,
+		KGSL_MEMFLAGS_SECURE, KGSL_MEMDESC_PRIVILEGED,
+		"secure_preemption_desc");
+	if (ret)
+		return ret;
+
+	ret = adreno_allocate_global(device, &rb->perfcounter_save_restore_desc,
+		GEN8_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0, 0,
+		KGSL_MEMDESC_PRIVILEGED,
+		"perfcounter_save_restore_desc");
+	if (ret)
+		return ret;
+
+	reset_rb_preempt_record(adreno_dev, rb);
+
+	return 0;
+}
+
+int gen8_preemption_init(struct adreno_device *adreno_dev)
+{
+	u32 flags = ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? KGSL_MEMDESC_PRIVILEGED : 0;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	struct adreno_ringbuffer *rb;
+	int ret;
+	u32 i;
+
+	/* We are dependent on IOMMU to make preemption go on the CP side */
+	if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU) {
+		ret = -ENODEV;
+		goto done;
+	}
+
+	INIT_WORK(&preempt->work, _gen8_preemption_worker);
+
+	/* Allocate mem for storing preemption switch record */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		ret = gen8_preemption_ringbuffer_init(adreno_dev, rb);
+		if (ret)
+			goto done;
+	}
+
+	ret = adreno_allocate_global(device, &preempt->scratch, PAGE_SIZE,
+			0, 0, flags, "preempt_scratch");
+	if (ret)
+		goto done;
+
+	/* Allocate mem for storing preemption smmu record */
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		ret = adreno_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, 0,
+			KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
+			"smmu_info");
+		if (ret)
+			goto done;
+	}
+
+	return 0;
+done:
+	clear_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+	return ret;
+}
+
+int gen8_preemption_context_init(struct kgsl_context *context)
+{
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	u64 flags = 0;
+
+	if (!adreno_preemption_feature_set(adreno_dev))
+		return 0;
+
+	if (context->flags & KGSL_CONTEXT_SECURE)
+		flags |= KGSL_MEMFLAGS_SECURE;
+
+	if (is_compat_task())
+		flags |= KGSL_MEMFLAGS_FORCE_32BIT;
+
+	/*
+	 * gpumem_alloc_entry takes an extra refcount. Put it only when
+	 * destroying the context to keep the context record valid
+	 */
+	context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv,
+			GEN8_CP_CTXRECORD_USER_RESTORE_SIZE, flags);
+	if (IS_ERR(context->user_ctxt_record)) {
+		int ret = PTR_ERR(context->user_ctxt_record);
+
+		context->user_ctxt_record = NULL;
+		return ret;
+	}
+
+	return 0;
+}

+ 649 - 0
qcom/opensource/graphics-kernel/adreno_gen8_ringbuffer.c

@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "adreno_pm4types.h"
+#include "adreno_ringbuffer.h"
+#include "adreno_trace.h"
+#include "kgsl_trace.h"
+
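+/*
+ * Concurrent binning lets the CP run the BV and BR threads in parallel.
+ * It is used only for user draw contexts that are not secure; kernel
+ * (NULL drawctxt) and secure submissions skip it.
+ */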
+static bool is_concurrent_binning(struct adreno_context *drawctxt)
+{
+	if (!drawctxt)
+		return false;
+
+	return !(drawctxt->base.flags & KGSL_CONTEXT_SECURE);
+}
+
+static int gen8_rb_pagetable_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		struct kgsl_pagetable *pagetable, u32 *cmds)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
+	int count = 0;
+	u32 id = drawctxt ? drawctxt->base.id : 0;
+
+	if (pagetable == device->mmu.defaultpagetable)
+		return 0;
+
+	/* CP switches the pagetable and flushes the Caches */
+	cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
+	cmds[count++] = lower_32_bits(ttbr0);
+	cmds[count++] = upper_32_bits(ttbr0);
+	cmds[count++] = id;
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5);
+	cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
+				rb->id, ttbr0));
+	cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
+				rb->id, ttbr0));
+	cmds[count++] = lower_32_bits(ttbr0);
+	cmds[count++] = upper_32_bits(ttbr0);
+	cmds[count++] = id;
+
+	/*
+	 * Sync both threads after switching pagetables and enable BR only
+	 * to make sure BV doesn't race ahead while BR is still switching
+	 * pagetables.
+	 */
+	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;
+
+	return count;
+}
+
+static int gen8_rb_context_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_pagetable *pagetable =
+		adreno_drawctxt_get_pagetable(drawctxt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int count = 0;
+	u32 cmds[57];
+
+	/* Sync both threads */
+	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH;
+	/* Reset context state */
+	cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1);
+	cmds[count++] = CP_RESET_GLOBAL_LOCAL_TS | CP_CLEAR_BV_BR_COUNTER |
+			CP_CLEAR_RESOURCE_TABLE | CP_CLEAR_ON_CHIP_TS;
+	/*
+	 * Enable/disable concurrent binning for pagetable switch and
+	 * set the thread to BR since only BR can execute the pagetable
+	 * switch packets.
+	 */
+	/* Sync both threads and enable BR only */
+	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;
+
+	if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) {
+
+		/* Clear performance counters during context switches */
+		if (!adreno_dev->perfcounter) {
+			cmds[count++] = cp_type4_packet(GEN8_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
+			cmds[count++] = 0x1;
+			cmds[count++] = cp_type4_packet(GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
+			cmds[count++] = 0x1;
+		}
+
+		count += gen8_rb_pagetable_switch(adreno_dev, rb,
+			drawctxt, pagetable, &cmds[count]);
+
+		/* Wait for performance counter clear to finish */
+		if (!adreno_dev->perfcounter) {
+			cmds[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
+			cmds[count++] = 0x3;
+			cmds[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS;
+			cmds[count++] = 0x0;
+			cmds[count++] = 0x1;
+			cmds[count++] = 0x1;
+			cmds[count++] = 0x0;
+		}
+	} else {
+		struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+
+		u32 offset = GEN8_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d;
+
+		/*
+		 * Set the CONTEXTIDR register to the current context id so we
+		 * can use it in pagefault debugging. Unlike TTBR0 we don't
+		 * need any special sequence or locking to change it
+		 */
+		cmds[count++] = cp_type4_packet(offset, 1);
+		cmds[count++] = drawctxt->base.id;
+	}
+
+	cmds[count++] = cp_type7_packet(CP_NOP, 1);
+	cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
+				current_context));
+	cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
+				current_context));
+	cmds[count++] = drawctxt->base.id;
+
+	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+		KGSL_MEMSTORE_GLOBAL, current_context));
+	cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
+		KGSL_MEMSTORE_GLOBAL, current_context));
+	cmds[count++] = drawctxt->base.id;
+
+	cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1);
+	cmds[count++] = 0x31;
+
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
+
+		cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3);
+		cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR;
+		cmds[count++] = lower_32_bits(gpuaddr);
+		cmds[count++] = upper_32_bits(gpuaddr);
+	}
+
+	return gen8_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
+			cmds, count, 0, NULL);
+}
+
+#define RB_SOPTIMESTAMP(device, rb) \
+	MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
+#define CTXT_SOPTIMESTAMP(device, drawctxt) \
+	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
+
+#define RB_EOPTIMESTAMP(device, rb) \
+	MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
+#define CTXT_EOPTIMESTAMP(device, drawctxt) \
+	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
+
+int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+	unsigned long flags;
+
+	adreno_get_submit_time(adreno_dev, rb, time);
+	adreno_profile_submit_time(time);
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+		if (adreno_dev->cur_rb == rb) {
+			kgsl_pwrscale_busy(device);
+			ret = gen8_fenced_write(adreno_dev,
+				GEN8_CP_RB_WPTR_GC, rb->_wptr,
+				FENCE_STATUS_WRITEDROPPED0_MASK);
+			rb->skip_inline_wptr = false;
+		}
+	} else {
+		if (adreno_dev->cur_rb == rb)
+			rb->skip_inline_wptr = true;
+	}
+
+	rb->wptr = rb->_wptr;
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+	if (ret) {
+		/*
+		 * If WPTR update fails, take inline snapshot and trigger
+		 * recovery.
+		 */
+		gmu_core_fault_snapshot(device);
+		adreno_dispatcher_fault(adreno_dev,
+			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+	}
+
+	return ret;
+}
+
+int gen8_ringbuffer_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int i, ret;
+
+	ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE,
+			0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
+			"scratch");
+	if (ret)
+		return ret;
+
+	adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
+
+	if (!adreno_preemption_feature_set(adreno_dev)) {
+		adreno_dev->num_ringbuffers = 1;
+		return adreno_ringbuffer_setup(adreno_dev,
+				&adreno_dev->ringbuffers[0], 0);
+	}
+
+	adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);
+
+	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
+		int ret;
+
+		ret = adreno_ringbuffer_setup(adreno_dev,
+				&adreno_dev->ringbuffers[i], i);
+		if (ret)
+			return ret;
+	}
+
+	timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);
+	gen8_preemption_init(adreno_dev);
+	return 0;
+}
+
+#define GEN8_SUBMIT_MAX 104
+
+int gen8_ringbuffer_addcmds(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
+		u32 flags, u32 *in, u32 dwords, u32 timestamp,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 size = GEN8_SUBMIT_MAX + dwords;
+	u32 *cmds, index = 0;
+	u64 profile_gpuaddr;
+	u32 profile_dwords;
+
+	if (adreno_drawctxt_detached(drawctxt))
+		return -ENOENT;
+
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return -EPROTO;
+
+	rb->timestamp++;
+
+	if (drawctxt)
+		drawctxt->internal_timestamp = rb->timestamp;
+
+	/* All submissions are run with protected mode off due to APRIV */
+	flags &= ~F_NOTPROTECTED;
+
+	cmds = adreno_ringbuffer_allocspace(rb, size);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	/* Identify the start of a command */
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
+
+	/* This is 25 dwords when drawctxt is not NULL and the perfcounters need to be zapped */
+	index += gen8_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
+		&cmds[index]);
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BOTH;
+
+	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+	cmds[index++] = 0x101; /* IFPC disable */
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BR;
+
+	profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
+		drawctxt, &profile_dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = upper_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	if (drawctxt) {
+		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
+		cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
+	cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
+	cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
+	cmds[index++] = rb->timestamp;
+
+	if (IS_SECURE(flags)) {
+		/* Sync BV and BR if entering secure mode */
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SYNC_THREADS | CP_CONCURRENT_BIN_DISABLE;
+		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		cmds[index++] = 1;
+	}
+
+	memcpy(&cmds[index], in, dwords << 2);
+	index += dwords;
+
+	profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
+		drawctxt, &dwords);
+
+	if (profile_gpuaddr) {
+		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+		cmds[index++] = lower_32_bits(profile_gpuaddr);
+		cmds[index++] = upper_32_bits(profile_gpuaddr);
+		cmds[index++] = profile_dwords;
+	}
+
+	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy))
+		cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);
+
+	if (is_concurrent_binning(drawctxt)) {
+		u64 addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_ts);
+
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SET_THREAD_BV;
+
+		/*
+		 * Make sure the timestamp is committed once BV pipe is
+		 * completely done with this submission.
+		 */
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_CLEAN | BIT(27);
+		cmds[index++] = lower_32_bits(addr);
+		cmds[index++] = upper_32_bits(addr);
+		cmds[index++] = rb->timestamp;
+
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SET_THREAD_BR;
+
+		/*
+		 * This makes sure that BR doesn't race ahead and commit
+		 * timestamp to memstore while BV is still processing
+		 * this submission.
+		 */
+		cmds[index++] = cp_type7_packet(CP_WAIT_TIMESTAMP, 4);
+		cmds[index++] = 0;
+		cmds[index++] = lower_32_bits(addr);
+		cmds[index++] = upper_32_bits(addr);
+		cmds[index++] = rb->timestamp;
+	}
+
+	/*
+	 * If this is an internal command, just write the ringbuffer timestamp,
+	 * otherwise, write both
+	 */
+	if (!drawctxt) {
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	} else {
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
+		cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
+					drawctxt));
+		cmds[index++] = timestamp;
+
+		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
+		cmds[index++] = CACHE_CLEAN | BIT(27);
+		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
+		cmds[index++] = rb->timestamp;
+	}
+
+	if (IS_WFI(flags))
+		cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+
+	if (IS_SECURE(flags)) {
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_CONCURRENT_BIN_DISABLE;
+		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
+		cmds[index++] = 0;
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SYNC_THREADS;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BOTH;
+
+	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+	cmds[index++] = 0x100; /* IFPC enable */
+
+	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+	cmds[index++] = CP_SET_THREAD_BR;
+
+	/* 10 dwords */
+	index += gen8_preemption_post_ibsubmit(adreno_dev, &cmds[index]);
+
+	/* Adjust the ringbuffer write pointer for the number of dwords we actually wrote */
+	rb->_wptr -= (size - index);
+
+	return gen8_ringbuffer_submit(rb, time);
+}
+
+static u32 gen8_get_alwayson_counter(u32 *cmds, u64 gpuaddr)
+{
+	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
+	cmds[1] = GEN8_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18);
+	cmds[2] = lower_32_bits(gpuaddr);
+	cmds[3] = upper_32_bits(gpuaddr);
+
+	return 4;
+}
+
+static u32 gen8_get_alwayson_context(u32 *cmds, u64 gpuaddr)
+{
+	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
+	cmds[1] = GEN8_CP_ALWAYS_ON_CONTEXT_LO | (1 << 30) | (2 << 18);
+	cmds[2] = lower_32_bits(gpuaddr);
+	cmds[3] = upper_32_bits(gpuaddr);
+
+	return 4;
+}
+
+#define PROFILE_IB_DWORDS 4
+#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
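+/*
+ * Each profiling slot is PROFILE_IB_DWORDS * 4 = 16 bytes, so with 4K
+ * pages the profile buffer holds 256 slots, which are reused round-robin
+ * via rb->profile_index below.
+ */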
+
+static u64 gen8_get_user_profiling_ib(struct adreno_ringbuffer *rb,
+		struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds)
+{
+	u32 offset, *ib, dwords;
+
+	if (IS_ERR(rb->profile_desc))
+		return 0;
+
+	offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
+	ib = rb->profile_desc->hostptr + offset;
+	dwords = gen8_get_alwayson_counter(ib,
+		cmdobj->profiling_buffer_gpuaddr + target_offset);
+
+	cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+	cmds[1] = lower_32_bits(rb->profile_desc->gpuaddr + offset);
+	cmds[2] = upper_32_bits(rb->profile_desc->gpuaddr + offset);
+	cmds[3] = dwords;
+
+	rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
+
+	return 4;
+}
+
+static int gen8_drawctxt_switch(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (rb->drawctxt_active == drawctxt)
+		return 0;
+
+	if (kgsl_context_detached(&drawctxt->base))
+		return -ENOENT;
+
+	if (!_kgsl_context_get(&drawctxt->base))
+		return -ENOENT;
+
+	ret = gen8_rb_context_switch(adreno_dev, rb, drawctxt);
+	if (ret) {
+		kgsl_context_put(&drawctxt->base);
+		return ret;
+	}
+
+	trace_adreno_drawctxt_switch(rb, drawctxt);
+
+	/* Release the current drawctxt as soon as the new one is switched */
+	adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
+		rb, rb->timestamp);
+
+	rb->drawctxt_active = drawctxt;
+	return 0;
+}
+
+#define GEN8_USER_PROFILE_IB(rb, cmdobj, cmds, field) \
+	gen8_get_user_profiling_ib((rb), (cmdobj), \
+		offsetof(struct kgsl_drawobj_profiling_buffer, field), \
+		(cmds))
+
+#define GEN8_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
+	gen8_get_alwayson_counter((cmds), \
+		(dev)->profile_buffer->gpuaddr + \
+			ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
+				field))
+
+#define GEN8_KERNEL_PROFILE_CONTEXT(dev, cmdobj, cmds, field) \
+	gen8_get_alwayson_context((cmds), \
+		(dev)->profile_buffer->gpuaddr + \
+			ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
+				field))
+
+#define GEN8_COMMAND_DWORDS 60
+
+int gen8_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	struct adreno_ringbuffer *rb = drawctxt->rb;
+	int ret = 0, numibs = 0, index = 0;
+	u32 *cmds;
+
+	/* Count the number of IBs (if we are not skipping) */
+	if (!IS_SKIP(flags)) {
+		struct list_head *tmp;
+
+		list_for_each(tmp, &cmdobj->cmdlist)
+			numibs++;
+	}
+
+	cmds = kvmalloc((GEN8_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
+	if (!cmds) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = START_IB_IDENTIFIER;
+
+	/* Kernel profiling: 8 dwords */
+	if (IS_KERNEL_PROFILE(flags)) {
+		index += GEN8_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
+			started);
+		index += GEN8_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
+			ctx_start);
+	}
+
+	/* User profiling: 4 dwords */
+	if (IS_USER_PROFILE(flags))
+		index += GEN8_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
+			gpu_ticks_submitted);
+
+	if (is_concurrent_binning(drawctxt)) {
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SET_THREAD_BOTH;
+	}
+	if (numibs) {
+		struct kgsl_memobj_node *ib;
+
+		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+		cmds[index++] = 0x00d; /* IB1LIST start */
+
+		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+			if (ib->priv & MEMOBJ_SKIP ||
+				(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE &&
+				 !IS_PREAMBLE(flags)))
+				cmds[index++] = cp_type7_packet(CP_NOP, 4);
+
+			cmds[index++] =
+				cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
+			cmds[index++] = lower_32_bits(ib->gpuaddr);
+			cmds[index++] = upper_32_bits(ib->gpuaddr);
+
+			/* Double check that IB_PRIV is never set */
+			cmds[index++] = (ib->size >> 2) & 0xfffff;
+		}
+
+		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
+		cmds[index++] = 0x00e; /* IB1LIST end */
+	}
+
+	if (is_concurrent_binning(drawctxt)) {
+		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
+		cmds[index++] = CP_SET_THREAD_BR;
+	}
+	/* CCU invalidate depth */
+	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
+	cmds[index++] = 24;
+
+	/* CCU invalidate color */
+	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
+	cmds[index++] = 25;
+
+	/* 8 dwords */
+	if (IS_KERNEL_PROFILE(flags)) {
+		index += GEN8_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
+			retired);
+		index += GEN8_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
+			ctx_end);
+	}
+
+	/* 4 dwords */
+	if (IS_USER_PROFILE(flags))
+		index += GEN8_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
+			gpu_ticks_retired);
+
+	cmds[index++] = cp_type7_packet(CP_NOP, 1);
+	cmds[index++] = END_IB_IDENTIFIER;
+
+	ret = gen8_drawctxt_switch(adreno_dev, rb, drawctxt);
+
+	/*
+	 * In the unlikely event of an error in the drawctxt switch,
+	 * treat it like a hang
+	 */
+	if (ret) {
+		/*
+		 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
+		 * the upper layers know how to handle it
+		 */
+		if (ret != -ENOSPC && ret != -ENOENT)
+			dev_err(device->dev,
+				"Unable to switch draw context: %d\n", ret);
+		goto done;
+	}
+
+	adreno_drawobj_set_constraint(device, drawobj);
+
+	ret = gen8_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
+		flags, cmds, index, drawobj->timestamp, time);
+
+done:
+	trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
+		drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
+
+	kvfree(cmds);
+	return ret;
+}

+ 517 - 0
qcom/opensource/graphics-kernel/adreno_gen8_rpmh.c

@@ -0,0 +1,517 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <soc/qcom/cmd-db.h>
+#include <soc/qcom/tcs.h>
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "kgsl_bus.h"
+#include "kgsl_device.h"
+
+struct rpmh_arc_vals {
+	u32 num;
+	const u16 *val;
+};
+
+struct bcm {
+	const char *name;
+	u32 buswidth;
+	u32 channels;
+	u32 unit;
+	u16 width;
+	u8 vcd;
+	bool fixed;
+};
+
+struct bcm_data {
+	__le32 unit;
+	__le16 width;
+	u8 vcd;
+	u8 reserved;
+};
+
+struct rpmh_bw_votes {
+	u32 wait_bitmask;
+	u32 num_cmds;
+	u32 *addrs;
+	u32 num_levels;
+	u32 **cmds;
+};
+
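+/*
+ * ARC_VOTE_SET packs the voltage level into bits [31:16], the secondary
+ * rail index into bits [15:8] and the primary rail index into bits [7:0];
+ * e.g. ARC_VOTE_SET(2, 1, 0x80) yields 0x00800102.
+ */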
+#define ARC_VOTE_SET(pri, sec, vlvl) \
+	(FIELD_PREP(GENMASK(31, 16), vlvl) | \
+	 FIELD_PREP(GENMASK(15, 8), sec) | \
+	 FIELD_PREP(GENMASK(7, 0), pri))
+
+static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id)
+{
+	size_t len = 0;
+
+	arc->val = cmd_db_read_aux_data(res_id, &len);
+
+	/*
+	 * cmd_db_read_aux_data() gives us a zero-padded table of
+	 * size len that contains the arc values. To determine the
+	 * number of arc values, we loop through the table and count
+	 * them until we get to the end of the buffer or hit the
+	 * zero padding.
+	 */
+	for (arc->num = 1; arc->num < (len >> 1); arc->num++) {
+		if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0)
+			break;
+	}
+
+	return 0;
+}
+
+static int setup_volt_dependency_tbl(u32 *votes,
+		struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
+		u16 *vlvl, u32 num_entries)
+{
+	int i, j, k;
+	uint16_t cur_vlvl;
+	bool found_match;
+
+	/*
+	 * i tracks the current KGSL GPU frequency table entry,
+	 * j tracks the secondary rail voltage table entry and
+	 * k tracks the primary rail voltage table entry.
+	 */
+	for (i = 0; i < num_entries; i++) {
+		found_match = false;
+
+		/* Look for a primary rail voltage that matches a VLVL level */
+		for (k = 0; k < pri_rail->num; k++) {
+			if (pri_rail->val[k] >= vlvl[i]) {
+				cur_vlvl = pri_rail->val[k];
+				found_match = true;
+				break;
+			}
+		}
+
+		/* If we did not find a matching VLVL level then abort */
+		if (!found_match)
+			return -EINVAL;
+
+		/*
+		 * Look for a secondary rail index whose VLVL value
+		 * is greater than or equal to the VLVL value of the
+		 * corresponding index of the primary rail
+		 */
+		for (j = 0; j < sec_rail->num; j++) {
+			if (sec_rail->val[j] >= cur_vlvl ||
+					j + 1 == sec_rail->num)
+				break;
+		}
+
+		if (j == sec_rail->num)
+			j = 0;
+
+		votes[i] = ARC_VOTE_SET(k, j, cur_vlvl);
+	}
+
+	return 0;
+}
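+
+/*
+ * Worked example with hypothetical rail tables: pri = {64, 128, 192},
+ * sec = {64, 192} and vlvl[i] = 100. The primary scan stops at k = 1
+ * (128 >= 100), so cur_vlvl = 128; the secondary scan stops at j = 1
+ * (192 >= 128), giving votes[i] = ARC_VOTE_SET(1, 1, 128).
+ */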
+
+/* Generate a set of bandwidth votes for the list of BCMs */
+static void tcs_cmd_data(struct bcm *bcms, int count,
+		u32 ab, u32 ib, u32 *data, u32 perfmode_vote, bool set_perfmode)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		bool valid = true;
+		bool commit = false;
+		u64 avg, peak, x, y;
+
+		if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd)
+			commit = true;
+
+		if (bcms[i].fixed) {
+			if (!ab && !ib)
+				data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0);
+			else
+				data[i] = BCM_TCS_CMD(commit, true, 0x0,
+							set_perfmode ? perfmode_vote : 0x0);
+			continue;
+		}
+
+		/* Multiply the bandwidth by the width of the connection */
+		avg = ((u64) ab) * bcms[i].width;
+
+		/* And then divide by the total width */
+		do_div(avg, bcms[i].buswidth);
+
+		peak = ((u64) ib) * bcms[i].width;
+		do_div(peak, bcms[i].buswidth);
+
+		/* Input bandwidth value is in KBps */
+		x = avg * 1000ULL;
+		do_div(x, bcms[i].unit);
+
+		/* Input bandwidth value is in KBps */
+		y = peak * 1000ULL;
+		do_div(y, bcms[i].unit);
+
+		/*
+		 * If a bandwidth value was specified but the calculation ends
+		 * up rounding down to zero, set a minimum level
+		 */
+		if (ab && x == 0)
+			x = 1;
+
+		if (ib && y == 0)
+			y = 1;
+
+		x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK);
+		y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK);
+
+		if (!x && !y)
+			valid = false;
+
+		data[i] = BCM_TCS_CMD(commit, valid, x, y);
+	}
+}
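+
+/*
+ * Rough illustration with made-up numbers: for ab = 800000 KBps on a BCM
+ * with width = 4, buswidth = 16 and unit = 1000 (hypothetical cmd-db
+ * values), avg = 800000 * 4 / 16 = 200000 and the vote x becomes
+ * 200000 * 1000 / 1000 = 200000, which is then clamped to
+ * BCM_TCS_CMD_VOTE_MASK.
+ */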
+
+static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes)
+{
+	int i;
+
+	if (!votes)
+		return;
+
+	for (i = 0; votes->cmds && i < votes->num_levels; i++)
+		kfree(votes->cmds[i]);
+
+	kfree(votes->cmds);
+	kfree(votes->addrs);
+	kfree(votes);
+}
+
+/* Build the votes table from the specified bandwidth levels */
+static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms,
+		int bcm_count, u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl)
+{
+	struct rpmh_bw_votes *votes;
+	bool set_perfmode;
+	int i;
+
+	votes = kzalloc(sizeof(*votes), GFP_KERNEL);
+	if (!votes)
+		return ERR_PTR(-ENOMEM);
+
+	votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL);
+	if (!votes->addrs) {
+		free_rpmh_bw_votes(votes);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL);
+	if (!votes->cmds) {
+		free_rpmh_bw_votes(votes);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	votes->num_cmds = bcm_count;
+	votes->num_levels = levels_count;
+
+	/* Get the cmd-db information for each BCM */
+	for (i = 0; i < bcm_count; i++) {
+		size_t l;
+		const struct bcm_data *data;
+
+		data = cmd_db_read_aux_data(bcms[i].name, &l);
+
+		votes->addrs[i] = cmd_db_read_addr(bcms[i].name);
+
+		bcms[i].unit = le32_to_cpu(data->unit);
+		bcms[i].width = le16_to_cpu(data->width);
+		bcms[i].vcd = data->vcd;
+	}
+
+	for (i = 0; i < bcm_count; i++) {
+		if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd)
+			votes->wait_bitmask |= (1 << i);
+	}
+
+	for (i = 0; i < levels_count; i++) {
+		votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL);
+		if (!votes->cmds[i]) {
+			free_rpmh_bw_votes(votes);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		set_perfmode = (i >= perfmode_lvl) ? true : false;
+		tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i],
+								perfmode_vote, set_perfmode);
+	}
+
+	return votes;
+}
+
+/*
+ * setup_cx_arc_votes - Build the GMU CX voting table
+ * @gmu: Pointer to the gmu device
+ * @pri_rail: Pointer to primary power rail vlvl table
+ * @sec_rail: Pointer to second/dependent power rail vlvl table
+ *
+ * This function initializes the cx votes for all gmu frequencies
+ * for gmu dcvs.
+ */
+static int setup_cx_arc_votes(struct gen8_gmu_device *gmu,
+	struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail)
+{
+	/* Hardcoded values of GMU CX voltage levels */
+	u16 gmu_cx_vlvl[MAX_CX_LEVELS];
+	u32 cx_votes[MAX_CX_LEVELS];
+	struct gen8_dcvs_table *table = &gmu->dcvs_table;
+	u32 *freqs = gmu->freqs;
+	u32 *vlvls = gmu->vlvls;
+	int ret, i;
+
+	gmu_cx_vlvl[0] = 0;
+	gmu_cx_vlvl[1] = vlvls[0];
+	gmu_cx_vlvl[2] = vlvls[1];
+
+	table->gmu_level_num = 3;
+
+	table->cx_votes[0].freq = 0;
+	table->cx_votes[1].freq = freqs[0] / 1000;
+	table->cx_votes[2].freq = freqs[1] / 1000;
+
+	ret = setup_volt_dependency_tbl(cx_votes, pri_rail,
+			sec_rail, gmu_cx_vlvl, table->gmu_level_num);
+	if (!ret) {
+		for (i = 0; i < table->gmu_level_num; i++)
+			table->cx_votes[i].vote = cx_votes[i];
+	}
+
+	return ret;
+}
+
+static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl)
+{
+	u32 i;
+
+	/*
+	 * This means that the Gx level doesn't have a dependency on Cx level.
+	 * Return the same value to disable cx voting at GMU.
+	 */
+	if (vlvl == 0xffffffff) {
+		*hlvl = vlvl;
+		return 0;
+	}
+
+	for (i = 0; i < cx_rail->num; i++) {
+		if (cx_rail->val[i] >= vlvl) {
+			*hlvl = i;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * setup_gx_arc_votes - Build the gpu dcvs voting table
+ * @adreno_dev: Pointer to the adreno device
+ * @pri_rail: Pointer to primary power rail vlvl table
+ * @sec_rail: Pointer to second/dependent power rail vlvl table
+ * @cx_rail: Pointer to the CX power rail vlvl table
+ *
+ * This function initializes the gx votes for all gpu frequencies
+ * for gpu dcvs.
+ */
+static int setup_gx_arc_votes(struct adreno_device *adreno_dev,
+	struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
+	struct rpmh_arc_vals *cx_rail)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct gen8_dcvs_table *table = &gmu->dcvs_table;
+	u32 index;
+	u16 vlvl_tbl[MAX_GX_LEVELS];
+	u32 gx_votes[MAX_GX_LEVELS];
+	int ret, i;
+
+	if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) {
+		dev_err(device->dev,
+			"Defined more GPU DCVS levels than RPMh can support\n");
+		return -ERANGE;
+	}
+
+	/* Add the zero powerlevel for the perf table */
+	table->gpu_level_num = pwr->num_pwrlevels + 1;
+
+	memset(vlvl_tbl, 0, sizeof(vlvl_tbl));
+
+	table->gx_votes[0].freq = 0;
+	table->gx_votes[0].cx_vote = 0;
+	/* Disable cx vote in gmu dcvs table if it is not supported in DT */
+	if (pwr->pwrlevels[0].cx_level == 0xffffffff)
+		table->gx_votes[0].cx_vote = 0xffffffff;
+
+	/* GMU power levels are in ascending order */
+	for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) {
+		u32 cx_vlvl = pwr->pwrlevels[i].cx_level;
+
+		vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level;
+		table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000;
+
+		ret = to_cx_hlvl(cx_rail, cx_vlvl,
+				&table->gx_votes[index].cx_vote);
+		if (ret) {
+			dev_err(device->dev, "Unsupported cx corner: %u\n",
+					cx_vlvl);
+			return ret;
+		}
+	}
+
+	ret = setup_volt_dependency_tbl(gx_votes, pri_rail,
+			sec_rail, vlvl_tbl, table->gpu_level_num);
+	if (!ret) {
+		for (i = 0; i < table->gpu_level_num; i++)
+			table->gx_votes[i].vote = gx_votes[i];
+	}
+
+	return ret;
+}
+
+static int build_dcvs_table(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	struct rpmh_arc_vals gx_arc, cx_arc, mx_arc;
+	int ret;
+
+	ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl");
+	if (ret)
+		return ret;
+
+	ret = rpmh_arc_cmds(&cx_arc, "cx.lvl");
+	if (ret)
+		return ret;
+
+	ret = rpmh_arc_cmds(&mx_arc, "mx.lvl");
+	if (ret)
+		return ret;
+
+	ret = setup_cx_arc_votes(gmu, &cx_arc, &mx_arc);
+	if (ret)
+		return ret;
+
+	return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc);
+}
+
+/*
+ * List of Bus Control Modules (BCMs) that need to be configured for the GPU
+ * to access DDR. For each bus level we will generate a vote for each BCM.
+ */
+static struct bcm gen8_ddr_bcms[] = {
+	{ .name = "SH0", .buswidth = 16 },
+	{ .name = "MC0", .buswidth = 4 },
+	{ .name = "ACV", .fixed = true },
+};
+
+/* Same as above, but for the CNOC BCMs */
+static struct bcm gen8_cnoc_bcms[] = {
+	{ .name = "CN0", .buswidth = 4 },
+};
+
+static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd,
+		struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc)
+{
+	u32 i, j;
+
+	cmd->bw_level_num = ddr->num_levels;
+	cmd->ddr_cmds_num = ddr->num_cmds;
+	cmd->ddr_wait_bitmask = ddr->wait_bitmask;
+
+	for (i = 0; i < ddr->num_cmds; i++)
+		cmd->ddr_cmd_addrs[i] = ddr->addrs[i];
+
+	for (i = 0; i < ddr->num_levels; i++)
+		for (j = 0; j < ddr->num_cmds; j++)
+			cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j];
+
+	if (!cnoc)
+		return;
+
+	cmd->cnoc_cmds_num = cnoc->num_cmds;
+	cmd->cnoc_wait_bitmask = cnoc->wait_bitmask;
+
+	for (i = 0; i < cnoc->num_cmds; i++)
+		cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i];
+
+	for (i = 0; i < cnoc->num_levels; i++)
+		for (j = 0; j < cnoc->num_cmds; j++)
+			cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j];
+}
+
+/* BIT(2) is used to vote for GPU performance mode through GMU */
+#define ACV_GPU_PERFMODE_VOTE	BIT(2)
+
+static int build_bw_table(struct adreno_device *adreno_dev)
+{
+	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
+	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct rpmh_bw_votes *ddr, *cnoc = NULL;
+	u32 perfmode_lvl = kgsl_pwrctrl_get_acv_perfmode_lvl(device,
+			gen8_core->acv_perfmode_ddr_freq);
+	u32 *cnoc_table;
+	u32 count;
+	int ret;
+
+	ddr = build_rpmh_bw_votes(gen8_ddr_bcms, ARRAY_SIZE(gen8_ddr_bcms),
+		pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, perfmode_lvl);
+	if (IS_ERR(ddr))
+		return PTR_ERR(ddr);
+
+	cnoc_table = kgsl_bus_get_table(device->pdev, "qcom,bus-table-cnoc",
+		&count);
+
+	if (count > 0)
+		cnoc = build_rpmh_bw_votes(gen8_cnoc_bcms,
+			ARRAY_SIZE(gen8_cnoc_bcms), cnoc_table, count, 0, 0);
+
+	kfree(cnoc_table);
+
+	if (IS_ERR(cnoc)) {
+		free_rpmh_bw_votes(ddr);
+		return PTR_ERR(cnoc);
+	}
+
+	ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL);
+	if (ret)
+		return ret;
+
+	build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc);
+
+	free_rpmh_bw_votes(ddr);
+	free_rpmh_bw_votes(cnoc);
+
+	return 0;
+}
+
+int gen8_build_rpmh_tables(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	ret = build_dcvs_table(adreno_dev);
+	if (ret) {
+		dev_err(adreno_dev->dev.dev, "Failed to build dcvs table\n");
+		return ret;
+	}
+
+	ret = build_bw_table(adreno_dev);
+	if (ret)
+		dev_err(adreno_dev->dev.dev, "Failed to build bw table\n");
+
+	return ret;
+}

+ 1802 - 0
qcom/opensource/graphics-kernel/adreno_gen8_snapshot.c

@@ -0,0 +1,1802 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "adreno.h"
+#include "adreno_gen8_3_0_snapshot.h"
+#include "adreno_snapshot.h"
+
+static struct kgsl_memdesc *gen8_capturescript;
+static struct kgsl_memdesc *gen8_crashdump_registers;
+static u32 *gen8_cd_reg_end;
+static const struct gen8_snapshot_block_list *gen8_snapshot_block_list;
+static bool gen8_crashdump_timedout;
+
+/* Starting kernel virtual address for QDSS TMC register block */
+static void __iomem *tmc_virt;
+
+const struct gen8_snapshot_block_list gen8_3_0_snapshot_block_list = {
+	.pre_crashdumper_regs = gen8_3_0_ahb_registers,
+	.num_pre_crashdumper_regs = ARRAY_SIZE(gen8_3_0_ahb_registers),
+	.debugbus_blocks = gen8_3_0_debugbus_blocks,
+	.debugbus_blocks_len = ARRAY_SIZE(gen8_3_0_debugbus_blocks),
+	.gbif_debugbus_blocks = gen8_gbif_debugbus_blocks,
+	.gbif_debugbus_blocks_len = ARRAY_SIZE(gen8_gbif_debugbus_blocks),
+	.cx_debugbus_blocks = gen8_cx_debugbus_blocks,
+	.cx_debugbus_blocks_len = ARRAY_SIZE(gen8_cx_debugbus_blocks),
+	.external_core_regs = gen8_3_0_external_core_regs,
+	.num_external_core_regs = ARRAY_SIZE(gen8_3_0_external_core_regs),
+	.gmu_cx_unsliced_regs = gen8_3_0_gmu_registers,
+	.gmu_gx_regs = gen8_3_0_gmu_gx_regs,
+	.num_gmu_gx_regs = ARRAY_SIZE(gen8_3_0_gmu_gx_regs),
+	.rscc_regs = gen8_3_0_rscc_rsc_registers,
+	.reg_list = gen8_3_0_reg_list,
+	.cx_misc_regs = gen8_3_0_cx_misc_registers,
+	.shader_blocks = gen8_3_0_shader_blocks,
+	.num_shader_blocks = ARRAY_SIZE(gen8_3_0_shader_blocks),
+	.cp_clusters = gen8_3_0_cp_clusters,
+	.num_cp_clusters = ARRAY_SIZE(gen8_3_0_cp_clusters),
+	.clusters = gen8_3_0_mvc_clusters,
+	.num_clusters = ARRAY_SIZE(gen8_3_0_mvc_clusters),
+	.sptp_clusters = gen8_3_0_sptp_clusters,
+	.num_sptp_clusters = ARRAY_SIZE(gen8_3_0_sptp_clusters),
+	.index_registers = gen8_3_0_cp_indexed_reg_list,
+	.index_registers_len = ARRAY_SIZE(gen8_3_0_cp_indexed_reg_list),
+	.mempool_index_registers = gen8_3_0_cp_mempool_reg_list,
+	.mempool_index_registers_len = ARRAY_SIZE(gen8_3_0_cp_mempool_reg_list),
+};
+
+#define GEN8_SP_READ_SEL_VAL(_sliceid, _location, _pipe, _statetype, _usptp, _sptp) \
+				(FIELD_PREP(GENMASK(25, 21), _sliceid) | \
+				 FIELD_PREP(GENMASK(20, 18), _location) | \
+				 FIELD_PREP(GENMASK(17, 16), _pipe) | \
+				 FIELD_PREP(GENMASK(15, 8), _statetype) | \
+				 FIELD_PREP(GENMASK(7, 4), _usptp) | \
+				 FIELD_PREP(GENMASK(3, 0), _sptp))
+
+#define GEN8_CP_APERTURE_REG_VAL(_sliceid, _pipe, _cluster, _context) \
+			(FIELD_PREP(GENMASK(23, 23), 1) | \
+			 FIELD_PREP(GENMASK(18, 16), _sliceid) | \
+			 FIELD_PREP(GENMASK(15, 12), _pipe) | \
+			 FIELD_PREP(GENMASK(11, 8), _cluster) | \
+			 FIELD_PREP(GENMASK(5, 4), _context))
+
+#define GEN8_DEBUGBUS_SECTION_SIZE (sizeof(struct kgsl_snapshot_debugbus) \
+			+ (GEN8_DEBUGBUS_BLOCK_SIZE << 3))
+
+#define CD_REG_END 0xaaaaaaaa
+
+static u32 CD_WRITE(u64 *ptr, u32 offset, u64 val)
+{
+	ptr[0] = val;
+	ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | BIT(21) | BIT(0);
+
+	return 2;
+}
+
+static u32 CD_READ(u64 *ptr, u32 offset, u32 size, u64 target)
+{
+	ptr[0] = target;
+	ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | size;
+
+	return 2;
+}
+
+static void CD_FINISH(u64 *ptr, u32 offset)
+{
+	gen8_cd_reg_end = gen8_crashdump_registers->hostptr + offset;
+	*gen8_cd_reg_end = CD_REG_END;
+	ptr[0] = gen8_crashdump_registers->gpuaddr + offset;
+	ptr[1] = FIELD_PREP(GENMASK(63, 44), GEN8_CP_CRASH_DUMP_STATUS) | BIT(0);
+	ptr[2] = 0;
+	ptr[3] = 0;
+}
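A minimal sketch of the script-entry packing implied by the helpers above: each entry is two 64-bit words, with the register offset kept in bits 63:44 of the second word and either a dword count (read) or the marker bits (write) in the low bits. The register offset and target address below are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Mirrors CD_READ(): word0 = destination GPU address, word1 = offset | size */
static void pack_cd_read(uint64_t entry[2], uint32_t offset, uint32_t size,
			 uint64_t target)
{
	entry[0] = target;
	entry[1] = ((uint64_t)(offset & 0xfffff) << 44) | size;
}

int main(void)
{
	uint64_t entry[2];

	/* Hypothetical: dump 4 dwords starting at register offset 0x1234 */
	pack_cd_read(entry, 0x1234, 4, 0x800000000ULL);
	printf("word0=0x%016llx word1=0x%016llx\n",
	       (unsigned long long)entry[0], (unsigned long long)entry[1]);
	return 0;
}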
+
+static bool CD_SCRIPT_CHECK(struct kgsl_device *device)
+{
+	return (adreno_smmu_is_stalled(ADRENO_DEVICE(device)) ||
+		(!device->snapshot_crashdumper) ||
+		IS_ERR_OR_NULL(gen8_capturescript) ||
+		IS_ERR_OR_NULL(gen8_crashdump_registers) ||
+		gen8_crashdump_timedout);
+}
+
+static bool _gen8_do_crashdump(struct kgsl_device *device)
+{
+	u32 reg = 0;
+	ktime_t timeout;
+
+	if (CD_SCRIPT_CHECK(device))
+		return false;
+
+	kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_SCRIPT_BASE_LO,
+			lower_32_bits(gen8_capturescript->gpuaddr));
+	kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_SCRIPT_BASE_HI,
+			upper_32_bits(gen8_capturescript->gpuaddr));
+	kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_CNTL, 1);
+
+	timeout = ktime_add_ms(ktime_get(), CP_CRASH_DUMPER_TIMEOUT);
+
+	if (!device->snapshot_atomic)
+		might_sleep();
+	for (;;) {
+		/* make sure we're reading the latest value */
+		rmb();
+		if ((*gen8_cd_reg_end) != CD_REG_END)
+			break;
+		if (ktime_compare(ktime_get(), timeout) > 0)
+			break;
+		/* Wait 1msec to avoid unnecessary looping */
+		if (!device->snapshot_atomic)
+			usleep_range(100, 1000);
+	}
+
+	kgsl_regread(device, GEN8_CP_CRASH_DUMP_STATUS, &reg);
+
+	/*
+	 * Writing to the GEN8_CP_CRASH_DUMP_CNTL also resets the
+	 * GEN8_CP_CRASH_DUMP_STATUS. Make sure the read above is
+	 * complete before we change the value
+	 */
+	rmb();
+
+	kgsl_regwrite(device, GEN8_CP_CRASH_DUMP_CNTL, 0);
+
+	if (WARN(!(reg & 0x2), "Crashdumper timed out\n")) {
+		/*
+		 * The crash dumper script is broken down into multiple chunks
+		 * and the script is invoked multiple times to capture snapshots
+		 * of different sections of the GPU. If the crashdumper fails
+		 * once, it is highly likely to fail subsequently as well, so
+		 * set gen8_crashdump_timedout to avoid running the crashdumper
+		 * after it fails once.
+		 */
+		gen8_crashdump_timedout = true;
+		return false;
+	}
+
+	return true;
+}
+
+size_t gen8_legacy_snapshot_registers(struct kgsl_device *device,
+		 u8 *buf, size_t remain, void *priv)
+{
+	struct gen8_reg_list_info *info = (struct gen8_reg_list_info *)priv;
+	const u32 *ptr = info->regs->regs;
+	struct kgsl_snapshot_mvc_regs_v3 *header =
+			(struct kgsl_snapshot_mvc_regs_v3 *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	u32 size = (adreno_snapshot_regs_count(ptr) * sizeof(*data)) + sizeof(*header);
+	u32 count, k;
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = 0;
+	header->cluster_id = CLUSTER_NONE;
+	header->pipe_id = PIPE_NONE;
+	header->location_id = UINT_MAX;
+	header->sp_id = UINT_MAX;
+	header->usptp_id = UINT_MAX;
+	header->slice_id = info->slice_id;
+
+	if (info->regs->sel)
+		kgsl_regwrite(device, info->regs->sel->host_reg, info->regs->sel->val);
+
+	if (info->regs->slice_region)
+		kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL
+				(info->slice_id, 0, 0, 0));
+
+	/* Make sure the previous writes are posted before reading */
+	mb();
+
+	for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) {
+		count = REG_COUNT(ptr);
+
+		if (count == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = ptr[0] | (1 << 31);
+			*data++ = ptr[1];
+		}
+		for (k = ptr[0]; k <= ptr[1]; k++)
+			kgsl_regread(device, k, data++);
+	}
+
+	return size;
+}
+
+static size_t gen8_snapshot_registers(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct gen8_reg_list_info *info = (struct gen8_reg_list_info *)priv;
+	const u32 *ptr = info->regs->regs;
+	struct kgsl_snapshot_mvc_regs_v3 *header =
+			(struct kgsl_snapshot_mvc_regs_v3 *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	u32 *src;
+	u32 cnt;
+	u32 size = (adreno_snapshot_regs_count(ptr) * sizeof(*data)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = 0;
+	header->cluster_id = CLUSTER_NONE;
+	header->pipe_id = PIPE_NONE;
+	header->location_id = UINT_MAX;
+	header->sp_id = UINT_MAX;
+	header->usptp_id = UINT_MAX;
+	header->slice_id = info->slice_id;
+
+	src = gen8_crashdump_registers->hostptr + info->offset;
+
+	for (ptr = info->regs->regs; ptr[0] != UINT_MAX; ptr += 2) {
+		cnt = REG_COUNT(ptr);
+
+		if (cnt == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = BIT(31) | ptr[0];
+			*data++ = ptr[1];
+		}
+		memcpy(data, src, cnt << 2);
+		data += cnt;
+		src += cnt;
+	}
+
+	/* Return the size of the section */
+	return size;
+}
+
+static size_t gen8_legacy_snapshot_shader(struct kgsl_device *device,
+				u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_shader_v3 *header =
+		(struct kgsl_snapshot_shader_v3 *) buf;
+	struct gen8_shader_block_info *info = (struct gen8_shader_block_info *) priv;
+	struct gen8_shader_block *block = info->block;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	u32 read_sel, i;
+
+	if (remain < (sizeof(*header) + (block->size << 2))) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	header->type = block->statetype;
+	header->slice_id = info->slice_id;
+	header->sp_index = info->sp_id;
+	header->usptp = info->usptp;
+	header->pipe_id = block->pipeid;
+	header->location = block->location;
+	header->ctxt_id = 1;
+	header->size = block->size;
+
+	read_sel = GEN8_SP_READ_SEL_VAL(info->slice_id, block->location, block->pipeid,
+				block->statetype, info->usptp, info->sp_id);
+
+	kgsl_regwrite(device, GEN8_SP_READ_SEL, read_sel);
+
+	/*
+	 * An explicit barrier is needed so that reads do not happen before
+	 * the register write.
+	 */
+	mb();
+
+	for (i = 0; i < block->size; i++)
+		data[i] = kgsl_regmap_read(&device->regmap, GEN8_SP_AHB_READ_APERTURE + i);
+
+	return (sizeof(*header) + (block->size << 2));
+}
+
+static size_t gen8_snapshot_shader_memory(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_shader_v3 *header =
+		(struct kgsl_snapshot_shader_v3 *) buf;
+	struct gen8_shader_block_info *info = (struct gen8_shader_block_info *) priv;
+	struct gen8_shader_block *block = info->block;
+	u32 *data = (u32 *) (buf + sizeof(*header));
+
+	if (remain < (sizeof(*header) + (block->size << 2))) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	header->type = block->statetype;
+	header->slice_id = info->slice_id;
+	header->sp_index = info->sp_id;
+	header->usptp = info->usptp;
+	header->pipe_id = block->pipeid;
+	header->location = block->location;
+	header->ctxt_id = 1;
+	header->size = block->size;
+
+	memcpy(data, gen8_crashdump_registers->hostptr + info->offset,
+			(block->size << 2));
+
+	return (sizeof(*header) + (block->size << 2));
+}
+
+static void qdss_regwrite(void __iomem *regbase, u32 offsetbytes, u32 value)
+{
+	void __iomem *reg;
+
+	reg = regbase + offsetbytes;
+
+	/* Ensure previous write is committed */
+	wmb();
+	__raw_writel(value, reg);
+}
+
+static u32 qdss_regread(void __iomem *regbase, u32 offsetbytes)
+{
+	void __iomem *reg;
+	u32 val;
+
+	reg = regbase + offsetbytes;
+	val = __raw_readl(reg);
+
+	/* Make sure memory is updated before next access */
+	rmb();
+	return val;
+}
+
+static size_t gen8_snapshot_trace_buffer_gfx_trace(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	u32 start_idx = 0, status = 0, count = 0, wrap_count = 0, write_ptr = 0;
+	struct kgsl_snapshot_trace_buffer *header =
+			(struct kgsl_snapshot_trace_buffer *) buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct gen8_trace_buffer_info *info =
+				(struct gen8_trace_buffer_info *) priv;
+
+	if (remain < SZ_2K + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "TRACE 2K BUFFER");
+		return 0;
+	}
+
+	memcpy(header->ping_blk, info->ping_blk, sizeof(header->ping_blk));
+	memcpy(header->ping_idx, info->ping_idx, sizeof(header->ping_idx));
+	header->granularity = info->granularity;
+	header->segment = info->segment;
+	header->dbgc_ctrl = info->dbgc_ctrl;
+
+	/* Read the status of trace buffer to determine if it's full or empty */
+	kgsl_regread(device, GEN8_DBGC_TRACE_BUFFER_STATUS, &status);
+
+	/*
+	 * wrap_count and write_ptr are both encoded in the status value.
+	 * If status is 0 (wrap_count = 0 and write_ptr = 0) the buffer is empty.
+	 * If status is non-zero and wrap_count is 0, read a partial buffer.
+	 * If wrap_count is non-zero, read the entire 2 KB buffer.
+	 * Always read the oldest data available.
+	 */
+
+	/* if status is 0 then buffer is empty */
+	if (!status) {
+		header->size = 0;
+		return sizeof(*header);
+	}
+
+	/* Number of times the circular buffer has wrapped around */
+	wrap_count = FIELD_GET(GENMASK(31, 12), status);
+	write_ptr = FIELD_GET(GENMASK(8, 0), status);
+
+	/* Read partial buffer starting from 0 */
+	if (!wrap_count) {
+		/* Number of dwords to read: (write_ptr - 0) of the indexed register */
+		count = write_ptr;
+		header->size = count << 2;
+		start_idx = 0;
+	} else {
+		/* Read entire 2k buffer starting from write ptr */
+		start_idx = write_ptr + 1;
+		count = SZ_512;
+		header->size = SZ_2K;
+	}
+
+	kgsl_regmap_read_indexed_interleaved(&device->regmap,
+		GEN8_DBGC_DBG_TRACE_BUFFER_RD_ADDR, GEN8_DBGC_DBG_TRACE_BUFFER_RD_DATA, data,
+			start_idx, count);
+
+	return (sizeof(*header) + header->size);
+}
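A standalone sketch of the status decode described in the comments above (the status word below is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define TRACE_BUF_DWORDS	512	/* the 2 KB buffer assumed above */

int main(void)
{
	uint32_t status = 0x00003150;	/* hypothetical trace buffer status */
	uint32_t wrap_count = (status >> 12) & 0xfffff;	/* bits 31:12 */
	uint32_t write_ptr = status & 0x1ff;		/* bits 8:0 */
	uint32_t start, count;

	if (!wrap_count) {
		/* Partial buffer: read dwords [0, write_ptr) */
		start = 0;
		count = write_ptr;
	} else {
		/* Wrapped: read the full 2 KB starting just after write_ptr */
		start = write_ptr + 1;
		count = TRACE_BUF_DWORDS;
	}

	printf("start=%u count=%u dwords\n", start, count);
	return 0;
}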
+
+static size_t gen8_snapshot_trace_buffer_etb(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	u32 read_ptr, count, write_ptr, val, idx = 0;
+	struct kgsl_snapshot_trace_buffer *header = (struct kgsl_snapshot_trace_buffer *) buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct gen8_trace_buffer_info *info = (struct gen8_trace_buffer_info *) priv;
+
+	/* Unlock ETB buffer */
+	qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_LAR, 0xC5ACCE55);
+
+	/* Make sure unlock goes through before proceeding further */
+	mb();
+
+	/* Flush the QDSS pipeline to ensure completion of pending write to buffer */
+	val = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_FFCR);
+	qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_FFCR, val | 0x40);
+
+	/* Make sure pipeline is flushed before we get read and write pointers */
+	mb();
+
+	/* Disable ETB */
+	qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_CTRL, 0);
+
+	/* Set to circular mode */
+	qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_MODE, 0);
+
+	/* Ensure buffer is set to circular mode before accessing it */
+	mb();
+
+	/* Size of buffer is specified in register TMC_RSZ */
+	count = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RSZ) << 2;
+	read_ptr = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RRP);
+	write_ptr = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RWP);
+
+	/* If the ETB buffer is full, read_ptr equals write_ptr; otherwise write_ptr leads read_ptr */
+	count = (read_ptr == write_ptr) ? count : (write_ptr - read_ptr);
+
+	if (remain < count + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "ETB BUFFER");
+		return 0;
+	}
+
+	/*
+	 * The read pointer is 4-byte aligned and the write pointer is 2-byte
+	 * aligned. We read 4 bytes of data per iteration below, so align the
+	 * count down to a multiple of 4 bytes.
+	 */
+	count = ALIGN_DOWN(count, 4);
+
+	header->size = count;
+	header->dbgc_ctrl = info->dbgc_ctrl;
+	memcpy(header->ping_blk, info->ping_blk, sizeof(header->ping_blk));
+	memcpy(header->ping_idx, info->ping_idx, sizeof(header->ping_idx));
+	header->granularity = info->granularity;
+	header->segment = info->segment;
+
+	while (count != 0) {
+		/* This indexed register auto increments index as we read */
+		data[idx++] = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_RRD);
+		count = count - 4;
+	}
+
+	return (sizeof(*header) + header->size);
+}
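The read-amount calculation above can be reproduced in isolation as below; the buffer size and pointer values are hypothetical.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t buf_size = 0x2000;	/* TMC_RSZ (dwords) << 2 = 8 KB, hypothetical */
	uint32_t read_ptr = 0x100;	/* 4-byte aligned */
	uint32_t write_ptr = 0x3a6;	/* 2-byte aligned */
	uint32_t count;

	/* Full buffer when the pointers meet, otherwise the delta */
	count = (read_ptr == write_ptr) ? buf_size : (write_ptr - read_ptr);

	/* Round down to a multiple of 4: the ETB is drained a dword at a time */
	count &= ~3u;

	printf("bytes to drain: %u\n", count);	/* 0x2a6 -> 0x2a4 */
	return 0;
}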
+
+static void gen8_snapshot_trace_buffer(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	u32 val_tmc_ctrl = 0, val_etr_ctrl = 0, val_etr1_ctrl = 0;
+	u32 i = 0, sel_gx = 0, sel_cx = 0, val_gx = 0, val_cx = 0, val = 0;
+	struct gen8_trace_buffer_info info;
+	struct resource *res1, *res2;
+	struct clk *clk;
+	int ret;
+	void __iomem *etr_virt;
+
+	/*
+	 * Data can be collected from either CX_DBGC or DBGC; the two are
+	 * mutually exclusive. Read the necessary select registers to determine
+	 * the source of the data. This loop reads SEL_A through SEL_D of both
+	 * CX_DBGC and DBGC and updates the trace buffer section header
+	 * accordingly.
+	 */
+	for (i = 0; i < TRACE_BUF_NUM_SIG; i++) {
+		kgsl_regread(device, GEN8_DBGC_CFG_DBGBUS_SEL_A + i, &sel_gx);
+		kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A + i, &sel_cx);
+		val_gx |= sel_gx;
+		val_cx |= sel_cx;
+		info.ping_idx[i] = FIELD_GET(GENMASK(7, 0), (sel_gx | sel_cx));
+		info.ping_blk[i] = FIELD_GET(GENMASK(24, 16), (sel_gx | sel_cx));
+	}
+
+	/* Zero the header if not programmed to export any buffer */
+	if (!val_gx && !val_cx) {
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER,
+			snapshot, NULL, &info);
+		return;
+	}
+
+	/* Enable APB clock to read data from trace buffer */
+	clk = clk_get(&device->pdev->dev, "apb_pclk");
+
+	if (IS_ERR(clk)) {
+		dev_err(device->dev, "Unable to get QDSS clock\n");
+		return;
+	}
+
+	ret = clk_prepare_enable(clk);
+
+	if (ret) {
+		dev_err(device->dev, "QDSS Clock enable error: %d\n", ret);
+		clk_put(clk);
+		return;
+	}
+
+	res1 = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "qdss_etr");
+	res2 = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "qdss_tmc");
+
+	if (!res1 || !res2)
+		goto err_clk_put;
+
+	etr_virt = ioremap(res1->start, resource_size(res1));
+	tmc_virt = ioremap(res2->start, resource_size(res2));
+
+	if (!etr_virt || !tmc_virt)
+		goto err_unmap;
+
+	/*
+	 * Update the header information based on the source of the data and
+	 * read the necessary CNTLT registers for granularity and segment
+	 * information.
+	 */
+	if (val_gx) {
+		info.dbgc_ctrl = GX_DBGC;
+		kgsl_regread(device, GEN8_DBGC_CFG_DBGBUS_CNTLT, &val);
+	} else {
+		info.dbgc_ctrl = CX_DBGC;
+		kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT, &val);
+	}
+
+	info.granularity = FIELD_GET(GENMASK(14, 12), val);
+	info.segment = FIELD_GET(GENMASK(31, 28), val);
+
+	val_tmc_ctrl = qdss_regread(tmc_virt, QDSS_AOSS_APB_TMC_CTRL);
+
+	/*
+	 * If TMC CTRL is 0 and val_cx is non-zero, dump an empty buffer.
+	 * If TMC CTRL is 0 and val_gx is non-zero, dump the 2 KB gfx buffer
+	 * (the 2 KB buffer is not present for CX blocks).
+	 * If both ETRs' CTRL is 0, dump the ETB QDSS buffer and disable QDSS.
+	 * If either ETR's CTRL is 1, QDSS is dumping the ETB buffer to DDR,
+	 * so just disable QDSS.
+	 */
+	if (!val_tmc_ctrl) {
+		if (val_gx)
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER,
+				snapshot, gen8_snapshot_trace_buffer_gfx_trace, &info);
+		else
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER,
+					snapshot, NULL, &info);
+	} else {
+		val_etr_ctrl = qdss_regread(etr_virt, QDSS_AOSS_APB_ETR_CTRL);
+		val_etr1_ctrl = qdss_regread(etr_virt, QDSS_AOSS_APB_ETR1_CTRL);
+		if (!val_etr_ctrl && !val_etr1_ctrl)
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_TRACE_BUFFER,
+				snapshot, gen8_snapshot_trace_buffer_etb, &info);
+		qdss_regwrite(tmc_virt, QDSS_AOSS_APB_TMC_CTRL, 0);
+	}
+
+err_unmap:
+	iounmap(tmc_virt);
+	iounmap(etr_virt);
+
+err_clk_put:
+	clk_disable_unprepare(clk);
+	clk_put(clk);
+}
+
+static void gen8_snapshot_shader(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	struct gen8_shader_block_info info = {0};
+	u64 *ptr;
+	u32 offset = 0;
+	struct gen8_shader_block *shader_blocks = gen8_snapshot_block_list->shader_blocks;
+	size_t num_shader_blocks = gen8_snapshot_block_list->num_shader_blocks;
+	u32 i, sp, usptp, slice;
+	size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain,
+		void *priv) = gen8_legacy_snapshot_shader;
+
+	if (CD_SCRIPT_CHECK(device)) {
+		for (i = 0; i < num_shader_blocks; i++) {
+			struct gen8_shader_block *block = &shader_blocks[i];
+
+			for (slice = 0; slice < block->num_slices; slice++) {
+				for (sp = 0; sp < block->num_sps; sp++) {
+					for (usptp = 0; usptp < block->num_usptps; usptp++) {
+						info.block = block;
+						info.sp_id = sp;
+						info.usptp = usptp;
+						info.slice_id = slice;
+						info.offset = offset;
+						offset += block->size << 2;
+
+						/* Shader working/shadow memory */
+						kgsl_snapshot_add_section(device,
+							KGSL_SNAPSHOT_SECTION_SHADER_V3,
+							snapshot, func, &info);
+					}
+				}
+			}
+		}
+
+		return;
+	}
+
+	for (i = 0; i < num_shader_blocks; i++) {
+		struct gen8_shader_block *block = &shader_blocks[i];
+
+		/* Build the crash script */
+		ptr = gen8_capturescript->hostptr;
+		offset = 0;
+
+		for (slice = 0; slice < block->num_slices; slice++) {
+			for (sp = 0; sp < block->num_sps; sp++) {
+				for (usptp = 0; usptp < block->num_usptps; usptp++) {
+					/* Program the aperture */
+					ptr += CD_WRITE(ptr, GEN8_SP_READ_SEL,
+						GEN8_SP_READ_SEL_VAL(slice, block->location,
+						block->pipeid, block->statetype, usptp, sp));
+
+					/* Read all the data in one chunk */
+					ptr += CD_READ(ptr, GEN8_SP_AHB_READ_APERTURE, block->size,
+						gen8_crashdump_registers->gpuaddr + offset);
+					offset += block->size << 2;
+				}
+			}
+		}
+		/* Marker for end of script */
+		CD_FINISH(ptr, offset);
+
+		/* Try to run the crash dumper */
+		func = gen8_legacy_snapshot_shader;
+		if (_gen8_do_crashdump(device))
+			func = gen8_snapshot_shader_memory;
+
+		offset = 0;
+		for (slice = 0; slice < block->num_slices; slice++) {
+			for (sp = 0; sp < block->num_sps; sp++) {
+				for (usptp = 0; usptp < block->num_usptps; usptp++) {
+					info.block = block;
+					info.sp_id = sp;
+					info.usptp = usptp;
+					info.slice_id = slice;
+					info.offset = offset;
+					offset += block->size << 2;
+
+					/* Shader working/shadow memory */
+					kgsl_snapshot_add_section(device,
+					KGSL_SNAPSHOT_SECTION_SHADER_V3, snapshot, func, &info);
+				}
+			}
+		}
+	}
+}
+
+static void gen8_rmw_aperture(struct kgsl_device *device,
+	u32 offsetwords, u32 mask, u32 val, u32 pipe, u32 slice_id, u32 use_slice_id)
+{
+	gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id);
+
+	kgsl_regmap_rmw(&device->regmap, offsetwords, mask, val);
+}
+
+static void gen8_snapshot_mempool(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot)
+{
+	struct gen8_cp_indexed_reg *cp_indexed_reg;
+	size_t mempool_index_registers_len = gen8_snapshot_block_list->mempool_index_registers_len;
+	u32 i, j, slice;
+
+	for (i = 0; i < mempool_index_registers_len; i++) {
+		cp_indexed_reg = &gen8_snapshot_block_list->mempool_index_registers[i];
+		slice = NUMBER_OF_SLICES(cp_indexed_reg->slice_region);
+
+		for (j = 0; j < slice; j++) {
+
+			/* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */
+			gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x4,
+				cp_indexed_reg->pipe_id, 0, 0);
+
+			gen8_rmw_aperture(device, GEN8_CP_SLICE_CHICKEN_DBG_PIPE, 0x4, 0x4,
+				cp_indexed_reg->pipe_id, j, 1);
+
+			kgsl_snapshot_indexed_registers_v2(device, snapshot,
+				cp_indexed_reg->addr, cp_indexed_reg->data,
+				0, cp_indexed_reg->size, cp_indexed_reg->pipe_id,
+				SLICE_ID(cp_indexed_reg->slice_region, j));
+
+			/* Reset CP_CHICKEN_DBG[StabilizeMVC] once we are done */
+			gen8_rmw_aperture(device, GEN8_CP_CHICKEN_DBG_PIPE, 0x4, 0x0,
+				cp_indexed_reg->pipe_id, 0, 0);
+
+			gen8_rmw_aperture(device, GEN8_CP_SLICE_CHICKEN_DBG_PIPE, 0x4, 0x0,
+				cp_indexed_reg->pipe_id, j, 1);
+		}
+	}
+
+	/* Clear aperture register */
+	gen8_host_aperture_set(ADRENO_DEVICE(device), 0, 0, 0);
+}
+
+static u32 gen8_read_dbgahb(struct kgsl_device *device,
+				u32 regbase, u32 reg)
+{
+	u32 val;
+
+	kgsl_regread(device, (GEN8_SP_AHB_READ_APERTURE + reg - regbase), &val);
+	return val;
+}
+
+static size_t gen8_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device,
+				u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs_v3 *header =
+				(struct kgsl_snapshot_mvc_regs_v3 *)buf;
+	struct gen8_sptp_cluster_registers_info *info =
+			(struct gen8_sptp_cluster_registers_info *)priv;
+	const u32 *ptr = info->cluster->regs;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	u32 read_sel, j;
+	u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data);
+
+	if (remain < (sizeof(*header) + size)) {
+		SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = info->context_id;
+	header->cluster_id = info->cluster_id;
+	header->pipe_id = info->pipe_id;
+	header->location_id = info->location_id;
+	header->sp_id = info->sp_id;
+	header->usptp_id = info->usptp_id;
+	header->slice_id = info->slice_id;
+
+	read_sel = GEN8_SP_READ_SEL_VAL(info->slice_id, info->location_id,
+			info->pipe_id, info->statetype_id, info->usptp_id, info->sp_id);
+
+	kgsl_regwrite(device, GEN8_SP_READ_SEL, read_sel);
+
+	/*
+	 * An explicit barrier is needed so that reads do not happen before
+	 * the register write.
+	 */
+	mb();
+
+	for (; ptr[0] != UINT_MAX; ptr += 2) {
+		u32 count = REG_COUNT(ptr);
+
+		if (count == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = ptr[0] | (1 << 31);
+			*data++ = ptr[1];
+		}
+		for (j = ptr[0]; j <= ptr[1]; j++)
+			*data++ = gen8_read_dbgahb(device, info->cluster->regbase, j);
+	}
+
+	return (size + sizeof(*header));
+}
+
+static size_t gen8_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs_v3 *header =
+				(struct kgsl_snapshot_mvc_regs_v3 *)buf;
+	struct gen8_sptp_cluster_registers_info *info =
+				(struct gen8_sptp_cluster_registers_info *)priv;
+	const u32 *ptr = info->cluster->regs;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	u32 *src;
+	u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data);
+
+	if (remain < (sizeof(*header) + size)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = info->context_id;
+	header->cluster_id = info->cluster_id;
+	header->pipe_id = info->pipe_id;
+	header->location_id = info->location_id;
+	header->sp_id = info->sp_id;
+	header->usptp_id = info->usptp_id;
+	header->slice_id = info->slice_id;
+
+	src = gen8_crashdump_registers->hostptr + info->offset;
+
+	for (ptr = info->cluster->regs; ptr[0] != UINT_MAX; ptr += 2) {
+		u32 cnt = REG_COUNT(ptr);
+
+		if (cnt == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = ptr[0] | (1 << 31);
+			*data++ = ptr[1];
+		}
+		memcpy(data, src, cnt << 2);
+		data += cnt;
+		src += cnt;
+	}
+
+	return (size + sizeof(*header));
+}
+
+static void gen8_snapshot_dbgahb_regs(struct kgsl_device *device,
+			struct kgsl_snapshot *snapshot)
+{
+	u32 i, j, sp, usptp, count, slice;
+	u64 *ptr, offset = 0;
+	struct gen8_sptp_cluster_registers_info info = {0};
+	struct gen8_sptp_cluster_registers *sptp_clusters = gen8_snapshot_block_list->sptp_clusters;
+	size_t num_sptp_clusters = gen8_snapshot_block_list->num_sptp_clusters;
+	size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain,
+		void *priv) = gen8_legacy_snapshot_cluster_dbgahb;
+
+	if (CD_SCRIPT_CHECK(device)) {
+		for (i = 0; i < num_sptp_clusters; i++) {
+			struct gen8_sptp_cluster_registers *cluster = &sptp_clusters[i];
+
+			slice = NUMBER_OF_SLICES(cluster->slice_region);
+			for (sp = 0; sp < cluster->num_sps; sp++) {
+				for (usptp = 0; usptp < cluster->num_usptps; usptp++) {
+					for (j = 0; j < slice; j++) {
+						info.cluster = cluster;
+						info.location_id = cluster->location_id;
+						info.pipe_id = cluster->pipe_id;
+						info.usptp_id = usptp;
+						info.sp_id = sp;
+						info.slice_id = SLICE_ID(cluster->slice_region, j);
+						info.statetype_id = cluster->statetype;
+						info.cluster_id = cluster->cluster_id;
+						info.context_id = cluster->context_id;
+						kgsl_snapshot_add_section(device,
+							KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot,
+							func, &info);
+					}
+				}
+			}
+		}
+		return;
+	}
+
+	for (i = 0; i < num_sptp_clusters; i++) {
+		struct gen8_sptp_cluster_registers *cluster = &sptp_clusters[i];
+
+		slice = NUMBER_OF_SLICES(cluster->slice_region);
+
+		cluster->offset = offset;
+
+		for (sp = 0; sp < cluster->num_sps; sp++) {
+			for (usptp = 0; usptp < cluster->num_usptps; usptp++) {
+				for (j = 0; j < slice; j++) {
+					const u32 *regs = cluster->regs;
+
+					info.cluster = cluster;
+					info.location_id = cluster->location_id;
+					info.pipe_id = cluster->pipe_id;
+					info.usptp_id = usptp;
+					info.sp_id = sp;
+					info.slice_id = SLICE_ID(cluster->slice_region, j);
+					info.statetype_id = cluster->statetype;
+					info.cluster_id = cluster->cluster_id;
+					info.context_id = cluster->context_id;
+					info.offset = offset;
+
+					/* Build the crash script */
+					ptr = gen8_capturescript->hostptr;
+
+					/* Program the aperture */
+					ptr += CD_WRITE(ptr, GEN8_SP_READ_SEL, GEN8_SP_READ_SEL_VAL
+						(j, cluster->location_id, cluster->pipe_id,
+						cluster->statetype, usptp, sp));
+
+					for (; regs[0] != UINT_MAX; regs += 2) {
+						count = REG_COUNT(regs);
+						ptr += CD_READ(ptr, (GEN8_SP_AHB_READ_APERTURE +
+							regs[0] - cluster->regbase), count,
+							(gen8_crashdump_registers->gpuaddr +
+								offset));
+
+						offset += count * sizeof(u32);
+					}
+					/* Marker for end of script */
+					CD_FINISH(ptr, offset);
+
+					func = gen8_legacy_snapshot_cluster_dbgahb;
+					/* Try to run the crash dumper */
+					if (_gen8_do_crashdump(device))
+						func = gen8_snapshot_cluster_dbgahb;
+
+					kgsl_snapshot_add_section(device,
+						KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot,
+						func, &info);
+				}
+			}
+		}
+	}
+}
+
+static size_t gen8_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs_v3 *header =
+					(struct kgsl_snapshot_mvc_regs_v3 *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct gen8_cluster_registers_info *info =
+			(struct gen8_cluster_registers_info *)priv;
+	const u32 *ptr = info->cluster->regs;
+	u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data);
+	u32 j;
+
+	if (remain < (sizeof(*header) + size)) {
+		SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = (info->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
+	header->cluster_id = info->cluster_id;
+	header->pipe_id = info->pipe_id;
+	header->location_id = UINT_MAX;
+	header->sp_id = UINT_MAX;
+	header->usptp_id = UINT_MAX;
+	header->slice_id = info->slice_id;
+
+	/*
+	 * Set the AHB control for the Host to read from the
+	 * cluster/context for this iteration.
+	 */
+	kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL
+			(info->slice_id, info->pipe_id, info->cluster_id, info->context_id));
+
+	if (info->cluster->sel)
+		kgsl_regwrite(device, info->cluster->sel->host_reg, info->cluster->sel->val);
+
+	/* Make sure the previous writes are posted before reading */
+	mb();
+
+	for (; ptr[0] != UINT_MAX; ptr += 2) {
+		u32 count = REG_COUNT(ptr);
+
+		if (count == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = ptr[0] | (1 << 31);
+			*data++ = ptr[1];
+		}
+		for (j = ptr[0]; j <= ptr[1]; j++)
+			kgsl_regread(device, j, data++);
+	}
+
+	return (size + sizeof(*header));
+}
+
+static size_t gen8_snapshot_mvc(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mvc_regs_v3 *header =
+				(struct kgsl_snapshot_mvc_regs_v3 *)buf;
+	struct gen8_cluster_registers_info *info =
+			(struct gen8_cluster_registers_info *)priv;
+	const u32 *ptr = info->cluster->regs;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	u32 *src;
+	u32 cnt;
+	u32 size = adreno_snapshot_regs_count(ptr) * sizeof(*data);
+
+	if (remain < (sizeof(*header) + size)) {
+		SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS");
+		return 0;
+	}
+
+	header->ctxt_id = (info->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
+	header->cluster_id = info->cluster_id;
+	header->pipe_id = info->pipe_id;
+	header->location_id = UINT_MAX;
+	header->sp_id = UINT_MAX;
+	header->usptp_id = UINT_MAX;
+	header->slice_id = info->slice_id;
+
+	src = gen8_crashdump_registers->hostptr + info->offset;
+
+	for (; ptr[0] != UINT_MAX; ptr += 2) {
+		cnt = REG_COUNT(ptr);
+
+		if (cnt == 1)
+			*data++ = ptr[0];
+		else {
+			*data++ = ptr[0] | (1 << 31);
+			*data++ = ptr[1];
+		}
+		memcpy(data, src, cnt << 2);
+		src += cnt;
+		data += cnt;
+	}
+
+	return (size + sizeof(*header));
+}
+
+static void gen8_snapshot_mvc_regs(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot,
+				struct gen8_cluster_registers *clusters,
+				size_t num_cluster)
+{
+	u32 i, j;
+	u64 *ptr, offset = 0;
+	u32 count, slice;
+	struct gen8_cluster_registers_info info = {0};
+	size_t (*func)(struct kgsl_device *device, u8 *buf,
+				size_t remain, void *priv) = gen8_legacy_snapshot_mvc;
+
+	if (CD_SCRIPT_CHECK(device)) {
+		for (i = 0; i < num_cluster; i++) {
+			struct gen8_cluster_registers *cluster = &clusters[i];
+
+			slice = NUMBER_OF_SLICES(cluster->slice_region);
+			for (j = 0; j < slice; j++) {
+				info.cluster = cluster;
+				info.pipe_id = cluster->pipe_id;
+				info.cluster_id = cluster->cluster_id;
+				info.context_id = cluster->context_id;
+				info.slice_id = SLICE_ID(cluster->slice_region, j);
+				kgsl_snapshot_add_section(device,
+					KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info);
+			}
+		}
+		return;
+	}
+
+	for (i = 0; i < num_cluster; i++) {
+		struct gen8_cluster_registers *cluster = &clusters[i];
+
+		slice = NUMBER_OF_SLICES(cluster->slice_region);
+		cluster->offset = offset;
+
+		for (j = 0; j < slice; j++) {
+			const u32 *regs = cluster->regs;
+
+			info.cluster = cluster;
+			info.pipe_id = cluster->pipe_id;
+			info.cluster_id = cluster->cluster_id;
+			info.context_id = cluster->context_id;
+			info.slice_id = SLICE_ID(cluster->slice_region, j);
+			info.offset = offset;
+
+			/* Build the crash script */
+			ptr = gen8_capturescript->hostptr;
+
+			ptr += CD_WRITE(ptr, GEN8_CP_APERTURE_CNTL_CD, GEN8_CP_APERTURE_REG_VAL
+				(j, cluster->pipe_id, cluster->cluster_id, cluster->context_id));
+
+			if (cluster->sel)
+				ptr += CD_WRITE(ptr, cluster->sel->cd_reg, cluster->sel->val);
+
+			for (; regs[0] != UINT_MAX; regs += 2) {
+				count = REG_COUNT(regs);
+
+				ptr += CD_READ(ptr, regs[0],
+					count, (gen8_crashdump_registers->gpuaddr + offset));
+
+				offset += count * sizeof(u32);
+			}
+
+			/* Marker for end of script */
+			CD_FINISH(ptr, offset);
+
+			func = gen8_legacy_snapshot_mvc;
+			/* Try to run the crash dumper */
+			if (_gen8_do_crashdump(device))
+				func = gen8_snapshot_mvc;
+
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot, func, &info);
+		}
+	}
+}
+
+/* gen8_dbgc_debug_bus_read() - Read data from trace bus */
+static void gen8_dbgc_debug_bus_read(struct kgsl_device *device,
+	u32 block_id, u32 index, u32 *val)
+{
+	u32 reg;
+
+	reg = FIELD_PREP(GENMASK(7, 0), index) |
+		FIELD_PREP(GENMASK(24, 16), block_id);
+
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to allow enough time for the correct
+	 * data to be funneled into the trace buffer
+	 */
+	udelay(1);
+
+	kgsl_regread(device, GEN8_DBGC_CFG_DBGBUS_TRACE_BUF2, val);
+	val++;
+	kgsl_regread(device, GEN8_DBGC_CFG_DBGBUS_TRACE_BUF1, val);
+}
+
+/* gen8_snapshot_dbgc_debugbus_block() - Capture debug data for a gpu block */
+static size_t gen8_snapshot_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	const u32 *block = priv;
+	u32 i;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+
+	if (remain < GEN8_DEBUGBUS_SECTION_SIZE) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->count = GEN8_DEBUGBUS_BLOCK_SIZE * 2;
+
+	for (i = 0; i < GEN8_DEBUGBUS_BLOCK_SIZE; i++)
+		gen8_dbgc_debug_bus_read(device, *block, i, &data[i*2]);
+
+	return GEN8_DEBUGBUS_SECTION_SIZE;
+}
+
+static void gen8_dbgc_side_debug_bus_read(struct kgsl_device *device,
+	u32 block_id, u32 index, u32 *val)
+{
+	u32 reg = FIELD_PREP(GENMASK(7, 0), index) |
+			FIELD_PREP(GENMASK(24, 16), block_id);
+
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to allow enough time for the correct
+	 * data to be funneled into the trace buffer
+	 */
+	udelay(1);
+
+	reg = kgsl_regmap_read(&device->regmap, GEN8_DBGC_CFG_DBGBUS_OVER);
+
+	*val = FIELD_GET(GENMASK(27, 24), reg);
+}
+
+static size_t gen8_snapshot_dbgc_side_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_side_debugbus *header =
+		(struct kgsl_snapshot_side_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	size_t size = (GEN8_DEBUGBUS_BLOCK_SIZE * sizeof(u32)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->size = GEN8_DEBUGBUS_BLOCK_SIZE;
+	header->valid_data = 0x4;
+
+	for (i = 0; i < GEN8_DEBUGBUS_BLOCK_SIZE; i++)
+		gen8_dbgc_side_debug_bus_read(device, *block, i, &data[i]);
+
+	return size;
+}
+
+/* gen8_cx_debug_bus_read() - Read data from the CX trace bus */
+static void gen8_cx_debug_bus_read(struct kgsl_device *device,
+	u32 block_id, u32 index, u32 *val)
+{
+	u32 reg = FIELD_PREP(GENMASK(7, 0), index) |
+		FIELD_PREP(GENMASK(24, 16), block_id);
+
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to allow enough time for the correct
+	 * data to be funneled into the trace buffer
+	 */
+	udelay(1);
+
+	kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_TRACE_BUF2, val);
+	val++;
+	kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_TRACE_BUF1, val);
+}
+
+/*
+ * gen8_snapshot_cx_dbgc_debugbus_block() - Capture debug data for a gpu
+ * block from the CX DBGC block
+ */
+static size_t gen8_snapshot_cx_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+
+	if (remain < GEN8_DEBUGBUS_SECTION_SIZE) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->count = GEN8_DEBUGBUS_BLOCK_SIZE * 2;
+
+	for (i = 0; i < GEN8_DEBUGBUS_BLOCK_SIZE; i++)
+		gen8_cx_debug_bus_read(device, *block, i, &data[i*2]);
+
+	return GEN8_DEBUGBUS_SECTION_SIZE;
+}
+
+/* gen8_cx_side_debug_bus_read() - Read data from the CX side trace bus */
+static void gen8_cx_side_debug_bus_read(struct kgsl_device *device,
+	u32 block_id, u32 index, u32 *val)
+{
+	u32 reg = FIELD_PREP(GENMASK(7, 0), index) |
+			FIELD_PREP(GENMASK(24, 16), block_id);
+
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/*
+	 * A delay of 1 us is needed to allow enough time for the correct
+	 * data to be funneled into the trace buffer
+	 */
+	udelay(1);
+
+	kgsl_regread(device, GEN8_CX_DBGC_CFG_DBGBUS_OVER, &reg);
+	*val = FIELD_GET(GENMASK(27, 24), reg);
+}
+
+/*
+ * gen8_snapshot_cx_side_dbgc_debugbus_block() - Capture side debug data for a
+ * gpu block from the CX DBGC block
+ */
+static size_t gen8_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_side_debugbus *header =
+		(struct kgsl_snapshot_side_debugbus *)buf;
+	const u32 *block = priv;
+	int i;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	size_t size = (GEN8_DEBUGBUS_BLOCK_SIZE * sizeof(u32)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = *block;
+	header->size = GEN8_DEBUGBUS_BLOCK_SIZE;
+	header->valid_data = 0x4;
+
+	for (i = 0; i < GEN8_DEBUGBUS_BLOCK_SIZE; i++)
+		gen8_cx_side_debug_bus_read(device, *block, i, &data[i]);
+
+	return size;
+}
+
+static void gen8_snapshot_cx_debugbus(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	u32 i;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLT,
+			FIELD_PREP(GENMASK(31, 28), 0xf));
+
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_CNTLM,
+			FIELD_PREP(GENMASK(27, 24), 0xf));
+
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_0,
+			FIELD_PREP(GENMASK(3, 0), 0x0) |
+			FIELD_PREP(GENMASK(7, 4), 0x1) |
+			FIELD_PREP(GENMASK(11, 8), 0x2) |
+			FIELD_PREP(GENMASK(15, 12), 0x3) |
+			FIELD_PREP(GENMASK(19, 16), 0x4) |
+			FIELD_PREP(GENMASK(23, 20), 0x5) |
+			FIELD_PREP(GENMASK(27, 24), 0x6) |
+			FIELD_PREP(GENMASK(31, 28), 0x7));
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_BYTEL_1,
+			FIELD_PREP(GENMASK(3, 0), 0x8) |
+			FIELD_PREP(GENMASK(7, 4), 0x9) |
+			FIELD_PREP(GENMASK(11, 8), 0xa) |
+			FIELD_PREP(GENMASK(15, 12), 0xb) |
+			FIELD_PREP(GENMASK(19, 16), 0xc) |
+			FIELD_PREP(GENMASK(23, 20), 0xd) |
+			FIELD_PREP(GENMASK(27, 24), 0xe) |
+			FIELD_PREP(GENMASK(31, 28), 0xf));
+
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+	kgsl_regwrite(device, GEN8_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+
+	/* Dump the CX debugbus data if the block exists */
+	if (!kgsl_regmap_valid_offset(&device->regmap, GEN8_CX_DBGC_CFG_DBGBUS_SEL_A))
+		return;
+
+	for (i = 0; i < gen8_snapshot_block_list->cx_debugbus_blocks_len; i++) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+			snapshot, gen8_snapshot_cx_dbgc_debugbus_block,
+			(void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]);
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS,
+			snapshot, gen8_snapshot_cx_side_dbgc_debugbus_block,
+			(void *) &gen8_snapshot_block_list->cx_debugbus_blocks[i]);
+	}
+}
+
+/* gen8_snapshot_debugbus() - Capture debug bus data */
+static void gen8_snapshot_debugbus(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	u32 i;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_CNTLT,
+			FIELD_PREP(GENMASK(31, 28), 0xf));
+
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_CNTLM,
+			FIELD_PREP(GENMASK(27, 24), 0xf));
+
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_IVTL_0, 0);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_IVTL_1, 0);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_IVTL_2, 0);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_IVTL_3, 0);
+
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_BYTEL_0,
+			FIELD_PREP(GENMASK(3, 0), 0x0) |
+			FIELD_PREP(GENMASK(7, 4), 0x1) |
+			FIELD_PREP(GENMASK(11, 8), 0x2) |
+			FIELD_PREP(GENMASK(15, 12), 0x3) |
+			FIELD_PREP(GENMASK(19, 16), 0x4) |
+			FIELD_PREP(GENMASK(23, 20), 0x5) |
+			FIELD_PREP(GENMASK(27, 24), 0x6) |
+			FIELD_PREP(GENMASK(31, 28), 0x7));
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_BYTEL_1,
+			FIELD_PREP(GENMASK(3, 0), 0x8) |
+			FIELD_PREP(GENMASK(7, 4), 0x9) |
+			FIELD_PREP(GENMASK(11, 8), 0xa) |
+			FIELD_PREP(GENMASK(15, 12), 0xb) |
+			FIELD_PREP(GENMASK(19, 16), 0xc) |
+			FIELD_PREP(GENMASK(23, 20), 0xd) |
+			FIELD_PREP(GENMASK(27, 24), 0xe) |
+			FIELD_PREP(GENMASK(31, 28), 0xf));
+
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_0, 0);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_1, 0);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_2, 0);
+	kgsl_regwrite(device, GEN8_DBGC_CFG_DBGBUS_MASKL_3, 0);
+
+	for (i = 0; i < gen8_snapshot_block_list->debugbus_blocks_len; i++) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+			snapshot, gen8_snapshot_dbgc_debugbus_block,
+			(void *) &gen8_snapshot_block_list->debugbus_blocks[i]);
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS,
+			snapshot, gen8_snapshot_dbgc_side_debugbus_block,
+			(void *) &gen8_snapshot_block_list->debugbus_blocks[i]);
+	}
+
+	for (i = 0; i < gen8_snapshot_block_list->gbif_debugbus_blocks_len; i++) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+			snapshot, gen8_snapshot_dbgc_debugbus_block,
+			(void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]);
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS,
+			snapshot, gen8_snapshot_dbgc_side_debugbus_block,
+			(void *) &gen8_snapshot_block_list->gbif_debugbus_blocks[i]);
+	}
+}
+
+/* gen8_snapshot_sqe() - Dump SQE data in snapshot */
+static size_t gen8_snapshot_sqe(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
+
+	if (remain < DEBUG_SECTION_SZ(GEN8_SQE_FW_SNAPSHOT_DWORDS)) {
+		SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG");
+		return 0;
+	}
+
+	/* Dump the SQE firmware version */
+	header->type = SNAPSHOT_DEBUG_SQE_VERSION;
+	header->size = GEN8_SQE_FW_SNAPSHOT_DWORDS;
+	memcpy(data, fw->memdesc->hostptr, (GEN8_SQE_FW_SNAPSHOT_DWORDS * sizeof(u32)));
+
+	return DEBUG_SECTION_SZ(GEN8_SQE_FW_SNAPSHOT_DWORDS);
+}
+
+/* gen8_snapshot_aqe() - Dump AQE data in snapshot */
+static size_t gen8_snapshot_aqe(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	u32 *data = (u32 *)(buf + sizeof(*header));
+	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_AQE);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_AQE))
+		return 0;
+
+	if (remain < DEBUG_SECTION_SZ(1)) {
+		SNAPSHOT_ERR_NOMEM(device, "AQE VERSION DEBUG");
+		return 0;
+	}
+
+	/* Dump the AQE firmware version */
+	header->type = SNAPSHOT_DEBUG_AQE_VERSION;
+	header->size = 1;
+	*data = fw->version;
+
+	return DEBUG_SECTION_SZ(1);
+}
+
+/* Snapshot the preemption related buffers */
+static size_t snapshot_preemption_record(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+	u8 *ptr = buf + sizeof(*header);
+	u64 ctxt_record_size = max_t(u64, GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES,
+					device->snapshot_ctxt_record_size);
+
+	if (remain < (ctxt_record_size + sizeof(*header))) {
+		SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD");
+		return 0;
+	}
+
+	header->size = ctxt_record_size >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, ctxt_record_size);
+
+	return ctxt_record_size + sizeof(*header);
+}
+
+static void gen8_reglist_snapshot(struct kgsl_device *device,
+					struct kgsl_snapshot *snapshot)
+{
+	u64 *ptr, offset = 0;
+	u32 i, j, r, slices;
+	struct gen8_reg_list *reg_list = gen8_snapshot_block_list->reg_list;
+	size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain,
+		void *priv) = gen8_legacy_snapshot_registers;
+	struct gen8_reg_list_info info = {0};
+
+	if (CD_SCRIPT_CHECK(device)) {
+		for (i = 0; reg_list[i].regs; i++) {
+			struct gen8_reg_list *regs = &reg_list[i];
+
+			slices = NUMBER_OF_SLICES(regs->slice_region);
+			for (j = 0; j < slices; j++) {
+				info.regs = regs;
+				info.slice_id = SLICE_ID(regs->slice_region, j);
+				kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3,
+					snapshot, func, &info);
+			}
+		}
+		return;
+	}
+
+	for (i = 0; reg_list[i].regs; i++) {
+		struct gen8_reg_list *regs = &reg_list[i];
+
+		slices = NUMBER_OF_SLICES(regs->slice_region);
+		regs->offset = offset;
+
+		for (j = 0; j < slices; j++) {
+			const u32 *regs_ptr = regs->regs;
+
+			/* Build the crash script */
+			ptr = gen8_capturescript->hostptr;
+
+			ptr += CD_WRITE(ptr, GEN8_CP_APERTURE_CNTL_CD, GEN8_CP_APERTURE_REG_VAL
+					(j, 0, 0, 0));
+			/* Program the SEL_CNTL_CD register appropriately */
+			if (regs->sel)
+				ptr += CD_WRITE(ptr, regs->sel->cd_reg, regs->sel->val);
+			info.regs = regs;
+			info.slice_id = SLICE_ID(regs->slice_region, j);
+			info.offset = offset;
+
+			for (; regs_ptr[0] != UINT_MAX; regs_ptr += 2) {
+				r = REG_COUNT(regs_ptr);
+				ptr += CD_READ(ptr, regs_ptr[0], r,
+					(gen8_crashdump_registers->gpuaddr + offset));
+				offset += r * sizeof(u32);
+			}
+
+			/* Marker for end of script */
+			CD_FINISH(ptr, offset);
+
+			func = gen8_legacy_snapshot_registers;
+			/* Try to run the crash dumper */
+			if (_gen8_do_crashdump(device))
+				func = gen8_snapshot_registers;
+
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3,
+				snapshot, func, &info);
+		}
+	}
+}
+
+static size_t gen8_snapshot_cx_misc_registers(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	const u32 *ptr = (u32 *)priv;
+	u32 *src, *data = (unsigned int *)buf;
+	size_t size = adreno_snapshot_regs_count(ptr) * sizeof(u32);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "CX_MISC REGISTERS");
+		return 0;
+	}
+
+	src = gen8_crashdump_registers->hostptr;
+
+	for (; ptr[0] != UINT_MAX; ptr += 2) {
+		u32 cnt = REG_COUNT(ptr);
+
+		if (cnt == 1)
+			*data++ = BIT(31) | ptr[0];
+		else {
+			*data++ = ptr[0];
+			*data++ = cnt;
+		}
+		memcpy(data, src, cnt << 2);
+		data += cnt;
+		src += cnt;
+	}
+
+	/* Return the size of the section */
+	return size;
+}
+
+static void gen8_cx_misc_regs_snapshot(struct kgsl_device *device,
+					struct kgsl_snapshot *snapshot)
+{
+	u64 *ptr, offset = 0;
+	const u32 *regs_ptr = (const u32 *)gen8_snapshot_block_list->cx_misc_regs;
+
+	if (CD_SCRIPT_CHECK(device) || !gen8_gmu_rpmh_pwr_state_is_active(device)
+		|| !gen8_gmu_gx_is_on(ADRENO_DEVICE(device)))
+		goto legacy_snapshot;
+
+	/* Build the crash script */
+	ptr = (u64 *)gen8_capturescript->hostptr;
+
+	for (; regs_ptr[0] != UINT_MAX; regs_ptr += 2) {
+		u32 r = REG_COUNT(regs_ptr);
+
+		ptr += CD_READ(ptr, regs_ptr[0], r,
+			(gen8_crashdump_registers->gpuaddr + offset));
+		offset += r * sizeof(u32);
+	}
+
+	/* Marker for end of script */
+	CD_FINISH(ptr, offset);
+
+	/* Try to run the crash dumper */
+	if (_gen8_do_crashdump(device)) {
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+			snapshot, gen8_snapshot_cx_misc_registers,
+			(void *)gen8_snapshot_block_list->cx_misc_regs);
+		return;
+	}
+
+legacy_snapshot:
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+		snapshot, adreno_snapshot_cx_misc_registers,
+		(void *)gen8_snapshot_block_list->cx_misc_regs);
+}
+
+void gen8_snapshot_external_core_regs(struct kgsl_device *device,
+			struct kgsl_snapshot *snapshot)
+{
+	const u32 **external_core_regs;
+	u32 i, num_external_core_regs;
+	const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device));
+
+	gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list;
+	external_core_regs = gen8_snapshot_block_list->external_core_regs;
+	num_external_core_regs = gen8_snapshot_block_list->num_external_core_regs;
+
+	for (i = 0; i < num_external_core_regs; i++)
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2,
+			snapshot, adreno_snapshot_registers_v2,
+			(void *) external_core_regs[i]);
+}
+
+/*
+ * gen8_snapshot() - GEN8 GPU snapshot function
+ * @adreno_dev: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the GEN8-specific bits and pieces are collected
+ * into the snapshot memory.
+ */
+void gen8_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb;
+	u32 i;
+	const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device));
+	int is_current_rt;
+
+	gen8_crashdump_timedout = false;
+	gen8_snapshot_block_list = gpucore->gen8_snapshot_block_list;
+
+	/* External registers are dumped in the beginning of gmu snapshot */
+	if (!gmu_core_isenabled(device))
+		gen8_snapshot_external_core_regs(device, snapshot);
+
+	gen8_cx_misc_regs_snapshot(device, snapshot);
+
+	gen8_snapshot_cx_debugbus(adreno_dev, snapshot);
+
+	if (!gen8_gmu_rpmh_pwr_state_is_active(device) ||
+		!gen8_gmu_gx_is_on(adreno_dev))
+		return;
+
+	/* SQE Firmware */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, gen8_snapshot_sqe, NULL);
+
+	/* AQE Firmware */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, gen8_snapshot_aqe, NULL);
+
+	gen8_snapshot_trace_buffer(device, snapshot);
+
+	gen8_snapshot_debugbus(adreno_dev, snapshot);
+
+	is_current_rt = rt_task(current);
+
+	if (is_current_rt)
+		sched_set_normal(current, 0);
+
+	gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE,
+		GEN8_CP_IB1_BASE_HI_PIPE, &snapshot->ib1base, PIPE_BR, 0, 0);
+
+	gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE,
+		GEN8_CP_IB2_BASE_HI_PIPE, &snapshot->ib2base, PIPE_BR, 0, 0);
+
+	gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE,
+			&snapshot->ib1size, PIPE_BR, 0, 0);
+	gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE,
+			&snapshot->ib2size, PIPE_BR, 0, 0);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) {
+		gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE,
+			GEN8_CP_IB1_BASE_HI_PIPE, &snapshot->ib1base_lpac, PIPE_LPAC, 0, 0);
+
+		gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE,
+			GEN8_CP_IB2_BASE_HI_PIPE, &snapshot->ib2base_lpac, PIPE_LPAC, 0, 0);
+
+		gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE,
+			&snapshot->ib1size_lpac, PIPE_LPAC, 0, 0);
+		gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE,
+			&snapshot->ib2size_lpac, PIPE_LPAC, 0, 0);
+	}
+
+	/* Clear aperture register */
+	gen8_host_aperture_set(adreno_dev, 0, 0, 0);
+
+	/* Assert the isStatic bit before triggering snapshot */
+	kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, 0x1);
+
+	/* Dump the registers which get affected by crash dumper trigger */
+	for (i = 0; i < gen8_snapshot_block_list->num_pre_crashdumper_regs; i++) {
+		struct gen8_reg_list *regs = &gen8_snapshot_block_list->pre_crashdumper_regs[i];
+		struct gen8_reg_list_info info = {0};
+		u32 j, slices;
+
+		slices = NUMBER_OF_SLICES(regs->slice_region);
+
+		for (j = 0; j < slices; j++) {
+			info.regs = regs;
+			info.slice_id = SLICE_ID(regs->slice_region, j);
+			kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3,
+				snapshot, gen8_legacy_snapshot_registers, &info);
+		}
+	}
+
+	gen8_reglist_snapshot(device, snapshot);
+
+	for (i = 0; i < gen8_snapshot_block_list->index_registers_len; i++) {
+		kgsl_regwrite(device, GEN8_CP_APERTURE_CNTL_HOST, GEN8_CP_APERTURE_REG_VAL
+				(0,  gen8_snapshot_block_list->index_registers[i].pipe_id, 0, 0));
+
+		kgsl_snapshot_indexed_registers_v2(device, snapshot,
+			gen8_snapshot_block_list->index_registers[i].addr,
+			gen8_snapshot_block_list->index_registers[i].data, 0,
+			gen8_snapshot_block_list->index_registers[i].size,
+			gen8_snapshot_block_list->index_registers[i].pipe_id, UINT_MAX);
+	}
+
+	/* Mempool debug data */
+	gen8_snapshot_mempool(device, snapshot);
+
+	/* CP MVC register section */
+	gen8_snapshot_mvc_regs(device, snapshot,
+		gen8_snapshot_block_list->cp_clusters, gen8_snapshot_block_list->num_cp_clusters);
+
+	/* MVC register section */
+	gen8_snapshot_mvc_regs(device, snapshot,
+		gen8_snapshot_block_list->clusters, gen8_snapshot_block_list->num_clusters);
+
+	/* registers dumped through DBG AHB */
+	gen8_snapshot_dbgahb_regs(device, snapshot);
+
+	/* Shader memory */
+	gen8_snapshot_shader(device, snapshot);
+
+	kgsl_regwrite(device, GEN8_RBBM_SNAPSHOT_STATUS, 0x0);
+
+	/* Preemption record */
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+				snapshot, snapshot_preemption_record,
+				rb->preemption_desc);
+		}
+	}
+	if (is_current_rt)
+		sched_set_fifo(current);
+}
+
+void gen8_crashdump_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = adreno_allocate_global(device, &gen8_capturescript,
+		50 * PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY,
+		KGSL_MEMDESC_PRIVILEGED, "capturescript");
+
+	if (!ret)
+		ret = adreno_allocate_global(device, &gen8_crashdump_registers,
+			200 * PAGE_SIZE, 0, 0,
+			KGSL_MEMDESC_PRIVILEGED, "capturescript_regs");
+
+	if (ret)
+		dev_err(device->dev, "Failed to init crashdumper err = %d\n", ret);
+}

+ 656 - 0
qcom/opensource/graphics-kernel/adreno_gen8_snapshot.h

@@ -0,0 +1,656 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_GEN8_SNAPSHOT_H
+#define __ADRENO_GEN8_SNAPSHOT_H
+
+#include "adreno.h"
+#include "adreno_gen8.h"
+#include "kgsl_regmap.h"
+#include "kgsl_snapshot.h"
+
+enum cluster_id {
+	CLUSTER_NONE   = 0,
+	CLUSTER_FE_US  = 1,
+	CLUSTER_FE_S   = 2,
+	CLUSTER_SP_VS  = 3,
+	CLUSTER_VPC_VS = 4,
+	CLUSTER_VPC_US = 5,
+	CLUSTER_GRAS   = 6,
+	CLUSTER_SP_PS  = 7,
+	CLUSTER_VPC_PS = 8,
+	CLUSTER_PS     = 9,
+};
+
+enum location_id {
+	HLSQ_STATE  = 0,
+	HLSQ_DP     = 1,
+	SP_TOP      = 2,
+	USPTP       = 3,
+	HLSQ_DP_STR = 4,
+};
+
+#define STATE_NON_CONTEXT     0
+#define STATE_TOGGLE_CTXT     1
+#define STATE_FORCE_CTXT_0    2
+#define STATE_FORCE_CTXT_1    3
+
+#define UNSLICE                 0
+#define SLICE                   1
+
+#define MAX_PHYSICAL_SLICES     1
+
+#define NUMBER_OF_SLICES(region) ((region == SLICE) ? MAX_PHYSICAL_SLICES : 1)
+#define SLICE_ID(region, j) ((region == SLICE) ? j : UINT_MAX)
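A minimal sketch (not part of this commit) of the slice-iteration pattern these two macros support, mirroring the per-slice loop in gen8_snapshot(); for_each_slice() and the dump_one_slice callback are hypothetical names:

	static void for_each_slice(u32 slice_region,
			void (*dump_one_slice)(u32 slice_id))
	{
		u32 j, slices = NUMBER_OF_SLICES(slice_region);

		/* SLICE_ID() returns UINT_MAX for unsliced regions */
		for (j = 0; j < slices; j++)
			dump_one_slice(SLICE_ID(slice_region, j));
	}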
+
+#define GEN8_DEBUGBUS_BLOCK_SIZE 0x100
+
+/* Number of dwords to dump in the snapshot for the CP SQE */
+#define GEN8_SQE_FW_SNAPSHOT_DWORDS 5
+
+struct sel_reg {
+	u32 host_reg;
+	u32 cd_reg;
+	u32 val;
+};
+
+struct gen8_shader_block_info {
+	struct gen8_shader_block *block;
+	u32 sp_id;
+	u32 usptp;
+	u32 slice_id;
+	u32 location_id;
+	u32 context_id;
+	u32 bank;
+	u64 offset;
+};
+
+struct gen8_shader_block {
+	/* statetype: Type identifier for the block */
+	u32 statetype;
+	/* size: Size of the block (in dwords) */
+	u32 size;
+	/* num_sps: The number of SPs to dump */
+	u32 num_sps;
+	/* num_usptps: The number of USPTPs to dump */
+	u32 num_usptps;
+	/* pipeid: Pipe identifier for the block data  */
+	u32 pipeid;
+	/* location: Location identifier for the block data */
+	u32 location;
+	/* num_slices: the number of slices to dump */
+	u32 num_slices;
+	/* num_ctx: Number of contexts to iterate over */
+	u32 num_ctx;
+	/* offset: The offset in the snapshot dump */
+	u64 offset;
+};
+
+struct gen8_cluster_registers_info {
+	struct gen8_cluster_registers *cluster;
+	u32 cluster_id;
+	u32 slice_id;
+	u32 pipe_id;
+	u32 context_id;
+	u64 offset;
+};
+
+struct gen8_cluster_registers {
+	/* cluster_id: Cluster identifier */
+	u32 cluster_id;
+	/* slice_region: Whether the registers are sliced or unsliced */
+	u32 slice_region;
+	/* pipe_id: Pipe Identifier */
+	u32 pipe_id;
+	/* context_id: one of STATE_ that identifies the context to dump */
+	u32 context_id;
+	/* regs: Pointer to an array of register pairs */
+	const u32 *regs;
+	/* sel: Pointer to a selector register to write before reading */
+	const struct sel_reg *sel;
+	/* offset: Internal variable to track the state of the crashdump */
+	u32 offset;
+};
+
+struct gen8_reg_list_info {
+	struct gen8_reg_list *regs;
+	u32 cluster_id;
+	u32 slice_id;
+	u32 pipe_id;
+	u32 sp_id;
+	u32 usptp_id;
+	u32 context_id;
+	u64 offset;
+};
+
+struct gen8_sptp_cluster_registers_info {
+	struct gen8_sptp_cluster_registers *cluster;
+	u32 cluster_id;
+	u32 slice_id;
+	u32 pipe_id;
+	u32 sp_id;
+	u32 usptp_id;
+	u32 location_id;
+	u32 context_id;
+	u32 statetype_id;
+	u64 offset;
+};
+
+struct gen8_sptp_cluster_registers {
+	/* cluster_id: Cluster identifier */
+	u32 cluster_id;
+	/* slice_region: Whether the registers are sliced or unsliced */
+	u32 slice_region;
+	/* num_sps: The number of SPs to dump */
+	u32 num_sps;
+	/* num_usptps: The number of USPTPs to dump */
+	u32 num_usptps;
+	/* statetype: SP block state type for the cluster */
+	u32 statetype;
+	/* pipe_id: Pipe identifier */
+	u32 pipe_id;
+	/* context_id: Context identifier */
+	u32 context_id;
+	/* location_id: Location identifier */
+	u32 location_id;
+	/* regs: Pointer to the list of register pairs to read */
+	const u32 *regs;
+	/* regbase: Dword offset of the register block in the GPU register space */
+	u32 regbase;
+	/* offset: Internal variable used to track the crashdump state */
+	u32 offset;
+};
+
+struct gen8_cp_indexed_reg {
+	u32 addr;
+	u32 data;
+	u32 slice_region;
+	u32 pipe_id;
+	u32 size;
+};
+
+struct gen8_reg_list {
+	u32 slice_region;
+	const u32 *regs;
+	const struct sel_reg *sel;
+	u64 offset;
+};
+
+struct gen8_trace_buffer_info {
+	u16 dbgc_ctrl;
+	u16 segment;
+	u16 granularity;
+	u16 ping_blk[TRACE_BUF_NUM_SIG];
+	u16 ping_idx[TRACE_BUF_NUM_SIG];
+};
+
+enum gen8_debugbus_ids {
+	DEBUGBUS_GBIF_CX_GC_US_I_0          = 1,
+	DEBUGBUS_GMU_CX_GC_US_I_0           = 2,
+	DEBUGBUS_CX_GC_US_I_0               = 3,
+	DEBUGBUS_GBIF_GX_GC_US_I_0          = 8,
+	DEBUGBUS_GMU_GX_GC_US_I_0           = 9,
+	DEBUGBUS_DBGC_GC_US_I_0             = 10,
+	DEBUGBUS_RBBM_GC_US_I_0             = 11,
+	DEBUGBUS_LARC_GC_US_I_0             = 12,
+	DEBUGBUS_COM_GC_US_I_0              = 13,
+	DEBUGBUS_HLSQ_GC_US_I_0             = 14,
+	DEBUGBUS_CGC_GC_US_I_0              = 15,
+	DEBUGBUS_VSC_GC_US_I_0_0            = 20,
+	DEBUGBUS_VSC_GC_US_I_0_1            = 21,
+	DEBUGBUS_UFC_GC_US_I_0              = 24,
+	DEBUGBUS_UFC_GC_US_I_1              = 25,
+	DEBUGBUS_CP_GC_US_I_0_0             = 40,
+	DEBUGBUS_CP_GC_US_I_0_1             = 41,
+	DEBUGBUS_CP_GC_US_I_0_2             = 42,
+	DEBUGBUS_PC_BR_US_I_0               = 56,
+	DEBUGBUS_PC_BV_US_I_0               = 57,
+	DEBUGBUS_GPC_BR_US_I_0              = 58,
+	DEBUGBUS_GPC_BV_US_I_0              = 59,
+	DEBUGBUS_VPC_BR_US_I_0              = 60,
+	DEBUGBUS_VPC_BV_US_I_0              = 61,
+	DEBUGBUS_UCHE_WRAPPER_GC_US_I_0     = 80,
+	DEBUGBUS_UCHE_GC_US_I_0             = 81,
+	DEBUGBUS_UCHE_GC_US_I_1             = 82,
+	DEBUGBUS_CP_GC_S_0_I_0              = 128,
+	DEBUGBUS_PC_BR_S_0_I_0              = 129,
+	DEBUGBUS_PC_BV_S_0_I_0              = 130,
+	DEBUGBUS_TESS_GC_S_0_I_0            = 131,
+	DEBUGBUS_TSEFE_GC_S_0_I_0           = 132,
+	DEBUGBUS_TSEBE_GC_S_0_I_0           = 133,
+	DEBUGBUS_RAS_GC_S_0_I_0             = 134,
+	DEBUGBUS_LRZ_BR_S_0_I_0             = 135,
+	DEBUGBUS_LRZ_BV_S_0_I_0             = 136,
+	DEBUGBUS_VFDP_GC_S_0_I_0            = 137,
+	DEBUGBUS_GPC_BR_S_0_I_0             = 138,
+	DEBUGBUS_GPC_BV_S_0_I_0             = 139,
+	DEBUGBUS_VPCFE_BR_S_0_I_0           = 140,
+	DEBUGBUS_VPCFE_BV_S_0_I_0           = 141,
+	DEBUGBUS_VPCBE_BR_S_0_I_0           = 142,
+	DEBUGBUS_VPCBE_BV_S_0_I_0           = 143,
+	DEBUGBUS_CCHE_GC_S_0_I_0            = 144,
+	DEBUGBUS_DBGC_GC_S_0_I_0            = 145,
+	DEBUGBUS_LARC_GC_S_0_I_0            = 146,
+	DEBUGBUS_RBBM_GC_S_0_I_0            = 147,
+	DEBUGBUS_CCRE_GC_S_0_I_0            = 148,
+	DEBUGBUS_CGC_GC_S_0_I_0             = 149,
+	DEBUGBUS_GMU_GC_S_0_I_0             = 150,
+	DEBUGBUS_SLICE_GC_S_0_I_0           = 151,
+	DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0  = 152,
+	DEBUGBUS_USP_GC_S_0_I_0             = 160,
+	DEBUGBUS_USP_GC_S_0_I_1             = 161,
+	DEBUGBUS_USPTP_GC_S_0_I_0           = 166,
+	DEBUGBUS_USPTP_GC_S_0_I_1           = 167,
+	DEBUGBUS_USPTP_GC_S_0_I_2           = 168,
+	DEBUGBUS_USPTP_GC_S_0_I_3           = 169,
+	DEBUGBUS_TP_GC_S_0_I_0              = 178,
+	DEBUGBUS_TP_GC_S_0_I_1              = 179,
+	DEBUGBUS_TP_GC_S_0_I_2              = 180,
+	DEBUGBUS_TP_GC_S_0_I_3              = 181,
+	DEBUGBUS_RB_GC_S_0_I_0              = 190,
+	DEBUGBUS_RB_GC_S_0_I_1              = 191,
+	DEBUGBUS_CCU_GC_S_0_I_0             = 196,
+	DEBUGBUS_CCU_GC_S_0_I_1             = 197,
+	DEBUGBUS_HLSQ_GC_S_0_I_0            = 202,
+	DEBUGBUS_HLSQ_GC_S_0_I_1            = 203,
+	DEBUGBUS_VFD_GC_S_0_I_0             = 208,
+	DEBUGBUS_VFD_GC_S_0_I_1             = 209,
+	DEBUGBUS_CP_GC_S_1_I_0              = 256,
+	DEBUGBUS_PC_BR_S_1_I_0              = 257,
+	DEBUGBUS_PC_BV_S_1_I_0              = 258,
+	DEBUGBUS_TESS_GC_S_1_I_0            = 259,
+	DEBUGBUS_TSEFE_GC_S_1_I_0           = 260,
+	DEBUGBUS_TSEBE_GC_S_1_I_0           = 261,
+	DEBUGBUS_RAS_GC_S_1_I_0             = 262,
+	DEBUGBUS_LRZ_BR_S_1_I_0             = 263,
+	DEBUGBUS_LRZ_BV_S_1_I_0             = 264,
+	DEBUGBUS_VFDP_GC_S_1_I_0            = 265,
+	DEBUGBUS_GPC_BR_S_1_I_0             = 266,
+	DEBUGBUS_GPC_BV_S_1_I_0             = 267,
+	DEBUGBUS_VPCFE_BR_S_1_I_0           = 268,
+	DEBUGBUS_VPCFE_BV_S_1_I_0           = 269,
+	DEBUGBUS_VPCBE_BR_S_1_I_0           = 270,
+	DEBUGBUS_VPCBE_BV_S_1_I_0           = 271,
+	DEBUGBUS_CCHE_GC_S_1_I_0            = 272,
+	DEBUGBUS_DBGC_GC_S_1_I_0            = 273,
+	DEBUGBUS_LARC_GC_S_1_I_0            = 274,
+	DEBUGBUS_RBBM_GC_S_1_I_0            = 275,
+	DEBUGBUS_CCRE_GC_S_1_I_0            = 276,
+	DEBUGBUS_CGC_GC_S_1_I_0             = 277,
+	DEBUGBUS_GMU_GC_S_1_I_0             = 278,
+	DEBUGBUS_SLICE_GC_S_1_I_0           = 279,
+	DEBUGBUS_HLSQ_SPTP_STAR_GC_S_1_I_0  = 280,
+	DEBUGBUS_USP_GC_S_1_I_0             = 288,
+	DEBUGBUS_USP_GC_S_1_I_1             = 289,
+	DEBUGBUS_USPTP_GC_S_1_I_0           = 294,
+	DEBUGBUS_USPTP_GC_S_1_I_1           = 295,
+	DEBUGBUS_USPTP_GC_S_1_I_2           = 296,
+	DEBUGBUS_USPTP_GC_S_1_I_3           = 297,
+	DEBUGBUS_TP_GC_S_1_I_0              = 306,
+	DEBUGBUS_TP_GC_S_1_I_1              = 307,
+	DEBUGBUS_TP_GC_S_1_I_2              = 308,
+	DEBUGBUS_TP_GC_S_1_I_3              = 309,
+	DEBUGBUS_RB_GC_S_1_I_0              = 318,
+	DEBUGBUS_RB_GC_S_1_I_1              = 319,
+	DEBUGBUS_CCU_GC_S_1_I_0             = 324,
+	DEBUGBUS_CCU_GC_S_1_I_1             = 325,
+	DEBUGBUS_HLSQ_GC_S_1_I_0            = 330,
+	DEBUGBUS_HLSQ_GC_S_1_I_1            = 331,
+	DEBUGBUS_VFD_GC_S_1_I_0             = 336,
+	DEBUGBUS_VFD_GC_S_1_I_1             = 337,
+	DEBUGBUS_CP_GC_S_2_I_0              = 384,
+	DEBUGBUS_PC_BR_S_2_I_0              = 385,
+	DEBUGBUS_PC_BV_S_2_I_0              = 386,
+	DEBUGBUS_TESS_GC_S_2_I_0            = 387,
+	DEBUGBUS_TSEFE_GC_S_2_I_0           = 388,
+	DEBUGBUS_TSEBE_GC_S_2_I_0           = 389,
+	DEBUGBUS_RAS_GC_S_2_I_0             = 390,
+	DEBUGBUS_LRZ_BR_S_2_I_0             = 391,
+	DEBUGBUS_LRZ_BV_S_2_I_0             = 392,
+	DEBUGBUS_VFDP_GC_S_2_I_0            = 393,
+	DEBUGBUS_GPC_BR_S_2_I_0             = 394,
+	DEBUGBUS_GPC_BV_S_2_I_0             = 395,
+	DEBUGBUS_VPCFE_BR_S_2_I_0           = 396,
+	DEBUGBUS_VPCFE_BV_S_2_I_0           = 397,
+	DEBUGBUS_VPCBE_BR_S_2_I_0           = 398,
+	DEBUGBUS_VPCBE_BV_S_2_I_0           = 399,
+	DEBUGBUS_CCHE_GC_S_2_I_0            = 400,
+	DEBUGBUS_DBGC_GC_S_2_I_0            = 401,
+	DEBUGBUS_LARC_GC_S_2_I_0            = 402,
+	DEBUGBUS_RBBM_GC_S_2_I_0            = 403,
+	DEBUGBUS_CCRE_GC_S_2_I_0            = 404,
+	DEBUGBUS_CGC_GC_S_2_I_0             = 405,
+	DEBUGBUS_GMU_GC_S_2_I_0             = 406,
+	DEBUGBUS_SLICE_GC_S_2_I_0           = 407,
+	DEBUGBUS_HLSQ_SPTP_STAR_GC_S_2_I_0  = 408,
+	DEBUGBUS_USP_GC_S_2_I_0             = 416,
+	DEBUGBUS_USP_GC_S_2_I_1             = 417,
+	DEBUGBUS_USPTP_GC_S_2_I_0           = 422,
+	DEBUGBUS_USPTP_GC_S_2_I_1           = 423,
+	DEBUGBUS_USPTP_GC_S_2_I_2           = 424,
+	DEBUGBUS_USPTP_GC_S_2_I_3           = 425,
+	DEBUGBUS_TP_GC_S_2_I_0              = 434,
+	DEBUGBUS_TP_GC_S_2_I_1              = 435,
+	DEBUGBUS_TP_GC_S_2_I_2              = 436,
+	DEBUGBUS_TP_GC_S_2_I_3              = 437,
+	DEBUGBUS_RB_GC_S_2_I_0              = 446,
+	DEBUGBUS_RB_GC_S_2_I_1              = 447,
+	DEBUGBUS_CCU_GC_S_2_I_0             = 452,
+	DEBUGBUS_CCU_GC_S_2_I_1             = 453,
+	DEBUGBUS_HLSQ_GC_S_2_I_0            = 458,
+	DEBUGBUS_HLSQ_GC_S_2_I_1            = 459,
+	DEBUGBUS_VFD_GC_S_2_I_0             = 464,
+	DEBUGBUS_VFD_GC_S_2_I_1             = 465,
+};
+
+static const u32 gen8_debugbus_blocks[] = {
+	DEBUGBUS_GMU_GX_GC_US_I_0,
+	DEBUGBUS_DBGC_GC_US_I_0,
+	DEBUGBUS_RBBM_GC_US_I_0,
+	DEBUGBUS_LARC_GC_US_I_0,
+	DEBUGBUS_COM_GC_US_I_0,
+	DEBUGBUS_HLSQ_GC_US_I_0,
+	DEBUGBUS_CGC_GC_US_I_0,
+	DEBUGBUS_VSC_GC_US_I_0_0,
+	DEBUGBUS_VSC_GC_US_I_0_1,
+	DEBUGBUS_UFC_GC_US_I_0,
+	DEBUGBUS_UFC_GC_US_I_1,
+	DEBUGBUS_CP_GC_US_I_0_0,
+	DEBUGBUS_CP_GC_US_I_0_1,
+	DEBUGBUS_CP_GC_US_I_0_2,
+	DEBUGBUS_PC_BR_US_I_0,
+	DEBUGBUS_PC_BV_US_I_0,
+	DEBUGBUS_GPC_BR_US_I_0,
+	DEBUGBUS_GPC_BV_US_I_0,
+	DEBUGBUS_VPC_BR_US_I_0,
+	DEBUGBUS_VPC_BV_US_I_0,
+	DEBUGBUS_UCHE_WRAPPER_GC_US_I_0,
+	DEBUGBUS_UCHE_GC_US_I_0,
+	DEBUGBUS_UCHE_GC_US_I_1,
+	DEBUGBUS_CP_GC_S_0_I_0,
+	DEBUGBUS_PC_BR_S_0_I_0,
+	DEBUGBUS_PC_BV_S_0_I_0,
+	DEBUGBUS_TESS_GC_S_0_I_0,
+	DEBUGBUS_TSEFE_GC_S_0_I_0,
+	DEBUGBUS_TSEBE_GC_S_0_I_0,
+	DEBUGBUS_RAS_GC_S_0_I_0,
+	DEBUGBUS_LRZ_BR_S_0_I_0,
+	DEBUGBUS_LRZ_BV_S_0_I_0,
+	DEBUGBUS_VFDP_GC_S_0_I_0,
+	DEBUGBUS_GPC_BR_S_0_I_0,
+	DEBUGBUS_GPC_BV_S_0_I_0,
+	DEBUGBUS_VPCFE_BR_S_0_I_0,
+	DEBUGBUS_VPCFE_BV_S_0_I_0,
+	DEBUGBUS_VPCBE_BR_S_0_I_0,
+	DEBUGBUS_VPCBE_BV_S_0_I_0,
+	DEBUGBUS_CCHE_GC_S_0_I_0,
+	DEBUGBUS_DBGC_GC_S_0_I_0,
+	DEBUGBUS_LARC_GC_S_0_I_0,
+	DEBUGBUS_RBBM_GC_S_0_I_0,
+	DEBUGBUS_CCRE_GC_S_0_I_0,
+	DEBUGBUS_CGC_GC_S_0_I_0,
+	DEBUGBUS_GMU_GC_S_0_I_0,
+	DEBUGBUS_SLICE_GC_S_0_I_0,
+	DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0,
+	DEBUGBUS_USP_GC_S_0_I_0,
+	DEBUGBUS_USP_GC_S_0_I_1,
+	DEBUGBUS_USPTP_GC_S_0_I_0,
+	DEBUGBUS_USPTP_GC_S_0_I_1,
+	DEBUGBUS_USPTP_GC_S_0_I_2,
+	DEBUGBUS_USPTP_GC_S_0_I_3,
+	DEBUGBUS_TP_GC_S_0_I_0,
+	DEBUGBUS_TP_GC_S_0_I_1,
+	DEBUGBUS_TP_GC_S_0_I_2,
+	DEBUGBUS_TP_GC_S_0_I_3,
+	DEBUGBUS_RB_GC_S_0_I_0,
+	DEBUGBUS_RB_GC_S_0_I_1,
+	DEBUGBUS_CCU_GC_S_0_I_0,
+	DEBUGBUS_CCU_GC_S_0_I_1,
+	DEBUGBUS_HLSQ_GC_S_0_I_0,
+	DEBUGBUS_HLSQ_GC_S_0_I_1,
+	DEBUGBUS_VFD_GC_S_0_I_0,
+	DEBUGBUS_VFD_GC_S_0_I_1,
+	DEBUGBUS_CP_GC_S_1_I_0,
+	DEBUGBUS_PC_BR_S_1_I_0,
+	DEBUGBUS_PC_BV_S_1_I_0,
+	DEBUGBUS_TESS_GC_S_1_I_0,
+	DEBUGBUS_TSEFE_GC_S_1_I_0,
+	DEBUGBUS_TSEBE_GC_S_1_I_0,
+	DEBUGBUS_RAS_GC_S_1_I_0,
+	DEBUGBUS_LRZ_BR_S_1_I_0,
+	DEBUGBUS_LRZ_BV_S_1_I_0,
+	DEBUGBUS_VFDP_GC_S_1_I_0,
+	DEBUGBUS_GPC_BR_S_1_I_0,
+	DEBUGBUS_GPC_BV_S_1_I_0,
+	DEBUGBUS_VPCFE_BR_S_1_I_0,
+	DEBUGBUS_VPCFE_BV_S_1_I_0,
+	DEBUGBUS_VPCBE_BR_S_1_I_0,
+	DEBUGBUS_VPCBE_BV_S_1_I_0,
+	DEBUGBUS_CCHE_GC_S_1_I_0,
+	DEBUGBUS_DBGC_GC_S_1_I_0,
+	DEBUGBUS_LARC_GC_S_1_I_0,
+	DEBUGBUS_RBBM_GC_S_1_I_0,
+	DEBUGBUS_CCRE_GC_S_1_I_0,
+	DEBUGBUS_CGC_GC_S_1_I_0,
+	DEBUGBUS_GMU_GC_S_1_I_0,
+	DEBUGBUS_SLICE_GC_S_1_I_0,
+	DEBUGBUS_HLSQ_SPTP_STAR_GC_S_1_I_0,
+	DEBUGBUS_USP_GC_S_1_I_0,
+	DEBUGBUS_USP_GC_S_1_I_1,
+	DEBUGBUS_USPTP_GC_S_1_I_0,
+	DEBUGBUS_USPTP_GC_S_1_I_1,
+	DEBUGBUS_USPTP_GC_S_1_I_2,
+	DEBUGBUS_USPTP_GC_S_1_I_3,
+	DEBUGBUS_TP_GC_S_1_I_0,
+	DEBUGBUS_TP_GC_S_1_I_1,
+	DEBUGBUS_TP_GC_S_1_I_2,
+	DEBUGBUS_TP_GC_S_1_I_3,
+	DEBUGBUS_RB_GC_S_1_I_0,
+	DEBUGBUS_RB_GC_S_1_I_1,
+	DEBUGBUS_CCU_GC_S_1_I_0,
+	DEBUGBUS_CCU_GC_S_1_I_1,
+	DEBUGBUS_HLSQ_GC_S_1_I_0,
+	DEBUGBUS_HLSQ_GC_S_1_I_1,
+	DEBUGBUS_VFD_GC_S_1_I_0,
+	DEBUGBUS_VFD_GC_S_1_I_1,
+	DEBUGBUS_CP_GC_S_2_I_0,
+	DEBUGBUS_PC_BR_S_2_I_0,
+	DEBUGBUS_PC_BV_S_2_I_0,
+	DEBUGBUS_TESS_GC_S_2_I_0,
+	DEBUGBUS_TSEFE_GC_S_2_I_0,
+	DEBUGBUS_TSEBE_GC_S_2_I_0,
+	DEBUGBUS_RAS_GC_S_2_I_0,
+	DEBUGBUS_LRZ_BR_S_2_I_0,
+	DEBUGBUS_LRZ_BV_S_2_I_0,
+	DEBUGBUS_VFDP_GC_S_2_I_0,
+	DEBUGBUS_GPC_BR_S_2_I_0,
+	DEBUGBUS_GPC_BV_S_2_I_0,
+	DEBUGBUS_VPCFE_BR_S_2_I_0,
+	DEBUGBUS_VPCFE_BV_S_2_I_0,
+	DEBUGBUS_VPCBE_BR_S_2_I_0,
+	DEBUGBUS_VPCBE_BV_S_2_I_0,
+	DEBUGBUS_CCHE_GC_S_2_I_0,
+	DEBUGBUS_DBGC_GC_S_2_I_0,
+	DEBUGBUS_LARC_GC_S_2_I_0,
+	DEBUGBUS_RBBM_GC_S_2_I_0,
+	DEBUGBUS_CCRE_GC_S_2_I_0,
+	DEBUGBUS_CGC_GC_S_2_I_0,
+	DEBUGBUS_GMU_GC_S_2_I_0,
+	DEBUGBUS_SLICE_GC_S_2_I_0,
+	DEBUGBUS_HLSQ_SPTP_STAR_GC_S_2_I_0,
+	DEBUGBUS_USP_GC_S_2_I_0,
+	DEBUGBUS_USP_GC_S_2_I_1,
+	DEBUGBUS_USPTP_GC_S_2_I_0,
+	DEBUGBUS_USPTP_GC_S_2_I_1,
+	DEBUGBUS_USPTP_GC_S_2_I_2,
+	DEBUGBUS_USPTP_GC_S_2_I_3,
+	DEBUGBUS_TP_GC_S_2_I_0,
+	DEBUGBUS_TP_GC_S_2_I_1,
+	DEBUGBUS_TP_GC_S_2_I_2,
+	DEBUGBUS_TP_GC_S_2_I_3,
+	DEBUGBUS_RB_GC_S_2_I_0,
+	DEBUGBUS_RB_GC_S_2_I_1,
+	DEBUGBUS_CCU_GC_S_2_I_0,
+	DEBUGBUS_CCU_GC_S_2_I_1,
+	DEBUGBUS_HLSQ_GC_S_2_I_0,
+	DEBUGBUS_HLSQ_GC_S_2_I_1,
+	DEBUGBUS_VFD_GC_S_2_I_0,
+	DEBUGBUS_VFD_GC_S_2_I_1,
+};
+
+static const u32 gen8_gbif_debugbus_blocks[] = {
+	DEBUGBUS_GBIF_GX_GC_US_I_0,
+};
+
+static const u32 gen8_cx_debugbus_blocks[] = {
+	DEBUGBUS_GBIF_CX_GC_US_I_0,
+	DEBUGBUS_GMU_CX_GC_US_I_0,
+	DEBUGBUS_CX_GC_US_I_0,
+};
+
+enum gen8_statetype_ids {
+	TP0_NCTX_REG                   = 0,
+	TP0_CTX0_3D_CVS_REG            = 1,
+	TP0_CTX0_3D_CPS_REG            = 2,
+	TP0_CTX1_3D_CVS_REG            = 3,
+	TP0_CTX1_3D_CPS_REG            = 4,
+	TP0_CTX2_3D_CPS_REG            = 5,
+	TP0_CTX3_3D_CPS_REG            = 6,
+	TP0_TMO_DATA                   = 9,
+	TP0_SMO_DATA                   = 10,
+	TP0_MIPMAP_BASE_DATA           = 11,
+	SP_INST_DATA_3                 = 31,
+	SP_NCTX_REG                    = 32,
+	SP_CTX0_3D_CVS_REG             = 33,
+	SP_CTX0_3D_CPS_REG             = 34,
+	SP_CTX1_3D_CVS_REG             = 35,
+	SP_CTX1_3D_CPS_REG             = 36,
+	SP_CTX2_3D_CPS_REG             = 37,
+	SP_CTX3_3D_CPS_REG             = 38,
+	SP_INST_DATA                   = 39,
+	SP_INST_DATA_1                 = 40,
+	SP_LB_0_DATA                   = 41,
+	SP_LB_1_DATA                   = 42,
+	SP_LB_2_DATA                   = 43,
+	SP_LB_3_DATA                   = 44,
+	SP_LB_4_DATA                   = 45,
+	SP_LB_5_DATA                   = 46,
+	SP_LB_6_DATA                   = 47,
+	SP_LB_7_DATA                   = 48,
+	SP_CB_RAM                      = 49,
+	SP_LB_13_DATA                  = 50,
+	SP_LB_14_DATA                  = 51,
+	SP_INST_TAG                    = 52,
+	SP_INST_DATA_2                 = 53,
+	SP_TMO_TAG                     = 54,
+	SP_SMO_TAG                     = 55,
+	SP_STATE_DATA                  = 56,
+	SP_HWAVE_RAM                   = 57,
+	SP_L0_INST_BUF                 = 58,
+	SP_LB_8_DATA                   = 59,
+	SP_LB_9_DATA                   = 60,
+	SP_LB_10_DATA                  = 61,
+	SP_LB_11_DATA                  = 62,
+	SP_LB_12_DATA                  = 63,
+	HLSQ_DATAPATH_DSTR_META        = 64,
+	HLSQ_DESC_REMAP_META           = 65,
+	HLSQ_SLICE_TOP_META            = 66,
+	HLSQ_L2STC_TAG_RAM             = 67,
+	HLSQ_L2STC_INFO_CMD            = 68,
+	HLSQ_CVS_BE_CTXT_BUF_RAM_TAG   = 69,
+	HLSQ_CPS_BE_CTXT_BUF_RAM_TAG   = 70,
+	HLSQ_GFX_CVS_BE_CTXT_BUF_RAM   = 71,
+	HLSQ_GFX_CPS_BE_CTXT_BUF_RAM   = 72,
+	HLSQ_CHUNK_CVS_RAM             = 73,
+	HLSQ_CHUNK_CPS_RAM             = 74,
+	HLSQ_CHUNK_CVS_RAM_TAG         = 75,
+	HLSQ_CHUNK_CPS_RAM_TAG         = 76,
+	HLSQ_ICB_CVS_CB_BASE_TAG       = 77,
+	HLSQ_ICB_CPS_CB_BASE_TAG       = 78,
+	HLSQ_CVS_MISC_RAM              = 79,
+	HLSQ_CPS_MISC_RAM              = 80,
+	HLSQ_CPS_MISC_RAM_1            = 81,
+	HLSQ_INST_RAM                  = 82,
+	HLSQ_GFX_CVS_CONST_RAM         = 83,
+	HLSQ_GFX_CPS_CONST_RAM         = 84,
+	HLSQ_CVS_MISC_RAM_TAG          = 85,
+	HLSQ_CPS_MISC_RAM_TAG          = 86,
+	HLSQ_INST_RAM_TAG              = 87,
+	HLSQ_GFX_CVS_CONST_RAM_TAG     = 88,
+	HLSQ_GFX_CPS_CONST_RAM_TAG     = 89,
+	HLSQ_GFX_LOCAL_MISC_RAM        = 90,
+	HLSQ_GFX_LOCAL_MISC_RAM_TAG    = 91,
+	HLSQ_INST_RAM_1                = 92,
+	HLSQ_STPROC_META               = 93,
+	HLSQ_SLICE_BACKEND_META        = 94,
+	HLSQ_INST_RAM_2                = 95,
+	HLSQ_DATAPATH_META             = 96,
+	HLSQ_FRONTEND_META             = 97,
+	HLSQ_INDIRECT_META             = 98,
+	HLSQ_BACKEND_META              = 99,
+};
+
+struct gen8_snapshot_block_list {
+	/* pre_crashdumper_regs : Registers which need to be dumped before CD runs */
+	struct gen8_reg_list *pre_crashdumper_regs;
+	/* num_pre_crashdumper_regs : Number of register lists to dump before CD runs */
+	size_t num_pre_crashdumper_regs;
+	/* debugbus_blocks : List of debugbus blocks */
+	const u32 *debugbus_blocks;
+	/* debugbus_blocks_len : Length of the debugbus list */
+	size_t debugbus_blocks_len;
+	/* gbif_debugbus_blocks : List of GBIF debugbus blocks */
+	const u32 *gbif_debugbus_blocks;
+	/* gbif_debugbus_blocks_len : Length of GBIF debugbus list */
+	size_t gbif_debugbus_blocks_len;
+	/* cx_debugbus_blocks : List of CX debugbus blocks */
+	const u32 *cx_debugbus_blocks;
+	/* cx_debugbus_blocks_len : Length of the CX debugbus list */
+	size_t cx_debugbus_blocks_len;
+	/* external_core_regs : List of external core registers */
+	const u32 **external_core_regs;
+	/* num_external_core_regs : length of external core registers list */
+	size_t num_external_core_regs;
+	/* gmu_cx_unsliced_regs : List of GMU CX unsliced registers */
+	const u32 *gmu_cx_unsliced_regs;
+	/* gmu_gx_regs : List of GMU GX registers */
+	struct gen8_reg_list *gmu_gx_regs;
+	/* num_gmu_gx_regs : Length of GMU registers list */
+	size_t num_gmu_gx_regs;
+	/* rscc_regs : List of RSCC registers */
+	const u32 *rscc_regs;
+	/* reg_list : List of GPU internal registers */
+	struct gen8_reg_list *reg_list;
+	/* cx_misc_regs : List of CX misc registers */
+	const u32 *cx_misc_regs;
+	/* shader_blocks : List of GPU shader memory */
+	struct gen8_shader_block *shader_blocks;
+	/* num_shader_blocks : Length of the shader memory list */
+	size_t num_shader_blocks;
+	/* cp_clusters : List of GPU CP cluster registers */
+	struct gen8_cluster_registers *cp_clusters;
+	/* num_cp_clusters : Length of GPU CP cluster registers list */
+	size_t num_cp_clusters;
+	/* clusters : List of GPU cluster registers */
+	struct gen8_cluster_registers *clusters;
+	/* num_clusters : Length of GPU cluster registers list */
+	size_t num_clusters;
+	/* sptp_clusters : List of GPU SPTP cluster registers */
+	struct gen8_sptp_cluster_registers *sptp_clusters;
+	/* num_sptp_clusters : Length of GPU SPTP cluster registers list */
+	size_t num_sptp_clusters;
+	/* post_crashdumper_regs : Registers which need to be dumped after CD runs */
+	const u32 *post_crashdumper_regs;
+	/* index_registers : List of CP indexed registers */
+	struct gen8_cp_indexed_reg *index_registers;
+	/* index_registers_len : Length of the index registers */
+	size_t index_registers_len;
+	/* mempool_index_registers : List of CP mempool_index_registers */
+	struct gen8_cp_indexed_reg *mempool_index_registers;
+	/* mempool_index_registers_len : Length of the mempool index registers */
+	size_t mempool_index_registers_len;
+};
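A minimal sketch (not part of this commit) of how a target-specific gpucore file might populate this descriptor; only the debugbus fields, which can reuse the static arrays above, are shown, and every other list is target specific:

	static struct gen8_snapshot_block_list example_snapshot_block_list = {
		.debugbus_blocks = gen8_debugbus_blocks,
		.debugbus_blocks_len = ARRAY_SIZE(gen8_debugbus_blocks),
		.gbif_debugbus_blocks = gen8_gbif_debugbus_blocks,
		.gbif_debugbus_blocks_len = ARRAY_SIZE(gen8_gbif_debugbus_blocks),
		.cx_debugbus_blocks = gen8_cx_debugbus_blocks,
		.cx_debugbus_blocks_len = ARRAY_SIZE(gen8_cx_debugbus_blocks),
		/* register, cluster, shader and indexed-register lists omitted */
	};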
+
+#endif /*__ADRENO_GEN8_SNAPSHOT_H */

+ 1414 - 0
qcom/opensource/graphics-kernel/adreno_hfi.h

@@ -0,0 +1,1414 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#ifndef __ADRENO_HFI_H
+#define __ADRENO_HFI_H
+
+#include "kgsl_util.h"
+
+#define HW_FENCE_QUEUE_SIZE		SZ_4K
+#define HFI_QUEUE_SIZE			SZ_4K /* bytes, must be a multiple of 4 dwords */
+#define MAX_RCVD_PAYLOAD_SIZE		16 /* dwords */
+#define MAX_RCVD_SIZE			(MAX_RCVD_PAYLOAD_SIZE + 3) /* dwords */
+#define HFI_MAX_MSG_SIZE		(SZ_1K)
+
+#define HFI_CMD_ID 0
+#define HFI_MSG_ID 1
+#define HFI_DBG_ID 2
+#define HFI_DSP_ID_0 3
+
+#define HFI_CMD_IDX 0
+#define HFI_MSG_IDX 1
+#define HFI_DBG_IDX 2
+#define HFI_DSP_IDX_BASE 3
+#define HFI_DSP_IDX_0 3
+
+#define HFI_CMD_IDX_LEGACY 0
+#define HFI_DSP_IDX_0_LEGACY 1
+#define HFI_MSG_IDX_LEGACY 4
+#define HFI_DBG_IDX_LEGACY 5
+
+#define HFI_QUEUE_STATUS_DISABLED 0
+#define HFI_QUEUE_STATUS_ENABLED 1
+
+/* HFI queue priority, 1 is the highest priority */
+#define HFI_CMD_PRI 10
+#define HFI_MSG_PRI 10
+#define HFI_DBG_PRI 40
+#define HFI_DSP_PRI_0 20
+
+#define HFI_IRQ_SIDEMSGQ_MASK		BIT(1)
+#define HFI_IRQ_DBGQ_MASK		BIT(2)
+#define HFI_IRQ_CM3_FAULT_MASK		BIT(15)
+#define HFI_IRQ_OOB_MASK		GENMASK(31, 16)
+#define HFI_IRQ_MASK			(HFI_IRQ_SIDEMSGQ_MASK |\
+					HFI_IRQ_DBGQ_MASK |\
+					HFI_IRQ_CM3_FAULT_MASK)
+
+#define DCVS_ACK_NONBLOCK 0
+#define DCVS_ACK_BLOCK 1
+
+#define HFI_FEATURE_DCVS	0
+#define HFI_FEATURE_HWSCHED	1
+#define HFI_FEATURE_PREEMPTION	2
+#define HFI_FEATURE_CLOCKS_ON	3
+#define HFI_FEATURE_BUS_ON	4
+#define HFI_FEATURE_RAIL_ON	5
+#define HFI_FEATURE_HWCG	6
+#define HFI_FEATURE_LM		7
+#define HFI_FEATURE_THROTTLE	8
+#define HFI_FEATURE_IFPC	9
+#define HFI_FEATURE_NAP		10
+#define HFI_FEATURE_BCL		11
+#define HFI_FEATURE_ACD		12
+#define HFI_FEATURE_DIDT	13
+#define HFI_FEATURE_DEPRECATED	14
+#define HFI_FEATURE_CB		15
+#define HFI_FEATURE_KPROF	16
+#define HFI_FEATURE_BAIL_OUT_TIMER	17
+#define HFI_FEATURE_GMU_STATS	18
+#define HFI_FEATURE_DBQ		19
+#define HFI_FEATURE_MINBW	20
+#define HFI_FEATURE_CLX		21
+#define HFI_FEATURE_LSR		23
+#define HFI_FEATURE_LPAC	24
+#define HFI_FEATURE_HW_FENCE	25
+#define HFI_FEATURE_PERF_NORETAIN	26
+#define HFI_FEATURE_DMS		27
+#define HFI_FEATURE_AQE		29
+
+/* Types to be used with H2F_MSG_TABLE */
+enum hfi_table_type {
+	HFI_TABLE_BW_VOTE	= 0,
+	HFI_TABLE_GPU_PERF	= 1,
+	HFI_TABLE_DIDT		= 2,
+	HFI_TABLE_ACD		= 3,
+	HFI_TABLE_CLX_V1	= 4,
+	HFI_TABLE_CLX_V2	= 5,
+	HFI_TABLE_THERM		= 6,
+	HFI_TABLE_DCVS_DATA	= 7,
+	HFI_TABLE_MAX,
+};
+
+/* A6xx uses a different value for KPROF */
+#define HFI_FEATURE_A6XX_KPROF	14
+
+/* For Gen7 & Gen8 ACD */
+#define F_PWR_ACD_CALIBRATE	78
+
+#define HFI_VALUE_FT_POLICY		100
+#define HFI_VALUE_RB_MAX_CMDS		101
+#define HFI_VALUE_CTX_MAX_CMDS		102
+#define HFI_VALUE_ADDRESS		103
+#define HFI_VALUE_MAX_GPU_PERF_INDEX	104
+#define HFI_VALUE_MIN_GPU_PERF_INDEX	105
+#define HFI_VALUE_MAX_BW_PERF_INDEX	106
+#define HFI_VALUE_MIN_BW_PERF_INDEX	107
+#define HFI_VALUE_MAX_GPU_THERMAL_INDEX	108
+#define HFI_VALUE_GPUCLK		109
+#define HFI_VALUE_CLK_TIME		110
+#define HFI_VALUE_LOG_GROUP		111
+#define HFI_VALUE_LOG_EVENT_ON		112
+#define HFI_VALUE_LOG_EVENT_OFF		113
+#define HFI_VALUE_DCVS_OBJ		114
+#define HFI_VALUE_LM_CS0		115
+#define HFI_VALUE_DBG			116
+#define HFI_VALUE_BIN_TIME		117
+#define HFI_VALUE_LOG_STREAM_ENABLE	119
+#define HFI_VALUE_PREEMPT_COUNT		120
+#define HFI_VALUE_CONTEXT_QUEUE		121
+#define HFI_VALUE_GMU_AB_VOTE		122
+#define HFI_VALUE_RB_GPU_QOS		123
+#define HFI_VALUE_RB_IB_RULE		124
+#define HFI_VALUE_GMU_WARMBOOT		125
+#define HFI_VALUE_GLOBAL_TOKEN		0xFFFFFFFF
+
+#define HFI_CTXT_FLAG_PMODE			BIT(0)
+#define HFI_CTXT_FLAG_SWITCH_INTERNAL		BIT(1)
+#define HFI_CTXT_FLAG_SWITCH			BIT(3)
+#define HFI_CTXT_FLAG_NOTIFY			BIT(5)
+#define HFI_CTXT_FLAG_NO_FAULT_TOLERANCE	BIT(9)
+#define HFI_CTXT_FLAG_PWR_RULE			BIT(11)
+#define HFI_CTXT_FLAG_PRIORITY_MASK		GENMASK(15, 12)
+#define HFI_CTXT_FLAG_IFH_NOP			BIT(16)
+#define HFI_CTXT_FLAG_SECURE			BIT(17)
+#define HFI_CTXT_FLAG_TYPE_MASK			GENMASK(24, 20)
+#define HFI_CTXT_FLAG_TYPE_ANY			0
+#define HFI_CTXT_FLAG_TYPE_GL			1
+#define HFI_CTXT_FLAG_TYPE_CL			2
+#define HFI_CTXT_FLAG_TYPE_C2D			3
+#define HFI_CTXT_FLAG_TYPE_RS			4
+#define HFI_CTXT_FLAG_TYPE_VK			5
+#define HFI_CTXT_FLAG_TYPE_UNKNOWN		0x1e
+#define HFI_CTXT_FLAG_PREEMPT_STYLE_MASK	GENMASK(27, 25)
+#define HFI_CTXT_FLAG_PREEMPT_STYLE_ANY		0
+#define HFI_CTXT_FLAG_PREEMPT_STYLE_RB		1
+#define HFI_CTXT_FLAG_PREEMPT_STYLE_FG		2
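A minimal sketch (not part of this commit) showing how the masked fields above can be packed into the flags word of hfi_register_ctxt_cmd, assuming FIELD_PREP() from linux/bitfield.h is available in this context; the priority value is an arbitrary example:

	static u32 example_ctxt_flags(void)
	{
		u32 flags = HFI_CTXT_FLAG_NOTIFY;

		/* priority occupies bits[15:12], context type occupies bits[24:20] */
		flags |= FIELD_PREP(HFI_CTXT_FLAG_PRIORITY_MASK, 1);
		flags |= FIELD_PREP(HFI_CTXT_FLAG_TYPE_MASK, HFI_CTXT_FLAG_TYPE_GL);

		return flags;
	}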
+
+/* Default sampling interval in units of 50 us */
+#define HFI_FEATURE_GMU_STATS_INTERVAL		4
+
+enum hfi_mem_kind {
+	/** @HFI_MEMKIND_GENERIC: Used for requesting generic memory */
+	HFI_MEMKIND_GENERIC = 0,
+	/** @HFI_MEMKIND_RB: Used for requesting ringbuffer memory */
+	HFI_MEMKIND_RB,
+	/** @HFI_MEMKIND_SCRATCH: Used for requesting scratch memory */
+	HFI_MEMKIND_SCRATCH,
+	/**
+	 * @HFI_MEMKIND_CSW_SMMU_INFO: Used for requesting SMMU record for
+	 * preemption context switching
+	 */
+	HFI_MEMKIND_CSW_SMMU_INFO,
+	/**
+	 * @HFI_MEMKIND_CSW_PRIV_NON_SECURE: Used for requesting privileged non
+	 * secure preemption records
+	 */
+	HFI_MEMKIND_CSW_PRIV_NON_SECURE,
+	/**
+	 * @HFI_MEMKIND_CSW_PRIV_SECURE: Used for requesting privileged secure
+	 * preemption records
+	 */
+	HFI_MEMKIND_CSW_PRIV_SECURE,
+	/**
+	 * @HFI_MEMKIND_CSW_NON_PRIV: Used for requesting non privileged per
+	 * context preemption buffer
+	 */
+	HFI_MEMKIND_CSW_NON_PRIV,
+	/**
+	 * @HFI_MEMKIND_CSW_COUNTER: Used for requesting preemption performance
+	 * counter save/restore buffer
+	 */
+	HFI_MEMKIND_CSW_COUNTER,
+	/**
+	 * @HFI_MEMKIND_CTXTREC_PREEMPT_CNTR: Used for requesting preemption
+	 * counter buffer
+	 */
+	HFI_MEMKIND_CTXTREC_PREEMPT_CNTR,
+	/** @HFI_MEMKIND_SYS_LOG: Used for requesting system log memory */
+	HFI_MEMKIND_SYS_LOG,
+	/** @HFI_MEMKIND_CRASH_DUMP: Used for requesting crash dumper memory */
+	HFI_MEMKIND_CRASH_DUMP,
+	/**
+	 * @HFI_MEMKIND_MMIO_DPU: Used for requesting Display processing unit's
+	 * register space
+	 */
+	HFI_MEMKIND_MMIO_DPU,
+	/**
+	 * @HFI_MEMKIND_MMIO_TCSR: Used for requesting Top CSR (contains SoC
+	 * doorbells) register space
+	 */
+	HFI_MEMKIND_MMIO_TCSR,
+	/**
+	 * @HFI_MEMKIND_MMIO_QDSS_STM: Used for requesting QDSS STM register
+	 * space
+	 */
+	HFI_MEMKIND_MMIO_QDSS_STM,
+	/** @HFI_MEMKIND_PROFILE: Used for kernel profiling */
+	HFI_MEMKIND_PROFILE,
+	/** @HFI_MEMKIND_USER_PROFILE_IBS: Used for user profiling */
+	HFI_MEMKIND_USER_PROFILE_IBS,
+	/** @HFI_MEMKIND_CMD_BUFFER: Used for composing ringbuffer content */
+	HFI_MEMKIND_CMD_BUFFER,
+	/**
+	 * @HFI_MEMKIND_GPU_BUSY_DATA_BUFFER: Used for GPU busy buffer for
+	 * all the contexts
+	 */
+	HFI_MEMKIND_GPU_BUSY_DATA_BUFFER,
+	/**
+	 * @HFI_MEMKIND_GPU_BUSY_CMD_BUFFER: Used for GPU busy cmd buffer
+	 * (only readable to the GPU)
+	 */
+	HFI_MEMKIND_GPU_BUSY_CMD_BUFFER,
+	/**
+	 * @HFI_MEMKIND_MMIO_IPC_CORE: Used for IPC_core region mapping to GMU space
+	 * for EVA to GPU communication.
+	 */
+	HFI_MEMKIND_MMIO_IPC_CORE,
+	/** @HFI_MEMKIND_MMIO_IPCC_AOSS: Used for IPCC AOSS, second memory region */
+	HFI_MEMKIND_MMIO_IPCC_AOSS,
+	/**
+	 * @HFI_MEMKIND_CSW_LPAC_PRIV_NON_SECURE: Used for privileged non-secure
+	 * memory for the LPAC context record
+	 */
+	HFI_MEMKIND_CSW_LPAC_PRIV_NON_SECURE,
+	/** @HFI_MEMKIND_MEMSTORE: Buffer used to query a context's GPU sop/eop timestamps */
+	HFI_MEMKIND_MEMSTORE,
+	/** @HFI_MEMKIND_HW_FENCE:  Hardware fence Tx/Rx headers and queues */
+	HFI_MEMKIND_HW_FENCE,
+	/** @HFI_MEMKIND_PREEMPT_SCRATCH: Used for Preemption scratch memory */
+	HFI_MEMKIND_PREEMPT_SCRATCH,
+	/**
+	 * @HFI_MEMKIND_AQE_BUFFER: Sandbox memory used by AQE to switch
+	 * between LPAC and GC
+	 */
+	HFI_MEMKIND_AQE_BUFFER,
+	HFI_MEMKIND_MAX,
+};
+
+static const char * const hfi_memkind_strings[] = {
+	[HFI_MEMKIND_GENERIC] = "GMU GENERIC",
+	[HFI_MEMKIND_RB] = "GMU RB",
+	[HFI_MEMKIND_SCRATCH] = "GMU SCRATCH",
+	[HFI_MEMKIND_CSW_SMMU_INFO] = "GMU SMMU INFO",
+	[HFI_MEMKIND_CSW_PRIV_NON_SECURE] = "GMU CSW PRIV NON SECURE",
+	[HFI_MEMKIND_CSW_PRIV_SECURE] = "GMU CSW PRIV SECURE",
+	[HFI_MEMKIND_CSW_NON_PRIV] = "GMU CSW NON PRIV",
+	[HFI_MEMKIND_CSW_COUNTER] = "GMU CSW COUNTER",
+	[HFI_MEMKIND_CTXTREC_PREEMPT_CNTR] = "GMU PREEMPT CNTR",
+	[HFI_MEMKIND_SYS_LOG] = "GMU SYS LOG",
+	[HFI_MEMKIND_CRASH_DUMP] = "GMU CRASHDUMP",
+	[HFI_MEMKIND_MMIO_DPU] = "GMU MMIO DPU",
+	[HFI_MEMKIND_MMIO_TCSR] = "GMU MMIO TCSR",
+	[HFI_MEMKIND_MMIO_QDSS_STM] = "GMU MMIO QDSS STM",
+	[HFI_MEMKIND_PROFILE] = "GMU KERNEL PROFILING",
+	[HFI_MEMKIND_USER_PROFILE_IBS] = "GMU USER PROFILING",
+	[HFI_MEMKIND_CMD_BUFFER] = "GMU CMD BUFFER",
+	[HFI_MEMKIND_GPU_BUSY_DATA_BUFFER] = "GMU BUSY DATA BUFFER",
+	[HFI_MEMKIND_GPU_BUSY_CMD_BUFFER] = "GMU BUSY CMD BUFFER",
+	[HFI_MEMKIND_MMIO_IPC_CORE] = "GMU MMIO IPC",
+	[HFI_MEMKIND_MMIO_IPCC_AOSS] = "GMU MMIO IPCC AOSS",
+	[HFI_MEMKIND_CSW_LPAC_PRIV_NON_SECURE] = "GMU CSW LPAC PRIV NON SECURE",
+	[HFI_MEMKIND_MEMSTORE] = "GMU MEMSTORE",
+	[HFI_MEMKIND_HW_FENCE] = "GMU HW FENCE",
+	[HFI_MEMKIND_PREEMPT_SCRATCH] = "GMU PREEMPTION",
+	[HFI_MEMKIND_AQE_BUFFER] = "GMU AQE BUFFER",
+	[HFI_MEMKIND_MAX] = "GMU UNKNOWN",
+};
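A minimal sketch (not part of this commit) of a bounds-safe lookup into this table, assuming min_t() is available in this context; out-of-range kinds fall back to the "GMU UNKNOWN" entry:

	static inline const char *example_memkind_string(u32 mem_kind)
	{
		return hfi_memkind_strings[min_t(u32, mem_kind, HFI_MEMKIND_MAX)];
	}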
+
+/* CP/GFX pipeline can access */
+#define HFI_MEMFLAG_GFX_ACC		BIT(0)
+
+/* Buffer has APRIV protection in GFX PTEs */
+#define HFI_MEMFLAG_GFX_PRIV		BIT(1)
+
+/* Buffer is read-write for GFX PTEs. A 0 indicates read-only */
+#define HFI_MEMFLAG_GFX_WRITEABLE	BIT(2)
+
+/* GMU can access */
+#define HFI_MEMFLAG_GMU_ACC		BIT(3)
+
+/* Buffer has APRIV protection in GMU PTEs */
+#define HFI_MEMFLAG_GMU_PRIV		BIT(4)
+
+/* Buffer is read-write for GMU PTEs. A 0 indicates read-only */
+#define HFI_MEMFLAG_GMU_WRITEABLE	BIT(5)
+
+/* Buffer is located in GMU's non-cached bufferable VA range */
+#define HFI_MEMFLAG_GMU_BUFFERABLE	BIT(6)
+
+/* Buffer is located in GMU's cacheable VA range */
+#define HFI_MEMFLAG_GMU_CACHEABLE	BIT(7)
+
+/* Host can access */
+#define HFI_MEMFLAG_HOST_ACC		BIT(8)
+
+/* Host initializes (zero-init) the buffer */
+#define HFI_MEMFLAG_HOST_INIT		BIT(9)
+
+/* Gfx buffer needs to be secure */
+#define HFI_MEMFLAG_GFX_SECURE		BIT(12)
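One illustrative (not driver-mandated) combination of the flags above, describing a buffer the GFX pipeline can read, the GMU can read and write, and the host zero-initializes:

	#define EXAMPLE_HFI_MEMFLAGS (HFI_MEMFLAG_GFX_ACC | HFI_MEMFLAG_GMU_ACC | \
				      HFI_MEMFLAG_GMU_WRITEABLE | HFI_MEMFLAG_HOST_INIT)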
+
+/**
+ * struct hfi_queue_table_header - HFI queue table structure
+ * @version: HFI protocol version
+ * @size: queue table size in dwords
+ * @qhdr0_offset: first queue header offset (dwords) in this table
+ * @qhdr_size: queue header size
+ * @num_q: number of queues defined in this table
+ * @num_active_q: number of active queues
+ */
+struct hfi_queue_table_header {
+	u32 version;
+	u32 size;
+	u32 qhdr0_offset;
+	u32 qhdr_size;
+	u32 num_q;
+	u32 num_active_q;
+} __packed;
+
+/**
+ * struct gmu_context_queue_header - GMU context queue header structure
+ */
+struct gmu_context_queue_header {
+	/** @version: Version of the header */
+	u32 version;
+	/** @start_addr: GMU VA of start of the queue */
+	u32 start_addr;
+	/** @queue_size: queue size in dwords */
+	u32 queue_size;
+	/** @out_fence_ts: Timestamp of last hardware fence sent to Tx Queue */
+	volatile u32 out_fence_ts;
+	/** @sync_obj_ts: Timestamp of last sync object that GMU has digested */
+	volatile u32 sync_obj_ts;
+	/** @read_index: Read index of the queue */
+	volatile u32 read_index;
+	/** @write_index: Write index of the queue */
+	volatile u32 write_index;
+	/**
+	 * @hw_fence_buffer_va: GMU VA of the buffer to store output hardware fences for this
+	 * context
+	 */
+	u32 hw_fence_buffer_va;
+	/**
+	 * @hw_fence_buffer_size: Size of the buffer to store output hardware fences for this
+	 * context
+	 */
+	u32 hw_fence_buffer_size;
+	u32 unused1;
+	u32 unused2;
+	u32 unused3;
+} __packed;
+
+/**
+ * struct hfi_queue_header - HFI queue header structure
+ * @status: active: 1; inactive: 0
+ * @start_addr: starting address of the queue in GMU VA space
+ * @type: queue type encoding the priority, ID and send/receive types
+ * @queue_size: size of the queue
+ * @msg_size: size of each message if all messages have a fixed size.
+ *	Otherwise, 0 means messages in the queue are variable sized.
+ * @read_index: read index of the queue
+ * @write_index: write index of the queue
+ */
+struct hfi_queue_header {
+	u32 status;
+	u32 start_addr;
+	u32 type;
+	u32 queue_size;
+	u32 msg_size;
+	u32 unused0;
+	u32 unused1;
+	u32 unused2;
+	u32 unused3;
+	u32 unused4;
+	volatile u32 read_index;
+	volatile u32 write_index;
+} __packed;
+
+#define HFI_MSG_CMD 0 /* V1 and V2 */
+#define HFI_MSG_ACK 1 /* V2 only */
+
+/* Used to NOP a command when executing the warmboot sequence */
+#define HFI_MSG_NOP BIT(18)
+/* Used to record a command when executing the coldboot sequence */
+#define HFI_MSG_RECORD BIT(19)
+
+#define HFI_V1_MSG_POST 1 /* V1 only */
+#define HFI_V1_MSG_ACK 2 /* V1 only */
+
+#define MSG_HDR_SET_SIZE(hdr, size) \
+	(((size & 0xFF) << 8) | hdr)
+
+#define CREATE_MSG_HDR(id, type) \
+	(((type) << 16) | ((id) & 0xFF))
+
+#define ACK_MSG_HDR(id) CREATE_MSG_HDR(id, HFI_MSG_ACK)
+
+#define HFI_QUEUE_DEFAULT_CNT 3
+#define HFI_QUEUE_DISPATCH_MAX_CNT 14
+#define HFI_QUEUE_HDR_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT)
+
+struct hfi_queue_table {
+	struct hfi_queue_table_header qtbl_hdr;
+	struct hfi_queue_header qhdr[HFI_QUEUE_HDR_MAX];
+} __packed;
+
+#define HFI_QUEUE_OFFSET(i) \
+		(ALIGN(sizeof(struct hfi_queue_table), SZ_16) + \
+		((i) * HFI_QUEUE_SIZE))
+
+#define GMU_QUEUE_START_ADDR(gmuaddr, i) \
+	(gmuaddr + HFI_QUEUE_OFFSET(i))
+
+#define HOST_QUEUE_START_ADDR(hfi_mem, i) \
+	((hfi_mem)->hostptr + HFI_QUEUE_OFFSET(i))
+
+#define MSG_HDR_GET_ID(hdr) ((hdr) & 0xFF)
+#define MSG_HDR_GET_SIZE(hdr) (((hdr) >> 8) & 0xFF)
+#define MSG_HDR_GET_TYPE(hdr) (((hdr) >> 16) & 0xF)
+#define MSG_HDR_GET_SEQNUM(hdr) (((hdr) >> 20) & 0xFFF)
+
+/* Clear the HFI_MSG_RECORD bit from both headers since some acks may have it set, and some not. */
+#define CMP_HFI_ACK_HDR(sent, rcvd) ((sent &= ~HFI_MSG_RECORD) == (rcvd &= ~HFI_MSG_RECORD))
+
+#define MSG_HDR_SET_SEQNUM(hdr, num) \
+	(((hdr) & 0xFFFFF) | ((num) << 20))
+
+#define MSG_HDR_SET_SEQNUM_SIZE(hdr, seqnum, sizedwords) \
+	(FIELD_PREP(GENMASK(31, 20), seqnum) | FIELD_PREP(GENMASK(15, 8), sizedwords) | hdr)
+
+#define MSG_HDR_SET_TYPE(hdr, type) \
+	(((hdr) & 0xFFFFF) | ((type) << 16))
+
+#define QUEUE_HDR_TYPE(id, prio, rtype, stype) \
+	(((id) & 0xFF) | (((prio) & 0xFF) << 8) | \
+	(((rtype) & 0xFF) << 16) | (((stype) & 0xFF) << 24))
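A minimal sketch (not part of this commit) of how the host side could describe the H2F command queue with the macros above; programming queue_size in dwords is an assumption based on the queue setup code rather than something this header states:

	static void example_init_cmdq_hdr(struct hfi_queue_header *hdr, u32 gmuaddr)
	{
		hdr->status = HFI_QUEUE_STATUS_ENABLED;
		hdr->start_addr = GMU_QUEUE_START_ADDR(gmuaddr, HFI_CMD_IDX);
		hdr->type = QUEUE_HDR_TYPE(HFI_CMD_ID, HFI_CMD_PRI, 0, 0);
		hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* bytes to dwords */
	}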
+
+#define HFI_RSP_TIMEOUT 1000 /* msec */
+
+#define HFI_IRQ_MSGQ_MASK BIT(0)
+
+enum hfi_msg_type {
+	H2F_MSG_INIT			= 0,
+	H2F_MSG_FW_VER			= 1,
+	H2F_MSG_LM_CFG			= 2,
+	H2F_MSG_BW_VOTE_TBL		= 3,
+	H2F_MSG_PERF_TBL		= 4,
+	H2F_MSG_TEST			= 5,
+	H2F_MSG_ACD_TBL			= 7,
+	H2F_MSG_CLX_TBL			= 8,
+	H2F_MSG_START			= 10,
+	H2F_MSG_FEATURE_CTRL		= 11,
+	H2F_MSG_GET_VALUE		= 12,
+	H2F_MSG_SET_VALUE		= 13,
+	H2F_MSG_CORE_FW_START		= 14,
+	H2F_MSG_TABLE			= 15,
+	F2H_MSG_MEM_ALLOC		= 20,
+	H2F_MSG_GX_BW_PERF_VOTE		= 30,
+	H2F_MSG_FW_HALT			= 32,
+	H2F_MSG_PREPARE_SLUMBER		= 33,
+	F2H_MSG_ERR			= 100,
+	F2H_MSG_DEBUG			= 101,
+	F2H_MSG_LOG_BLOCK		= 102,
+	F2H_MSG_GMU_CNTR_REGISTER	= 110,
+	F2H_MSG_GMU_CNTR_RELEASE	= 111,
+	F2H_MSG_ACK			= 126, /* Deprecated for v2.0 */
+	H2F_MSG_ACK			= 127, /* Deprecated for v2.0 */
+	H2F_MSG_REGISTER_CONTEXT	= 128,
+	H2F_MSG_UNREGISTER_CONTEXT	= 129,
+	H2F_MSG_ISSUE_CMD		= 130,
+	H2F_MSG_ISSUE_CMD_RAW		= 131,
+	H2F_MSG_TS_NOTIFY		= 132,
+	F2H_MSG_TS_RETIRE		= 133,
+	H2F_MSG_CONTEXT_POINTERS	= 134,
+	H2F_MSG_ISSUE_LPAC_CMD_RAW	= 135,
+	H2F_MSG_CONTEXT_RULE		= 140, /* AKA constraint */
+	H2F_MSG_ISSUE_RECURRING_CMD	= 141,
+	F2H_MSG_CONTEXT_BAD		= 150,
+	H2F_MSG_HW_FENCE_INFO		= 151,
+	H2F_MSG_ISSUE_SYNCOBJ		= 152,
+	F2H_MSG_SYNCOBJ_QUERY		= 153,
+	H2F_MSG_WARMBOOT_CMD		= 154,
+	F2H_MSG_PROCESS_TRACE		= 155,
+	HFI_MAX_ID,
+};
+
+enum gmu_ret_type {
+	GMU_SUCCESS = 0,
+	GMU_ERROR_FATAL,
+	GMU_ERROR_MEM_FAIL,
+	GMU_ERROR_INVAL_PARAM,
+	GMU_ERROR_NULL_PTR,
+	GMU_ERROR_OUT_OF_BOUNDS,
+	GMU_ERROR_TIMEOUT,
+	GMU_ERROR_NOT_SUPPORTED,
+	GMU_ERROR_NO_ENTRY,
+};
+
+/* H2F */
+struct hfi_gmu_init_cmd {
+	u32 hdr;
+	u32 seg_id;
+	u32 dbg_buffer_addr;
+	u32 dbg_buffer_size;
+	u32 boot_state;
+} __packed;
+
+/* H2F */
+struct hfi_fw_version_cmd {
+	u32 hdr;
+	u32 supported_ver;
+} __packed;
+
+/* H2F */
+struct hfi_bwtable_cmd {
+	u32 hdr;
+	u32 bw_level_num;
+	u32 cnoc_cmds_num;
+	u32 ddr_cmds_num;
+	u32 cnoc_wait_bitmask;
+	u32 ddr_wait_bitmask;
+	u32 cnoc_cmd_addrs[MAX_CNOC_CMDS];
+	u32 cnoc_cmd_data[MAX_CNOC_LEVELS][MAX_CNOC_CMDS];
+	u32 ddr_cmd_addrs[MAX_BW_CMDS];
+	u32 ddr_cmd_data[MAX_BW_LEVELS][MAX_BW_CMDS];
+} __packed;
+
+struct opp_gx_desc {
+	u32 vote;
+	/* This is 'acdLvl' in gmu fw which is now repurposed for cx vote */
+	u32 cx_vote;
+	u32 freq;
+} __packed;
+
+struct opp_desc {
+	u32 vote;
+	u32 freq;
+} __packed;
+
+/* H2F */
+struct hfi_dcvstable_v1_cmd {
+	u32 hdr;
+	u32 gpu_level_num;
+	u32 gmu_level_num;
+	struct opp_desc gx_votes[MAX_GX_LEVELS_LEGACY];
+	struct opp_desc cx_votes[MAX_CX_LEVELS];
+} __packed;
+
+/* H2F */
+struct hfi_dcvstable_cmd {
+	u32 hdr;
+	u32 gpu_level_num;
+	u32 gmu_level_num;
+	struct opp_gx_desc gx_votes[MAX_GX_LEVELS_LEGACY];
+	struct opp_desc cx_votes[MAX_CX_LEVELS];
+} __packed;
+
+/* H2F */
+struct hfi_table_entry {
+	u32 count;
+	u32 stride;
+	u32 data[];
+} __packed;
+
+struct hfi_table_cmd {
+	u32 hdr;
+	u32 version;
+	u32 type;
+	struct hfi_table_entry entry[];
+} __packed;
+
+#define MAX_ACD_STRIDE 2
+#define MAX_ACD_NUM_LEVELS KGSL_MAX_PWRLEVELS
+
+/* H2F */
+struct hfi_acd_table_cmd {
+	u32 hdr;
+	u32 version;
+	u32 enable_by_level;
+	u32 stride;
+	u32 num_levels;
+	u32 data[MAX_ACD_NUM_LEVELS * MAX_ACD_STRIDE];
+} __packed;
+
+struct hfi_clx_table_v1_cmd {
+	/** @hdr: HFI header message */
+	u32 hdr;
+	/**
+	 * @data0: bits[0:15]  Feature enable control
+	 *         bits[16:31] Revision control
+	 */
+	u32 data0;
+	/**
+	 * @data1: bits[0:15]  Migration time
+	 *         bits[16:21] Current rating
+	 *         bits[22:27] Phases for domain
+	 *         bits[28:28] Path notifications
+	 *         bits[29:31] Extra feature bits
+	 */
+	u32 data1;
+	/** @clxt: CLX time in microseconds */
+	u32 clxt;
+	/** @clxh: CLH time in microseconds */
+	u32 clxh;
+	/** @urgmode: Urgent HW throttle mode of operation */
+	u32 urgmode;
+	/** @lkgen: Enable leakage current estimate */
+	u32 lkgen;
+} __packed;
+
+#define CLX_DOMAINS_V2 2
+struct clx_domain_v2 {
+	/**
+	 * @data0: bits[0:15]  Migration time
+	 *         bits[16:21] Current rating
+	 *         bits[22:27] Phases for domain
+	 *         bits[28:28] Path notifications
+	 *         bits[29:31] Extra feature bits
+	 */
+	u32 data0;
+	/** @clxt: CLX time in microseconds */
+	u32 clxt;
+	/** @clxh: CLH time in microseconds */
+	u32 clxh;
+	/** @urgmode: Urgent HW throttle mode of operation */
+	u32 urgmode;
+	/** @lkgen: Enable leakage current estimate */
+	u32 lkgen;
+	/** @currbudget: Current Budget */
+	u32 currbudget;
+} __packed;
+
+/* H2F */
+struct hfi_clx_table_v2_cmd {
+	/** @hdr: HFI header message */
+	u32 hdr;
+	/** @version: Version identifier for the format used for domains */
+	u32 version;
+	/** @domain: GFX and MXC Domain information */
+	struct clx_domain_v2 domain[CLX_DOMAINS_V2];
+} __packed;
+
+/* H2F */
+struct hfi_test_cmd {
+	u32 hdr;
+	u32 data;
+} __packed;
+
+/* H2F */
+struct hfi_start_cmd {
+	u32 hdr;
+} __packed;
+
+/* H2F */
+struct hfi_feature_ctrl_cmd {
+	u32 hdr;
+	u32 feature;
+	u32 enable;
+	u32 data;
+} __packed;
+
+/* H2F */
+struct hfi_get_value_cmd {
+	u32 hdr;
+	u32 type;
+	u32 subtype;
+} __packed;
+
+/* Internal */
+struct hfi_get_value_req {
+	struct hfi_get_value_cmd cmd;
+	u32 data[16];
+} __packed;
+
+/* F2H */
+struct hfi_get_value_reply_cmd {
+	u32 hdr;
+	u32 req_hdr;
+	u32 data[16];
+} __packed;
+
+/* H2F */
+struct hfi_set_value_cmd {
+	u32 hdr;
+	u32 type;
+	u32 subtype;
+	u32 data;
+} __packed;
+
+/* H2F */
+struct hfi_core_fw_start_cmd {
+	u32 hdr;
+	u32 handle;
+} __packed;
+
+struct hfi_mem_alloc_desc_legacy {
+	u64 gpu_addr;
+	u32 flags;
+	u32 mem_kind;
+	u32 host_mem_handle;
+	u32 gmu_mem_handle;
+	u32 gmu_addr;
+	u32 size; /* Bytes */
+} __packed;
+
+struct hfi_mem_alloc_desc {
+	u64 gpu_addr;
+	u32 flags;
+	u32 mem_kind;
+	u32 host_mem_handle;
+	u32 gmu_mem_handle;
+	u32 gmu_addr;
+	u32 size; /* Bytes */
+	/**
+	 * @align: bits[0:7] specify alignment requirement of the GMU VA specified as a power of
+	 * two. bits[8:15] specify alignment requirement for the size of the GMU mapping. For
+	 * example, a decimal value of 20 = (1 << 20) = 1 MB alignment
+	 */
+	u32 align;
+} __packed;
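A minimal sketch (not part of this commit) that decodes both sub-fields of the align member, assuming FIELD_GET() is available here; a zero field is treated as "no explicit requirement" in this example:

	static void example_decode_align(const struct hfi_mem_alloc_desc *desc,
			u32 *va_align_bytes, u32 *size_align_bytes)
	{
		u32 va_shift = FIELD_GET(GENMASK(7, 0), desc->align);
		u32 size_shift = FIELD_GET(GENMASK(15, 8), desc->align);

		*va_align_bytes = va_shift ? (1u << va_shift) : 0;
		*size_align_bytes = size_shift ? (1u << size_shift) : 0;
	}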
+
+struct hfi_mem_alloc_entry {
+	struct hfi_mem_alloc_desc desc;
+	struct kgsl_memdesc *md;
+};
+
+/* F2H */
+struct hfi_mem_alloc_cmd_legacy {
+	u32 hdr;
+	u32 reserved; /* Padding to ensure alignment of 'desc' below */
+	struct hfi_mem_alloc_desc_legacy desc;
+} __packed;
+
+struct hfi_mem_alloc_cmd {
+	u32 hdr;
+	u32 version;
+	struct hfi_mem_alloc_desc desc;
+} __packed;
+
+/* H2F */
+struct hfi_mem_alloc_reply_cmd {
+	u32 hdr;
+	u32 req_hdr;
+	struct hfi_mem_alloc_desc desc;
+} __packed;
+
+/* H2F */
+struct hfi_gx_bw_perf_vote_cmd {
+	u32 hdr;
+	u32 ack_type;
+	u32 freq;
+	u32 bw;
+} __packed;
+
+/* H2F */
+struct hfi_fw_halt_cmd {
+	u32 hdr;
+	u32 en_halt;
+} __packed;
+
+/* H2F */
+struct hfi_prep_slumber_cmd {
+	u32 hdr;
+	u32 bw;
+	u32 freq;
+} __packed;
+
+/* F2H */
+struct hfi_err_cmd {
+	u32 hdr;
+	u32 error_code;
+	u32 data[16];
+} __packed;
+
+/* F2H */
+struct hfi_debug_cmd {
+	u32 hdr;
+	u32 type;
+	u32 timestamp;
+	u32 data;
+} __packed;
+
+/* F2H */
+struct hfi_trace_cmd {
+	u32 hdr;
+	u32 version;
+	u64 identifier;
+} __packed;
+
+/* Trace packet definition */
+struct gmu_trace_packet {
+	u32 hdr;
+	u32 trace_id;
+	u64 ticks;
+	u32 payload[];
+} __packed;
+
+/* F2H */
+struct hfi_gmu_cntr_register_cmd {
+	u32 hdr;
+	u32 group_id;
+	u32 countable;
+} __packed;
+
+/* H2F */
+struct hfi_gmu_cntr_register_reply_cmd {
+	u32 hdr;
+	u32 req_hdr;
+	u32 group_id;
+	u32 countable;
+	u32 cntr_lo;
+	u32 cntr_hi;
+} __packed;
+
+/* F2H */
+struct hfi_gmu_cntr_release_cmd {
+	u32 hdr;
+	u32 group_id;
+	u32 countable;
+} __packed;
+
+/* H2F */
+struct hfi_register_ctxt_cmd {
+	u32 hdr;
+	u32 ctxt_id;
+	u32 flags;
+	u64 pt_addr;
+	u32 ctxt_idr;
+	u32 ctxt_bank;
+} __packed;
+
+/* H2F */
+struct hfi_unregister_ctxt_cmd {
+	u32 hdr;
+	u32 ctxt_id;
+	u32 ts;
+} __packed;
+
+struct hfi_issue_ib {
+	u64 addr;
+	u32 size;
+} __packed;
+
+/* H2F */
+/* The length of *buf will be embedded in the hdr */
+struct hfi_issue_cmd_raw_cmd {
+	u32 hdr;
+	u32 *buf;
+} __packed;
+
+/* Internal */
+struct hfi_issue_cmd_raw_req {
+	u32 queue;
+	u32 ctxt_id;
+	u32 len;
+	u32 *buf;
+} __packed;
+
+/* H2F */
+struct hfi_ts_notify_cmd {
+	u32 hdr;
+	u32 ctxt_id;
+	u32 ts;
+} __packed;
+
+#define CMDBATCH_SUCCESS	0
+#define CMDBATCH_RETIRED	1
+#define CMDBATCH_ERROR		2
+#define CMDBATCH_SKIP		3
+
+#define CMDBATCH_PROFILING		BIT(4)
+#define CMDBATCH_EOF			BIT(8)
+#define CMDBATCH_INDIRECT		BIT(9)
+#define CMDBATCH_RECURRING_START	BIT(18)
+#define CMDBATCH_RECURRING_STOP		BIT(19)
+
+
+/* This indicates that the SYNCOBJ is a kgsl output fence */
+#define GMU_SYNCOBJ_FLAG_KGSL_FENCE_BIT		0
+/* This indicates that the SYNCOBJ is signaled */
+#define GMU_SYNCOBJ_FLAG_SIGNALED_BIT		1
+/* This indicates that the SYNCOBJ's software status is queried */
+#define GMU_SYNCOBJ_FLAG_QUERY_SW_STATUS_BIT	2
+/* This indicates that the SYNCOBJ's software status is signaled */
+#define GMU_SYNCOBJ_FLAG_SW_STATUS_SIGNALED_BIT	3
+/* This indicates that the SYNCOBJ's software status is pending */
+#define GMU_SYNCOBJ_FLAG_SW_STATUS_PENDING_BIT	4
+
+#define GMU_SYNCOBJ_FLAGS  \
+	{ BIT(GMU_SYNCOBJ_FLAG_KGSL_FENCE_BIT), "KGSL"}, \
+	{ BIT(GMU_SYNCOBJ_FLAG_SIGNALED_BIT), "SIGNALED"}, \
+	{ BIT(GMU_SYNCOBJ_FLAG_QUERY_SW_STATUS_BIT), "QUERIED"}, \
+	{ BIT(GMU_SYNCOBJ_FLAG_SW_STATUS_SIGNALED_BIT), "SW_SIGNALED"}, \
+	{ BIT(GMU_SYNCOBJ_FLAG_SW_STATUS_PENDING_BIT), "SW_PENDING"}
+
+/* F2H */
+struct hfi_ts_retire_cmd {
+	u32 hdr;
+	u32 ctxt_id;
+	u32 ts;
+	u32 type;
+	u64 submitted_to_rb;
+	u64 sop;
+	u64 eop;
+	u64 retired_on_gmu;
+	u64 active;
+	u32 version;
+} __packed;
+
+/* H2F */
+struct hfi_context_pointers_cmd {
+	u32 hdr;
+	u32 ctxt_id;
+	u64 sop_addr;
+	u64 eop_addr;
+	u64 user_ctxt_record_addr;
+	u32 version;
+	u32 gmu_context_queue_addr;
+} __packed;
+
+/* H2F */
+struct hfi_context_rule_cmd {
+	u32 hdr;
+	u32 ctxt_id;
+	u32 type;
+	u32 status;
+} __packed;
+
+struct fault_info {
+	u32 ctxt_id;
+	u32 policy;
+	u32 ts;
+} __packed;
+
+/* F2H */
+struct hfi_context_bad_cmd {
+	u32 hdr;
+	u32 version;
+	struct fault_info gc;
+	struct fault_info lpac;
+	u32 error;
+	u32 payload[];
+} __packed;
+
+/* F2H */
+struct hfi_context_bad_cmd_legacy {
+	u32 hdr;
+	u32 ctxt_id;
+	u32 policy;
+	u32 ts;
+	u32 error;
+	u32 payload[];
+} __packed;
+
+/* H2F */
+struct hfi_context_bad_reply_cmd {
+	u32 hdr;
+	u32 req_hdr;
+} __packed;
+
+/* H2F */
+struct hfi_submit_cmd {
+	u32 hdr;
+	u32 ctxt_id;
+	u32 flags;
+	u32 ts;
+	u32 profile_gpuaddr_lo;
+	u32 profile_gpuaddr_hi;
+	u32 numibs;
+	u32 big_ib_gmu_va;
+} __packed;
+
+struct hfi_syncobj {
+	u64 ctxt_id;
+	u64 seq_no;
+	u64 flags;
+} __packed;
+
+struct hfi_submit_syncobj {
+	u32 hdr;
+	u32 version;
+	u32 flags;
+	u32 timestamp;
+	u32 num_syncobj;
+} __packed;
+
+struct hfi_log_block {
+	u32 hdr;
+	u32 version;
+	u32 start_index;
+	u32 stop_index;
+} __packed;
+
+enum hfi_warmboot_cmd_type {
+	HFI_WARMBOOT_SET_SCRATCH = 0,
+	HFI_WARMBOOT_EXEC_SCRATCH,
+	HFI_WARMBOOT_QUERY_SCRATCH,
+};
+
+struct hfi_warmboot_scratch_cmd {
+	/** @hdr: Header for the scratch command packet */
+	u32 hdr;
+	/** @version: Version of the scratch command packet */
+	u32 version;
+	/** @flags: Set, Execute or Query scratch flag */
+	u32 flags;
+	/** @scratch_addr: Address of the scratch */
+	u32 scratch_addr;
+	/** @scratch_size: Size of the scratch in bytes */
+	u32 scratch_size;
+} __packed;
+
+/* Request GMU to add this fence to TxQueue without checking whether this is retired or not */
+#define HW_FENCE_FLAG_SKIP_MEMSTORE 0x1
+
+struct hfi_hw_fence_info {
+	/** @hdr: Header for the fence info packet */
+	u32 hdr;
+	/** @version: Version of the fence info packet */
+	u32 version;
+	/** @gmu_ctxt_id: GMU Context id to which this fence belongs */
+	u32 gmu_ctxt_id;
+	/** @error: Any error code associated with this fence */
+	u32 error;
+	/** @ctxt_id: Context id for which hw fence is to be triggered */
+	u64 ctxt_id;
+	/** @ts: Timestamp for which hw fence is to be triggered */
+	u64 ts;
+	/** @flags: Flags on how to handle this hfi packet */
+	u64 flags;
+	/** @hash_index: Index of the hw fence in hw fence table */
+	u64 hash_index;
+} __packed;
+
+/* The software fence corresponding to the queried hardware fence has not signaled */
+#define ADRENO_HW_FENCE_SW_STATUS_PENDING  BIT(0)
+/* The software fence corresponding to the queried hardware fence has signaled */
+#define ADRENO_HW_FENCE_SW_STATUS_SIGNALED BIT(1)
+
+struct hfi_syncobj_query {
+	/**
+	 * @query_bitmask: Bitmask representing the sync object descriptors to be queried. For
+	 * example, to query the second sync object descriptor (index=1) in a sync object,
+	 * bit(1) should be set in this bitmask.
+	 */
+	u32 query_bitmask;
+} __packed;
+
+#define MAX_SYNCOBJ_QUERY_BITS	128
+#define BITS_PER_SYNCOBJ_QUERY	32
+#define MAX_SYNCOBJ_QUERY_DWORDS (MAX_SYNCOBJ_QUERY_BITS / BITS_PER_SYNCOBJ_QUERY)
+
+struct hfi_syncobj_query_cmd {
+	/** @hdr: Header for the fence info packet */
+	u32 hdr;
+	/** @version: Version of the fence info packet */
+	u32 version;
+	/** @gmu_ctxt_id: GMU Context id to which this SYNC object belongs */
+	u32 gmu_ctxt_id;
+	/** @sync_obj_ts: Timestamp of this SYNC object */
+	u32 sync_obj_ts;
+	/** @queries: Array of query bitmasks */
+	struct hfi_syncobj_query queries[MAX_SYNCOBJ_QUERY_DWORDS];
+} __packed;
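A minimal sketch (not part of this commit) of how a descriptor index maps onto the query bitmask array, using the dword/bit split defined above:

	static void example_mark_syncobj_query(struct hfi_syncobj_query_cmd *cmd,
			u32 index)
	{
		if (index >= MAX_SYNCOBJ_QUERY_BITS)
			return;

		cmd->queries[index / BITS_PER_SYNCOBJ_QUERY].query_bitmask |=
			BIT(index % BITS_PER_SYNCOBJ_QUERY);
	}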
+
+/**
+ * struct pending_cmd - data structure to track outstanding HFI
+ *	command messages
+ */
+struct pending_cmd {
+	/** @sent_hdr: Header of the un-ack'd hfi packet */
+	u32 sent_hdr;
+	/** @results: Array to store the ack packet */
+	u32 results[MAX_RCVD_SIZE];
+	/** @complete: Completion to signal hfi ack has been received */
+	struct completion complete;
+	/** @node: to add it to the list of hfi packets waiting for ack */
+	struct list_head node;
+};
+
+static inline int _CMD_MSG_HDR(u32 *hdr, int id, size_t size)
+{
+	if (WARN_ON(size > HFI_MAX_MSG_SIZE))
+		return -EMSGSIZE;
+
+	*hdr = CREATE_MSG_HDR(id, HFI_MSG_CMD);
+	return 0;
+}
+
+#define CMD_MSG_HDR(cmd, id) \
+	_CMD_MSG_HDR(&(cmd).hdr, id, sizeof(cmd))
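A minimal sketch (not part of this commit) of the intended CMD_MSG_HDR() usage: the macro validates the packet size and fills in the id and type, and the queue write path typically folds in the sequence number and size afterwards:

	static int example_build_start_cmd(void)
	{
		struct hfi_start_cmd cmd;
		int ret = CMD_MSG_HDR(cmd, H2F_MSG_START);

		if (ret)
			return ret;

		/* hand 'cmd' to the H2F command queue write path here */
		return 0;
	}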
+
+#define RECORD_MSG_HDR(hdr) \
+	((hdr) | HFI_MSG_RECORD)
+
+#define CLEAR_RECORD_MSG_HDR(hdr) \
+	((hdr) & (~(HFI_MSG_RECORD | HFI_MSG_NOP)))
+
+#define RECORD_NOP_MSG_HDR(hdr) \
+	((hdr) | (HFI_MSG_RECORD | HFI_MSG_NOP))
+
+/* Maximum number of IBs in a submission */
+#define HWSCHED_MAX_DISPATCH_NUMIBS \
+	((HFI_MAX_MSG_SIZE - sizeof(struct hfi_submit_cmd)) \
+		/ sizeof(struct hfi_issue_ib))
+
+/**
+ * struct payload_section - Container of key-value pairs
+ *
+ * There may be a variable number of payload sections appended
+ * to the context bad HFI message. Each payload section contains
+ * a variable number of key-value pairs, both key and value being
+ * single dword each.
+ */
+struct payload_section {
+	/** @type: Type of the payload */
+	u16 type;
+	/** @dwords: Number of dwords in the data array. */
+	u16 dwords;
+	/** @data: A sequence of key-value pairs. Each pair is 2 dwords. */
+	u32 data[];
+} __packed;
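A minimal sketch (not part of this commit) of walking these sections inside a context bad command; total_dwords is the caller-supplied length of the payload area in dwords, and a missing key returns 0 in this example:

	static u32 example_payload_value(struct hfi_context_bad_cmd *cmd,
			u32 total_dwords, u16 type, u32 key)
	{
		u32 *ptr = cmd->payload;
		u32 *end = ptr + total_dwords;

		while (ptr < end) {
			struct payload_section *payload = (struct payload_section *)ptr;
			u32 i;

			if (payload->type == type) {
				/* data[] holds key-value pairs, two dwords per pair */
				for (i = 0; i + 1 < payload->dwords; i += 2)
					if (payload->data[i] == key)
						return payload->data[i + 1];
			}

			ptr += sizeof(*payload) / sizeof(u32) + payload->dwords;
		}

		return 0;
	}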
+
+/* IDs for context bad hfi payloads */
+#define PAYLOAD_FAULT_REGS 1
+#define PAYLOAD_RB 2
+#define PAYLOAD_PREEMPT_TIMEOUT 3
+
+/* Keys for PAYLOAD_FAULT_REGS type payload */
+#define KEY_CP_OPCODE_ERROR 1
+#define KEY_CP_PROTECTED_ERROR 2
+#define KEY_CP_HW_FAULT 3
+#define KEY_CP_BV_OPCODE_ERROR 4
+#define KEY_CP_BV_PROTECTED_ERROR 5
+#define KEY_CP_BV_HW_FAULT 6
+#define KEY_CP_LPAC_OPCODE_ERROR 7
+#define KEY_CP_LPAC_PROTECTED_ERROR 8
+#define KEY_CP_LPAC_HW_FAULT 9
+#define KEY_SWFUSE_VIOLATION_FAULT 10
+#define KEY_AQE0_OPCODE_ERROR 11
+#define KEY_AQE0_HW_FAULT 12
+#define KEY_AQE1_OPCODE_ERROR 13
+#define KEY_AQE1_HW_FAULT 14
+#define KEY_CP_AHB_ERROR 30
+#define KEY_TSB_WRITE_ERROR 31
+
+/* Keys for PAYLOAD_RB type payload */
+#define KEY_RB_ID 1
+#define KEY_RB_RPTR 2
+#define KEY_RB_WPTR 3
+#define KEY_RB_SIZEDWORDS 4
+#define KEY_RB_QUEUED_TS 5
+#define KEY_RB_RETIRED_TS 6
+#define KEY_RB_GPUADDR_LO 7
+#define KEY_RB_GPUADDR_HI 8
+
+/* Keys for PAYLOAD_PREEMPT_TIMEOUT type payload */
+#define KEY_PREEMPT_TIMEOUT_CUR_RB_ID 1
+#define KEY_PREEMPT_TIMEOUT_NEXT_RB_ID 2
+
+/* Types of errors that trigger context bad HFI */
+
+/* GPU encountered a CP HW error */
+#define GMU_CP_HW_ERROR 600
+/* GPU encountered a GPU Hang interrupt */
+#define GMU_GPU_HW_HANG 601
+/* Preemption didn't complete in given time */
+#define GMU_GPU_PREEMPT_TIMEOUT 602
+/* Fault due to Long IB timeout */
+#define GMU_GPU_SW_HANG 603
+/* GPU encountered a bad opcode */
+#define GMU_CP_OPCODE_ERROR 604
+/* GPU encountered protected mode error */
+#define GMU_CP_PROTECTED_ERROR 605
+/* GPU encountered an illegal instruction */
+#define GMU_CP_ILLEGAL_INST_ERROR 606
+/* GPU encountered a CP ucode error */
+#define GMU_CP_UCODE_ERROR 607
+/* GPU encountered a CP hw fault error */
+#define GMU_CP_HW_FAULT_ERROR 608
+/* GPU encountered a GPC error */
+#define GMU_CP_GPC_ERROR 609
+/* GPU BV encountered a bad opcode */
+#define GMU_CP_BV_OPCODE_ERROR 610
+/* GPU BV encountered protected mode error */
+#define GMU_CP_BV_PROTECTED_ERROR 611
+/* GPU BV encountered a CP hw fault error */
+#define GMU_CP_BV_HW_FAULT_ERROR 612
+/* GPU BV encountered a CP ucode error */
+#define GMU_CP_BV_UCODE_ERROR 613
+/* GPU BV encountered an illegal instruction */
+#define GMU_CP_BV_ILLEGAL_INST_ERROR 614
+/* GPU encountered a bad LPAC opcode */
+#define GMU_CP_LPAC_OPCODE_ERROR 615
+/* GPU LPAC encountered a CP ucode error */
+#define GMU_CP_LPAC_UCODE_ERROR 616
+/* GPU LPAC encountered a CP hw fault error */
+#define GMU_CP_LPAC_HW_FAULT_ERROR 617
+/* GPU LPAC encountered protected mode error */
+#define GMU_CP_LPAC_PROTECTED_ERROR 618
+/* GPU LPAC encountered an illegal instruction */
+#define GMU_CP_LPAC_ILLEGAL_INST_ERROR 619
+/* Fault due to LPAC Long IB timeout */
+#define GMU_GPU_LPAC_SW_HANG 620
+/* Fault due to software fuse violation interrupt */
+#define GMU_GPU_SW_FUSE_VIOLATION 621
+/* AQE related error codes */
+#define GMU_GPU_AQE0_OPCODE_ERRROR 622
+#define GMU_GPU_AQE0_UCODE_ERROR 623
+#define GMU_GPU_AQE0_HW_FAULT_ERROR 624
+#define GMU_GPU_AQE0_ILLEGAL_INST_ERROR 625
+#define GMU_GPU_AQE1_OPCODE_ERRROR 626
+#define GMU_GPU_AQE1_UCODE_ERROR 627
+#define GMU_GPU_AQE1_HW_FAULT_ERROR 628
+#define GMU_GPU_AQE1_ILLEGAL_INST_ERROR 629
+/* GMU encountered a sync object which is signaled via software but not via hardware */
+#define GMU_SYNCOBJ_TIMEOUT_ERROR 630
+/* Non-fatal GPU error codes */
+#define GMU_CP_AHB_ERROR 650
+#define GMU_ATB_ASYNC_FIFO_OVERFLOW 651
+#define GMU_RBBM_ATB_BUF_OVERFLOW 652
+#define GMU_UCHE_OOB_ACCESS 653
+#define GMU_UCHE_TRAP_INTR  654
+#define GMU_TSB_WRITE_ERROR 655
+
+/* GPU encountered an unknown CP error */
+#define GMU_CP_UNKNOWN_ERROR 700
+
+/**
+ * hfi_update_read_idx - Update the read index of an hfi queue
+ * @hdr: Pointer to the hfi queue header
+ * @index: New read index
+ *
+ * This function makes sure that kgsl has consumed f2h packets
+ * before GMU sees the updated read index. This avoids a corner
+ * case where GMU might over-write f2h packets that have not yet
+ * been consumed by kgsl.
+ */
+static inline void hfi_update_read_idx(struct hfi_queue_header *hdr, u32 index)
+{
+	/*
+	 * This is to make sure packets are consumed before gmu sees the updated
+	 * read index
+	 */
+	mb();
+
+	hdr->read_index = index;
+}
+
+/**
+ * hfi_update_write_idx - Update the write index of a GMU queue
+ * @write_idx: Pointer to the write index
+ * @index: New write index
+ *
+ * This function makes sure that the h2f packets are written out
+ * to memory before GMU sees the updated write index. This avoids
+ * corner cases where GMU might fetch stale entries that can happen
+ * if write index is updated before new packets have been written
+ * out to memory.
+ */
+static inline void hfi_update_write_idx(volatile u32 *write_idx, u32 index)
+{
+	/*
+	 * This is to make sure packets are written out before gmu sees the
+	 * updated write index
+	 */
+	wmb();
+
+	*write_idx = index;
+
+	/*
+	 * Memory barrier to make sure write index is written before an
+	 * interrupt is raised
+	 */
+	wmb();
+}
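For context, this is the ordering those two barriers protect, shown as a hedged sketch of a single host-to-firmware enqueue; the hdr->write_index field access, the flat queue[] view and gmu_ring_doorbell() are assumptions made for the example only.

static void example_h2f_enqueue(struct hfi_queue_header *hdr, u32 *queue,
	const u32 *pkt, u32 dwords)
{
	u32 write = hdr->write_index;
	u32 i;

	/* 1. Write the packet payload into queue memory (wraparound omitted) */
	for (i = 0; i < dwords; i++)
		queue[write + i] = pkt[i];

	/* 2. Publish it: the wmb() inside orders the payload before the index */
	hfi_update_write_idx(&hdr->write_index, write + dwords);

	/* 3. Only now raise the doorbell so the GMU sees a consistent queue */
	gmu_ring_doorbell();
}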
+
+/**
+ * hfi_get_mem_alloc_desc - Get the descriptor from F2H_MSG_MEM_ALLOC packet
+ * @rcvd: Pointer to the F2H_MSG_MEM_ALLOC packet
+ * @out: Pointer to copy the descriptor data to
+ *
+ * This function checks the F2H_MSG_MEM_ALLOC packet version and, based on that,
+ * copies the descriptor data out of the packet.
+ */
+static inline void hfi_get_mem_alloc_desc(void *rcvd, struct hfi_mem_alloc_desc *out)
+{
+	struct hfi_mem_alloc_cmd_legacy *in_legacy = (struct hfi_mem_alloc_cmd_legacy *)rcvd;
+	struct hfi_mem_alloc_cmd *in = (struct hfi_mem_alloc_cmd *)rcvd;
+
+	if (in->version > 0)
+		memcpy(out, &in->desc, sizeof(in->desc));
+	else
+		memcpy(out, &in_legacy->desc, sizeof(in_legacy->desc));
+}
+
+/**
+ * hfi_get_gmu_va_alignment - Get the alignment (in bytes) for a GMU VA
+ * @align: Alignment specified as a power of two (2^n) in bits[0:7]
+ *
+ * This function derives the GMU VA alignment in bytes from bits[0:7] of the passed-in value,
+ * which is specified as a power of two (2^n). For example, va_align = 20 means (1 << 20) = 1MB
+ * alignment. The minimum alignment (in bytes) is SZ_4K, i.e. anything less than (or equal to) a
+ * va_align value of ilog2(SZ_4K) will default to SZ_4K alignment.
+ */
+static inline u32 hfi_get_gmu_va_alignment(u32 align)
+{
+	u32 va_align = FIELD_GET(GENMASK(7, 0), align);
+
+	return (va_align > ilog2(SZ_4K)) ? (1 << va_align) : SZ_4K;
+}
+
+/**
+ * hfi_get_gmu_sz_alignment - Get the alignment (in bytes) for GMU mapping size
+ * @align: Alignment specified as a power of two (2^n) in bits[8:15]
+ *
+ * This function derives the GMU mapping size alignment in bytes from bits[8:15] of the
+ * passed-in value, which is specified as a power of two (2^n). For example, sz_align = 20 means
+ * (1 << 20) = 1MB alignment. The minimum alignment (in bytes) is SZ_4K, i.e. anything less
+ * than (or equal to) a sz_align value of ilog2(SZ_4K) will default to SZ_4K alignment.
+ */
+static inline u32 hfi_get_gmu_sz_alignment(u32 align)
+{
+	u32 sz_align = FIELD_GET(GENMASK(15, 8), align);
+
+	return (sz_align > ilog2(SZ_4K)) ? (1 << sz_align) : SZ_4K;
+}
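As a quick sanity check of the encoding described above (VA alignment in bits[0:7], size alignment in bits[8:15]), here is a small illustrative example; the chosen values are arbitrary.

static void example_gmu_alignment(void)
{
	/* bits[0:7] = 20 -> 1MB VA alignment, bits[8:15] = 16 -> 64KB size alignment */
	u32 align = (16 << 8) | 20;

	WARN_ON(hfi_get_gmu_va_alignment(align) != SZ_1M);
	WARN_ON(hfi_get_gmu_sz_alignment(align) != SZ_64K);

	/* Anything at or below ilog2(SZ_4K) falls back to 4KB */
	WARN_ON(hfi_get_gmu_va_alignment(0) != SZ_4K);
}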
+
+/**
+ * adreno_hwsched_wait_ack_completion - Wait for HFI ack asynchronously
+ * @adreno_dev: Pointer to the adreno device
+ * @dev: Pointer to the device structure
+ * @ack: Pointer to the pending ack
+ * @process_msgq: Function pointer to the msgq processing function
+ *
+ * This function waits for the completion structure, which gets signaled asynchronously. In case
+ * there is a timeout, process the msgq one last time. If the ack is present, log an error and move
+ * on. If the ack isn't present, log an error, take a snapshot and return -ETIMEDOUT.
+ *
+ * Return: 0 on success and -ETIMEDOUT on failure
+ */
+int adreno_hwsched_wait_ack_completion(struct adreno_device *adreno_dev,
+	struct device *dev, struct pending_cmd *ack,
+	void (*process_msgq)(struct adreno_device *adreno_dev));
+
+/**
+ * adreno_hwsched_ctxt_unregister_wait_completion - Wait for HFI ack for context unregister
+ * @adreno_dev: Pointer to the adreno device
+ * @dev: Pointer to the device structure
+ * @ack: Pointer to the pending ack
+ * @process_msgq: Function pointer to the msgq processing function
+ * @cmd: Pointer to the hfi packet header and data
+ *
+ * This function waits for the completion structure for context unregister hfi ack,
+ * which gets signaled asynchronously. In case there is a timeout, process the msgq
+ * one last time. If the ack is present, log an error and move on. If the ack isn't
+ * present, log an error and return -ETIMEDOUT.
+ *
+ * Return: 0 on success and -ETIMEDOUT on failure
+ */
+int adreno_hwsched_ctxt_unregister_wait_completion(
+	struct adreno_device *adreno_dev,
+	struct device *dev, struct pending_cmd *ack,
+	void (*process_msgq)(struct adreno_device *adreno_dev),
+	struct hfi_unregister_ctxt_cmd *cmd);
+
+/**
+ * hfi_get_minidump_string - Get the va-minidump string from entry
+ * @mem_kind: mem_kind type
+ * @hfi_minidump_str: Pointer to the output string
+ * @size: Max size of the hfi_minidump_str
+ * @rb_id: Pointer to the rb_id count
+ *
+ * This function returns 0 for a valid mem_kind and copies the VA-MINIDUMP string to
+ * hfi_minidump_str, otherwise it returns an error.
+ */
+static inline int hfi_get_minidump_string(u32 mem_kind, char *hfi_minidump_str,
+					   size_t size, u32 *rb_id)
+{
+	/* Extend this if the VA minidump needs more hfi alloc entries */
+	switch (mem_kind) {
+	case HFI_MEMKIND_RB:
+		snprintf(hfi_minidump_str, size, KGSL_GMU_RB_ENTRY"_%d", (*rb_id)++);
+		break;
+	case HFI_MEMKIND_SCRATCH:
+		snprintf(hfi_minidump_str, size, KGSL_SCRATCH_ENTRY);
+		break;
+	case HFI_MEMKIND_PROFILE:
+		snprintf(hfi_minidump_str, size, KGSL_GMU_KERNEL_PROF_ENTRY);
+		break;
+	case HFI_MEMKIND_USER_PROFILE_IBS:
+		snprintf(hfi_minidump_str, size, KGSL_GMU_USER_PROF_ENTRY);
+		break;
+	case HFI_MEMKIND_CMD_BUFFER:
+		snprintf(hfi_minidump_str, size, KGSL_GMU_CMD_BUFFER_ENTRY);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#endif
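To illustrate hfi_get_minidump_string() above, a hedged usage sketch; only the helper and the HFI_MEMKIND_* values come from this header, while the debug prints and the unknown-memkind value are arbitrary.

static void example_minidump_names(void)
{
	char name[32];
	u32 rb_id = 0;

	/* Ringbuffer entries get a per-RB suffix via the rb_id counter */
	if (!hfi_get_minidump_string(HFI_MEMKIND_RB, name, sizeof(name), &rb_id))
		pr_debug("minidump entry: %s\n", name);	/* ..._0 */
	if (!hfi_get_minidump_string(HFI_MEMKIND_RB, name, sizeof(name), &rb_id))
		pr_debug("minidump entry: %s\n", name);	/* ..._1 */

	/* Memkinds without a VA-minidump entry are rejected */
	WARN_ON(hfi_get_minidump_string(0xffff, name, sizeof(name), &rb_id) != -EINVAL);
}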

+ 2510 - 0
qcom/opensource/graphics-kernel/adreno_hwsched.c

@@ -0,0 +1,2510 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/dma-fence-array.h>
+#include <soc/qcom/msm_performance.h>
+
+#include "adreno.h"
+#include "adreno_hfi.h"
+#include "adreno_snapshot.h"
+#include "adreno_sysfs.h"
+#include "adreno_trace.h"
+#include "kgsl_timeline.h"
+#include <linux/msm_kgsl.h>
+
+/*
+ * Number of commands that can be queued in a context before it sleeps
+ *
+ * Our code that "puts back" a command from the context is much cleaner
+ * if we are sure that there will always be enough room in the ringbuffer,
+ * so restrict the size of the context queue to ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1
+ */
+static u32 _context_drawqueue_size = ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1;
+
+/* Number of milliseconds to wait for the context queue to clear */
+static unsigned int _context_queue_wait = 10000;
+
+/*
+ * GFT throttle parameters. If GFT recovered more than
+ * X times in Y ms invalidate the context and do not attempt recovery.
+ * X -> _fault_throttle_burst
+ * Y -> _fault_throttle_time
+ */
+static unsigned int _fault_throttle_time = 2000;
+static unsigned int _fault_throttle_burst = 3;
+
+/* Use a kmem cache to speed up allocations for dispatcher jobs */
+static struct kmem_cache *jobs_cache;
+/* Use a kmem cache to speed up allocations for inflight command objects */
+static struct kmem_cache *obj_cache;
+
+inline bool adreno_hwsched_context_queue_enabled(struct adreno_device *adreno_dev)
+{
+	return test_bit(ADRENO_HWSCHED_CONTEXT_QUEUE, &adreno_dev->hwsched.flags);
+}
+
+static bool is_cmdobj(struct kgsl_drawobj *drawobj)
+{
+	return (drawobj->type & CMDOBJ_TYPE);
+}
+
+static bool _check_context_queue(struct adreno_context *drawctxt, u32 count)
+{
+	bool ret;
+
+	spin_lock(&drawctxt->lock);
+
+	/*
+	 * Wake up if there is room in the context or if the whole thing got
+	 * invalidated while we were asleep
+	 */
+
+	if (kgsl_context_invalid(&drawctxt->base))
+		ret = false;
+	else
+		ret = (drawctxt->queued + count) < _context_drawqueue_size;
+
+	spin_unlock(&drawctxt->lock);
+
+	return ret;
+}
+
+static void _pop_drawobj(struct adreno_context *drawctxt)
+{
+	drawctxt->drawqueue_head = DRAWQUEUE_NEXT(drawctxt->drawqueue_head,
+		ADRENO_CONTEXT_DRAWQUEUE_SIZE);
+	drawctxt->queued--;
+}
+
+static int _retire_syncobj(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_sync *syncobj, struct adreno_context *drawctxt)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+
+	if (!kgsl_drawobj_events_pending(syncobj)) {
+		_pop_drawobj(drawctxt);
+		kgsl_drawobj_destroy(DRAWOBJ(syncobj));
+		return 0;
+	}
+
+	/*
+	 * If hardware fences are enabled, and this SYNCOBJ is backed by hardware fences,
+	 * send it to the GMU
+	 */
+	if (test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags) &&
+		((syncobj->flags & KGSL_SYNCOBJ_HW)))
+		return 1;
+
+	/*
+	 * If we got here, there are pending events for this sync object.
+	 * Start the canary timer if it hasn't been started already.
+	 */
+	if (!syncobj->timeout_jiffies) {
+		syncobj->timeout_jiffies = jiffies + msecs_to_jiffies(5000);
+		mod_timer(&syncobj->timer, syncobj->timeout_jiffies);
+	}
+
+	return -EAGAIN;
+}
+
+static bool _marker_expired(struct kgsl_drawobj_cmd *markerobj)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(markerobj);
+
+	return (drawobj->flags & KGSL_DRAWOBJ_MARKER) &&
+		kgsl_check_timestamp(drawobj->device, drawobj->context,
+		markerobj->marker_timestamp);
+}
+
+/* Only retire the timestamp. The drawobj will be destroyed later */
+static void _retire_timestamp_only(struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_context *context = drawobj->context;
+	struct kgsl_device *device = context->device;
+
+	/*
+	 * Write the start and end timestamp to the memstore to keep the
+	 * accounting sane
+	 */
+	kgsl_sharedmem_writel(device->memstore,
+		KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+		drawobj->timestamp);
+
+	kgsl_sharedmem_writel(device->memstore,
+		KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+		drawobj->timestamp);
+
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_RETIRED,
+		pid_nr(context->proc_priv->pid),
+		context->id, drawobj->timestamp,
+		!!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+
+	if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) {
+		atomic64_inc(&drawobj->context->proc_priv->frame_count);
+		atomic_inc(&drawobj->context->proc_priv->period->frames);
+	}
+
+	/* Retire pending GPU events for the object */
+	kgsl_process_event_group(device, &context->events);
+}
+
+static void _retire_timestamp(struct kgsl_drawobj *drawobj)
+{
+	_retire_timestamp_only(drawobj);
+
+	kgsl_drawobj_destroy(drawobj);
+}
+
+static int _retire_markerobj(struct adreno_device *adreno_dev, struct kgsl_drawobj_cmd *cmdobj,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+
+	if (_marker_expired(cmdobj)) {
+		set_bit(CMDOBJ_MARKER_EXPIRED, &cmdobj->priv);
+		/*
+		 * There may be pending hardware fences that need to be signaled upon retiring
+		 * this MARKER object. Hence, send it to the target specific layers to trigger
+		 * the hardware fences.
+		 */
+		if (test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags)) {
+			_retire_timestamp_only(DRAWOBJ(cmdobj));
+			return 1;
+		}
+		_pop_drawobj(drawctxt);
+		_retire_timestamp(DRAWOBJ(cmdobj));
+		return 0;
+	}
+
+	/*
+	 * If the marker isn't expired but the SKIP bit
+	 * is set then there are real commands following
+	 * this one in the queue. This means that we
+	 * need to dispatch the command so that we can
+	 * keep the timestamp accounting correct. If
+	 * skip isn't set then we block this queue
+	 * until the dependent timestamp expires
+	 */
+
+	return test_bit(CMDOBJ_SKIP, &cmdobj->priv) ? 1 : -EAGAIN;
+}
+
+static int _retire_timelineobj(struct kgsl_drawobj *drawobj,
+		struct adreno_context *drawctxt)
+{
+	_pop_drawobj(drawctxt);
+	kgsl_drawobj_destroy(drawobj);
+	return 0;
+}
+
+static int drawqueue_retire_bindobj(struct kgsl_drawobj *drawobj,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_drawobj_bind *bindobj = BINDOBJ(drawobj);
+
+	if (test_bit(KGSL_BINDOBJ_STATE_DONE, &bindobj->state)) {
+		_pop_drawobj(drawctxt);
+		_retire_timestamp(drawobj);
+		return 0;
+	}
+
+	if (!test_and_set_bit(KGSL_BINDOBJ_STATE_START, &bindobj->state)) {
+		/*
+		 * Take a reference to the drawobj and the context because both
+		 * get referenced in the bind callback
+		 */
+		_kgsl_context_get(&drawctxt->base);
+		kref_get(&drawobj->refcount);
+
+		kgsl_sharedmem_bind_ranges(bindobj->bind);
+	}
+
+	return -EAGAIN;
+}
+
+/*
+ * Retires all expired marker and sync objs from the context
+ * queue and returns one of the below:
+ * a) next drawobj that needs to be sent to the ringbuffer
+ * b) -EAGAIN for a syncobj with syncpoints pending
+ * c) -EAGAIN for a markerobj whose marker timestamp has not expired yet
+ * d) NULL when no commands remain in the drawqueue
+ */
+static struct kgsl_drawobj *_process_drawqueue_get_next_drawobj(
+	struct adreno_device *adreno_dev, struct adreno_context *drawctxt)
+{
+	struct kgsl_drawobj *drawobj;
+	unsigned int i = drawctxt->drawqueue_head;
+	struct kgsl_drawobj_cmd *cmdobj;
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	int ret = 0;
+
+	if (drawctxt->drawqueue_head == drawctxt->drawqueue_tail)
+		return NULL;
+
+	for (i = drawctxt->drawqueue_head; i != drawctxt->drawqueue_tail;
+			i = DRAWQUEUE_NEXT(i, ADRENO_CONTEXT_DRAWQUEUE_SIZE)) {
+
+		drawobj = drawctxt->drawqueue[i];
+
+		if (!drawobj)
+			return NULL;
+
+		switch (drawobj->type) {
+		case CMDOBJ_TYPE:
+			cmdobj = CMDOBJ(drawobj);
+
+			/* We only support one big IB inflight */
+			if ((cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) &&
+				hwsched->big_cmdobj)
+				return ERR_PTR(-ENOSPC);
+
+			return drawobj;
+		case SYNCOBJ_TYPE:
+			ret = _retire_syncobj(adreno_dev, SYNCOBJ(drawobj), drawctxt);
+			if (ret == 1)
+				return drawobj;
+			break;
+		case MARKEROBJ_TYPE:
+			ret = _retire_markerobj(adreno_dev, CMDOBJ(drawobj), drawctxt);
+			/* Special case where marker needs to be sent to GPU */
+			if (ret == 1)
+				return drawobj;
+			break;
+		case BINDOBJ_TYPE:
+			ret = drawqueue_retire_bindobj(drawobj, drawctxt);
+			break;
+		case TIMELINEOBJ_TYPE:
+			ret = _retire_timelineobj(drawobj, drawctxt);
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	return NULL;
+}
+
+/**
+ * hwsched_dispatcher_requeue_drawobj() - Put a draw object back on the context
+ * queue
+ * @drawctxt: Pointer to the adreno draw context
+ * @drawobj: Pointer to the KGSL draw object to requeue
+ *
+ * Failure to submit a drawobj to the ringbuffer isn't the fault of the drawobj
+ * being submitted, so if a failure happens, push it back on the head of the
+ * context queue to be reconsidered, unless the context got detached.
+ */
+static inline int hwsched_dispatcher_requeue_drawobj(
+		struct adreno_context *drawctxt,
+		struct kgsl_drawobj *drawobj)
+{
+	unsigned int prev;
+
+	spin_lock(&drawctxt->lock);
+
+	if (kgsl_context_is_bad(&drawctxt->base)) {
+		spin_unlock(&drawctxt->lock);
+		/* get rid of this drawobj since the context is bad */
+		kgsl_drawobj_destroy(drawobj);
+		return -ENOENT;
+	}
+
+	prev = drawctxt->drawqueue_head == 0 ?
+		(ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1) :
+		(drawctxt->drawqueue_head - 1);
+
+	/*
+	 * The maximum queue size always needs to be one less than the size of
+	 * the ringbuffer queue so there is "room" to put the drawobj back in
+	 */
+
+	WARN_ON(prev == drawctxt->drawqueue_tail);
+
+	drawctxt->drawqueue[prev] = drawobj;
+	drawctxt->queued++;
+
+	/* Reset the command queue head to reflect the newly requeued change */
+	drawctxt->drawqueue_head = prev;
+	if (is_cmdobj(drawobj)) {
+		struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj);
+
+		cmdobj->requeue_cnt++;
+	}
+	spin_unlock(&drawctxt->lock);
+	return 0;
+}
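The 'prev' computation above is just a decrement with wraparound on the fixed-size drawqueue; the equivalent helper below, with example numbers assuming ADRENO_CONTEXT_DRAWQUEUE_SIZE is 128, is purely illustrative.

/* Equivalent of the inline 'prev' computation above */
static u32 example_drawqueue_prev_slot(u32 head)
{
	return head ? (head - 1) : (ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1);
}

/*
 * With a queue size of 128: prev(5) == 4 and prev(0) == 127. Because a
 * context queues at most _context_drawqueue_size - 1 entries, the slot
 * returned here cannot collide with drawqueue_tail, which is why the
 * function above only WARNs instead of failing.
 */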
+
+/**
+ * hwsched_queue_context() - Queue a context in the dispatcher list of jobs
+ * @adreno_dev: Pointer to the adreno device structure
+ * @drawctxt: Pointer to the adreno draw context
+ *
+ * Add a context to the dispatcher list of jobs.
+ */
+static int hwsched_queue_context(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct adreno_dispatch_job *job;
+
+	/* Refuse to queue a detached context */
+	if (kgsl_context_detached(&drawctxt->base))
+		return 0;
+
+	if (!_kgsl_context_get(&drawctxt->base))
+		return 0;
+
+	job = kmem_cache_alloc(jobs_cache, GFP_ATOMIC);
+	if (!job) {
+		kgsl_context_put(&drawctxt->base);
+		return -ENOMEM;
+	}
+
+	job->drawctxt = drawctxt;
+
+	trace_dispatch_queue_context(drawctxt);
+	llist_add(&job->node, &hwsched->jobs[drawctxt->base.priority]);
+
+	return 0;
+}
+
+void adreno_hwsched_flush(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+
+	kthread_flush_worker(hwsched->worker);
+}
+
+/**
+ * is_marker_skip() - Check if the draw object is a MARKEROBJ_TYPE and CMDOBJ_SKIP bit is set
+ */
+static bool is_marker_skip(struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_drawobj_cmd *cmdobj = NULL;
+
+	if (drawobj->type != MARKEROBJ_TYPE)
+		return false;
+
+	cmdobj = CMDOBJ(drawobj);
+
+	if (test_bit(CMDOBJ_SKIP, &cmdobj->priv))
+		return true;
+
+	return false;
+}
+
+static bool _abort_submission(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+
+	/* We only need a single barrier before reading all the atomic variables below */
+	smp_rmb();
+
+	if (atomic_read(&adreno_dev->halt) || atomic_read(&hwsched->fault))
+		return true;
+
+	return false;
+}
+
+/**
+ * hwsched_sendcmd() - Send a drawobj to the GPU hardware
+ * @adreno_dev: Pointer to the adreno device struct
+ * @drawobj: Pointer to the KGSL drawobj being sent
+ *
+ * Send a KGSL drawobj to the GPU hardware
+ */
+static int hwsched_sendcmd(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_context *context = drawobj->context;
+	int ret;
+	struct cmd_list_obj *obj;
+
+	obj = kmem_cache_alloc(obj_cache, GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	mutex_lock(&device->mutex);
+
+	if (_abort_submission(adreno_dev)) {
+		mutex_unlock(&device->mutex);
+		kmem_cache_free(obj_cache, obj);
+		return -EBUSY;
+	}
+
+	if (kgsl_context_detached(context)) {
+		mutex_unlock(&device->mutex);
+		kmem_cache_free(obj_cache, obj);
+		return -ENOENT;
+	}
+
+	hwsched->inflight++;
+
+	if (hwsched->inflight == 1 &&
+		!test_bit(ADRENO_HWSCHED_POWER, &hwsched->flags)) {
+		ret = adreno_active_count_get(adreno_dev);
+		if (ret) {
+			hwsched->inflight--;
+			mutex_unlock(&device->mutex);
+			kmem_cache_free(obj_cache, obj);
+			return ret;
+		}
+		set_bit(ADRENO_HWSCHED_POWER, &hwsched->flags);
+	}
+
+	ret = hwsched->hwsched_ops->submit_drawobj(adreno_dev, drawobj);
+	if (ret) {
+		/*
+		 * If the first submission failed, then put back the active
+		 * count to relinquish active vote
+		 */
+		if (hwsched->inflight == 1) {
+			adreno_active_count_put(adreno_dev);
+			clear_bit(ADRENO_HWSCHED_POWER, &hwsched->flags);
+		}
+
+		hwsched->inflight--;
+		kmem_cache_free(obj_cache, obj);
+		mutex_unlock(&device->mutex);
+		return ret;
+	}
+
+	if ((hwsched->inflight == 1) &&
+		!test_and_set_bit(ADRENO_HWSCHED_ACTIVE, &hwsched->flags))
+		reinit_completion(&hwsched->idle_gate);
+
+	if (is_cmdobj(drawobj)) {
+		struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj);
+
+		/* If this MARKER object is already retired, we can destroy it here */
+		if ((test_bit(CMDOBJ_MARKER_EXPIRED, &cmdobj->priv))) {
+			kmem_cache_free(obj_cache, obj);
+			kgsl_drawobj_destroy(drawobj);
+			goto done;
+		}
+
+		if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) {
+			hwsched->big_cmdobj = cmdobj;
+			kref_get(&drawobj->refcount);
+		}
+	}
+
+	obj->drawobj = drawobj;
+	list_add_tail(&obj->node, &hwsched->cmd_list);
+
+done:
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+/**
+ * hwsched_sendcmds() - Send commands from a context to the GPU
+ * @adreno_dev: Pointer to the adreno device struct
+ * @drawctxt: Pointer to the adreno context to dispatch commands from
+ *
+ * Dequeue and send a burst of commands from the specified context to the GPU.
+ * Returns positive if the context needs to be put back on the pending queue,
+ * 0 if the context is empty or detached and negative on error
+ */
+static int hwsched_sendcmds(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	int count = 0;
+	int ret = 0;
+
+	while (1) {
+		struct kgsl_drawobj *drawobj;
+		struct kgsl_drawobj_cmd *cmdobj = NULL;
+		struct kgsl_context *context;
+
+		spin_lock(&drawctxt->lock);
+		drawobj = _process_drawqueue_get_next_drawobj(adreno_dev,
+				drawctxt);
+
+		/*
+		 * _process_drawqueue_get_next_drawobj() returns -EAGAIN if the
+		 * current drawobj has pending sync points, so there is no more
+		 * to do here. When the sync points are satisfied the context
+		 * will get requeued
+		 */
+
+		if (IS_ERR_OR_NULL(drawobj)) {
+			if (IS_ERR(drawobj))
+				ret = PTR_ERR(drawobj);
+			spin_unlock(&drawctxt->lock);
+			break;
+		}
+		_pop_drawobj(drawctxt);
+		spin_unlock(&drawctxt->lock);
+
+		if (is_cmdobj(drawobj) || is_marker_skip(drawobj)) {
+			cmdobj = CMDOBJ(drawobj);
+			context = drawobj->context;
+			trace_adreno_cmdbatch_ready(context->id,
+				context->priority, drawobj->timestamp,
+				cmdobj->requeue_cnt);
+		}
+		ret = hwsched_sendcmd(adreno_dev, drawobj);
+
+		/*
+		 * On error from hwsched_sendcmd() try to requeue the cmdobj
+		 * unless we got back -ENOENT which means that the context has
+		 * been detached and there will be no more deliveries from here
+		 */
+		if (ret != 0) {
+			/* Destroy the cmdobj on -ENOENT */
+			if (ret == -ENOENT)
+				kgsl_drawobj_destroy(drawobj);
+			else {
+				/*
+				 * If we couldn't put it on dispatch queue
+				 * then return it to the context queue
+				 */
+				int r = hwsched_dispatcher_requeue_drawobj(
+					drawctxt, drawobj);
+				if (r)
+					ret = r;
+			}
+
+			break;
+		}
+
+		if (cmdobj)
+			drawctxt->submitted_timestamp = drawobj->timestamp;
+
+		count++;
+	}
+
+	/*
+	 * Wake up any snoozing threads if we have consumed any real commands
+	 * or marker commands and we have room in the context queue.
+	 */
+
+	if (_check_context_queue(drawctxt, 0))
+		wake_up_all(&drawctxt->wq);
+
+	if (!ret)
+		ret = count;
+
+	/* Return error or the number of commands queued */
+	return ret;
+}
+
+static void hwsched_handle_jobs_list(struct adreno_device *adreno_dev,
+	int id, unsigned long *map, struct llist_node *list)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct adreno_dispatch_job *job, *next;
+
+	if (!list)
+		return;
+
+	/* Reverse the list so we deal with oldest submitted contexts first */
+	list = llist_reverse_order(list);
+
+	llist_for_each_entry_safe(job, next, list, node) {
+		int ret;
+
+		if (kgsl_context_is_bad(&job->drawctxt->base)) {
+			kgsl_context_put(&job->drawctxt->base);
+			kmem_cache_free(jobs_cache, job);
+			continue;
+		}
+
+		/*
+		 * Due to the nature of the lockless queue the same context
+		 * might have multiple jobs on the list. We allow this so we
+		 * don't have to query the list on the producer side but on the
+		 * consumer side we only want each context to be considered
+		 * once. Use a bitmap to remember which contexts we've already
+		 * seen and quietly discard duplicate jobs
+		 */
+		if (test_and_set_bit(job->drawctxt->base.id, map)) {
+			kgsl_context_put(&job->drawctxt->base);
+			kmem_cache_free(jobs_cache, job);
+			continue;
+		}
+
+		ret = hwsched_sendcmds(adreno_dev, job->drawctxt);
+
+		/*
+		 * If the context had nothing queued or the context has been
+		 * destroyed then drop the job
+		 */
+		if (!ret || ret == -ENOENT) {
+			kgsl_context_put(&job->drawctxt->base);
+			kmem_cache_free(jobs_cache, job);
+			continue;
+		}
+
+		/*
+		 * If the dispatch queue is full then requeue the job to be
+		 * considered first next time. Otherwise either the context
+		 * successfully submitted to the GPU or another error
+		 * happened and it should go back on the regular queue
+		 */
+		if (ret == -ENOSPC)
+			llist_add(&job->node, &hwsched->requeue[id]);
+		else
+			llist_add(&job->node, &hwsched->jobs[id]);
+	}
+}
+
+static void hwsched_handle_jobs(struct adreno_device *adreno_dev, int id)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	unsigned long map[BITS_TO_LONGS(KGSL_MEMSTORE_MAX)];
+	struct llist_node *requeue, *jobs;
+
+	memset(map, 0, sizeof(map));
+
+	requeue = llist_del_all(&hwsched->requeue[id]);
+	jobs = llist_del_all(&hwsched->jobs[id]);
+
+	hwsched_handle_jobs_list(adreno_dev, id, map, requeue);
+	hwsched_handle_jobs_list(adreno_dev, id, map, jobs);
+}
+
+/**
+ * hwsched_issuecmds() - Issue commands from pending contexts
+ * @adreno_dev: Pointer to the adreno device struct
+ *
+ * Issue as many commands as possible (up to inflight) from the pending contexts
+ * This function assumes the dispatcher mutex has been locked.
+ */
+static void hwsched_issuecmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hwsched->jobs); i++)
+		hwsched_handle_jobs(adreno_dev, i);
+}
+
+void adreno_hwsched_trigger(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+
+	kthread_queue_work(hwsched->worker, &hwsched->work);
+}
+
+static inline void _decrement_submit_now(struct kgsl_device *device)
+{
+	spin_lock(&device->submit_lock);
+	device->submit_now--;
+	spin_unlock(&device->submit_lock);
+}
+
+u32 adreno_hwsched_gpu_fault(struct adreno_device *adreno_dev)
+{
+	/* make sure we're reading the latest value */
+	smp_rmb();
+	return atomic_read(&adreno_dev->hwsched.fault);
+}
+
+/**
+ * adreno_hwsched_issuecmds() - Issue commands from pending contexts
+ * @adreno_dev: Pointer to the adreno device struct
+ *
+ * Lock the dispatcher and call hwsched_issuecmds
+ */
+static void adreno_hwsched_issuecmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	spin_lock(&device->submit_lock);
+	/* If GPU state is not ACTIVE, schedule the work for later */
+	if (device->skip_inline_submit) {
+		spin_unlock(&device->submit_lock);
+		goto done;
+	}
+	device->submit_now++;
+	spin_unlock(&device->submit_lock);
+
+	/* If the dispatcher is busy then schedule the work for later */
+	if (!mutex_trylock(&hwsched->mutex)) {
+		_decrement_submit_now(device);
+		goto done;
+	}
+
+	if (!adreno_hwsched_gpu_fault(adreno_dev))
+		hwsched_issuecmds(adreno_dev);
+
+	if (hwsched->inflight > 0) {
+		mutex_lock(&device->mutex);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		mutex_unlock(&device->mutex);
+	}
+
+	mutex_unlock(&hwsched->mutex);
+	_decrement_submit_now(device);
+	return;
+
+done:
+	adreno_hwsched_trigger(adreno_dev);
+}
+
+/**
+ * get_timestamp() - Return the next timestamp for the context
+ * @drawctxt: Pointer to an adreno draw context struct
+ * @drawobj: Pointer to a drawobj
+ * @timestamp: Pointer to a timestamp value possibly passed from the user
+ * @user_ts: user generated timestamp
+ *
+ * Assign a timestamp based on the settings of the draw context and the command
+ * batch.
+ */
+static int get_timestamp(struct adreno_context *drawctxt,
+		struct kgsl_drawobj *drawobj, unsigned int *timestamp,
+		unsigned int user_ts)
+{
+
+	if (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS) {
+		/*
+		 * User specified timestamps need to be greater than the last
+		 * issued timestamp in the context
+		 */
+		if (timestamp_cmp(drawctxt->timestamp, user_ts) >= 0)
+			return -ERANGE;
+
+		drawctxt->timestamp = user_ts;
+	} else
+		drawctxt->timestamp++;
+
+	*timestamp = drawctxt->timestamp;
+	drawobj->timestamp = *timestamp;
+	return 0;
+}
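A hedged sketch of the user-generated timestamp rule enforced above; it assumes the context has KGSL_CONTEXT_USER_GENERATED_TS set and, as in the real callers, would have to run under drawctxt->lock.

static void example_user_ts_rule(struct adreno_context *drawctxt,
	struct kgsl_drawobj *drawobj)
{
	unsigned int ts;

	drawctxt->base.flags |= KGSL_CONTEXT_USER_GENERATED_TS;
	drawctxt->timestamp = 100;

	/* Equal to the last issued timestamp: rejected */
	WARN_ON(get_timestamp(drawctxt, drawobj, &ts, 100) != -ERANGE);

	/* Strictly greater: accepted and the context timestamp advances */
	WARN_ON(get_timestamp(drawctxt, drawobj, &ts, 101) != 0);
	WARN_ON(drawctxt->timestamp != 101);
}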
+
+static inline int _wait_for_room_in_context_queue(
+	struct adreno_context *drawctxt, u32 count)
+{
+	int ret = 0;
+
+	/*
+	 * There is always a possibility that the dispatcher may end up pushing
+	 * the last popped draw object back to the context drawqueue. Hence,
+	 * we can only queue up to _context_drawqueue_size - 1 here to make
+	 * sure we never let drawctxt->queued exceed _context_drawqueue_size.
+	 */
+	if ((drawctxt->queued + count) > (_context_drawqueue_size - 1)) {
+		trace_adreno_drawctxt_sleep(drawctxt);
+		spin_unlock(&drawctxt->lock);
+
+		ret = wait_event_interruptible_timeout(drawctxt->wq,
+			_check_context_queue(drawctxt, count),
+			msecs_to_jiffies(_context_queue_wait));
+
+		spin_lock(&drawctxt->lock);
+		trace_adreno_drawctxt_wake(drawctxt);
+
+		/*
+		 * Account for the possibility that the context got invalidated
+		 * while we were sleeping
+		 */
+		if (ret > 0)
+			ret = kgsl_check_context_state(&drawctxt->base);
+		else if (ret == 0)
+			ret = -ETIMEDOUT;
+	}
+
+	return ret;
+}
+
+static unsigned int _check_context_state_to_queue_cmds(
+	struct adreno_context *drawctxt, u32 count)
+{
+	int ret = kgsl_check_context_state(&drawctxt->base);
+
+	if (ret)
+		return ret;
+
+	return _wait_for_room_in_context_queue(drawctxt, count);
+}
+
+static void _queue_drawobj(struct adreno_context *drawctxt,
+	struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_context *context = drawobj->context;
+
+	/* Put the command into the queue */
+	drawctxt->drawqueue[drawctxt->drawqueue_tail] = drawobj;
+	drawctxt->drawqueue_tail = (drawctxt->drawqueue_tail + 1) %
+			ADRENO_CONTEXT_DRAWQUEUE_SIZE;
+	drawctxt->queued++;
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_QUEUE,
+		pid_nr(context->proc_priv->pid),
+		context->id, drawobj->timestamp,
+		!!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+	trace_adreno_cmdbatch_queued(drawobj, drawctxt->queued);
+}
+
+static int _queue_cmdobj(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct kgsl_drawobj_cmd *cmdobj,
+	uint32_t *timestamp, unsigned int user_ts)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	u32 j;
+	int ret;
+
+	ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts);
+	if (ret)
+		return ret;
+
+	/*
+	 * If this is a real command then we need to force any markers
+	 * queued before it to dispatch to keep time linear - set the
+	 * skip bit so the commands get NOPed.
+	 */
+	j = drawctxt->drawqueue_head;
+
+	while (j != drawctxt->drawqueue_tail) {
+		if (drawctxt->drawqueue[j]->type == MARKEROBJ_TYPE) {
+			struct kgsl_drawobj_cmd *markerobj =
+				CMDOBJ(drawctxt->drawqueue[j]);
+
+			set_bit(CMDOBJ_SKIP, &markerobj->priv);
+		}
+
+		j = DRAWQUEUE_NEXT(j, ADRENO_CONTEXT_DRAWQUEUE_SIZE);
+	}
+
+	drawctxt->queued_timestamp = *timestamp;
+
+	_queue_drawobj(drawctxt, drawobj);
+
+	return 0;
+}
+
+static void _queue_syncobj(struct adreno_context *drawctxt,
+	struct kgsl_drawobj_sync *syncobj, uint32_t *timestamp)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj);
+
+	*timestamp = 0;
+	drawobj->timestamp = 0;
+
+	_queue_drawobj(drawctxt, drawobj);
+}
+
+static int _queue_markerobj(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct kgsl_drawobj_cmd *markerobj,
+	u32 *timestamp, u32 user_ts)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(markerobj);
+	int ret;
+
+	ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts);
+	if (ret)
+		return ret;
+
+	/*
+	 * See if we can fastpath this thing - if nothing is queued
+	 * and nothing is inflight retire without bothering the GPU
+	 */
+	if (!drawctxt->queued && kgsl_check_timestamp(drawobj->device,
+		drawobj->context, drawctxt->queued_timestamp)) {
+		_retire_timestamp(drawobj);
+		return 1;
+	}
+
+	/*
+	 * Remember the last queued timestamp - the marker will block
+	 * until that timestamp is expired (unless another command
+	 * comes along and forces the marker to execute)
+	 */
+	markerobj->marker_timestamp = drawctxt->queued_timestamp;
+	drawctxt->queued_timestamp = *timestamp;
+
+	_queue_drawobj(drawctxt, drawobj);
+
+	return 0;
+}
+
+static int _queue_bindobj(struct adreno_context *drawctxt,
+		struct kgsl_drawobj *drawobj, u32 *timestamp, u32 user_ts)
+{
+	int ret;
+
+	ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts);
+	if (ret)
+		return ret;
+
+	drawctxt->queued_timestamp = *timestamp;
+	_queue_drawobj(drawctxt, drawobj);
+
+	return 0;
+}
+
+static void _queue_timelineobj(struct adreno_context *drawctxt,
+		struct kgsl_drawobj *drawobj)
+{
+	/*
+	 * This drawobj is not submitted to the GPU so use a timestamp of 0.
+	 * Update the timestamp through a subsequent marker to keep userspace
+	 * happy.
+	 */
+	drawobj->timestamp = 0;
+
+	_queue_drawobj(drawctxt, drawobj);
+}
+
+static int adreno_hwsched_queue_cmds(struct kgsl_device_private *dev_priv,
+	struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+	u32 count, u32 *timestamp)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct adreno_dispatch_job *job;
+	int ret;
+	unsigned int i, user_ts;
+
+	/*
+	 * There is always a possibility that the dispatcher may end up pushing
+	 * the last popped draw object back to the context drawqueue. Hence,
+	 * we can only queue up to _context_drawqueue_size - 1 here to make
+	 * sure we never let drawctxt->queued exceed _context_drawqueue_size.
+	 */
+	if (!count || count > _context_drawqueue_size - 1)
+		return -EINVAL;
+
+	for (i = 0; i < count; i++) {
+		struct kgsl_drawobj_cmd *cmdobj;
+		struct kgsl_memobj_node *ib;
+
+		if (!is_cmdobj(drawobj[i]))
+			continue;
+
+		cmdobj = CMDOBJ(drawobj[i]);
+
+		list_for_each_entry(ib, &cmdobj->cmdlist, node)
+			cmdobj->numibs++;
+
+		if (cmdobj->numibs > HWSCHED_MAX_IBS)
+			return -EINVAL;
+	}
+
+	ret = kgsl_check_context_state(&drawctxt->base);
+	if (ret)
+		return ret;
+
+	ret = adreno_verify_cmdobj(dev_priv, context, drawobj, count);
+	if (ret)
+		return ret;
+
+	/* wait for the suspend gate */
+	wait_for_completion(&device->halt_gate);
+
+	job = kmem_cache_alloc(jobs_cache, GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+
+	job->drawctxt = drawctxt;
+
+	spin_lock(&drawctxt->lock);
+
+	ret = _check_context_state_to_queue_cmds(drawctxt, count);
+	if (ret) {
+		spin_unlock(&drawctxt->lock);
+		kmem_cache_free(jobs_cache, job);
+		return ret;
+	}
+
+	user_ts = *timestamp;
+
+	/*
+	 * If there is only one drawobj in the array and it is of
+	 * type SYNCOBJ_TYPE, skip comparing user_ts as it can be 0
+	 */
+	if (!(count == 1 && drawobj[0]->type == SYNCOBJ_TYPE) &&
+		(drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS)) {
+		/*
+		 * User specified timestamps need to be greater than the last
+		 * issued timestamp in the context
+		 */
+		if (timestamp_cmp(drawctxt->timestamp, user_ts) >= 0) {
+			spin_unlock(&drawctxt->lock);
+			kmem_cache_free(jobs_cache, job);
+			return -ERANGE;
+		}
+	}
+
+	for (i = 0; i < count; i++) {
+
+		switch (drawobj[i]->type) {
+		case MARKEROBJ_TYPE:
+			ret = _queue_markerobj(adreno_dev, drawctxt,
+					CMDOBJ(drawobj[i]),
+					timestamp, user_ts);
+			if (ret == 1) {
+				spin_unlock(&drawctxt->lock);
+				kmem_cache_free(jobs_cache, job);
+				return 0;
+			} else if (ret) {
+				spin_unlock(&drawctxt->lock);
+				kmem_cache_free(jobs_cache, job);
+				return ret;
+			}
+			break;
+		case CMDOBJ_TYPE:
+			ret = _queue_cmdobj(adreno_dev, drawctxt,
+						CMDOBJ(drawobj[i]),
+						timestamp, user_ts);
+			if (ret) {
+				spin_unlock(&drawctxt->lock);
+				kmem_cache_free(jobs_cache, job);
+				return ret;
+			}
+			break;
+		case SYNCOBJ_TYPE:
+			_queue_syncobj(drawctxt, SYNCOBJ(drawobj[i]),
+						timestamp);
+			break;
+		case BINDOBJ_TYPE:
+			ret = _queue_bindobj(drawctxt, drawobj[i], timestamp,
+						user_ts);
+			if (ret) {
+				spin_unlock(&drawctxt->lock);
+				kmem_cache_free(jobs_cache, job);
+				return ret;
+			}
+			break;
+		case TIMELINEOBJ_TYPE:
+			_queue_timelineobj(drawctxt, drawobj[i]);
+			break;
+		default:
+			spin_unlock(&drawctxt->lock);
+			kmem_cache_free(jobs_cache, job);
+			return -EINVAL;
+		}
+
+	}
+
+	adreno_track_context(adreno_dev, NULL, drawctxt);
+
+	spin_unlock(&drawctxt->lock);
+
+	/* Add the context to the dispatcher pending list */
+	if (_kgsl_context_get(&drawctxt->base)) {
+		trace_dispatch_queue_context(drawctxt);
+		llist_add(&job->node, &hwsched->jobs[drawctxt->base.priority]);
+		adreno_hwsched_issuecmds(adreno_dev);
+
+	} else
+		kmem_cache_free(jobs_cache, job);
+
+	return 0;
+}
+
+void adreno_hwsched_retire_cmdobj(struct adreno_hwsched *hwsched,
+	struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct kgsl_mem_entry *entry;
+	struct kgsl_drawobj_profiling_buffer *profile_buffer;
+	struct kgsl_context *context = drawobj->context;
+
+	msm_perf_events_update(MSM_PERF_GFX, MSM_PERF_RETIRED,
+		pid_nr(context->proc_priv->pid),
+		context->id, drawobj->timestamp,
+		!!(drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME));
+
+	if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) {
+		atomic64_inc(&drawobj->context->proc_priv->frame_count);
+		atomic_inc(&drawobj->context->proc_priv->period->frames);
+	}
+
+	entry = cmdobj->profiling_buf_entry;
+	if (entry) {
+		profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc,
+			cmdobj->profiling_buffer_gpuaddr);
+
+		if (profile_buffer == NULL)
+			return;
+
+		kgsl_memdesc_unmap(&entry->memdesc);
+	}
+
+	trace_adreno_cmdbatch_done(drawobj->context->id,
+		drawobj->context->priority, drawobj->timestamp);
+
+	if (hwsched->big_cmdobj == cmdobj) {
+		hwsched->big_cmdobj = NULL;
+		kgsl_drawobj_put(drawobj);
+	}
+
+	kgsl_drawobj_destroy(drawobj);
+}
+
+static bool drawobj_retired(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj_cmd *cmdobj;
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+
+	if ((drawobj->type & SYNCOBJ_TYPE) != 0) {
+		struct gmu_context_queue_header *hdr =
+			drawctxt->gmu_context_queue.hostptr;
+
+		if (timestamp_cmp(drawobj->timestamp, hdr->sync_obj_ts) > 0)
+			return false;
+
+		trace_adreno_syncobj_retired(drawobj->context->id, drawobj->timestamp);
+		kgsl_drawobj_destroy(drawobj);
+		return true;
+	}
+
+	cmdobj = CMDOBJ(drawobj);
+
+	if (!kgsl_check_timestamp(device, drawobj->context,
+		drawobj->timestamp))
+		return false;
+
+	adreno_hwsched_retire_cmdobj(hwsched, cmdobj);
+	return true;
+}
+
+static void retire_drawobj_list(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct cmd_list_obj *obj, *tmp;
+
+	list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) {
+		struct kgsl_drawobj *drawobj = obj->drawobj;
+
+		if (!drawobj_retired(adreno_dev, drawobj))
+			continue;
+
+		list_del_init(&obj->node);
+
+		kmem_cache_free(obj_cache, obj);
+
+		hwsched->inflight--;
+	}
+}
+
+/* Take down the dispatcher and release any power states */
+static void hwsched_power_down(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+
+	mutex_lock(&device->mutex);
+
+	if (test_and_clear_bit(ADRENO_HWSCHED_ACTIVE, &hwsched->flags))
+		complete_all(&hwsched->idle_gate);
+
+	if (test_bit(ADRENO_HWSCHED_POWER, &hwsched->flags)) {
+		adreno_active_count_put(adreno_dev);
+		clear_bit(ADRENO_HWSCHED_POWER, &hwsched->flags);
+	}
+
+	mutex_unlock(&device->mutex);
+}
+
+static void adreno_hwsched_queue_context(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	hwsched_queue_context(adreno_dev, drawctxt);
+	adreno_hwsched_trigger(adreno_dev);
+}
+
+void adreno_hwsched_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	complete_all(&device->halt_gate);
+
+	adreno_hwsched_trigger(adreno_dev);
+}
+
+static void change_preemption(struct adreno_device *adreno_dev, void *priv)
+{
+	change_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+}
+
+static int _preemption_store(struct adreno_device *adreno_dev, bool val)
+{
+	if (!adreno_preemption_feature_set(adreno_dev) ||
+		(test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv) == val))
+		return 0;
+
+	return adreno_power_cycle(adreno_dev, change_preemption, NULL);
+}
+
+static bool _preemption_show(struct adreno_device *adreno_dev)
+{
+	return adreno_is_preemption_enabled(adreno_dev);
+}
+
+static unsigned int _preempt_count_show(struct adreno_device *adreno_dev)
+{
+	const struct adreno_hwsched_ops *hwsched_ops =
+		adreno_dev->hwsched.hwsched_ops;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 count;
+
+	mutex_lock(&device->mutex);
+
+	count = hwsched_ops->preempt_count(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+
+	return count;
+}
+
+static int _ft_long_ib_detect_store(struct adreno_device *adreno_dev, bool val)
+{
+	return adreno_power_cycle_bool(adreno_dev, &adreno_dev->long_ib_detect,
+			val);
+}
+
+static bool _ft_long_ib_detect_show(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->long_ib_detect;
+}
+
+static ADRENO_SYSFS_BOOL(preemption);
+static ADRENO_SYSFS_RO_U32(preempt_count);
+static ADRENO_SYSFS_BOOL(ft_long_ib_detect);
+
+static const struct attribute *_hwsched_attr_list[] = {
+	&adreno_attr_preemption.attr.attr,
+	&adreno_attr_preempt_count.attr.attr,
+	&adreno_attr_ft_long_ib_detect.attr.attr,
+	NULL,
+};
+
+void adreno_hwsched_deregister_hw_fence(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct adreno_hw_fence *hw_fence = &hwsched->hw_fence;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags))
+		return;
+
+	msm_hw_fence_deregister(hwsched->hw_fence.handle);
+
+	if (hw_fence->memdesc.sgt)
+		sg_free_table(hw_fence->memdesc.sgt);
+
+	memset(&hw_fence->memdesc, 0x0, sizeof(hw_fence->memdesc));
+
+	kmem_cache_destroy(hwsched->hw_fence_cache);
+
+	clear_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags);
+}
+
+static void adreno_hwsched_dispatcher_close(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!IS_ERR_OR_NULL(hwsched->worker))
+		kthread_destroy_worker(hwsched->worker);
+
+	adreno_set_dispatch_ops(adreno_dev, NULL);
+
+	kmem_cache_destroy(jobs_cache);
+	kmem_cache_destroy(obj_cache);
+
+	sysfs_remove_files(&device->dev->kobj, _hwsched_attr_list);
+
+	kfree(hwsched->ctxt_bad);
+
+	adreno_hwsched_deregister_hw_fence(adreno_dev);
+
+	if (hwsched->global_ctxtq.hostptr)
+		kgsl_sharedmem_free(&hwsched->global_ctxtq);
+}
+
+static void force_retire_timestamp(struct kgsl_device *device,
+	struct kgsl_drawobj *drawobj)
+{
+	kgsl_sharedmem_writel(device->memstore,
+		KGSL_MEMSTORE_OFFSET(drawobj->context->id, soptimestamp),
+		drawobj->timestamp);
+
+	kgsl_sharedmem_writel(device->memstore,
+		KGSL_MEMSTORE_OFFSET(drawobj->context->id, eoptimestamp),
+		drawobj->timestamp);
+}
+
+/* Return true if the drawobj needs to be replayed, false otherwise */
+static bool drawobj_replay(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj_cmd *cmdobj;
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+
+	if ((drawobj->type & SYNCOBJ_TYPE) != 0) {
+
+		if (kgsl_drawobj_events_pending(SYNCOBJ(drawobj)))
+			return true;
+
+		trace_adreno_syncobj_retired(drawobj->context->id, drawobj->timestamp);
+		kgsl_drawobj_destroy(drawobj);
+		return false;
+	}
+
+	cmdobj = CMDOBJ(drawobj);
+
+	if (kgsl_check_timestamp(device, drawobj->context,
+		drawobj->timestamp) || kgsl_context_is_bad(drawobj->context)) {
+		adreno_hwsched_retire_cmdobj(hwsched, cmdobj);
+		return false;
+	}
+
+	return true;
+}
+
+void adreno_hwsched_replay(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+	struct cmd_list_obj *obj, *tmp;
+	u32 retired = 0;
+
+	list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) {
+		struct kgsl_drawobj *drawobj = obj->drawobj;
+
+		/*
+		 * Get rid of retired objects or objects that belong to detached
+		 * or invalidated contexts
+		 */
+		if (drawobj_replay(adreno_dev, drawobj)) {
+			hwsched->hwsched_ops->submit_drawobj(adreno_dev, drawobj);
+			continue;
+		}
+
+		retired++;
+
+		list_del_init(&obj->node);
+		kmem_cache_free(obj_cache, obj);
+		hwsched->inflight--;
+	}
+
+	if (hwsched->recurring_cmdobj) {
+		u32 event;
+
+		if (kgsl_context_invalid(
+			hwsched->recurring_cmdobj->base.context)) {
+			clear_bit(CMDOBJ_RECURRING_START,
+					&hwsched->recurring_cmdobj->priv);
+			set_bit(CMDOBJ_RECURRING_STOP,
+					&hwsched->recurring_cmdobj->priv);
+			event = GPU_SSR_FATAL;
+		} else {
+			event = GPU_SSR_END;
+		}
+		gpudev->send_recurring_cmdobj(adreno_dev,
+			hwsched->recurring_cmdobj);
+		srcu_notifier_call_chain(&device->nh, event, NULL);
+	}
+
+	/* Signal fences */
+	if (retired)
+		kgsl_process_event_groups(device);
+}
+
+static void do_fault_header(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj *drawobj, int fault)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_context *drawctxt;
+	u32 status = 0, rptr = 0, wptr = 0, ib1sz = 0, ib2sz = 0;
+	u64 ib1base = 0, ib2base = 0;
+	bool gx_on = adreno_gx_is_on(adreno_dev);
+	u32 ctxt_id = 0, ts = 0;
+	int rb_id = -1;
+
+	dev_err(device->dev, "Fault id:%d and GX is %s\n", fault, gx_on ? "ON" : "OFF");
+
+	if (!gx_on && !drawobj)
+		return;
+
+	if (gpudev->fault_header)
+		return gpudev->fault_header(adreno_dev, drawobj);
+
+	if (gx_on) {
+		adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status);
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr);
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr);
+		adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+				ADRENO_REG_CP_IB1_BASE_HI, &ib1base);
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1sz);
+		adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE,
+				ADRENO_REG_CP_IB2_BASE_HI, &ib2base);
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz);
+
+		dev_err(device->dev,
+			"status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+			status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz);
+	}
+
+	if (drawobj) {
+		drawctxt = ADRENO_CONTEXT(drawobj->context);
+		drawobj->context->last_faulted_cmd_ts = drawobj->timestamp;
+		drawobj->context->total_fault_count++;
+		ctxt_id = drawobj->context->id;
+		ts = drawobj->timestamp;
+		rb_id = adreno_get_level(drawobj->context);
+
+		pr_context(device, drawobj->context,
+			"ctx %u ctx_type %s ts %u policy %lX dispatch_queue=%d\n",
+			drawobj->context->id, kgsl_context_type(drawctxt->type),
+			drawobj->timestamp, CMDOBJ(drawobj)->fault_recovery,
+			drawobj->context->gmu_dispatch_queue);
+
+		pr_context(device, drawobj->context,
+			   "cmdline: %s\n", drawctxt->base.proc_priv->cmdline);
+	}
+
+	trace_adreno_gpu_fault(ctxt_id, ts, status, rptr, wptr, ib1base, ib1sz,
+			       ib2base, ib2sz, rb_id);
+}
+
+static struct cmd_list_obj *get_active_cmdobj_lpac(
+	struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct cmd_list_obj *obj, *tmp, *active_obj = NULL;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 consumed = 0, retired = 0;
+	struct kgsl_drawobj *drawobj = NULL;
+
+	list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) {
+		drawobj = obj->drawobj;
+
+		if (!(kgsl_context_is_lpac(drawobj->context)))
+			continue;
+
+		kgsl_readtimestamp(device, drawobj->context,
+			KGSL_TIMESTAMP_CONSUMED, &consumed);
+		kgsl_readtimestamp(device, drawobj->context,
+			KGSL_TIMESTAMP_RETIRED, &retired);
+
+		if (!consumed)
+			continue;
+
+		if (consumed == retired)
+			continue;
+
+		/*
+		 * Find the first submission that started but didn't finish.
+		 * We only care about one ringbuffer for LPAC, so just look for
+		 * the first unfinished submission.
+		 */
+		if (!active_obj)
+			active_obj = obj;
+	}
+
+	if (active_obj) {
+		drawobj = active_obj->drawobj;
+
+		if (kref_get_unless_zero(&drawobj->refcount)) {
+			struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj);
+
+			set_bit(CMDOBJ_FAULT, &cmdobj->priv);
+			return active_obj;
+		}
+	}
+
+	return NULL;
+}
+
+static struct cmd_list_obj *get_active_cmdobj(
+	struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct cmd_list_obj *obj, *tmp, *active_obj = NULL;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	u32 consumed = 0, retired = 0, prio = UINT_MAX;
+	struct kgsl_drawobj *drawobj = NULL;
+
+	list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) {
+		drawobj = obj->drawobj;
+
+		/* We track LPAC separately */
+		if (!is_cmdobj(drawobj) || kgsl_context_is_lpac(drawobj->context))
+			continue;
+
+		kgsl_readtimestamp(device, drawobj->context,
+			KGSL_TIMESTAMP_CONSUMED, &consumed);
+		kgsl_readtimestamp(device, drawobj->context,
+			KGSL_TIMESTAMP_RETIRED, &retired);
+
+		if (!consumed)
+			continue;
+
+		if (consumed == retired)
+			continue;
+
+		/* Find the first submission that started but didn't finish */
+		if (!active_obj) {
+			active_obj = obj;
+			prio = adreno_get_level(drawobj->context);
+			continue;
+		}
+
+		/* Find the highest priority active submission */
+		if (adreno_get_level(drawobj->context) < prio) {
+			active_obj = obj;
+			prio = adreno_get_level(drawobj->context);
+		}
+	}
+
+	if (active_obj) {
+		struct kgsl_drawobj_cmd *cmdobj;
+
+		drawobj = active_obj->drawobj;
+		cmdobj = CMDOBJ(drawobj);
+
+		if (kref_get_unless_zero(&drawobj->refcount)) {
+			set_bit(CMDOBJ_FAULT, &cmdobj->priv);
+			return active_obj;
+		}
+	}
+
+	return NULL;
+}
+
+static struct cmd_list_obj *get_fault_cmdobj(struct adreno_device *adreno_dev,
+				u32 ctxt_id, u32 ts)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct cmd_list_obj *obj, *tmp;
+
+	list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) {
+		struct kgsl_drawobj *drawobj = obj->drawobj;
+
+		if (!is_cmdobj(drawobj))
+			continue;
+
+		if ((ctxt_id == drawobj->context->id) &&
+			(ts == drawobj->timestamp)) {
+			if (kref_get_unless_zero(&drawobj->refcount)) {
+				struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj);
+
+				set_bit(CMDOBJ_FAULT, &cmdobj->priv);
+				return obj;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static bool context_is_throttled(struct kgsl_device *device,
+	struct kgsl_context *context)
+{
+	if (ktime_ms_delta(ktime_get(), context->fault_time) >
+		_fault_throttle_time) {
+		context->fault_time = ktime_get();
+		context->fault_count = 1;
+		return false;
+	}
+
+	context->fault_count++;
+
+	if (context->fault_count > _fault_throttle_burst) {
+		pr_context(device, context,
+			"gpu fault threshold exceeded %d faults in %d msecs\n",
+			_fault_throttle_burst, _fault_throttle_time);
+		return true;
+	}
+
+	return false;
+}
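A worked timeline for the throttle above, assuming the default _fault_throttle_time of 2000 ms and _fault_throttle_burst of 3, counting faults on a single context:

/*
 *   t=0ms    fault #1 -> window opens, fault_count = 1, not throttled
 *   t=500ms  fault #2 -> fault_count = 2, not throttled
 *   t=900ms  fault #3 -> fault_count = 3, not throttled
 *   t=1200ms fault #4 -> fault_count = 4 > 3, context is treated as throttled
 *   t=3500ms fault    -> more than 2000 ms since the window opened, so the
 *                        window resets and fault_count starts again at 1
 */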
+
+static void _print_syncobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj)
+{
+	int i, j, fence_index = 0;
+	struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i];
+		struct kgsl_sync_fence_cb *kcb = event->handle;
+		struct dma_fence **fences;
+		struct dma_fence_array *array;
+		u32 num_fences;
+
+		array = to_dma_fence_array(kcb->fence);
+		if (array != NULL) {
+			num_fences = array->num_fences;
+			fences = array->fences;
+		} else {
+			num_fences = 1;
+			fences = &kcb->fence;
+		}
+
+		for (j = 0; j < num_fences; j++, fence_index++) {
+			bool kgsl = is_kgsl_fence(fences[j]);
+			bool signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fences[j]->flags);
+			char value[32] = "unknown";
+
+			if (fences[j]->ops->timeline_value_str)
+				fences[j]->ops->timeline_value_str(fences[j], value, sizeof(value));
+
+			dev_err(device->dev,
+				"dma fence[%d] signaled:%d kgsl:%d ctx:%llu seqno:%llu value:%s\n",
+				fence_index, signaled, kgsl, fences[j]->context, fences[j]->seqno,
+				value);
+		}
+	}
+}
+
+static void print_fault_syncobj(struct adreno_device *adreno_dev,
+				u32 ctxt_id, u32 ts)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct cmd_list_obj *obj;
+
+	list_for_each_entry(obj, &hwsched->cmd_list, node) {
+		struct kgsl_drawobj *drawobj = obj->drawobj;
+
+		if (drawobj->type == SYNCOBJ_TYPE) {
+			if ((ctxt_id == drawobj->context->id) &&
+			(ts == drawobj->timestamp))
+				_print_syncobj(adreno_dev, drawobj);
+		}
+	}
+}
+
+static void adreno_hwsched_reset_and_snapshot_legacy(struct adreno_device *adreno_dev, int fault)
+{
+	struct kgsl_drawobj *drawobj = NULL;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context = NULL;
+	struct cmd_list_obj *obj;
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct hfi_context_bad_cmd_legacy *cmd = hwsched->ctxt_bad;
+
+	if (device->state != KGSL_STATE_ACTIVE)
+		return;
+
+	if (hwsched->recurring_cmdobj)
+		srcu_notifier_call_chain(&device->nh, GPU_SSR_BEGIN, NULL);
+
+	if (cmd->error == GMU_SYNCOBJ_TIMEOUT_ERROR) {
+		print_fault_syncobj(adreno_dev, cmd->ctxt_id, cmd->ts);
+		gmu_core_fault_snapshot(device);
+		goto done;
+	}
+
+	/*
+	 * First, try to see if the faulted command object is marked
+	 * in case there was a context bad hfi. But, with stall-on-fault,
+	 * we know that GMU cannot send context bad hfi. Hence, attempt
+	 * to walk the list of active submissions to find the one that
+	 * faulted.
+	 */
+	obj = get_fault_cmdobj(adreno_dev, cmd->ctxt_id, cmd->ts);
+	if (!obj && (fault & ADRENO_IOMMU_PAGE_FAULT))
+		obj = get_active_cmdobj(adreno_dev);
+
+	if (obj) {
+		drawobj = obj->drawobj;
+		trace_adreno_cmdbatch_fault(CMDOBJ(drawobj), fault);
+	} else if (hwsched->recurring_cmdobj &&
+		hwsched->recurring_cmdobj->base.context->id == cmd->ctxt_id) {
+		drawobj = DRAWOBJ(hwsched->recurring_cmdobj);
+		trace_adreno_cmdbatch_fault(hwsched->recurring_cmdobj, fault);
+		if (!kref_get_unless_zero(&drawobj->refcount))
+			drawobj = NULL;
+	}
+
+	if (!drawobj) {
+		if (fault & ADRENO_GMU_FAULT)
+			gmu_core_fault_snapshot(device);
+		else
+			kgsl_device_snapshot(device, NULL, NULL, false);
+		goto done;
+	}
+
+	context = drawobj->context;
+
+	do_fault_header(adreno_dev, drawobj, fault);
+
+	kgsl_device_snapshot(device, context, NULL, false);
+
+	force_retire_timestamp(device, drawobj);
+
+	if ((context->flags & KGSL_CONTEXT_INVALIDATE_ON_FAULT) ||
+		(context->flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE) ||
+		(cmd->error == GMU_GPU_SW_HANG) ||
+		(cmd->error == GMU_GPU_SW_FUSE_VIOLATION) ||
+		context_is_throttled(device, context)) {
+		adreno_drawctxt_set_guilty(device, context);
+	}
+
+	/*
+	 * Put back the reference which we incremented while trying to find
+	 * faulted command object
+	 */
+	kgsl_drawobj_put(drawobj);
+done:
+	memset(hwsched->ctxt_bad, 0x0, HFI_MAX_MSG_SIZE);
+	gpudev->reset(adreno_dev);
+}
+
+static void adreno_hwsched_reset_and_snapshot(struct adreno_device *adreno_dev, int fault)
+{
+	struct kgsl_drawobj *drawobj = NULL;
+	struct kgsl_drawobj *drawobj_lpac = NULL;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_context *context = NULL;
+	struct kgsl_context *context_lpac = NULL;
+	struct cmd_list_obj *obj;
+	struct cmd_list_obj *obj_lpac;
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct hfi_context_bad_cmd *cmd = hwsched->ctxt_bad;
+
+	if (device->state != KGSL_STATE_ACTIVE)
+		return;
+
+	if (hwsched->recurring_cmdobj)
+		srcu_notifier_call_chain(&device->nh, GPU_SSR_BEGIN, NULL);
+
+	if (cmd->error == GMU_SYNCOBJ_TIMEOUT_ERROR) {
+		print_fault_syncobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts);
+		gmu_core_fault_snapshot(device);
+		goto done;
+	}
+
+	/*
+	 * First, try to see if the faulted command object is marked
+	 * in case there was a context bad hfi. But, with stall-on-fault,
+	 * we know that GMU cannot send context bad hfi. Hence, attempt
+	 * to walk the list of active submissions to find the one that
+	 * faulted.
+	 */
+	obj = get_fault_cmdobj(adreno_dev, cmd->gc.ctxt_id, cmd->gc.ts);
+	obj_lpac = get_fault_cmdobj(adreno_dev, cmd->lpac.ctxt_id, cmd->lpac.ts);
+
+	if (!obj && (fault & ADRENO_IOMMU_PAGE_FAULT))
+		obj = get_active_cmdobj(adreno_dev);
+
+	if (obj) {
+		drawobj = obj->drawobj;
+		CMDOBJ(drawobj)->fault_recovery = cmd->gc.policy;
+	} else if (hwsched->recurring_cmdobj &&
+		hwsched->recurring_cmdobj->base.context->id == cmd->gc.ctxt_id) {
+		drawobj = DRAWOBJ(hwsched->recurring_cmdobj);
+		CMDOBJ(drawobj)->fault_recovery = cmd->gc.policy;
+		if (!kref_get_unless_zero(&drawobj->refcount))
+			drawobj = NULL;
+	}
+
+	do_fault_header(adreno_dev, drawobj, fault);
+
+	if (!obj_lpac && (fault & ADRENO_IOMMU_PAGE_FAULT))
+		obj_lpac = get_active_cmdobj_lpac(adreno_dev);
+
+	if (!obj && !obj_lpac) {
+		if (fault & ADRENO_GMU_FAULT)
+			gmu_core_fault_snapshot(device);
+		else
+			kgsl_device_snapshot(device, NULL, NULL, false);
+		goto done;
+	}
+
+	if (obj)
+		context = drawobj->context;
+
+	if (obj_lpac) {
+		drawobj_lpac = obj_lpac->drawobj;
+		CMDOBJ(drawobj_lpac)->fault_recovery = cmd->lpac.policy;
+		context_lpac  = drawobj_lpac->context;
+		if (gpudev->lpac_fault_header)
+			gpudev->lpac_fault_header(adreno_dev, drawobj_lpac);
+	}
+
+	kgsl_device_snapshot(device, context, context_lpac, false);
+
+	if (drawobj) {
+		force_retire_timestamp(device, drawobj);
+		if (context && ((context->flags & KGSL_CONTEXT_INVALIDATE_ON_FAULT) ||
+			(context->flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE) ||
+			(cmd->error == GMU_GPU_SW_HANG) ||
+			(cmd->error == GMU_GPU_SW_FUSE_VIOLATION) ||
+			context_is_throttled(device, context)))
+			adreno_drawctxt_set_guilty(device, context);
+		/*
+		 * Put back the reference which we incremented while trying to find
+		 * faulted command object
+		 */
+		kgsl_drawobj_put(drawobj);
+	}
+
+	if (drawobj_lpac) {
+		force_retire_timestamp(device, drawobj_lpac);
+		if (context_lpac && ((context_lpac->flags & KGSL_CONTEXT_INVALIDATE_ON_FAULT) ||
+			(context_lpac->flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE) ||
+			(cmd->error == GMU_GPU_SW_HANG) ||
+			(cmd->error == GMU_GPU_SW_FUSE_VIOLATION) ||
+			context_is_throttled(device, context_lpac)))
+			adreno_drawctxt_set_guilty(device, context_lpac);
+		/*
+		 * Put back the reference which we incremented while trying to find
+		 * faulted command object
+		 */
+		kgsl_drawobj_put(drawobj_lpac);
+	}
+done:
+	memset(hwsched->ctxt_bad, 0x0, HFI_MAX_MSG_SIZE);
+	gpudev->reset(adreno_dev);
+}
+
+static bool adreno_hwsched_do_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	int fault;
+
+	fault = atomic_xchg(&hwsched->fault, 0);
+	if (fault == 0)
+		return false;
+
+	mutex_lock(&device->mutex);
+
+	if (test_bit(ADRENO_HWSCHED_CTX_BAD_LEGACY, &hwsched->flags))
+		adreno_hwsched_reset_and_snapshot_legacy(adreno_dev, fault);
+	else
+		adreno_hwsched_reset_and_snapshot(adreno_dev, fault);
+
+	adreno_hwsched_trigger(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+
+	return true;
+}
+
+static void adreno_hwsched_work(struct kthread_work *work)
+{
+	struct adreno_hwsched *hwsched = container_of(work,
+			struct adreno_hwsched, work);
+	struct adreno_device *adreno_dev = container_of(hwsched,
+			struct adreno_device, hwsched);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	mutex_lock(&hwsched->mutex);
+
+	if (adreno_hwsched_do_fault(adreno_dev)) {
+		mutex_unlock(&hwsched->mutex);
+		return;
+	}
+
+	/*
+	 * As long as there are inflight commands, process retired commands
+	 * from all drawqueues
+	 */
+	retire_drawobj_list(adreno_dev);
+
+	/* Signal fences */
+	kgsl_process_event_groups(device);
+
+	/* Run the scheduler to dispatch new commands */
+	hwsched_issuecmds(adreno_dev);
+
+	if (hwsched->inflight == 0) {
+		hwsched_power_down(adreno_dev);
+	} else {
+		mutex_lock(&device->mutex);
+		kgsl_pwrscale_update(device);
+		kgsl_start_idle_timer(device);
+		mutex_unlock(&device->mutex);
+	}
+
+	mutex_unlock(&hwsched->mutex);
+}
+
+void adreno_hwsched_fault(struct adreno_device *adreno_dev,
+		u32 fault)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	u32 curr = atomic_read(&hwsched->fault);
+
+	atomic_set(&hwsched->fault, curr | fault);
+
+	/* make sure fault is written before triggering dispatcher */
+	smp_wmb();
+
+	adreno_hwsched_trigger(adreno_dev);
+}
+
+void adreno_hwsched_clear_fault(struct adreno_device *adreno_dev)
+{
+	atomic_set(&adreno_dev->hwsched.fault, 0);
+
+	/* make sure other CPUs see the update */
+	smp_wmb();
+}
+
+static bool is_tx_slot_available(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	void *ptr = hwsched->hw_fence.mem_descriptor.virtual_addr;
+	struct msm_hw_fence_hfi_queue_header *hdr = (struct msm_hw_fence_hfi_queue_header *)
+		(ptr + sizeof(struct msm_hw_fence_hfi_queue_table_header));
+	u32 queue_size_dwords = hdr->queue_size / sizeof(u32);
+	u32 payload_size_dwords = hdr->pkt_size / sizeof(u32);
+	u32 free_dwords, write_idx = hdr->write_index, read_idx = hdr->read_index;
+	u32 reserved_dwords = atomic_read(&hwsched->hw_fence_count) * payload_size_dwords;
+
+	free_dwords = read_idx <= write_idx ?
+		queue_size_dwords - (write_idx - read_idx) :
+		read_idx - write_idx;
+
+	if (free_dwords - reserved_dwords <= payload_size_dwords)
+		return false;
+
+	return true;
+}
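+
+/*
+ * Illustration of the free-space math above, with made-up numbers rather than
+ * real queue contents: if queue_size_dwords = 1024, write_idx = 100 and
+ * read_idx = 40, then free_dwords = 1024 - (100 - 40) = 964. With three
+ * fences of payload_size_dwords = 8 still reserved, reserved_dwords = 24 and
+ * a new fence fits because 964 - 24 > 8. Once the write index wraps around,
+ * read_idx > write_idx and the free space is simply read_idx - write_idx.
+ */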
+
+static void adreno_hwsched_create_hw_fence(struct adreno_device *adreno_dev,
+	struct kgsl_sync_fence *kfence)
+{
+	struct kgsl_sync_timeline *ktimeline = kfence->parent;
+	struct kgsl_context *context = ktimeline->context;
+	const struct adreno_hwsched_ops *hwsched_ops =
+				adreno_dev->hwsched.hwsched_ops;
+
+	if (!test_bit(ADRENO_HWSCHED_HW_FENCE, &adreno_dev->hwsched.flags))
+		return;
+
+	/* Do not create a hardware backed fence, if this context is bad or going away */
+	if (kgsl_context_is_bad(context))
+		return;
+
+	if (!is_tx_slot_available(adreno_dev))
+		return;
+
+	hwsched_ops->create_hw_fence(adreno_dev, kfence);
+}
+
+static const struct adreno_dispatch_ops hwsched_ops = {
+	.close = adreno_hwsched_dispatcher_close,
+	.queue_cmds = adreno_hwsched_queue_cmds,
+	.queue_context = adreno_hwsched_queue_context,
+	.fault = adreno_hwsched_fault,
+	.create_hw_fence = adreno_hwsched_create_hw_fence,
+	.get_fault = adreno_hwsched_gpu_fault,
+};
+
+static void hwsched_lsr_check(struct work_struct *work)
+{
+	struct adreno_hwsched *hwsched = container_of(work,
+		struct adreno_hwsched, lsr_check_ws);
+	struct adreno_device *adreno_dev = container_of(hwsched,
+		struct adreno_device, hwsched);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	mutex_lock(&device->mutex);
+	kgsl_pwrscale_update_stats(device);
+	kgsl_pwrscale_update(device);
+	mutex_unlock(&device->mutex);
+
+	mod_timer(&hwsched->lsr_timer, jiffies + msecs_to_jiffies(10));
+}
+
+static void hwsched_lsr_timer(struct timer_list *t)
+{
+	struct adreno_hwsched *hwsched = container_of(t, struct adreno_hwsched,
+					lsr_timer);
+
+	kgsl_schedule_work(&hwsched->lsr_check_ws);
+}
+
+int adreno_hwsched_init(struct adreno_device *adreno_dev,
+	const struct adreno_hwsched_ops *target_hwsched_ops)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	int i;
+
+	memset(hwsched, 0, sizeof(*hwsched));
+
+	hwsched->ctxt_bad = kzalloc(HFI_MAX_MSG_SIZE, GFP_KERNEL);
+	if (!hwsched->ctxt_bad)
+		return -ENOMEM;
+
+	hwsched->worker = kthread_create_worker(0, "kgsl_hwsched");
+	if (IS_ERR(hwsched->worker)) {
+		kfree(hwsched->ctxt_bad);
+		return PTR_ERR(hwsched->worker);
+	}
+
+	mutex_init(&hwsched->mutex);
+
+	kthread_init_work(&hwsched->work, adreno_hwsched_work);
+
+	jobs_cache = KMEM_CACHE(adreno_dispatch_job, 0);
+	obj_cache = KMEM_CACHE(cmd_list_obj, 0);
+
+	INIT_LIST_HEAD(&hwsched->cmd_list);
+
+	for (i = 0; i < ARRAY_SIZE(hwsched->jobs); i++) {
+		init_llist_head(&hwsched->jobs[i]);
+		init_llist_head(&hwsched->requeue[i]);
+	}
+
+	sched_set_fifo(hwsched->worker->task);
+
+	WARN_ON(sysfs_create_files(&device->dev->kobj, _hwsched_attr_list));
+	adreno_set_dispatch_ops(adreno_dev, &hwsched_ops);
+	hwsched->hwsched_ops = target_hwsched_ops;
+	init_completion(&hwsched->idle_gate);
+	complete_all(&hwsched->idle_gate);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LSR)) {
+		INIT_WORK(&hwsched->lsr_check_ws, hwsched_lsr_check);
+		timer_setup(&hwsched->lsr_timer, hwsched_lsr_timer, 0);
+	}
+
+	return 0;
+}
+
+void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct cmd_list_obj *obj, *tmp;
+
+	/*
+	 * During IB parse, vmalloc is called which can sleep and
+	 * should not be called from atomic context. Since IBs are not
+	 * dumped during atomic snapshot, there is no need to parse it.
+	 */
+	if (adreno_dev->dev.snapshot_atomic)
+		return;
+
+	list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) {
+		struct kgsl_drawobj *drawobj = obj->drawobj;
+		struct kgsl_drawobj_cmd *cmdobj;
+
+		if (!is_cmdobj(drawobj))
+			continue;
+
+		cmdobj = CMDOBJ(drawobj);
+
+		if (test_bit(CMDOBJ_FAULT, &cmdobj->priv)) {
+			struct kgsl_memobj_node *ib;
+
+			list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+				if (drawobj->context->flags & KGSL_CONTEXT_LPAC)
+					adreno_parse_ib_lpac(KGSL_DEVICE(adreno_dev),
+						snapshot, snapshot->process_lpac,
+						ib->gpuaddr, ib->size >> 2);
+				else
+					adreno_parse_ib(KGSL_DEVICE(adreno_dev),
+						snapshot, snapshot->process,
+						ib->gpuaddr, ib->size >> 2);
+			}
+			clear_bit(CMDOBJ_FAULT, &cmdobj->priv);
+		}
+	}
+}
+
+static int unregister_context(int id, void *ptr, void *data)
+{
+	struct kgsl_context *context = ptr;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+
+	if (drawctxt->gmu_context_queue.gmuaddr != 0) {
+		struct gmu_context_queue_header *header =  drawctxt->gmu_context_queue.hostptr;
+
+		header->read_index = header->write_index;
+		/* This is to make sure GMU sees the correct indices after recovery */
+		mb();
+	}
+
+	/*
+	 * We don't need to send the unregister hfi packet because
+	 * we are anyway going to lose the gmu state of registered
+	 * contexts. So just reset the flag so that the context
+	 * registers with gmu on its first submission post slumber.
+	 */
+	context->gmu_registered = false;
+
+	/*
+	 * Consider the scenario where non-recurring submissions were made
+	 * by a context. Here internal_timestamp of context would be non
+	 * zero. After slumber, last retired timestamp is not held by GMU.
+	 * If this context submits a recurring workload, the context is
+	 * registered again, but the internal timestamp is not updated. When
+	 * the context is unregistered in send_context_unregister_hfi(),
+	 * we could be waiting on old internal_timestamp which is not held by
+	 * GMU. This can result in GMU errors. Hence set internal_timestamp
+	 * to zero when entering slumber.
+	 */
+	drawctxt->internal_timestamp = 0;
+
+	return 0;
+}
+
+void adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+
+	read_lock(&device->context_lock);
+	idr_for_each(&device->context_idr, unregister_context, NULL);
+	read_unlock(&device->context_lock);
+
+	if (hwsched->global_ctxtq.hostptr) {
+		struct gmu_context_queue_header *header = hwsched->global_ctxtq.hostptr;
+
+		header->read_index = header->write_index;
+		/* This is to make sure GMU sees the correct indices after recovery */
+		mb();
+	}
+
+	hwsched->global_ctxt_gmu_registered = false;
+}
+
+static int hwsched_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	int ret;
+
+	/* Block any new submissions from being submitted */
+	adreno_get_gpu_halt(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+
+	/*
+	 * Flush the worker to make sure all executing
+	 * or pending dispatcher works on worker are
+	 * finished
+	 */
+	adreno_hwsched_flush(adreno_dev);
+
+	ret = wait_for_completion_timeout(&hwsched->idle_gate,
+			msecs_to_jiffies(ADRENO_IDLE_TIMEOUT));
+	if (ret == 0) {
+		ret = -ETIMEDOUT;
+		WARN(1, "hwsched halt timeout\n");
+	} else if (ret < 0) {
+		dev_err(device->dev, "hwsched halt failed %d\n", ret);
+	} else {
+		ret = 0;
+	}
+
+	mutex_lock(&device->mutex);
+
+	/*
+	 * This will allow the dispatcher to start submitting to
+	 * hardware once device mutex is released
+	 */
+	adreno_put_gpu_halt(adreno_dev);
+
+	/*
+	 * Requeue dispatcher work to resubmit pending commands
+	 * that may have been blocked due to this idling request
+	 */
+	adreno_hwsched_trigger(adreno_dev);
+	return ret;
+}
+
+int adreno_hwsched_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned long wait = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EDEADLK;
+
+	if (!kgsl_state_is_awake(device))
+		return 0;
+
+	ret = hwsched_idle(adreno_dev);
+	if (ret)
+		return ret;
+
+	do {
+		if (adreno_hwsched_gpu_fault(adreno_dev))
+			return -EDEADLK;
+
+		if (gpudev->hw_isidle(adreno_dev))
+			return 0;
+	} while (time_before(jiffies, wait));
+
+	/*
+	 * Under rare conditions, preemption can cause the while loop to exit
+	 * without checking if the gpu is idle. check one last time before we
+	 * return failure.
+	 */
+	if (adreno_hwsched_gpu_fault(adreno_dev))
+		return -EDEADLK;
+
+	if (gpudev->hw_isidle(adreno_dev))
+		return 0;
+
+	return -ETIMEDOUT;
+}
+
+void adreno_hwsched_register_hw_fence(struct adreno_device *adreno_dev)
+{
+	struct adreno_hwsched *hwsched = &adreno_dev->hwsched;
+	struct adreno_hw_fence *hw_fence = &hwsched->hw_fence;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_HW_FENCE))
+		return;
+
+	/* Enable hardware fences only if context queues are enabled */
+	if (!adreno_hwsched_context_queue_enabled(adreno_dev))
+		return;
+
+	if (test_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags))
+		return;
+
+	hw_fence->handle = msm_hw_fence_register(HW_FENCE_CLIENT_ID_CTX0,
+				&hw_fence->mem_descriptor);
+	if (IS_ERR_OR_NULL(hw_fence->handle)) {
+		dev_err(device->dev, "HW fences not supported: %ld\n",
+			PTR_ERR_OR_ZERO(hw_fence->handle));
+		hw_fence->handle = NULL;
+		return;
+	}
+
+	/*
+	 * We need to set up the memory descriptor with the physical address of the Tx/Rx Queues so
+	 * that these buffers can be imported in to GMU VA space
+	 */
+	kgsl_memdesc_init(device, &hw_fence->memdesc, 0);
+	hw_fence->memdesc.physaddr = hw_fence->mem_descriptor.device_addr;
+	hw_fence->memdesc.size = hw_fence->mem_descriptor.size;
+	hw_fence->memdesc.hostptr = hw_fence->mem_descriptor.virtual_addr;
+
+	ret = kgsl_memdesc_sg_dma(&hw_fence->memdesc, hw_fence->memdesc.physaddr,
+		hw_fence->memdesc.size);
+	if (ret) {
+		dev_err(device->dev, "Failed to setup HW fences memdesc: %d\n",
+			ret);
+		msm_hw_fence_deregister(hw_fence->handle);
+		hw_fence->handle = NULL;
+		memset(&hw_fence->memdesc, 0x0, sizeof(hw_fence->memdesc));
+		return;
+	}
+
+	hwsched->hw_fence_cache = KMEM_CACHE(adreno_hw_fence_entry, 0);
+
+	set_bit(ADRENO_HWSCHED_HW_FENCE, &hwsched->flags);
+}
+
+int adreno_hwsched_wait_ack_completion(struct adreno_device *adreno_dev,
+	struct device *dev, struct pending_cmd *ack,
+	void (*process_msgq)(struct adreno_device *adreno_dev))
+{
+	int rc;
+	/* Only allow a single log in a second */
+	static DEFINE_RATELIMIT_STATE(_rs, HZ, 1);
+	static u32 unprocessed, processed;
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	u64 start, end;
+
+	start = gpudev->read_alwayson(adreno_dev);
+	rc = wait_for_completion_timeout(&ack->complete,
+		msecs_to_jiffies(HFI_RSP_TIMEOUT));
+	/*
+	 * A non-zero return value means the wait completed, whereas zero
+	 * indicates a timeout
+	 */
+	if (rc) {
+		/*
+		 * If an ack goes unprocessed, keep track of processed and unprocessed acks
+		 * because we may not log each unprocessed ack due to ratelimiting
+		 */
+		if (unprocessed)
+			processed++;
+		return 0;
+	}
+
+	/*
+	 * It is possible the ack came, but due to HLOS latencies in processing hfi interrupt
+	 * and/or the f2h daemon, the ack isn't processed yet. Hence, process the msgq one last
+	 * time.
+	 */
+	process_msgq(adreno_dev);
+	end = gpudev->read_alwayson(adreno_dev);
+	if (completion_done(&ack->complete)) {
+		unprocessed++;
+		if (__ratelimit(&_rs))
+			dev_err(dev, "Ack unprocessed for id:%d sequence=%d count=%d/%d ticks=%llu/%llu\n",
+				MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr),
+				unprocessed, processed, start, end);
+		return 0;
+	}
+
+	dev_err(dev, "Ack timeout for id:%d sequence=%d ticks=%llu/%llu\n",
+		MSG_HDR_GET_ID(ack->sent_hdr), MSG_HDR_GET_SEQNUM(ack->sent_hdr), start, end);
+	gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev));
+	return -ETIMEDOUT;
+}
+
+int adreno_hwsched_ctxt_unregister_wait_completion(
+	struct adreno_device *adreno_dev,
+	struct device *dev, struct pending_cmd *ack,
+	void (*process_msgq)(struct adreno_device *adreno_dev),
+	struct hfi_unregister_ctxt_cmd *cmd)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret;
+	u64 start, end;
+
+	start = gpudev->read_alwayson(adreno_dev);
+	mutex_unlock(&device->mutex);
+
+	ret = wait_for_completion_timeout(&ack->complete,
+		msecs_to_jiffies(30 * 1000));
+
+	mutex_lock(&device->mutex);
+	if (ret)
+		return 0;
+
+	/*
+	 * It is possible the ack came, but due to HLOS latencies in processing hfi interrupt
+	 * and/or the f2h daemon, the ack isn't processed yet. Hence, process the msgq one last
+	 * time.
+	 */
+	process_msgq(adreno_dev);
+	end = gpudev->read_alwayson(adreno_dev);
+
+	if (completion_done(&ack->complete)) {
+		dev_err_ratelimited(dev,
+			"Ack unprocessed for context unregister seq: %d ctx: %u ts: %u ticks=%llu/%llu\n",
+			MSG_HDR_GET_SEQNUM(ack->sent_hdr), cmd->ctxt_id,
+			cmd->ts, start, end);
+		return 0;
+	}
+
+	dev_err_ratelimited(dev,
+		"Ack timeout for context unregister seq: %d ctx: %u ts: %u ticks=%llu/%llu\n",
+		MSG_HDR_GET_SEQNUM(ack->sent_hdr), cmd->ctxt_id, cmd->ts, start, end);
+	return -ETIMEDOUT;
+}
+
+u32 adreno_hwsched_parse_payload(struct payload_section *payload, u32 key)
+{
+	u32 i;
+
+	/* Each key-value pair is 2 dwords */
+	for (i = 0; i < payload->dwords; i += 2) {
+		if (payload->data[i] == key)
+			return payload->data[i + 1];
+	}
+
+	return 0;
+}
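+
+/*
+ * For illustration only (KEY_A/KEY_B and the values are hypothetical, not
+ * real HFI keys): adreno_hwsched_parse_payload() expects the payload data to
+ * be a flat list of {key, value} dword pairs, e.g.
+ *
+ *	payload->dwords = 4;
+ *	payload->data[] = { KEY_A, 0x10, KEY_B, 0x20 };
+ *
+ * so a lookup of KEY_B walks offsets 0 and 2 and returns 0x20, while a key
+ * that is not present returns 0.
+ */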
+
+static void adreno_hwsched_lookup_key_value(struct adreno_device *adreno_dev,
+		u32 type, u32 key, u32 *ptr, u32 num_values)
+{
+	struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad;
+	u32 i = 0, payload_bytes;
+	void *start;
+
+	if (!cmd->hdr)
+		return;
+
+	payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) -
+			offsetof(struct hfi_context_bad_cmd, payload);
+
+	start = &cmd->payload[0];
+
+	while (i < payload_bytes) {
+		struct payload_section *payload = start + i;
+
+		if (payload->type == type) {
+			u32 j;
+
+			/* Each key-value entry is 'num_values + 1' dwords */
+			for (j = 0; j + num_values < payload->dwords; j += num_values + 1) {
+				if (payload->data[j] != key)
+					continue;
+
+				memcpy(ptr, &payload->data[j + 1], num_values * sizeof(u32));
+				return;
+			}
+		}
+
+		i += struct_size(payload, data, payload->dwords);
+	}
+}
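+
+/*
+ * Sketch of how the multi-value lookup above is used (the payload contents
+ * here are hypothetical): adreno_hwsched_log_nonfatal_gpu_fault() below asks
+ * for num_values = 2, so a PAYLOAD_FAULT_REGS section laid out as
+ *
+ *	payload->data[] = { KEY_CP_AHB_ERROR, 0x10, 0x20 };
+ *
+ * fills err_details[0] = 0x10 and err_details[1] = 0x20.
+ */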
+
+bool adreno_hwsched_log_nonfatal_gpu_fault(struct adreno_device *adreno_dev,
+		struct device *dev, u32 error)
+{
+	bool non_fatal = true;
+
+	switch (error) {
+	case GMU_CP_AHB_ERROR: {
+		u32 err_details[2];
+
+		adreno_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+						KEY_CP_AHB_ERROR, err_details, 2);
+		dev_crit_ratelimited(dev,
+			"CP: AHB bus error, CP_RL_ERROR_DETAILS_0:0x%x CP_RL_ERROR_DETAILS_1:0x%x\n",
+			err_details[0], err_details[1]);
+		break;
+	}
+	case GMU_ATB_ASYNC_FIFO_OVERFLOW:
+		dev_crit_ratelimited(dev, "RBBM: ATB ASYNC overflow\n");
+		break;
+	case GMU_RBBM_ATB_BUF_OVERFLOW:
+		dev_crit_ratelimited(dev, "RBBM: ATB bus overflow\n");
+		break;
+	case GMU_UCHE_OOB_ACCESS:
+		dev_crit_ratelimited(dev, "UCHE: Out of bounds access\n");
+		break;
+	case GMU_UCHE_TRAP_INTR:
+		dev_crit_ratelimited(dev, "UCHE: Trap interrupt\n");
+		break;
+	case GMU_TSB_WRITE_ERROR: {
+		u32 addr[2];
+
+		adreno_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS,
+						KEY_TSB_WRITE_ERROR, addr, 2);
+		dev_crit_ratelimited(dev, "TSB: Write error interrupt: Address: 0x%lx MID: %lu\n",
+			FIELD_GET(GENMASK(16, 0), addr[1]) << 32 | addr[0],
+			FIELD_GET(GENMASK(31, 23), addr[1]));
+		break;
+	}
+	default:
+		non_fatal = false;
+		break;
+	}
+
+	return non_fatal;
+}

+ 273 - 0
qcom/opensource/graphics-kernel/adreno_hwsched.h

@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ADRENO_HWSCHED_H_
+#define _ADRENO_HWSCHED_H_
+
+#include <linux/soc/qcom/msm_hw_fence.h>
+
+#include "kgsl_sync.h"
+
+/* This structure represents inflight command object */
+struct cmd_list_obj {
+	/** @drawobj: Handle to the draw object */
+	struct kgsl_drawobj *drawobj;
+	/** @node: List node to put it in the list of inflight commands */
+	struct list_head node;
+};
+
+/**
+ * struct adreno_hw_fence_entry - A structure to store hardware fence and the context
+ */
+struct adreno_hw_fence_entry {
+	/** @cmd: H2F_MSG_HW_FENCE_INFO packet for this hardware fence */
+	struct hfi_hw_fence_info cmd;
+	/** @kfence: Pointer to the kgsl fence */
+	struct kgsl_sync_fence *kfence;
+	/** @drawctxt: Pointer to the context */
+	struct adreno_context *drawctxt;
+	/** @node: list node to add it to a list */
+	struct list_head node;
+	/** @reset_node: list node to add it to post reset list of hardware fences */
+	struct list_head reset_node;
+};
+
+/**
+ * struct adreno_hwsched_ops - Function table to hook hwscheduler things
+ * to target specific routines
+ */
+struct adreno_hwsched_ops {
+	/**
+	 * @submit_drawobj - Target specific function to submit IBs to hardware
+	 */
+	int (*submit_drawobj)(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj *drawobj);
+	/**
+	 * @preempt_count - Target specific function to get preemption count
+	 */
+	u32 (*preempt_count)(struct adreno_device *adreno_dev);
+	/**
+	 * @create_hw_fence - Target specific function to create a hardware fence
+	 */
+	void (*create_hw_fence)(struct adreno_device *adreno_dev,
+		struct kgsl_sync_fence *kfence);
+
+};
+
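+/*
+ * Illustrative sketch only; the axxx_* names below are hypothetical and not
+ * part of this driver. A target hooks its submission path in by handing a
+ * table like this to adreno_hwsched_init():
+ *
+ *	static const struct adreno_hwsched_ops axxx_hwsched_ops = {
+ *		.submit_drawobj = axxx_hwsched_submit_drawobj,
+ *		.preempt_count = axxx_hwsched_preempt_count,
+ *		.create_hw_fence = axxx_hwsched_create_hw_fence,
+ *	};
+ *
+ *	adreno_hwsched_init(adreno_dev, &axxx_hwsched_ops);
+ */
+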
+/**
+ * struct adreno_hw_fence - Container for hardware fences instance
+ */
+struct adreno_hw_fence {
+	/** @handle: Handle for hardware fences */
+	void *handle;
+	/** @mem_descriptor: Memory descriptor for hardware fences */
+	struct msm_hw_fence_mem_addr mem_descriptor;
+	/** @memdesc: Kgsl memory descriptor for hardware fences queue */
+	struct kgsl_memdesc memdesc;
+};
+
+/**
+ * struct adreno_hwsched - Container for the hardware scheduler
+ */
+struct adreno_hwsched {
+	 /** @mutex: Mutex needed to run dispatcher function */
+	struct mutex mutex;
+	/** @flags: Container for the dispatcher internal flags */
+	unsigned long flags;
+	/** @inflight: Number of active submissions to the dispatch queues */
+	u32 inflight;
+	/** @jobs: Array of dispatch job lists for each priority level */
+	struct llist_head jobs[16];
+	/** @requeue: Array of lists for dispatch jobs that got requeued */
+	struct llist_head requeue[16];
+	/** @work: The work structure to execute dispatcher function */
+	struct kthread_work work;
+	/** @cmd_list: List of objects submitted to dispatch queues */
+	struct list_head cmd_list;
+	/** @fault: Atomic to record a fault */
+	atomic_t fault;
+	/** @worker: Kthread worker that runs the hwsched dispatcher work */
+	struct kthread_worker *worker;
+	/** @hwsched_ops: Container for target specific hwscheduler ops */
+	const struct adreno_hwsched_ops *hwsched_ops;
+	/** @ctxt_bad: Container for the context bad hfi packet */
+	void *ctxt_bad;
+	/** @idle_gate: Gate to wait on for hwscheduler to idle */
+	struct completion idle_gate;
+	/** @big_cmdobj: Points to the big IB that is inflight */
+	struct kgsl_drawobj_cmd *big_cmdobj;
+	/** @recurring_cmdobj: Recurring command object sent to GMU */
+	struct kgsl_drawobj_cmd *recurring_cmdobj;
+	/** @lsr_timer: Timer struct to schedule lsr work */
+	struct timer_list lsr_timer;
+	/** @lsr_check_ws: Lsr work to update power stats */
+	struct work_struct lsr_check_ws;
+	/** @hw_fence: Container for the hw fences instance */
+	struct adreno_hw_fence hw_fence;
+	/** @hw_fence_cache: kmem cache for storing hardware output fences */
+	struct kmem_cache *hw_fence_cache;
+	/** @hw_fence_count: Number of hardware fences that haven't yet been sent to Tx Queue */
+	atomic_t hw_fence_count;
+	/**
+	 * @submission_seqnum: Sequence number for sending submissions to GMU context queues or
+	 * dispatch queues
+	 */
+	atomic_t submission_seqnum;
+	/** @global_ctxtq: Memory descriptor for global context queue */
+	struct kgsl_memdesc global_ctxtq;
+	/** @global_ctxt_gmu_registered: Whether global context is registered with gmu */
+	bool global_ctxt_gmu_registered;
+};
+
+/*
+ * This value is based on maximum number of IBs that can fit
+ * in the ringbuffer.
+ */
+#define HWSCHED_MAX_IBS 2000
+
+enum adreno_hwsched_flags {
+	ADRENO_HWSCHED_POWER = 0,
+	ADRENO_HWSCHED_ACTIVE,
+	ADRENO_HWSCHED_CTX_BAD_LEGACY,
+	ADRENO_HWSCHED_CONTEXT_QUEUE,
+	ADRENO_HWSCHED_HW_FENCE,
+};
+
+/**
+ * adreno_hwsched_trigger - Function to schedule the hwsched thread
+ * @adreno_dev: A handle to adreno device
+ *
+ * Schedule the hw dispatcher for retiring and submitting command objects
+ */
+void adreno_hwsched_trigger(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_hwsched_start() - activate the hwsched dispatcher
+ * @adreno_dev: pointer to the adreno device
+ *
+ * Enable dispatcher thread to execute
+ */
+void adreno_hwsched_start(struct adreno_device *adreno_dev);
+/**
+ * adreno_hwsched_init() - Initialize the hwsched
+ * @adreno_dev: pointer to the adreno device
+ * @hwsched_ops: Pointer to target specific hwsched ops
+ *
+ * Set up the hwsched resources.
+ * Return: 0 on success or negative on failure.
+ */
+int adreno_hwsched_init(struct adreno_device *adreno_dev,
+	const struct adreno_hwsched_ops *hwsched_ops);
+
+/**
+ * adreno_hwsched_fault - Set hwsched fault to request recovery
+ * @adreno_dev: A handle to adreno device
+ * @fault: The type of fault
+ */
+void adreno_hwsched_fault(struct adreno_device *adreno_dev, u32 fault);
+
+/**
+ * adreno_hwsched_clear_fault() - Clear the hwsched fault
+ * @adreno_dev: A pointer to an adreno_device structure
+ *
+ * Clear the hwsched fault status for adreno device
+ */
+void adreno_hwsched_clear_fault(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_hwsched_parse_fault_cmdobj - Parse the faulty submission
+ * @adreno_dev: pointer to the adreno device
+ * @snapshot: Pointer to the snapshot structure
+ *
+ * Walk the list of active submissions to find the one that faulted and
+ * parse it so that relevant command buffers can be added to the snapshot
+ */
+void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot);
+
+void adreno_hwsched_flush(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_hwsched_unregister_contexts - Reset context gmu_registered bit
+ * @adreno_dev: pointer to the adreno device
+ *
+ * Walk the list of contexts and reset the gmu_registered for all
+ * contexts
+ */
+void adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_hwsched_idle - Wait for dispatcher and hardware to become idle
+ * @adreno_dev: A handle to adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int adreno_hwsched_idle(struct adreno_device *adreno_dev);
+
+void adreno_hwsched_retire_cmdobj(struct adreno_hwsched *hwsched,
+	struct kgsl_drawobj_cmd *cmdobj);
+
+bool adreno_hwsched_context_queue_enabled(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_hwsched_register_hw_fence - Register GPU as a hardware fence client
+ * @adreno_dev: pointer to the adreno device
+ *
+ * Register with the hardware fence driver to be able to trigger and wait
+ * for hardware fences. Also, set up the memory descriptor for mapping the
+ * client queue to the GMU.
+ */
+void adreno_hwsched_register_hw_fence(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_hwsched_deregister_hw_fence - Deregister GPU as a hardware fence client
+ * @adreno_dev: pointer to the adreno device
+ *
+ * Deregister with the hardware fence driver and free up any resources allocated
+ * as part of registering with the hardware fence driver
+ */
+void adreno_hwsched_deregister_hw_fence(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_hwsched_replay - Resubmit inflight cmdbatches after gpu reset
+ * @adreno_dev: pointer to the adreno device
+ *
+ * Resubmit all cmdbatches to GMU after device reset
+ */
+void adreno_hwsched_replay(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_hwsched_parse_payload - Parse payload to look up a key
+ * @payload: Pointer to a payload section
+ * @key: The key whose value is to be looked up
+ *
+ * This function parses the payload data which is a sequence
+ * of key-value pairs.
+ *
+ * Return: The value of the key or 0 if key is not found
+ */
+u32 adreno_hwsched_parse_payload(struct payload_section *payload, u32 key);
+
+/**
+ * adreno_hwsched_gpu_fault - Gets hwsched gpu fault info
+ * @adreno_dev: pointer to the adreno device
+ *
+ * Returns zero for hwsched fault else non zero value
+ */
+u32 adreno_hwsched_gpu_fault(struct adreno_device *adreno_dev);
+
+/**
+ * adreno_hwsched_log_nonfatal_gpu_fault - Logs non fatal GPU error from context bad hfi packet
+ * @adreno_dev: pointer to the adreno device
+ * @dev: Pointer to the struct device for the GMU platform device
+ * @error: Error code reported in the context bad HFI packet
+ *
+ * This function parses context bad hfi packet and logs error information.
+ *
+ * Return: true if the error code is non-fatal, false otherwise.
+ */
+bool adreno_hwsched_log_nonfatal_gpu_fault(struct adreno_device *adreno_dev,
+		struct device *dev, u32 error);
+#endif

+ 313 - 0
qcom/opensource/graphics-kernel/adreno_ioctl.c

@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/slab.h>
+
+#include "adreno.h"
+#include "adreno_a5xx.h"
+
+/*
+ * Add a perfcounter to the per-fd list.
+ * Call with the device mutex held
+ */
+static int adreno_process_perfcounter_add(struct kgsl_device_private *dev_priv,
+	unsigned int groupid, unsigned int countable)
+{
+	struct adreno_device_private *adreno_priv = container_of(dev_priv,
+		struct adreno_device_private, dev_priv);
+	struct adreno_perfcounter_list_node *perfctr;
+
+	perfctr = kmalloc(sizeof(*perfctr), GFP_KERNEL);
+	if (!perfctr)
+		return -ENOMEM;
+
+	perfctr->groupid = groupid;
+	perfctr->countable = countable;
+
+	/* add the pair to process perfcounter list */
+	list_add(&perfctr->node, &adreno_priv->perfcounter_list);
+	return 0;
+}
+
+/*
+ * Remove a perfcounter from the per-fd list.
+ * Call with the device mutex held
+ */
+static int adreno_process_perfcounter_del(struct kgsl_device_private *dev_priv,
+	unsigned int groupid, unsigned int countable)
+{
+	struct adreno_device_private *adreno_priv = container_of(dev_priv,
+		struct adreno_device_private, dev_priv);
+	struct adreno_perfcounter_list_node *p;
+
+	list_for_each_entry(p, &adreno_priv->perfcounter_list, node) {
+		if (p->groupid == groupid && p->countable == countable) {
+			list_del(&p->node);
+			kfree(p);
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+
+long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_perfcounter_get *get = data;
+	int result;
+
+	mutex_lock(&device->mutex);
+
+	/*
+	 * adreno_perfcounter_get() is called by kernel clients
+	 * during start(), so it is not safe to take an
+	 * active count inside that function.
+	 */
+
+	result = adreno_perfcntr_active_oob_get(adreno_dev);
+	if (result) {
+		mutex_unlock(&device->mutex);
+		return (long)result;
+	}
+
+	result = adreno_perfcounter_get(adreno_dev,
+			get->groupid, get->countable, &get->offset,
+			&get->offset_hi, PERFCOUNTER_FLAG_NONE);
+
+	/* Add the perfcounter into the list */
+	if (!result) {
+		result = adreno_process_perfcounter_add(dev_priv, get->groupid,
+				get->countable);
+		if (result)
+			adreno_perfcounter_put(adreno_dev, get->groupid,
+				get->countable, PERFCOUNTER_FLAG_NONE);
+	}
+
+	adreno_perfcntr_active_oob_put(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+
+	return (long) result;
+}
+
+long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_perfcounter_put *put = data;
+	int result;
+
+	mutex_lock(&device->mutex);
+
+	/* Delete the perfcounter from the process list */
+	result = adreno_process_perfcounter_del(dev_priv, put->groupid,
+		put->countable);
+
+	/* Put the perfcounter refcount */
+	if (!result)
+		adreno_perfcounter_put(adreno_dev, put->groupid,
+			put->countable, PERFCOUNTER_FLAG_NONE);
+	mutex_unlock(&device->mutex);
+
+	return (long) result;
+}
+
+static long adreno_ioctl_perfcounter_query(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_perfcounter_query *query = data;
+
+	return (long) adreno_perfcounter_query_group(adreno_dev, query->groupid,
+			query->countables, query->count, &query->max_counters);
+}
+
+static long adreno_ioctl_perfcounter_read(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_perfcounter_read *read = data;
+
+	/*
+	 * When performance counter zapping is enabled, the counters are cleared
+	 * across context switches. Reading the counters when they are zapped is
+	 * not permitted.
+	 */
+	if (!adreno_dev->perfcounter)
+		return -EPERM;
+
+	return (long) adreno_perfcounter_read_group(adreno_dev, read->reads,
+		read->count);
+}
+
+static long adreno_ioctl_preemption_counters_query(
+		struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_preemption_counters_query *read = data;
+	int size_level = A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
+	int levels_to_copy;
+
+	if (!adreno_is_a5xx(adreno_dev) ||
+		!adreno_is_preemption_enabled(adreno_dev))
+		return -EOPNOTSUPP;
+
+	if (read->size_user < size_level)
+		return -EINVAL;
+
+	/* Calculate number of preemption counter levels to copy to userspace */
+	levels_to_copy = (read->size_user / size_level);
+
+	levels_to_copy = min_t(int, levels_to_copy,
+		ARRAY_SIZE(adreno_dev->ringbuffers));
+
+	if (copy_to_user(u64_to_user_ptr(read->counters),
+			adreno_dev->preempt.scratch->hostptr,
+			levels_to_copy * size_level))
+		return -EFAULT;
+
+	read->max_priority_level = levels_to_copy;
+	read->size_priority_level = size_level;
+
+	return 0;
+}
+
+static long adreno_ioctl_read_calibrated_ts(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_read_calibrated_timestamps *reads = data;
+	unsigned long flags;
+	u32 *sources = NULL;
+	u64 *ts = NULL;
+	u64 start;
+	u64 samples[KGSL_CALIBRATED_TIME_DOMAIN_MAX] = {0};
+	u32 i;
+	int ret = 0;
+
+	/* Reading calibrated timestamps requires the CX timer be initialized */
+	if (!test_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv))
+		return -EOPNOTSUPP;
+
+	/* Check that the number of timestamps is reasonable */
+	if (!reads->count ||
+		(reads->count > (2 * KGSL_CALIBRATED_TIME_DOMAIN_MAX)))
+		return -EINVAL;
+
+	sources = kvcalloc(reads->count, sizeof(*sources), GFP_KERNEL);
+	if (!sources)
+		return -ENOMEM;
+
+	if (copy_from_user(sources, u64_to_user_ptr(reads->sources),
+			reads->count * sizeof(*sources))) {
+		ret = -EFAULT;
+		goto done;
+	}
+
+	for (i = 0; i < reads->count; i++) {
+		if (sources[i] >= KGSL_CALIBRATED_TIME_DOMAIN_MAX) {
+			ret = -EINVAL;
+			goto done;
+		}
+	}
+
+	ts = kvcalloc(reads->count, sizeof(*ts), GFP_KERNEL);
+	if (!ts) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	/* Disable local irqs to prevent context switch delays */
+	local_irq_save(flags);
+
+	/* Sample the MONOTONIC_RAW domain for use in calculating deviation */
+	start = (u64)ktime_to_ns(ktime_get_raw());
+
+	samples[KGSL_CALIBRATED_TIME_DOMAIN_DEVICE] =
+				adreno_read_cx_timer(adreno_dev);
+	samples[KGSL_CALIBRATED_TIME_DOMAIN_MONOTONIC] =
+				(u64)ktime_to_ns(ktime_get());
+	samples[KGSL_CALIBRATED_TIME_DOMAIN_MONOTONIC_RAW] =
+				(u64)ktime_to_ns(ktime_get_raw());
+
+	/* Done collecting timestamps. Re-enable irqs */
+	local_irq_restore(flags);
+
+	/* Calculate deviation in reads based on the MONOTONIC_RAW samples */
+	reads->deviation = samples[KGSL_CALIBRATED_TIME_DOMAIN_MONOTONIC_RAW] - start;
+
+	for (i = 0; i < reads->count; i++)
+		ts[i] = samples[sources[i]];
+
+	if (copy_to_user(u64_to_user_ptr(reads->ts), ts, reads->count * sizeof(*ts)))
+		ret = -EFAULT;
+
+done:
+	kvfree(ts);
+	kvfree(sources);
+	return ret;
+}
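+
+/*
+ * Note on the deviation value reported above: MONOTONIC_RAW is sampled once
+ * before and once after the three domain reads, and deviation is the
+ * difference between those two samples. Every returned timestamp was taken
+ * inside that window, so deviation is an upper bound on the skew between the
+ * domains when userspace correlates them.
+ */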
+
+long adreno_ioctl_helper(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, unsigned long arg,
+		const struct kgsl_ioctl *cmds, int len)
+{
+	unsigned char data[128] = { 0 };
+	long ret;
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (_IOC_NR(cmd) == _IOC_NR(cmds[i].cmd))
+			break;
+	}
+
+	if (i == len)
+		return -ENOIOCTLCMD;
+
+	if (_IOC_SIZE(cmds[i].cmd) > sizeof(data)) {
+		dev_err_ratelimited(dev_priv->device->dev,
+			"data too big for ioctl 0x%08x: %d/%zu\n",
+			cmd, _IOC_SIZE(cmds[i].cmd), sizeof(data));
+		return -EINVAL;
+	}
+
+	if (_IOC_SIZE(cmds[i].cmd)) {
+		ret = kgsl_ioctl_copy_in(cmds[i].cmd, cmd, arg, data);
+
+		if (ret)
+			return ret;
+	} else {
+		memset(data, 0, sizeof(data));
+	}
+
+	ret = cmds[i].func(dev_priv, cmd, data);
+
+	if (ret == 0 && _IOC_SIZE(cmds[i].cmd))
+		ret = kgsl_ioctl_copy_out(cmds[i].cmd, cmd, arg, data);
+
+	return ret;
+}
+
+static struct kgsl_ioctl adreno_ioctl_funcs[] = {
+	{ IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get },
+	{ IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put },
+	{ IOCTL_KGSL_PERFCOUNTER_QUERY, adreno_ioctl_perfcounter_query },
+	{ IOCTL_KGSL_PERFCOUNTER_READ, adreno_ioctl_perfcounter_read },
+	{ IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY,
+		adreno_ioctl_preemption_counters_query },
+	{ IOCTL_KGSL_READ_CALIBRATED_TIMESTAMPS, adreno_ioctl_read_calibrated_ts },
+};
+
+long adreno_ioctl(struct kgsl_device_private *dev_priv,
+			      unsigned int cmd, unsigned long arg)
+{
+	return adreno_ioctl_helper(dev_priv, cmd, arg,
+		adreno_ioctl_funcs, ARRAY_SIZE(adreno_ioctl_funcs));
+}

Some files were not shown because too many files changed in this diff