
mm-drivers: hw_fence: Add support for hw-fence driver

This change adds support for the hw-fence driver, which initializes, exposes
and manages the interfaces for hw-fences: the synchronization primitives that
allow hardware-to-hardware signaling of the fences for the frame buffers
shared between the gpu and display hw-cores.

Change-Id: If2313585d5a9f3ac90e16aad3464600641a6fa04
Signed-off-by: Ingrid Gallardo <[email protected]>
Ingrid Gallardo, 3 years ago
commit 77ae3f31f0
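
For orientation, the sketch below shows how a kernel client might drive the driver's public API. This is an illustration only: the msm_hw_fence_register/create/destroy calls are the ones used by the debug code added in this change, while the surrounding flow and the chosen client id are assumptions.

	/* Hypothetical client flow; error handling abbreviated */
	struct msm_hw_fence_mem_addr mem_descriptor;
	struct msm_hw_fence_create_params params;
	void *client_handle;
	u64 hash;

	/* Register with the hw-fence driver; returns a handle plus the
	 * memory descriptor for the client's rx/tx queues.
	 */
	client_handle = msm_hw_fence_register(HW_FENCE_CLIENT_ID_CTX0, &mem_descriptor);
	if (IS_ERR_OR_NULL(client_handle))
		return -EINVAL;

	/* Back an existing dma-fence with a hw-fence in the global table */
	params.fence = fence;	/* a struct dma_fence owned by the client */
	params.handle = &hash;	/* receives the hw-fence hash */
	if (msm_hw_fence_create(client_handle, &params))
		return -EINVAL;

	/* ... gpu and display hw-cores now signal each other directly ... */

	/* Release the hw-fence once the dma-fence is done */
	msm_hw_fence_destroy(client_handle, fence);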

+ 1 - 0
Android.mk

@@ -1,5 +1,6 @@
 MM_DRIVER_PATH := $(call my-dir)
 include $(MM_DRIVER_PATH)/msm_ext_display/Android.mk
+include $(MM_DRIVER_PATH)/hw_fence/Android.mk
 ifneq ($(TARGET_BOARD_PLATFORM), taro)
 include $(MM_DRIVER_PATH)/sync_fence/Android.mk
 endif

+ 1 - 0
config/kalamammdrivers.conf

@@ -4,3 +4,4 @@
 
 export CONFIG_MSM_EXT_DISPLAY=y
 export CONFIG_QCOM_SPEC_SYNC=y
+export CONFIG_QTI_HW_FENCE=y

+ 1 - 0
config/kalamammdriversconf.h

@@ -6,3 +6,4 @@
 
 #define CONFIG_MSM_EXT_DISPLAY 1
 #define CONFIG_QCOM_SPEC_SYNC 1
+#define CONFIG_QTI_HW_FENCE 1

+ 41 - 0
hw_fence/Android.mk

@@ -0,0 +1,41 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+# This makefile is only for DLKM
+ifneq ($(findstring vendor,$(LOCAL_PATH)),)
+
+ifneq ($(findstring opensource,$(LOCAL_PATH)),)
+	MSM_HW_FENCE_BLD_DIR := $(TOP)/vendor/qcom/opensource/mm-drivers/hw_fence
+endif # opensource
+
+DLKM_DIR := $(TOP)/device/qcom/common/dlkm
+
+LOCAL_ADDITIONAL_DEPENDENCIES := $(wildcard $(LOCAL_PATH)/**/*) $(wildcard $(LOCAL_PATH)/*)
+
+###########################################################
+# This is set once per LOCAL_PATH, not per (kernel) module
+KBUILD_OPTIONS := MSM_HW_FENCE_ROOT=$(MSM_HW_FENCE_BLD_DIR)
+KBUILD_OPTIONS += MODNAME=msm_hw_fence
+KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM)
+
+###########################################################
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES           := $(wildcard $(LOCAL_PATH)/**/*) $(wildcard $(LOCAL_PATH)/*)
+LOCAL_MODULE              := hw-fence-module-symvers
+LOCAL_MODULE_STEM         := Module.symvers
+LOCAL_MODULE_KBUILD_NAME  := Module.symvers
+LOCAL_MODULE_PATH         := $(KERNEL_MODULES_OUT)
+
+include $(DLKM_DIR)/Build_external_kernelmodule.mk
+###########################################################
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES   := $(wildcard $(LOCAL_PATH)/**/*) $(wildcard $(LOCAL_PATH)/*)
+LOCAL_MODULE              := msm_hw_fence.ko
+LOCAL_MODULE_KBUILD_NAME  := msm_hw_fence.ko
+LOCAL_MODULE_TAGS         := optional
+LOCAL_MODULE_DEBUG_ENABLE := true
+LOCAL_MODULE_PATH         := $(KERNEL_MODULES_OUT)
+
+include $(DLKM_DIR)/Build_external_kernelmodule.mk
+###########################################################
+endif # DLKM check

+ 18 - 0
hw_fence/Kbuild

@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+KDIR := $(TOP)/kernel_platform/msm-kernel
+include $(MSM_HW_FENCE_ROOT)/config/kalamammdrivers.conf
+LINUXINCLUDE += -include $(MSM_HW_FENCE_ROOT)/config/kalamammdriversconf.h \
+		-I$(MSM_HW_FENCE_ROOT)hw_fence/include/
+
+ifdef CONFIG_QTI_HW_FENCE
+obj-m += msm_hw_fence.o
+
+msm_hw_fence-y := src/msm_hw_fence.o \
+		src/hw_fence_drv_priv.o \
+		src/hw_fence_drv_utils.o \
+		src/hw_fence_drv_debug.o \
+		src/hw_fence_drv_ipc.o
+
+CDEFINES += -DBUILD_TIMESTAMP=\"$(shell date -u +'%Y-%m-%dT%H:%M:%SZ')\"
+endif

+ 14 - 0
hw_fence/Makefile

@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+KBUILD_OPTIONS += MSM_HW_FENCE_ROOT=$(KERNEL_SRC)/$(M)/../
+
+all: modules
+
+modules_install:
+	$(MAKE) INSTALL_MOD_STRIP=1 -C $(KERNEL_SRC) M=$(M) modules_install
+
+%:
+	$(MAKE) -C $(KERNEL_SRC) M=$(M) $@ $(KBUILD_OPTIONS)
+
+clean:
+	rm -f *.o *.ko *.mod.c *.mod.o *~ .*.cmd Module.symvers
+	rm -rf .tmp_versions

+ 61 - 0
hw_fence/include/hw_fence_drv_debug.h

@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __HW_FENCE_DRV_DEBUG
+#define __HW_FENCE_DRV_DEBUG
+
+enum hw_fence_drv_prio {
+	HW_FENCE_HIGH = 0x000001,	/* High density debug messages (noisy) */
+	HW_FENCE_LOW = 0x000002,	/* Low density debug messages */
+	HW_FENCE_INFO = 0x000004,	/* Informational prints */
+	HW_FENCE_INIT = 0x000008,	/* Initialization logs */
+	HW_FENCE_QUEUE = 0x000010,	/* Queue logs */
+	HW_FENCE_LUT = 0x000020,	/* Look-up and algorithm logs */
+	HW_FENCE_IRQ = 0x000040,	/* Interrupt-related messages */
+	HW_FENCE_PRINTK = 0x010000,
+};
+
+extern u32 msm_hw_fence_debug_level;
+
+#define dprintk(__level, __fmt, ...) \
+	do { \
+		if (msm_hw_fence_debug_level & __level) \
+			if (msm_hw_fence_debug_level & HW_FENCE_PRINTK) \
+				pr_err(__fmt, ##__VA_ARGS__); \
+	} while (0)
+
+
+#define HWFNC_ERR(fmt, ...) \
+	pr_err("[hwfence:%s:%d][err][%pS] "fmt, __func__, __LINE__, \
+	__builtin_return_address(0), ##__VA_ARGS__)
+
+#define HWFNC_DBG_H(fmt, ...) \
+	dprintk(HW_FENCE_HIGH, "[hwfence:%s:%d][dbgh]"fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+#define HWFNC_DBG_L(fmt, ...) \
+	dprintk(HW_FENCE_LOW, "[hwfence:%s:%d][dbgl]"fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+#define HWFNC_DBG_INFO(fmt, ...) \
+	dprintk(HW_FENCE_INFO, "[hwfence:%s:%d][dbgi]"fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+#define HWFNC_DBG_INIT(fmt, ...) \
+	dprintk(HW_FENCE_INIT, "[hwfence:%s:%d][dbg]"fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+#define HWFNC_DBG_Q(fmt, ...) \
+	dprintk(HW_FENCE_QUEUE, "[hwfence:%s:%d][dbgq]"fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+#define HWFNC_DBG_LUT(fmt, ...) \
+	dprintk(HW_FENCE_LUT, "[hwfence:%s:%d][dbglut]"fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+#define HWFNC_DBG_IRQ(fmt, ...) \
+	dprintk(HW_FENCE_IRQ, "[hwfence:%s:%d][dbgirq]"fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+#define HWFNC_WARN(fmt, ...) \
+	pr_warn("[hwfence:%s:%d][warn][%pS] "fmt, __func__, __LINE__, \
+	__builtin_return_address(0), ##__VA_ARGS__)
+
+int hw_fence_debug_debugfs_register(struct hw_fence_driver_data *drv_data);
+
+#endif /* __HW_FENCE_DRV_DEBUG */
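
Usage note (illustrative, not part of this change): dprintk() only emits when both the category bit and HW_FENCE_PRINTK are set in msm_hw_fence_debug_level, so enabling a category looks like this:

	/* Enable queue-category logs; HW_FENCE_PRINTK must also be set,
	 * otherwise dprintk() stays silent for every category.
	 */
	msm_hw_fence_debug_level = HW_FENCE_QUEUE | HW_FENCE_PRINTK;
	HWFNC_DBG_Q("tx queue write_index:%u\n", write_index);	/* write_index is hypothetical */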

+ 90 - 0
hw_fence/include/hw_fence_drv_ipc.h

@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __HW_FENCE_DRV_IPC_H
+#define __HW_FENCE_DRV_IPC_H
+
+#define HW_FENCE_IPC_CLIENT_ID_APPS 8
+#define HW_FENCE_IPC_CLIENT_ID_GPU 9
+#define HW_FENCE_IPC_CLIENT_ID_DPU 25
+
+#define HW_FENCE_IPC_COMPUTE_L1_PROTOCOL_ID_LAHAINA 2
+#define HW_FENCE_IPC_COMPUTE_L1_PROTOCOL_ID_WAIPIO 1
+#define HW_FENCE_IPC_COMPUTE_L1_PROTOCOL_ID_KAILUA 2
+
+#define HW_FENCE_IPCC_HW_REV_100 0x00010000  /* Lahaina */
+#define HW_FENCE_IPCC_HW_REV_110 0x00010100  /* Waipio */
+#define HW_FENCE_IPCC_HW_REV_170 0x00010700  /* Kailua */
+
+#define IPC_PROTOCOLp_CLIENTc_VERSION(base, p, c) (base + (0x40000*p) + (0x1000*c))
+#define IPC_PROTOCOLp_CLIENTc_CONFIG(base, p, c) (base + 0x8 + (0x40000*p) + (0x1000*c))
+#define IPC_PROTOCOLp_CLIENTc_RECV_SIGNAL_ENABLE(base, p, c) \
+	(base + 0x14 + (0x40000*p) + (0x1000*c))
+#define IPC_PROTOCOLp_CLIENTc_SEND(base, p, c) ((base + 0xc) + (0x40000*p) + (0x1000*c))
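
To make the offset arithmetic concrete, here is a worked expansion of the SEND macro, using the Kailua protocol id and the DPU ipc client id defined above (the base of 0 is just for illustration):

	/* SEND register offset for protocol 2 (compute L1 on Kailua) and the
	 * DPU ipc client (25):
	 *   0xc + 0x40000 * 2 + 0x1000 * 25 = 0xc + 0x80000 + 0x19000 = 0x9900c
	 */
	u32 send_offset = IPC_PROTOCOLp_CLIENTc_SEND(0,
		HW_FENCE_IPC_COMPUTE_L1_PROTOCOL_ID_KAILUA, HW_FENCE_IPC_CLIENT_ID_DPU);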
+
+/**
+ * hw_fence_ipcc_trigger_signal() - Trigger ipc signal for the requested client/signal pair.
+ * @drv_data: driver data.
+ * @tx_client_id: ipc client id that sends the ipc signal.
+ * @rx_client_id: ipc client id that receives the ipc signal.
+ * @signal_id: signal id to send.
+ *
+ * This API triggers the ipc 'signal_id' from the 'tx_client_id' to the 'rx_client_id'
+ */
+void hw_fence_ipcc_trigger_signal(struct hw_fence_driver_data *drv_data,
+	u32 tx_client_id, u32 rx_client_id, u32 signal_id);
+
+/**
+ * hw_fence_ipcc_enable_signaling() - Enable ipcc signaling for hw-fence driver.
+ * @drv_data: driver data.
+ *
+ * Return: 0 on success or negative errno (-EINVAL)
+ */
+int hw_fence_ipcc_enable_signaling(struct hw_fence_driver_data *drv_data);
+
+#ifdef HW_DPU_IPCC
+/**
+ * hw_fence_ipcc_enable_dpu_signaling() - Enable ipcc signaling for dpu client.
+ * @drv_data: driver data.
+ *
+ * Return: 0 on success or negative errno (-EINVAL)
+ */
+int hw_fence_ipcc_enable_dpu_signaling(struct hw_fence_driver_data *drv_data);
+#endif /* HW_DPU_IPCC */
+
+/**
+ * hw_fence_ipcc_get_client_id() - Returns the ipc client id that corresponds to the hw fence
+ *		driver client.
+ * @drv_data: driver data.
+ * @client_id: hw fence driver client id.
+ *
+ * The ipc client id returned by this API is used by the hw fence driver when signaling the fence.
+ *
+ * Return: client_id on success or negative errno (-EINVAL)
+ */
+int hw_fence_ipcc_get_client_id(struct hw_fence_driver_data *drv_data, u32 client_id);
+
+/**
+ * hw_fence_ipcc_get_signal_id() - Returns the ipc signal id that corresponds to the hw fence
+ *		driver client.
+ * @drv_data: driver data.
+ * @client_id: hw fence driver client id.
+ *
+ * The ipc signal id returned by this API is used by the hw fence driver when signaling the fence.
+ *
+ * Return: client_id on success or negative errno (-EINVAL)
+ */
+int hw_fence_ipcc_get_signal_id(struct hw_fence_driver_data *drv_data, u32 client_id);
+
+/**
+ * hw_fence_ipcc_needs_rxq_update() - Returns bool to indicate if client uses rx-queue.
+ * @drv_data: driver data.
+ * @client_id: hw fence driver client id.
+ *
+ * Return: true if client needs to update rxq, false otherwise
+ */
+bool hw_fence_ipcc_needs_rxq_update(struct hw_fence_driver_data *drv_data, int client_id);
+
+#endif /* __HW_FENCE_DRV_IPC_H */
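
Taken together, a caller first resolves the signal id for a hw-fence client and then fires it. The sketch below mirrors the _debugfs_ipcc_trigger() helper added in hw_fence_drv_debug.c later in this change:

	/* Resolve the ipc signal for a hw-fence client, then trigger it */
	int signal_id = hw_fence_ipcc_get_signal_id(drv_data, client_id);

	if (signal_id >= 0)
		hw_fence_ipcc_trigger_signal(drv_data,
			HW_FENCE_IPC_CLIENT_ID_APPS,	/* tx: apps */
			HW_FENCE_IPC_CLIENT_ID_APPS,	/* rx: apps (loopback) */
			signal_id);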

+ 386 - 0
hw_fence/include/hw_fence_drv_priv.h

@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __HW_FENCE_DRV_INTERNAL_H
+#define __HW_FENCE_DRV_INTERNAL_H
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/soc/qcom/msm_hw_fence.h>
+#include <linux/dma-fence-array.h>
+#include <linux/slab.h>
+
+/* Add define only for platforms that support IPCC in dpu-hw */
+#define HW_DPU_IPCC 1
+
+/* max u64 to indicate invalid fence */
+#define HW_FENCE_INVALID_PARENT_FENCE (~0ULL)
+
+/* hash algorithm constants */
+#define HW_FENCE_HASH_A_MULT	4969 /* a multiplier for Hash algorithm */
+#define HW_FENCE_HASH_C_MULT	907  /* c multiplier for Hash algorithm */
+
+/* number of queues per type (i.e. ctrl or client queues) */
+#define HW_FENCE_CTRL_QUEUES	2 /* Rx and Tx Queues */
+#define HW_FENCE_CLIENT_QUEUES	2 /* Rx and Tx Queues */
+
+/* hfi headers calculation */
+#define HW_FENCE_HFI_TABLE_HEADER_SIZE (sizeof(struct msm_hw_fence_hfi_queue_table_header))
+#define HW_FENCE_HFI_QUEUE_HEADER_SIZE (sizeof(struct msm_hw_fence_hfi_queue_header))
+
+#define HW_FENCE_HFI_CTRL_HEADERS_SIZE (HW_FENCE_HFI_TABLE_HEADER_SIZE + \
+			(HW_FENCE_HFI_QUEUE_HEADER_SIZE * HW_FENCE_CTRL_QUEUES))
+
+#define HW_FENCE_HFI_CLIENT_HEADERS_SIZE (HW_FENCE_HFI_TABLE_HEADER_SIZE + \
+			(HW_FENCE_HFI_QUEUE_HEADER_SIZE * HW_FENCE_CLIENT_QUEUES))
+
+/*
+ * Max Payload size is the biggest size of the message that we can have in the CTRL queue.
+ * In this case, the max message is calculated as follows, using 32-bit elements:
+ * 1 header + 1 msg-type + 1 client_id + 2 hash + 1 error
+ */
+#define HW_FENCE_CTRL_QUEUE_MAX_PAYLOAD_SIZE ((1 + 1 + 1 + 2 + 1) * sizeof(u32))
+
+#define HW_FENCE_CTRL_QUEUE_PAYLOAD HW_FENCE_CTRL_QUEUE_MAX_PAYLOAD_SIZE
+#define HW_FENCE_CLIENT_QUEUE_PAYLOAD (sizeof(struct msm_hw_fence_queue_payload))
+
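
The arithmetic above works out to 24 bytes (six 32-bit elements). A compile-time guard, not part of this change and placed after the payload struct definition further down in this header, could pin those assumptions down:

	#include <linux/build_bug.h>

	/* Hypothetical guards for the payload sizes assumed above */
	static_assert(HW_FENCE_CTRL_QUEUE_MAX_PAYLOAD_SIZE == 24);
	static_assert(sizeof(struct msm_hw_fence_queue_payload) % sizeof(u64) == 0);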
+/* Locks area for all the clients */
+#define HW_FENCE_MEM_LOCKS_SIZE (sizeof(u64) * (HW_FENCE_CLIENT_MAX - 1))
+
+#define HW_FENCE_TX_QUEUE 1
+#define HW_FENCE_RX_QUEUE 2
+
+/* Client ID for the internal join fence; used by the framework when creating a join-fence */
+#define HW_FENCE_JOIN_FENCE_CLIENT_ID (~(u32)0)
+
+/**
+ * msm hw fence flags:
+ * MSM_HW_FENCE_FLAG_SIGNAL - Flag set when the hw-fence is signaled
+ */
+#define MSM_HW_FENCE_FLAG_SIGNAL	BIT(0)
+
+/**
+ * MSM_HW_FENCE_MAX_JOIN_PARENTS:
+ * Maximum number of parents that a fence can have for a join-fence
+ */
+#define MSM_HW_FENCE_MAX_JOIN_PARENTS	3
+
+enum hw_fence_lookup_ops {
+	HW_FENCE_LOOKUP_OP_CREATE = 0x1,
+	HW_FENCE_LOOKUP_OP_DESTROY,
+	HW_FENCE_LOOKUP_OP_CREATE_JOIN,
+	HW_FENCE_LOOKUP_OP_FIND_FENCE
+};
+
+/**
+ * enum hw_fence_loopback_id - Enum with the clients having a loopback signal (i.e., AP-to-AP signal).
+ * HW_FENCE_LOOPBACK_DPU_CTL_0: dpu client 0. Used in platforms with no dpu-ipc.
+ * HW_FENCE_LOOPBACK_DPU_CTL_1: dpu client 1. Used in platforms with no dpu-ipc.
+ * HW_FENCE_LOOPBACK_DPU_CTL_2: dpu client 2. Used in platforms with no dpu-ipc.
+ * HW_FENCE_LOOPBACK_DPU_CTL_3: dpu client 3. Used in platforms with no dpu-ipc.
+ * HW_FENCE_LOOPBACK_DPU_CTL_4: dpu client 4. Used in platforms with no dpu-ipc.
+ * HW_FENCE_LOOPBACK_DPU_CTL_5: dpu client 5. Used in platforms with no dpu-ipc.
+ * HW_FENCE_LOOPBACK_GFX_CTX_0: gfx client 0. Used in platforms with no gmu support.
+ */
+enum hw_fence_loopback_id {
+	HW_FENCE_LOOPBACK_DPU_CTL_0,
+	HW_FENCE_LOOPBACK_DPU_CTL_1,
+	HW_FENCE_LOOPBACK_DPU_CTL_2,
+	HW_FENCE_LOOPBACK_DPU_CTL_3,
+	HW_FENCE_LOOPBACK_DPU_CTL_4,
+	HW_FENCE_LOOPBACK_DPU_CTL_5,
+	HW_FENCE_LOOPBACK_GFX_CTX_0,
+	HW_FENCE_LOOPBACK_MAX,
+};
+
+#define HW_FENCE_MAX_DPU_LOOPBACK_CLIENTS (HW_FENCE_LOOPBACK_DPU_CTL_5 + 1)
+
+/**
+ * struct msm_hw_fence_queue - Structure holding the data of the hw fence queues.
+ * @va_queue: pointer to the virtual address of the queue elements
+ * @q_size_bytes: size of the queue
+ * @va_header: pointer to the hfi header virtual address
+ * @pa_queue: physical address of the queue
+ */
+struct msm_hw_fence_queue {
+	void *va_queue;
+	u32 q_size_bytes;
+	void *va_header;
+	phys_addr_t pa_queue;
+};
+
+/**
+ * struct msm_hw_fence_client - Structure holding the per-Client allocated resources.
+ * @client_id: id of the client
+ * @mem_descriptor: hfi header memory descriptor
+ * @queues: queues descriptor
+ * @ipc_signal_id: id of the signal to be triggered for this client
+ * @ipc_client_id: id of the ipc client for this hw fence driver client
+ * @update_rxq: bool to indicate if client uses rx-queue
+ */
+struct msm_hw_fence_client {
+	enum hw_fence_client_id client_id;
+	struct msm_hw_fence_mem_addr mem_descriptor;
+	struct msm_hw_fence_queue queues[HW_FENCE_CLIENT_QUEUES];
+	int ipc_signal_id;
+	int ipc_client_id;
+	bool update_rxq;
+};
+
+/**
+ * struct msm_hw_fence_mem_data - Structure holding internal memory attributes
+ *
+ * @attrs: attributes for the memory allocation
+ */
+struct msm_hw_fence_mem_data {
+	unsigned long attrs;
+};
+
+/**
+ * struct msm_hw_fence_dbg_data - Structure holding debugfs data
+ *
+ * @root: debugfs root
+ * @entry_rd: flag to indicate if debugfs dumps a single line or table
+ * @context_rd: debugfs setting to indicate which context id to dump
+ * @seqno_rd: debugfs setting to indicate which seqno to dump
+ * @hw_fence_sim_release_delay: delay in microseconds used by the debugfs node that simulates the
+ *                              hw-fences behavior before releasing the hw-fences
+ * @create_hw_fences: boolean to continuously create hw-fences within debugfs
+ * @clients_list: list of debug clients registered
+ * @clients_list_lock: lock to synchronize access to the clients list
+ */
+struct msm_hw_fence_dbg_data {
+	struct dentry *root;
+
+	bool entry_rd;
+	u64 context_rd;
+	u64 seqno_rd;
+
+	u32 hw_fence_sim_release_delay;
+	bool create_hw_fences;
+
+	struct list_head clients_list;
+	struct mutex clients_list_lock;
+};
+
+/**
+ * struct hw_fence_driver_data - Structure holding internal hw-fence driver data
+ *
+ * @dev: device driver pointer
+ * @resources_ready: value set by driver at end of probe, once all resources are ready
+ * @hw_fence_table_entries: total number of hw-fences in the global table
+ * @hw_fence_mem_fences_table_size: hw-fences global table total size
+ * @hw_fence_queue_entries: total number of entries that can be available in the queue
+ * @hw_fence_ctrl_queue_size: size of the ctrl queue for the payload
+ * @hw_fence_mem_ctrl_queues_size: total size of ctrl queues, including: header + rxq + txq
+ * @hw_fence_client_queue_size: size of the client queue for the payload
+ * @hw_fence_mem_clients_queues_size: total size of client queues, including: header + rxq + txq
+ * @hw_fences_tbl: pointer to the hw-fences table
+ * @hw_fences_tbl_cnt: number of elements in the hw-fence table
+ * @client_lock_tbl: pointer to the per-client locks table
+ * @client_lock_tbl_cnt: number of elements in the locks table
+ * @hw_fences_mem_desc: memory descriptor for the hw-fence table
+ * @clients_locks_mem_desc: memory descriptor for the locks table
+ * @ctrl_queue_mem_desc: memory descriptor for the ctrl queues
+ * @ctrl_queues: pointer to the ctrl queues
+ * @io_mem_base: pointer to the carved-out io memory
+ * @res: resources for the carved out memory
+ * @size: size of the carved-out memory
+ * @label: label for the carved-out memory (this is used by SVM to find the memory)
+ * @peer_name: peer name for this carved-out memory
+ * @rm_nb: hyp resource manager notifier
+ * @memparcel: memparcel for the allocated memory
+ * @db_label: doorbell label
+ * @rx_dbl: handle to the Rx doorbell
+ * @debugfs_data: debugfs info
+ * @ipcc_reg_base: base for ipcc regs mapping
+ * @ipcc_io_mem: base for the ipcc io mem map
+ * @ipcc_size: size of the ipcc io mem mapping
+ * @protocol_id: ipcc protocol id used by this driver
+ * @ipcc_client_id: ipcc client id for this driver
+ * @ipc_clients_table: table with the ipcc mapping for each client of this driver
+ * @qtime_reg_base: qtimer register base address
+ * @qtime_io_mem: qtimer io mem map
+ * @qtime_size: qtimer io mem map size
+ * @ctl_start_ptr: pointer to the ctl_start registers of the display hw (platforms with no dpu-ipc)
+ * @ctl_start_size: size of the ctl_start registers of the display hw (platforms with no dpu-ipc)
+ * @client_id_mask: bitmask for tracking registered client_ids
+ * @clients_mask_lock: lock to synchronize access to the clients mask
+ * @clients: table with the handles of the registered clients
+ * @ipcc_dpu_initialized: flag to indicate if dpu hw is initialized
+ */
+struct hw_fence_driver_data {
+
+	struct device *dev;
+	bool resources_ready;
+
+	/* Table & Queues info */
+	u32 hw_fence_table_entries;
+	u32 hw_fence_mem_fences_table_size;
+	u32 hw_fence_queue_entries;
+	/* ctrl queues */
+	u32 hw_fence_ctrl_queue_size;
+	u32 hw_fence_mem_ctrl_queues_size;
+	/* client queues */
+	u32 hw_fence_client_queue_size;
+	u32 hw_fence_mem_clients_queues_size;
+
+	/* HW Fences Table VA */
+	struct msm_hw_fence *hw_fences_tbl;
+	u32 hw_fences_tbl_cnt;
+
+	/* Table with a Per-Client Lock */
+	u64 *client_lock_tbl;
+	u32 client_lock_tbl_cnt;
+
+	/* Memory Descriptors */
+	struct msm_hw_fence_mem_addr hw_fences_mem_desc;
+	struct msm_hw_fence_mem_addr clients_locks_mem_desc;
+	struct msm_hw_fence_mem_addr ctrl_queue_mem_desc;
+	struct msm_hw_fence_queue ctrl_queues[HW_FENCE_CTRL_QUEUES];
+
+	/* carved out memory */
+	void __iomem *io_mem_base;
+	struct resource res;
+	size_t size;
+	u32 label;
+	u32 peer_name;
+	struct notifier_block rm_nb;
+	u32 memparcel;
+
+	/* doorbell */
+	u32 db_label;
+
+	/* VM virq */
+	void *rx_dbl;
+
+	/* debugfs */
+	struct msm_hw_fence_dbg_data debugfs_data;
+
+	/* ipcc regs */
+	phys_addr_t ipcc_reg_base;
+	void __iomem *ipcc_io_mem;
+	uint32_t ipcc_size;
+	u32 protocol_id;
+	u32 ipcc_client_id;
+
+	/* table with mapping of ipc client for each hw-fence client */
+	struct hw_fence_client_ipc_map *ipc_clients_table;
+
+	/* qtime reg */
+	phys_addr_t qtime_reg_base;
+	void __iomem *qtime_io_mem;
+	uint32_t qtime_size;
+
+	/* base address for dpu ctl start regs */
+	void *ctl_start_ptr[HW_FENCE_MAX_DPU_LOOPBACK_CLIENTS];
+	uint32_t ctl_start_size[HW_FENCE_MAX_DPU_LOOPBACK_CLIENTS];
+
+	/* bitmask for tracking registered client_ids */
+	u64 client_id_mask;
+	struct mutex clients_mask_lock;
+
+	/* table with registered client handles */
+	struct msm_hw_fence_client *clients[HW_FENCE_CLIENT_MAX];
+#ifdef HW_DPU_IPCC
+	/* state variables */
+	bool ipcc_dpu_initialized;
+#endif /* HW_DPU_IPCC */
+};
+
+/**
+ * struct msm_hw_fence_queue_payload - hardware fence clients queues payload.
+ * @ctxt_id: context id of the dma fence
+ * @seqno: sequence number of the dma fence
+ * @hash: fence hash
+ * @flags: see MSM_HW_FENCE_FLAG_* flags descriptions
+ * @error: error code for this fence, fence controller receives this
+ *		  error from the signaling client through the tx queue and
+ *		  propagates the error to the waiting client through rx queue
+ * @unused: reserved; pads the payload to 64-bit alignment
+ */
+struct msm_hw_fence_queue_payload {
+	u64 ctxt_id;
+	u64 seqno;
+	u64 hash;
+	u64 flags;
+	u32 error;
+	u32 unused; /* align to 64-bit */
+};
+
+/**
+ * struct msm_hw_fence - structure holding each hw fence data.
+ * @valid: field updated when a hw-fence is reserved. True if hw-fence is in use
+ * @error: field to hold a hw-fence error
+ * @ctx_id: context id
+ * @seq_id: sequence id
+ * @wait_client_mask: bitmask holding the waiting-clients of the fence
+ * @fence_allocator: field to indicate the client_id that reserved the fence
+ * @fence_signal_client: field to indicate the client_id that signaled the fence
+ * @lock: this field is required to share information between the Driver & Driver ||
+ *        Driver & FenceCTL. Needs to be 64-bit atomic inter-processor lock.
+ * @flags: field to indicate the state of the fence
+ * @parent_list: list of indexes with the parents for a child-fence in a join-fence
+ * @parents_cnt: total number of parents for a child-fence in a join-fence
+ * @pending_child_cnt: children refcount for a parent-fence in a join-fence. Access must be atomic
+ *        or locked
+ * @fence_create_time: debug info with the create time timestamp
+ * @fence_trigger_time: debug info with the trigger time timestamp
+ * @fence_wait_time: debug info with the register-for-wait timestamp
+ * @debug_refcount: refcount used for debugging
+ */
+struct msm_hw_fence {
+	u32 valid;
+	u32 error;
+	u64 ctx_id;
+	u64 seq_id;
+	u64 wait_client_mask;
+	u32 fence_allocator;
+	u32 fence_signal_client;
+	u64 lock;	/* Datatype must be 64-bit. */
+	u64 flags;
+	u64 parent_list[MSM_HW_FENCE_MAX_JOIN_PARENTS];
+	u32 parents_cnt;
+	u32 pending_child_cnt;
+	u64 fence_create_time;
+	u64 fence_trigger_time;
+	u64 fence_wait_time;
+	u64 debug_refcount;
+};
+
+int hw_fence_init(struct hw_fence_driver_data *drv_data);
+int hw_fence_alloc_client_resources(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	struct msm_hw_fence_mem_addr *mem_descriptor);
+int hw_fence_init_controller_signal(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client);
+int hw_fence_init_controller_resources(struct msm_hw_fence_client *hw_fence_client);
+void hw_fence_cleanup_client(struct hw_fence_driver_data *drv_data,
+	 struct msm_hw_fence_client *hw_fence_client);
+int hw_fence_create(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	u64 context, u64 seqno, u64 *hash);
+int hw_fence_destroy(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	u64 context, u64 seqno);
+int hw_fence_process_fence_array(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	struct dma_fence_array *array);
+int hw_fence_process_fence(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client, struct dma_fence *fence);
+int hw_fence_update_queue(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client, u64 ctxt_id, u64 seqno, u64 hash,
+	u64 flags, u32 error, int queue_type);
+inline u64 hw_fence_get_qtime(struct hw_fence_driver_data *drv_data);
+int hw_fence_read_queue(struct msm_hw_fence_client *hw_fence_client,
+	struct msm_hw_fence_queue_payload *payload, int queue_type);
+int hw_fence_register_wait_client(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client, u64 context, u64 seqno);
+struct msm_hw_fence *msm_hw_fence_find(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	u64 context, u64 seqno, u64 *hash);
+
+#endif /* __HW_FENCE_DRV_INTERNAL_H */

+ 113 - 0
hw_fence/include/hw_fence_drv_utils.h

@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __HW_FENCE_DRV_UTILS_H
+#define __HW_FENCE_DRV_UTILS_H
+
+/**
+ * enum hw_fence_mem_reserve - Types of reservations for the carved-out memory.
+ * HW_FENCE_MEM_RESERVE_CTRL_QUEUE: Reserve memory for the ctrl rx/tx queues.
+ * HW_FENCE_MEM_RESERVE_LOCKS_REGION: Reserve memory for the per-client locks memory region.
+ * HW_FENCE_MEM_RESERVE_TABLE: Reserve memory for the hw-fences global table.
+ * HW_FENCE_MEM_RESERVE_CLIENT_QUEUE: Reserve memory per-client for the rx/tx queues.
+ */
+enum hw_fence_mem_reserve {
+	HW_FENCE_MEM_RESERVE_CTRL_QUEUE,
+	HW_FENCE_MEM_RESERVE_LOCKS_REGION,
+	HW_FENCE_MEM_RESERVE_TABLE,
+	HW_FENCE_MEM_RESERVE_CLIENT_QUEUE
+};
+
+/**
+ * global_atomic_store() - Inter-processor lock
+ * @lock: memory to lock
+ * @val: if true, api locks the memory, if false it unlocks the memory
+ */
+void global_atomic_store(uint64_t *lock, bool val);
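
A sketch of the intended lock/unlock pairing (an assumption: the per-client lock table comes from hw_fence_drv_priv.h, and the index math is hypothetical):

	/* Hypothetical critical section over a client's inter-processor lock */
	uint64_t *lock = &drv_data->client_lock_tbl[client_idx];

	global_atomic_store(lock, true);	/* acquire */
	/* ... update the shared hw-fence entry seen by the fence controller ... */
	global_atomic_store(lock, false);	/* release */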
+
+/**
+ * hw_fence_utils_init_virq() - Initialize doorbell (i.e. vIRQ) for SVM to HLOS signaling
+ * @drv_data: hw fence driver data
+ *
+ * Returns zero if success, otherwise returns negative error code.
+ */
+int hw_fence_utils_init_virq(struct hw_fence_driver_data *drv_data);
+
+/**
+ * hw_fence_utils_process_doorbell_mask() - Sends doorbell mask to process the signaled clients.
+ *                                          This API is only exported for simulation purposes.
+ * @drv_data: hw fence driver data.
+ * @db_flags: doorbell flag
+ */
+void hw_fence_utils_process_doorbell_mask(struct hw_fence_driver_data *drv_data, u64 db_flags);
+
+/**
+ * hw_fence_utils_alloc_mem() - Allocates the carved-out memory pool that will be used for the HW
+ *                              Fence global table, locks and queues.
+ * @hw_fence_drv_data: hw fence driver data
+ *
+ * Returns zero if success, otherwise returns negative error code.
+ */
+int hw_fence_utils_alloc_mem(struct hw_fence_driver_data *hw_fence_drv_data);
+
+/**
+ * hw_fence_utils_reserve_mem() - Reserves memory from the carved-out memory pool.
+ * @drv_data: hw fence driver data.
+ * @type: memory reservation type.
+ * @phys: physical address of the carved-out memory pool
+ * @pa: pointer to the virtual address of the reserved memory
+ * @size: size of the reserved memory
+ * @client_id: client id, used when reserving the per-client queue memory
+ *
+ * Returns zero if success, otherwise returns negative error code.
+ */
+int hw_fence_utils_reserve_mem(struct hw_fence_driver_data *drv_data,
+	enum hw_fence_mem_reserve type, phys_addr_t *phys, void **pa, u32 *size, int client_id);
+
+/**
+ * hw_fence_utils_parse_dt_props() -  Init dt properties
+ * @drv_data: hw fence driver data
+ *
+ * Returns zero if success, otherwise returns negative error code.
+ */
+int hw_fence_utils_parse_dt_props(struct hw_fence_driver_data *drv_data);
+
+/**
+ * hw_fence_utils_map_ipcc() -  Maps IPCC registers and enable signaling
+ * @drv_data: hw fence driver data
+ *
+ * Returns zero if success, otherwise returns negative error code.
+ */
+int hw_fence_utils_map_ipcc(struct hw_fence_driver_data *drv_data);
+
+/**
+ * hw_fence_utils_map_qtime() -  Maps qtime register
+ * @drv_data: hw fence driver data
+ *
+ * Returns zero if success, otherwise returns negative error code.
+ */
+int hw_fence_utils_map_qtime(struct hw_fence_driver_data *drv_data);
+
+/**
+ * hw_fence_utils_map_ctl_start() -  Maps ctl_start registers from dpu hw
+ * @drv_data: hw fence driver data
+ *
+ * Returns zero if success, otherwise returns negative error code. This API is only used
+ * for simulation purposes in platforms where dpu does not support ipc signal.
+ */
+int hw_fence_utils_map_ctl_start(struct hw_fence_driver_data *drv_data);
+
+/**
+ * hw_fence_utils_cleanup_fence() -  Cleanup the hw-fence from a specified client
+ * @drv_data: hw fence driver data
+ * @hw_fence_client: client, for which the fence must be cleared
+ * @hw_fence: hw-fence to cleanup
+ * @hash: hash of the hw-fence to cleanup
+ * @reset_flags: flags to determine how to handle the reset
+ *
+ * Returns zero if success, otherwise returns negative error code.
+ */
+int hw_fence_utils_cleanup_fence(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client, struct msm_hw_fence *hw_fence, u64 hash,
+	u32 reset_flags);
+
+#endif /* __HW_FENCE_DRV_UTILS_H */
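
For context, a plausible probe-time ordering of these helpers, inferred from their descriptions rather than taken from this change:

	/* Hypothetical probe sequence; the real ordering and error paths may differ */
	static int hw_fence_probe_sketch(struct hw_fence_driver_data *drv_data)
	{
		int ret;

		ret = hw_fence_utils_parse_dt_props(drv_data);	/* dt config first */
		if (ret)
			return ret;

		ret = hw_fence_utils_alloc_mem(drv_data);	/* carved-out pool */
		if (ret)
			return ret;

		ret = hw_fence_utils_map_ipcc(drv_data);	/* ipcc regs + signaling */
		if (ret)
			return ret;

		ret = hw_fence_utils_map_qtime(drv_data);	/* qtimer timestamps */
		if (ret)
			return ret;

		return hw_fence_utils_init_virq(drv_data);	/* SVM to HLOS doorbell */
	}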

+ 1000 - 0
hw_fence/src/hw_fence_drv_debug.c

@@ -0,0 +1,1000 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+
+#include "hw_fence_drv_priv.h"
+#include "hw_fence_drv_debug.h"
+#include "hw_fence_drv_ipc.h"
+#include "hw_fence_drv_utils.h"
+
+#define HW_FENCE_NAME_SIZE 64
+#define HW_FENCE_DEBUG_MAX_LOOPS 200
+
+u32 msm_hw_fence_debug_level = HW_FENCE_PRINTK;
+
+/**
+ * struct client_data - Structure holding the data of the debug clients.
+ *
+ * @client_id: client id.
+ * @dma_context: context id to create the dma-fences for the client.
+ * @seqno_cnt: sequence number, this is a counter to simulate the seqno for debugging.
+ * @client_handle: handle for the client, this is returned by the hw-fence driver after
+ *                 a successful registration of the client.
+ * @mem_descriptor: memory descriptor for the client-queues. This is populated by the hw-fence
+ *                 driver after a successful registration of the client.
+ * @list: client node.
+ */
+struct client_data {
+	int client_id;
+	u64 dma_context;
+	u64 seqno_cnt;
+	void *client_handle;
+	struct msm_hw_fence_mem_addr mem_descriptor;
+	struct list_head list;
+};
+
+/**
+ * struct hw_dma_fence - fences created by hw-fence for debugging.
+ * @base: base dma-fence structure, this must remain at beginning of the struct.
+ * @name: name of each fence.
+ * @client_handle: handle for the client owner of this fence, this is returned by the hw-fence
+ *                 driver after a successful registration of the client and used by this fence
+ *                 during release.
+ */
+struct hw_dma_fence {
+	struct dma_fence base;
+	char name[HW_FENCE_NAME_SIZE];
+	void *client_handle;
+};
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static int _get_debugfs_input_client(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos,
+	struct hw_fence_driver_data **drv_data)
+{
+	char buf[10];
+	int client_id;
+
+	if (!file || !file->private_data) {
+		HWFNC_ERR("unexpected data %d\n", !file);
+		return -EINVAL;
+	}
+	*drv_data = file->private_data;
+
+	if (count >= sizeof(buf))
+		return -EFAULT;
+
+	if (copy_from_user(buf, user_buf, count))
+		return -EFAULT;
+
+	buf[count] = 0; /* end of string */
+
+	if (kstrtouint(buf, 0, &client_id))
+		return -EFAULT;
+
+	if (client_id < HW_FENCE_CLIENT_ID_CTX0 || client_id >= HW_FENCE_CLIENT_MAX) {
+		HWFNC_ERR("invalid client_id:%d min:%d max:%d\n", client_id,
+			HW_FENCE_CLIENT_ID_CTX0, HW_FENCE_CLIENT_MAX);
+		return -EINVAL;
+	}
+
+	return client_id;
+}
+
+static int _debugfs_ipcc_trigger(struct file *file, const char __user *user_buf,
+	size_t count, loff_t *ppos, u32 tx_client, u32 rx_client)
+{
+	struct hw_fence_driver_data *drv_data;
+	int client_id, signal_id;
+
+	client_id = _get_debugfs_input_client(file, user_buf, count, ppos, &drv_data);
+	if (client_id < 0)
+		return -EINVAL;
+
+	/* Get signal-id that hw-fence driver would trigger for this client */
+	signal_id = hw_fence_ipcc_get_signal_id(drv_data, client_id);
+	if (signal_id < 0)
+		return -EINVAL;
+
+	HWFNC_DBG_IRQ("client_id:%d ipcc write tx_client:%d rx_client:%d signal_id:%d qtime:%llu\n",
+		client_id, tx_client, rx_client, signal_id, hw_fence_get_qtime(drv_data));
+	hw_fence_ipcc_trigger_signal(drv_data, tx_client, rx_client, signal_id);
+
+	return count;
+}
+
+/**
+ * hw_fence_dbg_ipcc_write() - debugfs write to trigger an ipcc irq.
+ * @file: file handler.
+ * @user_buf: user buffer content from debugfs.
+ * @count: size of the user buffer.
+ * @ppos: position offset of the user buffer.
+ *
+ * This debugfs receives as parameter a hw-fence driver client_id, and triggers an ipcc signal
+ * from apps to apps for that client id.
+ */
+static ssize_t hw_fence_dbg_ipcc_write(struct file *file, const char __user *user_buf,
+	size_t count, loff_t *ppos)
+{
+	return _debugfs_ipcc_trigger(file, user_buf, count, ppos, HW_FENCE_IPC_CLIENT_ID_APPS,
+		HW_FENCE_IPC_CLIENT_ID_APPS);
+}
+
+#ifdef HW_DPU_IPCC
+/**
+ * hw_fence_dbg_ipcc_dpu_write() - debugfs write to trigger an ipcc irq to dpu core.
+ * @file: file handler.
+ * @user_buf: user buffer content from debugfs.
+ * @count: size of the user buffer.
+ * @ppos: position offset of the user buffer.
+ *
+ * This debugfs receives as parameter a hw-fence driver client_id, and triggers an ipcc signal
+ * from apps to dpu for that client id.
+ */
+static ssize_t hw_fence_dbg_ipcc_dpu_write(struct file *file, const char __user *user_buf,
+	size_t count, loff_t *ppos)
+{
+	return _debugfs_ipcc_trigger(file, user_buf, count, ppos, HW_FENCE_IPC_CLIENT_ID_APPS,
+		HW_FENCE_IPC_CLIENT_ID_DPU);
+
+}
+
+static const struct file_operations hw_fence_dbg_ipcc_dpu_fops = {
+	.open = simple_open,
+	.write = hw_fence_dbg_ipcc_dpu_write,
+};
+#endif /* HW_DPU_IPCC */
+
+static const struct file_operations hw_fence_dbg_ipcc_fops = {
+	.open = simple_open,
+	.write = hw_fence_dbg_ipcc_write,
+};
+
+static inline struct hw_dma_fence *to_hw_dma_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct hw_dma_fence, base);
+}
+
+static const char *hw_fence_dbg_get_driver_name(struct dma_fence *fence)
+{
+	struct hw_dma_fence *hw_dma_fence = to_hw_dma_fence(fence);
+
+	return hw_dma_fence->name;
+}
+
+static const char *hw_fence_dbg_get_timeline_name(struct dma_fence *fence)
+{
+	struct hw_dma_fence *hw_dma_fence = to_hw_dma_fence(fence);
+
+	return hw_dma_fence->name;
+}
+
+static bool hw_fence_dbg_enable_signaling(struct dma_fence *fence)
+{
+	return true;
+}
+
+static void _hw_fence_release(struct hw_dma_fence *hw_dma_fence)
+{
+	if (IS_ERR_OR_NULL(hw_dma_fence->client_handle)) {
+		HWFNC_ERR("invalid hwfence data, won't release hw_fence\n");
+		return;
+	}
+
+	/* release hw-fence */
+	if (msm_hw_fence_destroy(hw_dma_fence->client_handle, &hw_dma_fence->base))
+		HWFNC_ERR("failed to release hw_fence\n");
+}
+
+static void hw_fence_dbg_release(struct dma_fence *fence)
+{
+	struct hw_dma_fence *hw_dma_fence;
+
+	if (!fence)
+		return;
+
+	HWFNC_DBG_H("release backing fence %pK\n", fence);
+	hw_dma_fence = to_hw_dma_fence(fence);
+
+	if (test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags))
+		_hw_fence_release(hw_dma_fence);
+
+	kfree(fence->lock);
+	kfree(hw_dma_fence);
+}
+
+static struct dma_fence_ops hw_fence_dbg_ops = {
+	.get_driver_name = hw_fence_dbg_get_driver_name,
+	.get_timeline_name = hw_fence_dbg_get_timeline_name,
+	.enable_signaling = hw_fence_dbg_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = hw_fence_dbg_release,
+};
+
+struct client_data *_get_client_node(struct hw_fence_driver_data *drv_data, u32 client_id)
+{
+	struct client_data *node = NULL;
+	bool found = false;
+
+	mutex_lock(&drv_data->debugfs_data.clients_list_lock);
+	list_for_each_entry(node, &drv_data->debugfs_data.clients_list, list) {
+		if (node->client_id == client_id) {
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&drv_data->debugfs_data.clients_list_lock);
+
+	return found ? node : NULL;
+}
+
+/**
+ * hw_fence_dbg_reset_client_wr() - debugfs write to trigger reset in a debug hw-fence client.
+ * @file: file handler.
+ * @user_buf: user buffer content from debugfs.
+ * @count: size of the user buffer.
+ * @ppos: position offset of the user buffer.
+ *
+ * This debugfs receives as parameter a hw-fence driver client_id, and triggers a reset for
+ * this client. Note that this operation only works on hw-fence clients created through
+ * the debug framework.
+ */
+static ssize_t hw_fence_dbg_reset_client_wr(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	int client_id, ret;
+	struct client_data *client_info;
+	struct hw_fence_driver_data *drv_data;
+
+	client_id = _get_debugfs_input_client(file, user_buf, count, ppos, &drv_data);
+	if (client_id < 0)
+		return -EINVAL;
+
+	client_info = _get_client_node(drv_data, client_id);
+	if (!client_info || IS_ERR_OR_NULL(client_info->client_handle)) {
+		HWFNC_ERR("client:%d not registered as debug client\n", client_id);
+		return -EINVAL;
+	}
+
+	HWFNC_DBG_H("resetting client: %d\n", client_id);
+	ret = msm_hw_fence_reset_client(client_info->client_handle, 0);
+	if (ret)
+		HWFNC_ERR("failed to reset client:%d\n", client_id);
+
+	return count;
+}
+
+/**
+ * hw_fence_dbg_register_clients_wr() - debugfs write to register a client with the hw-fence
+ *                                      driver for debugging.
+ * @file: file handler.
+ * @user_buf: user buffer content from debugfs.
+ * @count: size of the user buffer.
+ * @ppos: position offset of the user buffer.
+ *
+ * This debugfs receives as parameter a hw-fence driver client_id to register for debug.
+ * Note that if the client_id received was already registered by any other driver, the
+ * registration here will fail.
+ */
+static ssize_t hw_fence_dbg_register_clients_wr(struct file *file,
+		const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	int client_id;
+	struct client_data *client_info;
+	struct hw_fence_driver_data *drv_data;
+
+	client_id = _get_debugfs_input_client(file, user_buf, count, ppos, &drv_data);
+	if (client_id < 0)
+		return -EINVAL;
+
+	/* we cannot create same debug client twice */
+	if (_get_client_node(drv_data, client_id)) {
+		HWFNC_ERR("client:%d already registered as debug client\n", client_id);
+		return -EINVAL;
+	}
+
+	client_info = kzalloc(sizeof(*client_info), GFP_KERNEL);
+	if (!client_info)
+		return -ENOMEM;
+
+	HWFNC_DBG_H("register client %d\n", client_id);
+	client_info->client_handle = msm_hw_fence_register(client_id,
+		&client_info->mem_descriptor);
+	if (IS_ERR_OR_NULL(client_info->client_handle)) {
+		HWFNC_ERR("error registering as debug client:%d\n", client_id);
+		client_info->client_handle = NULL;
+		return -EFAULT;
+	}
+
+	client_info->dma_context = dma_fence_context_alloc(1);
+	client_info->client_id = client_id;
+
+	mutex_lock(&drv_data->debugfs_data.clients_list_lock);
+	list_add(&client_info->list, &drv_data->debugfs_data.clients_list);
+	mutex_unlock(&drv_data->debugfs_data.clients_list_lock);
+
+	return count;
+}
+
+struct hw_fence_out_clients_map {
+	int ipc_client_id; /* ipc client id for the hw fence client */
+	int ipc_signal_id; /* ipc signal id for the hw fence client */
+};
+
+/* NOTE: These signals are the ones that the actual clients should be triggering, hw-fence driver
+ * does not need to have knowledge of these signals. Adding them here for debugging purposes.
+ * Only the fence controller and the clients know these ids, since these
+ * are used to trigger the ipcc from the 'client hw-core' to the 'hw-fence controller'.
+ *
+ * Note that the index of this struct must match the enum hw_fence_client_id
+ */
+struct hw_fence_out_clients_map dbg_out_clients_signal_map_no_dpu[HW_FENCE_CLIENT_MAX] = {
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 0},  /* CTRL_LOOPBACK */
+	{HW_FENCE_IPC_CLIENT_ID_GPU, 0},  /* CTX0 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 2},  /* CTL0 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 4},  /* CTL1 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 6},  /* CTL2 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 8},  /* CTL3 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 10}, /* CTL4 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 12} /* CTL5 */
+};
+
+/**
+ * hw_fence_dbg_tx_and_signal_clients_wr() - debugfs write to simulate the lifecycle of a hw-fence.
+ * @file: file handler.
+ * @user_buf: user buffer content from debugfs.
+ * @count: size of the user buffer.
+ * @ppos: position offset of the user buffer.
+ *
+ * This debugfs receives as parameter the number of iterations that the simulation will run,
+ * each iteration will: create, signal, register-for-signal and destroy a hw-fence.
+ * Note that this simulation relies in the user first registering the clients as debug-clients
+ * through the debugfs 'hw_fence_dbg_register_clients_wr'. If the clients are not previously
+ * registered as debug-clients, this simulation will fail and won't run.
+ */
+static ssize_t hw_fence_dbg_tx_and_signal_clients_wr(struct file *file,
+		const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	u32 input_data, client_id_src, client_id_dst, tx_client, rx_client;
+	struct client_data *client_info_src, *client_info_dst;
+	struct hw_fence_driver_data *drv_data;
+	struct msm_hw_fence_client *hw_fence_client, *hw_fence_client_dst;
+	u64 context, seqno, hash;
+	char buf[10];
+	int signal_id, ret;
+
+	if (!file || !file->private_data) {
+		HWFNC_ERR("unexpected data %d\n", file);
+		return -EINVAL;
+	}
+	drv_data = file->private_data;
+
+	if (count >= sizeof(buf))
+		return -EFAULT;
+
+	if (copy_from_user(buf, user_buf, count))
+		return -EFAULT;
+
+	buf[count] = 0; /* end of string */
+
+	if (kstrtouint(buf, 0, &input_data))
+		return -EFAULT;
+
+	if (input_data <= 0) {
+		HWFNC_ERR("won't do anything, write value greather than 0 to start..\n");
+		return 0;
+	} else if (input_data > HW_FENCE_DEBUG_MAX_LOOPS) {
+		HWFNC_ERR("requested loops:%d exceed max:%d, setting max\n", input_data,
+			HW_FENCE_DEBUG_MAX_LOOPS);
+		input_data = HW_FENCE_DEBUG_MAX_LOOPS;
+	}
+
+	client_id_src = HW_FENCE_CLIENT_ID_CTL0;
+	client_id_dst = HW_FENCE_CLIENT_ID_CTL1;
+
+	client_info_src = _get_client_node(drv_data, client_id_src);
+	client_info_dst = _get_client_node(drv_data, client_id_dst);
+
+	if (!client_info_src || IS_ERR_OR_NULL(client_info_src->client_handle) ||
+			!client_info_dst || IS_ERR_OR_NULL(client_info_dst->client_handle)) {
+		/* Make sure we registered this client through debugfs */
+		HWFNC_ERR("client_id_src:%d or client_id_dst:%d not registered as debug client!\n",
+			client_id_src, client_id_dst);
+		return -EINVAL;
+	}
+
+	hw_fence_client = (struct msm_hw_fence_client *)client_info_src->client_handle;
+	hw_fence_client_dst = (struct msm_hw_fence_client *)client_info_dst->client_handle;
+
+	while (drv_data->debugfs_data.create_hw_fences && input_data > 0) {
+
+		/***********************************************************/
+		/***** SRC CLIENT - CREATE HW FENCE & TX QUEUE UPDATE ******/
+		/***********************************************************/
+
+		/* we will use the context and the seqno of the source client */
+		context = client_info_src->dma_context;
+		seqno = client_info_src->seqno_cnt;
+
+		/* linear increment of the seqno for the src client */
+		client_info_src->seqno_cnt++;
+
+		/* Create hw fence for src client */
+		ret = hw_fence_create(drv_data, hw_fence_client, context, seqno, &hash);
+		if (ret) {
+			HWFNC_ERR("Error creating HW fence\n");
+			goto exit;
+		}
+
+		/* Write to Tx queue */
+		hw_fence_update_queue(drv_data, hw_fence_client, context, seqno, hash,
+			0, 0, HW_FENCE_TX_QUEUE - 1); // no flags and no error
+
+		/**********************************************/
+		/***** DST CLIENT - REGISTER WAIT CLIENT ******/
+		/**********************************************/
+		/* use same context and seqno that src client used to create fence */
+		ret = hw_fence_register_wait_client(drv_data, hw_fence_client_dst, context, seqno);
+		if (ret) {
+			HWFNC_ERR("failed to register for wait\n");
+			return -EINVAL;
+		}
+
+		/*********************************************/
+		/***** SRC CLIENT - TRIGGER IPCC SIGNAL ******/
+		/*********************************************/
+
+		/* AFTER THIS IS WHEN SVM WILL GET CALLED AND WILL PROCESS SRC AND DST CLIENTS */
+
+		/* Trigger IPCC for SVM to read the queue */
+
+		/* Get signal-id that hw-fence driver would trigger for this client */
+		signal_id = dbg_out_clients_signal_map_no_dpu[client_id_src].ipc_signal_id;
+		if (signal_id < 0)
+			return -EINVAL;
+
+		/*  Write to ipcc to trigger the irq */
+		tx_client = HW_FENCE_IPC_CLIENT_ID_APPS;
+		rx_client = HW_FENCE_IPC_CLIENT_ID_APPS;
+		HWFNC_DBG_IRQ("client:%d tx_client:%d rx_client:%d signal:%d delay:%d in_data%d\n",
+			client_id_src, tx_client, rx_client, signal_id,
+			drv_data->debugfs_data.hw_fence_sim_release_delay, input_data);
+
+		hw_fence_ipcc_trigger_signal(drv_data, tx_client, rx_client, signal_id);
+
+		/********************************************/
+		/******** WAIT ******************************/
+		/********************************************/
+
+		/* wait between iterations */
+		usleep_range(drv_data->debugfs_data.hw_fence_sim_release_delay,
+			(drv_data->debugfs_data.hw_fence_sim_release_delay + 5));
+
+		/******************************************/
+		/***** SRC CLIENT - CLEANUP HW FENCE ******/
+		/******************************************/
+
+		/* cleanup hw fence for src client */
+		ret = hw_fence_destroy(drv_data, hw_fence_client, context, seqno);
+		if (ret) {
+			HWFNC_ERR("Error destroying HW fence\n");
+			goto exit;
+		}
+
+		input_data--;
+	} /* LOOP.. */
+
+exit:
+	return count;
+}
+
+/**
+ * hw_fence_dbg_create_wr() - debugfs write to simulate the creation of a hw-fence.
+ * @file: file handler.
+ * @user_buf: user buffer content from debugfs.
+ * @count: size of the user buffer.
+ * @ppos: position offset of the user buffer.
+ *
+ * This debugfs receives as parameter the client-id, for which the hw-fence will be created.
+ * Note that this simulation relies on the user first registering the client as a debug-client
+ * through the debugfs 'hw_fence_dbg_register_clients_wr'. If the client is not previously
+ * registered as debug-client, this simulation will fail and won't run.
+ */
+static ssize_t hw_fence_dbg_create_wr(struct file *file,
+		const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	struct msm_hw_fence_create_params params;
+	struct hw_fence_driver_data *drv_data;
+	struct client_data *client_info;
+	struct hw_dma_fence *dma_fence;
+	spinlock_t *fence_lock;
+	static u64 hw_fence_dbg_seqno = 1;
+	int client_id, ret;
+	u64 hash;
+
+	client_id = _get_debugfs_input_client(file, user_buf, count, ppos, &drv_data);
+	if (client_id < 0)
+		return -EINVAL;
+
+	client_info = _get_client_node(drv_data, client_id);
+	if (!client_info || IS_ERR_OR_NULL(client_info->client_handle)) {
+		HWFNC_ERR("client:%d not registered as debug client\n", client_id);
+		return -EINVAL;
+	}
+
+	/* create debug dma_fence */
+	fence_lock = kzalloc(sizeof(*fence_lock), GFP_KERNEL);
+	if (!fence_lock)
+		return -ENOMEM;
+
+	dma_fence = kzalloc(sizeof(*dma_fence), GFP_KERNEL);
+	if (!dma_fence) {
+		kfree(fence_lock);
+		return -ENOMEM;
+	}
+
+	snprintf(dma_fence->name, HW_FENCE_NAME_SIZE, "hwfence:id:%d:ctx=%lu:seqno:%lu",
+		client_id, client_info->dma_context, hw_fence_dbg_seqno);
+
+	spin_lock_init(fence_lock);
+	dma_fence_init(&dma_fence->base, &hw_fence_dbg_ops, fence_lock,
+		client_info->dma_context, hw_fence_dbg_seqno);
+
+	HWFNC_DBG_H("creating hw_fence for client:%d ctx:%llu seqno:%llu\n", client_id,
+		client_info->dma_context, hw_fence_dbg_seqno);
+	params.fence = &dma_fence->base;
+	params.handle = &hash;
+	ret = msm_hw_fence_create(client_info->client_handle, &params);
+	if (ret) {
+		HWFNC_ERR("failed to create hw_fence for client:%d ctx:%llu seqno:%llu\n",
+			client_id, client_info->dma_context, hw_fence_dbg_seqno);
+		dma_fence_put(&dma_fence->base);
+		return -EINVAL;
+	}
+	hw_fence_dbg_seqno++;
+
+	/* keep handle in dma_fence, to destroy hw-fence during release */
+	dma_fence->client_handle = client_info->client_handle;
+
+	return count;
+}
+
+#define HFENCE_TBL_MSG \
+	"[%d]hfence[%d] v:%d err:%d ctx:%llu seqno:%llu wait:0x%llx alloc:%d f:0x%llx tt:%llu wt:%llu\n"
+
+static inline int _dump_fence(struct msm_hw_fence *hw_fence, char *buf, int len, int max_size,
+		u32 index, u32 cnt)
+{
+	int ret;
+
+	ret = scnprintf(buf + len, max_size - len, HFENCE_TBL_MSG,
+		cnt, index, hw_fence->valid, hw_fence->error,
+		hw_fence->ctx_id, hw_fence->seq_id,
+		hw_fence->wait_client_mask, hw_fence->fence_allocator,
+		hw_fence->flags, hw_fence->fence_trigger_time, hw_fence->fence_wait_time);
+
+	HWFNC_DBG_L(HFENCE_TBL_MSG,
+		cnt, index, hw_fence->valid, hw_fence->error,
+		hw_fence->ctx_id, hw_fence->seq_id,
+		hw_fence->wait_client_mask, hw_fence->fence_allocator,
+		hw_fence->flags, hw_fence->fence_trigger_time, hw_fence->fence_wait_time);
+
+	return ret;
+}
+
+static int dump_single_entry(struct hw_fence_driver_data *drv_data, char *buf, u32 *index,
+	int max_size)
+{
+	struct msm_hw_fence *hw_fence;
+	u64 context, seqno, hash = 0;
+	int len = 0;
+
+	context = drv_data->debugfs_data.context_rd;
+	seqno = drv_data->debugfs_data.seqno_rd;
+
+	hw_fence = msm_hw_fence_find(drv_data, NULL, context, seqno, &hash);
+	if (!hw_fence) {
+		HWFNC_ERR("no valid hfence found for context:%lu seqno:%lu", context, seqno, hash);
+		len = scnprintf(buf + len, max_size - len,
+			"no valid hfence found for context:%lu seqno:%lu hash:%lu\n",
+			context, seqno, hash);
+
+		goto exit;
+	}
+
+	len = _dump_fence(hw_fence, buf, len, max_size, hash, 0);
+
+exit:
+	/* move idx to end of table to stop the dump */
+	*index = drv_data->hw_fences_tbl_cnt;
+
+	return len;
+}
+
+static int dump_full_table(struct hw_fence_driver_data *drv_data, char *buf, u32 *index,
+	u32 *cnt, int max_size, int entry_size)
+{
+	struct msm_hw_fence *hw_fence;
+	int len = 0;
+
+	while ((*index < drv_data->hw_fences_tbl_cnt) && (len < (max_size - entry_size))) {
+		hw_fence = &drv_data->hw_fences_tbl[(*index)++];
+
+		if (!hw_fence->valid)
+			continue;
+
+		len += _dump_fence(hw_fence, buf, len, max_size, *index, *cnt);
+		(*cnt)++;
+	}
+
+	return len;
+}
+
+/**
+ * hw_fence_dbg_dump_table_rd() - debugfs read to dump the hw-fences table.
+ * @file: file handler.
+ * @user_buf: user buffer content for debugfs.
+ * @user_buf_size: size of the user buffer.
+ * @ppos: position offset of the user buffer.
+ *
+ * This debugfs dumps the hw-fence table. By default debugfs will dump all the valid entries of the
+ * whole table. However, if user only wants to dump only one particular entry, user can provide the
+ * context-id and seqno of the dma-fence of interest by writing to this debugfs node (see
+ * documentation for the write in 'hw_fence_dbg_dump_table_wr').
+ */
+static ssize_t hw_fence_dbg_dump_table_rd(struct file *file, char __user *user_buf,
+	size_t user_buf_size, loff_t *ppos)
+{
+	struct hw_fence_driver_data *drv_data;
+	int entry_size = sizeof(struct msm_hw_fence);
+	char *buf = NULL;
+	int len = 0, max_size = SZ_4K;
+	static u32 index, cnt;
+
+	if (!file || !file->private_data) {
+		HWFNC_ERR("unexpected data %d\n", file);
+		return -EINVAL;
+	}
+	drv_data = file->private_data;
+
+	if (!drv_data->hw_fences_tbl) {
+		HWFNC_ERR("Failed to dump table: Null fence table\n");
+		return -EINVAL;
+	}
+
+	if (index >= drv_data->hw_fences_tbl_cnt) {
+		HWFNC_DBG_H("no more data index:%d cnt:%d\n", index, drv_data->hw_fences_tbl_cnt);
+		index = cnt = 0;
+		return 0;
+	}
+
+	if (user_buf_size < entry_size) {
+		HWFNC_ERR("Not enough buff size:%d to dump entries:%d\n", user_buf_size,
+			entry_size);
+		return -EINVAL;
+	}
+
+	buf = kzalloc(max_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	len = drv_data->debugfs_data.entry_rd ?
+		dump_single_entry(drv_data, buf, &index, max_size) :
+		dump_full_table(drv_data, buf, &index, &cnt, max_size, entry_size);
+
+	if (len <= 0 || len > user_buf_size) {
+		HWFNC_ERR("len:%d invalid buff size:%d\n", len, user_buf_size);
+		len = 0;
+		goto exit;
+	}
+
+	if (copy_to_user(user_buf, buf, len)) {
+		HWFNC_ERR("failed to copy to user!\n");
+		len = -EFAULT;
+		goto exit;
+	}
+	*ppos += len;
+exit:
+	kfree(buf);
+	return len;
+}
+
+/**
+ * hw_fence_dbg_dump_table_wr() - debugfs write to control the dump of the hw-fences table.
+ * @file: file handler.
+ * @user_buf: user buffer content from debugfs.
+ * @user_buf_size: size of the user buffer.
+ * @ppos: position offset of the user buffer.
+ *
+ * This debugfs receives as parameters the settings to dump either the whole hw-fences table
+ * or only one element on the table in the next read of the same debugfs node.
+ * If this debugfs receives two input values, it will interpret them as the 'context-id' and the
+ * 'sequence-id' to dump from the hw-fence table in the subsequent reads of the debugfs.
+ * Otherwise, if the debugfs receives only one input value, the next read from the debugfs, will
+ * dump the whole hw-fences table.
+ */
+static ssize_t hw_fence_dbg_dump_table_wr(struct file *file,
+		const char __user *user_buf, size_t user_buf_size, loff_t *ppos)
+{
+	struct hw_fence_driver_data *drv_data;
+	u64 param_0, param_1;
+	char buf[24];
+	int num_input_params;
+
+	if (!file || !file->private_data) {
+		HWFNC_ERR("unexpected data %d\n", file);
+		return -EINVAL;
+	}
+	drv_data = file->private_data;
+
+	if (user_buf_size >= sizeof(buf)) {
+		HWFNC_ERR("wrong size:%d size:%d\n", user_buf_size, sizeof(buf));
+		return -EFAULT;
+	}
+
+	if (copy_from_user(buf, user_buf, user_buf_size))
+		return -EFAULT;
+
+	buf[user_buf_size] = 0; /* end of string */
+
+	/* read the input params */
+	num_input_params = sscanf(buf, "%llu %llu", &param_0, &param_1);
+
+	if (num_input_params == 2) { /* if debugfs receives two input params */
+		drv_data->debugfs_data.context_rd = param_0;
+		drv_data->debugfs_data.seqno_rd = param_1;
+		drv_data->debugfs_data.entry_rd = true;
+	} else if (num_input_params == 1) { /* if debugfs receives one param */
+		drv_data->debugfs_data.context_rd = 0;
+		drv_data->debugfs_data.seqno_rd = 0;
+		drv_data->debugfs_data.entry_rd = false;
+	} else {
+		HWFNC_ERR("invalid num params:%d\n", num_input_params);
+		return -EFAULT;
+	}
+
+	return user_buf_size;
+}
+
+static void _cleanup_fences(int i, struct dma_fence **fences, spinlock_t **fences_lock)
+{
+	struct hw_dma_fence *dma_fence;
+	int idx;
+
+	for (idx = i; idx >= 0 ; idx--) {
+		kfree(fences_lock[idx]);
+
+		dma_fence = to_hw_dma_fence(fences[idx]);
+		kfree(dma_fence);
+	}
+
+	kfree(fences_lock);
+	kfree(fences);
+}
+
+/**
+ * hw_fence_dbg_create_join_fence() - debugfs write to simulate the lifecycle of a join hw-fence.
+ * @file: file handler.
+ * @user_buf: user buffer content from debugfs.
+ * @count: size of the user buffer.
+ * @ppos: position offset of the user buffer.
+ *
+ * This debugfs will: create, signal, register-for-signal and destroy a join hw-fence.
+ * Note that this simulation relies in the user first registering the clients as debug-clients
+ * through the debugfs 'hw_fence_dbg_register_clients_wr'. If the clients are not previously
+ * registered as debug-clients, this simulation will fail and won't run.
+ */
+static ssize_t hw_fence_dbg_create_join_fence(struct file *file,
+			const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	struct dma_fence_array *fence_array;
+	struct hw_fence_driver_data *drv_data;
+	struct dma_fence *fence_array_fence;
+	struct client_data *client_info_src, *client_info_dst;
+	u64 hw_fence_dbg_seqno = 1;
+	int client_id_src, client_id_dst;
+	struct msm_hw_fence_create_params params;
+	int i, ret = 0;
+	u64 hash;
+	struct msm_hw_fence_client *hw_fence_client;
+	int tx_client, rx_client, signal_id;
+
+	/* creates 3 fences and a parent fence */
+	int num_fences = 3;
+	struct dma_fence **fences = NULL;
+	spinlock_t **fences_lock = NULL;
+
+	if (!file || !file->private_data) {
+		HWFNC_ERR("unexpected data %d\n", file);
+		return -EINVAL;
+	}
+	drv_data = file->private_data;
+	client_id_src = HW_FENCE_CLIENT_ID_CTL0;
+	client_id_dst = HW_FENCE_CLIENT_ID_CTL1;
+	client_info_src = _get_client_node(drv_data, client_id_src);
+	client_info_dst = _get_client_node(drv_data, client_id_dst);
+	if (!client_info_src || IS_ERR_OR_NULL(client_info_src->client_handle) ||
+			!client_info_dst || IS_ERR_OR_NULL(client_info_dst->client_handle)) {
+		HWFNC_ERR("client_src:%d or client:%d is not register as debug client\n",
+			client_id_src, client_id_dst);
+		return -EINVAL;
+	}
+	hw_fence_client = (struct msm_hw_fence_client *)client_info_src->client_handle;
+
+	fences_lock = kcalloc(num_fences, sizeof(*fences_lock), GFP_KERNEL);
+	if (!fences_lock)
+		return -ENOMEM;
+
+	fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
+	if (!fences) {
+		kfree(fences_lock);
+		return -ENOMEM;
+	}
+
+	/* Create the array of dma fences */
+	for (i = 0; i < num_fences; i++) {
+		struct hw_dma_fence *dma_fence;
+
+		fences_lock[i] = kzalloc(sizeof(*fences_lock[i]), GFP_KERNEL);
+		if (!fences_lock[i]) {
+			_cleanup_fences(i, fences, fences_lock);
+			return -ENOMEM;
+		}
+
+		dma_fence = kzalloc(sizeof(*dma_fence), GFP_KERNEL);
+		if (!dma_fence) {
+			_cleanup_fences(i, fences, fences_lock);
+			return -ENOMEM;
+		}
+		fences[i] = &dma_fence->base;
+
+		spin_lock_init(fences_lock[i]);
+		dma_fence_init(fences[i], &hw_fence_dbg_ops, fences_lock[i],
+			client_info_src->dma_context, hw_fence_dbg_seqno + i);
+	}
+
+	/* create the fence array from array of dma fences */
+	fence_array = dma_fence_array_create(num_fences, fences,
+				client_info_src->dma_context, hw_fence_dbg_seqno + num_fences, 0);
+	if (!fence_array) {
+		HWFNC_ERR("Error creating fence_array\n");
+		_cleanup_fences(num_fences - 1, fences, fences_lock);
+		return -EINVAL;
+	}
+
+	/* create hw fence and write to tx queue for each dma fence */
+	for (i = 0; i < num_fences; i++) {
+		params.fence = fences[i];
+		params.handle = &hash;
+
+		ret = msm_hw_fence_create(client_info_src->client_handle, &params);
+		if (ret) {
+			HWFNC_ERR("Error creating HW fence\n");
+			count = -EINVAL;
+			goto error;
+		}
+
+		/* Write to Tx queue */
+		hw_fence_update_queue(drv_data, hw_fence_client, client_info_src->dma_context,
+			hw_fence_dbg_seqno + i, hash, 0, 0,
+			HW_FENCE_TX_QUEUE - 1);
+	}
+
+	/* wait on the fence array */
+	fence_array_fence = &fence_array->base;
+	msm_hw_fence_wait_update(client_info_dst->client_handle, &fence_array_fence, 1, 1);
+
+	signal_id = dbg_out_clients_signal_map_no_dpu[client_id_src].ipc_signal_id;
+	if (signal_id < 0) {
+		count = -EINVAL;
+		goto error;
+	}
+
+	/* write to ipcc to trigger the irq */
+	tx_client = HW_FENCE_IPC_CLIENT_ID_APPS;
+	rx_client = HW_FENCE_IPC_CLIENT_ID_APPS;
+	hw_fence_ipcc_trigger_signal(drv_data, tx_client, rx_client, signal_id);
+
+	usleep_range(drv_data->debugfs_data.hw_fence_sim_release_delay,
+		(drv_data->debugfs_data.hw_fence_sim_release_delay + 5));
+
+error:
+	/* this frees the memory for the fence-array and each dma-fence */
+	dma_fence_put(&fence_array->base);
+
+	/*
+	 * free the array of lock pointers; no need to call kfree on 'fences', since those are
+	 * released by the fence-array release api
+	 */
+	kfree(fences_lock);
+
+	return count;
+}
+
+static const struct file_operations hw_fence_reset_client_fops = {
+	.open = simple_open,
+	.write = hw_fence_dbg_reset_client_wr,
+};
+
+static const struct file_operations hw_fence_register_clients_fops = {
+	.open = simple_open,
+	.write = hw_fence_dbg_register_clients_wr,
+};
+
+static const struct file_operations hw_fence_tx_and_signal_clients_fops = {
+	.open = simple_open,
+	.write = hw_fence_dbg_tx_and_signal_clients_wr,
+};
+
+static const struct file_operations hw_fence_create_fops = {
+	.open = simple_open,
+	.write = hw_fence_dbg_create_wr,
+};
+
+static const struct file_operations hw_fence_dump_table_fops = {
+	.open = simple_open,
+	.write = hw_fence_dbg_dump_table_wr,
+	.read = hw_fence_dbg_dump_table_rd,
+};
+
+static const struct file_operations hw_fence_create_join_fence_fops = {
+	.open = simple_open,
+	.write = hw_fence_dbg_create_join_fence,
+};
+
+int hw_fence_debug_debugfs_register(struct hw_fence_driver_data *drv_data)
+{
+	struct dentry *debugfs_root;
+
+	debugfs_root = debugfs_create_dir("hw_fence", NULL);
+	if (IS_ERR_OR_NULL(debugfs_root)) {
+		HWFNC_ERR("debugfs_root create_dir fail, error %ld\n",
+			PTR_ERR(debugfs_root));
+		drv_data->debugfs_data.root = NULL;
+		return -EINVAL;
+	}
+
+	mutex_init(&drv_data->debugfs_data.clients_list_lock);
+	INIT_LIST_HEAD(&drv_data->debugfs_data.clients_list);
+	drv_data->debugfs_data.root = debugfs_root;
+	drv_data->debugfs_data.create_hw_fences = true;
+	drv_data->debugfs_data.hw_fence_sim_release_delay = 8333; /* uS */
+
+	debugfs_create_file("ipc_trigger", 0600, debugfs_root, drv_data,
+		&hw_fence_dbg_ipcc_fops);
+#ifdef HW_DPU_IPCC
+	debugfs_create_file("dpu_trigger", 0600, debugfs_root, drv_data,
+		&hw_fence_dbg_ipcc_dpu_fops);
+#endif /* HW_DPU_IPCC */
+	debugfs_create_file("hw_fence_reset_client", 0600, debugfs_root, drv_data,
+		&hw_fence_reset_client_fops);
+	debugfs_create_file("hw_fence_register_clients", 0600, debugfs_root, drv_data,
+		&hw_fence_register_clients_fops);
+	debugfs_create_file("hw_fence_tx_and_signal", 0600, debugfs_root, drv_data,
+		&hw_fence_tx_and_signal_clients_fops);
+	debugfs_create_file("hw_fence_create_join_fence", 0600, debugfs_root, drv_data,
+		&hw_fence_create_join_fence_fops);
+	debugfs_create_bool("create_hw_fences", 0600, debugfs_root,
+		&drv_data->debugfs_data.create_hw_fences);
+	debugfs_create_u32("sleep_range_us", 0600, debugfs_root,
+		&drv_data->debugfs_data.hw_fence_sim_release_delay);
+	debugfs_create_file("hw_fence_create", 0600, debugfs_root, drv_data,
+		&hw_fence_create_fops);
+	debugfs_create_u32("hw_fence_debug_level", 0600, debugfs_root, &msm_hw_fence_debug_level);
+	debugfs_create_file("hw_fence_dump_table", 0600, debugfs_root, drv_data,
+		&hw_fence_dump_table_fops);
+
+	return 0;
+}
+
+#else
+int hw_fence_debug_debugfs_register(struct hw_fence_driver_data *drv_data)
+{
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
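
The debugfs nodes registered above can also be driven from user space. A minimal sketch (illustrative only; it assumes debugfs is mounted at /sys/kernel/debug and that the debug clients were registered first):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static int dbg_write(const char *node, const char *val)
    {
        char path[128];
        int fd, rc;

        snprintf(path, sizeof(path), "/sys/kernel/debug/hw_fence/%s", node);
        fd = open(path, O_WRONLY);
        if (fd < 0)
            return -1;
        rc = write(fd, val, strlen(val));
        close(fd);
        return rc < 0 ? -1 : 0;
    }

    int main(void)
    {
        /* widen the simulated release delay to ~16ms (decimal write to the u32 node) */
        dbg_write("sleep_range_us", "16000");
        /* any write triggers the join-fence simulation */
        return dbg_write("hw_fence_create_join_fence", "1");
    }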

+ 247 - 0
hw_fence/src/hw_fence_drv_ipc.c

@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "hw_fence_drv_priv.h"
+#include "hw_fence_drv_utils.h"
+#include "hw_fence_drv_ipc.h"
+#include "hw_fence_drv_debug.h"
+
+/**
+ * struct hw_fence_client_ipc_map - map client id with ipc signal for trigger.
+ * @ipc_client_id: ipc client id for the hw-fence client.
+ * @ipc_signal_id: ipc signal id for the hw-fence client.
+ * @update_rxq: bool to indicate if client uses rx-queue.
+ */
+struct hw_fence_client_ipc_map {
+	int ipc_client_id;
+	int ipc_signal_id;
+	bool update_rxq;
+};
+
+/**
+ * struct hw_fence_clients_ipc_map_no_dpu - Table with the 'client to signal' mapping, which
+ *		is used by the hw fence driver to trigger the ipc signal when the hw fence is
+ *		already signaled.
+ *		This no_dpu version is for targets that do not support the dpu client id.
+ *
+ * Notes:
+ * The index of this struct must match the enum hw_fence_client_id.
+ * To change to a loopback signal instead of GMU, change ctx0 row to use:
+ *   {HW_FENCE_IPC_CLIENT_ID_APPS, 20}.
+ */
+struct hw_fence_client_ipc_map hw_fence_clients_ipc_map_no_dpu[HW_FENCE_CLIENT_MAX] = {
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 1, true}, /* ctrl queue loopback */
+	{HW_FENCE_IPC_CLIENT_ID_GPU,  0, true}, /* ctx0 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 14, false}, /* ctl0 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 15, false}, /* ctl1 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 16, false}, /* ctl2 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 17, false}, /* ctl3 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 18, false}, /* ctl4 */
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 19, false}, /* ctl5 */
+};
+
+/**
+ * struct hw_fence_clients_ipc_map - Table with the 'client to signal' mapping, which is
+ *		used by the hw fence driver to trigger the ipc signal when the hw fence is
+ *		already signaled.
+ *		This version is for targets that support the dpu client id.
+ *
+ * Note that the index of this struct must match the enum hw_fence_client_id
+ */
+struct hw_fence_client_ipc_map hw_fence_clients_ipc_map[HW_FENCE_CLIENT_MAX] = {
+	{HW_FENCE_IPC_CLIENT_ID_APPS, 1, true}, /* ctrl queue loopback */
+	{HW_FENCE_IPC_CLIENT_ID_GPU,  0, true}, /* ctx0 */
+	{HW_FENCE_IPC_CLIENT_ID_DPU,  0, false}, /* ctl0 */
+	{HW_FENCE_IPC_CLIENT_ID_DPU,  1, false}, /* ctl1 */
+	{HW_FENCE_IPC_CLIENT_ID_DPU,  2, false}, /* ctl2 */
+	{HW_FENCE_IPC_CLIENT_ID_DPU,  3, false}, /* ctl3 */
+	{HW_FENCE_IPC_CLIENT_ID_DPU,  4, false}, /* ctl4 */
+	{HW_FENCE_IPC_CLIENT_ID_DPU,  5, false}, /* ctl5 */
+};
+
+int hw_fence_ipcc_get_client_id(struct hw_fence_driver_data *drv_data, u32 client_id)
+{
+	if (!drv_data || client_id >= HW_FENCE_CLIENT_MAX)
+		return -EINVAL;
+
+	return drv_data->ipc_clients_table[client_id].ipc_client_id;
+}
+
+int hw_fence_ipcc_get_signal_id(struct hw_fence_driver_data *drv_data, u32 client_id)
+{
+	if (!drv_data || client_id >= HW_FENCE_CLIENT_MAX)
+		return -EINVAL;
+
+	return drv_data->ipc_clients_table[client_id].ipc_signal_id;
+}
+
+bool hw_fence_ipcc_needs_rxq_update(struct hw_fence_driver_data *drv_data, int client_id)
+{
+	if (!drv_data || client_id >= HW_FENCE_CLIENT_MAX)
+		return false;
+
+	return drv_data->ipc_clients_table[client_id].update_rxq;
+}
+
+/**
+ * _get_ipc_client_name() - Returns ipc client name, used for debugging.
+ */
+static inline char *_get_ipc_client_name(u32 client_id)
+{
+	switch (client_id) {
+	case HW_FENCE_IPC_CLIENT_ID_APPS:
+		return "APPS";
+	case HW_FENCE_IPC_CLIENT_ID_GPU:
+		return "GPU";
+	case HW_FENCE_IPC_CLIENT_ID_DPU:
+		return "DPU";
+	}
+
+	return "UNKNOWN";
+}
+
+void hw_fence_ipcc_trigger_signal(struct hw_fence_driver_data *drv_data,
+	u32 tx_client_id, u32 rx_client_id, u32 signal_id)
+{
+	void __iomem *ptr;
+	u32 val;
+
+	/* Send signal */
+	ptr = IPC_PROTOCOLp_CLIENTc_SEND(drv_data->ipcc_io_mem, drv_data->protocol_id,
+		tx_client_id);
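+	/* SEND payload layout: destination client id in bits[31:16], signal id in bits[15:0] */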
+	val = (rx_client_id << 16) | signal_id;
+
+	HWFNC_DBG_IRQ("Sending ipcc from %s (%d) to %s (%d) signal_id:%d [wr:0x%x to off:0x%pK]\n",
+		_get_ipc_client_name(tx_client_id), tx_client_id,
+		_get_ipc_client_name(rx_client_id), rx_client_id,
+		signal_id, val, ptr);
+	HWFNC_DBG_H("Write:0x%x to RegOffset:0x%pK\n", val, ptr);
+	writel_relaxed(val, ptr);
+
+	/* Make sure value is written */
+	wmb();
+}
+
+/**
+ * _hw_fence_ipcc_hwrev_init() - Initializes internal driver struct with corresponding ipcc data,
+ *		according to the ipcc hw revision.
+ * @drv_data: driver data.
+ * @hwrev: ipcc hw revision.
+ */
+static int _hw_fence_ipcc_hwrev_init(struct hw_fence_driver_data *drv_data, u32 hwrev)
+{
+	switch (hwrev) {
+	case HW_FENCE_IPCC_HW_REV_100:
+		drv_data->ipcc_client_id = HW_FENCE_IPC_CLIENT_ID_APPS;
+		drv_data->protocol_id = HW_FENCE_IPC_COMPUTE_L1_PROTOCOL_ID_LAHAINA;
+		drv_data->ipc_clients_table = hw_fence_clients_ipc_map_no_dpu;
+		HWFNC_DBG_INIT("ipcc protocol_id: Lahaina\n");
+		break;
+	case HW_FENCE_IPCC_HW_REV_110:
+		drv_data->ipcc_client_id = HW_FENCE_IPC_CLIENT_ID_APPS;
+		drv_data->protocol_id = HW_FENCE_IPC_COMPUTE_L1_PROTOCOL_ID_WAIPIO;
+		drv_data->ipc_clients_table = hw_fence_clients_ipc_map_no_dpu;
+		HWFNC_DBG_INIT("ipcc protocol_id: Waipio\n");
+		break;
+	case HW_FENCE_IPCC_HW_REV_170:
+		drv_data->ipcc_client_id = HW_FENCE_IPC_CLIENT_ID_APPS;
+		drv_data->protocol_id = HW_FENCE_IPC_COMPUTE_L1_PROTOCOL_ID_KAILUA;
+		drv_data->ipc_clients_table = hw_fence_clients_ipc_map;
+		HWFNC_DBG_INIT("ipcc protocol_id: Kailua\n");
+		break;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+int hw_fence_ipcc_enable_signaling(struct hw_fence_driver_data *drv_data)
+{
+	void __iomem *ptr;
+	u32 val;
+
+	HWFNC_DBG_H("enable ipc +\n");
+
+	/* Read IPC Version from Client=0x8 (apps) for protocol=2 (compute_l1) */
+	val = readl_relaxed(IPC_PROTOCOLp_CLIENTc_VERSION(drv_data->ipcc_io_mem,
+		HW_FENCE_IPC_COMPUTE_L1_PROTOCOL_ID_LAHAINA, HW_FENCE_IPC_CLIENT_ID_APPS));
+	HWFNC_DBG_INIT("ipcc version:0x%x\n", val);
+
+	if (_hw_fence_ipcc_hwrev_init(drv_data, val)) {
+		HWFNC_ERR("ipcc protocol id not supported\n");
+		return -EINVAL;
+	}
+
+	/* Enable compute l1 (protocol_id = 2) */
+	val = 0x00000000;
+	ptr = IPC_PROTOCOLp_CLIENTc_CONFIG(drv_data->ipcc_io_mem, drv_data->protocol_id,
+		HW_FENCE_IPC_CLIENT_ID_APPS);
+	HWFNC_DBG_H("Write:0x%x to RegOffset:0x%pK\n", val, ptr);
+	writel_relaxed(val, ptr);
+
+	/* Enable Client-Signal pairs from APPS(NS) (0x8) to APPS(NS) (0x8) */
+	val = 0x00080000;
+	ptr = IPC_PROTOCOLp_CLIENTc_RECV_SIGNAL_ENABLE(drv_data->ipcc_io_mem, drv_data->protocol_id,
+		HW_FENCE_IPC_CLIENT_ID_APPS);
+	HWFNC_DBG_H("Write:0x%x to RegOffset:0x%pK\n", val, ptr);
+	writel_relaxed(val, ptr);
+
+	HWFNC_DBG_H("enable ipc -\n");
+
+	return 0;
+}
+
+#ifdef HW_DPU_IPCC
+int hw_fence_ipcc_enable_dpu_signaling(struct hw_fence_driver_data *drv_data)
+{
+	struct hw_fence_client_ipc_map *hw_fence_client;
+	void __iomem *ptr;
+	u32 val;
+	int i;
+
+	HWFNC_DBG_H("enable dpu ipc +\n");
+
+	if (!drv_data || !drv_data->protocol_id || !drv_data->ipc_clients_table) {
+		HWFNC_ERR("invalid drv data\n");
+		return -1;
+	}
+
+	HWFNC_DBG_H("ipcc_io_mem:0x%lx\n", (u64)drv_data->ipcc_io_mem);
+
+	/*
+	 * Enable compute l1 (protocol_id = 2) for dpu (25)
+	 * Sets bit(1) to clear when RECV_ID is read
+	 */
+	val = 0x00000001;
+	ptr = IPC_PROTOCOLp_CLIENTc_CONFIG(drv_data->ipcc_io_mem, drv_data->protocol_id,
+		HW_FENCE_IPC_CLIENT_ID_DPU);
+	HWFNC_DBG_H("Write:0x%x to RegOffset:0x%lx\n", val, (u64)ptr);
+	writel_relaxed(val, ptr);
+
+	HWFNC_DBG_H("Initialize dpu signals\n");
+	/* Enable Client-Signal pairs from DPU (25) to APPS(NS) (8) */
+	for (i = 0; i < HW_FENCE_CLIENT_MAX; i++) {
+		hw_fence_client = &drv_data->ipc_clients_table[i];
+
+		/* skip any client that is not a dpu client */
+		if (hw_fence_client->ipc_client_id != HW_FENCE_IPC_CLIENT_ID_DPU)
+			continue;
+
+		/* Enable signals for dpu client */
+		HWFNC_DBG_H("dpu:%d client:%d signal:%d\n", hw_fence_client->ipc_client_id, i,
+			hw_fence_client->ipc_signal_id);
+		val = 0x00080000 | (hw_fence_client->ipc_signal_id & 0xFFFF);
+		ptr = IPC_PROTOCOLp_CLIENTc_RECV_SIGNAL_ENABLE(drv_data->ipcc_io_mem,
+			drv_data->protocol_id, HW_FENCE_IPC_CLIENT_ID_DPU);
+		HWFNC_DBG_H("Write:0x%x to RegOffset:0x%lx\n", val, (u64)ptr);
+		writel_relaxed(val, ptr);
+	}
+
+	HWFNC_DBG_H("enable dpu ipc -\n");
+
+	return 0;
+}
+#endif /* HW_DPU_IPCC */
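
Taken together, a caller holding only a hw-fence client id can resolve and fire the matching ipcc signal roughly as below (a sketch; trigger_for_client is a hypothetical helper and error handling is elided):

    static void trigger_for_client(struct hw_fence_driver_data *drv_data, u32 client_id)
    {
        int rx_client = hw_fence_ipcc_get_client_id(drv_data, client_id);
        int signal_id = hw_fence_ipcc_get_signal_id(drv_data, client_id);

        if (rx_client < 0 || signal_id < 0)
            return;

        /* apps is the sender on this path (see _hw_fence_ipcc_hwrev_init) */
        hw_fence_ipcc_trigger_signal(drv_data, drv_data->ipcc_client_id,
            rx_client, signal_id);
    }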

+ 1317 - 0
hw_fence/src/hw_fence_drv_priv.c

@@ -0,0 +1,1317 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+
+#include "hw_fence_drv_priv.h"
+#include "hw_fence_drv_utils.h"
+#include "hw_fence_drv_ipc.h"
+#include "hw_fence_drv_debug.h"
+
+/* Global atomic lock */
+#define GLOBAL_ATOMIC_STORE(lock, val) global_atomic_store(lock, val)
+
+inline u64 hw_fence_get_qtime(struct hw_fence_driver_data *drv_data)
+{
+	return readl_relaxed(drv_data->qtime_io_mem);
+}
+
+static int init_hw_fences_queues(struct hw_fence_driver_data *drv_data,
+	enum hw_fence_mem_reserve mem_reserve_id,
+	struct msm_hw_fence_mem_addr *mem_descriptor,
+	struct msm_hw_fence_queue *queues, int queues_num,
+	int client_id)
+{
+	struct msm_hw_fence_hfi_queue_table_header *hfi_table_header;
+	struct msm_hw_fence_hfi_queue_header *hfi_queue_header;
+	void *ptr, *qptr;
+	phys_addr_t phys, qphys;
+	u32 size, start_queue_offset;
+	int headers_size, queue_size;
+	int i, ret = 0;
+
+	HWFNC_DBG_INIT("mem_reserve_id:%d client_id:%d\n", mem_reserve_id, client_id);
+	switch (mem_reserve_id) {
+	case HW_FENCE_MEM_RESERVE_CTRL_QUEUE:
+		headers_size = HW_FENCE_HFI_CTRL_HEADERS_SIZE;
+		queue_size = drv_data->hw_fence_ctrl_queue_size;
+		break;
+	case HW_FENCE_MEM_RESERVE_CLIENT_QUEUE:
+		headers_size = HW_FENCE_HFI_CLIENT_HEADERS_SIZE;
+		queue_size = drv_data->hw_fence_client_queue_size;
+		break;
+	default:
+		HWFNC_ERR("Unexpected mem reserve id: %d\n", mem_reserve_id);
+		return -EINVAL;
+	}
+
+	/* Reserve Virtual and Physical memory for HFI headers */
+	ret = hw_fence_utils_reserve_mem(drv_data, mem_reserve_id, &phys, &ptr, &size, client_id);
+	if (ret) {
+		HWFNC_ERR("Failed to reserve id:%d client %d\n", mem_reserve_id, client_id);
+		return -ENOMEM;
+	}
+	HWFNC_DBG_INIT("phys:0x%x ptr:0x%pK size:%d\n", phys, ptr, size);
+
+	/* Populate Memory descriptor with address */
+	mem_descriptor->virtual_addr = ptr;
+	mem_descriptor->device_addr = phys;
+	mem_descriptor->size = size; /* bytes */
+	mem_descriptor->mem_data = NULL; /* Currently we don't need any special info */
+
+	HWFNC_DBG_INIT("Initialize headers\n");
+	/* Initialize headers info within hfi memory */
+	hfi_table_header = (struct msm_hw_fence_hfi_queue_table_header *)ptr;
+	hfi_table_header->version = 0;
+	hfi_table_header->size = size; /* bytes */
+	/* Offset, from the Base Address, where the first queue header starts */
+	hfi_table_header->qhdr0_offset =
+		sizeof(struct msm_hw_fence_hfi_queue_table_header);
+	hfi_table_header->qhdr_size =
+		sizeof(struct msm_hw_fence_hfi_queue_header);
+	hfi_table_header->num_q = queues_num; /* number of queues */
+	hfi_table_header->num_active_q = queues_num;
+
+	/* Initialize Queues Info within HFI memory */
+
+	/*
+	 * Calculate the offset where the hfi queue headers start, which is at
+	 * the end of the hfi table header
+	 */
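+	/*
+	 * Resulting layout of each reservation (sketch; sizes come from the
+	 * switch at the top of this function):
+	 *
+	 *   base:                          hfi queue table header
+	 *   base + qhdr0_offset:           hfi queue headers [0..queues_num)
+	 *   base + headers_size:           queue 0 payloads (TX)
+	 *   base + headers_size + qsize:   queue 1 payloads (RX)
+	 */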
+	HWFNC_DBG_INIT("Initialize queues\n");
+	hfi_queue_header = (struct msm_hw_fence_hfi_queue_header *)
+					   ((char *)ptr + HW_FENCE_HFI_TABLE_HEADER_SIZE);
+	for (i = 0; i < queues_num; i++) {
+		HWFNC_DBG_INIT("init queue[%d]\n", i);
+
+		/* Calculate the offset where the Queue starts */
+		start_queue_offset = headers_size + (i * queue_size); /* Bytes */
+		qphys = phys + start_queue_offset; /* start of the PA for the queue elems */
+		qptr = (char *)ptr + start_queue_offset; /* start of the va for queue elems */
+
+		/* Set the physical start address in the HFI queue header */
+		hfi_queue_header->start_addr = qphys;
+
+		/* Set the queue type (i.e. RX or TX queue) */
+		hfi_queue_header->type = (i == 0) ? HW_FENCE_TX_QUEUE : HW_FENCE_RX_QUEUE;
+
+		/* Set the size of this header */
+		hfi_queue_header->queue_size = queue_size;
+
+		/* Store Memory info in the Client data */
+		queues[i].va_queue = qptr;
+		queues[i].pa_queue = qphys;
+		queues[i].va_header = hfi_queue_header;
+		queues[i].q_size_bytes = queue_size;
+		HWFNC_DBG_INIT("init:%s client:%d queue[%d]: va=0x%pK pa=0x%x va_hd:0x%pK sz:%d\n",
+			hfi_queue_header->type == HW_FENCE_TX_QUEUE ? "TX_QUEUE" : "RX_QUEUE",
+			client_id, i, queues[i].va_queue, queues[i].pa_queue, queues[i].va_header,
+			queues[i].q_size_bytes);
+
+		/* Next header */
+		hfi_queue_header++;
+	}
+
+	return ret;
+}
+
+static inline bool _lock_client_queue(int queue_type)
+{
+	/* Only lock the Rx Queue */
+	return queue_type == (HW_FENCE_RX_QUEUE - 1);
+}
+
+char *_get_queue_type(int queue_type)
+{
+	return (queue_type == (HW_FENCE_RX_QUEUE - 1)) ? "RXQ" : "TXQ";
+}
+
+int hw_fence_read_queue(struct msm_hw_fence_client *hw_fence_client,
+		 struct msm_hw_fence_queue_payload *payload, int queue_type)
+{
+	struct msm_hw_fence_hfi_queue_header *hfi_header;
+	struct msm_hw_fence_queue *queue;
+	u32 read_idx;
+	u32 write_idx;
+	u32 to_read_idx;
+	u32 *read_ptr;
+	u32 payload_size_u32;
+	u32 q_size_u32;
+	struct msm_hw_fence_queue_payload *read_ptr_payload;
+
+	if (queue_type >= HW_FENCE_CLIENT_QUEUES || !hw_fence_client || !payload) {
+		HWFNC_ERR("Invalid queue type:%s hw_fence_client:0x%pK payload:0x%pK\n", queue_type,
+			hw_fence_client, payload);
+		return -EINVAL;
+	}
+
+	queue = &hw_fence_client->queues[queue_type];
+	hfi_header = queue->va_header;
+
+	q_size_u32 = (queue->q_size_bytes / sizeof(u32));
+	payload_size_u32 = (sizeof(struct msm_hw_fence_queue_payload) / sizeof(u32));
+	HWFNC_DBG_Q("sizeof payload:%d\n", sizeof(struct msm_hw_fence_queue_payload));
+
+	if (!hfi_header) {
+		HWFNC_ERR("Invalid queue\n");
+		return -EINVAL;
+	}
+
+	/* Get read and write index */
+	read_idx = readl_relaxed(&hfi_header->read_index);
+	write_idx = readl_relaxed(&hfi_header->write_index);
+
+	/* Make sure we read the values */
+	rmb();
+
+	HWFNC_DBG_Q("read client:%d rd_ptr:0x%pK wr_ptr:0x%pK rd_idx:%d wr_idx:%d queue:0x%pK\n",
+		hw_fence_client->client_id, &hfi_header->read_index, &hfi_header->write_index,
+		read_idx, write_idx, queue);
+
+	if (read_idx == write_idx) {
+		HWFNC_DBG_Q("Nothing to read!\n");
+		return 0;
+	}
+
+	/* Move the pointer where we need to read and cast it */
+	read_ptr = ((u32 *)queue->va_queue + read_idx);
+	read_ptr_payload = (struct msm_hw_fence_queue_payload *)read_ptr;
+	HWFNC_DBG_Q("read_ptr:0x%pK queue: va=0x%pK pa=0x%pK read_ptr_payload:0x%pK\n", read_ptr,
+		queue->va_queue, queue->pa_queue, read_ptr_payload);
+
+	/* Calculate the index after the read */
+	to_read_idx = read_idx + payload_size_u32;
+
+	/*
+	 * wrap-around case, here we are reading the last element of the queue, therefore set
+	 * to_read_idx, which is the index after the read, to the beginning of the
+	 * queue
+	 */
+	if (to_read_idx >= q_size_u32)
+		to_read_idx = 0;
+
+	/* Read the Client Queue */
+	payload->ctxt_id = readq_relaxed(&read_ptr_payload->ctxt_id);
+	payload->seqno = readq_relaxed(&read_ptr_payload->seqno);
+	payload->hash = readq_relaxed(&read_ptr_payload->hash);
+	payload->flags = readq_relaxed(&read_ptr_payload->flags);
+	payload->error = readl_relaxed(&read_ptr_payload->error);
+
+	/* update the read index */
+	writel_relaxed(to_read_idx, &hfi_header->read_index);
+
+	/* update memory for the index */
+	wmb();
+
+	/* Return one if queue still has contents after read */
+	return to_read_idx == write_idx ? 0 : 1;
+}
+
+/*
+ * This function writes to the queue of the client. The 'queue_type' determines
+ * if this function is writing to the rx or tx queue
+ */
+int hw_fence_update_queue(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client, u64 ctxt_id, u64 seqno, u64 hash,
+	u64 flags, u32 error, int queue_type)
+{
+	struct msm_hw_fence_hfi_queue_header *hfi_header;
+	struct msm_hw_fence_queue *queue;
+	u32 read_idx;
+	u32 write_idx;
+	u32 to_write_idx;
+	u32 q_size_u32;
+	u32 q_free_u32;
+	u32 *q_payload_write_ptr;
+	u32 payload_size_u32;
+	struct msm_hw_fence_queue_payload *write_ptr_payload;
+	bool lock_client = false;
+	u32 lock_idx;
+	int ret = 0;
+
+	if (queue_type >= HW_FENCE_CLIENT_QUEUES) {
+		HWFNC_ERR("Invalid queue type:%s\n", queue_type);
+		return -EINVAL;
+	}
+
+	queue = &hw_fence_client->queues[queue_type];
+	hfi_header = queue->va_header;
+
+	q_size_u32 = (queue->q_size_bytes / sizeof(u32));
+	payload_size_u32 = (sizeof(struct msm_hw_fence_queue_payload) / sizeof(u32));
+
+	if (!hfi_header) {
+		HWFNC_ERR("Invalid queue\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * We need to lock the client if there is an Rx Queue update, since that
+	 * is the only time when HW Fence driver can have a race condition updating
+	 * the Rx Queue, which also could be getting updated by the Fence CTL
+	 */
+	lock_client = _lock_client_queue(queue_type);
+	if (lock_client) {
+		lock_idx = hw_fence_client->client_id - 1;
+
+		if (lock_idx >= drv_data->client_lock_tbl_cnt) {
+			HWFNC_ERR("lock for client id:%d exceed max:%d\n",
+				hw_fence_client->client_id, drv_data->client_lock_tbl_cnt);
+			return -EINVAL;
+		}
+		HWFNC_DBG_Q("Locking client id:%d: idx:%d\n", hw_fence_client->client_id, lock_idx);
+
+		/* lock the client rx queue to update */
+		GLOBAL_ATOMIC_STORE(&drv_data->client_lock_tbl[lock_idx], 1); /* lock */
+	}
+
+	/* Get read and write index */
+	read_idx = readl_relaxed(&hfi_header->read_index);
+	write_idx = readl_relaxed(&hfi_header->write_index);
+
+	/* Make sure we read the values */
+	rmb();
+
+	HWFNC_DBG_Q("wr client:%d rd_ptr:0x%pK wr_ptr:0x%pK rd_idx:%d wr_idx:%d q:0x%pK type:%d\n",
+		hw_fence_client->client_id, &hfi_header->read_index, &hfi_header->write_index,
+		read_idx, write_idx, queue, queue_type);
+
+	/* Check queue to make sure message will fit */
+	q_free_u32 = read_idx <= write_idx ? (q_size_u32 - (write_idx - read_idx)) :
+		(read_idx - write_idx);
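+	/* e.g. q_size_u32=64, read_idx=10, write_idx=50 -> q_free_u32=24 dwords free */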
+	if (q_free_u32 <= payload_size_u32) {
+		HWFNC_ERR("cannot fit the message size:%d\n", payload_size_u32);
+		ret = -EINVAL;
+		goto exit;
+	}
+	HWFNC_DBG_Q("q_free_u32:%d payload_size_u32:%d\n", q_free_u32, payload_size_u32);
+
+	/* Move the pointer where we need to write and cast it */
+	q_payload_write_ptr = ((u32 *)queue->va_queue + write_idx);
+	write_ptr_payload = (struct msm_hw_fence_queue_payload *)q_payload_write_ptr;
+	HWFNC_DBG_Q("q_payload_write_ptr:0x%pK queue: va=0x%pK pa=0x%pK write_ptr_payload:0x%pK\n",
+		q_payload_write_ptr, queue->va_queue, queue->pa_queue, write_ptr_payload);
+
+	/* calculate the index after the write */
+	to_write_idx = write_idx + payload_size_u32;
+
+	HWFNC_DBG_Q("to_write_idx:%d write_idx:%d payload_size\n", to_write_idx, write_idx,
+		payload_size_u32);
+	HWFNC_DBG_L("client_id:%d update %s hash:%llu ctx_id:%llu seqno:%llu flags:%llu error:%u\n",
+		hw_fence_client->client_id, _get_queue_type(queue_type),
+		hash, ctxt_id, seqno, flags, error);
+
+	/*
+	 * wrap-around case, here we are writing to the last element of the queue, therefore
+	 * set to_write_idx, which is the index after the write, to the beginning of the
+	 * queue
+	 */
+	if (to_write_idx >= q_size_u32)
+		to_write_idx = 0;
+
+	/* Update Client Queue */
+	writeq_relaxed(ctxt_id, &write_ptr_payload->ctxt_id);
+	writeq_relaxed(seqno, &write_ptr_payload->seqno);
+	writeq_relaxed(hash, &write_ptr_payload->hash);
+	writeq_relaxed(flags, &write_ptr_payload->flags);
+	writel_relaxed(error, &write_ptr_payload->error);
+
+	/* update memory for the message */
+	wmb();
+
+	/* update the write index */
+	writel_relaxed(to_write_idx, &hfi_header->write_index);
+
+	/* update memory for the index */
+	wmb();
+
+exit:
+	if (lock_client)
+		GLOBAL_ATOMIC_STORE(&drv_data->client_lock_tbl[lock_idx], 0); /* unlock */
+
+	return ret;
+}
+
+static int init_global_locks(struct hw_fence_driver_data *drv_data)
+{
+	struct msm_hw_fence_mem_addr *mem_descriptor;
+	phys_addr_t phys;
+	void *ptr;
+	u32 size;
+	int ret;
+
+	ret = hw_fence_utils_reserve_mem(drv_data, HW_FENCE_MEM_RESERVE_LOCKS_REGION, &phys, &ptr,
+		&size, 0);
+	if (ret) {
+		HWFNC_ERR("Failed to reserve clients locks mem %d\n", ret);
+		return -ENOMEM;
+	}
+	HWFNC_DBG_INIT("phys:0x%x ptr:0x%pK size:%d\n", phys, ptr, size);
+
+	/* Populate Memory descriptor with address */
+	mem_descriptor = &drv_data->clients_locks_mem_desc;
+	mem_descriptor->virtual_addr = ptr;
+	mem_descriptor->device_addr = phys;
+	mem_descriptor->size = size;
+	mem_descriptor->mem_data = NULL; /* not storing special info for now */
+
+	/* Initialize internal pointers for managing the tables */
+	drv_data->client_lock_tbl = (u64 *)drv_data->clients_locks_mem_desc.virtual_addr;
+	drv_data->client_lock_tbl_cnt = drv_data->clients_locks_mem_desc.size / sizeof(u64);
+
+	return 0;
+}
+
+static int init_hw_fences_table(struct hw_fence_driver_data *drv_data)
+{
+	struct msm_hw_fence_mem_addr *mem_descriptor;
+	phys_addr_t phys;
+	void *ptr;
+	u32 size;
+	int ret;
+
+	ret = hw_fence_utils_reserve_mem(drv_data, HW_FENCE_MEM_RESERVE_TABLE, &phys, &ptr,
+		&size, 0);
+	if (ret) {
+		HWFNC_ERR("Failed to reserve table mem %d\n", ret);
+		return -ENOMEM;
+	}
+	HWFNC_DBG_INIT("phys:0x%x ptr:0x%pK size:%d\n", phys, ptr, size);
+
+	/* Populate Memory descriptor with address */
+	mem_descriptor = &drv_data->hw_fences_mem_desc;
+	mem_descriptor->virtual_addr = ptr;
+	mem_descriptor->device_addr = phys;
+	mem_descriptor->size = size;
+	mem_descriptor->mem_data = NULL; /* not storing special info for now */
+
+	/* Initialize internal pointers for managing the tables */
+	drv_data->hw_fences_tbl = (struct msm_hw_fence *)drv_data->hw_fences_mem_desc.virtual_addr;
+	drv_data->hw_fences_tbl_cnt = drv_data->hw_fences_mem_desc.size /
+		sizeof(struct msm_hw_fence);
+
+	HWFNC_DBG_INIT("hw_fences_table:0x%pK cnt:%u\n", drv_data->hw_fences_tbl,
+		drv_data->hw_fences_tbl_cnt);
+
+	return 0;
+}
+
+static int init_ctrl_queue(struct hw_fence_driver_data *drv_data)
+{
+	struct msm_hw_fence_mem_addr *mem_descriptor;
+	int ret;
+
+	mem_descriptor = &drv_data->ctrl_queue_mem_desc;
+
+	/* Init ctrl queue */
+	ret = init_hw_fences_queues(drv_data, HW_FENCE_MEM_RESERVE_CTRL_QUEUE,
+		mem_descriptor, drv_data->ctrl_queues,
+		HW_FENCE_CTRL_QUEUES, 0);
+	if (ret)
+		HWFNC_ERR("Failure to init ctrl queue\n");
+
+	return ret;
+}
+
+int hw_fence_init(struct hw_fence_driver_data *drv_data)
+{
+	int ret;
+	__le32 *mem;
+
+	ret = hw_fence_utils_parse_dt_props(drv_data);
+	if (ret) {
+		HWFNC_ERR("failed to set dt properties\n");
+		goto exit;
+	}
+
+	/* Allocate hw fence driver mem pool and share it with HYP */
+	ret = hw_fence_utils_alloc_mem(drv_data);
+	if (ret) {
+		HWFNC_ERR("failed to alloc base memory\n");
+		goto exit;
+	}
+
+	/* Initialize ctrl queue */
+	ret = init_ctrl_queue(drv_data);
+	if (ret)
+		goto exit;
+
+	ret = init_global_locks(drv_data);
+	if (ret)
+		goto exit;
+	HWFNC_DBG_INIT("Locks allocated at 0x%pK total locks:%d\n", drv_data->client_lock_tbl,
+		drv_data->client_lock_tbl_cnt);
+
+	/* Initialize hw fences table */
+	ret = init_hw_fences_table(drv_data);
+	if (ret)
+		goto exit;
+
+	/* Map ipcc registers */
+	ret = hw_fence_utils_map_ipcc(drv_data);
+	if (ret) {
+		HWFNC_ERR("ipcc regs mapping failed\n");
+		goto exit;
+	}
+
+	/* Map time register */
+	ret = hw_fence_utils_map_qtime(drv_data);
+	if (ret) {
+		HWFNC_ERR("qtime reg mapping failed\n");
+		goto exit;
+	}
+
+	/* Map ctl_start registers */
+	ret = hw_fence_utils_map_ctl_start(drv_data);
+	if (ret) {
+		/*
+		 * This is not a fatal error, since platforms with dpu-ipc
+		 * won't use this option
+		 */
+		HWFNC_WARN("no ctl_start regs, won't trigger the frame\n");
+	}
+
+	/* Init debugfs */
+	ret = hw_fence_debug_debugfs_register(drv_data);
+	if (ret) {
+		HWFNC_ERR("debugfs init failed\n");
+		goto exit;
+	}
+
+	/* Init vIRQ from VM */
+	ret = hw_fence_utils_init_virq(drv_data);
+	if (ret) {
+		HWFNC_ERR("failed to init virq\n");
+		goto exit;
+	}
+
+	mem = drv_data->io_mem_base;
+	HWFNC_DBG_H("memory ptr:0x%pK val:0x%x\n", mem, *mem);
+
+	HWFNC_DBG_INIT("HW Fences Table Initialized: 0x%pK cnt:%d\n",
+		drv_data->hw_fences_tbl, drv_data->hw_fences_tbl_cnt);
+
+exit:
+	return ret;
+}
+
+int hw_fence_alloc_client_resources(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	struct msm_hw_fence_mem_addr *mem_descriptor)
+{
+	int ret;
+
+	/* Init client queues */
+	ret = init_hw_fences_queues(drv_data, HW_FENCE_MEM_RESERVE_CLIENT_QUEUE,
+		&hw_fence_client->mem_descriptor, hw_fence_client->queues,
+		HW_FENCE_CLIENT_QUEUES, hw_fence_client->client_id);
+	if (ret) {
+		HWFNC_ERR("Failure to init the queue for client:%d\n",
+			hw_fence_client->client_id);
+		goto exit;
+	}
+
+	/* Init client memory descriptor */
+	memcpy(mem_descriptor, &hw_fence_client->mem_descriptor,
+		sizeof(struct msm_hw_fence_mem_addr));
+
+exit:
+	return ret;
+}
+
+int hw_fence_init_controller_signal(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client)
+{
+	int ret = 0;
+
+	/*
+	 * Initialize IPCC Signals for this client
+	 *
+	 * NOTE: For each client hw-core, the client driver might be the one doing
+	 * its own initialization (in case any hw-sequence must be enforced);
+	 * however, if that is not the case, any per-client ipcc init to enable
+	 * signaling can go here.
+	 */
+	switch (hw_fence_client->client_id) {
+	case HW_FENCE_CLIENT_ID_CTX0:
+		/* nothing to initialize for gpu client */
+		break;
+	case HW_FENCE_CLIENT_ID_CTL0:
+	case HW_FENCE_CLIENT_ID_CTL1:
+	case HW_FENCE_CLIENT_ID_CTL2:
+	case HW_FENCE_CLIENT_ID_CTL3:
+	case HW_FENCE_CLIENT_ID_CTL4:
+	case HW_FENCE_CLIENT_ID_CTL5:
+#ifdef HW_DPU_IPCC
+		/* initialize ipcc signals for dpu clients */
+		HWFNC_DBG_H("init_controller_signal: DPU client:%d initialized:%d\n",
+			hw_fence_client->client_id, drv_data->ipcc_dpu_initialized);
+		if (!drv_data->ipcc_dpu_initialized) {
+			drv_data->ipcc_dpu_initialized = true;
+
+			/* Init dpu client ipcc signal */
+			hw_fence_ipcc_enable_dpu_signaling(drv_data);
+		}
+#endif /* HW_DPU_IPCC */
+		break;
+	default:
+		HWFNC_ERR("Unexpected client:%d\n", hw_fence_client->client_id);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+int hw_fence_init_controller_resources(struct msm_hw_fence_client *hw_fence_client)
+{
+	/*
+	 * Initialize Fence Controller resources for this Client,
+	 *  here we need to use the CTRL queue to communicate to the Fence
+	 *  Controller the shared memory for the Rx/Tx queue for this client
+	 *  as well as any information that Fence Controller might need to
+	 *  know for this client.
+	 *
+	 * NOTE: For now, we are doing a static allocation of the
+	 *  client's queues, so currently we don't need any notification
+	 *  to the Fence CTL here through the CTRL queue.
+	 *  Later-on we might need it, once the PVM to SVM (and vice versa)
+	 *  communication for initialization is supported.
+	 */
+
+	return 0;
+}
+
+void hw_fence_cleanup_client(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client)
+{
+	/*
+	 * Deallocate any resource allocated for this client.
+	 *  If fence controller was notified about existence of this client,
+	 *  we will need to notify fence controller that this client is gone
+	 *
+	 * NOTE: Since currently we are doing a 'fixed' memory for the clients queues,
+	 *  we don't need any notification to the Fence Controller, yet..
+	 *  however, if the memory allocation is removed from 'fixed' to a dynamic
+	 *  allocation, then we will need to notify FenceCTL about the client that is
+	 *  going-away here.
+	 */
+	mutex_lock(&drv_data->clients_mask_lock);
+	drv_data->client_id_mask &= ~BIT(hw_fence_client->client_id);
+	drv_data->clients[hw_fence_client->client_id] = NULL;
+	mutex_unlock(&drv_data->clients_mask_lock);
+
+	/* Deallocate client's object */
+	HWFNC_DBG_LUT("freeing client_id:%d\n", hw_fence_client->client_id);
+	kfree(hw_fence_client);
+}
+
+static inline int _calculate_hash(u32 table_total_entries, u64 context, u64 seqno,
+	u64 step, u64 *hash)
+{
+	u64 m_size = table_total_entries;
+	int val = 0;
+
+	if (step == 0) {
+		u64 a_multiplier = HW_FENCE_HASH_A_MULT;
+		u64 c_multiplier = HW_FENCE_HASH_C_MULT;
+		u64 b_multiplier = context + (context - 1); /* odd multiplier */
+
+		/*
+		 * if m, is power of 2, we can optimize with right shift,
+		 * for now we don't do it, to avoid assuming a power of two
+		 */
+		*hash = (a_multiplier * seqno * b_multiplier + (c_multiplier * context)) % m_size;
+	} else {
+		if (step >= m_size) {
+			/*
+			 * If we already traversed the whole table, return failure, since this
+			 * means there are no available spots; the table is either full or
+			 * full-enough that we couldn't find an available spot after traversing
+			 * the whole table.
+			 * Ideally the table shouldn't be so full that we cannot find a value
+			 * after some iterations, so this maximum step size could be optimized
+			 * to fail earlier.
+			 */
+			HWFNC_ERR("Fence table traversed and no available space!\n");
+			val = -EINVAL;
+		} else {
+			/*
+			 * Linearly increment the hash value to find next element in the table
+			 * note that this relies on the 'scrambled' data from the original hash
+			 * Also, add a mod division to wrap-around in case that we reached the
+			 * end of the table
+			 */
+			*hash = (*hash + 1) % m_size;
+		}
+	}
+
+	return val;
+}
+
+static inline struct msm_hw_fence *_get_hw_fence(u32 table_total_entries,
+	struct msm_hw_fence *hw_fences_tbl,
+	u64 hash)
+{
+	if (hash >= table_total_entries) {
+		HWFNC_ERR("hash:%llu out of max range:%llu\n",
+			hash, table_total_entries);
+		return NULL;
+	}
+
+	return &hw_fences_tbl[hash];
+}
+
+static bool _is_hw_fence_free(struct msm_hw_fence *hw_fence, u64 context, u64 seqno)
+{
+	/* If valid is set, the hw fence is not free */
+	return !hw_fence->valid;
+}
+
+static bool _hw_fence_match(struct msm_hw_fence *hw_fence, u64 context, u64 seqno)
+{
+	return (hw_fence->ctx_id == context && hw_fence->seq_id == seqno);
+}
+
+/* clears everything but the 'valid' field */
+static void _cleanup_hw_fence(struct msm_hw_fence *hw_fence)
+{
+	int i;
+
+	hw_fence->error = 0;
+	wmb(); /* update memory to avoid mem-abort */
+	hw_fence->ctx_id = 0;
+	hw_fence->seq_id = 0;
+	hw_fence->wait_client_mask = 0;
+	hw_fence->fence_allocator = 0;
+	hw_fence->fence_signal_client = 0;
+
+	hw_fence->flags = 0;
+
+	hw_fence->fence_create_time = 0;
+	hw_fence->fence_trigger_time = 0;
+	hw_fence->fence_wait_time = 0;
+	hw_fence->debug_refcount = 0;
+	hw_fence->parents_cnt = 0;
+	hw_fence->pending_child_cnt = 0;
+
+	for (i = 0; i < MSM_HW_FENCE_MAX_JOIN_PARENTS; i++)
+		hw_fence->parent_list[i] = HW_FENCE_INVALID_PARENT_FENCE;
+}
+
+/* This function must be called with the hw fence lock */
+static void _reserve_hw_fence(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence *hw_fence, u32 client_id,
+	u64 context, u64 seqno, u32 hash, u32 pending_child_cnt)
+{
+	_cleanup_hw_fence(hw_fence);
+
+	/* reserve this HW fence */
+	hw_fence->valid = 1;
+
+	hw_fence->ctx_id = context;
+	hw_fence->seq_id = seqno;
+	hw_fence->flags = 0; /* fence just reserved, there shouldn't be any flags set */
+	hw_fence->fence_allocator = client_id;
+	hw_fence->fence_create_time = hw_fence_get_qtime(drv_data);
+	hw_fence->debug_refcount++;
+
+	HWFNC_DBG_LUT("Reserved fence client:%d ctx:%llu seq:%llu hash:%llu\n",
+		client_id, context, seqno, hash);
+}
+
+/* This function must be called with the hw fence lock */
+static void _unreserve_hw_fence(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence *hw_fence, u32 client_id,
+	u64 context, u64 seqno, u32 hash, u32 pending_child_cnt)
+{
+	_cleanup_hw_fence(hw_fence);
+
+	/* unreserve this HW fence */
+	hw_fence->valid = 0;
+
+	HWFNC_DBG_LUT("Unreserved fence client:%d ctx:%llu seq:%llu hash:%llu\n",
+		client_id, context, seqno, hash);
+}
+
+/* This function must be called with the hw fence lock */
+static void _reserve_join_fence(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence *hw_fence, u32 client_id, u64 context,
+	u64 seqno, u32 hash, u32 pending_child_cnt)
+{
+	_cleanup_hw_fence(hw_fence);
+
+	/* reserve this HW fence */
+	hw_fence->valid = true;
+
+	hw_fence->ctx_id = context;
+	hw_fence->seq_id = seqno;
+	hw_fence->fence_allocator = client_id;
+	hw_fence->fence_create_time = hw_fence_get_qtime(drv_data);
+	hw_fence->debug_refcount++;
+
+	hw_fence->pending_child_cnt = pending_child_cnt;
+
+	HWFNC_DBG_LUT("Reserved join fence client:%d ctx:%llu seq:%llu hash:%llu\n",
+		client_id, context, seqno, hash);
+}
+
+/* This function must be called with the hw fence lock */
+static void _fence_found(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence *hw_fence, u32 client_id,
+	u64 context, u64 seqno, u32 hash, u32 pending_child_cnt)
+{
+	/*
+	 * Do nothing, when this find fence fn is invoked, all processing is done outside.
+	 * Currently just keeping this function for debugging purposes, can be removed
+	 * in final versions
+	 */
+	HWFNC_DBG_LUT("Found fence client:%d ctx:%llu seq:%llu hash:%llu\n",
+		client_id, context, seqno, hash);
+}
+
+char *_get_op_mode(enum hw_fence_lookup_ops op_code)
+{
+	switch (op_code) {
+	case HW_FENCE_LOOKUP_OP_CREATE:
+		return "CREATE";
+	case HW_FENCE_LOOKUP_OP_DESTROY:
+		return "DESTROY";
+	case HW_FENCE_LOOKUP_OP_CREATE_JOIN:
+		return "CREATE_JOIN";
+	case HW_FENCE_LOOKUP_OP_FIND_FENCE:
+		return "FIND_FENCE";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+struct msm_hw_fence *_hw_fence_lookup_and_process(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence *hw_fences_tbl, u64 context, u64 seqno, u32 client_id,
+	u32 pending_child_cnt, enum hw_fence_lookup_ops op_code, u64 *hash)
+{
+	bool (*compare_fnc)(struct msm_hw_fence *hfence, u64 context, u64 seqno);
+	void (*process_fnc)(struct hw_fence_driver_data *drv_data, struct msm_hw_fence *hfence,
+			u32 client_id, u64 context, u64 seqno, u32 hash, u32 pending);
+	struct msm_hw_fence *hw_fence = NULL;
+	u64 step = 0;
+	int ret = 0;
+	bool hw_fence_found = false;
+
+	if (!hash || !drv_data || !hw_fences_tbl) {
+		HWFNC_ERR("Invalid input for hw_fence_lookup\n");
+		return NULL;
+	}
+
+	*hash = ~0;
+
+	HWFNC_DBG_LUT("hw_fence_lookup: %d\n", op_code);
+
+	switch (op_code) {
+	case HW_FENCE_LOOKUP_OP_CREATE:
+		compare_fnc = &_is_hw_fence_free;
+		process_fnc = &_reserve_hw_fence;
+		break;
+	case HW_FENCE_LOOKUP_OP_DESTROY:
+		compare_fnc = &_hw_fence_match;
+		process_fnc = &_unreserve_hw_fence;
+		break;
+	case HW_FENCE_LOOKUP_OP_CREATE_JOIN:
+		compare_fnc = &_is_hw_fence_free;
+		process_fnc = &_reserve_join_fence;
+		break;
+	case HW_FENCE_LOOKUP_OP_FIND_FENCE:
+		compare_fnc = &_hw_fence_match;
+		process_fnc = &_fence_found;
+		break;
+	default:
+		HWFNC_ERR("Unknown op code:%d\n", op_code);
+		return NULL;
+	}
+
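+	/*
+	 * Open-addressing probe: step 0 computes the multiplicative hash and every
+	 * later step advances linearly by one slot (see _calculate_hash), until the
+	 * compare function hits or the whole table has been walked.
+	 */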
+	while (!hw_fence_found && (step < drv_data->hw_fence_table_entries)) {
+
+		/* Calculate the Hash for the Fence */
+		ret = _calculate_hash(drv_data->hw_fence_table_entries, context, seqno, step, hash);
+		if (ret) {
+			HWFNC_ERR("error calculating hash ctx:%llu seqno:%llu hash:%llu\n",
+				context, seqno, *hash);
+			break;
+		}
+		HWFNC_DBG_LUT("calculated hash:%llu [ctx:%llu seqno:%llu]\n", *hash, context,
+			seqno);
+
+		/* Get element from the table using the hash */
+		hw_fence = _get_hw_fence(drv_data->hw_fence_table_entries, hw_fences_tbl, *hash);
+		HWFNC_DBG_LUT("hw_fence_tbl:0x%pK hw_fence:0x%pK, hash:%llu valid:0x%x\n",
+			hw_fences_tbl, hw_fence, *hash, hw_fence ? hw_fence->valid : 0xbad);
+		if (!hw_fence) {
+			HWFNC_ERR("bad hw fence ctx:%llu seqno:%llu hash:%llu\n",
+				context, seqno, *hash);
+			break;
+		}
+
+		GLOBAL_ATOMIC_STORE(&hw_fence->lock, 1);
+
+		/* compare to either find a free fence or find an allocated fence */
+		if (compare_fnc(hw_fence, context, seqno)) {
+
+			/* Process the hw fence found by the algorithm */
+			if (process_fnc) {
+				process_fnc(drv_data, hw_fence, client_id, context, seqno, *hash,
+					pending_child_cnt);
+
+				/* update memory table with processing */
+				wmb();
+			}
+
+			HWFNC_DBG_L("client_id:%lu op:%s ctx:%llu seqno:%llu hash:%llu step:%llu\n",
+				client_id, _get_op_mode(op_code), context, seqno, *hash, step);
+
+			hw_fence_found = true;
+		} else {
+			if ((op_code == HW_FENCE_LOOKUP_OP_CREATE ||
+				op_code == HW_FENCE_LOOKUP_OP_CREATE_JOIN) &&
+				seqno == hw_fence->seq_id && context == hw_fence->ctx_id) {
+				/* ctx & seqno must be unique creating a hw-fence */
+				HWFNC_ERR("cannot create hw fence with same ctx:%llu seqno:%llu\n",
+					context, seqno);
+				GLOBAL_ATOMIC_STORE(&hw_fence->lock, 0);
+				break;
+			}
+			/* compare can fail if we have a collision, we will linearly resolve it */
+			HWFNC_DBG_H("compare failed for hash:%llu [ctx:%llu seqno:%llu]\n", *hash,
+				context, seqno);
+		}
+
+		GLOBAL_ATOMIC_STORE(&hw_fence->lock, 0);
+
+		/* Increment step for the next loop */
+		step++;
+	}
+
+	/* If we iterated through the whole list and didn't find the fence, return null */
+	if (!hw_fence_found) {
+		HWFNC_ERR("fail to create hw-fence step:%llu\n", step);
+		hw_fence = NULL;
+	}
+
+	HWFNC_DBG_LUT("lookup:%d hw_fence:%pK ctx:%llu seqno:%llu hash:%llu flags:0x%llx\n",
+		op_code, hw_fence, context, seqno, *hash, hw_fence ? hw_fence->flags : -1);
+
+	return hw_fence;
+}
+
+int hw_fence_create(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	u64 context, u64 seqno, u64 *hash)
+{
+	u32 client_id = hw_fence_client->client_id;
+	struct msm_hw_fence *hw_fences_tbl = drv_data->hw_fences_tbl;
+
+	int ret = 0;
+
+	/* allocate hw fence in table */
+	if (!_hw_fence_lookup_and_process(drv_data, hw_fences_tbl,
+		context, seqno, client_id, 0, HW_FENCE_LOOKUP_OP_CREATE, hash)) {
+		HWFNC_ERR("Fail to create fence client:%lu ctx:%llu seqno:%llu\n",
+			client_id, context, seqno);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static inline int _hw_fence_cleanup(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence *hw_fences_tbl, u32 client_id, u64 context, u64 seqno)
+{
+	u64 hash;
+
+	if (!_hw_fence_lookup_and_process(drv_data, hw_fences_tbl,
+			context, seqno, client_id, 0, HW_FENCE_LOOKUP_OP_DESTROY, &hash))
+		return -EINVAL;
+
+	return 0;
+}
+
+int hw_fence_destroy(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	u64 context, u64 seqno)
+{
+	u32 client_id = hw_fence_client->client_id;
+	struct msm_hw_fence *hw_fences_tbl = drv_data->hw_fences_tbl;
+	int ret = 0;
+
+	/* remove hw fence from table */
+	if (_hw_fence_cleanup(drv_data, hw_fences_tbl, client_id, context, seqno)) {
+		HWFNC_ERR("Fail destroying fence client:%u ctx:%llu seqno:%llu\n",
+			client_id, context, seqno);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static struct msm_hw_fence *_hw_fence_process_join_fence(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	struct dma_fence_array *array, u64 *hash, bool create)
+{
+	struct msm_hw_fence *hw_fences_tbl;
+	struct msm_hw_fence *join_fence = NULL;
+	u64 context, seqno;
+	u32 client_id, pending_child_cnt;
+
+	/*
+	 * NOTE: For now we are allocating the join fences from the same table as all
+	 * the other fences (i.e. drv_data->hw_fences_tbl), functionally this will work, however,
+	 * this might impact the lookup algorithm, since the "join-fences" are created with the
+	 * context and seqno of a fence-array, and those might not be changing by the client,
+	 * so this will linearly increment the look-up and very likely impact the other fences if
+	 * these join-fences start to fill-up a particular region of the fences global table.
+	 * So we might have to allocate a different table altogether for these join fences.
+	 * However, to do this, just alloc another table and change it here:
+	 */
+	hw_fences_tbl = drv_data->hw_fences_tbl;
+
+	context = array->base.context;
+	seqno = array->base.seqno;
+	pending_child_cnt = array->num_fences;
+	client_id = HW_FENCE_JOIN_FENCE_CLIENT_ID;
+
+	if (create) {
+		/* allocate the fence */
+		join_fence = _hw_fence_lookup_and_process(drv_data, hw_fences_tbl, context,
+			seqno, client_id, pending_child_cnt, HW_FENCE_LOOKUP_OP_CREATE_JOIN, hash);
+		if (!join_fence)
+			HWFNC_ERR("Fail to create join fence client:%lu ctx:%llu seqno:%llu\n",
+				client_id, context, seqno);
+	} else {
+		/* destroy the fence */
+		if (_hw_fence_cleanup(drv_data, hw_fences_tbl, client_id, context, seqno))
+			HWFNC_ERR("Fail destroying join fence client:%lu ctx:%llu seqno:%llu\n",
+				client_id, context, seqno);
+	}
+
+	return join_fence;
+}
+
+struct msm_hw_fence *msm_hw_fence_find(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	u64 context, u64 seqno, u64 *hash)
+{
+	struct msm_hw_fence *hw_fences_tbl = drv_data->hw_fences_tbl;
+	struct msm_hw_fence *hw_fence;
+	u32 client_id = hw_fence_client ? hw_fence_client->client_id : 0xff;
+
+	/* find the hw fence */
+	hw_fence = _hw_fence_lookup_and_process(drv_data, hw_fences_tbl, context,
+		seqno, client_id, 0, HW_FENCE_LOOKUP_OP_FIND_FENCE, hash);
+	if (!hw_fence)
+		HWFNC_ERR("Fail to find hw fence client:%lu ctx:%llu seqno:%llu\n",
+			client_id, context, seqno);
+
+	return hw_fence;
+}
+
+static void _fence_ctl_signal(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client, struct msm_hw_fence *hw_fence, u64 hash,
+	u64 flags, u32 error)
+{
+	u32 tx_client_id = drv_data->ipcc_client_id;
+	u32 rx_client_id = hw_fence_client->ipc_client_id;
+
+	HWFNC_DBG_H("We must signal the client now! hfence hash:%llu\n", hash);
+
+	/* Write to Rx queue */
+	if (hw_fence_client->update_rxq)
+		hw_fence_update_queue(drv_data, hw_fence_client, hw_fence->ctx_id,
+			hw_fence->seq_id, hash, flags, error, HW_FENCE_RX_QUEUE - 1);
+
+	/* Signal the hw fence now */
+	hw_fence_ipcc_trigger_signal(drv_data, tx_client_id, rx_client_id,
+		hw_fence_client->ipc_signal_id);
+}
+
+static void _cleanup_join_and_child_fences(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client, int iteration, struct dma_fence_array *array,
+	struct msm_hw_fence *join_fence, u64 hash_join_fence)
+{
+	struct dma_fence *child_fence;
+	struct msm_hw_fence *hw_fence_child;
+	int idx, j;
+	u64 hash = 0;
+
+	/* cleanup the child-fences from the parent join-fence */
+	for (idx = iteration; idx >= 0; idx--) {
+		child_fence = array->fences[idx];
+
+		hw_fence_child = msm_hw_fence_find(drv_data, hw_fence_client, child_fence->context,
+			child_fence->seqno, &hash);
+		if (!hw_fence_child) {
+			HWFNC_ERR("Cannot cleanup child fence context:%lu seqno:%lu hash:%lu\n",
+				child_fence->context, child_fence->seqno, hash);
+
+			/*
+			 * ideally this should not have happened, but if it did, try to keep
+			 * cleaning-up other fences after printing the error
+			 */
+			continue;
+		}
+
+		/* lock the child while we clean it up from the parent join-fence */
+		GLOBAL_ATOMIC_STORE(&hw_fence_child->lock, 1); /* lock */
+		for (j = hw_fence_child->parents_cnt; j > 0; j--) {
+
+			if (j > MSM_HW_FENCE_MAX_JOIN_PARENTS) {
+				HWFNC_ERR("Invalid max parents_cnt:%d, will reset to max:%d\n",
+					hw_fence_child->parents_cnt, MSM_HW_FENCE_MAX_JOIN_PARENTS);
+
+				j = MSM_HW_FENCE_MAX_JOIN_PARENTS;
+			}
+
+			if (hw_fence_child->parent_list[j - 1] == hash_join_fence) {
+				hw_fence_child->parent_list[j - 1] = HW_FENCE_INVALID_PARENT_FENCE;
+
+				if (hw_fence_child->parents_cnt)
+					hw_fence_child->parents_cnt--;
+
+				/* update memory for the table update */
+				wmb();
+			}
+		}
+		GLOBAL_ATOMIC_STORE(&hw_fence_child->lock, 0); /* unlock */
+	}
+
+	/* destroy join fence */
+	_hw_fence_process_join_fence(drv_data, hw_fence_client, array, &hash_join_fence,
+		false);
+}
+
+int hw_fence_process_fence_array(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client, struct dma_fence_array *array)
+{
+	struct msm_hw_fence *join_fence;
+	struct msm_hw_fence *hw_fence_child;
+	struct dma_fence *child_fence;
+	u32 signaled_fences = 0;
+	u64 hash_join_fence, hash;
+	int i, ret = 0;
+
+	/*
+	 * Create join fence from the join-fences table,
+	 * This function initializes:
+	 * join_fence->pending_child_count = array->num_fences
+	 */
+	join_fence = _hw_fence_process_join_fence(drv_data, hw_fence_client, array,
+		&hash_join_fence, true);
+	if (!join_fence) {
+		HWFNC_ERR("cannot alloc hw fence for join fence array\n");
+		return -EINVAL;
+	}
+
+	/* update this as waiting client of the join-fence */
+	GLOBAL_ATOMIC_STORE(&join_fence->lock, 1); /* lock */
+	join_fence->wait_client_mask |= BIT(hw_fence_client->client_id);
+	GLOBAL_ATOMIC_STORE(&join_fence->lock, 0); /* unlock */
+
+	/* Iterate through fences of the array */
+	for (i = 0; i < array->num_fences; i++) {
+		child_fence = array->fences[i];
+
+		/* Nested fence-arrays are not supported */
+		if (to_dma_fence_array(child_fence)) {
+			HWFNC_ERR("This is a nested fence, fail!\n");
+			ret = -EINVAL;
+			goto error_array;
+		}
+
+		/* All elements in the fence-array must be hw-fences */
+		if (!test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &child_fence->flags)) {
+			HWFNC_ERR("DMA Fence in FenceArray is not a HW Fence\n");
+			ret = -EINVAL;
+			goto error_array;
+		}
+
+		/* Find the HW Fence in the Global Table */
+		hw_fence_child = msm_hw_fence_find(drv_data, hw_fence_client, child_fence->context,
+			child_fence->seqno, &hash);
+		if (!hw_fence_child) {
+			HWFNC_ERR("Cannot find child fence context:%lu seqno:%lu hash:%lu\n",
+				child_fence->context, child_fence->seqno, hash);
+			ret = -EINVAL;
+			goto error_array;
+		}
+
+		GLOBAL_ATOMIC_STORE(&hw_fence_child->lock, 1); /* lock */
+		if (hw_fence_child->flags & MSM_HW_FENCE_FLAG_SIGNAL) {
+
+			/* child fence is already signaled */
+			GLOBAL_ATOMIC_STORE(&join_fence->lock, 1); /* lock */
+			join_fence->pending_child_cnt--;
+
+			/* update memory for the table update */
+			wmb();
+
+			GLOBAL_ATOMIC_STORE(&join_fence->lock, 0); /* unlock */
+			signaled_fences++;
+		} else {
+
+			/* child fence is not signaled */
+			hw_fence_child->parents_cnt++;
+
+			if (hw_fence_child->parents_cnt >= MSM_HW_FENCE_MAX_JOIN_PARENTS
+					|| hw_fence_child->parents_cnt < 1) {
+
+				/* Max number of parents for a fence is exceeded */
+				HWFNC_ERR("DMA Fence in FenceArray exceeds parents:%d\n",
+					hw_fence_child->parents_cnt);
+				hw_fence_child->parents_cnt--;
+
+				/* update memory for the table update */
+				wmb();
+
+				GLOBAL_ATOMIC_STORE(&hw_fence_child->lock, 0); /* unlock */
+				ret = -EINVAL;
+				goto error_array;
+			}
+
+			hw_fence_child->parent_list[hw_fence_child->parents_cnt - 1] =
+				hash_join_fence;
+
+			/* update memory for the table update */
+			wmb();
+		}
+		GLOBAL_ATOMIC_STORE(&hw_fence_child->lock, 0); /* unlock */
+	}
+
+	/* all fences were signaled, signal client now */
+	if (signaled_fences == array->num_fences) {
+
+		/* signal the join hw fence */
+		_fence_ctl_signal(drv_data, hw_fence_client, join_fence, hash_join_fence, 0, 0);
+
+		/*
+		 * job of the join-fence is finished since we already signaled,
+		 * we can delete it now. This can happen when all the fences that
+		 * are part of the join-fence are already signaled.
+		 */
+		_hw_fence_process_join_fence(drv_data, hw_fence_client, array, &hash_join_fence,
+			false);
+	}
+
+	return ret;
+
+error_array:
+	_cleanup_join_and_child_fences(drv_data, hw_fence_client, i, array, join_fence,
+		hash_join_fence);
+
+	return -EINVAL;
+}
+
+int hw_fence_register_wait_client(struct hw_fence_driver_data *drv_data,
+		struct msm_hw_fence_client *hw_fence_client, u64 context, u64 seqno)
+{
+	struct msm_hw_fence *hw_fence;
+	u64 hash;
+
+	/* find the hw fence within the table */
+	hw_fence = msm_hw_fence_find(drv_data, hw_fence_client, context, seqno, &hash);
+	if (!hw_fence) {
+		HWFNC_ERR("Cannot find fence!\n");
+		return -EINVAL;
+	}
+
+	GLOBAL_ATOMIC_STORE(&hw_fence->lock, 1); /* lock */
+
+	/* register client in the hw fence */
+	hw_fence->wait_client_mask |= BIT(hw_fence_client->client_id);
+	hw_fence->fence_wait_time = hw_fence_get_qtime(drv_data);
+	hw_fence->debug_refcount++;
+
+	/* update memory for the table update */
+	wmb();
+
+	/* if hw fence already signaled, signal the client */
+	if (hw_fence->flags & MSM_HW_FENCE_FLAG_SIGNAL)
+		_fence_ctl_signal(drv_data, hw_fence_client, hw_fence, hash, 0, 0);
+
+	GLOBAL_ATOMIC_STORE(&hw_fence->lock, 0); /* unlock */
+
+	return 0;
+}
+
+int hw_fence_process_fence(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client,
+	struct dma_fence *fence)
+{
+	int ret = 0;
+
+	if (!drv_data || !hw_fence_client || !fence) {
+		HWFNC_ERR("Invalid Input!\n");
+		return -EINVAL;
+	}
+	/* fence must be hw-fence */
+	if (!test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags)) {
+		HWFNC_ERR("DMA Fence in is not a HW Fence flags:0x%llx\n", fence->flags);
+		return -EINVAL;
+	}
+
+	ret = hw_fence_register_wait_client(drv_data, hw_fence_client, fence->context,
+		fence->seqno);
+	if (ret)
+		HWFNC_ERR("Error registering for wait client:%d\n", hw_fence_client->client_id);
+
+	return ret;
+}
+
+int hw_fence_utils_cleanup_fence(struct hw_fence_driver_data *drv_data,
+	struct msm_hw_fence_client *hw_fence_client, struct msm_hw_fence *hw_fence, u64 hash,
+	u32 reset_flags)
+{
+	int ret = 0;
+	enum hw_fence_client_id wait_client_id;
+	struct msm_hw_fence_client *hw_fence_wait_client;
+	int error = (reset_flags & MSM_HW_FENCE_RESET_WITHOUT_ERROR) ? 0 : MSM_HW_FENCE_ERROR_RESET;
+
+	GLOBAL_ATOMIC_STORE(&hw_fence->lock, 1); /* lock */
+	if (hw_fence->wait_client_mask & BIT(hw_fence_client->client_id)) {
+		HWFNC_DBG_H("clearing client:%d wait bit for fence: ctx:%d seqno:%d\n",
+			hw_fence_client->client_id, hw_fence->ctx_id,
+			hw_fence->seq_id);
+		hw_fence->wait_client_mask &= ~BIT(hw_fence_client->client_id);
+
+		/* update memory for the table update */
+		wmb();
+	}
+	GLOBAL_ATOMIC_STORE(&hw_fence->lock, 0); /* unlock */
+
+	if (hw_fence->fence_allocator == hw_fence_client->client_id) {
+
+		/* signal with an error all the waiting clients for this fence */
+		for (wait_client_id = 0; wait_client_id < HW_FENCE_CLIENT_MAX; wait_client_id++) {
+			if (hw_fence->wait_client_mask & BIT(wait_client_id)) {
+				hw_fence_wait_client = drv_data->clients[wait_client_id];
+
+				if (hw_fence_wait_client)
+					_fence_ctl_signal(drv_data, hw_fence_wait_client, hw_fence,
+						hash, 0, error);
+			}
+		}
+
+		if (reset_flags & MSM_HW_FENCE_RESET_WITHOUT_DESTROY)
+			goto skip_destroy;
+
+		ret = hw_fence_destroy(drv_data, hw_fence_client,
+			hw_fence->ctx_id, hw_fence->seq_id);
+		if (ret) {
+			HWFNC_ERR("Error destroying HW fence: ctx:%d seqno:%d\n",
+				hw_fence->ctx_id, hw_fence->seq_id);
+		}
+	}
+
+skip_destroy:
+	return ret;
+}
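
Putting the entry points above together, the lifecycle from a client's perspective is roughly the following (a condensed sketch; producer and waiter stand in for registered client handles, locking and error handling are elided):

    u64 hash;

    /* producer backs its dma-fence with a slot in the global table */
    hw_fence_create(drv_data, producer, fence->context, fence->seqno, &hash);

    /* a waiter asks to be signaled once that fence triggers */
    hw_fence_register_wait_client(drv_data, waiter, fence->context, fence->seqno);

    /* after the fence is signaled and consumed, the producer releases the slot */
    hw_fence_destroy(drv_data, producer, fence->context, fence->seqno);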

+ 644 - 0
hw_fence/src/hw_fence_drv_utils.c

@@ -0,0 +1,644 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+#include <linux/gunyah/gh_rm_drv.h>
+#include <linux/gunyah/gh_dbl.h>
+#include <soc/qcom/secure_buffer.h>
+
+#include "hw_fence_drv_priv.h"
+#include "hw_fence_drv_utils.h"
+#include "hw_fence_drv_ipc.h"
+#include "hw_fence_drv_debug.h"
+
+static void _lock(uint64_t *wait)
+{
+	/* WFE Wait */
+#if defined(__aarch64__)
+	__asm__("SEVL\n\t"
+		"PRFM PSTL1KEEP, [%x[i_lock]]\n\t"
+		"1:\n\t"
+		"WFE\n\t"
+		"LDAXR W5, [%x[i_lock]]\n\t"
+		"CBNZ W5, 1b\n\t"
+		"STXR W5, W0, [%x[i_lock]]\n\t"
+		"CBNZ W5, 1b\n"
+		:
+		: [i_lock] "r" (wait)
+		: "memory");
+#endif
+}
+
+static void _unlock(uint64_t *lock)
+{
+	/* Signal Client */
+#if defined(__aarch64__)
+	__asm__("STLR WZR, [%x[i_out]]\n\t"
+		"SEV\n"
+		:
+		: [i_out] "r" (lock)
+		: "memory");
+#endif
+}
+
+void global_atomic_store(uint64_t *lock, bool val)
+{
+	if (val)
+		_lock(lock);
+	else
+		_unlock(lock);
+}
+
+/*
+ * Each bit in this mask represents one of the loopback clients supported in
+ * enum hw_fence_loopback_id
+ */
+#define HW_FENCE_LOOPBACK_CLIENTS_MASK 0x7f
+
+static inline int _process_dpu_client_loopback(struct hw_fence_driver_data *drv_data,
+	int client_id)
+{
+	int ctl_id = client_id; /* dpu ctl path id is mapped to client id used for the loopback */
+	void *ctl_start_reg;
+	u32 val;
+
+	if (ctl_id > HW_FENCE_LOOPBACK_DPU_CTL_5) {
+		HWFNC_ERR("invalid ctl_id:%d\n", ctl_id);
+		return -EINVAL;
+	}
+
+	ctl_start_reg = drv_data->ctl_start_ptr[ctl_id];
+	if (!ctl_start_reg) {
+		HWFNC_ERR("ctl_start reg not valid for ctl_id:%d\n", ctl_id);
+		return -EINVAL;
+	}
+
+	HWFNC_DBG_H("Processing DPU loopback ctl_id:%d\n", ctl_id);
+
+	val = 0x1; /* ctl_start trigger */
+#ifdef CTL_START_SIM
+	HWFNC_DBG_IRQ("ctl_id:%d Write: to RegOffset:0x%pK val:0x%x\n", ctl_start_reg, val, ctl_id);
+	writel_relaxed(val, ctl_start_reg);
+#else
+	HWFNC_DBG_IRQ("ctl_id:%d Write: to RegOffset:0x%pK val:0x%x (COMMENTED)\n", ctl_id,
+		ctl_start_reg, val);
+#endif
+
+	return 0;
+}
+
+static inline int _process_gfx_client_loopback(struct hw_fence_driver_data *drv_data,
+	int client_id)
+{
+	int queue_type = HW_FENCE_RX_QUEUE - 1; /* rx queue index */
+	struct msm_hw_fence_queue_payload payload;
+	int read = 1;
+
+	HWFNC_DBG_IRQ("Processing GFX loopback client_id:%d\n", client_id);
+	while (read) {
+		/*
+		 * 'client_id' is the loopback-client-id, not the hw-fence client_id,
+		 * so use GFX hw-fence client id, to get the client data
+		 */
+		read = hw_fence_read_queue(drv_data->clients[HW_FENCE_CLIENT_ID_CTX0], &payload,
+			queue_type);
+		if (read < 0) {
+			HWFNC_ERR("unable to read gfx rxq\n");
+			break;
+		}
+		HWFNC_DBG_L("GFX loopback rxq read: hash:%llu ctx:%llu seq:%llu f:%llu e:%lu\n",
+			payload.hash, payload.ctxt_id, payload.seqno, payload.flags, payload.error);
+	}
+
+	return read;
+}
+
+static int _process_doorbell_client(struct hw_fence_driver_data *drv_data, int client_id)
+{
+	int ret;
+
+	HWFNC_DBG_H("Processing loopback client_id:%d\n", client_id);
+	switch (client_id) {
+	case HW_FENCE_LOOPBACK_DPU_CTL_0:
+	case HW_FENCE_LOOPBACK_DPU_CTL_1:
+	case HW_FENCE_LOOPBACK_DPU_CTL_2:
+	case HW_FENCE_LOOPBACK_DPU_CTL_3:
+	case HW_FENCE_LOOPBACK_DPU_CTL_4:
+	case HW_FENCE_LOOPBACK_DPU_CTL_5:
+		ret = _process_dpu_client_loopback(drv_data, client_id);
+		break;
+	case HW_FENCE_LOOPBACK_GFX_CTX_0:
+		ret = _process_gfx_client_loopback(drv_data, client_id);
+		break;
+	default:
+		HWFNC_ERR("unknown client:%d\n", client_id);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
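+/*
+ * Walk the doorbell flags and process every loopback client whose bit is set;
+ * e.g. (assuming the enum hw_fence_loopback_id ordering) db_flags = 0x41
+ * selects HW_FENCE_LOOPBACK_DPU_CTL_0 (bit 0) and HW_FENCE_LOOPBACK_GFX_CTX_0
+ * (bit 6).
+ */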
+void hw_fence_utils_process_doorbell_mask(struct hw_fence_driver_data *drv_data, u64 db_flags)
+{
+	int client_id = HW_FENCE_LOOPBACK_DPU_CTL_0;
+	u64 mask;
+
+	for (; client_id < HW_FENCE_LOOPBACK_MAX; client_id++) {
+		mask = 1 << client_id;
+		if (mask & db_flags) {
+			HWFNC_DBG_H("client_id:%d signaled! flags:0x%llx\n", client_id, db_flags);
+
+			/* process client */
+			if (_process_doorbell_client(drv_data, client_id))
+				HWFNC_ERR("Failed to process client:%d\n", client_id);
+
+			/* clear mask for this client and if nothing else pending finish */
+			db_flags = db_flags & ~(mask);
+			HWFNC_DBG_H("client_id:%d cleared flags:0x%llx mask:0x%llx ~mask:0x%llx\n",
+				client_id, db_flags, mask, ~(mask));
+			if (!db_flags)
+				break;
+		}
+	}
+}
+
+/* doorbell callback */
+static void _hw_fence_cb(int irq, void *data)
+{
+	struct hw_fence_driver_data *drv_data = (struct hw_fence_driver_data *)data;
+	gh_dbl_flags_t clear_flags = HW_FENCE_LOOPBACK_CLIENTS_MASK;
+	int ret;
+
+	if (!drv_data)
+		return;
+
+	ret = gh_dbl_read_and_clean(drv_data->rx_dbl, &clear_flags, 0);
+	if (ret) {
+		HWFNC_ERR("hw_fence db callback, retrieve flags fail ret:%d\n", ret);
+		return;
+	}
+
+	HWFNC_DBG_IRQ("db callback label:%d irq:%d flags:0x%llx qtime:%llu\n", drv_data->db_label,
+		irq, clear_flags, hw_fence_get_qtime(drv_data));
+
+	hw_fence_utils_process_doorbell_mask(drv_data, clear_flags);
+}
+
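+/*
+ * Register the Gunyah doorbell used to receive signals from the fence
+ * controller. Example DT node consumed here (illustrative label value):
+ *
+ *   msm-hw-fence-db {
+ *       compatible = "qcom,msm-hw-fence-db";
+ *       gunyah-label = <1>;
+ *   };
+ */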
+int hw_fence_utils_init_virq(struct hw_fence_driver_data *drv_data)
+{
+	struct device_node *node = drv_data->dev->of_node;
+	struct device_node *node_compat;
+	const char *compat = "qcom,msm-hw-fence-db";
+	int ret;
+
+	node_compat = of_find_compatible_node(node, NULL, compat);
+	if (!node_compat) {
+		HWFNC_ERR("Failed to find dev node with compat:%s\n", compat);
+		return -EINVAL;
+	}
+
+	ret = of_property_read_u32(node_compat, "gunyah-label", &drv_data->db_label);
+	if (ret) {
+		HWFNC_ERR("failed to find label info %d\n", ret);
+		return ret;
+	}
+
+	HWFNC_DBG_IRQ("registering doorbell db_label:%d\n", drv_data->db_label);
+	drv_data->rx_dbl = gh_dbl_rx_register(drv_data->db_label, _hw_fence_cb, drv_data);
+	if (IS_ERR_OR_NULL(drv_data->rx_dbl)) {
+		ret = PTR_ERR(drv_data->rx_dbl);
+		HWFNC_ERR("Failed to register doorbell\n");
+		return ret;
+	}
+
+	return 0;
+}
+
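+/*
+ * Share the carved-out region with the peer VM: reassign the physical range
+ * to both VMs via hyp_assign_phys(), then describe it to the Gunyah resource
+ * manager with gh_rm_mem_share() so the peer can map it.
+ */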
+static int hw_fence_gunyah_share_mem(struct hw_fence_driver_data *drv_data,
+				gh_vmid_t self, gh_vmid_t peer)
+{
+	u32 src_vmlist[1] = {self};
+	int src_perms[1] = {PERM_READ | PERM_WRITE | PERM_EXEC};
+	int dst_vmlist[2] = {self, peer};
+	int dst_perms[2] = {PERM_READ | PERM_WRITE, PERM_READ | PERM_WRITE};
+	struct gh_acl_desc *acl;
+	struct gh_sgl_desc *sgl;
+	int ret;
+
+	ret = hyp_assign_phys(drv_data->res.start, resource_size(&drv_data->res),
+			src_vmlist, 1, dst_vmlist, dst_perms, 2);
+	if (ret) {
+		HWFNC_ERR("%s: hyp_assign_phys failed addr=%x size=%u err=%d\n",
+			__func__, drv_data->res.start, drv_data->size, ret);
+		return ret;
+	}
+
+	acl = kzalloc(offsetof(struct gh_acl_desc, acl_entries[2]), GFP_KERNEL);
+	if (!acl)
+		return -ENOMEM;
+	sgl = kzalloc(offsetof(struct gh_sgl_desc, sgl_entries[1]), GFP_KERNEL);
+	if (!sgl) {
+		kfree(acl);
+		return -ENOMEM;
+	}
+	acl->n_acl_entries = 2;
+	acl->acl_entries[0].vmid = (u16)self;
+	acl->acl_entries[0].perms = GH_RM_ACL_R | GH_RM_ACL_W;
+	acl->acl_entries[1].vmid = (u16)peer;
+	acl->acl_entries[1].perms = GH_RM_ACL_R | GH_RM_ACL_W;
+
+	sgl->n_sgl_entries = 1;
+	sgl->sgl_entries[0].ipa_base = drv_data->res.start;
+	sgl->sgl_entries[0].size = resource_size(&drv_data->res);
+
+	ret = gh_rm_mem_share(GH_RM_MEM_TYPE_NORMAL, 0, drv_data->label,
+			acl, sgl, NULL, &drv_data->memparcel);
+	if (ret) {
+		HWFNC_ERR("%s: gh_rm_mem_share failed addr=%x size=%u err=%d\n",
+			__func__, drv_data->res.start, drv_data->size, ret);
+		/* Attempt to give resource back to HLOS */
+		hyp_assign_phys(drv_data->res.start, resource_size(&drv_data->res),
+				dst_vmlist, 2,
+				src_vmlist, src_perms, 1);
+		ret = -EPROBE_DEFER;
+	}
+
+	kfree(acl);
+	kfree(sgl);
+
+	return ret;
+}
+
+static int hw_fence_rm_cb(struct notifier_block *nb, unsigned long cmd, void *data)
+{
+	struct gh_rm_notif_vm_status_payload *vm_status_payload;
+	struct hw_fence_driver_data *drv_data;
+	gh_vmid_t peer_vmid;
+	gh_vmid_t self_vmid;
+
+	drv_data = container_of(nb, struct hw_fence_driver_data, rm_nb);
+
+	HWFNC_DBG_INIT("cmd:0x%lx ++\n", cmd);
+	if (cmd != GH_RM_NOTIF_VM_STATUS)
+		goto end;
+
+	vm_status_payload = data;
+	HWFNC_DBG_INIT("payload vm_status:%d\n", vm_status_payload->vm_status);
+	if (vm_status_payload->vm_status != GH_RM_VM_STATUS_READY &&
+	    vm_status_payload->vm_status != GH_RM_VM_STATUS_RESET)
+		goto end;
+
+	if (gh_rm_get_vmid(drv_data->peer_name, &peer_vmid))
+		goto end;
+
+	if (gh_rm_get_vmid(GH_PRIMARY_VM, &self_vmid))
+		goto end;
+
+	if (peer_vmid != vm_status_payload->vmid)
+		goto end;
+
+	switch (vm_status_payload->vm_status) {
+	case GH_RM_VM_STATUS_READY:
+		HWFNC_DBG_INIT("init mem\n");
+		if (hw_fence_gunyah_share_mem(drv_data, self_vmid, peer_vmid))
+			HWFNC_ERR("failed to share memory\n");
+		break;
+	case GH_RM_VM_STATUS_RESET:
+		HWFNC_DBG_INIT("reset\n");
+		break;
+	}
+
+end:
+	return NOTIFY_DONE;
+}
+
+/* Maps the carved-out memory and registers it for sharing with the peer VM */
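+/*
+ * Example DT nodes consumed here (illustrative values; the shared-buffer
+ * phandle is hypothetical):
+ *
+ *   msm-hw-fence-mem {
+ *       compatible = "qcom,msm-hw-fence-mem";
+ *       gunyah-label = <2>;
+ *       shared-buffer = <&hwfence_shbuf>;
+ *       peer-name = <...>; (optional, defaults to GH_SELF_VM)
+ *   };
+ */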
+int hw_fence_utils_alloc_mem(struct hw_fence_driver_data *drv_data)
+{
+	struct device_node *node = drv_data->dev->of_node;
+	struct device_node *node_compat;
+	const char *compat = "qcom,msm-hw-fence-mem";
+	struct device *dev = drv_data->dev;
+	struct device_node *np;
+	int notifier_ret, ret;
+
+	node_compat = of_find_compatible_node(node, NULL, compat);
+	if (!node_compat) {
+		HWFNC_ERR("Failed to find dev node with compat:%s\n", compat);
+		return -EINVAL;
+	}
+
+	ret = of_property_read_u32(node_compat, "gunyah-label", &drv_data->label);
+	if (ret) {
+		HWFNC_ERR("failed to find label info %d\n", ret);
+		return ret;
+	}
+
+	np = of_parse_phandle(node_compat, "shared-buffer", 0);
+	if (!np) {
+		HWFNC_ERR("failed to read shared-buffer info\n");
+		return -ENOMEM;
+	}
+
+	ret = of_address_to_resource(np, 0, &drv_data->res);
+	of_node_put(np);
+	if (ret) {
+		HWFNC_ERR("of_address_to_resource failed %d\n", ret);
+		return -EINVAL;
+	}
+
+	drv_data->io_mem_base = devm_ioremap(dev, drv_data->res.start,
+		resource_size(&drv_data->res));
+	if (!drv_data->io_mem_base) {
+		HWFNC_ERR("ioremap failed!\n");
+		return -ENXIO;
+	}
+	drv_data->size = resource_size(&drv_data->res);
+
+	HWFNC_DBG_INIT("io_mem_base:0x%x start:0x%x end:0x%x size:0x%x name:%s\n",
+		drv_data->io_mem_base, drv_data->res.start,
+		drv_data->res.end, drv_data->size, drv_data->res.name);
+
+	memset_io(drv_data->io_mem_base, 0x0, drv_data->size);
+
+	/* Register memory with HYP */
+	ret = of_property_read_u32(node_compat, "peer-name", &drv_data->peer_name);
+	if (ret)
+		drv_data->peer_name = GH_SELF_VM;
+
+	drv_data->rm_nb.notifier_call = hw_fence_rm_cb;
+	drv_data->rm_nb.priority = INT_MAX;
+	notifier_ret = gh_rm_register_notifier(&drv_data->rm_nb);
+	HWFNC_DBG_INIT("notifier: ret:%d peer_name:%d notifier_ret:%d\n", ret,
+		drv_data->peer_name, notifier_ret);
+	if (notifier_ret) {
+		HWFNC_ERR("fail to register notifier ret:%d\n", notifier_ret);
+		return -EPROBE_DEFER;
+	}
+
+	return 0;
+}
+
+char *_get_mem_reserve_type(enum hw_fence_mem_reserve type)
+{
+	switch (type) {
+	case HW_FENCE_MEM_RESERVE_CTRL_QUEUE:
+		return "HW_FENCE_MEM_RESERVE_CTRL_QUEUE";
+	case HW_FENCE_MEM_RESERVE_LOCKS_REGION:
+		return "HW_FENCE_MEM_RESERVE_LOCKS_REGION";
+	case HW_FENCE_MEM_RESERVE_TABLE:
+		return "HW_FENCE_MEM_RESERVE_TABLE";
+	case HW_FENCE_MEM_RESERVE_CLIENT_QUEUE:
+		return "HW_FENCE_MEM_RESERVE_CLIENT_QUEUE";
+	}
+
+	return "Unknown";
+}
+
+/* Calculates the memory range for each of the elements in the carved-out memory */
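+/*
+ * Carve-out layout, in offset order:
+ *
+ *   [ ctrl queues | locks region | fences table | per-client queues... ]
+ *
+ * The per-client queue region starts PAGE_ALIGN'ed after the fences table and
+ * is indexed from client_id 1.
+ */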
+int hw_fence_utils_reserve_mem(struct hw_fence_driver_data *drv_data,
+	enum hw_fence_mem_reserve type, phys_addr_t *phys, void **pa, u32 *size, int client_id)
+{
+	int ret = 0;
+	u32 start_offset = 0;
+
+	switch (type) {
+	case HW_FENCE_MEM_RESERVE_CTRL_QUEUE:
+		start_offset = 0;
+		*size = drv_data->hw_fence_mem_ctrl_queues_size;
+		break;
+	case HW_FENCE_MEM_RESERVE_LOCKS_REGION:
+		/* Locks region starts at the end of the ctrl queues */
+		start_offset = drv_data->hw_fence_mem_ctrl_queues_size;
+		*size = HW_FENCE_MEM_LOCKS_SIZE;
+		break;
+	case HW_FENCE_MEM_RESERVE_TABLE:
+		/* HW Fence table starts at the end of the Locks region */
+		start_offset = drv_data->hw_fence_mem_ctrl_queues_size + HW_FENCE_MEM_LOCKS_SIZE;
+		*size = drv_data->hw_fence_mem_fences_table_size;
+		break;
+	case HW_FENCE_MEM_RESERVE_CLIENT_QUEUE:
+		if (client_id >= HW_FENCE_CLIENT_MAX) {
+			HWFNC_ERR("unexpected client_id:%d\n", client_id);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		start_offset = PAGE_ALIGN(drv_data->hw_fence_mem_ctrl_queues_size +
+			HW_FENCE_MEM_LOCKS_SIZE +
+			drv_data->hw_fence_mem_fences_table_size) +
+			((client_id - 1) * drv_data->hw_fence_mem_clients_queues_size);
+		*size = drv_data->hw_fence_mem_clients_queues_size;
+
+		break;
+	default:
+		HWFNC_ERR("Invalid mem reserve type:%d\n", type);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	if (start_offset + *size > drv_data->size) {
+		HWFNC_ERR("reservation request:%lu exceeds total size:%d\n",
+			start_offset + *size, drv_data->size);
+		return -ENOMEM;
+	}
+
+	HWFNC_DBG_INIT("type:%s (%d) io_mem_base:0x%x start:0x%x start_offset:%lu size:0x%x\n",
+		_get_mem_reserve_type(type), type, drv_data->io_mem_base, drv_data->res.start,
+		start_offset, *size);
+
+
+	*phys = drv_data->res.start + (phys_addr_t)start_offset;
+	*pa = (drv_data->io_mem_base + start_offset); /* offset is in bytes */
+	HWFNC_DBG_H("phys:0x%x pa:0x%pK\n", *phys, *pa);
+
+exit:
+	return ret;
+}
+
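+/*
+ * Example DT properties consumed here (illustrative values):
+ *
+ *   qcom,hw-fence-table-entries = <8192>;
+ *   qcom,hw-fence-queue-entries = <800>;
+ */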
+int hw_fence_utils_parse_dt_props(struct hw_fence_driver_data *drv_data)
+{
+	int ret;
+	u32 val = 0;
+
+	ret = of_property_read_u32(drv_data->dev->of_node, "qcom,hw-fence-table-entries", &val);
+	if (ret || !val) {
+		HWFNC_ERR("missing hw fences table entry or invalid ret:%d val:%d\n", ret, val);
+		return ret;
+	}
+	drv_data->hw_fence_table_entries = val;
+
+	if (drv_data->hw_fence_table_entries >= U32_MAX / sizeof(struct msm_hw_fence)) {
+		HWFNC_ERR("table entries:%lu will overflow table size\n",
+			drv_data->hw_fence_table_entries);
+		return -EINVAL;
+	}
+	drv_data->hw_fence_mem_fences_table_size = (sizeof(struct msm_hw_fence) *
+		drv_data->hw_fence_table_entries);
+
+	ret = of_property_read_u32(drv_data->dev->of_node, "qcom,hw-fence-queue-entries", &val);
+	if (ret || !val) {
+		HWFNC_ERR("missing queue entries table entry or invalid ret:%d val:%d\n", ret, val);
+		return ret;
+	}
+	drv_data->hw_fence_queue_entries = val;
+
+	/* ctrl queues init */
+
+	if (drv_data->hw_fence_queue_entries >= U32_MAX / HW_FENCE_CTRL_QUEUE_PAYLOAD) {
+		HWFNC_ERR("queue entries:%lu will overflow ctrl queue size\n",
+			drv_data->hw_fence_queue_entries);
+		return -EINVAL;
+	}
+	drv_data->hw_fence_ctrl_queue_size = HW_FENCE_CTRL_QUEUE_PAYLOAD *
+		drv_data->hw_fence_queue_entries;
+
+	if (drv_data->hw_fence_ctrl_queue_size >= (U32_MAX - HW_FENCE_HFI_CTRL_HEADERS_SIZE) /
+			HW_FENCE_CTRL_QUEUES) {
+		HWFNC_ERR("queue size:%lu will overflow ctrl queue mem size\n",
+			drv_data->hw_fence_ctrl_queue_size);
+		return -EINVAL;
+	}
+	drv_data->hw_fence_mem_ctrl_queues_size = HW_FENCE_HFI_CTRL_HEADERS_SIZE +
+		(HW_FENCE_CTRL_QUEUES * drv_data->hw_fence_ctrl_queue_size);
+
+	/* clients queues init */
+
+	if (drv_data->hw_fence_queue_entries >= U32_MAX / HW_FENCE_CLIENT_QUEUE_PAYLOAD) {
+		HWFNC_ERR("queue entries:%lu will overflow client queue size\n",
+			drv_data->hw_fence_queue_entries);
+		return -EINVAL;
+	}
+	drv_data->hw_fence_client_queue_size = HW_FENCE_CLIENT_QUEUE_PAYLOAD *
+		drv_data->hw_fence_queue_entries;
+
+	if (drv_data->hw_fence_client_queue_size >= ((U32_MAX & PAGE_MASK) -
+			HW_FENCE_HFI_CLIENT_HEADERS_SIZE) / HW_FENCE_CLIENT_QUEUES) {
+		HWFNC_ERR("queue size:%lu will overflow client queue mem size\n",
+			drv_data->hw_fence_client_queue_size);
+		return -EINVAL;
+	}
+	drv_data->hw_fence_mem_clients_queues_size = PAGE_ALIGN(HW_FENCE_HFI_CLIENT_HEADERS_SIZE +
+		(HW_FENCE_CLIENT_QUEUES * drv_data->hw_fence_client_queue_size));
+
+	HWFNC_DBG_INIT("table: entries=%lu mem_size=%lu queue: entries=%lu\b",
+		drv_data->hw_fence_table_entries, drv_data->hw_fence_mem_fences_table_size,
+		drv_data->hw_fence_queue_entries);
+	HWFNC_DBG_INIT("ctrl queue: size=%lu mem_size=%lu clients queues: size=%lu mem_size=%lu\b",
+		drv_data->hw_fence_ctrl_queue_size, drv_data->hw_fence_mem_ctrl_queues_size,
+		drv_data->hw_fence_client_queue_size, drv_data->hw_fence_mem_clients_queues_size);
+
+	return 0;
+}
+
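+/*
+ * Example register ranges consumed by the mapping helpers below
+ * (illustrative <base size> pairs):
+ *
+ *   qcom,ipcc-reg  = <0x00400000 0x00140000>;
+ *   qcom,qtime-reg = <0x17c20000 0x00001000>;
+ */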
+int hw_fence_utils_map_ipcc(struct hw_fence_driver_data *drv_data)
+{
+	int ret;
+	u32 reg_config[2];
+	void __iomem *ptr;
+
+	/* Get ipcc memory range */
+	ret = of_property_read_u32_array(drv_data->dev->of_node, "qcom,ipcc-reg",
+				reg_config, 2);
+	if (ret) {
+		HWFNC_ERR("failed to read ipcc reg: %d\n", ret);
+		return ret;
+	}
+	drv_data->ipcc_reg_base = reg_config[0];
+	drv_data->ipcc_size = reg_config[1];
+
+	/* Mmap ipcc registers */
+	ptr = devm_ioremap(drv_data->dev, drv_data->ipcc_reg_base, drv_data->ipcc_size);
+	if (!ptr) {
+		HWFNC_ERR("failed to ioremap ipcc regs\n");
+		return -ENOMEM;
+	}
+	drv_data->ipcc_io_mem = ptr;
+
+	HWFNC_DBG_H("mapped address:0x%x size:0x%x io_mem:0x%pK\n",
+		drv_data->ipcc_reg_base, drv_data->ipcc_size,
+		drv_data->ipcc_io_mem);
+
+	hw_fence_ipcc_enable_signaling(drv_data);
+
+	return ret;
+}
+
+int hw_fence_utils_map_qtime(struct hw_fence_driver_data *drv_data)
+{
+	int ret = 0;
+	unsigned int reg_config[2];
+	void __iomem *ptr;
+
+	ret = of_property_read_u32_array(drv_data->dev->of_node, "qcom,qtime-reg",
+			reg_config, 2);
+	if (ret) {
+		HWFNC_ERR("failed to read qtimer reg: %d\n", ret);
+		return ret;
+	}
+
+	drv_data->qtime_reg_base = reg_config[0];
+	drv_data->qtime_size = reg_config[1];
+
+	ptr = devm_ioremap(drv_data->dev, drv_data->qtime_reg_base, drv_data->qtime_size);
+	if (!ptr) {
+		HWFNC_ERR("failed to ioremap qtime regs\n");
+		return -ENOMEM;
+	}
+
+	drv_data->qtime_io_mem = ptr;
+
+	return ret;
+}
+
+static int _map_ctl_start(struct hw_fence_driver_data *drv_data, u32 ctl_id,
+	void **iomem_ptr, uint32_t *iomem_size)
+{
+	u32 reg_config[2];
+	void __iomem *ptr;
+	char name[30] = {0};
+	int ret;
+
+	snprintf(name, sizeof(name), "qcom,dpu-ctl-start-%d-reg", ctl_id);
+	ret = of_property_read_u32_array(drv_data->dev->of_node, name, reg_config, 2);
+	if (ret)
+		return 0; /* this is an optional property */
+
+	/* Mmap registers */
+	ptr = devm_ioremap(drv_data->dev, reg_config[0], reg_config[1]);
+	if (!ptr) {
+		HWFNC_ERR("failed to ioremap %s reg\n", name);
+		return -ENOMEM;
+	}
+
+	*iomem_ptr = ptr;
+	*iomem_size = reg_config[1];
+
+	HWFNC_DBG_INIT("mapped ctl_start ctl_id:%d name:%s address:0x%x size:0x%x io_mem:0x%pK\n",
+		ctl_id, name, reg_config[0], reg_config[1], ptr);
+
+	return 0;
+}
+
+int hw_fence_utils_map_ctl_start(struct hw_fence_driver_data *drv_data)
+{
+	u32 ctl_id = HW_FENCE_LOOPBACK_DPU_CTL_0;
+
+	for (; ctl_id <= HW_FENCE_LOOPBACK_DPU_CTL_5; ctl_id++) {
+		if (_map_ctl_start(drv_data, ctl_id, &drv_data->ctl_start_ptr[ctl_id],
+			&drv_data->ctl_start_size[ctl_id])) {
+			HWFNC_ERR("cannot map ctl_start ctl_id:%d\n", ctl_id);
+		} else {
+			if (drv_data->ctl_start_ptr[ctl_id])
+				HWFNC_DBG_INIT("mapped ctl_id:%d ctl_start_ptr:0x%pK size:%u\n",
+					ctl_id, drv_data->ctl_start_ptr[ctl_id],
+					drv_data->ctl_start_size[ctl_id]);
+		}
+	}
+
+	return 0;
+}

+ 486 - 0
hw_fence/src/msm_hw_fence.c

@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+#include "hw_fence_drv_priv.h"
+#include "hw_fence_drv_utils.h"
+#include "hw_fence_drv_debug.h"
+#include "hw_fence_drv_ipc.h"
+
+struct hw_fence_driver_data *hw_fence_drv_data;
+
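+/*
+ * Typical client flow (illustrative; client id and variable names are
+ * examples only):
+ *
+ *   handle = msm_hw_fence_register(HW_FENCE_CLIENT_ID_CTX0, &mem_desc);
+ *   msm_hw_fence_create(handle, &params);                  // producer side
+ *   msm_hw_fence_wait_update(handle, fences, num, true);   // waiter side
+ *   msm_hw_fence_destroy(handle, fence);
+ *   msm_hw_fence_deregister(handle);
+ */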
+void *msm_hw_fence_register(enum hw_fence_client_id client_id,
+	struct msm_hw_fence_mem_addr *mem_descriptor)
+{
+	struct msm_hw_fence_client *hw_fence_client;
+	int ret;
+
+	HWFNC_DBG_H("++ client_id:%d\n", client_id);
+
+	if (IS_ERR_OR_NULL(hw_fence_drv_data) || !hw_fence_drv_data->resources_ready) {
+		HWFNC_ERR("hw fence driver not ready\n");
+		return ERR_PTR(-EAGAIN);
+	}
+
+	if (!mem_descriptor || client_id >= HW_FENCE_CLIENT_MAX) {
+		HWFNC_ERR("Invalid params: %d client_id:%d\n",
+			!mem_descriptor, client_id);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Avoid race condition if multiple-threads request same client at same time */
+	mutex_lock(&hw_fence_drv_data->clients_mask_lock);
+	if (hw_fence_drv_data->client_id_mask & BIT(client_id)) {
+		HWFNC_ERR("client with id %d already registered\n", client_id);
+		mutex_unlock(&hw_fence_drv_data->clients_mask_lock);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Mark client as registered */
+	hw_fence_drv_data->client_id_mask |= BIT(client_id);
+	mutex_unlock(&hw_fence_drv_data->clients_mask_lock);
+
+	/* Alloc client handle */
+	hw_fence_client = kzalloc(sizeof(*hw_fence_client), GFP_KERNEL);
+	if (!hw_fence_client) {
+		mutex_lock(&hw_fence_drv_data->clients_mask_lock);
+		hw_fence_drv_data->client_id_mask &= ~BIT(client_id);
+		mutex_unlock(&hw_fence_drv_data->clients_mask_lock);
+		return ERR_PTR(-ENOMEM);
+	}
+	hw_fence_client->client_id = client_id;
+	hw_fence_client->ipc_client_id = hw_fence_ipcc_get_client_id(hw_fence_drv_data, client_id);
+
+	if (hw_fence_client->ipc_client_id <= 0) {
+		HWFNC_ERR("Failed to find client:%d ipc id\n", client_id);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	hw_fence_client->ipc_signal_id = hw_fence_ipcc_get_signal_id(hw_fence_drv_data, client_id);
+	if (hw_fence_client->ipc_signal_id < 0) {
+		HWFNC_ERR("Failed to find client:%d signal\n", client_id);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	hw_fence_client->update_rxq = hw_fence_ipcc_needs_rxq_update(hw_fence_drv_data, client_id);
+
+	hw_fence_drv_data->clients[client_id] = hw_fence_client;
+
+	/* Alloc Client HFI Headers and Queues */
+	ret = hw_fence_alloc_client_resources(hw_fence_drv_data,
+		hw_fence_client, mem_descriptor);
+	if (ret)
+		goto error;
+
+	/* Initialize signal for communication with the Fence Controller */
+	ret = hw_fence_init_controller_signal(hw_fence_drv_data, hw_fence_client);
+	if (ret)
+		goto error;
+
+	/*
+	 * Update Fence Controller with the address of the Queues and
+	 * the Fences Tables for this client
+	 */
+	ret = hw_fence_init_controller_resources(hw_fence_client);
+	if (ret)
+		goto error;
+
+	HWFNC_DBG_INIT("-- Initialized ptr:0x%p client_id:%d ipc_signal_id:%d ipc_client_id:%d\n",
+		hw_fence_client, hw_fence_client->client_id, hw_fence_client->ipc_signal_id,
+		hw_fence_client->ipc_client_id);
+
+	return (void *)hw_fence_client;
+error:
+
+	/* Free all the allocated resources */
+	hw_fence_cleanup_client(hw_fence_drv_data, hw_fence_client);
+
+	HWFNC_ERR("failed with error:%d\n", ret);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(msm_hw_fence_register);
+
+int msm_hw_fence_deregister(void *client_handle)
+{
+	struct msm_hw_fence_client *hw_fence_client;
+
+	if (IS_ERR_OR_NULL(client_handle)) {
+		HWFNC_ERR("Invalid client handle\n");
+		return -EINVAL;
+	}
+	hw_fence_client = (struct msm_hw_fence_client *)client_handle;
+
+	HWFNC_DBG_H("+\n");
+
+	/* Free all the allocated resources */
+	hw_fence_cleanup_client(hw_fence_drv_data, hw_fence_client);
+
+	HWFNC_DBG_H("-\n");
+
+	return 0;
+}
+EXPORT_SYMBOL(msm_hw_fence_deregister);
+
+int msm_hw_fence_create(void *client_handle,
+	struct msm_hw_fence_create_params *params)
+{
+	struct msm_hw_fence_client *hw_fence_client;
+	struct dma_fence_array *array;
+	struct dma_fence *fence;
+	int ret;
+
+	if (IS_ERR_OR_NULL(client_handle) || !params || !params->fence || !params->handle) {
+		HWFNC_ERR("Invalid input\n");
+		return -EINVAL;
+	}
+	hw_fence_client = (struct msm_hw_fence_client *)client_handle;
+	fence = (struct dma_fence *)params->fence;
+
+	HWFNC_DBG_H("+\n");
+
+	/* Block any Fence-Array, we should only get individual fences */
+	array = to_dma_fence_array(fence);
+	if (array) {
+		HWFNC_ERR("HW Fence must be created for individual fences\n");
+		return -EINVAL;
+	}
+
+	/* This Fence is already a HW-Fence */
+	if (test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags)) {
+		HWFNC_ERR("DMA Fence already has HW Fence Flag set\n");
+		return -EINVAL;
+	}
+
+	/* Create the HW Fence, i.e. add entry in the Global Table for this Fence */
+	ret = hw_fence_create(hw_fence_drv_data, hw_fence_client,
+		  fence->context, fence->seqno, params->handle);
+	if (ret) {
+		HWFNC_ERR("Error creating HW fence\n");
+		return ret;
+	}
+
+	/* If no error, set the HW Fence Flag in the dma-fence */
+	set_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags);
+
+	HWFNC_DBG_H("-\n");
+
+	return 0;
+}
+EXPORT_SYMBOL(msm_hw_fence_create);
+
+int msm_hw_fence_destroy(void *client_handle,
+	struct dma_fence *fence)
+{
+	struct msm_hw_fence_client *hw_fence_client;
+	struct dma_fence_array *array;
+	int ret;
+
+	if (IS_ERR_OR_NULL(client_handle) || !fence) {
+		HWFNC_ERR("Invalid data\n");
+		return -EINVAL;
+	}
+	hw_fence_client = (struct msm_hw_fence_client *)client_handle;
+
+	HWFNC_DBG_H("+\n");
+
+	/* Block any Fence-Array, we should only get individual fences */
+	array = to_dma_fence_array(fence);
+	if (array) {
+		HWFNC_ERR("HW Fence must be destroy for individual fences\n");
+		return -EINVAL;
+	}
+
+	/* This Fence is not a HW-Fence */
+	if (!test_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags)) {
+		HWFNC_ERR("DMA Fence is not a HW Fence flags:0x%llx\n", fence->flags);
+		return -EINVAL;
+	}
+
+	/* Destroy the HW Fence, i.e. remove entry in the Global Table for the Fence */
+	ret = hw_fence_destroy(hw_fence_drv_data, hw_fence_client,
+		fence->context, fence->seqno);
+	if (ret) {
+		HWFNC_ERR("Error destroying the HW fence\n");
+		return ret;
+	}
+
+	/* Clear the HW Fence Flag in the dma-fence */
+	clear_bit(MSM_HW_FENCE_FLAG_ENABLED_BIT, &fence->flags);
+
+	HWFNC_DBG_H("-\n");
+
+	return 0;
+}
+EXPORT_SYMBOL(msm_hw_fence_destroy);
+
+int msm_hw_fence_wait_update(void *client_handle,
+	struct dma_fence **fence_list, u32 num_fences, bool create)
+{
+	struct msm_hw_fence_client *hw_fence_client;
+	struct dma_fence_array *array;
+	int i, ret = 0;
+
+	if (IS_ERR_OR_NULL(client_handle) || !fence_list || !*fence_list) {
+		HWFNC_ERR("Invalid data\n");
+		return -EINVAL;
+	}
+	hw_fence_client = (struct msm_hw_fence_client *)client_handle;
+
+	HWFNC_DBG_H("+\n");
+
+	/* Process all the list of fences */
+	for (i = 0; i < num_fences; i++) {
+		struct dma_fence *fence = fence_list[i];
+
+		/* Process a Fence-Array */
+		array = to_dma_fence_array(fence);
+		if (array) {
+			ret = hw_fence_process_fence_array(hw_fence_drv_data, hw_fence_client,
+				array);
+			if (ret) {
+				HWFNC_ERR("Failed to create FenceArray\n");
+				return ret;
+			}
+		} else {
+			/* Process individual Fence */
+			ret = hw_fence_process_fence(hw_fence_drv_data, hw_fence_client, fence);
+			if (ret) {
+				HWFNC_ERR("Failed to create Fence\n");
+				return ret;
+			}
+		}
+	}
+
+	HWFNC_DBG_H("-\n");
+
+	return 0;
+}
+EXPORT_SYMBOL(msm_hw_fence_wait_update);
+
+int msm_hw_fence_reset_client(void *client_handle, u32 reset_flags)
+{
+	struct msm_hw_fence_client *hw_fence_client;
+	struct msm_hw_fence *hw_fences_tbl;
+	int i;
+
+	if (IS_ERR_OR_NULL(client_handle)) {
+		HWFNC_ERR("Invalid client handle!\n");
+		return -EINVAL;
+	}
+	hw_fence_client = (struct msm_hw_fence_client *)client_handle;
+	hw_fences_tbl = hw_fence_drv_data->hw_fences_tbl;
+
+	HWFNC_DBG_L("reset fences for client:%d\n", hw_fence_client->client_id);
+	for (i = 0; i < hw_fence_drv_data->hw_fences_tbl_cnt; i++)
+		hw_fence_utils_cleanup_fence(hw_fence_drv_data, hw_fence_client,
+			&hw_fences_tbl[i], i, reset_flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(msm_hw_fence_reset_client);
+
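+/*
+ * Write a payload for fence @handle into the client's Tx queue so the fence
+ * controller can consume it; @handle is the fence's index (hash) in the
+ * global fences table.
+ */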
+int msm_hw_fence_update_txq(void *client_handle, u64 handle, u64 flags, u32 error)
+{
+	struct msm_hw_fence_client *hw_fence_client;
+
+	if (IS_ERR_OR_NULL(hw_fence_drv_data) || !hw_fence_drv_data->resources_ready) {
+		HWFNC_ERR("hw fence driver not ready\n");
+		return -EAGAIN;
+	} else if (IS_ERR_OR_NULL(client_handle) ||
+			(handle >= hw_fence_drv_data->hw_fences_tbl_cnt)) {
+		HWFNC_ERR("Invalid handle:%d or client handle:%d max:%d\n", handle,
+			IS_ERR_OR_NULL(client_handle), hw_fence_drv_data->hw_fences_tbl_cnt);
+		return -EINVAL;
+	}
+	hw_fence_client = (struct msm_hw_fence_client *)client_handle;
+
+	/* Write to Tx queue */
+	hw_fence_update_queue(hw_fence_drv_data, hw_fence_client,
+		hw_fence_drv_data->hw_fences_tbl[handle].ctx_id,
+		hw_fence_drv_data->hw_fences_tbl[handle].seq_id, handle,
+		flags, error, HW_FENCE_TX_QUEUE - 1);
+
+	return 0;
+}
+EXPORT_SYMBOL(msm_hw_fence_update_txq);
+
+int msm_hw_fence_trigger_signal(void *client_handle,
+	u32 tx_client_id, u32 rx_client_id,
+	u32 signal_id)
+{
+	struct msm_hw_fence_client *hw_fence_client;
+
+	if (IS_ERR_OR_NULL(hw_fence_drv_data) || !hw_fence_drv_data->resources_ready) {
+		HWFNC_ERR("hw fence driver not ready\n");
+		return -EAGAIN;
+	} else if (IS_ERR_OR_NULL(client_handle)) {
+		HWFNC_ERR("Invalid client\n");
+		return -EINVAL;
+	}
+	hw_fence_client = (struct msm_hw_fence_client *)client_handle;
+
+	HWFNC_DBG_H("sending ipc for client:%d\n", hw_fence_client->client_id);
+	hw_fence_ipcc_trigger_signal(hw_fence_drv_data, tx_client_id,
+		rx_client_id, signal_id);
+
+	return 0;
+}
+EXPORT_SYMBOL(msm_hw_fence_trigger_signal);
+
+/* Function used for simulation purposes only. */
+int msm_hw_fence_driver_doorbell_sim(u64 db_mask)
+{
+	if (IS_ERR_OR_NULL(hw_fence_drv_data) || !hw_fence_drv_data->resources_ready) {
+		HWFNC_ERR("hw fence driver not ready\n");
+		return -EAGAIN;
+	}
+
+	HWFNC_DBG_IRQ("db callback sim-mode flags:0x%llx qtime:%llu\n",
+		db_mask, hw_fence_get_qtime(hw_fence_drv_data));
+
+	hw_fence_utils_process_doorbell_mask(hw_fence_drv_data, db_mask);
+
+	return 0;
+}
+EXPORT_SYMBOL(msm_hw_fence_driver_doorbell_sim);
+
+static int msm_hw_fence_probe_init(struct platform_device *pdev)
+{
+	int rc;
+
+	HWFNC_DBG_H("+\n");
+
+	hw_fence_drv_data = kzalloc(sizeof(*hw_fence_drv_data), GFP_KERNEL);
+	if (!hw_fence_drv_data)
+		return -ENOMEM;
+
+	dev_set_drvdata(&pdev->dev, hw_fence_drv_data);
+	hw_fence_drv_data->dev = &pdev->dev;
+
+	/* Initialize HW Fence Driver resources */
+	rc = hw_fence_init(hw_fence_drv_data);
+	if (rc)
+		goto error;
+
+	mutex_init(&hw_fence_drv_data->clients_mask_lock);
+
+	/* set ready value so clients can register */
+	hw_fence_drv_data->resources_ready = true;
+
+	HWFNC_DBG_H("-\n");
+
+	return rc;
+
+error:
+	dev_set_drvdata(&pdev->dev, NULL);
+	kfree(hw_fence_drv_data);
+	hw_fence_drv_data = ERR_PTR(-EPROBE_DEFER);
+
+	HWFNC_ERR("error %d\n", rc);
+	return rc;
+}
+
+static int msm_hw_fence_probe(struct platform_device *pdev)
+{
+	int rc = -EINVAL;
+
+	HWFNC_DBG_H("+\n");
+
+	if (!pdev) {
+		HWFNC_ERR("null platform dev\n");
+		return -EINVAL;
+	}
+
+	if (of_device_is_compatible(pdev->dev.of_node, "qcom,msm-hw-fence"))
+		rc = msm_hw_fence_probe_init(pdev);
+	if (rc)
+		goto err_exit;
+
+	HWFNC_DBG_H("-\n");
+
+	return 0;
+
+err_exit:
+	HWFNC_ERR("error %d\n", rc);
+	return rc;
+}
+
+static int msm_hw_fence_remove(struct platform_device *pdev)
+{
+	HWFNC_DBG_H("+\n");
+
+	if (!pdev) {
+		HWFNC_ERR("null platform dev\n");
+		return -EINVAL;
+	}
+
+	hw_fence_drv_data = dev_get_drvdata(&pdev->dev);
+	if (!hw_fence_drv_data) {
+		HWFNC_ERR("null driver data\n");
+		return -EINVAL;
+	}
+
+	dev_set_drvdata(&pdev->dev, NULL);
+	kfree(hw_fence_drv_data);
+	hw_fence_drv_data = ERR_PTR(-EPROBE_DEFER);
+
+	HWFNC_DBG_H("-\n");
+
+	return 0;
+}
+
+static const struct of_device_id msm_hw_fence_dt_match[] = {
+	{.compatible = "qcom,msm-hw-fence"},
+	{}
+};
+
+static struct platform_driver msm_hw_fence_driver = {
+	.probe = msm_hw_fence_probe,
+	.remove = msm_hw_fence_remove,
+	.driver = {
+		.name = "msm-hw-fence",
+		.of_match_table = of_match_ptr(msm_hw_fence_dt_match),
+	},
+};
+
+static int __init msm_hw_fence_init(void)
+{
+	int rc = 0;
+
+	HWFNC_DBG_H("+\n");
+
+	rc = platform_driver_register(&msm_hw_fence_driver);
+	if (rc) {
+		HWFNC_ERR("%s: failed to register platform driver\n",
+			__func__);
+		return rc;
+	}
+
+	HWFNC_DBG_H("-\n");
+
+	return 0;
+}
+
+static void __exit msm_hw_fence_exit(void)
+{
+	HWFNC_DBG_H("+\n");
+
+	platform_driver_unregister(&msm_hw_fence_driver);
+
+	HWFNC_DBG_H("-\n");
+}
+
+module_init(msm_hw_fence_init);
+module_exit(msm_hw_fence_exit);
+
+MODULE_DESCRIPTION("QTI HW Fence Driver");
+MODULE_LICENSE("GPL v2");

+ 6 - 3
mm_driver_board.mk

@@ -2,9 +2,12 @@
 
 ifneq ($(TARGET_BOARD_AUTO),true)
 	ifeq ($(call is-board-platform-in-list,$(TARGET_BOARD_PLATFORM)),true)
-		BOARD_VENDOR_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_ext_display.ko
-		BOARD_VENDOR_RAMDISK_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_ext_display.ko
-		BOARD_VENDOR_RAMDISK_RECOVERY_KERNEL_MODULES_LOAD += $(KERNEL_MODULES_OUT)/msm_ext_display.ko
+		BOARD_VENDOR_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_ext_display.ko \
+					       $(KERNEL_MODULES_OUT)/msm_hw_fence.ko
+		BOARD_VENDOR_RAMDISK_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_ext_display.ko \
+						       $(KERNEL_MODULES_OUT)/msm_hw_fence.ko
+		BOARD_VENDOR_RAMDISK_RECOVERY_KERNEL_MODULES_LOAD += $(KERNEL_MODULES_OUT)/msm_ext_display.ko \
+					                             $(KERNEL_MODULES_OUT)/msm_hw_fence.ko
 
 		ifneq ($(TARGET_BOARD_PLATFORM), taro)
 		BOARD_VENDOR_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/sync_fence.ko

+ 1 - 1
mm_driver_product.mk

@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
-PRODUCT_PACKAGES += msm_ext_display.ko
+PRODUCT_PACKAGES += msm_ext_display.ko msm_hw_fence.ko
 
 ifneq ($(TARGET_BOARD_PLATFORM), taro)
 PRODUCT_PACKAGES += sync_fence.ko