Merge tag 'drm-next-5.6-2019-12-11' of git://people.freedesktop.org/~agd5f/linux into drm-next
drm-next-5.6-2019-12-11:

amdgpu:
- Add MST atomic routines
- Add support for DMCUB (new helper microengine for displays)
- Add OEM i2c support in DC
- Use vstartup for vblank events on DCN
- Simplify Kconfig for DC
- Renoir fixes for DC
- Clean up function pointers in DC
- Initial support for HDCP 2.x
- Misc code cleanups
- GFX10 fixes
- Rework JPEG engine handling for VCN
- Add clock and power gating support for JPEG
- BACO support for Arcturus
- Cleanup PSP ring handling
- Add framework for using BACO with runtime pm to save power
- Move core pci state handling out of the driver for pm ops
- Allow guest power control in 1 VF case with SR-IOV
- SR-IOV fixes
- RAS fixes
- Support for power metrics on renoir
- Golden settings updates for gfx10
- Enable gfxoff on supported navi10 skus
- Update MAINTAINERS

amdkfd:
- Clean up generational gfx code
- Fixes for gfx10
- DIQ fixes
- Share more code with amdgpu

radeon:
- PPC DMA fix
- Register checker fixes for r1xx/r2xx
- Misc cleanups

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191211223020.7510-1-alexander.deucher@amd.com
@@ -38,11 +38,9 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
$(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
$(AMDKFD_PATH)/kfd_packet_manager_vi.o \
$(AMDKFD_PATH)/kfd_packet_manager_v9.o \
$(AMDKFD_PATH)/kfd_process_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
@@ -258,6 +258,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
unsigned int queue_id;
struct kfd_process_device *pdd;
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;

memset(&q_properties, 0, sizeof(struct queue_properties));

@@ -286,7 +287,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
p->pasid,
dev->id);

err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
&doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;

@@ -296,14 +298,11 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
/* On SOC15 ASICs, doorbell allocation must be
* per-device, and independent from the per-process
* queue_id. Return the doorbell offset within the
* doorbell aperture to user mode.
/* On SOC15 ASICs, include the doorbell offset within the
* process doorbell frame, which is 2 pages.
*/
args->doorbell_offset |= q_properties.doorbell_off;
args->doorbell_offset |= doorbell_offset_in_process;

mutex_unlock(&p->mutex);

@@ -1312,10 +1311,9 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
/* MMIO is mapped through kfd device
* Generate a kfd mmap offset
*/
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
args->mmap_offset <<= PAGE_SHIFT;
}
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
args->mmap_offset = KFD_MMAP_TYPE_MMIO
| KFD_MMAP_GPU_ID(args->gpu_id);

return 0;

@@ -1899,20 +1897,19 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
struct kfd_dev *dev = NULL;
unsigned long vm_pgoff;
unsigned long mmap_offset;
unsigned int gpu_id;

process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);

vm_pgoff = vma->vm_pgoff;
vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
if (gpu_id)
dev = kfd_device_by_id(gpu_id);

switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
case KFD_MMAP_TYPE_DOORBELL:
if (!dev)
return -ENODEV;
@@ -72,11 +72,11 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
* The receive packet buff will be sitting on the Indirect Buffer
* and in the PQ we put the IB packet + sync packet(s).
*/
status = kq->ops.acquire_packet_buffer(kq,
status = kq_acquire_packet_buffer(kq,
pq_packets_size_in_bytes / sizeof(uint32_t),
&ib_packet_buff);
if (status) {
pr_err("acquire_packet_buffer failed\n");
pr_err("kq_acquire_packet_buffer failed\n");
return status;
}

@@ -115,7 +115,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,

if (status) {
pr_err("Failed to allocate GART memory\n");
kq->ops.rollback_packet(kq);
kq_rollback_packet(kq);
return status;
}

@@ -151,7 +151,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,

rm_packet->data_lo = QUEUESTATE__ACTIVE;

kq->ops.submit_packet(kq);
kq_submit_packet(kq);

/* Wait till CP writes sync code: */
status = amdkfd_fence_wait_timeout(
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;

status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
&properties, &qid);
&properties, &qid, NULL);

if (status) {
pr_err("Failed to create DIQ\n");
@@ -742,7 +742,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)

int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
int ret, count;
int ret;

if (!kfd->init_complete)
return 0;
@@ -750,7 +750,7 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
ret = kfd_resume(kfd);
if (ret)
return ret;
count = atomic_dec_return(&kfd_locked);
atomic_dec(&kfd_locked);

atomic_set(&kfd->sram_ecc_flag, 0);
@@ -170,7 +170,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
}

q->properties.doorbell_off =
kfd_doorbell_id_to_offset(dev, q->process,
kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
q->doorbell_id);

return 0;
@@ -1595,7 +1595,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
goto dqm_unlock;
}

mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

if (!mqd_mgr->get_wave_state) {
r = -EINVAL;
@@ -91,7 +91,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset;

kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);

kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
kfd_doorbell_process_slice(kfd));
@@ -103,8 +103,8 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
pr_debug("doorbell base == 0x%08lX\n",
(uintptr_t)kfd->doorbell_base);

pr_debug("doorbell_id_offset == 0x%08lX\n",
kfd->doorbell_id_offset);
pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
kfd->doorbell_base_dw_offset);

pr_debug("doorbell_process_limit == 0x%08lX\n",
doorbell_process_limit);
@@ -185,7 +185,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
* Calculating the kernel doorbell offset using the first
* doorbell page.
*/
*doorbell_off = kfd->doorbell_id_offset + inx;
*doorbell_off = kfd->doorbell_base_dw_offset + inx;

pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
@@ -225,17 +225,17 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
}

unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int doorbell_id)
{
/*
* doorbell_id_offset accounts for doorbells taken by KGD.
* doorbell_base_dw_offset accounts for doorbells taken by KGD.
* index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
* the process's doorbells. The offset returned is in dword
* units regardless of the ASIC-dependent doorbell size.
*/
return kfd->doorbell_id_offset +
return kfd->doorbell_base_dw_offset +
process->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
@@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
break;
@@ -62,9 +62,6 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;
struct kfd_topology_device *top_dev;

top_dev = kfd_topology_device_by_id(kfd->id);

if (!kfd->device_info->needs_iommu_device)
return 0;
@@ -34,7 +34,10 @@
|
||||
|
||||
#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
|
||||
|
||||
static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
/* Initialize a kernel queue, including allocations of GART memory
|
||||
* needed for the queue.
|
||||
*/
|
||||
static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size)
|
||||
{
|
||||
struct queue_properties prop;
|
||||
@@ -87,9 +90,17 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
kq->pq_kernel_addr = kq->pq->cpu_ptr;
|
||||
kq->pq_gpu_addr = kq->pq->gpu_addr;
|
||||
|
||||
retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
|
||||
if (!retval)
|
||||
goto err_eop_allocate_vidmem;
|
||||
/* For CIK family asics, kq->eop_mem is not needed */
|
||||
if (dev->device_info->asic_family > CHIP_MULLINS) {
|
||||
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
|
||||
if (retval != 0)
|
||||
goto err_eop_allocate_vidmem;
|
||||
|
||||
kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
|
||||
kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
|
||||
|
||||
memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
|
||||
}
|
||||
|
||||
retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
|
||||
&kq->rptr_mem);
|
||||
@@ -183,7 +194,8 @@ err_get_kernel_doorbell:
|
||||
|
||||
}
|
||||
|
||||
static void uninitialize(struct kernel_queue *kq)
|
||||
/* Uninitialize a kernel queue and free all its memory usages. */
|
||||
static void kq_uninitialize(struct kernel_queue *kq)
|
||||
{
|
||||
if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
|
||||
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
|
||||
@@ -200,14 +212,19 @@ static void uninitialize(struct kernel_queue *kq)
|
||||
|
||||
kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
|
||||
kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
|
||||
kq->ops_asic_specific.uninitialize(kq);
|
||||
|
||||
/* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free()
|
||||
* is able to handle NULL properly.
|
||||
*/
|
||||
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
|
||||
|
||||
kfd_gtt_sa_free(kq->dev, kq->pq);
|
||||
kfd_release_kernel_doorbell(kq->dev,
|
||||
kq->queue->properties.doorbell_ptr);
|
||||
uninit_queue(kq->queue);
|
||||
}
|
||||
|
||||
static int acquire_packet_buffer(struct kernel_queue *kq,
|
||||
int kq_acquire_packet_buffer(struct kernel_queue *kq,
|
||||
size_t packet_size_in_dwords, unsigned int **buffer_ptr)
|
||||
{
|
||||
size_t available_size;
|
||||
@@ -268,7 +285,7 @@ err_no_space:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void submit_packet(struct kernel_queue *kq)
|
||||
void kq_submit_packet(struct kernel_queue *kq)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
int i;
|
||||
@@ -280,11 +297,18 @@ static void submit_packet(struct kernel_queue *kq)
|
||||
}
|
||||
pr_debug("\n");
|
||||
#endif
|
||||
|
||||
kq->ops_asic_specific.submit_packet(kq);
|
||||
if (kq->dev->device_info->doorbell_size == 8) {
|
||||
*kq->wptr64_kernel = kq->pending_wptr64;
|
||||
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
|
||||
kq->pending_wptr64);
|
||||
} else {
|
||||
*kq->wptr_kernel = kq->pending_wptr;
|
||||
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
|
||||
kq->pending_wptr);
|
||||
}
|
||||
}
|
||||
|
||||
static void rollback_packet(struct kernel_queue *kq)
|
||||
void kq_rollback_packet(struct kernel_queue *kq)
|
||||
{
|
||||
if (kq->dev->device_info->doorbell_size == 8) {
|
||||
kq->pending_wptr64 = *kq->wptr64_kernel;
|
||||
@@ -304,60 +328,18 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
|
||||
if (!kq)
|
||||
return NULL;
|
||||
|
||||
kq->ops.initialize = initialize;
|
||||
kq->ops.uninitialize = uninitialize;
|
||||
kq->ops.acquire_packet_buffer = acquire_packet_buffer;
|
||||
kq->ops.submit_packet = submit_packet;
|
||||
kq->ops.rollback_packet = rollback_packet;
|
||||
|
||||
switch (dev->device_info->asic_family) {
|
||||
case CHIP_CARRIZO:
|
||||
case CHIP_TONGA:
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
kernel_queue_init_vi(&kq->ops_asic_specific);
|
||||
break;
|
||||
|
||||
case CHIP_KAVERI:
|
||||
case CHIP_HAWAII:
|
||||
kernel_queue_init_cik(&kq->ops_asic_specific);
|
||||
break;
|
||||
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_RAVEN:
|
||||
case CHIP_RENOIR:
|
||||
case CHIP_ARCTURUS:
|
||||
kernel_queue_init_v9(&kq->ops_asic_specific);
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI12:
|
||||
case CHIP_NAVI14:
|
||||
kernel_queue_init_v10(&kq->ops_asic_specific);
|
||||
break;
|
||||
default:
|
||||
WARN(1, "Unexpected ASIC family %u",
|
||||
dev->device_info->asic_family);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
|
||||
if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
|
||||
return kq;
|
||||
|
||||
pr_err("Failed to init kernel queue\n");
|
||||
|
||||
out_free:
|
||||
kfree(kq);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void kernel_queue_uninit(struct kernel_queue *kq)
|
||||
{
|
||||
kq->ops.uninitialize(kq);
|
||||
kq_uninitialize(kq);
|
||||
kfree(kq);
|
||||
}
|
||||
|
||||
@@ -377,7 +359,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
|
||||
return;
|
||||
}
|
||||
|
||||
retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
|
||||
retval = kq_acquire_packet_buffer(kq, 5, &buffer);
|
||||
if (unlikely(retval != 0)) {
|
||||
pr_err(" Failed to acquire packet buffer\n");
|
||||
pr_err("Kernel queue test failed\n");
|
||||
@@ -385,7 +367,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
|
||||
}
|
||||
for (i = 0; i < 5; i++)
|
||||
buffer[i] = kq->nop_packet;
|
||||
kq->ops.submit_packet(kq);
|
||||
kq_submit_packet(kq);
|
||||
|
||||
pr_err("Ending kernel queue test\n");
|
||||
}
|
||||
|
@@ -29,45 +29,28 @@
|
||||
#include "kfd_priv.h"
|
||||
|
||||
/**
|
||||
* struct kernel_queue_ops
|
||||
*
|
||||
* @initialize: Initialize a kernel queue, including allocations of GART memory
|
||||
* needed for the queue.
|
||||
*
|
||||
* @uninitialize: Uninitialize a kernel queue and free all its memory usages.
|
||||
*
|
||||
* @acquire_packet_buffer: Returns a pointer to the location in the kernel
|
||||
* kq_acquire_packet_buffer: Returns a pointer to the location in the kernel
|
||||
* queue ring buffer where the calling function can write its packet. It is
|
||||
* Guaranteed that there is enough space for that packet. It also updates the
|
||||
* pending write pointer to that location so subsequent calls to
|
||||
* acquire_packet_buffer will get a correct write pointer
|
||||
*
|
||||
* @submit_packet: Update the write pointer and doorbell of a kernel queue.
|
||||
* kq_submit_packet: Update the write pointer and doorbell of a kernel queue.
|
||||
*
|
||||
* @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
|
||||
* queue are equal, which means the CP has read all the submitted packets.
|
||||
*
|
||||
* @rollback_packet: This routine is called if we failed to build an acquired
|
||||
* kq_rollback_packet: This routine is called if we failed to build an acquired
|
||||
* packet for some reason. It just overwrites the pending wptr with the current
|
||||
* one
|
||||
*
|
||||
*/
|
||||
struct kernel_queue_ops {
|
||||
bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size);
|
||||
void (*uninitialize)(struct kernel_queue *kq);
|
||||
int (*acquire_packet_buffer)(struct kernel_queue *kq,
|
||||
size_t packet_size_in_dwords,
|
||||
unsigned int **buffer_ptr);
|
||||
|
||||
void (*submit_packet)(struct kernel_queue *kq);
|
||||
void (*rollback_packet)(struct kernel_queue *kq);
|
||||
};
|
||||
int kq_acquire_packet_buffer(struct kernel_queue *kq,
|
||||
size_t packet_size_in_dwords,
|
||||
unsigned int **buffer_ptr);
|
||||
void kq_submit_packet(struct kernel_queue *kq);
|
||||
void kq_rollback_packet(struct kernel_queue *kq);
|
||||
|
||||
|
||||
struct kernel_queue {
|
||||
struct kernel_queue_ops ops;
|
||||
struct kernel_queue_ops ops_asic_specific;
|
||||
|
||||
/* data */
|
||||
struct kfd_dev *dev;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
@@ -99,9 +82,4 @@ struct kernel_queue {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
void kernel_queue_init_cik(struct kernel_queue_ops *ops);
|
||||
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
|
||||
void kernel_queue_init_v9(struct kernel_queue_ops *ops);
|
||||
void kernel_queue_init_v10(struct kernel_queue_ops *ops);
|
||||
|
||||
#endif /* KFD_KERNEL_QUEUE_H_ */
|
||||
|
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kfd_kernel_queue.h"
|
||||
|
||||
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size);
|
||||
static void uninitialize_cik(struct kernel_queue *kq);
|
||||
static void submit_packet_cik(struct kernel_queue *kq);
|
||||
|
||||
void kernel_queue_init_cik(struct kernel_queue_ops *ops)
|
||||
{
|
||||
ops->initialize = initialize_cik;
|
||||
ops->uninitialize = uninitialize_cik;
|
||||
ops->submit_packet = submit_packet_cik;
|
||||
}
|
||||
|
||||
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void uninitialize_cik(struct kernel_queue *kq)
|
||||
{
|
||||
}
|
||||
|
||||
static void submit_packet_cik(struct kernel_queue *kq)
|
||||
{
|
||||
*kq->wptr_kernel = kq->pending_wptr;
|
||||
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
|
||||
kq->pending_wptr);
|
||||
}
|
@@ -1,348 +0,0 @@
|
||||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kfd_kernel_queue.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_pm4_headers_ai.h"
|
||||
#include "kfd_pm4_opcodes.h"
|
||||
#include "gc/gc_10_1_0_sh_mask.h"
|
||||
|
||||
static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size);
|
||||
static void uninitialize_v10(struct kernel_queue *kq);
|
||||
static void submit_packet_v10(struct kernel_queue *kq);
|
||||
|
||||
void kernel_queue_init_v10(struct kernel_queue_ops *ops)
|
||||
{
|
||||
ops->initialize = initialize_v10;
|
||||
ops->uninitialize = uninitialize_v10;
|
||||
ops->submit_packet = submit_packet_v10;
|
||||
}
|
||||
|
||||
static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
|
||||
if (retval != 0)
|
||||
return false;
|
||||
|
||||
kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
|
||||
kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
|
||||
|
||||
memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void uninitialize_v10(struct kernel_queue *kq)
|
||||
{
|
||||
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
|
||||
}
|
||||
|
||||
static void submit_packet_v10(struct kernel_queue *kq)
|
||||
{
|
||||
*kq->wptr64_kernel = kq->pending_wptr64;
|
||||
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
|
||||
kq->pending_wptr64);
|
||||
}
|
||||
|
||||
static int pm_map_process_v10(struct packet_manager *pm,
|
||||
uint32_t *buffer, struct qcm_process_device *qpd)
|
||||
{
|
||||
struct pm4_mes_map_process *packet;
|
||||
uint64_t vm_page_table_base_addr = qpd->page_table_base;
|
||||
|
||||
packet = (struct pm4_mes_map_process *)buffer;
|
||||
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
|
||||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
|
||||
sizeof(struct pm4_mes_map_process));
|
||||
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
|
||||
packet->bitfields2.process_quantum = 1;
|
||||
packet->bitfields2.pasid = qpd->pqm->process->pasid;
|
||||
packet->bitfields14.gds_size = qpd->gds_size;
|
||||
packet->bitfields14.num_gws = qpd->num_gws;
|
||||
packet->bitfields14.num_oac = qpd->num_oac;
|
||||
packet->bitfields14.sdma_enable = 1;
|
||||
|
||||
packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
|
||||
|
||||
packet->sh_mem_config = qpd->sh_mem_config;
|
||||
packet->sh_mem_bases = qpd->sh_mem_bases;
|
||||
if (qpd->tba_addr) {
|
||||
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
|
||||
packet->sq_shader_tba_hi = (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) |
|
||||
upper_32_bits(qpd->tba_addr >> 8);
|
||||
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
|
||||
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
|
||||
}
|
||||
|
||||
packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
|
||||
packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
|
||||
|
||||
packet->vm_context_page_table_base_addr_lo32 =
|
||||
lower_32_bits(vm_page_table_base_addr);
|
||||
packet->vm_context_page_table_base_addr_hi32 =
|
||||
upper_32_bits(vm_page_table_base_addr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pm_runlist_v10(struct packet_manager *pm, uint32_t *buffer,
|
||||
uint64_t ib, size_t ib_size_in_dwords, bool chain)
|
||||
{
|
||||
struct pm4_mes_runlist *packet;
|
||||
|
||||
int concurrent_proc_cnt = 0;
|
||||
struct kfd_dev *kfd = pm->dqm->dev;
|
||||
|
||||
/* Determine the number of processes to map together to HW:
|
||||
* it can not exceed the number of VMIDs available to the
|
||||
* scheduler, and it is determined by the smaller of the number
|
||||
* of processes in the runlist and kfd module parameter
|
||||
* hws_max_conc_proc.
|
||||
* Note: the arbitration between the number of VMIDs and
|
||||
* hws_max_conc_proc has been done in
|
||||
* kgd2kfd_device_init().
|
||||
*/
|
||||
concurrent_proc_cnt = min(pm->dqm->processes_count,
|
||||
kfd->max_proc_per_quantum);
|
||||
|
||||
|
||||
packet = (struct pm4_mes_runlist *)buffer;
|
||||
|
||||
memset(buffer, 0, sizeof(struct pm4_mes_runlist));
|
||||
packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
|
||||
sizeof(struct pm4_mes_runlist));
|
||||
|
||||
packet->bitfields4.ib_size = ib_size_in_dwords;
|
||||
packet->bitfields4.chain = chain ? 1 : 0;
|
||||
packet->bitfields4.offload_polling = 0;
|
||||
packet->bitfields4.valid = 1;
|
||||
packet->bitfields4.process_cnt = concurrent_proc_cnt;
|
||||
packet->ordinal2 = lower_32_bits(ib);
|
||||
packet->ib_base_hi = upper_32_bits(ib);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pm_map_queues_v10(struct packet_manager *pm, uint32_t *buffer,
|
||||
struct queue *q, bool is_static)
|
||||
{
|
||||
struct pm4_mes_map_queues *packet;
|
||||
bool use_static = is_static;
|
||||
|
||||
packet = (struct pm4_mes_map_queues *)buffer;
|
||||
memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
|
||||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
|
||||
sizeof(struct pm4_mes_map_queues));
|
||||
packet->bitfields2.num_queues = 1;
|
||||
packet->bitfields2.queue_sel =
|
||||
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
|
||||
|
||||
packet->bitfields2.engine_sel =
|
||||
engine_sel__mes_map_queues__compute_vi;
|
||||
packet->bitfields2.queue_type =
|
||||
queue_type__mes_map_queues__normal_compute_vi;
|
||||
|
||||
switch (q->properties.type) {
|
||||
case KFD_QUEUE_TYPE_COMPUTE:
|
||||
if (use_static)
|
||||
packet->bitfields2.queue_type =
|
||||
queue_type__mes_map_queues__normal_latency_static_queue_vi;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_DIQ:
|
||||
packet->bitfields2.queue_type =
|
||||
queue_type__mes_map_queues__debug_interface_queue_vi;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
|
||||
engine_sel__mes_map_queues__sdma0_vi;
|
||||
use_static = false; /* no static queues under SDMA */
|
||||
break;
|
||||
default:
|
||||
WARN(1, "queue type %d\n", q->properties.type);
|
||||
return -EINVAL;
|
||||
}
|
||||
packet->bitfields3.doorbell_offset =
|
||||
q->properties.doorbell_off;
|
||||
|
||||
packet->mqd_addr_lo =
|
||||
lower_32_bits(q->gart_mqd_addr);
|
||||
|
||||
packet->mqd_addr_hi =
|
||||
upper_32_bits(q->gart_mqd_addr);
|
||||
|
||||
packet->wptr_addr_lo =
|
||||
lower_32_bits((uint64_t)q->properties.write_ptr);
|
||||
|
||||
packet->wptr_addr_hi =
|
||||
upper_32_bits((uint64_t)q->properties.write_ptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pm_unmap_queues_v10(struct packet_manager *pm, uint32_t *buffer,
|
||||
enum kfd_queue_type type,
|
||||
enum kfd_unmap_queues_filter filter,
|
||||
uint32_t filter_param, bool reset,
|
||||
unsigned int sdma_engine)
|
||||
{
|
||||
struct pm4_mes_unmap_queues *packet;
|
||||
|
||||
packet = (struct pm4_mes_unmap_queues *)buffer;
|
||||
memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
|
||||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
|
||||
sizeof(struct pm4_mes_unmap_queues));
|
||||
switch (type) {
|
||||
case KFD_QUEUE_TYPE_COMPUTE:
|
||||
case KFD_QUEUE_TYPE_DIQ:
|
||||
packet->bitfields2.engine_sel =
|
||||
engine_sel__mes_unmap_queues__compute;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel =
|
||||
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
|
||||
break;
|
||||
default:
|
||||
WARN(1, "queue type %d\n", type);
|
||||
break;
|
||||
}
|
||||
|
||||
if (reset)
|
||||
packet->bitfields2.action =
|
||||
action__mes_unmap_queues__reset_queues;
|
||||
else
|
||||
packet->bitfields2.action =
|
||||
action__mes_unmap_queues__preempt_queues;
|
||||
|
||||
switch (filter) {
|
||||
case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
|
||||
packet->bitfields2.queue_sel =
|
||||
queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
|
||||
packet->bitfields2.num_queues = 1;
|
||||
packet->bitfields3b.doorbell_offset0 = filter_param;
|
||||
break;
|
||||
case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
|
||||
packet->bitfields2.queue_sel =
|
||||
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
|
||||
packet->bitfields3a.pasid = filter_param;
|
||||
break;
|
||||
case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
|
||||
packet->bitfields2.queue_sel =
|
||||
queue_sel__mes_unmap_queues__unmap_all_queues;
|
||||
break;
|
||||
case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
|
||||
/* in this case, we do not preempt static queues */
|
||||
packet->bitfields2.queue_sel =
|
||||
queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
|
||||
break;
|
||||
default:
|
||||
WARN(1, "filter %d\n", filter);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
|
||||
uint64_t fence_address, uint32_t fence_value)
|
||||
{
|
||||
struct pm4_mes_query_status *packet;
|
||||
|
||||
packet = (struct pm4_mes_query_status *)buffer;
|
||||
memset(buffer, 0, sizeof(struct pm4_mes_query_status));
|
||||
|
||||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
|
||||
sizeof(struct pm4_mes_query_status));
|
||||
|
||||
packet->bitfields2.context_id = 0;
|
||||
packet->bitfields2.interrupt_sel =
|
||||
interrupt_sel__mes_query_status__completion_status;
|
||||
packet->bitfields2.command =
|
||||
command__mes_query_status__fence_only_after_write_ack;
|
||||
|
||||
packet->addr_hi = upper_32_bits((uint64_t)fence_address);
|
||||
packet->addr_lo = lower_32_bits((uint64_t)fence_address);
|
||||
packet->data_hi = upper_32_bits((uint64_t)fence_value);
|
||||
packet->data_lo = lower_32_bits((uint64_t)fence_value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
|
||||
{
|
||||
struct pm4_mec_release_mem *packet;
|
||||
|
||||
WARN_ON(!buffer);
|
||||
|
||||
packet = (struct pm4_mec_release_mem *)buffer;
|
||||
memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
|
||||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
|
||||
sizeof(struct pm4_mec_release_mem));
|
||||
|
||||
packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
|
||||
packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
|
||||
packet->bitfields2.tcl1_action_ena = 1;
|
||||
packet->bitfields2.tc_action_ena = 1;
|
||||
packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
|
||||
|
||||
packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
|
||||
packet->bitfields3.int_sel =
|
||||
int_sel__mec_release_mem__send_interrupt_after_write_confirm;
|
||||
|
||||
packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
|
||||
packet->address_hi = upper_32_bits(gpu_addr);
|
||||
|
||||
packet->data_lo = 0;
|
||||
|
||||
return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
|
||||
}
|
||||
|
||||
const struct packet_manager_funcs kfd_v10_pm_funcs = {
|
||||
.map_process = pm_map_process_v10,
|
||||
.runlist = pm_runlist_v10,
|
||||
.set_resources = pm_set_resources_vi,
|
||||
.map_queues = pm_map_queues_v10,
|
||||
.unmap_queues = pm_unmap_queues_v10,
|
||||
.query_status = pm_query_status_v10,
|
||||
.release_mem = pm_release_mem_v10,
|
||||
.map_process_size = sizeof(struct pm4_mes_map_process),
|
||||
.runlist_size = sizeof(struct pm4_mes_runlist),
|
||||
.set_resources_size = sizeof(struct pm4_mes_set_resources),
|
||||
.map_queues_size = sizeof(struct pm4_mes_map_queues),
|
||||
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
|
||||
.query_status_size = sizeof(struct pm4_mes_query_status),
|
||||
.release_mem_size = sizeof(struct pm4_mec_release_mem)
|
||||
};
|
||||
|
@@ -374,7 +374,6 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||
|
||||
switch (type) {
|
||||
case KFD_MQD_TYPE_CP:
|
||||
case KFD_MQD_TYPE_COMPUTE:
|
||||
mqd->allocate_mqd = allocate_mqd;
|
||||
mqd->init_mqd = init_mqd;
|
||||
mqd->free_mqd = free_mqd;
|
||||
@@ -401,7 +400,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->allocate_mqd = allocate_hiq_mqd;
|
||||
mqd->allocate_mqd = allocate_mqd;
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->free_mqd = free_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
@@ -442,7 +441,7 @@ struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
|
||||
mqd = mqd_manager_init_cik(type, dev);
|
||||
if (!mqd)
|
||||
return NULL;
|
||||
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
|
||||
if (type == KFD_MQD_TYPE_CP)
|
||||
mqd->update_mqd = update_mqd_hawaii;
|
||||
return mqd;
|
||||
}
|
||||
|
@@ -66,6 +66,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
m->compute_static_thread_mgmt_se3);
|
||||
}
|
||||
|
||||
static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q)
|
||||
{
|
||||
m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
|
||||
m->cp_hqd_queue_priority = q->priority;
|
||||
}
|
||||
|
||||
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
@@ -109,9 +115,6 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
|
||||
10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
|
||||
|
||||
m->cp_hqd_pipe_priority = 1;
|
||||
m->cp_hqd_queue_priority = 15;
|
||||
|
||||
if (q->format == KFD_QUEUE_FORMAT_AQL) {
|
||||
m->cp_hqd_aql_control =
|
||||
1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
|
||||
@@ -208,11 +211,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
|
||||
m->cp_hqd_ctx_save_control = 0;
|
||||
|
||||
update_cu_mask(mm, mqd, q);
|
||||
set_priority(m, q);
|
||||
|
||||
q->is_active = (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0 &&
|
||||
!q->is_evicted);
|
||||
q->is_active = QUEUE_IS_ACTIVE(*q);
|
||||
}
|
||||
|
||||
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
|
||||
@@ -247,18 +248,22 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
|
||||
{
|
||||
struct v10_compute_mqd *m;
|
||||
|
||||
/* Control stack is located one page after MQD. */
|
||||
void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
|
||||
/* Control stack is written backwards, while workgroup context data
|
||||
* is written forwards. Both starts from m->cp_hqd_cntl_stack_size.
|
||||
* Current position is at m->cp_hqd_cntl_stack_offset and
|
||||
* m->cp_hqd_wg_state_offset, respectively.
|
||||
*/
|
||||
*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
|
||||
m->cp_hqd_cntl_stack_offset;
|
||||
*save_area_used_size = m->cp_hqd_wg_state_offset -
|
||||
m->cp_hqd_cntl_stack_size;
|
||||
|
||||
if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
|
||||
return -EFAULT;
|
||||
/* Control stack is not copied to user mode for GFXv10 because
|
||||
* it's part of the context save area that is already
|
||||
* accessible to user mode
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -277,18 +282,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
|
||||
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
|
||||
}
|
||||
|
||||
static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct v10_compute_mqd *m;
|
||||
|
||||
update_mqd(mm, mqd, q);
|
||||
|
||||
/* TODO: what's the point? update_mqd already does this. */
|
||||
m = get_mqd(mqd);
|
||||
m->cp_hqd_vmid = q->vmid;
|
||||
}
|
||||
|
||||
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
@@ -340,11 +333,7 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
m->sdma_queue_id = q->sdma_queue_id;
|
||||
m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
|
||||
|
||||
|
||||
q->is_active = (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0 &&
|
||||
!q->is_evicted);
|
||||
q->is_active = QUEUE_IS_ACTIVE(*q);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -392,7 +381,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
|
||||
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
|
||||
return NULL;
|
||||
|
||||
mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
|
||||
mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
|
||||
if (!mqd)
|
||||
return NULL;
|
||||
|
||||
@@ -400,7 +389,6 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
|
||||
|
||||
switch (type) {
|
||||
case KFD_MQD_TYPE_CP:
|
||||
case KFD_MQD_TYPE_COMPUTE:
|
||||
pr_debug("%s@%i\n", __func__, __LINE__);
|
||||
mqd->allocate_mqd = allocate_mqd;
|
||||
mqd->init_mqd = init_mqd;
|
||||
@@ -422,7 +410,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->free_mqd = free_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->update_mqd = update_mqd;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct v10_compute_mqd);
|
||||
@@ -432,11 +420,11 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
|
||||
pr_debug("%s@%i\n", __func__, __LINE__);
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->allocate_mqd = allocate_hiq_mqd;
|
||||
mqd->allocate_mqd = allocate_mqd;
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->free_mqd = free_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->update_mqd = update_mqd;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct v10_compute_mqd);
|
||||
|
@@ -92,7 +92,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||
* instead of sub-allocation function.
|
||||
*/
|
||||
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
|
||||
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
|
||||
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||
if (!mqd_mem_obj)
|
||||
return NULL;
|
||||
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
|
||||
@@ -302,7 +302,8 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
|
||||
|
||||
*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
|
||||
m->cp_hqd_cntl_stack_offset;
|
||||
*save_area_used_size = m->cp_hqd_wg_state_offset;
|
||||
*save_area_used_size = m->cp_hqd_wg_state_offset -
|
||||
m->cp_hqd_cntl_stack_size;
|
||||
|
||||
if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
|
||||
return -EFAULT;
|
||||
@@ -324,18 +325,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
|
||||
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
|
||||
}
|
||||
|
||||
static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct v9_mqd *m;
|
||||
|
||||
update_mqd(mm, mqd, q);
|
||||
|
||||
/* TODO: what's the point? update_mqd already does this. */
|
||||
m = get_mqd(mqd);
|
||||
m->cp_hqd_vmid = q->vmid;
|
||||
}
|
||||
|
||||
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
@@ -443,7 +432,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
|
||||
|
||||
switch (type) {
|
||||
case KFD_MQD_TYPE_CP:
|
||||
case KFD_MQD_TYPE_COMPUTE:
|
||||
mqd->allocate_mqd = allocate_mqd;
|
||||
mqd->init_mqd = init_mqd;
|
||||
mqd->free_mqd = free_mqd;
|
||||
@@ -462,7 +450,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->free_mqd = free_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->update_mqd = update_mqd;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||
@@ -471,11 +459,11 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->allocate_mqd = allocate_hiq_mqd;
|
||||
mqd->allocate_mqd = allocate_mqd;
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->free_mqd = free_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->update_mqd = update_mqd;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||
|
@@ -312,11 +312,7 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
|
||||
static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct vi_mqd *m;
|
||||
__update_mqd(mm, mqd, q, MTYPE_UC, 0);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->cp_hqd_vmid = q->vmid;
|
||||
}
|
||||
|
||||
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||
@@ -425,7 +421,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
|
||||
|
||||
switch (type) {
|
||||
case KFD_MQD_TYPE_CP:
|
||||
case KFD_MQD_TYPE_COMPUTE:
|
||||
mqd->allocate_mqd = allocate_mqd;
|
||||
mqd->init_mqd = init_mqd;
|
||||
mqd->free_mqd = free_mqd;
|
||||
@@ -453,7 +448,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->allocate_mqd = allocate_hiq_mqd;
|
||||
mqd->allocate_mqd = allocate_mqd;
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->free_mqd = free_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
@@ -494,7 +489,7 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
|
||||
mqd = mqd_manager_init_vi(type, dev);
|
||||
if (!mqd)
|
||||
return NULL;
|
||||
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
|
||||
if (type == KFD_MQD_TYPE_CP)
|
||||
mqd->update_mqd = update_mqd_tonga;
|
||||
return mqd;
|
||||
}
|
||||
|
@@ -241,12 +241,10 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
|
||||
case CHIP_RAVEN:
|
||||
case CHIP_RENOIR:
|
||||
case CHIP_ARCTURUS:
|
||||
pm->pmf = &kfd_v9_pm_funcs;
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI12:
|
||||
case CHIP_NAVI14:
|
||||
pm->pmf = &kfd_v10_pm_funcs;
|
||||
pm->pmf = &kfd_v9_pm_funcs;
|
||||
break;
|
||||
default:
|
||||
WARN(1, "Unexpected ASIC family %u",
|
||||
@@ -280,7 +278,7 @@ int pm_send_set_resources(struct packet_manager *pm,
|
||||
|
||||
size = pm->pmf->set_resources_size;
|
||||
mutex_lock(&pm->lock);
|
||||
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
|
||||
kq_acquire_packet_buffer(pm->priv_queue,
|
||||
size / sizeof(uint32_t),
|
||||
(unsigned int **)&buffer);
|
||||
if (!buffer) {
|
||||
@@ -291,9 +289,9 @@ int pm_send_set_resources(struct packet_manager *pm,
|
||||
|
||||
retval = pm->pmf->set_resources(pm, buffer, res);
|
||||
if (!retval)
|
||||
pm->priv_queue->ops.submit_packet(pm->priv_queue);
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
else
|
||||
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
|
||||
kq_rollback_packet(pm->priv_queue);
|
||||
|
||||
out:
|
||||
mutex_unlock(&pm->lock);
|
||||
@@ -318,7 +316,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
|
||||
packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
|
||||
mutex_lock(&pm->lock);
|
||||
|
||||
retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
|
||||
retval = kq_acquire_packet_buffer(pm->priv_queue,
|
||||
packet_size_dwords, &rl_buffer);
|
||||
if (retval)
|
||||
goto fail_acquire_packet_buffer;
|
||||
@@ -328,14 +326,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
|
||||
if (retval)
|
||||
goto fail_create_runlist;
|
||||
|
||||
pm->priv_queue->ops.submit_packet(pm->priv_queue);
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
|
||||
mutex_unlock(&pm->lock);
|
||||
|
||||
return retval;
|
||||
|
||||
fail_create_runlist:
|
||||
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
|
||||
kq_rollback_packet(pm->priv_queue);
|
||||
fail_acquire_packet_buffer:
|
||||
mutex_unlock(&pm->lock);
|
||||
fail_create_runlist_ib:
|
||||
@@ -354,7 +352,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
|
||||
|
||||
size = pm->pmf->query_status_size;
|
||||
mutex_lock(&pm->lock);
|
||||
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
|
||||
kq_acquire_packet_buffer(pm->priv_queue,
|
||||
size / sizeof(uint32_t), (unsigned int **)&buffer);
|
||||
if (!buffer) {
|
||||
pr_err("Failed to allocate buffer on kernel queue\n");
|
||||
@@ -364,9 +362,9 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
|
||||
|
||||
retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
|
||||
if (!retval)
|
||||
pm->priv_queue->ops.submit_packet(pm->priv_queue);
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
else
|
||||
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
|
||||
kq_rollback_packet(pm->priv_queue);
|
||||
|
||||
out:
|
||||
mutex_unlock(&pm->lock);
|
||||
@@ -383,7 +381,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
|
||||
|
||||
size = pm->pmf->unmap_queues_size;
|
||||
mutex_lock(&pm->lock);
|
||||
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
|
||||
kq_acquire_packet_buffer(pm->priv_queue,
|
||||
size / sizeof(uint32_t), (unsigned int **)&buffer);
|
||||
if (!buffer) {
|
||||
pr_err("Failed to allocate buffer on kernel queue\n");
|
||||
@@ -394,9 +392,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
|
||||
retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
|
||||
reset, sdma_engine);
|
||||
if (!retval)
|
||||
pm->priv_queue->ops.submit_packet(pm->priv_queue);
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
else
|
||||
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
|
||||
kq_rollback_packet(pm->priv_queue);
|
||||
|
||||
out:
|
||||
mutex_unlock(&pm->lock);
|
||||
@@ -441,7 +439,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
|
||||
|
||||
size = pm->pmf->query_status_size;
|
||||
mutex_lock(&pm->lock);
|
||||
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
|
||||
kq_acquire_packet_buffer(pm->priv_queue,
|
||||
size / sizeof(uint32_t), (unsigned int **)&buffer);
|
||||
if (!buffer) {
|
||||
pr_err("Failed to allocate buffer on kernel queue\n");
|
||||
@@ -449,7 +447,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
|
||||
goto out;
|
||||
}
|
||||
memset(buffer, 0x55, size);
|
||||
pm->priv_queue->ops.submit_packet(pm->priv_queue);
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
|
||||
pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
|
||||
buffer[0], buffer[1], buffer[2], buffer[3],
|
||||
|
@@ -25,47 +25,7 @@
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_pm4_headers_ai.h"
|
||||
#include "kfd_pm4_opcodes.h"
|
||||
|
||||
static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size);
|
||||
static void uninitialize_v9(struct kernel_queue *kq);
|
||||
static void submit_packet_v9(struct kernel_queue *kq);
|
||||
|
||||
void kernel_queue_init_v9(struct kernel_queue_ops *ops)
|
||||
{
|
||||
ops->initialize = initialize_v9;
|
||||
ops->uninitialize = uninitialize_v9;
|
||||
ops->submit_packet = submit_packet_v9;
|
||||
}
|
||||
|
||||
static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
|
||||
if (retval)
|
||||
return false;
|
||||
|
||||
kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
|
||||
kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
|
||||
|
||||
memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void uninitialize_v9(struct kernel_queue *kq)
|
||||
{
|
||||
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
|
||||
}
|
||||
|
||||
static void submit_packet_v9(struct kernel_queue *kq)
|
||||
{
|
||||
*kq->wptr64_kernel = kq->pending_wptr64;
|
||||
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
|
||||
kq->pending_wptr64);
|
||||
}
|
||||
#include "gc/gc_10_1_0_sh_mask.h"
|
||||
|
||||
static int pm_map_process_v9(struct packet_manager *pm,
|
||||
uint32_t *buffer, struct qcm_process_device *qpd)
|
||||
@@ -90,10 +50,17 @@ static int pm_map_process_v9(struct packet_manager *pm,
|
||||
|
||||
packet->sh_mem_config = qpd->sh_mem_config;
|
||||
packet->sh_mem_bases = qpd->sh_mem_bases;
|
||||
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
|
||||
packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
|
||||
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
|
||||
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
|
||||
if (qpd->tba_addr) {
|
||||
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
|
||||
/* On GFX9, unlike GFX10, bit TRAP_EN of SQ_SHADER_TBA_HI is
|
||||
* not defined, so setting it won't do any harm.
|
||||
*/
|
||||
packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8)
|
||||
| 1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT;
|
||||
|
||||
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
|
||||
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
|
||||
}
|
||||
|
||||
packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
|
||||
packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
|
||||
@@ -341,35 +308,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
|
||||
{
|
||||
struct pm4_mec_release_mem *packet;
|
||||
|
||||
packet = (struct pm4_mec_release_mem *)buffer;
|
||||
memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
|
||||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
|
||||
sizeof(struct pm4_mec_release_mem));
|
||||
|
||||
packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
|
||||
packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
|
||||
packet->bitfields2.tcl1_action_ena = 1;
|
||||
packet->bitfields2.tc_action_ena = 1;
|
||||
packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
|
||||
|
||||
packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
|
||||
packet->bitfields3.int_sel =
|
||||
int_sel__mec_release_mem__send_interrupt_after_write_confirm;
|
||||
|
||||
packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
|
||||
packet->address_hi = upper_32_bits(gpu_addr);
|
||||
|
||||
packet->data_lo = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct packet_manager_funcs kfd_v9_pm_funcs = {
|
||||
.map_process = pm_map_process_v9,
|
||||
.runlist = pm_runlist_v9,
|
||||
@@ -377,12 +315,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
|
||||
.map_queues = pm_map_queues_v9,
|
||||
.unmap_queues = pm_unmap_queues_v9,
|
||||
.query_status = pm_query_status_v9,
|
||||
.release_mem = pm_release_mem_v9,
|
||||
.release_mem = NULL,
|
||||
.map_process_size = sizeof(struct pm4_mes_map_process),
|
||||
.runlist_size = sizeof(struct pm4_mes_runlist),
|
||||
.set_resources_size = sizeof(struct pm4_mes_set_resources),
|
||||
.map_queues_size = sizeof(struct pm4_mes_map_queues),
|
||||
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
|
||||
.query_status_size = sizeof(struct pm4_mes_query_status),
|
||||
.release_mem_size = sizeof(struct pm4_mec_release_mem)
|
||||
.release_mem_size = 0,
|
||||
};
|
@@ -26,47 +26,6 @@
|
||||
#include "kfd_pm4_headers_vi.h"
|
||||
#include "kfd_pm4_opcodes.h"
|
||||
|
||||
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size);
|
||||
static void uninitialize_vi(struct kernel_queue *kq);
|
||||
static void submit_packet_vi(struct kernel_queue *kq);
|
||||
|
||||
void kernel_queue_init_vi(struct kernel_queue_ops *ops)
|
||||
{
|
||||
ops->initialize = initialize_vi;
|
||||
ops->uninitialize = uninitialize_vi;
|
||||
ops->submit_packet = submit_packet_vi;
|
||||
}
|
||||
|
||||
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
|
||||
if (retval != 0)
|
||||
return false;
|
||||
|
||||
kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
|
||||
kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
|
||||
|
||||
memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void uninitialize_vi(struct kernel_queue *kq)
|
||||
{
|
||||
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
|
||||
}
|
||||
|
||||
static void submit_packet_vi(struct kernel_queue *kq)
|
||||
{
|
||||
*kq->wptr_kernel = kq->pending_wptr;
|
||||
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
|
||||
kq->pending_wptr);
|
||||
}
|
||||
|
||||
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
|
||||
{
|
||||
union PM4_MES_TYPE_3_HEADER header;
|
@@ -59,24 +59,21 @@
|
||||
* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
|
||||
* defines are w.r.t to PAGE_SIZE
|
||||
*/
|
||||
#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_SHIFT 62
|
||||
#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
|
||||
#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
|
||||
#define KFD_MMAP_GPU_ID_SHIFT 46
|
||||
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
|
||||
<< KFD_MMAP_GPU_ID_SHIFT)
|
||||
#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
|
||||
& KFD_MMAP_GPU_ID_MASK)
|
||||
#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
|
||||
#define KFD_MMAP_GET_GPU_ID(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
|
||||
>> KFD_MMAP_GPU_ID_SHIFT)
|
||||
|
||||
#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
|
||||
#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
|
||||
|
||||
/*
|
||||
* When working with cp scheduler we should assign the HIQ manually or via
|
||||
* the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
|
||||
@@ -238,9 +235,10 @@ struct kfd_dev {
|
||||
* KFD. It is aligned for mapping
|
||||
* into user mode
|
||||
*/
|
||||
size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell
|
||||
* to HW doorbell, GFX reserved some
|
||||
* at the start)
|
||||
size_t doorbell_base_dw_offset; /* Offset from the start of the PCI
|
||||
* doorbell BAR to the first KFD
|
||||
* doorbell in dwords. GFX reserves
|
||||
* the segment before this offset.
|
||||
*/
|
||||
u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
|
||||
* page used by kernel queue
|
||||
@@ -510,8 +508,7 @@ struct queue {
|
||||
* Please read the kfd_mqd_manager.h description.
|
||||
*/
|
||||
enum KFD_MQD_TYPE {
|
||||
KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */
|
||||
KFD_MQD_TYPE_HIQ, /* for hiq */
|
||||
KFD_MQD_TYPE_HIQ = 0, /* for hiq */
|
||||
KFD_MQD_TYPE_CP, /* for cp queues and diq */
|
||||
KFD_MQD_TYPE_SDMA, /* for sdma queues */
|
||||
KFD_MQD_TYPE_DIQ, /* for diq */
|
||||
@@ -818,7 +815,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
|
||||
u32 read_kernel_doorbell(u32 __iomem *db);
|
||||
void write_kernel_doorbell(void __iomem *db, u32 value);
|
||||
void write_kernel_doorbell64(void __iomem *db, u64 value);
|
||||
unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
|
||||
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
|
||||
struct kfd_process *process,
|
||||
unsigned int doorbell_id);
|
||||
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
|
||||
@@ -904,7 +901,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
struct kfd_dev *dev,
|
||||
struct file *f,
|
||||
struct queue_properties *properties,
|
||||
unsigned int *qid);
|
||||
unsigned int *qid,
|
||||
uint32_t *p_doorbell_offset_in_process);
|
||||
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
|
||||
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
|
||||
struct queue_properties *p);
|
||||
@@ -972,7 +970,6 @@ struct packet_manager_funcs {
|
||||
|
||||
extern const struct packet_manager_funcs kfd_vi_pm_funcs;
|
||||
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
|
||||
extern const struct packet_manager_funcs kfd_v10_pm_funcs;
|
||||
|
||||
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
|
||||
void pm_uninit(struct packet_manager *pm);
|
||||
@@ -991,9 +988,6 @@ void pm_release_ib(struct packet_manager *pm);
|
||||
|
||||
/* Following PM funcs can be shared among VI and AI */
|
||||
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
|
||||
int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
|
||||
struct scheduling_resources *res);
|
||||
|
||||
|
||||
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
|
||||
|
||||
|
@@ -560,8 +560,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
|
||||
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
|
||||
continue;
|
||||
|
||||
offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
|
||||
<< PAGE_SHIFT;
|
||||
offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
|
||||
qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
|
||||
KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
|
||||
MAP_SHARED, offset);
|
||||
|
@@ -162,7 +162,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
|
||||
pqm->queue_slot_bitmap = NULL;
|
||||
}
|
||||
|
||||
static int create_cp_queue(struct process_queue_manager *pqm,
|
||||
static int init_user_queue(struct process_queue_manager *pqm,
|
||||
struct kfd_dev *dev, struct queue **q,
|
||||
struct queue_properties *q_properties,
|
||||
struct file *f, unsigned int qid)
|
||||
@@ -192,7 +192,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
struct kfd_dev *dev,
|
||||
struct file *f,
|
||||
struct queue_properties *properties,
|
||||
unsigned int *qid)
|
||||
unsigned int *qid,
|
||||
uint32_t *p_doorbell_offset_in_process)
|
||||
{
|
||||
int retval;
|
||||
struct kfd_process_device *pdd;
|
||||
@@ -250,7 +251,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
goto err_create_queue;
|
||||
}
|
||||
|
||||
retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
|
||||
retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
|
||||
if (retval != 0)
|
||||
goto err_create_queue;
|
||||
pqn->q = q;
|
||||
@@ -271,7 +272,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
goto err_create_queue;
|
||||
}
|
||||
|
||||
retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
|
||||
retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
|
||||
if (retval != 0)
|
||||
goto err_create_queue;
|
||||
pqn->q = q;
|
||||
@@ -303,12 +304,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
goto err_create_queue;
|
||||
}
|
||||
|
||||
if (q)
|
||||
if (q && p_doorbell_offset_in_process)
|
||||
/* Return the doorbell offset within the doorbell page
|
||||
* to the caller so it can be passed up to user mode
|
||||
* (in bytes).
|
||||
* There are always 1024 doorbells per process, so in case
|
||||
* of 8-byte doorbells, there are two doorbell pages per
|
||||
* process.
|
||||
*/
|
||||
properties->doorbell_off =
|
||||
*p_doorbell_offset_in_process =
|
||||
(q->properties.doorbell_off * sizeof(uint32_t)) &
|
||||
(kfd_doorbell_process_slice(dev) - 1);
|