Merge tag 'drm-next-5.6-2019-12-11' of git://people.freedesktop.org/~agd5f/linux into drm-next

drm-next-5.6-2019-12-11:

amdgpu:
- Add MST atomic routines
- Add support for DMCUB (new helper microengine for displays)
- Add OEM i2c support in DC
- Use vstartup for vblank events on DCN
- Simplify Kconfig for DC
- Renoir fixes for DC
- Clean up function pointers in DC
- Initial support for HDCP 2.x
- Misc code cleanups
- GFX10 fixes
- Rework JPEG engine handling for VCN
- Add clock and power gating support for JPEG
- BACO support for Arcturus
- Cleanup PSP ring handling
- Add framework for using BACO with runtime pm to save power
- Move core pci state handling out of the driver for pm ops
- Allow guest power control in 1 VF case with SR-IOV
- SR-IOV fixes
- RAS fixes
- Support for power metrics on Renoir
- Golden settings updates for GFX10
- Enable gfxoff on supported Navi10 SKUs
- Update MAINTAINERS

amdkfd:
- Clean up generational gfx code
- Fixes for GFX10
- DIQ fixes
- Share more code with amdgpu

radeon:
- PPC DMA fix
- Register checker fixes for r1xx/r2xx
- Misc cleanups

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191211223020.7510-1-alexander.deucher@amd.com
Committed by Daniel Vetter, 2019-12-17 18:47:46 +01:00
374 changed files with 16013 additions and 6522 deletions

View File

@@ -38,11 +38,9 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
$(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
$(AMDKFD_PATH)/kfd_packet_manager_vi.o \
$(AMDKFD_PATH)/kfd_packet_manager_v9.o \
$(AMDKFD_PATH)/kfd_process_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \

View File

@@ -258,6 +258,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
unsigned int queue_id;
struct kfd_process_device *pdd;
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
memset(&q_properties, 0, sizeof(struct queue_properties));
@@ -286,7 +287,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
p->pasid,
dev->id);
err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
&doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -296,14 +298,11 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
/* On SOC15 ASICs, doorbell allocation must be
* per-device, and independent from the per-process
* queue_id. Return the doorbell offset within the
* doorbell aperture to user mode.
/* On SOC15 ASICs, include the doorbell offset within the
* process doorbell frame, which is 2 pages.
*/
args->doorbell_offset |= q_properties.doorbell_off;
args->doorbell_offset |= doorbell_offset_in_process;
mutex_unlock(&p->mutex);
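For reference, a standalone sketch (not part of the patch) of how the returned doorbell_offset is composed after this change: the type and gpu_id bit-fields from kfd_priv.h OR'd with the byte offset inside the process doorbell frame. The gpu_id value, the in-process offset and KFD_GPU_ID_HASH_WIDTH == 16 are illustrative assumptions.

#include <stdint.h>
#include <stdio.h>

#define KFD_GPU_ID_HASH_WIDTH	16	/* assumed width */
#define KFD_MMAP_TYPE_SHIFT	62
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_GPU_ID_SHIFT	46
#define KFD_MMAP_GPU_ID_MASK	(((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				 << KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id)	((((uint64_t)(gpu_id)) << KFD_MMAP_GPU_ID_SHIFT) \
				 & KFD_MMAP_GPU_ID_MASK)

int main(void)
{
	uint64_t gpu_id = 0x2a1b;			/* hypothetical gpu_id */
	uint64_t doorbell_offset_in_process = 0x18;	/* byte offset in the 2-page frame */
	uint64_t doorbell_offset;

	/* Same composition as kfd_ioctl_create_queue() above, now without
	 * the extra << PAGE_SHIFT: the value is already a byte offset.
	 */
	doorbell_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(gpu_id);
	doorbell_offset |= doorbell_offset_in_process;

	printf("doorbell_offset = 0x%016llx\n", (unsigned long long)doorbell_offset);
	return 0;
}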
@@ -1312,10 +1311,9 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
/* MMIO is mapped through kfd device
* Generate a kfd mmap offset
*/
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
args->mmap_offset <<= PAGE_SHIFT;
}
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
args->mmap_offset = KFD_MMAP_TYPE_MMIO
| KFD_MMAP_GPU_ID(args->gpu_id);
return 0;
@@ -1899,20 +1897,19 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
struct kfd_dev *dev = NULL;
unsigned long vm_pgoff;
unsigned long mmap_offset;
unsigned int gpu_id;
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
vm_pgoff = vma->vm_pgoff;
vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
if (gpu_id)
dev = kfd_device_by_id(gpu_id);
switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
case KFD_MMAP_TYPE_DOORBELL:
if (!dev)
return -ENODEV;
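A small companion sketch (again standalone, with PAGE_SHIFT = 12 and KFD_GPU_ID_HASH_WIDTH = 16 assumed) showing why the page-granular trip through vm_pgoff still preserves the encoding: the type and gpu_id fields sit at bit positions 62 and 46, well above PAGE_SHIFT, so shifting the offset down to pages for mmap() and back up in kfd_mmap() loses nothing.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT		12	/* assumed 4 KiB pages */
#define KFD_GPU_ID_HASH_WIDTH	16	/* assumed width */
#define KFD_MMAP_TYPE_SHIFT	62
#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_GPU_ID_SHIFT	46
#define KFD_MMAP_GPU_ID_MASK	(((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				 << KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(id)	((((uint64_t)(id)) << KFD_MMAP_GPU_ID_SHIFT) \
				 & KFD_MMAP_GPU_ID_MASK)
#define KFD_MMAP_GET_GPU_ID(off) (((off) & KFD_MMAP_GPU_ID_MASK) \
				  >> KFD_MMAP_GPU_ID_SHIFT)

int main(void)
{
	uint64_t offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(0x2a1b);
	uint64_t vm_pgoff = offset >> PAGE_SHIFT;	/* what mmap() hands the driver */
	uint64_t mmap_offset = vm_pgoff << PAGE_SHIFT;	/* recovered in kfd_mmap()      */

	printf("gpu_id = 0x%llx, doorbell type = %d\n",
	       (unsigned long long)KFD_MMAP_GET_GPU_ID(mmap_offset),
	       (mmap_offset & KFD_MMAP_TYPE_MASK) == KFD_MMAP_TYPE_DOORBELL);
	return 0;
}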

View File

@@ -72,11 +72,11 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
* The receive packet buff will be sitting on the Indirect Buffer
* and in the PQ we put the IB packet + sync packet(s).
*/
status = kq->ops.acquire_packet_buffer(kq,
status = kq_acquire_packet_buffer(kq,
pq_packets_size_in_bytes / sizeof(uint32_t),
&ib_packet_buff);
if (status) {
pr_err("acquire_packet_buffer failed\n");
pr_err("kq_acquire_packet_buffer failed\n");
return status;
}
@@ -115,7 +115,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
if (status) {
pr_err("Failed to allocate GART memory\n");
kq->ops.rollback_packet(kq);
kq_rollback_packet(kq);
return status;
}
@@ -151,7 +151,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
rm_packet->data_lo = QUEUESTATE__ACTIVE;
kq->ops.submit_packet(kq);
kq_submit_packet(kq);
/* Wait till CP writes sync code: */
status = amdkfd_fence_wait_timeout(
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
&properties, &qid);
&properties, &qid, NULL);
if (status) {
pr_err("Failed to create DIQ\n");

View File

@@ -742,7 +742,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
int ret, count;
int ret;
if (!kfd->init_complete)
return 0;
@@ -750,7 +750,7 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
ret = kfd_resume(kfd);
if (ret)
return ret;
count = atomic_dec_return(&kfd_locked);
atomic_dec(&kfd_locked);
atomic_set(&kfd->sram_ecc_flag, 0);

View File

@@ -170,7 +170,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
}
q->properties.doorbell_off =
kfd_doorbell_id_to_offset(dev, q->process,
kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
q->doorbell_id);
return 0;
@@ -1595,7 +1595,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
goto dqm_unlock;
}
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
if (!mqd_mgr->get_wave_state) {
r = -EINVAL;

View File

@@ -91,7 +91,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset;
kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
kfd_doorbell_process_slice(kfd));
@@ -103,8 +103,8 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
pr_debug("doorbell base == 0x%08lX\n",
(uintptr_t)kfd->doorbell_base);
pr_debug("doorbell_id_offset == 0x%08lX\n",
kfd->doorbell_id_offset);
pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
kfd->doorbell_base_dw_offset);
pr_debug("doorbell_process_limit == 0x%08lX\n",
doorbell_process_limit);
@@ -185,7 +185,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
* Calculating the kernel doorbell offset using the first
* doorbell page.
*/
*doorbell_off = kfd->doorbell_id_offset + inx;
*doorbell_off = kfd->doorbell_base_dw_offset + inx;
pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
@@ -225,17 +225,17 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
}
unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int doorbell_id)
{
/*
* doorbell_id_offset accounts for doorbells taken by KGD.
* doorbell_base_dw_offset accounts for doorbells taken by KGD.
* index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
* the process's doorbells. The offset returned is in dword
* units regardless of the ASIC-dependent doorbell size.
*/
return kfd->doorbell_id_offset +
return kfd->doorbell_base_dw_offset +
process->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
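A worked example of the dword-offset arithmetic above, standalone and with illustrative numbers only: an 8-byte SOC15 doorbell, 1024 doorbells per process (so a process slice of 8192 bytes, two pages) and a hypothetical doorbell_base_dw_offset reserved for the graphics driver.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t doorbell_base_dw_offset = 0x800;	/* hypothetical KGD reservation, in dwords */
	uint32_t doorbell_size = 8;			/* bytes per doorbell on SOC15 ASICs       */
	uint32_t process_slice = 1024 * doorbell_size;	/* bytes per process doorbell frame        */
	uint32_t doorbell_index = 1;			/* which process frame                     */
	uint32_t doorbell_id = 3;			/* doorbell within that frame              */

	/* Same formula as kfd_get_doorbell_dw_offset_in_bar(): the result is
	 * in dword units regardless of the doorbell size.
	 */
	uint32_t dw_off = doorbell_base_dw_offset +
			  doorbell_index * process_slice / sizeof(uint32_t) +
			  doorbell_id * doorbell_size / sizeof(uint32_t);

	printf("doorbell dword offset in BAR = %u\n", dw_off);	/* 0x800 + 2048 + 6 = 4102 */
	return 0;
}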

View File

@@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
break;

View File

@@ -62,9 +62,6 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;
struct kfd_topology_device *top_dev;
top_dev = kfd_topology_device_by_id(kfd->id);
if (!kfd->device_info->needs_iommu_device)
return 0;

View File

@@ -34,7 +34,10 @@
#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
/* Initialize a kernel queue, including allocations of GART memory
* needed for the queue.
*/
static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size)
{
struct queue_properties prop;
@@ -87,9 +90,17 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->pq_kernel_addr = kq->pq->cpu_ptr;
kq->pq_gpu_addr = kq->pq->gpu_addr;
retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
if (!retval)
goto err_eop_allocate_vidmem;
/* For CIK family asics, kq->eop_mem is not needed */
if (dev->device_info->asic_family > CHIP_MULLINS) {
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
if (retval != 0)
goto err_eop_allocate_vidmem;
kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
}
retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
&kq->rptr_mem);
@@ -183,7 +194,8 @@ err_get_kernel_doorbell:
}
static void uninitialize(struct kernel_queue *kq)
/* Uninitialize a kernel queue and free all its memory usages. */
static void kq_uninitialize(struct kernel_queue *kq)
{
if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
@@ -200,14 +212,19 @@ static void uninitialize(struct kernel_queue *kq)
kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
kq->ops_asic_specific.uninitialize(kq);
/* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free()
* is able to handle NULL properly.
*/
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
kfd_gtt_sa_free(kq->dev, kq->pq);
kfd_release_kernel_doorbell(kq->dev,
kq->queue->properties.doorbell_ptr);
uninit_queue(kq->queue);
}
static int acquire_packet_buffer(struct kernel_queue *kq,
int kq_acquire_packet_buffer(struct kernel_queue *kq,
size_t packet_size_in_dwords, unsigned int **buffer_ptr)
{
size_t available_size;
@@ -268,7 +285,7 @@ err_no_space:
return -ENOMEM;
}
static void submit_packet(struct kernel_queue *kq)
void kq_submit_packet(struct kernel_queue *kq)
{
#ifdef DEBUG
int i;
@@ -280,11 +297,18 @@ static void submit_packet(struct kernel_queue *kq)
}
pr_debug("\n");
#endif
kq->ops_asic_specific.submit_packet(kq);
if (kq->dev->device_info->doorbell_size == 8) {
*kq->wptr64_kernel = kq->pending_wptr64;
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
} else {
*kq->wptr_kernel = kq->pending_wptr;
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
kq->pending_wptr);
}
}
static void rollback_packet(struct kernel_queue *kq)
void kq_rollback_packet(struct kernel_queue *kq)
{
if (kq->dev->device_info->doorbell_size == 8) {
kq->pending_wptr64 = *kq->wptr64_kernel;
@@ -304,60 +328,18 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
if (!kq)
return NULL;
kq->ops.initialize = initialize;
kq->ops.uninitialize = uninitialize;
kq->ops.acquire_packet_buffer = acquire_packet_buffer;
kq->ops.submit_packet = submit_packet;
kq->ops.rollback_packet = rollback_packet;
switch (dev->device_info->asic_family) {
case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
kernel_queue_init_vi(&kq->ops_asic_specific);
break;
case CHIP_KAVERI:
case CHIP_HAWAII:
kernel_queue_init_cik(&kq->ops_asic_specific);
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
case CHIP_RENOIR:
case CHIP_ARCTURUS:
kernel_queue_init_v9(&kq->ops_asic_specific);
break;
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
kernel_queue_init_v10(&kq->ops_asic_specific);
break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
goto out_free;
}
if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
return kq;
pr_err("Failed to init kernel queue\n");
out_free:
kfree(kq);
return NULL;
}
void kernel_queue_uninit(struct kernel_queue *kq)
{
kq->ops.uninitialize(kq);
kq_uninitialize(kq);
kfree(kq);
}
@@ -377,7 +359,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
return;
}
retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
retval = kq_acquire_packet_buffer(kq, 5, &buffer);
if (unlikely(retval != 0)) {
pr_err(" Failed to acquire packet buffer\n");
pr_err("Kernel queue test failed\n");
@@ -385,7 +367,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
}
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
kq->ops.submit_packet(kq);
kq_submit_packet(kq);
pr_err("Ending kernel queue test\n");
}

View File

@@ -29,45 +29,28 @@
#include "kfd_priv.h"
/**
* struct kernel_queue_ops
*
* @initialize: Initialize a kernel queue, including allocations of GART memory
* needed for the queue.
*
* @uninitialize: Uninitialize a kernel queue and free all its memory usages.
*
* @acquire_packet_buffer: Returns a pointer to the location in the kernel
* kq_acquire_packet_buffer: Returns a pointer to the location in the kernel
* queue ring buffer where the calling function can write its packet. It is
* Guaranteed that there is enough space for that packet. It also updates the
* pending write pointer to that location so subsequent calls to
* acquire_packet_buffer will get a correct write pointer
*
* @submit_packet: Update the write pointer and doorbell of a kernel queue.
* kq_submit_packet: Update the write pointer and doorbell of a kernel queue.
*
* @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
* queue are equal, which means the CP has read all the submitted packets.
*
* @rollback_packet: This routine is called if we failed to build an acquired
* kq_rollback_packet: This routine is called if we failed to build an acquired
* packet for some reason. It just overwrites the pending wptr with the current
* one
*
*/
struct kernel_queue_ops {
bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
void (*uninitialize)(struct kernel_queue *kq);
int (*acquire_packet_buffer)(struct kernel_queue *kq,
size_t packet_size_in_dwords,
unsigned int **buffer_ptr);
void (*submit_packet)(struct kernel_queue *kq);
void (*rollback_packet)(struct kernel_queue *kq);
};
int kq_acquire_packet_buffer(struct kernel_queue *kq,
size_t packet_size_in_dwords,
unsigned int **buffer_ptr);
void kq_submit_packet(struct kernel_queue *kq);
void kq_rollback_packet(struct kernel_queue *kq);
struct kernel_queue {
struct kernel_queue_ops ops;
struct kernel_queue_ops ops_asic_specific;
/* data */
struct kfd_dev *dev;
struct mqd_manager *mqd_mgr;
@@ -99,9 +82,4 @@ struct kernel_queue {
struct list_head list;
};
void kernel_queue_init_cik(struct kernel_queue_ops *ops);
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
void kernel_queue_init_v9(struct kernel_queue_ops *ops);
void kernel_queue_init_v10(struct kernel_queue_ops *ops);
#endif /* KFD_KERNEL_QUEUE_H_ */
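Taken together, the exported kq_* functions keep the acquire/build/submit-or-rollback pattern that the old ops table expressed. A minimal kernel-context sketch of that calling sequence, modeled on test_kq() in kfd_kernel_queue.c (error handling trimmed, packet contents are just NOPs):

#include "kfd_kernel_queue.h"

/* Reserve space in the ring, build the packet in place, then either ring
 * the doorbell or roll the pending write pointer back.
 */
static int kq_send_nops(struct kernel_queue *kq, unsigned int ndw)
{
	unsigned int *buffer;
	unsigned int i;
	int retval;

	retval = kq_acquire_packet_buffer(kq, ndw, &buffer);
	if (retval)
		return retval;		/* ring full: -ENOMEM */

	for (i = 0; i < ndw; i++)
		buffer[i] = kq->nop_packet;

	kq_submit_packet(kq);		/* updates wptr and writes the doorbell */
	/* On a build failure we would call kq_rollback_packet(kq) instead. */
	return 0;
}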

View File

@@ -1,53 +0,0 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_kernel_queue.h"
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_cik(struct kernel_queue *kq);
static void submit_packet_cik(struct kernel_queue *kq);
void kernel_queue_init_cik(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_cik;
ops->uninitialize = uninitialize_cik;
ops->submit_packet = submit_packet_cik;
}
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size)
{
return true;
}
static void uninitialize_cik(struct kernel_queue *kq)
{
}
static void submit_packet_cik(struct kernel_queue *kq)
{
*kq->wptr_kernel = kq->pending_wptr;
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
kq->pending_wptr);
}

View File

@@ -1,348 +0,0 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_kernel_queue.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_ai.h"
#include "kfd_pm4_opcodes.h"
#include "gc/gc_10_1_0_sh_mask.h"
static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_v10(struct kernel_queue *kq);
static void submit_packet_v10(struct kernel_queue *kq);
void kernel_queue_init_v10(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_v10;
ops->uninitialize = uninitialize_v10;
ops->submit_packet = submit_packet_v10;
}
static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size)
{
int retval;
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
if (retval != 0)
return false;
kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
return true;
}
static void uninitialize_v10(struct kernel_queue *kq)
{
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
}
static void submit_packet_v10(struct kernel_queue *kq)
{
*kq->wptr64_kernel = kq->pending_wptr64;
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
}
static int pm_map_process_v10(struct packet_manager *pm,
uint32_t *buffer, struct qcm_process_device *qpd)
{
struct pm4_mes_map_process *packet;
uint64_t vm_page_table_base_addr = qpd->page_table_base;
packet = (struct pm4_mes_map_process *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 1;
packet->bitfields2.pasid = qpd->pqm->process->pasid;
packet->bitfields14.gds_size = qpd->gds_size;
packet->bitfields14.num_gws = qpd->num_gws;
packet->bitfields14.num_oac = qpd->num_oac;
packet->bitfields14.sdma_enable = 1;
packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
if (qpd->tba_addr) {
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
packet->sq_shader_tba_hi = (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) |
upper_32_bits(qpd->tba_addr >> 8);
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
}
packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
packet->vm_context_page_table_base_addr_lo32 =
lower_32_bits(vm_page_table_base_addr);
packet->vm_context_page_table_base_addr_hi32 =
upper_32_bits(vm_page_table_base_addr);
return 0;
}
static int pm_runlist_v10(struct packet_manager *pm, uint32_t *buffer,
uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
struct pm4_mes_runlist *packet;
int concurrent_proc_cnt = 0;
struct kfd_dev *kfd = pm->dqm->dev;
/* Determine the number of processes to map together to HW:
* it can not exceed the number of VMIDs available to the
* scheduler, and it is determined by the smaller of the number
* of processes in the runlist and kfd module parameter
* hws_max_conc_proc.
* Note: the arbitration between the number of VMIDs and
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
concurrent_proc_cnt = min(pm->dqm->processes_count,
kfd->max_proc_per_quantum);
packet = (struct pm4_mes_runlist *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_runlist));
packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
sizeof(struct pm4_mes_runlist));
packet->bitfields4.ib_size = ib_size_in_dwords;
packet->bitfields4.chain = chain ? 1 : 0;
packet->bitfields4.offload_polling = 0;
packet->bitfields4.valid = 1;
packet->bitfields4.process_cnt = concurrent_proc_cnt;
packet->ordinal2 = lower_32_bits(ib);
packet->ib_base_hi = upper_32_bits(ib);
return 0;
}
static int pm_map_queues_v10(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
{
struct pm4_mes_map_queues *packet;
bool use_static = is_static;
packet = (struct pm4_mes_map_queues *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
sizeof(struct pm4_mes_map_queues));
packet->bitfields2.num_queues = 1;
packet->bitfields2.queue_sel =
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
packet->bitfields2.queue_type =
queue_type__mes_map_queues__normal_compute_vi;
switch (q->properties.type) {
case KFD_QUEUE_TYPE_COMPUTE:
if (use_static)
packet->bitfields2.queue_type =
queue_type__mes_map_queues__normal_latency_static_queue_vi;
break;
case KFD_QUEUE_TYPE_DIQ:
packet->bitfields2.queue_type =
queue_type__mes_map_queues__debug_interface_queue_vi;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
use_static = false; /* no static queues under SDMA */
break;
default:
WARN(1, "queue type %d\n", q->properties.type);
return -EINVAL;
}
packet->bitfields3.doorbell_offset =
q->properties.doorbell_off;
packet->mqd_addr_lo =
lower_32_bits(q->gart_mqd_addr);
packet->mqd_addr_hi =
upper_32_bits(q->gart_mqd_addr);
packet->wptr_addr_lo =
lower_32_bits((uint64_t)q->properties.write_ptr);
packet->wptr_addr_hi =
upper_32_bits((uint64_t)q->properties.write_ptr);
return 0;
}
static int pm_unmap_queues_v10(struct packet_manager *pm, uint32_t *buffer,
enum kfd_queue_type type,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset,
unsigned int sdma_engine)
{
struct pm4_mes_unmap_queues *packet;
packet = (struct pm4_mes_unmap_queues *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_mes_unmap_queues));
switch (type) {
case KFD_QUEUE_TYPE_COMPUTE:
case KFD_QUEUE_TYPE_DIQ:
packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__compute;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break;
default:
WARN(1, "queue type %d\n", type);
break;
}
if (reset)
packet->bitfields2.action =
action__mes_unmap_queues__reset_queues;
else
packet->bitfields2.action =
action__mes_unmap_queues__preempt_queues;
switch (filter) {
case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
packet->bitfields2.num_queues = 1;
packet->bitfields3b.doorbell_offset0 = filter_param;
break;
case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
packet->bitfields3a.pasid = filter_param;
break;
case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__unmap_all_queues;
break;
case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
/* in this case, we do not preempt static queues */
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
break;
default:
WARN(1, "filter %d\n", filter);
break;
}
return 0;
}
static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint32_t fence_value)
{
struct pm4_mes_query_status *packet;
packet = (struct pm4_mes_query_status *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_query_status));
packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
sizeof(struct pm4_mes_query_status));
packet->bitfields2.context_id = 0;
packet->bitfields2.interrupt_sel =
interrupt_sel__mes_query_status__completion_status;
packet->bitfields2.command =
command__mes_query_status__fence_only_after_write_ack;
packet->addr_hi = upper_32_bits((uint64_t)fence_address);
packet->addr_lo = lower_32_bits((uint64_t)fence_address);
packet->data_hi = upper_32_bits((uint64_t)fence_value);
packet->data_lo = lower_32_bits((uint64_t)fence_value);
return 0;
}
static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
{
struct pm4_mec_release_mem *packet;
WARN_ON(!buffer);
packet = (struct pm4_mec_release_mem *)buffer;
memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
sizeof(struct pm4_mec_release_mem));
packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
packet->bitfields2.tcl1_action_ena = 1;
packet->bitfields2.tc_action_ena = 1;
packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
packet->bitfields3.int_sel =
int_sel__mec_release_mem__send_interrupt_after_write_confirm;
packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
packet->address_hi = upper_32_bits(gpu_addr);
packet->data_lo = 0;
return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
}
const struct packet_manager_funcs kfd_v10_pm_funcs = {
.map_process = pm_map_process_v10,
.runlist = pm_runlist_v10,
.set_resources = pm_set_resources_vi,
.map_queues = pm_map_queues_v10,
.unmap_queues = pm_unmap_queues_v10,
.query_status = pm_query_status_v10,
.release_mem = pm_release_mem_v10,
.map_process_size = sizeof(struct pm4_mes_map_process),
.runlist_size = sizeof(struct pm4_mes_runlist),
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = sizeof(struct pm4_mec_release_mem)
};

View File

@@ -374,7 +374,6 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
switch (type) {
case KFD_MQD_TYPE_CP:
case KFD_MQD_TYPE_COMPUTE:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
mqd->free_mqd = free_mqd;
@@ -401,7 +400,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
#endif
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
@@ -442,7 +441,7 @@ struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
mqd = mqd_manager_init_cik(type, dev);
if (!mqd)
return NULL;
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
if (type == KFD_MQD_TYPE_CP)
mqd->update_mqd = update_mqd_hawaii;
return mqd;
}

View File

@@ -66,6 +66,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q)
{
m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
m->cp_hqd_queue_priority = q->priority;
}
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
{
@@ -109,9 +115,6 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
m->cp_hqd_pipe_priority = 1;
m->cp_hqd_queue_priority = 15;
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->cp_hqd_aql_control =
1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
@@ -208,11 +211,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_ctx_save_control = 0;
update_cu_mask(mm, mqd, q);
set_priority(m, q);
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0 &&
!q->is_evicted);
q->is_active = QUEUE_IS_ACTIVE(*q);
}
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
@@ -247,18 +248,22 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
{
struct v10_compute_mqd *m;
/* Control stack is located one page after MQD. */
void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
m = get_mqd(mqd);
/* Control stack is written backwards, while workgroup context data
* is written forwards. Both starts from m->cp_hqd_cntl_stack_size.
* Current position is at m->cp_hqd_cntl_stack_offset and
* m->cp_hqd_wg_state_offset, respectively.
*/
*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
m->cp_hqd_cntl_stack_offset;
*save_area_used_size = m->cp_hqd_wg_state_offset -
m->cp_hqd_cntl_stack_size;
if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
return -EFAULT;
/* Control stack is not copied to user mode for GFXv10 because
* it's part of the context save area that is already
* accessible to user mode
*/
return 0;
}
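A quick numeric illustration of the two sizes reported above (values are made up): the control stack grows downward from cp_hqd_cntl_stack_size toward cp_hqd_cntl_stack_offset, while the workgroup context save area grows upward toward cp_hqd_wg_state_offset.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t cp_hqd_cntl_stack_size   = 0x1000;	/* control stack region size       */
	uint32_t cp_hqd_cntl_stack_offset = 0x0e00;	/* current top of the stack        */
	uint32_t cp_hqd_wg_state_offset   = 0x1400;	/* current end of the WG save area */

	uint32_t ctl_stack_used_size  = cp_hqd_cntl_stack_size - cp_hqd_cntl_stack_offset;
	uint32_t save_area_used_size  = cp_hqd_wg_state_offset - cp_hqd_cntl_stack_size;

	assert(ctl_stack_used_size == 0x200);	/* 512 bytes of control stack used */
	assert(save_area_used_size == 0x400);	/* 1 KiB of workgroup context used */
	return 0;
}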
@@ -277,18 +282,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
struct v10_compute_mqd *m;
update_mqd(mm, mqd, q);
/* TODO: what's the point? update_mqd already does this. */
m = get_mqd(mqd);
m->cp_hqd_vmid = q->vmid;
}
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -340,11 +333,7 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdma_queue_id = q->sdma_queue_id;
m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0 &&
!q->is_evicted);
q->is_active = QUEUE_IS_ACTIVE(*q);
}
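QUEUE_IS_ACTIVE() presumably folds the four open-coded checks that the replaced lines performed (non-zero size and address, non-zero percentage, not evicted). A standalone sketch of such a helper; the exact definition in kfd_priv.h is assumed:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct queue_properties {		/* only the fields the check needs */
	uint64_t queue_size;
	uint64_t queue_address;
	int queue_percent;
	bool is_evicted;
};

#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 &&	\
			    (q).queue_address != 0 &&	\
			    (q).queue_percent > 0 &&	\
			    !(q).is_evicted)

int main(void)
{
	struct queue_properties q = { 4096, 0x1000, 100, false };

	printf("active = %d\n", QUEUE_IS_ACTIVE(q));	/* prints 1 */
	return 0;
}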
/*
@@ -392,7 +381,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
if (!mqd)
return NULL;
@@ -400,7 +389,6 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
switch (type) {
case KFD_MQD_TYPE_CP:
case KFD_MQD_TYPE_COMPUTE:
pr_debug("%s@%i\n", __func__, __LINE__);
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
@@ -422,7 +410,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
@@ -432,11 +420,11 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
pr_debug("%s@%i\n", __func__, __LINE__);
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v10_compute_mqd);

View File

@@ -92,7 +92,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
* instead of sub-allocation function.
*/
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
@@ -302,7 +302,8 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
m->cp_hqd_cntl_stack_offset;
*save_area_used_size = m->cp_hqd_wg_state_offset;
*save_area_used_size = m->cp_hqd_wg_state_offset -
m->cp_hqd_cntl_stack_size;
if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
return -EFAULT;
@@ -324,18 +325,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
struct v9_mqd *m;
update_mqd(mm, mqd, q);
/* TODO: what's the point? update_mqd already does this. */
m = get_mqd(mqd);
m->cp_hqd_vmid = q->vmid;
}
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -443,7 +432,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
switch (type) {
case KFD_MQD_TYPE_CP:
case KFD_MQD_TYPE_COMPUTE:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
mqd->free_mqd = free_mqd;
@@ -462,7 +450,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);
@@ -471,11 +459,11 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
#endif
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);

View File

@@ -312,11 +312,7 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
struct vi_mqd *m;
__update_mqd(mm, mqd, q, MTYPE_UC, 0);
m = get_mqd(mqd);
m->cp_hqd_vmid = q->vmid;
}
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
@@ -425,7 +421,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
switch (type) {
case KFD_MQD_TYPE_CP:
case KFD_MQD_TYPE_COMPUTE:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
mqd->free_mqd = free_mqd;
@@ -453,7 +448,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
#endif
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
@@ -494,7 +489,7 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
mqd = mqd_manager_init_vi(type, dev);
if (!mqd)
return NULL;
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
if (type == KFD_MQD_TYPE_CP)
mqd->update_mqd = update_mqd_tonga;
return mqd;
}

View File

@@ -241,12 +241,10 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_RAVEN:
case CHIP_RENOIR:
case CHIP_ARCTURUS:
pm->pmf = &kfd_v9_pm_funcs;
break;
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
pm->pmf = &kfd_v10_pm_funcs;
pm->pmf = &kfd_v9_pm_funcs;
break;
default:
WARN(1, "Unexpected ASIC family %u",
@@ -280,7 +278,7 @@ int pm_send_set_resources(struct packet_manager *pm,
size = pm->pmf->set_resources_size;
mutex_lock(&pm->lock);
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t),
(unsigned int **)&buffer);
if (!buffer) {
@@ -291,9 +289,9 @@ int pm_send_set_resources(struct packet_manager *pm,
retval = pm->pmf->set_resources(pm, buffer, res);
if (!retval)
pm->priv_queue->ops.submit_packet(pm->priv_queue);
kq_submit_packet(pm->priv_queue);
else
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
kq_rollback_packet(pm->priv_queue);
out:
mutex_unlock(&pm->lock);
@@ -318,7 +316,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
mutex_lock(&pm->lock);
retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
retval = kq_acquire_packet_buffer(pm->priv_queue,
packet_size_dwords, &rl_buffer);
if (retval)
goto fail_acquire_packet_buffer;
@@ -328,14 +326,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval)
goto fail_create_runlist;
pm->priv_queue->ops.submit_packet(pm->priv_queue);
kq_submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
return retval;
fail_create_runlist:
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
kq_rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
mutex_unlock(&pm->lock);
fail_create_runlist_ib:
@@ -354,7 +352,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
size = pm->pmf->query_status_size;
mutex_lock(&pm->lock);
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
@@ -364,9 +362,9 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
if (!retval)
pm->priv_queue->ops.submit_packet(pm->priv_queue);
kq_submit_packet(pm->priv_queue);
else
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
kq_rollback_packet(pm->priv_queue);
out:
mutex_unlock(&pm->lock);
@@ -383,7 +381,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
size = pm->pmf->unmap_queues_size;
mutex_lock(&pm->lock);
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
@@ -394,9 +392,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
reset, sdma_engine);
if (!retval)
pm->priv_queue->ops.submit_packet(pm->priv_queue);
kq_submit_packet(pm->priv_queue);
else
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
kq_rollback_packet(pm->priv_queue);
out:
mutex_unlock(&pm->lock);
@@ -441,7 +439,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
size = pm->pmf->query_status_size;
mutex_lock(&pm->lock);
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
@@ -449,7 +447,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
goto out;
}
memset(buffer, 0x55, size);
pm->priv_queue->ops.submit_packet(pm->priv_queue);
kq_submit_packet(pm->priv_queue);
pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
buffer[0], buffer[1], buffer[2], buffer[3],

View File

@@ -25,47 +25,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_ai.h"
#include "kfd_pm4_opcodes.h"
static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_v9(struct kernel_queue *kq);
static void submit_packet_v9(struct kernel_queue *kq);
void kernel_queue_init_v9(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_v9;
ops->uninitialize = uninitialize_v9;
ops->submit_packet = submit_packet_v9;
}
static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size)
{
int retval;
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
if (retval)
return false;
kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
return true;
}
static void uninitialize_v9(struct kernel_queue *kq)
{
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
}
static void submit_packet_v9(struct kernel_queue *kq)
{
*kq->wptr64_kernel = kq->pending_wptr64;
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
}
#include "gc/gc_10_1_0_sh_mask.h"
static int pm_map_process_v9(struct packet_manager *pm,
uint32_t *buffer, struct qcm_process_device *qpd)
@@ -90,10 +50,17 @@ static int pm_map_process_v9(struct packet_manager *pm,
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
if (qpd->tba_addr) {
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
/* On GFX9, unlike GFX10, bit TRAP_EN of SQ_SHADER_TBA_HI is
* not defined, so setting it won't do any harm.
*/
packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8)
| 1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT;
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
}
packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
@@ -341,35 +308,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
{
struct pm4_mec_release_mem *packet;
packet = (struct pm4_mec_release_mem *)buffer;
memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
sizeof(struct pm4_mec_release_mem));
packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
packet->bitfields2.tcl1_action_ena = 1;
packet->bitfields2.tc_action_ena = 1;
packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
packet->bitfields3.int_sel =
int_sel__mec_release_mem__send_interrupt_after_write_confirm;
packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
packet->address_hi = upper_32_bits(gpu_addr);
packet->data_lo = 0;
return 0;
}
const struct packet_manager_funcs kfd_v9_pm_funcs = {
.map_process = pm_map_process_v9,
.runlist = pm_runlist_v9,
@@ -377,12 +315,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
.query_status = pm_query_status_v9,
.release_mem = pm_release_mem_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process),
.runlist_size = sizeof(struct pm4_mes_runlist),
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = sizeof(struct pm4_mec_release_mem)
.release_mem_size = 0,
};

View File

@@ -26,47 +26,6 @@
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_opcodes.h"
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_vi(struct kernel_queue *kq);
static void submit_packet_vi(struct kernel_queue *kq);
void kernel_queue_init_vi(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_vi;
ops->uninitialize = uninitialize_vi;
ops->submit_packet = submit_packet_vi;
}
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size)
{
int retval;
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
if (retval != 0)
return false;
kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
return true;
}
static void uninitialize_vi(struct kernel_queue *kq)
{
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
}
static void submit_packet_vi(struct kernel_queue *kq)
{
*kq->wptr_kernel = kq->pending_wptr;
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
kq->pending_wptr);
}
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
{
union PM4_MES_TYPE_3_HEADER header;

View File

@@ -59,24 +59,21 @@
* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
* defines are w.r.t to PAGE_SIZE
*/
#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
#define KFD_MMAP_TYPE_SHIFT 62
#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
#define KFD_MMAP_GPU_ID_SHIFT 46
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
<< KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
& KFD_MMAP_GPU_ID_MASK)
#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
#define KFD_MMAP_GET_GPU_ID(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
>> KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
/*
* When working with cp scheduler we should assign the HIQ manually or via
* the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
@@ -238,9 +235,10 @@ struct kfd_dev {
* KFD. It is aligned for mapping
* into user mode
*/
size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell
* to HW doorbell, GFX reserved some
* at the start)
size_t doorbell_base_dw_offset; /* Offset from the start of the PCI
* doorbell BAR to the first KFD
* doorbell in dwords. GFX reserves
* the segment before this offset.
*/
u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
* page used by kernel queue
@@ -510,8 +508,7 @@ struct queue {
* Please read the kfd_mqd_manager.h description.
*/
enum KFD_MQD_TYPE {
KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */
KFD_MQD_TYPE_HIQ, /* for hiq */
KFD_MQD_TYPE_HIQ = 0, /* for hiq */
KFD_MQD_TYPE_CP, /* for cp queues and diq */
KFD_MQD_TYPE_SDMA, /* for sdma queues */
KFD_MQD_TYPE_DIQ, /* for diq */
@@ -818,7 +815,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
@@ -904,7 +901,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev,
struct file *f,
struct queue_properties *properties,
unsigned int *qid);
unsigned int *qid,
uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
@@ -972,7 +970,6 @@ struct packet_manager_funcs {
extern const struct packet_manager_funcs kfd_vi_pm_funcs;
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
extern const struct packet_manager_funcs kfd_v10_pm_funcs;
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm);
@@ -991,9 +988,6 @@ void pm_release_ib(struct packet_manager *pm);
/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
struct scheduling_resources *res);
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);

View File

@@ -560,8 +560,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
<< PAGE_SHIFT;
offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
MAP_SHARED, offset);

View File

@@ -162,7 +162,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
pqm->queue_slot_bitmap = NULL;
}
static int create_cp_queue(struct process_queue_manager *pqm,
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev, struct queue **q,
struct queue_properties *q_properties,
struct file *f, unsigned int qid)
@@ -192,7 +192,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev,
struct file *f,
struct queue_properties *properties,
unsigned int *qid)
unsigned int *qid,
uint32_t *p_doorbell_offset_in_process)
{
int retval;
struct kfd_process_device *pdd;
@@ -250,7 +251,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -271,7 +272,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -303,12 +304,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
if (q)
if (q && p_doorbell_offset_in_process)
/* Return the doorbell offset within the doorbell page
* to the caller so it can be passed up to user mode
* (in bytes).
* There are always 1024 doorbells per process, so in case
* of 8-byte doorbells, there are two doorbell pages per
* process.
*/
properties->doorbell_off =
*p_doorbell_offset_in_process =
(q->properties.doorbell_off * sizeof(uint32_t)) &
(kfd_doorbell_process_slice(dev) - 1);
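To close the loop with the doorbell arithmetic earlier in this series, a standalone worked example of the masking above (illustrative values: 8-byte doorbells and 1024 doorbells, hence an 8192-byte, two-page frame per process): the BAR dword offset 4102 computed in the kfd_doorbell.c example lands at byte 24 of the process's own doorbell frame, i.e. doorbell_id 3 times 8 bytes.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t doorbell_off = 4102;		/* q->properties.doorbell_off, in dwords */
	uint32_t process_slice = 1024 * 8;	/* kfd_doorbell_process_slice(), bytes   */
	uint32_t doorbell_offset_in_process =
		(doorbell_off * (uint32_t)sizeof(uint32_t)) & (process_slice - 1);

	printf("doorbell offset in process = %u bytes\n",
	       doorbell_offset_in_process);	/* 24 */
	return 0;
}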