Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "This is the main drm pull, it has a shared branch with some alsa crossover but everything should be acked by relevant people. New drivers: - ATMEL HLCDC driver - designware HDMI core support (used in multiple SoCs). core: - lots more atomic modesetting work, properties and atomic ioctl (hidden under option) - bridge rework allows support for Samsung exynos chromebooks to work finally. - some more panels supported i915: - atomic plane update support - DSI uses shared DSI infrastructure - Skylake basic support is all merged now - component framework used for i915/snd-hda interactions - write-combine cpu memory mappings - engine init code refactored - full ppgtt enabled where execlists are enabled. - cherryview rps/gpu turbo and pipe CRC support. radeon: - indirect draw support for evergreen/cayman - SMC and manual fan control for SI/CI - Displayport audio support amdkfd: - SDMA usermode queue support - replace suballocator usage with more suitable one - rework for allowing interfacing to more than radeon nouveau: - major renaming in prep for later splitting work - merge arm platform driver into nouveau - GK20A reclocking support msm: - conversion to atomic modesetting - YUV support for mdp4/5 - eDP support - hw cursor for mdp5 tegra: - conversion to atomic modesetting - better suspend/resume support for child devices rcar-du: - interlaced support imx: - move to using dw_hdmi shared support - mode_fixup support sti: - DVO support - HDMI infoframe support exynos: - refactoring and cleanup, removed lots of internal unnecessary abstraction - exynos7 DECON display controller support Along with the usual bunch of fixes, cleanups etc" * 'drm-next' of git://people.freedesktop.org/~airlied/linux: (724 commits) drm/radeon: fix voltage setup on hawaii drm/radeon/dp: Set EDP_CONFIGURATION_SET for bridge chips if necessary drm/radeon: only enable kv/kb dpm interrupts once v3 drm/radeon: workaround for CP HW bug on CIK drm/radeon: Don't try to 
enable write-combining without PAT drm/radeon: use 0-255 rather than 0-100 for pwm fan range drm/i915: Clamp efficient frequency to valid range drm/i915: Really ignore long HPD pulses on eDP drm/exynos: Add DECON driver drm/i915: Correct the base value while updating LP_OUTPUT_HOLD in MIPI_PORT_CTRL drm/i915: Insert a command barrier on BLT/BSD cache flushes drm/i915: Drop vblank wait from intel_dp_link_down drm/exynos: fix NULL pointer reference drm/exynos: remove exynos_plane_dpms drm/exynos: remove mode property of exynos crtc drm/exynos: Remove exynos_plane_dpms() call with no effect drm/i915: Squelch overzealous uncore reset WARN_ON drm/i915: Take runtime pm reference on hangcheck_info drm/i915: Correct the IOSF Dev_FN field for IOSF transfers drm/exynos: fix DMA_ATTR_NO_KERNEL_MAPPING usage ...
Этот коммит содержится в:
@@ -7,7 +7,10 @@ ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
|
||||
amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
|
||||
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
|
||||
kfd_process.o kfd_queue.o kfd_mqd_manager.o \
|
||||
kfd_kernel_queue.o kfd_packet_manager.o \
|
||||
kfd_process_queue_manager.o kfd_device_queue_manager.o
|
||||
kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
|
||||
kfd_kernel_queue.o kfd_kernel_queue_cik.o \
|
||||
kfd_kernel_queue_vi.o kfd_packet_manager.o \
|
||||
kfd_process_queue_manager.o kfd_device_queue_manager.o \
|
||||
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
|
||||
|
||||
obj-$(CONFIG_HSA_AMD) += amdkfd.o
|
||||
|
@@ -168,6 +168,8 @@
|
||||
#define IB_ATC_EN (1U << 23)
|
||||
#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20)
|
||||
|
||||
#define AQL_ENABLE 1
|
||||
|
||||
#define CP_HQD_DEQUEUE_REQUEST 0xC974
|
||||
#define DEQUEUE_REQUEST_DRAIN 1
|
||||
#define DEQUEUE_REQUEST_RESET 2
|
||||
@@ -188,6 +190,17 @@
|
||||
#define MQD_VMID_MASK (0xf << 0)
|
||||
#define MQD_CONTROL_PRIV_STATE_EN (1U << 8)
|
||||
|
||||
/*
 * SDMA queue register field helpers.
 *
 * Every function-like macro parenthesizes its argument so that compound
 * expressions (e.g. "a | b") are shifted as a whole instead of being torn
 * apart by operator precedence.
 *
 * NOTE(review): field meanings are inferred from the names only - confirm
 * bit positions against the hardware register specification.
 */
#define	SDMA_RB_VMID(x)				((x) << 24)
#define	SDMA_RB_ENABLE				(1 << 0)
#define	SDMA_RB_SIZE(x)				((x) << 1) /* log2 */
#define	SDMA_RPTR_WRITEBACK_ENABLE		(1 << 12)
#define	SDMA_RPTR_WRITEBACK_TIMER(x)		((x) << 16) /* log2 */
#define	SDMA_OFFSET(x)				((x) << 0)
#define	SDMA_DB_ENABLE				(1 << 28)
#define	SDMA_ATC				(1 << 0)
#define	SDMA_VA_PTR32				(1 << 4)
#define	SDMA_VA_SHARED_BASE(x)			((x) << 8)
|
||||
|
||||
#define GRBM_GFX_INDEX 0x30800
|
||||
#define INSTANCE_INDEX(x) ((x) << 0)
|
||||
#define SH_INDEX(x) ((x) << 8)
|
||||
|
@@ -178,6 +178,22 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (args->eop_buffer_address &&
|
||||
!access_ok(VERIFY_WRITE,
|
||||
(const void __user *) args->eop_buffer_address,
|
||||
sizeof(uint32_t))) {
|
||||
pr_debug("kfd: can't access eop buffer");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (args->ctx_save_restore_address &&
|
||||
!access_ok(VERIFY_WRITE,
|
||||
(const void __user *) args->ctx_save_restore_address,
|
||||
sizeof(uint32_t))) {
|
||||
pr_debug("kfd: can't access ctx save restore buffer");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
q_properties->is_interop = false;
|
||||
q_properties->queue_percent = args->queue_percentage;
|
||||
q_properties->priority = args->queue_priority;
|
||||
@@ -185,9 +201,16 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
|
||||
q_properties->queue_size = args->ring_size;
|
||||
q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
|
||||
q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
|
||||
q_properties->eop_ring_buffer_address = args->eop_buffer_address;
|
||||
q_properties->eop_ring_buffer_size = args->eop_buffer_size;
|
||||
q_properties->ctx_save_restore_area_address =
|
||||
args->ctx_save_restore_address;
|
||||
q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
|
||||
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
|
||||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
|
||||
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
|
||||
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
|
||||
q_properties->type = KFD_QUEUE_TYPE_SDMA;
|
||||
else
|
||||
return -ENOTSUPP;
|
||||
|
||||
@@ -214,6 +237,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
|
||||
|
||||
pr_debug("Queue Format (%d)\n", q_properties->format);
|
||||
|
||||
pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address);
|
||||
|
||||
pr_debug("Queue CTX save arex (0x%llX)\n",
|
||||
q_properties->ctx_save_restore_area_address);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -235,9 +263,12 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id);
|
||||
dev = kfd_device_by_id(args->gpu_id);
|
||||
if (dev == NULL)
|
||||
if (dev == NULL) {
|
||||
pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
||||
@@ -251,8 +282,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
|
||||
p->pasid,
|
||||
dev->id);
|
||||
|
||||
err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0,
|
||||
KFD_QUEUE_TYPE_COMPUTE, &queue_id);
|
||||
err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,
|
||||
0, q_properties.type, &queue_id);
|
||||
if (err != 0)
|
||||
goto err_create_queue;
|
||||
|
||||
@@ -385,7 +416,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
|
||||
(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
|
||||
? cache_policy_coherent : cache_policy_noncoherent;
|
||||
|
||||
if (!dev->dqm->set_cache_memory_policy(dev->dqm,
|
||||
if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
|
||||
&pdd->qpd,
|
||||
default_policy,
|
||||
alternate_policy,
|
||||
|
@@ -31,11 +31,20 @@
|
||||
#define MQD_SIZE_ALIGNED 768
|
||||
|
||||
static const struct kfd_device_info kaveri_device_info = {
|
||||
.asic_family = CHIP_KAVERI,
|
||||
.max_pasid_bits = 16,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED
|
||||
};
|
||||
|
||||
static const struct kfd_device_info carrizo_device_info = {
|
||||
.asic_family = CHIP_CARRIZO,
|
||||
.max_pasid_bits = 16,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED
|
||||
};
|
||||
|
||||
struct kfd_deviceid {
|
||||
unsigned short did;
|
||||
const struct kfd_device_info *device_info;
|
||||
@@ -64,9 +73,13 @@ static const struct kfd_deviceid supported_devices[] = {
|
||||
{ 0x1318, &kaveri_device_info }, /* Kaveri */
|
||||
{ 0x131B, &kaveri_device_info }, /* Kaveri */
|
||||
{ 0x131C, &kaveri_device_info }, /* Kaveri */
|
||||
{ 0x131D, &kaveri_device_info }, /* Kaveri */
|
||||
{ 0x131D, &kaveri_device_info } /* Kaveri */
|
||||
};
|
||||
|
||||
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
|
||||
unsigned int chunk_size);
|
||||
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
|
||||
|
||||
static const struct kfd_device_info *lookup_device_info(unsigned short did)
|
||||
{
|
||||
size_t i;
|
||||
@@ -173,16 +186,39 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
size = max_num_of_queues_per_device *
|
||||
kfd->device_info->mqd_size_aligned;
|
||||
|
||||
/* add another 512KB for all other allocations on gart */
|
||||
/*
|
||||
* calculate max size of runlist packet.
|
||||
* There can be only 2 packets at once
|
||||
*/
|
||||
size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
|
||||
max_num_of_queues_per_device *
|
||||
sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
|
||||
|
||||
/* Add size of HIQ & DIQ */
|
||||
size += KFD_KERNEL_QUEUE_SIZE * 2;
|
||||
|
||||
/* add another 512KB for all other allocations on gart (HPD, fences) */
|
||||
size += 512 * 1024;
|
||||
|
||||
if (kfd2kgd->init_sa_manager(kfd->kgd, size)) {
|
||||
if (kfd2kgd->init_gtt_mem_allocation(kfd->kgd, size, &kfd->gtt_mem,
|
||||
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) {
|
||||
dev_err(kfd_device,
|
||||
"Error initializing sa manager for device (%x:%x)\n",
|
||||
kfd->pdev->vendor, kfd->pdev->device);
|
||||
"Could not allocate %d bytes for device (%x:%x)\n",
|
||||
size, kfd->pdev->vendor, kfd->pdev->device);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev_info(kfd_device,
|
||||
"Allocated %d bytes on gart for device(%x:%x)\n",
|
||||
size, kfd->pdev->vendor, kfd->pdev->device);
|
||||
|
||||
/* Initialize GTT sa with 512 byte chunk size */
|
||||
if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
|
||||
dev_err(kfd_device,
|
||||
"Error initializing gtt sub-allocator\n");
|
||||
goto kfd_gtt_sa_init_error;
|
||||
}
|
||||
|
||||
kfd_doorbell_init(kfd);
|
||||
|
||||
if (kfd_topology_add_device(kfd) != 0) {
|
||||
@@ -209,7 +245,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
goto device_queue_manager_error;
|
||||
}
|
||||
|
||||
if (kfd->dqm->start(kfd->dqm) != 0) {
|
||||
if (kfd->dqm->ops.start(kfd->dqm) != 0) {
|
||||
dev_err(kfd_device,
|
||||
"Error starting queuen manager for device (%x:%x)\n",
|
||||
kfd->pdev->vendor, kfd->pdev->device);
|
||||
@@ -232,7 +268,9 @@ device_queue_manager_error:
|
||||
device_iommu_pasid_error:
|
||||
kfd_topology_remove_device(kfd);
|
||||
kfd_topology_add_device_error:
|
||||
kfd2kgd->fini_sa_manager(kfd->kgd);
|
||||
kfd_gtt_sa_fini(kfd);
|
||||
kfd_gtt_sa_init_error:
|
||||
kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
||||
dev_err(kfd_device,
|
||||
"device (%x:%x) NOT added due to errors\n",
|
||||
kfd->pdev->vendor, kfd->pdev->device);
|
||||
@@ -246,6 +284,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
|
||||
device_queue_manager_uninit(kfd->dqm);
|
||||
amd_iommu_free_device(kfd->pdev);
|
||||
kfd_topology_remove_device(kfd);
|
||||
kfd_gtt_sa_fini(kfd);
|
||||
kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
||||
}
|
||||
|
||||
kfree(kfd);
|
||||
@@ -256,7 +296,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
|
||||
BUG_ON(kfd == NULL);
|
||||
|
||||
if (kfd->init_complete) {
|
||||
kfd->dqm->stop(kfd->dqm);
|
||||
kfd->dqm->ops.stop(kfd->dqm);
|
||||
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
|
||||
amd_iommu_free_device(kfd->pdev);
|
||||
}
|
||||
@@ -277,7 +317,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
|
||||
return -ENXIO;
|
||||
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
|
||||
iommu_pasid_shutdown_callback);
|
||||
kfd->dqm->start(kfd->dqm);
|
||||
kfd->dqm->ops.start(kfd->dqm);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -288,3 +328,188 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
|
||||
{
|
||||
/* Process interrupts / schedule work as necessary */
|
||||
}
|
||||
|
||||
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
|
||||
unsigned int chunk_size)
|
||||
{
|
||||
unsigned int num_of_bits;
|
||||
|
||||
BUG_ON(!kfd);
|
||||
BUG_ON(!kfd->gtt_mem);
|
||||
BUG_ON(buf_size < chunk_size);
|
||||
BUG_ON(buf_size == 0);
|
||||
BUG_ON(chunk_size == 0);
|
||||
|
||||
kfd->gtt_sa_chunk_size = chunk_size;
|
||||
kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
|
||||
|
||||
num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE;
|
||||
BUG_ON(num_of_bits == 0);
|
||||
|
||||
kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL);
|
||||
|
||||
if (!kfd->gtt_sa_bitmap)
|
||||
return -ENOMEM;
|
||||
|
||||
pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
|
||||
kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
|
||||
|
||||
mutex_init(&kfd->gtt_sa_lock);
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
|
||||
{
|
||||
mutex_destroy(&kfd->gtt_sa_lock);
|
||||
kfree(kfd->gtt_sa_bitmap);
|
||||
}
|
||||
|
||||
/*
 * kfd_gtt_sa_calc_gpu_addr - GPU address of a chunk in the GTT buffer
 * @start_addr:	GPU address of the start of the buffer
 * @bit_num:	index of the chunk (its bit number in the bitmap)
 * @chunk_size:	size of one chunk in bytes
 *
 * Returns start_addr + bit_num * chunk_size.  The product is computed in
 * 64 bits so that it cannot wrap at 4GB before being added to the base.
 */
static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + (uint64_t) bit_num * chunk_size;
}
|
||||
|
||||
/*
 * kfd_gtt_sa_calc_cpu_addr - CPU pointer to a chunk in the GTT buffer
 * @start_addr:	CPU pointer to the start of the buffer
 * @bit_num:	index of the chunk (its bit number in the bitmap)
 * @chunk_size:	size of one chunk in bytes
 *
 * Returns start_addr advanced by bit_num * chunk_size bytes.  The byte
 * offset is computed in 64 bits so that it cannot wrap at 4GB.
 */
static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return (uint32_t *) ((uint64_t) start_addr +
				(uint64_t) bit_num * chunk_size);
}
|
||||
|
||||
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
|
||||
struct kfd_mem_obj **mem_obj)
|
||||
{
|
||||
unsigned int found, start_search, cur_size;
|
||||
|
||||
BUG_ON(!kfd);
|
||||
|
||||
if (size == 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
|
||||
return -ENOMEM;
|
||||
|
||||
*mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||
if ((*mem_obj) == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size);
|
||||
|
||||
start_search = 0;
|
||||
|
||||
mutex_lock(&kfd->gtt_sa_lock);
|
||||
|
||||
kfd_gtt_restart_search:
|
||||
/* Find the first chunk that is free */
|
||||
found = find_next_zero_bit(kfd->gtt_sa_bitmap,
|
||||
kfd->gtt_sa_num_of_chunks,
|
||||
start_search);
|
||||
|
||||
pr_debug("kfd: found = %d\n", found);
|
||||
|
||||
/* If there wasn't any free chunk, bail out */
|
||||
if (found == kfd->gtt_sa_num_of_chunks)
|
||||
goto kfd_gtt_no_free_chunk;
|
||||
|
||||
/* Update fields of mem_obj */
|
||||
(*mem_obj)->range_start = found;
|
||||
(*mem_obj)->range_end = found;
|
||||
(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
|
||||
kfd->gtt_start_gpu_addr,
|
||||
found,
|
||||
kfd->gtt_sa_chunk_size);
|
||||
(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
|
||||
kfd->gtt_start_cpu_ptr,
|
||||
found,
|
||||
kfd->gtt_sa_chunk_size);
|
||||
|
||||
pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n",
|
||||
(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
|
||||
|
||||
/* If we need only one chunk, mark it as allocated and get out */
|
||||
if (size <= kfd->gtt_sa_chunk_size) {
|
||||
pr_debug("kfd: single bit\n");
|
||||
set_bit(found, kfd->gtt_sa_bitmap);
|
||||
goto kfd_gtt_out;
|
||||
}
|
||||
|
||||
/* Otherwise, try to see if we have enough contiguous chunks */
|
||||
cur_size = size - kfd->gtt_sa_chunk_size;
|
||||
do {
|
||||
(*mem_obj)->range_end =
|
||||
find_next_zero_bit(kfd->gtt_sa_bitmap,
|
||||
kfd->gtt_sa_num_of_chunks, ++found);
|
||||
/*
|
||||
* If next free chunk is not contiguous than we need to
|
||||
* restart our search from the last free chunk we found (which
|
||||
* wasn't contiguous to the previous ones
|
||||
*/
|
||||
if ((*mem_obj)->range_end != found) {
|
||||
start_search = found;
|
||||
goto kfd_gtt_restart_search;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we reached end of buffer, bail out with error
|
||||
*/
|
||||
if (found == kfd->gtt_sa_num_of_chunks)
|
||||
goto kfd_gtt_no_free_chunk;
|
||||
|
||||
/* Check if we don't need another chunk */
|
||||
if (cur_size <= kfd->gtt_sa_chunk_size)
|
||||
cur_size = 0;
|
||||
else
|
||||
cur_size -= kfd->gtt_sa_chunk_size;
|
||||
|
||||
} while (cur_size > 0);
|
||||
|
||||
pr_debug("kfd: range_start = %d, range_end = %d\n",
|
||||
(*mem_obj)->range_start, (*mem_obj)->range_end);
|
||||
|
||||
/* Mark the chunks as allocated */
|
||||
for (found = (*mem_obj)->range_start;
|
||||
found <= (*mem_obj)->range_end;
|
||||
found++)
|
||||
set_bit(found, kfd->gtt_sa_bitmap);
|
||||
|
||||
kfd_gtt_out:
|
||||
mutex_unlock(&kfd->gtt_sa_lock);
|
||||
return 0;
|
||||
|
||||
kfd_gtt_no_free_chunk:
|
||||
pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj);
|
||||
mutex_unlock(&kfd->gtt_sa_lock);
|
||||
kfree(mem_obj);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
|
||||
{
|
||||
unsigned int bit;
|
||||
|
||||
BUG_ON(!kfd);
|
||||
|
||||
/* Act like kfree when trying to free a NULL object */
|
||||
if (!mem_obj)
|
||||
return 0;
|
||||
|
||||
pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
|
||||
mem_obj, mem_obj->range_start, mem_obj->range_end);
|
||||
|
||||
mutex_lock(&kfd->gtt_sa_lock);
|
||||
|
||||
/* Mark the chunks as free */
|
||||
for (bit = mem_obj->range_start;
|
||||
bit <= mem_obj->range_end;
|
||||
bit++)
|
||||
clear_bit(bit, kfd->gtt_sa_bitmap);
|
||||
|
||||
mutex_unlock(&kfd->gtt_sa_lock);
|
||||
|
||||
kfree(mem_obj);
|
||||
return 0;
|
||||
}
|
||||
|
@@ -26,34 +26,40 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/sched.h>
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_mqd_manager.h"
|
||||
#include "cik_regs.h"
|
||||
#include "kfd_kernel_queue.h"
|
||||
#include "../../radeon/cik_reg.h"
|
||||
|
||||
/* Size of the per-pipe EOP queue */
|
||||
#define CIK_HPD_EOP_BYTES_LOG2 11
|
||||
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
|
||||
|
||||
static bool is_mem_initialized;
|
||||
|
||||
static int init_memory(struct device_queue_manager *dqm);
|
||||
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
|
||||
unsigned int pasid, unsigned int vmid);
|
||||
|
||||
static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd);
|
||||
|
||||
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
|
||||
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
|
||||
|
||||
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd);
|
||||
|
||||
static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
|
||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_queue_id);
|
||||
|
||||
static inline
|
||||
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
|
||||
{
|
||||
BUG_ON(!dqm || !dqm->dev);
|
||||
return dqm->dev->shared_resources.compute_pipe_count;
|
||||
if (type == KFD_QUEUE_TYPE_SDMA)
|
||||
return KFD_MQD_TYPE_SDMA;
|
||||
return KFD_MQD_TYPE_CP;
|
||||
}
|
||||
|
||||
static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
|
||||
@@ -67,61 +73,7 @@ static inline unsigned int get_pipes_num_cpsch(void)
|
||||
return PIPE_PER_ME_CP_SCHEDULING;
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
|
||||
{
|
||||
uint32_t nybble;
|
||||
|
||||
nybble = (pdd->lds_base >> 60) & 0x0E;
|
||||
|
||||
return nybble;
|
||||
|
||||
}
|
||||
|
||||
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
|
||||
{
|
||||
unsigned int shared_base;
|
||||
|
||||
shared_base = (pdd->lds_base >> 16) & 0xFF;
|
||||
|
||||
return shared_base;
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
|
||||
static void init_process_memory(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
unsigned int temp;
|
||||
|
||||
BUG_ON(!dqm || !qpd);
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
|
||||
/* check if sh_mem_config register already configured */
|
||||
if (qpd->sh_mem_config == 0) {
|
||||
qpd->sh_mem_config =
|
||||
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
|
||||
DEFAULT_MTYPE(MTYPE_NONCACHED) |
|
||||
APE1_MTYPE(MTYPE_NONCACHED);
|
||||
qpd->sh_mem_ape1_limit = 0;
|
||||
qpd->sh_mem_ape1_base = 0;
|
||||
}
|
||||
|
||||
if (qpd->pqm->process->is_32bit_user_mode) {
|
||||
temp = get_sh_mem_bases_32(pdd);
|
||||
qpd->sh_mem_bases = SHARED_BASE(temp);
|
||||
qpd->sh_mem_config |= PTR32;
|
||||
} else {
|
||||
temp = get_sh_mem_bases_nybble_64(pdd);
|
||||
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
|
||||
}
|
||||
|
||||
pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
|
||||
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
|
||||
}
|
||||
|
||||
static void program_sh_mem_settings(struct device_queue_manager *dqm,
|
||||
void program_sh_mem_settings(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
|
||||
@@ -200,7 +152,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
*allocated_vmid = qpd->vmid;
|
||||
q->properties.vmid = qpd->vmid;
|
||||
|
||||
retval = create_compute_queue_nocpsch(dqm, q, qpd);
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
|
||||
retval = create_compute_queue_nocpsch(dqm, q, qpd);
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
retval = create_sdma_queue_nocpsch(dqm, q, qpd);
|
||||
|
||||
if (retval != 0) {
|
||||
if (list_empty(&qpd->queues_list)) {
|
||||
@@ -212,7 +167,11 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
}
|
||||
|
||||
list_add(&q->list, &qpd->queues_list);
|
||||
dqm->queue_count++;
|
||||
if (q->properties.is_active)
|
||||
dqm->queue_count++;
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
dqm->sdma_queue_count++;
|
||||
|
||||
/*
|
||||
* Unconditionally increment this counter, regardless of the queue's
|
||||
@@ -229,12 +188,12 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
|
||||
{
|
||||
bool set;
|
||||
int pipe, bit;
|
||||
int pipe, bit, i;
|
||||
|
||||
set = false;
|
||||
|
||||
for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
|
||||
pipe = (pipe + 1) % get_pipes_num(dqm)) {
|
||||
for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
|
||||
pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
|
||||
if (dqm->allocated_queues[pipe] != 0) {
|
||||
bit = find_first_bit(
|
||||
(unsigned long *)&dqm->allocated_queues[pipe],
|
||||
@@ -275,7 +234,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
|
||||
BUG_ON(!dqm || !q || !qpd);
|
||||
|
||||
mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
|
||||
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
||||
if (mqd == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -319,28 +278,44 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
pr_debug("kfd: In Func %s\n", __func__);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
|
||||
if (mqd == NULL) {
|
||||
retval = -ENOMEM;
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
|
||||
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
||||
if (mqd == NULL) {
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
deallocate_hqd(dqm, q);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
|
||||
if (mqd == NULL) {
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
dqm->sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
} else {
|
||||
pr_debug("q->properties.type is invalid (%d)\n",
|
||||
q->properties.type);
|
||||
retval = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
retval = mqd->destroy_mqd(mqd, q->mqd,
|
||||
KFD_PREEMPT_TYPE_WAVEFRONT,
|
||||
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
|
||||
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
|
||||
q->pipe, q->queue);
|
||||
|
||||
if (retval != 0)
|
||||
goto out;
|
||||
|
||||
deallocate_hqd(dqm, q);
|
||||
|
||||
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
|
||||
|
||||
list_del(&q->list);
|
||||
if (list_empty(&qpd->queues_list))
|
||||
deallocate_vmid(dqm, qpd, q);
|
||||
dqm->queue_count--;
|
||||
if (q->properties.is_active)
|
||||
dqm->queue_count--;
|
||||
|
||||
/*
|
||||
* Unconditionally decrement this counter, regardless of the queue's
|
||||
@@ -364,7 +339,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||
BUG_ON(!dqm || !q || !q->mqd);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (mqd == NULL) {
|
||||
mutex_unlock(&dqm->lock);
|
||||
return -ENOMEM;
|
||||
@@ -415,6 +391,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct device_process_node *n;
|
||||
int retval;
|
||||
|
||||
BUG_ON(!dqm || !qpd);
|
||||
|
||||
@@ -429,12 +406,13 @@ static int register_process_nocpsch(struct device_queue_manager *dqm,
|
||||
mutex_lock(&dqm->lock);
|
||||
list_add(&n->list, &dqm->queues);
|
||||
|
||||
init_process_memory(dqm, qpd);
|
||||
retval = dqm->ops_asic_specific.register_process(dqm, qpd);
|
||||
|
||||
dqm->processes_count++;
|
||||
|
||||
mutex_unlock(&dqm->lock);
|
||||
|
||||
return 0;
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int unregister_process_nocpsch(struct device_queue_manager *dqm,
|
||||
@@ -479,48 +457,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
|
||||
vmid);
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
||||
{
|
||||
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
|
||||
* scratch and GPUVM apertures.
|
||||
* The hardware fills in the remaining 59 bits according to the
|
||||
* following pattern:
|
||||
* LDS: X0000000'00000000 - X0000001'00000000 (4GB)
|
||||
* Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
|
||||
* GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
|
||||
*
|
||||
* (where X/Y is the configurable nybble with the low-bit 0)
|
||||
*
|
||||
* LDS and scratch will have the same top nybble programmed in the
|
||||
* top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
|
||||
* GPUVM can have a different top nybble programmed in the
|
||||
* top 3 bits of SH_MEM_BASES.SHARED_BASE.
|
||||
* We don't bother to support different top nybbles
|
||||
* for LDS/Scratch and GPUVM.
|
||||
*/
|
||||
|
||||
BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
|
||||
top_address_nybble == 0);
|
||||
|
||||
return PRIVATE_BASE(top_address_nybble << 12) |
|
||||
SHARED_BASE(top_address_nybble << 12);
|
||||
}
|
||||
|
||||
static int init_memory(struct device_queue_manager *dqm)
|
||||
{
|
||||
int i, retval;
|
||||
|
||||
for (i = 8; i < 16; i++)
|
||||
set_pasid_vmid_mapping(dqm, 0, i);
|
||||
|
||||
retval = kfd2kgd->init_memory(dqm->dev->kgd);
|
||||
if (retval == 0)
|
||||
is_mem_initialized = true;
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
static int init_pipelines(struct device_queue_manager *dqm,
|
||||
int init_pipelines(struct device_queue_manager *dqm,
|
||||
unsigned int pipes_num, unsigned int first_pipe)
|
||||
{
|
||||
void *hpdptr;
|
||||
@@ -539,11 +476,8 @@ static int init_pipelines(struct device_queue_manager *dqm,
|
||||
* because it contains no data when there are no active queues.
|
||||
*/
|
||||
|
||||
err = kfd2kgd->allocate_mem(dqm->dev->kgd,
|
||||
CIK_HPD_EOP_BYTES * pipes_num,
|
||||
PAGE_SIZE,
|
||||
KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
|
||||
(struct kgd_mem **) &dqm->pipeline_mem);
|
||||
err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
|
||||
&dqm->pipeline_mem);
|
||||
|
||||
if (err) {
|
||||
pr_err("kfd: error allocate vidmem num pipes: %d\n",
|
||||
@@ -556,10 +490,9 @@ static int init_pipelines(struct device_queue_manager *dqm,
|
||||
|
||||
memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);
|
||||
|
||||
mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
|
||||
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
||||
if (mqd == NULL) {
|
||||
kfd2kgd->free_mem(dqm->dev->kgd,
|
||||
(struct kgd_mem *) dqm->pipeline_mem);
|
||||
kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -579,7 +512,6 @@ static int init_pipelines(struct device_queue_manager *dqm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int init_scheduler(struct device_queue_manager *dqm)
|
||||
{
|
||||
int retval;
|
||||
@@ -589,11 +521,6 @@ static int init_scheduler(struct device_queue_manager *dqm)
|
||||
pr_debug("kfd: In %s\n", __func__);
|
||||
|
||||
retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
|
||||
if (retval != 0)
|
||||
return retval;
|
||||
|
||||
retval = init_memory(dqm);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -609,6 +536,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
||||
mutex_init(&dqm->lock);
|
||||
INIT_LIST_HEAD(&dqm->queues);
|
||||
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
|
||||
dqm->sdma_queue_count = 0;
|
||||
dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
|
||||
sizeof(unsigned int), GFP_KERNEL);
|
||||
if (!dqm->allocated_queues) {
|
||||
@@ -620,6 +548,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
||||
dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
|
||||
|
||||
dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
|
||||
dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
|
||||
|
||||
init_scheduler(dqm);
|
||||
return 0;
|
||||
@@ -637,8 +566,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm)
|
||||
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
|
||||
kfree(dqm->mqds[i]);
|
||||
mutex_destroy(&dqm->lock);
|
||||
kfd2kgd->free_mem(dqm->dev->kgd,
|
||||
(struct kgd_mem *) dqm->pipeline_mem);
|
||||
kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
|
||||
}
|
||||
|
||||
static int start_nocpsch(struct device_queue_manager *dqm)
|
||||
@@ -651,6 +579,77 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int allocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int *sdma_queue_id)
|
||||
{
|
||||
int bit;
|
||||
|
||||
if (dqm->sdma_bitmap == 0)
|
||||
return -ENOMEM;
|
||||
|
||||
bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
|
||||
CIK_SDMA_QUEUES);
|
||||
|
||||
clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
|
||||
*sdma_queue_id = bit;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_queue_id)
|
||||
{
|
||||
if (sdma_queue_id >= CIK_SDMA_QUEUES)
|
||||
return;
|
||||
set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
|
||||
}
|
||||
|
||||
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
uint32_t value = SDMA_ATC;
|
||||
|
||||
if (q->process->is_32bit_user_mode)
|
||||
value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
|
||||
else
|
||||
value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
|
||||
qpd_to_pdd(qpd)));
|
||||
q->properties.sdma_vm_addr = value;
|
||||
}
|
||||
|
||||
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct mqd_manager *mqd;
|
||||
int retval;
|
||||
|
||||
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
|
||||
if (!mqd)
|
||||
return -ENOMEM;
|
||||
|
||||
retval = allocate_sdma_queue(dqm, &q->sdma_id);
|
||||
if (retval != 0)
|
||||
return retval;
|
||||
|
||||
q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
|
||||
q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;
|
||||
|
||||
pr_debug("kfd: sdma id is: %d\n", q->sdma_id);
|
||||
pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id);
|
||||
pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id);
|
||||
|
||||
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
|
||||
&q->gart_mqd_addr, &q->properties);
|
||||
if (retval != 0) {
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
return retval;
|
||||
}
|
||||
|
||||
init_sdma_vm(dqm, q, qpd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Device Queue Manager implementation for cp scheduler
|
||||
*/
|
||||
@@ -692,8 +691,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
|
||||
mutex_init(&dqm->lock);
|
||||
INIT_LIST_HEAD(&dqm->queues);
|
||||
dqm->queue_count = dqm->processes_count = 0;
|
||||
dqm->sdma_queue_count = 0;
|
||||
dqm->active_runlist = false;
|
||||
retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
|
||||
retval = dqm->ops_asic_specific.initialize(dqm);
|
||||
if (retval != 0)
|
||||
goto fail_init_pipelines;
|
||||
|
||||
@@ -724,18 +724,14 @@ static int start_cpsch(struct device_queue_manager *dqm)
|
||||
pr_debug("kfd: allocating fence memory\n");
|
||||
|
||||
/* allocate fence memory on the gart */
|
||||
retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
|
||||
sizeof(*dqm->fence_addr),
|
||||
32,
|
||||
KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
|
||||
(struct kgd_mem **) &dqm->fence_mem);
|
||||
retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
|
||||
&dqm->fence_mem);
|
||||
|
||||
if (retval != 0)
|
||||
goto fail_allocate_vidmem;
|
||||
|
||||
dqm->fence_addr = dqm->fence_mem->cpu_ptr;
|
||||
dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
|
||||
|
||||
list_for_each_entry(node, &dqm->queues, list)
|
||||
if (node->qpd->pqm->process && dqm->dev)
|
||||
kfd_bind_process_to_device(dqm->dev,
|
||||
@@ -764,8 +760,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
|
||||
pdd = qpd_to_pdd(node->qpd);
|
||||
pdd->bound = false;
|
||||
}
|
||||
kfd2kgd->free_mem(dqm->dev->kgd,
|
||||
(struct kgd_mem *) dqm->fence_mem);
|
||||
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
|
||||
pm_uninit(&dqm->packets);
|
||||
|
||||
return 0;
|
||||
@@ -828,6 +823,14 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
|
||||
mutex_unlock(&dqm->lock);
|
||||
}
|
||||
|
||||
static void select_sdma_engine_id(struct queue *q)
|
||||
{
|
||||
static int sdma_id;
|
||||
|
||||
q->sdma_id = sdma_id;
|
||||
sdma_id = (sdma_id + 1) % 2;
|
||||
}
|
||||
|
||||
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
struct qcm_process_device *qpd, int *allocate_vmid)
|
||||
{
|
||||
@@ -850,7 +853,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
goto out;
|
||||
}
|
||||
|
||||
mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
select_sdma_engine_id(q);
|
||||
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
|
||||
if (mqd == NULL) {
|
||||
mutex_unlock(&dqm->lock);
|
||||
return -ENOMEM;
|
||||
@@ -867,6 +875,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
retval = execute_queues_cpsch(dqm, false);
|
||||
}
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
dqm->sdma_queue_count++;
|
||||
/*
|
||||
* Unconditionally increment this counter, regardless of the queue's
|
||||
* type or whether the queue is active.
|
||||
@@ -893,12 +903,20 @@ static int fence_wait_timeout(unsigned int *fence_addr,
|
||||
pr_err("kfd: qcm fence wait loop timeout expired\n");
|
||||
return -ETIME;
|
||||
}
|
||||
cpu_relax();
|
||||
schedule();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int destroy_sdma_queues(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_engine)
|
||||
{
|
||||
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
|
||||
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
|
||||
sdma_engine);
|
||||
}
|
||||
|
||||
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
|
||||
{
|
||||
int retval;
|
||||
@@ -911,6 +929,15 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
|
||||
mutex_lock(&dqm->lock);
|
||||
if (dqm->active_runlist == false)
|
||||
goto out;
|
||||
|
||||
pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
|
||||
dqm->sdma_queue_count);
|
||||
|
||||
if (dqm->sdma_queue_count > 0) {
|
||||
destroy_sdma_queues(dqm, 0);
|
||||
destroy_sdma_queues(dqm, 1);
|
||||
}
|
||||
|
||||
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
|
||||
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
|
||||
if (retval != 0)
|
||||
@@ -982,15 +1009,19 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
|
||||
/* remove queue from list to prevent rescheduling after preemption */
|
||||
mutex_lock(&dqm->lock);
|
||||
|
||||
mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd) {
|
||||
retval = -ENOMEM;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
dqm->sdma_queue_count--;
|
||||
|
||||
list_del(&q->list);
|
||||
dqm->queue_count--;
|
||||
if (q->properties.is_active)
|
||||
dqm->queue_count--;
|
||||
|
||||
execute_queues_cpsch(dqm, false);
|
||||
|
||||
@@ -1028,8 +1059,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
|
||||
void __user *alternate_aperture_base,
|
||||
uint64_t alternate_aperture_size)
|
||||
{
|
||||
uint32_t default_mtype;
|
||||
uint32_t ape1_mtype;
|
||||
bool retval;
|
||||
|
||||
pr_debug("kfd: In func %s\n", __func__);
|
||||
|
||||
@@ -1066,18 +1096,13 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
|
||||
qpd->sh_mem_ape1_limit = limit >> 16;
|
||||
}
|
||||
|
||||
default_mtype = (default_policy == cache_policy_coherent) ?
|
||||
MTYPE_NONCACHED :
|
||||
MTYPE_CACHED;
|
||||
|
||||
ape1_mtype = (alternate_policy == cache_policy_coherent) ?
|
||||
MTYPE_NONCACHED :
|
||||
MTYPE_CACHED;
|
||||
|
||||
qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
|
||||
| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
|
||||
| DEFAULT_MTYPE(default_mtype)
|
||||
| APE1_MTYPE(ape1_mtype);
|
||||
retval = dqm->ops_asic_specific.set_cache_memory_policy(
|
||||
dqm,
|
||||
qpd,
|
||||
default_policy,
|
||||
alternate_policy,
|
||||
alternate_aperture_base,
|
||||
alternate_aperture_size);
|
||||
|
||||
if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
|
||||
program_sh_mem_settings(dqm, qpd);
|
||||
@@ -1087,7 +1112,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
|
||||
qpd->sh_mem_ape1_limit);
|
||||
|
||||
mutex_unlock(&dqm->lock);
|
||||
return true;
|
||||
return retval;
|
||||
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
@@ -1100,6 +1125,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||
|
||||
BUG_ON(!dev);
|
||||
|
||||
pr_debug("kfd: loading device queue manager\n");
|
||||
|
||||
dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
|
||||
if (!dqm)
|
||||
return NULL;
|
||||
@@ -1109,40 +1136,50 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||
case KFD_SCHED_POLICY_HWS:
|
||||
case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
|
||||
/* initialize dqm for cp scheduling */
|
||||
dqm->create_queue = create_queue_cpsch;
|
||||
dqm->initialize = initialize_cpsch;
|
||||
dqm->start = start_cpsch;
|
||||
dqm->stop = stop_cpsch;
|
||||
dqm->destroy_queue = destroy_queue_cpsch;
|
||||
dqm->update_queue = update_queue;
|
||||
dqm->get_mqd_manager = get_mqd_manager_nocpsch;
|
||||
dqm->register_process = register_process_nocpsch;
|
||||
dqm->unregister_process = unregister_process_nocpsch;
|
||||
dqm->uninitialize = uninitialize_nocpsch;
|
||||
dqm->create_kernel_queue = create_kernel_queue_cpsch;
|
||||
dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch;
|
||||
dqm->set_cache_memory_policy = set_cache_memory_policy;
|
||||
dqm->ops.create_queue = create_queue_cpsch;
|
||||
dqm->ops.initialize = initialize_cpsch;
|
||||
dqm->ops.start = start_cpsch;
|
||||
dqm->ops.stop = stop_cpsch;
|
||||
dqm->ops.destroy_queue = destroy_queue_cpsch;
|
||||
dqm->ops.update_queue = update_queue;
|
||||
dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
|
||||
dqm->ops.register_process = register_process_nocpsch;
|
||||
dqm->ops.unregister_process = unregister_process_nocpsch;
|
||||
dqm->ops.uninitialize = uninitialize_nocpsch;
|
||||
dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
|
||||
dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
|
||||
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
|
||||
break;
|
||||
case KFD_SCHED_POLICY_NO_HWS:
|
||||
/* initialize dqm for no cp scheduling */
|
||||
dqm->start = start_nocpsch;
|
||||
dqm->stop = stop_nocpsch;
|
||||
dqm->create_queue = create_queue_nocpsch;
|
||||
dqm->destroy_queue = destroy_queue_nocpsch;
|
||||
dqm->update_queue = update_queue;
|
||||
dqm->get_mqd_manager = get_mqd_manager_nocpsch;
|
||||
dqm->register_process = register_process_nocpsch;
|
||||
dqm->unregister_process = unregister_process_nocpsch;
|
||||
dqm->initialize = initialize_nocpsch;
|
||||
dqm->uninitialize = uninitialize_nocpsch;
|
||||
dqm->set_cache_memory_policy = set_cache_memory_policy;
|
||||
dqm->ops.start = start_nocpsch;
|
||||
dqm->ops.stop = stop_nocpsch;
|
||||
dqm->ops.create_queue = create_queue_nocpsch;
|
||||
dqm->ops.destroy_queue = destroy_queue_nocpsch;
|
||||
dqm->ops.update_queue = update_queue;
|
||||
dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
|
||||
dqm->ops.register_process = register_process_nocpsch;
|
||||
dqm->ops.unregister_process = unregister_process_nocpsch;
|
||||
dqm->ops.initialize = initialize_nocpsch;
|
||||
dqm->ops.uninitialize = uninitialize_nocpsch;
|
||||
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
|
||||
if (dqm->initialize(dqm) != 0) {
|
||||
switch (dev->device_info->asic_family) {
|
||||
case CHIP_CARRIZO:
|
||||
device_queue_manager_init_vi(&dqm->ops_asic_specific);
|
||||
break;
|
||||
|
||||
case CHIP_KAVERI:
|
||||
device_queue_manager_init_cik(&dqm->ops_asic_specific);
|
||||
break;
|
||||
}
|
||||
|
||||
if (dqm->ops.initialize(dqm) != 0) {
|
||||
kfree(dqm);
|
||||
return NULL;
|
||||
}
|
||||
@@ -1154,7 +1191,6 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
|
||||
{
|
||||
BUG_ON(!dqm);
|
||||
|
||||
dqm->uninitialize(dqm);
|
||||
dqm->ops.uninitialize(dqm);
|
||||
kfree(dqm);
|
||||
}
|
||||
|
||||
|
@@ -36,6 +36,9 @@
|
||||
#define KFD_VMID_START_OFFSET (8)
|
||||
#define VMID_PER_DEVICE CIK_VMID_NUM
|
||||
#define KFD_DQM_FIRST_PIPE (0)
|
||||
#define CIK_SDMA_QUEUES (4)
|
||||
#define CIK_SDMA_QUEUES_PER_ENGINE (2)
|
||||
#define CIK_SDMA_ENGINE_NUM (2)
|
||||
|
||||
struct device_process_node {
|
||||
struct qcm_process_device *qpd;
|
||||
@@ -43,7 +46,7 @@ struct device_process_node {
|
||||
};
|
||||
|
||||
/**
|
||||
* struct device_queue_manager
|
||||
* struct device_queue_manager_ops
|
||||
*
|
||||
* @create_queue: Queue creation routine.
|
||||
*
|
||||
@@ -78,15 +81,9 @@ struct device_process_node {
|
||||
* @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
|
||||
* memory apertures.
|
||||
*
|
||||
* This struct is a base class for the kfd queues scheduler in the
|
||||
* device level. The device base class should expose the basic operations
|
||||
* for queue creation and queue destruction. This base class hides the
|
||||
* scheduling mode of the driver and the specific implementation of the
|
||||
* concrete device. This class is the only class in the queues scheduler
|
||||
* that configures the H/W.
|
||||
*/
|
||||
|
||||
struct device_queue_manager {
|
||||
struct device_queue_manager_ops {
|
||||
int (*create_queue)(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd,
|
||||
@@ -121,7 +118,23 @@ struct device_queue_manager {
|
||||
enum cache_policy alternate_policy,
|
||||
void __user *alternate_aperture_base,
|
||||
uint64_t alternate_aperture_size);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct device_queue_manager
|
||||
*
|
||||
* This struct is a base class for the kfd queues scheduler in the
|
||||
* device level. The device base class should expose the basic operations
|
||||
* for queue creation and queue destruction. This base class hides the
|
||||
* scheduling mode of the driver and the specific implementation of the
|
||||
* concrete device. This class is the only class in the queues scheduler
|
||||
* that configures the H/W.
|
||||
*
|
||||
*/
|
||||
|
||||
struct device_queue_manager {
|
||||
struct device_queue_manager_ops ops;
|
||||
struct device_queue_manager_ops ops_asic_specific;
|
||||
|
||||
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
|
||||
struct packet_manager packets;
|
||||
@@ -130,9 +143,11 @@ struct device_queue_manager {
|
||||
struct list_head queues;
|
||||
unsigned int processes_count;
|
||||
unsigned int queue_count;
|
||||
unsigned int sdma_queue_count;
|
||||
unsigned int total_queue_count;
|
||||
unsigned int next_pipe_to_allocate;
|
||||
unsigned int *allocated_queues;
|
||||
unsigned int sdma_bitmap;
|
||||
unsigned int vmid_bitmap;
|
||||
uint64_t pipelines_addr;
|
||||
struct kfd_mem_obj *pipeline_mem;
|
||||
@@ -142,6 +157,28 @@ struct device_queue_manager {
|
||||
bool active_runlist;
|
||||
};
|
||||
|
||||
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops);
|
||||
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops);
|
||||
void program_sh_mem_settings(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
int init_pipelines(struct device_queue_manager *dqm,
|
||||
unsigned int pipes_num, unsigned int first_pipe);
|
||||
|
||||
extern inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
|
||||
{
|
||||
return (pdd->lds_base >> 16) & 0xFF;
|
||||
}
|
||||
|
||||
extern inline unsigned int
|
||||
get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
|
||||
{
|
||||
return (pdd->lds_base >> 60) & 0x0E;
|
||||
}
|
||||
|
||||
extern inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
|
||||
{
|
||||
BUG_ON(!dqm || !dqm->dev);
|
||||
return dqm->dev->shared_resources.compute_pipe_count;
|
||||
}
|
||||
|
||||
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
|
||||
|
135
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
Обычный файл
135
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
Обычный файл
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "cik_regs.h"
|
||||
|
||||
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd,
|
||||
enum cache_policy default_policy,
|
||||
enum cache_policy alternate_policy,
|
||||
void __user *alternate_aperture_base,
|
||||
uint64_t alternate_aperture_size);
|
||||
static int register_process_cik(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
static int initialize_cpsch_cik(struct device_queue_manager *dqm);
|
||||
|
||||
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops)
|
||||
{
|
||||
ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
||||
ops->register_process = register_process_cik;
|
||||
ops->initialize = initialize_cpsch_cik;
|
||||
}
|
||||
|
||||
/*
 * compute_sh_mem_bases_64bit - build the SH_MEM_BASES value for a 64-bit
 * process from the top address nybble.
 * @top_address_nybble: must be even, non-zero and at most 0xE (BUG otherwise)
 *
 * In 64-bit mode only the top 3 bits of the LDS, scratch and GPUVM
 * apertures are under our control. The hardware supplies the remaining 59
 * bits following this pattern:
 *	LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
 *	Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
 *	GPUVM:		Y0010000'00000000 - Y0020000'00000000 (1TB)
 *
 * (X/Y being the configurable nybble, whose low bit is 0)
 *
 * LDS and scratch share one top nybble, programmed into the top 3 bits of
 * SH_MEM_BASES.PRIVATE_BASE. GPUVM could use a different one, programmed
 * into the top 3 bits of SH_MEM_BASES.SHARED_BASE, but this driver does not
 * bother and uses the same nybble for both fields.
 */
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
	unsigned int base_field;

	BUG_ON(top_address_nybble == 0 || top_address_nybble > 0xE ||
		(top_address_nybble & 1));

	base_field = top_address_nybble << 12;

	return PRIVATE_BASE(base_field) | SHARED_BASE(base_field);
}
|
||||
|
||||
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd,
|
||||
enum cache_policy default_policy,
|
||||
enum cache_policy alternate_policy,
|
||||
void __user *alternate_aperture_base,
|
||||
uint64_t alternate_aperture_size)
|
||||
{
|
||||
uint32_t default_mtype;
|
||||
uint32_t ape1_mtype;
|
||||
|
||||
default_mtype = (default_policy == cache_policy_coherent) ?
|
||||
MTYPE_NONCACHED :
|
||||
MTYPE_CACHED;
|
||||
|
||||
ape1_mtype = (alternate_policy == cache_policy_coherent) ?
|
||||
MTYPE_NONCACHED :
|
||||
MTYPE_CACHED;
|
||||
|
||||
qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
|
||||
| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
|
||||
| DEFAULT_MTYPE(default_mtype)
|
||||
| APE1_MTYPE(ape1_mtype);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int register_process_cik(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
unsigned int temp;
|
||||
|
||||
BUG_ON(!dqm || !qpd);
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
|
||||
/* check if sh_mem_config register already configured */
|
||||
if (qpd->sh_mem_config == 0) {
|
||||
qpd->sh_mem_config =
|
||||
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
|
||||
DEFAULT_MTYPE(MTYPE_NONCACHED) |
|
||||
APE1_MTYPE(MTYPE_NONCACHED);
|
||||
qpd->sh_mem_ape1_limit = 0;
|
||||
qpd->sh_mem_ape1_base = 0;
|
||||
}
|
||||
|
||||
if (qpd->pqm->process->is_32bit_user_mode) {
|
||||
temp = get_sh_mem_bases_32(pdd);
|
||||
qpd->sh_mem_bases = SHARED_BASE(temp);
|
||||
qpd->sh_mem_config |= PTR32;
|
||||
} else {
|
||||
temp = get_sh_mem_bases_nybble_64(pdd);
|
||||
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
|
||||
}
|
||||
|
||||
pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
|
||||
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * initialize_cpsch_cik - CIK-specific initialisation hook.
 * @dqm: device queue manager
 *
 * Calls init_pipelines() for all pipes reported by get_pipes_num(),
 * starting at pipe 0, and returns its result.
 */
static int initialize_cpsch_cik(struct device_queue_manager *dqm)
{
	unsigned int pipe_count = get_pipes_num(dqm);

	return init_pipelines(dqm, pipe_count, 0);
}
|
64
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
Обычный файл
64
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
Обычный файл
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kfd_device_queue_manager.h"
|
||||
|
||||
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd,
|
||||
enum cache_policy default_policy,
|
||||
enum cache_policy alternate_policy,
|
||||
void __user *alternate_aperture_base,
|
||||
uint64_t alternate_aperture_size);
|
||||
static int register_process_vi(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
static int initialize_cpsch_vi(struct device_queue_manager *dqm);
|
||||
|
||||
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops)
|
||||
{
|
||||
pr_warn("amdkfd: VI DQM is not currently supported\n");
|
||||
|
||||
ops->set_cache_memory_policy = set_cache_memory_policy_vi;
|
||||
ops->register_process = register_process_vi;
|
||||
ops->initialize = initialize_cpsch_vi;
|
||||
}
|
||||
|
||||
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd,
|
||||
enum cache_policy default_policy,
|
||||
enum cache_policy alternate_policy,
|
||||
void __user *alternate_aperture_base,
|
||||
uint64_t alternate_aperture_size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * register_process_vi - stub process-registration hook for VI.
 *
 * Ignores both arguments and always returns -1.
 */
static int register_process_vi(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return -1;
}
|
||||
|
||||
/*
 * initialize_cpsch_vi - stub initialisation hook for VI.
 *
 * Does nothing with @dqm and always returns 0.
 */
static int initialize_cpsch_vi(struct device_queue_manager *dqm)
{
	return 0;
}
|
@@ -137,10 +137,6 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
|
||||
if (dev == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
/* Find if pdd exists for combination of process and gpu id */
|
||||
if (!kfd_get_process_device_data(dev, process, 0))
|
||||
return -EINVAL;
|
||||
|
||||
/* Calculate physical address of doorbell */
|
||||
address = kfd_get_process_doorbells(dev, process);
|
||||
|
||||
|
@@ -303,10 +303,11 @@ int kfd_init_apertures(struct kfd_process *process)
|
||||
while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL &&
|
||||
id < NUM_OF_SUPPORTED_GPUS) {
|
||||
|
||||
pdd = kfd_get_process_device_data(dev, process, 1);
|
||||
if (!pdd)
|
||||
pdd = kfd_create_process_device_data(dev, process);
|
||||
if (pdd == NULL) {
|
||||
pr_err("Failed to create process device data\n");
|
||||
return -1;
|
||||
|
||||
}
|
||||
/*
|
||||
* For 64 bit process aperture will be statically reserved in
|
||||
* the x86_64 non canonical process address space
|
||||
|
@@ -56,8 +56,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
switch (type) {
|
||||
case KFD_QUEUE_TYPE_DIQ:
|
||||
case KFD_QUEUE_TYPE_HIQ:
|
||||
kq->mqd = dev->dqm->get_mqd_manager(dev->dqm,
|
||||
KFD_MQD_TYPE_CIK_HIQ);
|
||||
kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm,
|
||||
KFD_MQD_TYPE_HIQ);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
@@ -72,23 +72,19 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
if (prop.doorbell_ptr == NULL)
|
||||
goto err_get_kernel_doorbell;
|
||||
|
||||
retval = kfd2kgd->allocate_mem(dev->kgd,
|
||||
queue_size,
|
||||
PAGE_SIZE,
|
||||
KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
|
||||
(struct kgd_mem **) &kq->pq);
|
||||
|
||||
retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
|
||||
if (retval != 0)
|
||||
goto err_pq_allocate_vidmem;
|
||||
|
||||
kq->pq_kernel_addr = kq->pq->cpu_ptr;
|
||||
kq->pq_gpu_addr = kq->pq->gpu_addr;
|
||||
|
||||
retval = kfd2kgd->allocate_mem(dev->kgd,
|
||||
sizeof(*kq->rptr_kernel),
|
||||
32,
|
||||
KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
|
||||
(struct kgd_mem **) &kq->rptr_mem);
|
||||
retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
|
||||
if (retval == false)
|
||||
goto err_eop_allocate_vidmem;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
|
||||
&kq->rptr_mem);
|
||||
|
||||
if (retval != 0)
|
||||
goto err_rptr_allocate_vidmem;
|
||||
@@ -96,11 +92,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
|
||||
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
|
||||
|
||||
retval = kfd2kgd->allocate_mem(dev->kgd,
|
||||
sizeof(*kq->wptr_kernel),
|
||||
32,
|
||||
KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
|
||||
(struct kgd_mem **) &kq->wptr_mem);
|
||||
retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
|
||||
&kq->wptr_mem);
|
||||
|
||||
if (retval != 0)
|
||||
goto err_wptr_allocate_vidmem;
|
||||
@@ -121,6 +114,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
prop.queue_address = kq->pq_gpu_addr;
|
||||
prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr;
|
||||
prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
|
||||
prop.eop_ring_buffer_address = kq->eop_gpu_addr;
|
||||
prop.eop_ring_buffer_size = PAGE_SIZE;
|
||||
|
||||
if (init_queue(&kq->queue, prop) != 0)
|
||||
goto err_init_queue;
|
||||
@@ -145,11 +140,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
} else {
|
||||
/* allocate fence for DIQ */
|
||||
|
||||
retval = kfd2kgd->allocate_mem(dev->kgd,
|
||||
sizeof(uint32_t),
|
||||
32,
|
||||
KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
|
||||
(struct kgd_mem **) &kq->fence_mem_obj);
|
||||
retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t),
|
||||
&kq->fence_mem_obj);
|
||||
|
||||
if (retval != 0)
|
||||
goto err_alloc_fence;
|
||||
@@ -165,11 +157,13 @@ err_alloc_fence:
|
||||
err_init_mqd:
|
||||
uninit_queue(kq->queue);
|
||||
err_init_queue:
|
||||
kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->wptr_mem);
|
||||
kfd_gtt_sa_free(dev, kq->wptr_mem);
|
||||
err_wptr_allocate_vidmem:
|
||||
kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->rptr_mem);
|
||||
kfd_gtt_sa_free(dev, kq->rptr_mem);
|
||||
err_rptr_allocate_vidmem:
|
||||
kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->pq);
|
||||
kfd_gtt_sa_free(dev, kq->eop_mem);
|
||||
err_eop_allocate_vidmem:
|
||||
kfd_gtt_sa_free(dev, kq->pq);
|
||||
err_pq_allocate_vidmem:
|
||||
pr_err("kfd: error init pq\n");
|
||||
kfd_release_kernel_doorbell(dev, prop.doorbell_ptr);
|
||||
@@ -190,10 +184,13 @@ static void uninitialize(struct kernel_queue *kq)
|
||||
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
|
||||
kq->queue->pipe,
|
||||
kq->queue->queue);
|
||||
else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
|
||||
kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
|
||||
|
||||
kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->rptr_mem);
|
||||
kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->wptr_mem);
|
||||
kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->pq);
|
||||
kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
|
||||
kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
|
||||
kq->ops_asic_specific.uninitialize(kq);
|
||||
kfd_gtt_sa_free(kq->dev, kq->pq);
|
||||
kfd_release_kernel_doorbell(kq->dev,
|
||||
kq->queue->properties.doorbell_ptr);
|
||||
uninit_queue(kq->queue);
|
||||
@@ -265,28 +262,6 @@ static void submit_packet(struct kernel_queue *kq)
|
||||
kq->pending_wptr);
|
||||
}
|
||||
|
||||
static int sync_with_hw(struct kernel_queue *kq, unsigned long timeout_ms)
|
||||
{
|
||||
unsigned long org_timeout_ms;
|
||||
|
||||
BUG_ON(!kq);
|
||||
|
||||
org_timeout_ms = timeout_ms;
|
||||
timeout_ms += jiffies * 1000 / HZ;
|
||||
while (*kq->wptr_kernel != *kq->rptr_kernel) {
|
||||
if (time_after(jiffies * 1000 / HZ, timeout_ms)) {
|
||||
pr_err("kfd: kernel_queue %s timeout expired %lu\n",
|
||||
__func__, org_timeout_ms);
|
||||
pr_err("kfd: wptr: %d rptr: %d\n",
|
||||
*kq->wptr_kernel, *kq->rptr_kernel);
|
||||
return -ETIME;
|
||||
}
|
||||
schedule();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rollback_packet(struct kernel_queue *kq)
|
||||
{
|
||||
BUG_ON(!kq);
|
||||
@@ -304,14 +279,23 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
|
||||
if (!kq)
|
||||
return NULL;
|
||||
|
||||
kq->initialize = initialize;
|
||||
kq->uninitialize = uninitialize;
|
||||
kq->acquire_packet_buffer = acquire_packet_buffer;
|
||||
kq->submit_packet = submit_packet;
|
||||
kq->sync_with_hw = sync_with_hw;
|
||||
kq->rollback_packet = rollback_packet;
|
||||
kq->ops.initialize = initialize;
|
||||
kq->ops.uninitialize = uninitialize;
|
||||
kq->ops.acquire_packet_buffer = acquire_packet_buffer;
|
||||
kq->ops.submit_packet = submit_packet;
|
||||
kq->ops.rollback_packet = rollback_packet;
|
||||
|
||||
if (kq->initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
|
||||
switch (dev->device_info->asic_family) {
|
||||
case CHIP_CARRIZO:
|
||||
kernel_queue_init_vi(&kq->ops_asic_specific);
|
||||
break;
|
||||
|
||||
case CHIP_KAVERI:
|
||||
kernel_queue_init_cik(&kq->ops_asic_specific);
|
||||
break;
|
||||
}
|
||||
|
||||
if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
|
||||
pr_err("kfd: failed to init kernel queue\n");
|
||||
kfree(kq);
|
||||
return NULL;
|
||||
@@ -323,7 +307,7 @@ void kernel_queue_uninit(struct kernel_queue *kq)
|
||||
{
|
||||
BUG_ON(!kq);
|
||||
|
||||
kq->uninitialize(kq);
|
||||
kq->ops.uninitialize(kq);
|
||||
kfree(kq);
|
||||
}
|
||||
|
||||
@@ -335,19 +319,18 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
|
||||
|
||||
BUG_ON(!dev);
|
||||
|
||||
pr_debug("kfd: starting kernel queue test\n");
|
||||
pr_err("kfd: starting kernel queue test\n");
|
||||
|
||||
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
|
||||
BUG_ON(!kq);
|
||||
|
||||
retval = kq->acquire_packet_buffer(kq, 5, &buffer);
|
||||
retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
|
||||
BUG_ON(retval != 0);
|
||||
for (i = 0; i < 5; i++)
|
||||
buffer[i] = kq->nop_packet;
|
||||
kq->submit_packet(kq);
|
||||
kq->sync_with_hw(kq, 1000);
|
||||
kq->ops.submit_packet(kq);
|
||||
|
||||
pr_debug("kfd: ending kernel queue test\n");
|
||||
pr_err("kfd: ending kernel queue test\n");
|
||||
}
|
||||
|
||||
|
||||
|
@@ -28,8 +28,31 @@
|
||||
#include <linux/types.h>
|
||||
#include "kfd_priv.h"
|
||||
|
||||
struct kernel_queue {
|
||||
/* interface */
|
||||
/**
|
||||
* struct kernel_queue_ops
|
||||
*
|
||||
* @initialize: Initialize a kernel queue, including allocations of GART memory
|
||||
* needed for the queue.
|
||||
*
|
||||
* @uninitialize: Uninitialize a kernel queue and free all its memory usages.
|
||||
*
|
||||
* @acquire_packet_buffer: Returns a pointer to the location in the kernel
|
||||
* queue ring buffer where the calling function can write its packet. It is
|
||||
* guaranteed that there is enough space for that packet. It also updates the
|
||||
* pending write pointer to that location so subsequent calls to
|
||||
* acquire_packet_buffer will get a correct write pointer
|
||||
*
|
||||
* @submit_packet: Update the write pointer and doorbell of a kernel queue.
|
||||
*
|
||||
* @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
|
||||
* queue are equal, which means the CP has read all the submitted packets.
|
||||
*
|
||||
* @rollback_packet: This routine is called if we failed to build an acquired
|
||||
* packet for some reason. It just overwrites the pending wptr with the current
|
||||
* one
|
||||
*
|
||||
*/
|
||||
struct kernel_queue_ops {
|
||||
bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size);
|
||||
void (*uninitialize)(struct kernel_queue *kq);
|
||||
@@ -38,9 +61,12 @@ struct kernel_queue {
|
||||
unsigned int **buffer_ptr);
|
||||
|
||||
void (*submit_packet)(struct kernel_queue *kq);
|
||||
int (*sync_with_hw)(struct kernel_queue *kq,
|
||||
unsigned long timeout_ms);
|
||||
void (*rollback_packet)(struct kernel_queue *kq);
|
||||
};
|
||||
|
||||
struct kernel_queue {
|
||||
struct kernel_queue_ops ops;
|
||||
struct kernel_queue_ops ops_asic_specific;
|
||||
|
||||
/* data */
|
||||
struct kfd_dev *dev;
|
||||
@@ -58,6 +84,9 @@ struct kernel_queue {
|
||||
struct kfd_mem_obj *pq;
|
||||
uint64_t pq_gpu_addr;
|
||||
uint32_t *pq_kernel_addr;
|
||||
struct kfd_mem_obj *eop_mem;
|
||||
uint64_t eop_gpu_addr;
|
||||
uint32_t *eop_kernel_addr;
|
||||
|
||||
struct kfd_mem_obj *fence_mem_obj;
|
||||
uint64_t fence_gpu_addr;
|
||||
@@ -66,4 +95,7 @@ struct kernel_queue {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
void kernel_queue_init_cik(struct kernel_queue_ops *ops);
|
||||
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
|
||||
|
||||
#endif /* KFD_KERNEL_QUEUE_H_ */
|
||||
|
44
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
Обычный файл
44
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
Обычный файл
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kfd_kernel_queue.h"
|
||||
|
||||
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size);
|
||||
static void uninitialize_cik(struct kernel_queue *kq);
|
||||
|
||||
void kernel_queue_init_cik(struct kernel_queue_ops *ops)
|
||||
{
|
||||
ops->initialize = initialize_cik;
|
||||
ops->uninitialize = uninitialize_cik;
|
||||
}
|
||||
|
||||
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * uninitialize_cik - CIK-specific kernel queue teardown hook.
 *
 * Intentionally empty: initialize_cik() acquires nothing, so there is
 * nothing to release.
 */
static void uninitialize_cik(struct kernel_queue *kq)
{
	/* Nothing to tear down. */
}
|
56
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
Обычный файл
56
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
Обычный файл
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kfd_kernel_queue.h"
|
||||
|
||||
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size);
|
||||
static void uninitialize_vi(struct kernel_queue *kq);
|
||||
|
||||
void kernel_queue_init_vi(struct kernel_queue_ops *ops)
|
||||
{
|
||||
ops->initialize = initialize_vi;
|
||||
ops->uninitialize = uninitialize_vi;
|
||||
}
|
||||
|
||||
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
enum kfd_queue_type type, unsigned int queue_size)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
|
||||
if (retval != 0)
|
||||
return false;
|
||||
|
||||
kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
|
||||
kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
|
||||
|
||||
memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void uninitialize_vi(struct kernel_queue *kq)
|
||||
{
|
||||
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
|
||||
}
|
@@ -29,10 +29,10 @@
|
||||
#define KFD_DRIVER_AUTHOR "AMD Inc. and others"
|
||||
|
||||
#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
|
||||
#define KFD_DRIVER_DATE "20141113"
|
||||
#define KFD_DRIVER_DATE "20150122"
|
||||
#define KFD_DRIVER_MAJOR 0
|
||||
#define KFD_DRIVER_MINOR 7
|
||||
#define KFD_DRIVER_PATCHLEVEL 0
|
||||
#define KFD_DRIVER_PATCHLEVEL 1
|
||||
|
||||
const struct kfd2kgd_calls *kfd2kgd;
|
||||
static const struct kgd2kfd_calls kgd2kfd = {
|
||||
@@ -48,7 +48,7 @@ static const struct kgd2kfd_calls kgd2kfd = {
|
||||
int sched_policy = KFD_SCHED_POLICY_HWS;
|
||||
module_param(sched_policy, int, 0444);
|
||||
MODULE_PARM_DESC(sched_policy,
|
||||
"Kernel cmdline parameter that defines the amdkfd scheduling policy");
|
||||
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
|
||||
|
||||
int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
|
||||
module_param(max_num_of_queues_per_device, int, 0444);
|
||||
|
@@ -21,326 +21,17 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/printk.h>
|
||||
#include <linux/slab.h>
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_mqd_manager.h"
|
||||
#include "cik_regs.h"
|
||||
#include "../../radeon/cik_reg.h"
|
||||
|
||||
inline void busy_wait(unsigned long ms)
|
||||
{
|
||||
while (time_before(jiffies, ms))
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
/* Reinterpret an opaque MQD pointer as a pointer to struct cik_mqd. */
static inline struct cik_mqd *get_mqd(void *mqd)
{
	struct cik_mqd *m = mqd;

	return m;
}
|
||||
|
||||
/*
 * init_mqd - allocate an MQD and fill in its default values.
 *
 * @mm: MQD manager; supplies the device and the update_mqd callback
 * @mqd: receives the CPU pointer to the new MQD
 * @mqd_mem_obj: receives the memory object backing the MQD
 * @gart_addr: if not NULL, receives the GPU address of the MQD
 * @q: queue properties, passed on to mm->update_mqd()
 *
 * Returns -ENOMEM if the allocation fails, otherwise whatever
 * mm->update_mqd() returns.
 * NOTE(review): the allocation is not released here if update_mqd() fails.
 */
static int init_mqd(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	uint64_t addr;
	struct cik_mqd *m;
	int retval;

	/* mqd_mem_obj is dereferenced below; check it as init_mqd_hiq does */
	BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);

	pr_debug("kfd: In func %s\n", __func__);

	retval = kfd2kgd->allocate_mem(mm->dev->kgd,
					sizeof(struct cik_mqd),
					256,
					KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
					(struct kgd_mem **) mqd_mem_obj);

	if (retval != 0)
		return -ENOMEM;

	m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
	addr = (*mqd_mem_obj)->gpu_addr;

	/* Clears the structure rounded up to the 256-byte alignment. */
	memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;

	/*
	 * Make sure to use the last queue state saved on mqd when the cp
	 * reassigns the queue, so when queue is switched on/off (e.g over
	 * subscription or quantum timeout) the context will be consistent
	 */
	m->cp_hqd_persistent_state =
				DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ;

	m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	/*
	 * Written once: the same value used to be stored twice in a row.
	 * WinKFD programs this field, although the original author
	 * suspected that it should not be necessary.
	 */
	m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;

	m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
				QUANTUM_DURATION(10);

	/*
	 * Pipe Priority
	 * Identifies the pipe relative priority when this queue is connected
	 * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
	 * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
	 * 0 = CS_LOW (typically below GFX)
	 * 1 = CS_MEDIUM (typically between HP3D and GFX)
	 * 2 = CS_HIGH (typically above HP3D)
	 */
	m->cp_hqd_pipe_priority = 1;
	m->cp_hqd_queue_priority = 15;

	*mqd = m;
	if (gart_addr != NULL)
		*gart_addr = addr;
	retval = mm->update_mqd(mm, m, q);

	return retval;
}
|
||||
|
||||
static void uninit_mqd(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
BUG_ON(!mm || !mqd);
|
||||
kfd2kgd->free_mem(mm->dev->kgd, (struct kgd_mem *) mqd_mem_obj);
|
||||
}
|
||||
|
||||
/*
 * load_mqd - forward an MQD load request to kfd2kgd->hqd_load().
 *
 * All arguments are passed through unchanged together with the device
 * handle taken from @mm; the callback's result is returned.
 */
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr)
{
	int rc;

	rc = kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);

	return rc;
}
|
||||
|
||||
static int update_mqd(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct cik_mqd *m;
|
||||
|
||||
BUG_ON(!mm || !q || !mqd);
|
||||
|
||||
pr_debug("kfd: In func %s\n", __func__);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
|
||||
DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
|
||||
|
||||
/*
|
||||
* Calculating queue size which is log base 2 of actual queue size -1
|
||||
* dwords and another -1 for ffs
|
||||
*/
|
||||
m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
|
||||
- 1 - 1;
|
||||
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
|
||||
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
|
||||
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
|
||||
DOORBELL_OFFSET(q->doorbell_off);
|
||||
|
||||
m->cp_hqd_vmid = q->vmid;
|
||||
|
||||
if (q->format == KFD_QUEUE_FORMAT_AQL) {
|
||||
m->cp_hqd_iq_rptr = AQL_ENABLE;
|
||||
m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
|
||||
}
|
||||
|
||||
m->cp_hqd_active = 0;
|
||||
q->is_active = false;
|
||||
if (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0) {
|
||||
m->cp_hqd_active = 1;
|
||||
q->is_active = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * destroy_mqd - forward a queue destroy request to kfd2kgd->hqd_destroy().
 *
 * @mqd is not used; the remaining arguments are passed through together
 * with the device handle taken from @mm, and the callback's result is
 * returned.
 */
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type,
			unsigned int timeout, uint32_t pipe_id,
			uint32_t queue_id)
{
	int rc;

	rc = kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
					pipe_id, queue_id);

	return rc;
}
|
||||
|
||||
static bool is_occupied(struct mqd_manager *mm, void *mqd,
|
||||
uint64_t queue_address, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
{
|
||||
|
||||
return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
|
||||
pipe_id, queue_id);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* HIQ MQD implementation: the concrete MQD implementation used for the HIQ.
|
||||
* The HIQ queue in Kaveri is using the same MQD structure as all the user mode
|
||||
* queues but with different initial values.
|
||||
*/
|
||||
|
||||
/*
 * init_mqd_hiq - allocate an MQD for the HIQ and fill in its defaults.
 *
 * @mm: MQD manager; supplies the device and the update_mqd callback
 * @mqd: receives the CPU pointer to the new MQD
 * @mqd_mem_obj: receives the memory object backing the MQD
 * @gart_addr: if not NULL, receives the GPU address of the MQD
 * @q: queue properties, passed on to mm->update_mqd()
 *
 * Returns -ENOMEM if the allocation fails, otherwise whatever
 * mm->update_mqd() returns.
 */
static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	struct cik_mqd *m;
	uint64_t addr;

	BUG_ON(!(mm && q && mqd && mqd_mem_obj));

	pr_debug("kfd: In func %s\n", __func__);

	if (kfd2kgd->allocate_mem(mm->dev->kgd,
					sizeof(struct cik_mqd),
					256,
					KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
					(struct kgd_mem **) mqd_mem_obj) != 0)
		return -ENOMEM;

	addr = (*mqd_mem_obj)->gpu_addr;
	m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;

	/* Clears the structure rounded up to the 256-byte alignment. */
	memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;

	m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
				QUANTUM_DURATION(10);
	m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE |
					PRELOAD_REQ;

	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);
	m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;

	m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;

	/*
	 * Pipe priority is relative to the GFX pipe and HP3D; KFD uses a
	 * fixed value of CS_MEDIUM:
	 * 0 = CS_LOW (typically below GFX)
	 * 1 = CS_MEDIUM (typically between HP3D and GFX)
	 * 2 = CS_HIGH (typically above HP3D)
	 */
	m->cp_hqd_pipe_priority = 1;
	m->cp_hqd_queue_priority = 15;

	*mqd = m;
	if (gart_addr != NULL)
		*gart_addr = addr;

	return mm->update_mqd(mm, m, q);
}
|
||||
|
||||
static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct cik_mqd *m;
|
||||
|
||||
BUG_ON(!mm || !q || !mqd);
|
||||
|
||||
pr_debug("kfd: In func %s\n", __func__);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
|
||||
DEFAULT_MIN_AVAIL_SIZE |
|
||||
PRIV_STATE |
|
||||
KMD_QUEUE;
|
||||
|
||||
/*
|
||||
* Calculating queue size which is log base 2 of actual queue
|
||||
* size -1 dwords
|
||||
*/
|
||||
m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
|
||||
- 1 - 1;
|
||||
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
|
||||
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
|
||||
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
|
||||
DOORBELL_OFFSET(q->doorbell_off);
|
||||
|
||||
m->cp_hqd_vmid = q->vmid;
|
||||
|
||||
m->cp_hqd_active = 0;
|
||||
q->is_active = false;
|
||||
if (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0) {
|
||||
m->cp_hqd_active = 1;
|
||||
q->is_active = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev)
|
||||
{
|
||||
struct mqd_manager *mqd;
|
||||
|
||||
BUG_ON(!dev);
|
||||
BUG_ON(type >= KFD_MQD_TYPE_MAX);
|
||||
|
||||
pr_debug("kfd: In func %s\n", __func__);
|
||||
|
||||
mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
|
||||
if (!mqd)
|
||||
return NULL;
|
||||
|
||||
mqd->dev = dev;
|
||||
|
||||
switch (type) {
|
||||
case KFD_MQD_TYPE_CIK_CP:
|
||||
case KFD_MQD_TYPE_CIK_COMPUTE:
|
||||
mqd->init_mqd = init_mqd;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
break;
|
||||
case KFD_MQD_TYPE_CIK_HIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
break;
|
||||
default:
|
||||
kfree(mqd);
|
||||
return NULL;
|
||||
switch (dev->device_info->asic_family) {
|
||||
case CHIP_KAVERI:
|
||||
return mqd_manager_init_cik(type, dev);
|
||||
case CHIP_CARRIZO:
|
||||
return mqd_manager_init_vi(type, dev);
|
||||
}
|
||||
|
||||
return mqd;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* SDMA queues should be implemented here once the CP supports them */
|
||||
|
450
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
Обычный файл
450
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
Обычный файл
@@ -0,0 +1,450 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/printk.h>
|
||||
#include <linux/slab.h>
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_mqd_manager.h"
|
||||
#include "cik_regs.h"
|
||||
#include "cik_structs.h"
|
||||
|
||||
/* Reinterpret an opaque MQD pointer as a pointer to struct cik_mqd. */
static inline struct cik_mqd *get_mqd(void *mqd)
{
	struct cik_mqd *m = mqd;

	return m;
}
|
||||
|
||||
/*
 * init_mqd - allocate a compute MQD and fill in its default values.
 *
 * @mm: MQD manager; supplies the device and the update_mqd callback
 * @mqd: receives the CPU pointer to the new MQD
 * @mqd_mem_obj: receives the memory object backing the MQD
 * @gart_addr: if not NULL, receives the GPU address of the MQD
 * @q: queue properties; q->format selects AQL mode, and the structure is
 *     passed on to mm->update_mqd()
 *
 * Returns -ENOMEM if the allocation fails, otherwise whatever
 * mm->update_mqd() returns.
 * NOTE(review): the allocation is not released here if update_mqd() fails.
 */
static int init_mqd(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	uint64_t addr;
	struct cik_mqd *m;
	int retval;

	/* mqd_mem_obj is dereferenced below; check it as init_mqd_hiq does */
	BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);

	pr_debug("kfd: In func %s\n", __func__);

	retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
					mqd_mem_obj);

	if (retval != 0)
		return -ENOMEM;

	m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
	addr = (*mqd_mem_obj)->gpu_addr;

	/*
	 * Clears the structure rounded up to a 256-byte multiple.
	 * NOTE(review): only sizeof(struct cik_mqd) was requested above -
	 * presumably the allocator hands out enough to cover the rounded
	 * size; confirm.
	 */
	memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;

	/*
	 * Make sure to use the last queue state saved on mqd when the cp
	 * reassigns the queue, so when queue is switched on/off (e.g over
	 * subscription or quantum timeout) the context will be consistent
	 */
	m->cp_hqd_persistent_state =
				DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ;

	m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	/*
	 * Written once: the same value used to be stored twice in a row.
	 * WinKFD programs this field, although the original author
	 * suspected that it should not be necessary.
	 */
	m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;

	m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
				QUANTUM_DURATION(10);

	/*
	 * Pipe Priority
	 * Identifies the pipe relative priority when this queue is connected
	 * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
	 * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
	 * 0 = CS_LOW (typically below GFX)
	 * 1 = CS_MEDIUM (typically between HP3D and GFX)
	 * 2 = CS_HIGH (typically above HP3D)
	 */
	m->cp_hqd_pipe_priority = 1;
	m->cp_hqd_queue_priority = 15;

	if (q->format == KFD_QUEUE_FORMAT_AQL)
		m->cp_hqd_iq_rptr = AQL_ENABLE;

	*mqd = m;
	if (gart_addr != NULL)
		*gart_addr = addr;
	retval = mm->update_mqd(mm, m, q);

	return retval;
}
|
||||
|
||||
/*
 * init_mqd_sdma - allocate and zero the MQD of an SDMA queue.
 *
 * @mm: MQD manager; supplies the device and the update_mqd callback
 * @mqd: receives the CPU pointer to the new MQD
 * @mqd_mem_obj: receives the memory object backing the MQD
 * @gart_addr: if not NULL, receives the GPU address of the MQD
 * @q: queue properties, passed on to mm->update_mqd()
 *
 * The MQD is a struct cik_sdma_rlc_registers. Returns -ENOMEM if the
 * allocation fails, otherwise whatever mm->update_mqd() returns.
 */
static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct cik_sdma_rlc_registers *regs;

	BUG_ON(!(mm && mqd && mqd_mem_obj));

	if (kfd_gtt_sa_allocate(mm->dev,
				sizeof(struct cik_sdma_rlc_registers),
				mqd_mem_obj) != 0)
		return -ENOMEM;

	regs = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
	memset(regs, 0, sizeof(struct cik_sdma_rlc_registers));

	*mqd = regs;
	if (gart_addr)
		*gart_addr = (*mqd_mem_obj)->gpu_addr;

	return mm->update_mqd(mm, regs, q);
}
|
||||
|
||||
static void uninit_mqd(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
BUG_ON(!mm || !mqd);
|
||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
BUG_ON(!mm || !mqd);
|
||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
/*
 * load_mqd - forward an MQD load request to kfd2kgd->hqd_load().
 *
 * All arguments are passed through unchanged together with the device
 * handle taken from @mm; the callback's result is returned.
 */
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr)
{
	int rc;

	rc = kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);

	return rc;
}
|
||||
|
||||
/*
 * load_mqd_sdma - forward an SDMA MQD load to kfd2kgd->hqd_sdma_load().
 *
 * Only @mqd and the device handle taken from @mm are passed on; @pipe_id,
 * @queue_id and @wptr are not used.
 */
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t __user *wptr)
{
	int rc;

	rc = kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);

	return rc;
}
|
||||
|
||||
static int update_mqd(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct cik_mqd *m;
|
||||
|
||||
BUG_ON(!mm || !q || !mqd);
|
||||
|
||||
pr_debug("kfd: In func %s\n", __func__);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
|
||||
DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
|
||||
|
||||
/*
|
||||
* Calculating queue size which is log base 2 of actual queue size -1
|
||||
* dwords and another -1 for ffs
|
||||
*/
|
||||
m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
|
||||
- 1 - 1;
|
||||
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
|
||||
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
|
||||
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
|
||||
DOORBELL_OFFSET(q->doorbell_off);
|
||||
|
||||
m->cp_hqd_vmid = q->vmid;
|
||||
|
||||
if (q->format == KFD_QUEUE_FORMAT_AQL) {
|
||||
m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
|
||||
}
|
||||
|
||||
m->cp_hqd_active = 0;
|
||||
q->is_active = false;
|
||||
if (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0) {
|
||||
m->cp_hqd_active = 1;
|
||||
q->is_active = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct cik_sdma_rlc_registers *m;
|
||||
|
||||
BUG_ON(!mm || !mqd || !q);
|
||||
|
||||
m = get_sdma_mqd(mqd);
|
||||
m->sdma_rlc_rb_cntl =
|
||||
SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) |
|
||||
SDMA_RB_VMID(q->vmid) |
|
||||
SDMA_RPTR_WRITEBACK_ENABLE |
|
||||
SDMA_RPTR_WRITEBACK_TIMER(6);
|
||||
|
||||
m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8);
|
||||
m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
|
||||
m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
|
||||
m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
|
||||
m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE;
|
||||
m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
|
||||
|
||||
m->sdma_engine_id = q->sdma_engine_id;
|
||||
m->sdma_queue_id = q->sdma_queue_id;
|
||||
|
||||
q->is_active = false;
|
||||
if (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0) {
|
||||
m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE;
|
||||
q->is_active = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * destroy_mqd - forward a queue destroy request to kfd2kgd->hqd_destroy().
 *
 * @mqd is not used; the remaining arguments are passed through together
 * with the device handle taken from @mm, and the callback's result is
 * returned.
 */
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type,
			unsigned int timeout, uint32_t pipe_id,
			uint32_t queue_id)
{
	int rc;

	rc = kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
					pipe_id, queue_id);

	return rc;
}
|
||||
|
||||
/*
|
||||
* preempt type here is ignored because there is only one way
|
||||
* to preempt sdma queue
|
||||
*/
|
||||
static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
enum kfd_preempt_type type,
|
||||
unsigned int timeout, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
{
|
||||
return kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
|
||||
}
|
||||
|
||||
static bool is_occupied(struct mqd_manager *mm, void *mqd,
|
||||
uint64_t queue_address, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
{
|
||||
|
||||
return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
|
||||
pipe_id, queue_id);
|
||||
|
||||
}
|
||||
|
||||
static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
|
||||
uint64_t queue_address, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
{
|
||||
return kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
|
||||
}
|
||||
|
||||
/*
|
||||
* HIQ MQD implementation: the concrete MQD implementation used for the HIQ.
|
||||
* The HIQ queue in Kaveri is using the same MQD structure as all the user mode
|
||||
* queues but with different initial values.
|
||||
*/
|
||||
|
||||
/*
 * init_mqd_hiq - allocate an MQD for the HIQ and fill in its defaults.
 *
 * @mm: MQD manager; supplies the device and the update_mqd callback
 * @mqd: receives the CPU pointer to the new MQD
 * @mqd_mem_obj: receives the memory object backing the MQD
 * @gart_addr: if not NULL, receives the GPU address of the MQD
 * @q: queue properties, passed on to mm->update_mqd()
 *
 * Returns -ENOMEM if the allocation fails, otherwise whatever
 * mm->update_mqd() returns.
 */
static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	struct cik_mqd *m;
	uint64_t addr;

	BUG_ON(!(mm && q && mqd && mqd_mem_obj));

	pr_debug("kfd: In func %s\n", __func__);

	if (kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
				mqd_mem_obj) != 0)
		return -ENOMEM;

	addr = (*mqd_mem_obj)->gpu_addr;
	m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;

	/* Clears the structure rounded up to a 256-byte multiple. */
	memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;

	m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
				QUANTUM_DURATION(10);
	m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE |
					PRELOAD_REQ;

	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);
	m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;

	m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;

	/*
	 * Pipe priority is relative to the GFX pipe and HP3D; KFD uses a
	 * fixed value of CS_MEDIUM:
	 * 0 = CS_LOW (typically below GFX)
	 * 1 = CS_MEDIUM (typically between HP3D and GFX)
	 * 2 = CS_HIGH (typically above HP3D)
	 */
	m->cp_hqd_pipe_priority = 1;
	m->cp_hqd_queue_priority = 15;

	*mqd = m;
	if (gart_addr != NULL)
		*gart_addr = addr;

	return mm->update_mqd(mm, m, q);
}
|
||||
|
||||
static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct cik_mqd *m;
|
||||
|
||||
BUG_ON(!mm || !q || !mqd);
|
||||
|
||||
pr_debug("kfd: In func %s\n", __func__);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
|
||||
DEFAULT_MIN_AVAIL_SIZE |
|
||||
PRIV_STATE |
|
||||
KMD_QUEUE;
|
||||
|
||||
/*
|
||||
* Calculating queue size which is log base 2 of actual queue
|
||||
* size -1 dwords
|
||||
*/
|
||||
m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
|
||||
- 1 - 1;
|
||||
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
|
||||
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
|
||||
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
|
||||
m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
|
||||
DOORBELL_OFFSET(q->doorbell_off);
|
||||
|
||||
m->cp_hqd_vmid = q->vmid;
|
||||
|
||||
m->cp_hqd_active = 0;
|
||||
q->is_active = false;
|
||||
if (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0) {
|
||||
m->cp_hqd_active = 1;
|
||||
q->is_active = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * get_sdma_mqd() - view an opaque MQD pointer as SDMA RLC registers.
 * @mqd: opaque MQD pointer; must not be NULL (enforced with BUG_ON)
 *
 * Returns @mqd cast to struct cik_sdma_rlc_registers *. Nothing is copied.
 */
struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	BUG_ON(!mqd);

	return (struct cik_sdma_rlc_registers *)mqd;
}
|
||||
|
||||
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev)
|
||||
{
|
||||
struct mqd_manager *mqd;
|
||||
|
||||
BUG_ON(!dev);
|
||||
BUG_ON(type >= KFD_MQD_TYPE_MAX);
|
||||
|
||||
pr_debug("kfd: In func %s\n", __func__);
|
||||
|
||||
mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
|
||||
if (!mqd)
|
||||
return NULL;
|
||||
|
||||
mqd->dev = dev;
|
||||
|
||||
switch (type) {
|
||||
case KFD_MQD_TYPE_CP:
|
||||
case KFD_MQD_TYPE_COMPUTE:
|
||||
mqd->init_mqd = init_mqd;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
break;
|
||||
case KFD_MQD_TYPE_HIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
break;
|
||||
case KFD_MQD_TYPE_SDMA:
|
||||
mqd->init_mqd = init_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
||||
mqd->load_mqd = load_mqd_sdma;
|
||||
mqd->update_mqd = update_mqd_sdma;
|
||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||
mqd->is_occupied = is_occupied_sdma;
|
||||
break;
|
||||
default:
|
||||
kfree(mqd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return mqd;
|
||||
}
|
||||
|
33
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
Обычный файл
33
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
Обычный файл
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/printk.h>
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_mqd_manager.h"
|
||||
|
||||
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev)
|
||||
{
|
||||
pr_warn("amdkfd: VI MQD is not currently supported\n");
|
||||
return NULL;
|
||||
}
|
@@ -97,11 +97,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
|
||||
|
||||
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
|
||||
|
||||
retval = kfd2kgd->allocate_mem(pm->dqm->dev->kgd,
|
||||
*rl_buffer_size,
|
||||
PAGE_SIZE,
|
||||
KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
|
||||
(struct kgd_mem **) &pm->ib_buffer_obj);
|
||||
retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
|
||||
&pm->ib_buffer_obj);
|
||||
|
||||
if (retval != 0) {
|
||||
pr_err("kfd: failed to allocate runlist IB\n");
|
||||
@@ -351,7 +348,7 @@ int pm_send_set_resources(struct packet_manager *pm,
|
||||
pr_debug("kfd: In func %s\n", __func__);
|
||||
|
||||
mutex_lock(&pm->lock);
|
||||
pm->priv_queue->acquire_packet_buffer(pm->priv_queue,
|
||||
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
|
||||
sizeof(*packet) / sizeof(uint32_t),
|
||||
(unsigned int **)&packet);
|
||||
if (packet == NULL) {
|
||||
@@ -378,8 +375,7 @@ int pm_send_set_resources(struct packet_manager *pm,
|
||||
packet->queue_mask_lo = lower_32_bits(res->queue_mask);
|
||||
packet->queue_mask_hi = upper_32_bits(res->queue_mask);
|
||||
|
||||
pm->priv_queue->submit_packet(pm->priv_queue);
|
||||
pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
|
||||
pm->priv_queue->ops.submit_packet(pm->priv_queue);
|
||||
|
||||
mutex_unlock(&pm->lock);
|
||||
|
||||
@@ -405,7 +401,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
|
||||
packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t);
|
||||
mutex_lock(&pm->lock);
|
||||
|
||||
retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue,
|
||||
retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
|
||||
packet_size_dwords, &rl_buffer);
|
||||
if (retval != 0)
|
||||
goto fail_acquire_packet_buffer;
|
||||
@@ -415,15 +411,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
|
||||
if (retval != 0)
|
||||
goto fail_create_runlist;
|
||||
|
||||
pm->priv_queue->submit_packet(pm->priv_queue);
|
||||
pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
|
||||
pm->priv_queue->ops.submit_packet(pm->priv_queue);
|
||||
|
||||
mutex_unlock(&pm->lock);
|
||||
|
||||
return retval;
|
||||
|
||||
fail_create_runlist:
|
||||
pm->priv_queue->rollback_packet(pm->priv_queue);
|
||||
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
|
||||
fail_acquire_packet_buffer:
|
||||
mutex_unlock(&pm->lock);
|
||||
fail_create_runlist_ib:
|
||||
@@ -441,7 +436,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
|
||||
BUG_ON(!pm || !fence_address);
|
||||
|
||||
mutex_lock(&pm->lock);
|
||||
retval = pm->priv_queue->acquire_packet_buffer(
|
||||
retval = pm->priv_queue->ops.acquire_packet_buffer(
|
||||
pm->priv_queue,
|
||||
sizeof(struct pm4_query_status) / sizeof(uint32_t),
|
||||
(unsigned int **)&packet);
|
||||
@@ -462,8 +457,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
|
||||
packet->data_hi = upper_32_bits((uint64_t)fence_value);
|
||||
packet->data_lo = lower_32_bits((uint64_t)fence_value);
|
||||
|
||||
pm->priv_queue->submit_packet(pm->priv_queue);
|
||||
pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
|
||||
pm->priv_queue->ops.submit_packet(pm->priv_queue);
|
||||
mutex_unlock(&pm->lock);
|
||||
|
||||
return 0;
|
||||
@@ -485,7 +479,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
|
||||
BUG_ON(!pm);
|
||||
|
||||
mutex_lock(&pm->lock);
|
||||
retval = pm->priv_queue->acquire_packet_buffer(
|
||||
retval = pm->priv_queue->ops.acquire_packet_buffer(
|
||||
pm->priv_queue,
|
||||
sizeof(struct pm4_unmap_queues) / sizeof(uint32_t),
|
||||
&buffer);
|
||||
@@ -540,8 +534,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
|
||||
break;
|
||||
};
|
||||
|
||||
pm->priv_queue->submit_packet(pm->priv_queue);
|
||||
pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
|
||||
pm->priv_queue->ops.submit_packet(pm->priv_queue);
|
||||
|
||||
mutex_unlock(&pm->lock);
|
||||
return 0;
|
||||
@@ -557,8 +550,7 @@ void pm_release_ib(struct packet_manager *pm)
|
||||
|
||||
mutex_lock(&pm->lock);
|
||||
if (pm->allocated) {
|
||||
kfd2kgd->free_mem(pm->dqm->dev->kgd,
|
||||
(struct kgd_mem *) pm->ib_buffer_obj);
|
||||
kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
|
||||
pm->allocated = false;
|
||||
}
|
||||
mutex_unlock(&pm->lock);
|
||||
|
@@ -103,12 +103,26 @@ enum cache_policy {
|
||||
cache_policy_noncoherent
|
||||
};
|
||||
|
||||
/* ASIC family identifiers; numeric values are spelled out explicitly. */
enum asic_family_type {
	CHIP_KAVERI = 0,
	CHIP_CARRIZO = 1
};
|
||||
|
||||
/*
 * Static per-device description.
 * NOTE(review): field meanings below are inferred from names unless stated
 * otherwise - confirm against the device table that fills this structure.
 */
struct kfd_device_info {
	/* presumably a value of enum asic_family_type - confirm */
	unsigned int asic_family;
	/* presumably the number of PASID bits supported - confirm */
	unsigned int max_pasid_bits;
	/* presumably the size of one interrupt-ring entry - confirm */
	size_t ih_ring_entry_size;
	/* node_show() derives the watch-point capability bits from this */
	uint8_t num_of_watch_points;
	/* presumably the MQD size after alignment - confirm */
	uint16_t mqd_size_aligned;
};
|
||||
|
||||
/*
 * Handle for a piece of memory returned by kfd_gtt_sa_allocate().
 */
struct kfd_mem_obj {
	/*
	 * presumably the first and last sub-allocator chunk covered by this
	 * object - confirm against the GTT sub-allocator implementation
	 */
	uint32_t range_start;
	uint32_t range_end;
	/* GPU address of the memory (used as the MQD base address) */
	uint64_t gpu_addr;
	/* CPU pointer through which the memory is written */
	uint32_t *cpu_ptr;
};
|
||||
|
||||
struct kfd_dev {
|
||||
struct kgd_dev *kgd;
|
||||
|
||||
@@ -134,6 +148,14 @@ struct kfd_dev {
|
||||
|
||||
struct kgd2kfd_shared_resources shared_resources;
|
||||
|
||||
void *gtt_mem;
|
||||
uint64_t gtt_start_gpu_addr;
|
||||
void *gtt_start_cpu_ptr;
|
||||
void *gtt_sa_bitmap;
|
||||
struct mutex gtt_sa_lock;
|
||||
unsigned int gtt_sa_chunk_size;
|
||||
unsigned int gtt_sa_num_of_chunks;
|
||||
|
||||
/* QCM Device instance */
|
||||
struct device_queue_manager *dqm;
|
||||
|
||||
@@ -149,12 +171,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd);
|
||||
|
||||
extern const struct kfd2kgd_calls *kfd2kgd;
|
||||
|
||||
struct kfd_mem_obj {
|
||||
void *bo;
|
||||
uint64_t gpu_addr;
|
||||
uint32_t *cpu_ptr;
|
||||
};
|
||||
|
||||
enum kfd_mempool {
|
||||
KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
|
||||
KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
|
||||
@@ -272,6 +288,15 @@ struct queue_properties {
|
||||
bool is_active;
|
||||
/* Not relevant for user mode queues in cp scheduling */
|
||||
unsigned int vmid;
|
||||
/* Relevant only for sdma queues*/
|
||||
uint32_t sdma_engine_id;
|
||||
uint32_t sdma_queue_id;
|
||||
uint32_t sdma_vm_addr;
|
||||
/* Relevant only for VI */
|
||||
uint64_t eop_ring_buffer_address;
|
||||
uint32_t eop_ring_buffer_size;
|
||||
uint64_t ctx_save_restore_area_address;
|
||||
uint32_t ctx_save_restore_area_size;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -314,6 +339,8 @@ struct queue {
|
||||
uint32_t pipe;
|
||||
uint32_t queue;
|
||||
|
||||
unsigned int sdma_id;
|
||||
|
||||
struct kfd_process *process;
|
||||
struct kfd_dev *device;
|
||||
};
|
||||
@@ -322,10 +349,10 @@ struct queue {
|
||||
* Please read the kfd_mqd_manager.h description.
|
||||
*/
|
||||
enum KFD_MQD_TYPE {
|
||||
KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */
|
||||
KFD_MQD_TYPE_CIK_HIQ, /* for hiq */
|
||||
KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */
|
||||
KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */
|
||||
KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */
|
||||
KFD_MQD_TYPE_HIQ, /* for hiq */
|
||||
KFD_MQD_TYPE_CP, /* for cp queues and diq */
|
||||
KFD_MQD_TYPE_SDMA, /* for sdma queues */
|
||||
KFD_MQD_TYPE_MAX
|
||||
};
|
||||
|
||||
@@ -477,8 +504,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
|
||||
struct kfd_process *p);
|
||||
void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid);
|
||||
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process *p,
|
||||
int create_pdd);
|
||||
struct kfd_process *p);
|
||||
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process *p);
|
||||
|
||||
/* Process device data iterator */
|
||||
struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
|
||||
@@ -506,6 +534,13 @@ unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
|
||||
struct kfd_process *process,
|
||||
unsigned int queue_id);
|
||||
|
||||
/* GTT Sub-Allocator */
|
||||
|
||||
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
|
||||
struct kfd_mem_obj **mem_obj);
|
||||
|
||||
int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);
|
||||
|
||||
extern struct device *kfd_device;
|
||||
|
||||
/* Topology */
|
||||
@@ -530,6 +565,8 @@ int kfd_init_apertures(struct kfd_process *process);
|
||||
/* Queue Context Management */
|
||||
inline uint32_t lower_32(uint64_t x);
|
||||
inline uint32_t upper_32(uint64_t x);
|
||||
struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd);
|
||||
inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m);
|
||||
|
||||
int init_queue(struct queue **q, struct queue_properties properties);
|
||||
void uninit_queue(struct queue *q);
|
||||
@@ -538,6 +575,10 @@ void print_queue(struct queue *q);
|
||||
|
||||
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
|
||||
void device_queue_manager_uninit(struct device_queue_manager *dqm);
|
||||
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
|
||||
|
@@ -311,24 +311,29 @@ err_alloc_process:
|
||||
}
|
||||
|
||||
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process *p,
|
||||
int create_pdd)
|
||||
struct kfd_process *p)
|
||||
{
|
||||
struct kfd_process_device *pdd = NULL;
|
||||
|
||||
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
|
||||
if (pdd->dev == dev)
|
||||
return pdd;
|
||||
break;
|
||||
|
||||
if (create_pdd) {
|
||||
pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
|
||||
if (pdd != NULL) {
|
||||
pdd->dev = dev;
|
||||
INIT_LIST_HEAD(&pdd->qpd.queues_list);
|
||||
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
|
||||
pdd->qpd.dqm = dev->dqm;
|
||||
list_add(&pdd->per_device_list, &p->per_device_data);
|
||||
}
|
||||
return pdd;
|
||||
}
|
||||
|
||||
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process *p)
|
||||
{
|
||||
struct kfd_process_device *pdd = NULL;
|
||||
|
||||
pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
|
||||
if (pdd != NULL) {
|
||||
pdd->dev = dev;
|
||||
INIT_LIST_HEAD(&pdd->qpd.queues_list);
|
||||
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
|
||||
pdd->qpd.dqm = dev->dqm;
|
||||
list_add(&pdd->per_device_list, &p->per_device_data);
|
||||
}
|
||||
|
||||
return pdd;
|
||||
@@ -344,11 +349,14 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
|
||||
struct kfd_process *p)
|
||||
{
|
||||
struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p, 1);
|
||||
struct kfd_process_device *pdd;
|
||||
int err;
|
||||
|
||||
if (pdd == NULL)
|
||||
pdd = kfd_get_process_device_data(dev, p);
|
||||
if (!pdd) {
|
||||
pr_err("Process device data doesn't exist\n");
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
if (pdd->bound)
|
||||
return pdd;
|
||||
@@ -384,7 +392,7 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
|
||||
|
||||
pqm_uninit(&p->pqm);
|
||||
|
||||
pdd = kfd_get_process_device_data(dev, p, 0);
|
||||
pdd = kfd_get_process_device_data(dev, p);
|
||||
|
||||
/*
|
||||
* Just mark pdd as unbound, because we still need it to call
|
||||
|
@@ -128,7 +128,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
|
||||
/* let DQM handle it*/
|
||||
q_properties->vmid = 0;
|
||||
q_properties->queue_id = qid;
|
||||
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
|
||||
|
||||
retval = init_queue(q, *q_properties);
|
||||
if (retval != 0)
|
||||
@@ -167,8 +166,11 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
q = NULL;
|
||||
kq = NULL;
|
||||
|
||||
pdd = kfd_get_process_device_data(dev, pqm->process, 1);
|
||||
BUG_ON(!pdd);
|
||||
pdd = kfd_get_process_device_data(dev, pqm->process);
|
||||
if (!pdd) {
|
||||
pr_err("Process device data doesn't exist\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
retval = find_available_queue_slot(pqm, qid);
|
||||
if (retval != 0)
|
||||
@@ -176,7 +178,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
|
||||
if (list_empty(&pqm->queues)) {
|
||||
pdd->qpd.pqm = pqm;
|
||||
dev->dqm->register_process(dev->dqm, &pdd->qpd);
|
||||
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
|
||||
}
|
||||
|
||||
pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL);
|
||||
@@ -186,6 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_COMPUTE:
|
||||
/* check if there is over subscription */
|
||||
if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
|
||||
@@ -201,7 +204,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
goto err_create_queue;
|
||||
pqn->q = q;
|
||||
pqn->kq = NULL;
|
||||
retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd,
|
||||
retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
|
||||
&q->properties.vmid);
|
||||
pr_debug("DQM returned %d for create_queue\n", retval);
|
||||
print_queue(q);
|
||||
@@ -215,7 +218,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
kq->queue->properties.queue_id = *qid;
|
||||
pqn->kq = kq;
|
||||
pqn->q = NULL;
|
||||
retval = dev->dqm->create_kernel_queue(dev->dqm, kq, &pdd->qpd);
|
||||
retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
|
||||
kq, &pdd->qpd);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
@@ -245,7 +249,7 @@ err_allocate_pqn:
|
||||
/* check if queues list is empty unregister process from device */
|
||||
clear_bit(*qid, pqm->queue_slot_bitmap);
|
||||
if (list_empty(&pqm->queues))
|
||||
dev->dqm->unregister_process(dev->dqm, &pdd->qpd);
|
||||
dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -277,19 +281,22 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
|
||||
dev = pqn->q->device;
|
||||
BUG_ON(!dev);
|
||||
|
||||
pdd = kfd_get_process_device_data(dev, pqm->process, 1);
|
||||
BUG_ON(!pdd);
|
||||
pdd = kfd_get_process_device_data(dev, pqm->process);
|
||||
if (!pdd) {
|
||||
pr_err("Process device data doesn't exist\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pqn->kq) {
|
||||
/* destroy kernel queue (DIQ) */
|
||||
dqm = pqn->kq->dev->dqm;
|
||||
dqm->destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
|
||||
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
|
||||
kernel_queue_uninit(pqn->kq);
|
||||
}
|
||||
|
||||
if (pqn->q) {
|
||||
dqm = pqn->q->device->dqm;
|
||||
retval = dqm->destroy_queue(dqm, &pdd->qpd, pqn->q);
|
||||
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
|
||||
if (retval != 0)
|
||||
return retval;
|
||||
|
||||
@@ -301,7 +308,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
|
||||
clear_bit(qid, pqm->queue_slot_bitmap);
|
||||
|
||||
if (list_empty(&pqm->queues))
|
||||
dqm->unregister_process(dqm, &pdd->qpd);
|
||||
dqm->ops.unregister_process(dqm, &pdd->qpd);
|
||||
|
||||
return retval;
|
||||
}
|
||||
@@ -326,7 +333,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
|
||||
pqn->q->properties.queue_percent = p->queue_percent;
|
||||
pqn->q->properties.priority = p->priority;
|
||||
|
||||
retval = pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q);
|
||||
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
|
||||
pqn->q);
|
||||
if (retval != 0)
|
||||
return retval;
|
||||
|
||||
|
@@ -27,6 +27,7 @@
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/log2.h>
|
||||
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_crat.h"
|
||||
@@ -630,10 +631,10 @@ static struct kobj_type cache_type = {
|
||||
static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
|
||||
char *buffer)
|
||||
{
|
||||
ssize_t ret;
|
||||
struct kfd_topology_device *dev;
|
||||
char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
|
||||
uint32_t i;
|
||||
uint32_t log_max_watch_addr;
|
||||
|
||||
/* Making sure that the buffer is an empty string */
|
||||
buffer[0] = 0;
|
||||
@@ -641,8 +642,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
|
||||
if (strcmp(attr->name, "gpu_id") == 0) {
|
||||
dev = container_of(attr, struct kfd_topology_device,
|
||||
attr_gpuid);
|
||||
ret = sysfs_show_32bit_val(buffer, dev->gpu_id);
|
||||
} else if (strcmp(attr->name, "name") == 0) {
|
||||
return sysfs_show_32bit_val(buffer, dev->gpu_id);
|
||||
}
|
||||
|
||||
if (strcmp(attr->name, "name") == 0) {
|
||||
dev = container_of(attr, struct kfd_topology_device,
|
||||
attr_name);
|
||||
for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
|
||||
@@ -652,80 +655,90 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
|
||||
break;
|
||||
}
|
||||
public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
|
||||
ret = sysfs_show_str_val(buffer, public_name);
|
||||
} else {
|
||||
dev = container_of(attr, struct kfd_topology_device,
|
||||
attr_props);
|
||||
sysfs_show_32bit_prop(buffer, "cpu_cores_count",
|
||||
dev->node_props.cpu_cores_count);
|
||||
sysfs_show_32bit_prop(buffer, "simd_count",
|
||||
dev->node_props.simd_count);
|
||||
|
||||
if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
|
||||
pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
|
||||
dev->node_props.mem_banks_count,
|
||||
dev->mem_bank_count);
|
||||
sysfs_show_32bit_prop(buffer, "mem_banks_count",
|
||||
dev->mem_bank_count);
|
||||
} else {
|
||||
sysfs_show_32bit_prop(buffer, "mem_banks_count",
|
||||
dev->node_props.mem_banks_count);
|
||||
}
|
||||
|
||||
sysfs_show_32bit_prop(buffer, "caches_count",
|
||||
dev->node_props.caches_count);
|
||||
sysfs_show_32bit_prop(buffer, "io_links_count",
|
||||
dev->node_props.io_links_count);
|
||||
sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
|
||||
dev->node_props.cpu_core_id_base);
|
||||
sysfs_show_32bit_prop(buffer, "simd_id_base",
|
||||
dev->node_props.simd_id_base);
|
||||
sysfs_show_32bit_prop(buffer, "capability",
|
||||
dev->node_props.capability);
|
||||
sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
|
||||
dev->node_props.max_waves_per_simd);
|
||||
sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
|
||||
dev->node_props.lds_size_in_kb);
|
||||
sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
|
||||
dev->node_props.gds_size_in_kb);
|
||||
sysfs_show_32bit_prop(buffer, "wave_front_size",
|
||||
dev->node_props.wave_front_size);
|
||||
sysfs_show_32bit_prop(buffer, "array_count",
|
||||
dev->node_props.array_count);
|
||||
sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
|
||||
dev->node_props.simd_arrays_per_engine);
|
||||
sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
|
||||
dev->node_props.cu_per_simd_array);
|
||||
sysfs_show_32bit_prop(buffer, "simd_per_cu",
|
||||
dev->node_props.simd_per_cu);
|
||||
sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
|
||||
dev->node_props.max_slots_scratch_cu);
|
||||
sysfs_show_32bit_prop(buffer, "vendor_id",
|
||||
dev->node_props.vendor_id);
|
||||
sysfs_show_32bit_prop(buffer, "device_id",
|
||||
dev->node_props.device_id);
|
||||
sysfs_show_32bit_prop(buffer, "location_id",
|
||||
dev->node_props.location_id);
|
||||
|
||||
if (dev->gpu) {
|
||||
sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
|
||||
kfd2kgd->get_max_engine_clock_in_mhz(
|
||||
dev->gpu->kgd));
|
||||
sysfs_show_64bit_prop(buffer, "local_mem_size",
|
||||
kfd2kgd->get_vmem_size(dev->gpu->kgd));
|
||||
|
||||
sysfs_show_32bit_prop(buffer, "fw_version",
|
||||
kfd2kgd->get_fw_version(
|
||||
dev->gpu->kgd,
|
||||
KGD_ENGINE_MEC1));
|
||||
|
||||
}
|
||||
|
||||
ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
|
||||
cpufreq_quick_get_max(0)/1000);
|
||||
return sysfs_show_str_val(buffer, public_name);
|
||||
}
|
||||
|
||||
return ret;
|
||||
dev = container_of(attr, struct kfd_topology_device,
|
||||
attr_props);
|
||||
sysfs_show_32bit_prop(buffer, "cpu_cores_count",
|
||||
dev->node_props.cpu_cores_count);
|
||||
sysfs_show_32bit_prop(buffer, "simd_count",
|
||||
dev->node_props.simd_count);
|
||||
|
||||
if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
|
||||
pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
|
||||
dev->node_props.mem_banks_count,
|
||||
dev->mem_bank_count);
|
||||
sysfs_show_32bit_prop(buffer, "mem_banks_count",
|
||||
dev->mem_bank_count);
|
||||
} else {
|
||||
sysfs_show_32bit_prop(buffer, "mem_banks_count",
|
||||
dev->node_props.mem_banks_count);
|
||||
}
|
||||
|
||||
sysfs_show_32bit_prop(buffer, "caches_count",
|
||||
dev->node_props.caches_count);
|
||||
sysfs_show_32bit_prop(buffer, "io_links_count",
|
||||
dev->node_props.io_links_count);
|
||||
sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
|
||||
dev->node_props.cpu_core_id_base);
|
||||
sysfs_show_32bit_prop(buffer, "simd_id_base",
|
||||
dev->node_props.simd_id_base);
|
||||
sysfs_show_32bit_prop(buffer, "capability",
|
||||
dev->node_props.capability);
|
||||
sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
|
||||
dev->node_props.max_waves_per_simd);
|
||||
sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
|
||||
dev->node_props.lds_size_in_kb);
|
||||
sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
|
||||
dev->node_props.gds_size_in_kb);
|
||||
sysfs_show_32bit_prop(buffer, "wave_front_size",
|
||||
dev->node_props.wave_front_size);
|
||||
sysfs_show_32bit_prop(buffer, "array_count",
|
||||
dev->node_props.array_count);
|
||||
sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
|
||||
dev->node_props.simd_arrays_per_engine);
|
||||
sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
|
||||
dev->node_props.cu_per_simd_array);
|
||||
sysfs_show_32bit_prop(buffer, "simd_per_cu",
|
||||
dev->node_props.simd_per_cu);
|
||||
sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
|
||||
dev->node_props.max_slots_scratch_cu);
|
||||
sysfs_show_32bit_prop(buffer, "vendor_id",
|
||||
dev->node_props.vendor_id);
|
||||
sysfs_show_32bit_prop(buffer, "device_id",
|
||||
dev->node_props.device_id);
|
||||
sysfs_show_32bit_prop(buffer, "location_id",
|
||||
dev->node_props.location_id);
|
||||
|
||||
if (dev->gpu) {
|
||||
log_max_watch_addr =
|
||||
__ilog2_u32(dev->gpu->device_info->num_of_watch_points);
|
||||
|
||||
if (log_max_watch_addr) {
|
||||
dev->node_props.capability |=
|
||||
HSA_CAP_WATCH_POINTS_SUPPORTED;
|
||||
|
||||
dev->node_props.capability |=
|
||||
((log_max_watch_addr <<
|
||||
HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
|
||||
HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
|
||||
}
|
||||
|
||||
sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
|
||||
kfd2kgd->get_max_engine_clock_in_mhz(
|
||||
dev->gpu->kgd));
|
||||
sysfs_show_64bit_prop(buffer, "local_mem_size",
|
||||
kfd2kgd->get_vmem_size(dev->gpu->kgd));
|
||||
|
||||
sysfs_show_32bit_prop(buffer, "fw_version",
|
||||
kfd2kgd->get_fw_version(
|
||||
dev->gpu->kgd,
|
||||
KGD_ENGINE_MEC1));
|
||||
}
|
||||
|
||||
return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
|
||||
cpufreq_quick_get_max(0)/1000);
|
||||
}
|
||||
|
||||
static const struct sysfs_ops node_ops = {
|
||||
|
293
drivers/gpu/drm/amd/include/cik_structs.h
Обычный файл
293
drivers/gpu/drm/amd/include/cik_structs.h
Обычный файл
@@ -0,0 +1,293 @@
|
||||
/*
|
||||
* Copyright 2012 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CIK_STRUCTS_H_
|
||||
#define CIK_STRUCTS_H_
|
||||
|
||||
/**
 * struct cik_mqd - fixed table of 128 consecutive 32-bit words.
 *
 * Every member is a uint32_t, so a member's position in the declaration is
 * its zero-based dword index and the whole structure is 128 dwords
 * (512 bytes). The numeric suffix of each reserved_NN member is that dword
 * index (reserved_96 is dword 96, reserved_108 is dword 108).
 *
 * Member names, order and count are the interface of this structure; do not
 * reorder, insert or remove members.
 *
 * NOTE(review): judging by the name and the cp_hqd_ and cp_mqd_ prefixes this is
 * presumably the CIK compute "mqd" that hqd_load() places into a H/W hqd
 * slot - confirm against the hardware documentation before relying on it.
 */
struct cik_mqd {
	/* dword 0 */
	uint32_t header;

	/* dwords 1-32: compute_* words */
	uint32_t compute_dispatch_initiator;
	uint32_t compute_dim_x;
	uint32_t compute_dim_y;
	uint32_t compute_dim_z;
	uint32_t compute_start_x;
	uint32_t compute_start_y;
	uint32_t compute_start_z;
	uint32_t compute_num_thread_x;
	uint32_t compute_num_thread_y;
	uint32_t compute_num_thread_z;
	uint32_t compute_pipelinestat_enable;
	uint32_t compute_perfcount_enable;
	uint32_t compute_pgm_lo;
	uint32_t compute_pgm_hi;
	uint32_t compute_tba_lo;
	uint32_t compute_tba_hi;
	uint32_t compute_tma_lo;
	uint32_t compute_tma_hi;
	uint32_t compute_pgm_rsrc1;
	uint32_t compute_pgm_rsrc2;
	uint32_t compute_vmid;
	uint32_t compute_resource_limits;
	uint32_t compute_static_thread_mgmt_se0;
	uint32_t compute_static_thread_mgmt_se1;
	/* tmpring_size sits between the se1 and se2 words; keep it here */
	uint32_t compute_tmpring_size;
	uint32_t compute_static_thread_mgmt_se2;
	uint32_t compute_static_thread_mgmt_se3;
	uint32_t compute_restart_x;
	uint32_t compute_restart_y;
	uint32_t compute_restart_z;
	uint32_t compute_thread_trace_enable;
	uint32_t compute_misc_reserved;

	/* dwords 33-48: compute_user_data_0..15 */
	uint32_t compute_user_data_0;
	uint32_t compute_user_data_1;
	uint32_t compute_user_data_2;
	uint32_t compute_user_data_3;
	uint32_t compute_user_data_4;
	uint32_t compute_user_data_5;
	uint32_t compute_user_data_6;
	uint32_t compute_user_data_7;
	uint32_t compute_user_data_8;
	uint32_t compute_user_data_9;
	uint32_t compute_user_data_10;
	uint32_t compute_user_data_11;
	uint32_t compute_user_data_12;
	uint32_t compute_user_data_13;
	uint32_t compute_user_data_14;
	uint32_t compute_user_data_15;

	/* dwords 49-52: 64-bit quantities split into lo/hi halves */
	uint32_t cp_compute_csinvoc_count_lo;
	uint32_t cp_compute_csinvoc_count_hi;
	uint32_t cp_mqd_base_addr_lo;
	uint32_t cp_mqd_base_addr_hi;

	/* dwords 53-84: cp_hqd_* words */
	uint32_t cp_hqd_active;
	uint32_t cp_hqd_vmid;
	uint32_t cp_hqd_persistent_state;
	uint32_t cp_hqd_pipe_priority;
	uint32_t cp_hqd_queue_priority;
	uint32_t cp_hqd_quantum;
	uint32_t cp_hqd_pq_base_lo;
	uint32_t cp_hqd_pq_base_hi;
	uint32_t cp_hqd_pq_rptr;
	uint32_t cp_hqd_pq_rptr_report_addr_lo;
	uint32_t cp_hqd_pq_rptr_report_addr_hi;
	uint32_t cp_hqd_pq_wptr_poll_addr_lo;
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;
	uint32_t cp_hqd_pq_doorbell_control;
	uint32_t cp_hqd_pq_wptr;
	uint32_t cp_hqd_pq_control;
	uint32_t cp_hqd_ib_base_addr_lo;
	uint32_t cp_hqd_ib_base_addr_hi;
	uint32_t cp_hqd_ib_rptr;
	uint32_t cp_hqd_ib_control;
	uint32_t cp_hqd_iq_timer;
	uint32_t cp_hqd_iq_rptr;
	uint32_t cp_hqd_dequeue_request;
	uint32_t cp_hqd_dma_offload;
	uint32_t cp_hqd_sema_cmd;
	uint32_t cp_hqd_msg_type;
	uint32_t cp_hqd_atomic0_preop_lo;
	uint32_t cp_hqd_atomic0_preop_hi;
	uint32_t cp_hqd_atomic1_preop_lo;
	uint32_t cp_hqd_atomic1_preop_hi;
	uint32_t cp_hqd_hq_status0;
	uint32_t cp_hqd_hq_control0;

	/* dwords 85-95: cp_mqd_* words */
	uint32_t cp_mqd_control;
	uint32_t cp_mqd_query_time_lo;
	uint32_t cp_mqd_query_time_hi;
	uint32_t cp_mqd_connect_start_time_lo;
	uint32_t cp_mqd_connect_start_time_hi;
	uint32_t cp_mqd_connect_end_time_lo;
	uint32_t cp_mqd_connect_end_time_hi;
	uint32_t cp_mqd_connect_end_wf_count;
	uint32_t cp_mqd_connect_end_pq_rptr;
	uint32_t cp_mqd_connect_end_pq_wptr;
	uint32_t cp_mqd_connect_end_ib_rptr;

	/* dwords 96-99: reserved, named after their dword index */
	uint32_t reserved_96;
	uint32_t reserved_97;
	uint32_t reserved_98;
	uint32_t reserved_99;

	/* dwords 100-107: iqtimer packet header plus seven payload dwords */
	uint32_t iqtimer_pkt_header;
	uint32_t iqtimer_pkt_dw0;
	uint32_t iqtimer_pkt_dw1;
	uint32_t iqtimer_pkt_dw2;
	uint32_t iqtimer_pkt_dw3;
	uint32_t iqtimer_pkt_dw4;
	uint32_t iqtimer_pkt_dw5;
	uint32_t iqtimer_pkt_dw6;

	/* dwords 108-111: reserved, named after their dword index */
	uint32_t reserved_108;
	uint32_t reserved_109;
	uint32_t reserved_110;
	uint32_t reserved_111;

	/* dwords 112-127: queue_doorbell_id0..15 */
	uint32_t queue_doorbell_id0;
	uint32_t queue_doorbell_id1;
	uint32_t queue_doorbell_id2;
	uint32_t queue_doorbell_id3;
	uint32_t queue_doorbell_id4;
	uint32_t queue_doorbell_id5;
	uint32_t queue_doorbell_id6;
	uint32_t queue_doorbell_id7;
	uint32_t queue_doorbell_id8;
	uint32_t queue_doorbell_id9;
	uint32_t queue_doorbell_id10;
	uint32_t queue_doorbell_id11;
	uint32_t queue_doorbell_id12;
	uint32_t queue_doorbell_id13;
	uint32_t queue_doorbell_id14;
	uint32_t queue_doorbell_id15;
};
|
||||
|
||||
/**
 * struct cik_sdma_rlc_registers - fixed table of 130 consecutive 32-bit words.
 *
 * Every member is a uint32_t, so a member's position in the declaration is
 * its zero-based dword index:
 *   dwords 0-21    sdma_rlc_* words
 *   dwords 22-127  reserved_22 .. reserved_127 (suffix == dword index)
 *   dword  128     sdma_engine_id
 *   dword  129     sdma_queue_id
 *
 * Member names, order and count are the interface of this structure; do not
 * reorder, insert or remove members.
 *
 * NOTE(review): presumably this is the SDMA queue descriptor handed around
 * as "void *mqd" by the hqd_sdma_* callbacks - confirm against the callers.
 */
struct cik_sdma_rlc_registers {
	/* dwords 0-4: ring buffer control, base and read/write pointers */
	uint32_t sdma_rlc_rb_cntl;
	uint32_t sdma_rlc_rb_base;
	uint32_t sdma_rlc_rb_base_hi;
	uint32_t sdma_rlc_rb_rptr;
	uint32_t sdma_rlc_rb_wptr;

	/*
	 * dwords 5-9: note that the two address pairs below are declared
	 * _hi before _lo, unlike sdma_rlc_ib_base_lo/_hi further down.
	 * Keep the declared order.
	 */
	uint32_t sdma_rlc_rb_wptr_poll_cntl;
	uint32_t sdma_rlc_rb_wptr_poll_addr_hi;
	uint32_t sdma_rlc_rb_wptr_poll_addr_lo;
	uint32_t sdma_rlc_rb_rptr_addr_hi;
	uint32_t sdma_rlc_rb_rptr_addr_lo;

	/* dwords 10-15: sdma_rlc_ib_* words */
	uint32_t sdma_rlc_ib_cntl;
	uint32_t sdma_rlc_ib_rptr;
	uint32_t sdma_rlc_ib_offset;
	uint32_t sdma_rlc_ib_base_lo;
	uint32_t sdma_rlc_ib_base_hi;
	uint32_t sdma_rlc_ib_size;

	/* dwords 16-21: remaining sdma_rlc_* words */
	uint32_t sdma_rlc_skip_cntl;
	uint32_t sdma_rlc_context_status;
	uint32_t sdma_rlc_doorbell;
	uint32_t sdma_rlc_virtual_addr;
	uint32_t sdma_rlc_ape1_cntl;
	uint32_t sdma_rlc_doorbell_log;

	/* dwords 22-127: reserved, each named after its dword index */
	uint32_t reserved_22;
	uint32_t reserved_23;
	uint32_t reserved_24;
	uint32_t reserved_25;
	uint32_t reserved_26;
	uint32_t reserved_27;
	uint32_t reserved_28;
	uint32_t reserved_29;
	uint32_t reserved_30;
	uint32_t reserved_31;
	uint32_t reserved_32;
	uint32_t reserved_33;
	uint32_t reserved_34;
	uint32_t reserved_35;
	uint32_t reserved_36;
	uint32_t reserved_37;
	uint32_t reserved_38;
	uint32_t reserved_39;
	uint32_t reserved_40;
	uint32_t reserved_41;
	uint32_t reserved_42;
	uint32_t reserved_43;
	uint32_t reserved_44;
	uint32_t reserved_45;
	uint32_t reserved_46;
	uint32_t reserved_47;
	uint32_t reserved_48;
	uint32_t reserved_49;
	uint32_t reserved_50;
	uint32_t reserved_51;
	uint32_t reserved_52;
	uint32_t reserved_53;
	uint32_t reserved_54;
	uint32_t reserved_55;
	uint32_t reserved_56;
	uint32_t reserved_57;
	uint32_t reserved_58;
	uint32_t reserved_59;
	uint32_t reserved_60;
	uint32_t reserved_61;
	uint32_t reserved_62;
	uint32_t reserved_63;
	uint32_t reserved_64;
	uint32_t reserved_65;
	uint32_t reserved_66;
	uint32_t reserved_67;
	uint32_t reserved_68;
	uint32_t reserved_69;
	uint32_t reserved_70;
	uint32_t reserved_71;
	uint32_t reserved_72;
	uint32_t reserved_73;
	uint32_t reserved_74;
	uint32_t reserved_75;
	uint32_t reserved_76;
	uint32_t reserved_77;
	uint32_t reserved_78;
	uint32_t reserved_79;
	uint32_t reserved_80;
	uint32_t reserved_81;
	uint32_t reserved_82;
	uint32_t reserved_83;
	uint32_t reserved_84;
	uint32_t reserved_85;
	uint32_t reserved_86;
	uint32_t reserved_87;
	uint32_t reserved_88;
	uint32_t reserved_89;
	uint32_t reserved_90;
	uint32_t reserved_91;
	uint32_t reserved_92;
	uint32_t reserved_93;
	uint32_t reserved_94;
	uint32_t reserved_95;
	uint32_t reserved_96;
	uint32_t reserved_97;
	uint32_t reserved_98;
	uint32_t reserved_99;
	uint32_t reserved_100;
	uint32_t reserved_101;
	uint32_t reserved_102;
	uint32_t reserved_103;
	uint32_t reserved_104;
	uint32_t reserved_105;
	uint32_t reserved_106;
	uint32_t reserved_107;
	uint32_t reserved_108;
	uint32_t reserved_109;
	uint32_t reserved_110;
	uint32_t reserved_111;
	uint32_t reserved_112;
	uint32_t reserved_113;
	uint32_t reserved_114;
	uint32_t reserved_115;
	uint32_t reserved_116;
	uint32_t reserved_117;
	uint32_t reserved_118;
	uint32_t reserved_119;
	uint32_t reserved_120;
	uint32_t reserved_121;
	uint32_t reserved_122;
	uint32_t reserved_123;
	uint32_t reserved_124;
	uint32_t reserved_125;
	uint32_t reserved_126;
	uint32_t reserved_127;

	/* dwords 128-129: engine and queue identifiers */
	uint32_t sdma_engine_id;
	uint32_t sdma_queue_id;
};
|
||||
|
||||
|
||||
|
||||
#endif /* CIK_STRUCTS_H_ */
|
@@ -110,17 +110,10 @@ struct kgd2kfd_calls {
|
||||
/**
|
||||
* struct kfd2kgd_calls
|
||||
*
|
||||
* @init_sa_manager: Initialize an instance of the sa manager, used by
|
||||
* amdkfd for all system memory allocations that are mapped to the GART
|
||||
* address space
|
||||
* @init_gtt_mem_allocation: Allocate a buffer on the gart aperture.
|
||||
* The buffer can be used for mqds, hpds, kernel queue, fence and runlists
|
||||
*
|
||||
* @fini_sa_manager: Releases all memory allocations for amdkfd that are
|
||||
* handled by kgd sa manager
|
||||
*
|
||||
* @allocate_mem: Allocate a buffer from amdkfd's sa manager. The buffer can
|
||||
* be used for mqds, hpds, kernel queue, fence and runlists
|
||||
*
|
||||
* @free_mem: Frees a buffer that was allocated by amdkfd's sa manager
|
||||
* @free_gtt_mem: Frees a buffer that was allocated on the gart aperture
|
||||
*
|
||||
* @get_vmem_size: Retrieves (physical) size of VRAM
|
||||
*
|
||||
@@ -136,18 +129,23 @@ struct kgd2kfd_calls {
|
||||
* @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp
|
||||
* scheduling mode. Only used for no cp scheduling mode.
|
||||
*
|
||||
* @init_memory: Initializes memory apertures to fixed base/limit address
|
||||
* and non cached memory types.
|
||||
*
|
||||
* @init_pipeline: Initializes the compute pipelines.
|
||||
*
|
||||
* @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp
|
||||
* scheduling mode.
|
||||
*
|
||||
* @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot.
|
||||
* used only for no HWS mode.
|
||||
*
|
||||
* @hqd_is_occupied: Checks if a hqd slot is occupied.
|
||||
*
|
||||
* @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
|
||||
*
|
||||
* @hqd_sdma_is_occupied: Checks if an SDMA hqd slot is occupied.
|
||||
*
|
||||
* @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that
|
||||
* SDMA hqd slot.
|
||||
*
|
||||
* @get_fw_version: Returns FW versions from the header
|
||||
*
|
||||
* This structure contains function pointers to services that the kgd driver
|
||||
@@ -155,13 +153,11 @@ struct kgd2kfd_calls {
|
||||
*
|
||||
*/
|
||||
struct kfd2kgd_calls {
|
||||
/* Memory management. */
|
||||
int (*init_sa_manager)(struct kgd_dev *kgd, unsigned int size);
|
||||
void (*fini_sa_manager)(struct kgd_dev *kgd);
|
||||
int (*allocate_mem)(struct kgd_dev *kgd, size_t size, size_t alignment,
|
||||
enum kgd_memory_pool pool, struct kgd_mem **mem);
|
||||
int (*init_gtt_mem_allocation)(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr);
|
||||
|
||||
void (*free_mem)(struct kgd_dev *kgd, struct kgd_mem *mem);
|
||||
void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj);
|
||||
|
||||
uint64_t (*get_vmem_size)(struct kgd_dev *kgd);
|
||||
uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd);
|
||||
@@ -176,25 +172,32 @@ struct kfd2kgd_calls {
|
||||
int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid,
|
||||
unsigned int vmid);
|
||||
|
||||
int (*init_memory)(struct kgd_dev *kgd);
|
||||
int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id,
|
||||
uint32_t hpd_size, uint64_t hpd_gpu_addr);
|
||||
|
||||
int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
||||
uint32_t queue_id, uint32_t __user *wptr);
|
||||
|
||||
int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
|
||||
|
||||
bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
|
||||
uint32_t pipe_id, uint32_t queue_id);
|
||||
|
||||
int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type,
|
||||
unsigned int timeout, uint32_t pipe_id,
|
||||
uint32_t queue_id);
|
||||
|
||||
bool (*hqd_sdma_is_occupied)(struct kgd_dev *kgd, void *mqd);
|
||||
|
||||
int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd,
|
||||
unsigned int timeout);
|
||||
|
||||
uint16_t (*get_fw_version)(struct kgd_dev *kgd,
|
||||
enum kgd_engine_type type);
|
||||
};
|
||||
|
||||
bool kgd2kfd_init(unsigned interface_version,
|
||||
const struct kfd2kgd_calls *f2g,
|
||||
const struct kgd2kfd_calls **g2f);
|
||||
const struct kfd2kgd_calls *f2g,
|
||||
const struct kgd2kfd_calls **g2f);
|
||||
|
||||
#endif /* KGD_KFD_INTERFACE_H_INCLUDED */
|
||||
#endif /* KGD_KFD_INTERFACE_H_INCLUDED */
|
||||
|
Ссылка в новой задаче
Block a user