Merge remote-tracking branch 'origin/master' into drm-next

Backmerge Linus tree after rc5 + drm-fixes went in.

There were a few amdkfd conflicts I wanted to avoid,
and Ben requested this for nouveau also.

Conflicts:
	drivers/gpu/drm/amd/amdkfd/Makefile
	drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
	drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
	drivers/gpu/drm/amd/amdkfd/kfd_priv.h
	drivers/gpu/drm/amd/include/kgd_kfd_interface.h
	drivers/gpu/drm/i915/intel_runtime_pm.c
	drivers/gpu/drm/radeon/radeon_kfd.c
Dave Airlie
2015-01-22 10:44:41 +10:00
551 changed files with 5307 additions and 2827 deletions

@@ -12,6 +12,5 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
kfd_interrupt.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o

@@ -31,7 +31,6 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <uapi/asm-generic/mman-common.h>
#include <asm/processor.h>
#include "kfd_priv.h"
@@ -127,17 +126,14 @@ static int kfd_open(struct inode *inode, struct file *filep)
return 0;
}
static long kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
void __user *arg)
static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_get_version_args args;
struct kfd_ioctl_get_version_args *args = data;
int err = 0;
args.major_version = KFD_IOCTL_MAJOR_VERSION;
args.minor_version = KFD_IOCTL_MINOR_VERSION;
if (copy_to_user(arg, &args, sizeof(args)))
err = -EFAULT;
args->major_version = KFD_IOCTL_MAJOR_VERSION;
args->minor_version = KFD_IOCTL_MINOR_VERSION;
return err;
}
@@ -249,10 +245,10 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
return 0;
}
static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
void __user *arg)
static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_create_queue_args args;
struct kfd_ioctl_create_queue_args *args = data;
struct kfd_dev *dev;
int err = 0;
unsigned int queue_id;
@@ -261,19 +257,16 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
memset(&q_properties, 0, sizeof(struct queue_properties));
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
pr_debug("kfd: creating queue ioctl\n");
err = set_queue_properties_from_user(&q_properties, &args);
err = set_queue_properties_from_user(&q_properties, args);
if (err)
return err;
pr_debug("kfd: looking for gpu id 0x%x\n", args.gpu_id);
dev = kfd_device_by_id(args.gpu_id);
pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id);
dev = kfd_device_by_id(args->gpu_id);
if (dev == NULL) {
pr_debug("kfd: gpu id 0x%x was not found\n", args.gpu_id);
pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id);
return -EINVAL;
}
@@ -281,7 +274,7 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
err = PTR_ERR(pdd);
err = -ESRCH;
goto err_bind_process;
}
@@ -294,33 +287,26 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
if (err != 0)
goto err_create_queue;
args.queue_id = queue_id;
args->queue_id = queue_id;
/* Return gpu_id as doorbell offset for mmap usage */
args.doorbell_offset = args.gpu_id << PAGE_SHIFT;
if (copy_to_user(arg, &args, sizeof(args))) {
err = -EFAULT;
goto err_copy_args_out;
}
args->doorbell_offset = args->gpu_id << PAGE_SHIFT;
mutex_unlock(&p->mutex);
pr_debug("kfd: queue id %d was created successfully\n", args.queue_id);
pr_debug("kfd: queue id %d was created successfully\n", args->queue_id);
pr_debug("ring buffer address == 0x%016llX\n",
args.ring_base_address);
args->ring_base_address);
pr_debug("read ptr address == 0x%016llX\n",
args.read_pointer_address);
args->read_pointer_address);
pr_debug("write ptr address == 0x%016llX\n",
args.write_pointer_address);
args->write_pointer_address);
return 0;
err_copy_args_out:
pqm_destroy_queue(&p->pqm, queue_id);
err_create_queue:
err_bind_process:
mutex_unlock(&p->mutex);
@@ -328,99 +314,90 @@ err_bind_process:
}
static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
void __user *arg)
void *data)
{
int retval;
struct kfd_ioctl_destroy_queue_args args;
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
struct kfd_ioctl_destroy_queue_args *args = data;
pr_debug("kfd: destroying queue id %d for PASID %d\n",
args.queue_id,
args->queue_id,
p->pasid);
mutex_lock(&p->mutex);
retval = pqm_destroy_queue(&p->pqm, args.queue_id);
retval = pqm_destroy_queue(&p->pqm, args->queue_id);
mutex_unlock(&p->mutex);
return retval;
}
static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
void __user *arg)
void *data)
{
int retval;
struct kfd_ioctl_update_queue_args args;
struct kfd_ioctl_update_queue_args *args = data;
struct queue_properties properties;
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
if (args.queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
return -EINVAL;
}
if (args.queue_priority > KFD_MAX_QUEUE_PRIORITY) {
if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
return -EINVAL;
}
if ((args.ring_base_address) &&
if ((args->ring_base_address) &&
(!access_ok(VERIFY_WRITE,
(const void __user *) args.ring_base_address,
(const void __user *) args->ring_base_address,
sizeof(uint64_t)))) {
pr_err("kfd: can't access ring base address\n");
return -EFAULT;
}
if (!is_power_of_2(args.ring_size) && (args.ring_size != 0)) {
if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
pr_err("kfd: ring size must be a power of 2 or 0\n");
return -EINVAL;
}
properties.queue_address = args.ring_base_address;
properties.queue_size = args.ring_size;
properties.queue_percent = args.queue_percentage;
properties.priority = args.queue_priority;
properties.queue_address = args->ring_base_address;
properties.queue_size = args->ring_size;
properties.queue_percent = args->queue_percentage;
properties.priority = args->queue_priority;
pr_debug("kfd: updating queue id %d for PASID %d\n",
args.queue_id, p->pasid);
args->queue_id, p->pasid);
mutex_lock(&p->mutex);
retval = pqm_update_queue(&p->pqm, args.queue_id, &properties);
retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
mutex_unlock(&p->mutex);
return retval;
}
static long kfd_ioctl_set_memory_policy(struct file *filep,
struct kfd_process *p, void __user *arg)
static int kfd_ioctl_set_memory_policy(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_set_memory_policy_args args;
struct kfd_ioctl_set_memory_policy_args *args = data;
struct kfd_dev *dev;
int err = 0;
struct kfd_process_device *pdd;
enum cache_policy default_policy, alternate_policy;
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
if (args.default_policy != KFD_IOC_CACHE_POLICY_COHERENT
&& args.default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
&& args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
return -EINVAL;
}
if (args.alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
&& args.alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
&& args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
return -EINVAL;
}
dev = kfd_device_by_id(args.gpu_id);
dev = kfd_device_by_id(args->gpu_id);
if (dev == NULL)
return -EINVAL;
@@ -428,23 +405,23 @@ static long kfd_ioctl_set_memory_policy(struct file *filep,
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
err = PTR_ERR(pdd);
err = -ESRCH;
goto out;
}
default_policy = (args.default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
alternate_policy =
(args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
&pdd->qpd,
default_policy,
alternate_policy,
(void __user *)args.alternate_aperture_base,
args.alternate_aperture_size))
(void __user *)args->alternate_aperture_base,
args->alternate_aperture_size))
err = -EINVAL;
out:
@@ -453,53 +430,44 @@ out:
return err;
}
static long kfd_ioctl_get_clock_counters(struct file *filep,
struct kfd_process *p, void __user *arg)
static int kfd_ioctl_get_clock_counters(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_clock_counters_args args;
struct kfd_ioctl_get_clock_counters_args *args = data;
struct kfd_dev *dev;
struct timespec time;
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
dev = kfd_device_by_id(args.gpu_id);
dev = kfd_device_by_id(args->gpu_id);
if (dev == NULL)
return -EINVAL;
/* Reading GPU clock counter from KGD */
args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);
args->gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);
/* No access to rdtsc. Using raw monotonic time */
getrawmonotonic(&time);
args.cpu_clock_counter = (uint64_t)timespec_to_ns(&time);
args->cpu_clock_counter = (uint64_t)timespec_to_ns(&time);
get_monotonic_boottime(&time);
args.system_clock_counter = (uint64_t)timespec_to_ns(&time);
args->system_clock_counter = (uint64_t)timespec_to_ns(&time);
/* Since the counter is in nano-seconds we use 1GHz frequency */
args.system_clock_freq = 1000000000;
if (copy_to_user(arg, &args, sizeof(args)))
return -EFAULT;
args->system_clock_freq = 1000000000;
return 0;
}
static int kfd_ioctl_get_process_apertures(struct file *filp,
struct kfd_process *p, void __user *arg)
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_process_apertures_args args;
struct kfd_ioctl_get_process_apertures_args *args = data;
struct kfd_process_device_apertures *pAperture;
struct kfd_process_device *pdd;
dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
args.num_of_nodes = 0;
args->num_of_nodes = 0;
mutex_lock(&p->mutex);
@@ -508,7 +476,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
/* Run over all pdd of the process */
pdd = kfd_get_first_process_device_data(p);
do {
pAperture = &args.process_apertures[args.num_of_nodes];
pAperture =
&args->process_apertures[args->num_of_nodes];
pAperture->gpu_id = pdd->dev->id;
pAperture->lds_base = pdd->lds_base;
pAperture->lds_limit = pdd->lds_limit;
@@ -518,7 +487,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
pAperture->scratch_limit = pdd->scratch_limit;
dev_dbg(kfd_device,
"node id %u\n", args.num_of_nodes);
"node id %u\n", args->num_of_nodes);
dev_dbg(kfd_device,
"gpu id %u\n", pdd->dev->id);
dev_dbg(kfd_device,
@@ -534,80 +503,131 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
dev_dbg(kfd_device,
"scratch_limit %llX\n", pdd->scratch_limit);
args.num_of_nodes++;
args->num_of_nodes++;
} while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
(args.num_of_nodes < NUM_OF_SUPPORTED_GPUS));
(args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
}
mutex_unlock(&p->mutex);
if (copy_to_user(arg, &args, sizeof(args)))
return -EFAULT;
return 0;
}
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
kfd_ioctl_get_version, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
kfd_ioctl_create_queue, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
kfd_ioctl_destroy_queue, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
kfd_ioctl_set_memory_policy, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
kfd_ioctl_get_clock_counters, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
kfd_ioctl_get_process_apertures, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
kfd_ioctl_update_queue, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct kfd_process *process;
long err = -EINVAL;
amdkfd_ioctl_t *func;
const struct amdkfd_ioctl_desc *ioctl = NULL;
unsigned int nr = _IOC_NR(cmd);
char stack_kdata[128];
char *kdata = NULL;
unsigned int usize, asize;
int retcode = -EINVAL;
dev_dbg(kfd_device,
"ioctl cmd 0x%x (#%d), arg 0x%lx\n",
cmd, _IOC_NR(cmd), arg);
if (nr >= AMDKFD_CORE_IOCTL_COUNT)
goto err_i1;
if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
u32 amdkfd_size;
ioctl = &amdkfd_ioctls[nr];
amdkfd_size = _IOC_SIZE(ioctl->cmd);
usize = asize = _IOC_SIZE(cmd);
if (amdkfd_size > asize)
asize = amdkfd_size;
cmd = ioctl->cmd;
} else
goto err_i1;
dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
switch (cmd) {
case KFD_IOC_GET_VERSION:
err = kfd_ioctl_get_version(filep, process, (void __user *)arg);
break;
case KFD_IOC_CREATE_QUEUE:
err = kfd_ioctl_create_queue(filep, process,
(void __user *)arg);
break;
case KFD_IOC_DESTROY_QUEUE:
err = kfd_ioctl_destroy_queue(filep, process,
(void __user *)arg);
break;
case KFD_IOC_SET_MEMORY_POLICY:
err = kfd_ioctl_set_memory_policy(filep, process,
(void __user *)arg);
break;
case KFD_IOC_GET_CLOCK_COUNTERS:
err = kfd_ioctl_get_clock_counters(filep, process,
(void __user *)arg);
break;
case KFD_IOC_GET_PROCESS_APERTURES:
err = kfd_ioctl_get_process_apertures(filep, process,
(void __user *)arg);
break;
case KFD_IOC_UPDATE_QUEUE:
err = kfd_ioctl_update_queue(filep, process,
(void __user *)arg);
break;
default:
dev_err(kfd_device,
"unknown ioctl cmd 0x%x, arg 0x%lx)\n",
cmd, arg);
err = -EINVAL;
break;
if (IS_ERR(process)) {
dev_dbg(kfd_device, "no process\n");
goto err_i1;
}
if (err < 0)
dev_err(kfd_device,
"ioctl error %ld for ioctl cmd 0x%x (#%d)\n",
err, cmd, _IOC_NR(cmd));
/* Do not trust userspace, use our own definition */
func = ioctl->func;
return err;
if (unlikely(!func)) {
dev_dbg(kfd_device, "no function\n");
retcode = -EINVAL;
goto err_i1;
}
if (cmd & (IOC_IN | IOC_OUT)) {
if (asize <= sizeof(stack_kdata)) {
kdata = stack_kdata;
} else {
kdata = kmalloc(asize, GFP_KERNEL);
if (!kdata) {
retcode = -ENOMEM;
goto err_i1;
}
}
if (asize > usize)
memset(kdata + usize, 0, asize - usize);
}
if (cmd & IOC_IN) {
if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
retcode = -EFAULT;
goto err_i1;
}
} else if (cmd & IOC_OUT) {
memset(kdata, 0, usize);
}
retcode = func(filep, process, kdata);
if (cmd & IOC_OUT)
if (copy_to_user((void __user *)arg, kdata, usize) != 0)
retcode = -EFAULT;
err_i1:
if (!ioctl)
dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
task_pid_nr(current), cmd, nr);
if (kdata != stack_kdata)
kfree(kdata);
if (retcode)
dev_dbg(kfd_device, "ret = %d\n", retcode);
return retcode;
}
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
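
Note: the hunks above move all copy_from_user()/copy_to_user() handling out of the individual handlers and into the kfd_ioctl() dispatcher, much like the DRM core's drm_ioctl() table. Each handler now has the amdkfd_ioctl_t signature and works on a kernel-space buffer that the dispatcher has already sized, zero-padded, and (for IOC_OUT commands) copies back to userspace. A minimal sketch of how a new command would plug into that table; the AMDKFD_IOC_EXAMPLE command and its args struct are hypothetical, shown only to illustrate the pattern:

	/* Handler: 'data' already points to a kernel copy of the user args,
	 * prepared by kfd_ioctl(); no copy_from_user()/copy_to_user() here. */
	static int kfd_ioctl_example(struct file *filep, struct kfd_process *p,
					void *data)
	{
		struct kfd_ioctl_example_args *args = data; /* hypothetical struct */

		args->value = 0;	/* dispatcher copies this back on IOC_OUT */
		return 0;
	}

	/* Registration: one table entry, indexed by _IOC_NR(cmd). */
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXAMPLE, kfd_ioctl_example, 0),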

@@ -229,13 +229,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_topology_add_device_error;
}
if (kfd_interrupt_init(kfd)) {
dev_err(kfd_device,
"Error initializing interrupts for device (%x:%x)\n",
kfd->pdev->vendor, kfd->pdev->device);
goto kfd_interrupt_error;
}
if (!device_iommu_pasid_init(kfd)) {
dev_err(kfd_device,
"Error initializing iommuv2 for device (%x:%x)\n",
@@ -274,8 +267,6 @@ dqm_start_error:
device_queue_manager_error:
amd_iommu_free_device(kfd->pdev);
device_iommu_pasid_error:
kfd_interrupt_exit(kfd);
kfd_interrupt_error:
kfd_topology_remove_device(kfd);
kfd_topology_add_device_error:
kfd_gtt_sa_fini(kfd);
@@ -293,7 +284,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
if (kfd->init_complete) {
device_queue_manager_uninit(kfd->dqm);
amd_iommu_free_device(kfd->pdev);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
kfd_gtt_sa_fini(kfd);
kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
@@ -337,15 +327,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
if (kfd->init_complete) {
spin_lock(&kfd->interrupt_lock);
if (kfd->interrupts_active
&& enqueue_ih_ring_entry(kfd, ih_ring_entry))
schedule_work(&kfd->interrupt_work);
spin_unlock(&kfd->interrupt_lock);
}
/* Process interrupts / schedule work as necessary */
}
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,

@@ -138,6 +138,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
{
int bit = qpd->vmid - KFD_VMID_START_OFFSET;
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
qpd->vmid = 0;
q->properties.vmid = 0;
@@ -253,6 +256,18 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
return retval;
}
pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
q->pipe,
q->queue);
retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
q->queue, (uint32_t __user *) q->properties.write_ptr);
if (retval != 0) {
deallocate_hqd(dqm, q);
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
return retval;
}
return 0;
}
@@ -311,6 +326,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
int retval;
struct mqd_manager *mqd;
bool prev_active = false;
BUG_ON(!dqm || !q || !q->mqd);
@@ -321,10 +337,18 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
return -ENOMEM;
}
retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
if (q->properties.is_active == true)
prev_active = true;
/*
*
* check active state vs. the previous state
* and modify counter accordingly
*/
retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
if ((q->properties.is_active == true) && (prev_active == false))
dqm->queue_count++;
else
else if ((q->properties.is_active == false) && (prev_active == true))
dqm->queue_count--;
if (sched_policy != KFD_SCHED_POLICY_NO_HWS)

@@ -1,176 +0,0 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* KFD Interrupts.
*
* AMD GPUs deliver interrupts by pushing an interrupt description onto the
* interrupt ring and then sending an interrupt. KGD receives the interrupt
* in ISR and sends us a pointer to each new entry on the interrupt ring.
*
* We generally can't process interrupt-signaled events from ISR, so we call
* out to each interrupt client module (currently only the scheduler) to ask if
* each interrupt is interesting. If they return true, then it requires further
* processing so we copy it to an internal interrupt ring and call each
* interrupt client again from a work-queue.
*
* There's no acknowledgment for the interrupts we use. The hardware simply
* queues a new interrupt each time without waiting.
*
* The fixed-size internal queue means that it's possible for us to lose
* interrupts because we have no back-pressure to the hardware.
*/
#include <linux/slab.h>
#include <linux/device.h>
#include "kfd_priv.h"
#define KFD_INTERRUPT_RING_SIZE 256
static void interrupt_wq(struct work_struct *);
int kfd_interrupt_init(struct kfd_dev *kfd)
{
void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
kfd->device_info->ih_ring_entry_size,
GFP_KERNEL);
if (!interrupt_ring)
return -ENOMEM;
kfd->interrupt_ring = interrupt_ring;
kfd->interrupt_ring_size =
KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
atomic_set(&kfd->interrupt_ring_wptr, 0);
atomic_set(&kfd->interrupt_ring_rptr, 0);
spin_lock_init(&kfd->interrupt_lock);
INIT_WORK(&kfd->interrupt_work, interrupt_wq);
kfd->interrupts_active = true;
/*
* After this function returns, the interrupt will be enabled. This
* barrier ensures that the interrupt running on a different processor
* sees all the above writes.
*/
smp_wmb();
return 0;
}
void kfd_interrupt_exit(struct kfd_dev *kfd)
{
/*
* Stop the interrupt handler from writing to the ring and scheduling
* workqueue items. The spinlock ensures that any interrupt running
* after we have unlocked sees interrupts_active = false.
*/
unsigned long flags;
spin_lock_irqsave(&kfd->interrupt_lock, flags);
kfd->interrupts_active = false;
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
/*
* Flush_scheduled_work ensures that there are no outstanding
* work-queue items that will access interrupt_ring. New work items
* can't be created because we stopped interrupt handling above.
*/
flush_scheduled_work();
kfree(kfd->interrupt_ring);
}
/*
* This assumes that it can't be called concurrently with itself
* but only with dequeue_ih_ring_entry.
*/
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
{
unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
if ((rptr - wptr) % kfd->interrupt_ring_size ==
kfd->device_info->ih_ring_entry_size) {
/* This is very bad, the system is likely to hang. */
dev_err_ratelimited(kfd_chardev(),
"Interrupt ring overflow, dropping interrupt.\n");
return false;
}
memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
kfd->device_info->ih_ring_entry_size);
wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
kfd->interrupt_ring_size;
smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
atomic_set(&kfd->interrupt_ring_wptr, wptr);
return true;
}
/*
* This assumes that it can't be called concurrently with itself
* but only with enqueue_ih_ring_entry.
*/
static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
{
/*
* Assume that wait queues have an implicit barrier, i.e. anything that
* happened in the ISR before it queued work is visible.
*/
unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
if (rptr == wptr)
return false;
memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
kfd->device_info->ih_ring_entry_size);
rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
kfd->interrupt_ring_size;
/*
* Ensure the rptr write update is not visible until
* memcpy has finished reading.
*/
smp_mb();
atomic_set(&kfd->interrupt_ring_rptr, rptr);
return true;
}
static void interrupt_wq(struct work_struct *work)
{
struct kfd_dev *dev = container_of(work, struct kfd_dev,
interrupt_work);
uint32_t ih_ring_entry[DIV_ROUND_UP(
dev->device_info->ih_ring_entry_size,
sizeof(uint32_t))];
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
;
}
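
Note: the file removed above implemented a single-producer/single-consumer ring: the ISR side enqueued entries and a work queue drained them, with rptr/wptr advancing modulo the ring size and one entry's worth of space deliberately left free so a full ring can be told apart from an empty one (rptr == wptr). A standalone sketch of that index arithmetic, with the kfd-specific types replaced by plain integers for illustration:

	#include <stdbool.h>

	#define RING_SIZE  256	/* ring size in bytes, power of two */
	#define ENTRY_SIZE 16	/* bytes per interrupt ring entry */

	static unsigned int rptr, wptr;	/* byte offsets into the ring */

	static bool ring_full(void)
	{
		/* Refuse to enqueue when only one slot remains; filling it
		 * would make wptr == rptr, which already means "empty". */
		return (rptr - wptr) % RING_SIZE == ENTRY_SIZE;
	}

	static bool ring_empty(void)
	{
		return rptr == wptr;
	}

	static void advance(unsigned int *ptr)
	{
		*ptr = (*ptr + ENTRY_SIZE) % RING_SIZE;
	}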

@@ -264,7 +264,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address,
return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
pipe_id, queue_id);
}

@@ -32,7 +32,7 @@ int kfd_pasid_init(void)
{
pasid_limit = max_num_of_processes;
pasid_bitmap = kzalloc(BITS_TO_LONGS(pasid_limit), GFP_KERNEL);
pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
if (!pasid_bitmap)
return -ENOMEM;

@@ -157,22 +157,10 @@ struct kfd_dev {
unsigned int gtt_sa_chunk_size;
unsigned int gtt_sa_num_of_chunks;
void *interrupt_ring;
size_t interrupt_ring_size;
atomic_t interrupt_ring_rptr;
atomic_t interrupt_ring_wptr;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
/* QCM Device instance */
struct device_queue_manager *dqm;
bool init_complete;
/*
* Interrupts of interest to KFD are copied
* from the HW ring into a SW ring.
*/
bool interrupts_active;
};
/* KGD2KFD callbacks */
@@ -490,6 +478,24 @@ struct kfd_process {
bool is_32bit_user_mode;
};
/**
* Ioctl function type.
*
* \param filep pointer to file structure.
* \param p amdkfd process pointer.
* \param data pointer to arg that was copied from user.
*/
typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
void *data);
struct amdkfd_ioctl_desc {
unsigned int cmd;
int flags;
amdkfd_ioctl_t *func;
unsigned int cmd_drv;
const char *name;
};
void kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(const struct task_struct *);
@@ -548,10 +554,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
/* Power Management */
void kgd2kfd_suspend(struct kfd_dev *kfd);

@@ -934,7 +934,7 @@ static int kfd_build_sysfs_node_tree(void)
uint32_t i = 0;
list_for_each_entry(dev, &topology_device_list, list) {
ret = kfd_build_sysfs_node_entry(dev, 0);
ret = kfd_build_sysfs_node_entry(dev, i);
if (ret < 0)
return ret;
i++;

@@ -180,7 +180,7 @@ struct kfd2kgd_calls {
int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
bool (*hqd_is_occupies)(struct kgd_dev *kgd, uint64_t queue_address,
bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type,