drm/amdgpu: save vm fault information for amdkfd
amdgpu saves the VM fault related information for KFD usage and keeps the copy until KFD reads it. Signed-off-by: shaoyun liu <shaoyun.liu@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
@@ -183,6 +183,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
|
|||||||
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
|
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
|
||||||
struct dma_fence **ef);
|
struct dma_fence **ef);
|
||||||
|
|
||||||
|
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
|
||||||
|
struct kfd_vm_fault_info *info);
|
||||||
|
|
||||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
|
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
|
||||||
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
|
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
|
||||||
|
|
||||||
|
@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
|||||||
.invalidate_tlbs = invalidate_tlbs,
|
.invalidate_tlbs = invalidate_tlbs,
|
||||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||||
|
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
|
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
|
||||||
|
@@ -176,6 +176,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
|||||||
.invalidate_tlbs = invalidate_tlbs,
|
.invalidate_tlbs = invalidate_tlbs,
|
||||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||||
|
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
|
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
|
||||||
|
@@ -1621,6 +1621,20 @@ bo_reserve_failed:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
|
||||||
|
struct kfd_vm_fault_info *mem)
|
||||||
|
{
|
||||||
|
struct amdgpu_device *adev;
|
||||||
|
|
||||||
|
adev = (struct amdgpu_device *)kgd;
|
||||||
|
if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
|
||||||
|
*mem = *adev->gmc.vm_fault_info;
|
||||||
|
mb();
|
||||||
|
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Evict a userptr BO by stopping the queues if necessary
|
/* Evict a userptr BO by stopping the queues if necessary
|
||||||
*
|
*
|
||||||
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
|
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
|
||||||
|
@@ -105,6 +105,8 @@ struct amdgpu_gmc {
|
|||||||
/* protects concurrent invalidation */
|
/* protects concurrent invalidation */
|
||||||
spinlock_t invalidate_lock;
|
spinlock_t invalidate_lock;
|
||||||
bool translate_further;
|
bool translate_further;
|
||||||
|
struct kfd_vm_fault_info *vm_fault_info;
|
||||||
|
atomic_t vm_fault_info_updated;
|
||||||
|
|
||||||
const struct amdgpu_gmc_funcs *gmc_funcs;
|
const struct amdgpu_gmc_funcs *gmc_funcs;
|
||||||
};
|
};
|
||||||
|
@@ -28,6 +28,7 @@
|
|||||||
#include "cik.h"
|
#include "cik.h"
|
||||||
#include "gmc_v7_0.h"
|
#include "gmc_v7_0.h"
|
||||||
#include "amdgpu_ucode.h"
|
#include "amdgpu_ucode.h"
|
||||||
|
#include "amdgpu_amdkfd.h"
|
||||||
|
|
||||||
#include "bif/bif_4_1_d.h"
|
#include "bif/bif_4_1_d.h"
|
||||||
#include "bif/bif_4_1_sh_mask.h"
|
#include "bif/bif_4_1_sh_mask.h"
|
||||||
@@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle)
|
|||||||
adev->vm_manager.vram_base_offset = 0;
|
adev->vm_manager.vram_base_offset = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!adev->gmc.vm_fault_info)
|
||||||
|
return -ENOMEM;
|
||||||
|
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle)
|
|||||||
|
|
||||||
amdgpu_gem_force_release(adev);
|
amdgpu_gem_force_release(adev);
|
||||||
amdgpu_vm_manager_fini(adev);
|
amdgpu_vm_manager_fini(adev);
|
||||||
|
kfree(adev->gmc.vm_fault_info);
|
||||||
gmc_v7_0_gart_fini(adev);
|
gmc_v7_0_gart_fini(adev);
|
||||||
amdgpu_bo_fini(adev);
|
amdgpu_bo_fini(adev);
|
||||||
release_firmware(adev->gmc.fw);
|
release_firmware(adev->gmc.fw);
|
||||||
@@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
|
|||||||
struct amdgpu_irq_src *source,
|
struct amdgpu_irq_src *source,
|
||||||
struct amdgpu_iv_entry *entry)
|
struct amdgpu_iv_entry *entry)
|
||||||
{
|
{
|
||||||
u32 addr, status, mc_client;
|
u32 addr, status, mc_client, vmid;
|
||||||
|
|
||||||
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
|
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
|
||||||
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
|
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
|
||||||
@@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
|
|||||||
entry->pasid);
|
entry->pasid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||||
|
VMID);
|
||||||
|
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
|
||||||
|
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
|
||||||
|
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
|
||||||
|
u32 protections = REG_GET_FIELD(status,
|
||||||
|
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||||
|
PROTECTIONS);
|
||||||
|
|
||||||
|
info->vmid = vmid;
|
||||||
|
info->mc_id = REG_GET_FIELD(status,
|
||||||
|
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||||
|
MEMORY_CLIENT_ID);
|
||||||
|
info->status = status;
|
||||||
|
info->page_addr = addr;
|
||||||
|
info->prot_valid = protections & 0x7 ? true : false;
|
||||||
|
info->prot_read = protections & 0x8 ? true : false;
|
||||||
|
info->prot_write = protections & 0x10 ? true : false;
|
||||||
|
info->prot_exec = protections & 0x20 ? true : false;
|
||||||
|
mb();
|
||||||
|
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -26,6 +26,7 @@
|
|||||||
#include "amdgpu.h"
|
#include "amdgpu.h"
|
||||||
#include "gmc_v8_0.h"
|
#include "gmc_v8_0.h"
|
||||||
#include "amdgpu_ucode.h"
|
#include "amdgpu_ucode.h"
|
||||||
|
#include "amdgpu_amdkfd.h"
|
||||||
|
|
||||||
#include "gmc/gmc_8_1_d.h"
|
#include "gmc/gmc_8_1_d.h"
|
||||||
#include "gmc/gmc_8_1_sh_mask.h"
|
#include "gmc/gmc_8_1_sh_mask.h"
|
||||||
@@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle)
|
|||||||
adev->vm_manager.vram_base_offset = 0;
|
adev->vm_manager.vram_base_offset = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!adev->gmc.vm_fault_info)
|
||||||
|
return -ENOMEM;
|
||||||
|
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle)
|
|||||||
|
|
||||||
amdgpu_gem_force_release(adev);
|
amdgpu_gem_force_release(adev);
|
||||||
amdgpu_vm_manager_fini(adev);
|
amdgpu_vm_manager_fini(adev);
|
||||||
|
kfree(adev->gmc.vm_fault_info);
|
||||||
gmc_v8_0_gart_fini(adev);
|
gmc_v8_0_gart_fini(adev);
|
||||||
amdgpu_bo_fini(adev);
|
amdgpu_bo_fini(adev);
|
||||||
release_firmware(adev->gmc.fw);
|
release_firmware(adev->gmc.fw);
|
||||||
@@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
|
|||||||
struct amdgpu_irq_src *source,
|
struct amdgpu_irq_src *source,
|
||||||
struct amdgpu_iv_entry *entry)
|
struct amdgpu_iv_entry *entry)
|
||||||
{
|
{
|
||||||
u32 addr, status, mc_client;
|
u32 addr, status, mc_client, vmid;
|
||||||
|
|
||||||
if (amdgpu_sriov_vf(adev)) {
|
if (amdgpu_sriov_vf(adev)) {
|
||||||
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
|
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
|
||||||
@@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
|
|||||||
entry->pasid);
|
entry->pasid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||||
|
VMID);
|
||||||
|
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
|
||||||
|
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
|
||||||
|
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
|
||||||
|
u32 protections = REG_GET_FIELD(status,
|
||||||
|
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||||
|
PROTECTIONS);
|
||||||
|
|
||||||
|
info->vmid = vmid;
|
||||||
|
info->mc_id = REG_GET_FIELD(status,
|
||||||
|
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||||
|
MEMORY_CLIENT_ID);
|
||||||
|
info->status = status;
|
||||||
|
info->page_addr = addr;
|
||||||
|
info->prot_valid = protections & 0x7 ? true : false;
|
||||||
|
info->prot_read = protections & 0x8 ? true : false;
|
||||||
|
info->prot_write = protections & 0x10 ? true : false;
|
||||||
|
info->prot_exec = protections & 0x20 ? true : false;
|
||||||
|
mb();
|
||||||
|
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -47,6 +47,17 @@ enum kfd_preempt_type {
|
|||||||
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
|
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Snapshot of one VM protection fault, filled in by the GMC v7/v8 fault
 * interrupt handlers and read back through get_vm_fault_info.
 */
struct kfd_vm_fault_info {
	/* Value read from VM_CONTEXT1_PROTECTION_FAULT_ADDR */
	uint64_t	page_addr;
	/* VMID field of the fault status register */
	uint32_t	vmid;
	/* MEMORY_CLIENT_ID field of the fault status register */
	uint32_t	mc_id;
	/* Raw VM_CONTEXT1_PROTECTION_FAULT_STATUS value */
	uint32_t	status;
	/* Set when any of PROTECTIONS bits 0-2 (mask 0x7) is set */
	bool		prot_valid;
	/* Set when PROTECTIONS bit 3 (mask 0x8) is set */
	bool		prot_read;
	/* Set when PROTECTIONS bit 4 (mask 0x10) is set */
	bool		prot_write;
	/* Set when PROTECTIONS bit 5 (mask 0x20) is set */
	bool		prot_exec;
};
|
||||||
|
|
||||||
struct kfd_cu_info {
|
struct kfd_cu_info {
|
||||||
uint32_t num_shader_engines;
|
uint32_t num_shader_engines;
|
||||||
uint32_t num_shader_arrays_per_engine;
|
uint32_t num_shader_arrays_per_engine;
|
||||||
@@ -259,6 +270,12 @@ struct tile_config {
|
|||||||
* IB to the corresponding ring (ring type). The IB is executed with the
|
* IB to the corresponding ring (ring type). The IB is executed with the
|
||||||
* specified VMID in a user mode context.
|
* specified VMID in a user mode context.
|
||||||
*
|
*
|
||||||
|
* @get_vm_fault_info: Return information about a recent VM fault on
|
||||||
|
* GFXv7 and v8. If multiple VM faults occurred since the last call of
|
||||||
|
* this function, it will return information about the first of those
|
||||||
|
* faults. On GFXv9 VM fault information is fully contained in the IH
|
||||||
|
* packet and this function is not needed.
|
||||||
|
*
|
||||||
* This structure contains function pointers to services that the kgd driver
|
* This structure contains function pointers to services that the kgd driver
|
||||||
* provides to amdkfd driver.
|
* provides to amdkfd driver.
|
||||||
*
|
*
|
||||||
@@ -374,6 +391,9 @@ struct kfd2kgd_calls {
|
|||||||
int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
|
int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
|
||||||
uint32_t vmid, uint64_t gpu_addr,
|
uint32_t vmid, uint64_t gpu_addr,
|
||||||
uint32_t *ib_cmd, uint32_t ib_len);
|
uint32_t *ib_cmd, uint32_t ib_len);
|
||||||
|
|
||||||
|
int (*get_vm_fault_info)(struct kgd_dev *kgd,
|
||||||
|
struct kfd_vm_fault_info *info);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Reference in New Issue
Block a user