Merge tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Not a major amount of change, the i915 trees got split into display and gt trees to better facilitate higher level review, and there's a major refactoring of i915 GEM locking to use more core kernel concepts (like ww-mutexes). msm gets per-process pagetables, older AMD SI cards get DC support, nouveau got a bump in displayport support with common code extraction from i915. Outside of drm this contains a couple of patches for hexint moduleparams which you've acked, and a virtio common code tree that you should also get via its regular path. New driver: - Cadence MHDP8546 DisplayPort bridge driver core: - cross-driver scatterlist cleanups - devm_drm conversions - remove drm_dev_init - devm_drm_dev_alloc conversion ttm: - lots of refactoring and cleanups bridges: - chained bridge support in more drivers panel: - misc new panels scheduler: - cleanup priority levels displayport: - refactor i915 code into helpers for nouveau i915: - split into display and GT trees - WW locking refactoring in GEM - execbuf2 extension mechanism - syncobj timeline support - GEN 12 HOBL display powersaving - Rocket Lake display additions - Disable FBC on Tigerlake - Tigerlake Type-C + DP improvements - Hotplug interrupt refactoring amdgpu: - Sienna Cichlid updates - Navy Flounder updates - DCE6 (SI) support for DC - Plane rotation enabled - TMZ state info ioctl - PCIe DPC recovery support - DC interrupt handling refactor - OLED panel fixes amdkfd: - add SMI events for thermal throttling - SMI interface events ioctl update - process eviction counters radeon: - move to dma_ for allocations - expose sclk via sysfs msm: - DSI support for sm8150/sm8250 - per-process GPU pagetable support - Displayport support mediatek: - move HDMI phy driver to PHY - convert mtk-dpi to bridge API - disable mt2701 tmds tegra: - bridge support exynos: - misc cleanups vc4: - dual display cleanups ast: - cleanups gma500: - conversion to GPIOd API hisilicon: - misc reworks ingenic: 
- clock handling and format improvements mcde: - DSI support mgag200: - desktop g200 support mxsfb: - i.MX7 + i.MX8M - alpha plane support panfrost: - devfreq support - amlogic SoC support ps8640: - EDID from eDP retrieval tidss: - AM65xx YUV workaround virtio: - virtio-gpu exported resources rcar-du: - R8A7742, R8A774E1 and R8A77961 support - YUV planar format fixes - non-visible plane handling - VSP device reference count fix - Kconfig fix to avoid displaying disabled options in .config" * tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm: (1494 commits) drm/ingenic: Fix bad revert drm/amdgpu: Fix invalid number of character '{' in amdgpu_acpi_init drm/amdgpu: Remove warning for virtual_display drm/amdgpu: kfd_initialized can be static drm/amd/pm: setup APU dpm clock table in SMU HW initialization drm/amdgpu: prevent spurious warning drm/amdgpu/swsmu: fix ARC build errors drm/amd/display: Fix OPTC_DATA_FORMAT programming drm/amd/display: Don't allow pstate if no support in blank drm/panfrost: increase readl_relaxed_poll_timeout values MAINTAINERS: Update entry for st7703 driver after the rename Revert "gpu/drm: ingenic: Add option to mmap GEM buffers cached" drm/amd/display: HDMI remote sink need mode validation for Linux drm/amd/display: Change to correct unit on audio rate drm/amd/display: Avoid set zero in the requested clk drm/amdgpu: align frag_end to covered address space drm/amdgpu: fix NULL pointer dereference for Renoir drm/vmwgfx: fix regression in thp code due to ttm init refactor. drm/amdgpu/swsmu: add interrupt work handler for smu11 parts drm/amdgpu/swsmu: add interrupt work function ...
This commit is contained in:
@@ -911,7 +911,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
|
||||
0x705d0000, 0x807c817c,
|
||||
0x8070ff70, 0x00000080,
|
||||
0xbf0a7b7c, 0xbf85fff8,
|
||||
0xbf82014f, 0xbef4037e,
|
||||
0xbf820151, 0xbef4037e,
|
||||
0x8775ff7f, 0x0000ffff,
|
||||
0x8875ff75, 0x00040000,
|
||||
0xbef60380, 0xbef703ff,
|
||||
@@ -1024,61 +1024,62 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
|
||||
0xbe883108, 0xbe8a310a,
|
||||
0xbe8c310c, 0xbe8e310e,
|
||||
0xbf06807c, 0xbf84fff0,
|
||||
0xb9782a05, 0x80788178,
|
||||
0xbf0d9972, 0xbf850002,
|
||||
0x8f788978, 0xbf820001,
|
||||
0x8f788a78, 0xb96e1e06,
|
||||
0x8f6e8a6e, 0x80786e78,
|
||||
0x8078ff78, 0x00000200,
|
||||
0xbef603ff, 0x01000000,
|
||||
0xf4211bfa, 0xf0000000,
|
||||
0x80788478, 0xf4211b3a,
|
||||
0xba80f801, 0x00000000,
|
||||
0xbf8a0000, 0xb9782a05,
|
||||
0x80788178, 0xbf0d9972,
|
||||
0xbf850002, 0x8f788978,
|
||||
0xbf820001, 0x8f788a78,
|
||||
0xb96e1e06, 0x8f6e8a6e,
|
||||
0x80786e78, 0x8078ff78,
|
||||
0x00000200, 0xbef603ff,
|
||||
0x01000000, 0xf4211bfa,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4211b7a, 0xf0000000,
|
||||
0x80788478, 0xf4211c3a,
|
||||
0xf4211b3a, 0xf0000000,
|
||||
0x80788478, 0xf4211b7a,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4211c7a, 0xf0000000,
|
||||
0x80788478, 0xf4211eba,
|
||||
0xf4211c3a, 0xf0000000,
|
||||
0x80788478, 0xf4211c7a,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4211efa, 0xf0000000,
|
||||
0x80788478, 0xf4211e7a,
|
||||
0xf4211eba, 0xf0000000,
|
||||
0x80788478, 0xf4211efa,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4211cfa, 0xf0000000,
|
||||
0x80788478, 0xf4211bba,
|
||||
0xf4211e7a, 0xf0000000,
|
||||
0x80788478, 0xf4211cfa,
|
||||
0xf0000000, 0x80788478,
|
||||
0xbf8cc07f, 0xb9eef814,
|
||||
0xf4211bba, 0xf0000000,
|
||||
0x80788478, 0xbf8cc07f,
|
||||
0xb9eef815, 0xbefc036f,
|
||||
0xbefe0370, 0xbeff0371,
|
||||
0x876f7bff, 0x000003ff,
|
||||
0xb9ef4803, 0xb9f9f816,
|
||||
0x876f7bff, 0xfffff800,
|
||||
0x906f8b6f, 0xb9efa2c3,
|
||||
0xb9f3f801, 0xb96e2a05,
|
||||
0x806e816e, 0xbf0d9972,
|
||||
0xbf850002, 0x8f6e896e,
|
||||
0xbf820001, 0x8f6e8a6e,
|
||||
0x806eff6e, 0x00000200,
|
||||
0x806e746e, 0x826f8075,
|
||||
0x876fff6f, 0x0000ffff,
|
||||
0xf4091c37, 0xfa000050,
|
||||
0xf4091d37, 0xfa000060,
|
||||
0xf4011e77, 0xfa000074,
|
||||
0xbf8cc07f, 0x876fff6d,
|
||||
0xfc000000, 0x906f9a6f,
|
||||
0x8f6f906f, 0xbeee0380,
|
||||
0xb9eef814, 0xf4211bba,
|
||||
0xf0000000, 0x80788478,
|
||||
0xbf8cc07f, 0xb9eef815,
|
||||
0xbefc036f, 0xbefe0370,
|
||||
0xbeff0371, 0x876f7bff,
|
||||
0x000003ff, 0xb9ef4803,
|
||||
0xb9f9f816, 0x876f7bff,
|
||||
0xfffff800, 0x906f8b6f,
|
||||
0xb9efa2c3, 0xb9f3f801,
|
||||
0xb96e2a05, 0x806e816e,
|
||||
0xbf0d9972, 0xbf850002,
|
||||
0x8f6e896e, 0xbf820001,
|
||||
0x8f6e8a6e, 0x806eff6e,
|
||||
0x00000200, 0x806e746e,
|
||||
0x826f8075, 0x876fff6f,
|
||||
0x0000ffff, 0xf4091c37,
|
||||
0xfa000050, 0xf4091d37,
|
||||
0xfa000060, 0xf4011e77,
|
||||
0xfa000074, 0xbf8cc07f,
|
||||
0x876fff6d, 0xfc000000,
|
||||
0x906f9a6f, 0x8f6f906f,
|
||||
0xbeee0380, 0x886e6f6e,
|
||||
0x876fff6d, 0x02000000,
|
||||
0x906f996f, 0x8f6f8f6f,
|
||||
0x886e6f6e, 0x876fff6d,
|
||||
0x02000000, 0x906f996f,
|
||||
0x8f6f8f6f, 0x886e6f6e,
|
||||
0x876fff6d, 0x01000000,
|
||||
0x906f986f, 0x8f6f996f,
|
||||
0x886e6f6e, 0x876fff7a,
|
||||
0x00800000, 0x906f976f,
|
||||
0xb9eef807, 0x876dff6d,
|
||||
0x0000ffff, 0x87fe7e7e,
|
||||
0x87ea6a6a, 0xb9faf802,
|
||||
0xbf8a0000, 0xbe80226c,
|
||||
0x01000000, 0x906f986f,
|
||||
0x8f6f996f, 0x886e6f6e,
|
||||
0x876fff7a, 0x00800000,
|
||||
0x906f976f, 0xb9eef807,
|
||||
0x876dff6d, 0x0000ffff,
|
||||
0x87fe7e7e, 0x87ea6a6a,
|
||||
0xb9faf802, 0xbe80226c,
|
||||
0xbf810000, 0xbf9f0000,
|
||||
0xbf9f0000, 0xbf9f0000,
|
||||
0xbf9f0000, 0xbf9f0000,
|
||||
@@ -1807,7 +1808,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
|
||||
0x705d0000, 0x807c817c,
|
||||
0x8070ff70, 0x00000080,
|
||||
0xbf0a7b7c, 0xbf85fff8,
|
||||
0xbf82013a, 0xbef4037e,
|
||||
0xbf82013c, 0xbef4037e,
|
||||
0x8775ff7f, 0x0000ffff,
|
||||
0x8875ff75, 0x00040000,
|
||||
0xbef60380, 0xbef703ff,
|
||||
@@ -1920,50 +1921,51 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
|
||||
0xbe883108, 0xbe8a310a,
|
||||
0xbe8c310c, 0xbe8e310e,
|
||||
0xbf06807c, 0xbf84fff0,
|
||||
0xb9782a05, 0x80788178,
|
||||
0xbf0d9972, 0xbf850002,
|
||||
0x8f788978, 0xbf820001,
|
||||
0x8f788a78, 0xb96e1e06,
|
||||
0x8f6e8a6e, 0x80786e78,
|
||||
0x8078ff78, 0x00000200,
|
||||
0xbef603ff, 0x01000000,
|
||||
0xf4211bfa, 0xf0000000,
|
||||
0x80788478, 0xf4211b3a,
|
||||
0xba80f801, 0x00000000,
|
||||
0xbf8a0000, 0xb9782a05,
|
||||
0x80788178, 0xbf0d9972,
|
||||
0xbf850002, 0x8f788978,
|
||||
0xbf820001, 0x8f788a78,
|
||||
0xb96e1e06, 0x8f6e8a6e,
|
||||
0x80786e78, 0x8078ff78,
|
||||
0x00000200, 0xbef603ff,
|
||||
0x01000000, 0xf4211bfa,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4211b7a, 0xf0000000,
|
||||
0x80788478, 0xf4211c3a,
|
||||
0xf4211b3a, 0xf0000000,
|
||||
0x80788478, 0xf4211b7a,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4211c7a, 0xf0000000,
|
||||
0x80788478, 0xf4211eba,
|
||||
0xf4211c3a, 0xf0000000,
|
||||
0x80788478, 0xf4211c7a,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4211efa, 0xf0000000,
|
||||
0x80788478, 0xf4211e7a,
|
||||
0xf4211eba, 0xf0000000,
|
||||
0x80788478, 0xf4211efa,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4211cfa, 0xf0000000,
|
||||
0x80788478, 0xf4211bba,
|
||||
0xf4211e7a, 0xf0000000,
|
||||
0x80788478, 0xf4211cfa,
|
||||
0xf0000000, 0x80788478,
|
||||
0xbf8cc07f, 0xb9eef814,
|
||||
0xf4211bba, 0xf0000000,
|
||||
0x80788478, 0xbf8cc07f,
|
||||
0xb9eef815, 0xbefc036f,
|
||||
0xbefe0370, 0xbeff0371,
|
||||
0x876f7bff, 0x000003ff,
|
||||
0xb9ef4803, 0x876f7bff,
|
||||
0xfffff800, 0x906f8b6f,
|
||||
0xb9efa2c3, 0xb9f3f801,
|
||||
0xb96e2a05, 0x806e816e,
|
||||
0xbf0d9972, 0xbf850002,
|
||||
0x8f6e896e, 0xbf820001,
|
||||
0x8f6e8a6e, 0x806eff6e,
|
||||
0x00000200, 0x806e746e,
|
||||
0x826f8075, 0x876fff6f,
|
||||
0x0000ffff, 0xf4091c37,
|
||||
0xfa000050, 0xf4091d37,
|
||||
0xfa000060, 0xf4011e77,
|
||||
0xfa000074, 0xbf8cc07f,
|
||||
0x876dff6d, 0x0000ffff,
|
||||
0x87fe7e7e, 0x87ea6a6a,
|
||||
0xb9faf802, 0xbf8a0000,
|
||||
0xb9eef814, 0xf4211bba,
|
||||
0xf0000000, 0x80788478,
|
||||
0xbf8cc07f, 0xb9eef815,
|
||||
0xbefc036f, 0xbefe0370,
|
||||
0xbeff0371, 0x876f7bff,
|
||||
0x000003ff, 0xb9ef4803,
|
||||
0x876f7bff, 0xfffff800,
|
||||
0x906f8b6f, 0xb9efa2c3,
|
||||
0xb9f3f801, 0xb96e2a05,
|
||||
0x806e816e, 0xbf0d9972,
|
||||
0xbf850002, 0x8f6e896e,
|
||||
0xbf820001, 0x8f6e8a6e,
|
||||
0x806eff6e, 0x00000200,
|
||||
0x806e746e, 0x826f8075,
|
||||
0x876fff6f, 0x0000ffff,
|
||||
0xf4091c37, 0xfa000050,
|
||||
0xf4091d37, 0xfa000060,
|
||||
0xf4011e77, 0xfa000074,
|
||||
0xbf8cc07f, 0x876dff6d,
|
||||
0x0000ffff, 0x87fe7e7e,
|
||||
0x87ea6a6a, 0xb9faf802,
|
||||
0xbe80226c, 0xbf810000,
|
||||
0xbf9f0000, 0xbf9f0000,
|
||||
0xbf9f0000, 0xbf9f0000,
|
||||
|
@@ -894,6 +894,11 @@ L_RESTORE_SGPR:
|
||||
s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) ? 1 : 0
|
||||
s_cbranch_scc0 L_RESTORE_SGPR_LOOP
|
||||
|
||||
// s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception.
|
||||
// Clear DEBUG_EN before and restore MODE after the barrier.
|
||||
s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0
|
||||
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG
|
||||
|
||||
/* restore HW registers */
|
||||
L_RESTORE_HWREG:
|
||||
// HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
|
||||
@@ -976,8 +981,6 @@ L_RESTORE_HWREG:
|
||||
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
|
||||
|
||||
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG
|
||||
|
||||
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
|
||||
|
||||
L_END_PGM:
|
||||
|
@@ -97,6 +97,7 @@ void kfd_chardev_exit(void)
|
||||
device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
|
||||
class_destroy(kfd_class);
|
||||
unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
|
||||
kfd_device = NULL;
|
||||
}
|
||||
|
||||
struct device *kfd_chardev(void)
|
||||
@@ -1254,7 +1255,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
|
||||
return true;
|
||||
}
|
||||
|
||||
if (dev->device_info->needs_iommu_device)
|
||||
if (dev->use_iommu_v2)
|
||||
return false;
|
||||
|
||||
amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
|
||||
@@ -1290,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
|
||||
if (args->size != kfd_doorbell_process_slice(dev))
|
||||
return -EINVAL;
|
||||
offset = kfd_get_process_doorbells(dev, p);
|
||||
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
|
||||
if (args->size != PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
|
||||
if (!offset)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
||||
pdd = kfd_bind_process_to_device(dev, p);
|
||||
@@ -1310,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
|
||||
if (args->size != kfd_doorbell_process_slice(dev)) {
|
||||
err = -EINVAL;
|
||||
goto err_unlock;
|
||||
}
|
||||
offset = kfd_get_process_doorbells(pdd);
|
||||
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
|
||||
if (args->size != PAGE_SIZE) {
|
||||
err = -EINVAL;
|
||||
goto err_unlock;
|
||||
}
|
||||
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
|
||||
if (!offset) {
|
||||
err = -ENOMEM;
|
||||
goto err_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
dev->kgd, args->va_addr, args->size,
|
||||
pdd->vm, (struct kgd_mem **) &mem, &offset,
|
||||
|
@@ -742,6 +742,22 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * kfd_ignore_crat - Decide whether the CRAT table should be ignored.
 *
 * Returns true unconditionally when ignore_crat is set (the caller's log
 * message describes this as a module option). Otherwise the answer is fixed
 * at build time: true when KFD_SUPPORT_IOMMU_V2 is not defined, false when
 * it is defined.
 */
static bool kfd_ignore_crat(void)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
/* An explicit ignore_crat setting takes precedence over the build config. */
if (ignore_crat)
|
||||
return true;
|
||||
|
||||
/* Without IOMMU v2 support compiled in, the CRAT table is always ignored. */
#ifndef KFD_SUPPORT_IOMMU_V2
|
||||
ret = true;
|
||||
#else
|
||||
ret = false;
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* kfd_create_crat_image_acpi - Allocates memory for CRAT image and
|
||||
* copies CRAT from ACPI (if available).
|
||||
@@ -776,12 +792,13 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (ignore_crat) {
|
||||
if (kfd_ignore_crat()) {
|
||||
pr_info("CRAT table disabled by module option\n");
|
||||
return -ENODATA;
|
||||
}
|
||||
|
||||
pcrat_image = kmemdup(crat_table, crat_table->length, GFP_KERNEL);
|
||||
pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
|
||||
memcpy(pcrat_image, crat_table, crat_table->length);
|
||||
if (!pcrat_image)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -793,11 +810,10 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
|
||||
|
||||
/* Memory required to create Virtual CRAT.
|
||||
* Since there is no easy way to predict the amount of memory required, the
|
||||
* following amount are allocated for CPU and GPU Virtual CRAT. This is
|
||||
* following amount is allocated for GPU Virtual CRAT. This is
|
||||
* expected to cover all known conditions. But to be safe additional check
|
||||
* is put in the code to ensure we don't overwrite.
|
||||
*/
|
||||
#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE)
|
||||
#define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE)
|
||||
|
||||
/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
|
||||
@@ -948,7 +964,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
|
||||
#endif
|
||||
int ret = 0;
|
||||
|
||||
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
|
||||
if (!pcrat_image)
|
||||
return -EINVAL;
|
||||
|
||||
/* Fill in CRAT Header.
|
||||
@@ -1348,30 +1364,37 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
|
||||
uint32_t proximity_domain)
|
||||
{
|
||||
void *pcrat_image = NULL;
|
||||
int ret = 0;
|
||||
int ret = 0, num_nodes;
|
||||
size_t dyn_size;
|
||||
|
||||
if (!crat_image)
|
||||
return -EINVAL;
|
||||
|
||||
*crat_image = NULL;
|
||||
|
||||
/* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and
|
||||
* VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover
|
||||
* all the current conditions. A check is put not to overwrite beyond
|
||||
* allocated size
|
||||
/* Allocate the CPU Virtual CRAT size based on the number of online
|
||||
* nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
|
||||
* This should cover all the current conditions. A check is put not
|
||||
* to overwrite beyond allocated size for GPUs
|
||||
*/
|
||||
switch (flags) {
|
||||
case COMPUTE_UNIT_CPU:
|
||||
pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL);
|
||||
num_nodes = num_online_nodes();
|
||||
dyn_size = sizeof(struct crat_header) +
|
||||
num_nodes * (sizeof(struct crat_subtype_computeunit) +
|
||||
sizeof(struct crat_subtype_memory) +
|
||||
(num_nodes - 1) * sizeof(struct crat_subtype_iolink));
|
||||
pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
|
||||
if (!pcrat_image)
|
||||
return -ENOMEM;
|
||||
*size = VCRAT_SIZE_FOR_CPU;
|
||||
*size = dyn_size;
|
||||
pr_debug("CRAT size is %ld", dyn_size);
|
||||
ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
|
||||
break;
|
||||
case COMPUTE_UNIT_GPU:
|
||||
if (!kdev)
|
||||
return -EINVAL;
|
||||
pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
|
||||
pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
|
||||
if (!pcrat_image)
|
||||
return -ENOMEM;
|
||||
*size = VCRAT_SIZE_FOR_GPU;
|
||||
@@ -1390,7 +1413,7 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
|
||||
if (!ret)
|
||||
*crat_image = pcrat_image;
|
||||
else
|
||||
kfree(pcrat_image);
|
||||
kvfree(pcrat_image);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@@ -29,6 +29,7 @@
|
||||
#include "cwsr_trap_handler.h"
|
||||
#include "kfd_iommu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "kfd_smi_events.h"
|
||||
|
||||
#define MQD_SIZE_ALIGNED 768
|
||||
|
||||
@@ -115,6 +116,7 @@ static const struct kfd_device_info carrizo_device_info = {
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
#endif
|
||||
|
||||
static const struct kfd_device_info raven_device_info = {
|
||||
.asic_family = CHIP_RAVEN,
|
||||
@@ -133,7 +135,6 @@ static const struct kfd_device_info raven_device_info = {
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
#endif
|
||||
|
||||
static const struct kfd_device_info hawaii_device_info = {
|
||||
.asic_family = CHIP_HAWAII,
|
||||
@@ -502,8 +503,8 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
|
||||
#ifdef KFD_SUPPORT_IOMMU_V2
|
||||
[CHIP_KAVERI] = {&kaveri_device_info, NULL},
|
||||
[CHIP_CARRIZO] = {&carrizo_device_info, NULL},
|
||||
[CHIP_RAVEN] = {&raven_device_info, NULL},
|
||||
#endif
|
||||
[CHIP_RAVEN] = {&raven_device_info, NULL},
|
||||
[CHIP_HAWAII] = {&hawaii_device_info, NULL},
|
||||
[CHIP_TONGA] = {&tonga_device_info, NULL},
|
||||
[CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info},
|
||||
@@ -582,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
|
||||
|
||||
atomic_set(&kfd->sram_ecc_flag, 0);
|
||||
|
||||
ida_init(&kfd->doorbell_ida);
|
||||
|
||||
return kfd;
|
||||
}
|
||||
|
||||
@@ -711,11 +714,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
goto kfd_doorbell_error;
|
||||
}
|
||||
|
||||
if (kfd->kfd2kgd->get_hive_id)
|
||||
kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);
|
||||
kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd);
|
||||
|
||||
if (kfd->kfd2kgd->get_unique_id)
|
||||
kfd->unique_id = kfd->kfd2kgd->get_unique_id(kfd->kgd);
|
||||
kfd->unique_id = amdgpu_amdkfd_get_unique_id(kfd->kgd);
|
||||
|
||||
kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd);
|
||||
|
||||
if (kfd_interrupt_init(kfd)) {
|
||||
dev_err(kfd_device, "Error initializing interrupts\n");
|
||||
@@ -737,6 +740,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
goto gws_error;
|
||||
}
|
||||
|
||||
/* If CRAT is broken, won't set iommu enabled */
|
||||
kfd_double_confirm_iommu_support(kfd);
|
||||
|
||||
if (kfd_iommu_device_init(kfd)) {
|
||||
dev_err(kfd_device, "Error initializing iommuv2\n");
|
||||
goto device_iommu_error;
|
||||
@@ -796,6 +802,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
|
||||
kfd_interrupt_exit(kfd);
|
||||
kfd_topology_remove_device(kfd);
|
||||
kfd_doorbell_fini(kfd);
|
||||
ida_destroy(&kfd->doorbell_ida);
|
||||
kfd_gtt_sa_fini(kfd);
|
||||
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
||||
if (kfd->gws)
|
||||
@@ -810,6 +817,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
|
||||
if (!kfd->init_complete)
|
||||
return 0;
|
||||
|
||||
kfd_smi_event_update_gpu_reset(kfd, false);
|
||||
|
||||
kfd->dqm->ops.pre_reset(kfd->dqm);
|
||||
|
||||
kgd2kfd_suspend(kfd, false);
|
||||
@@ -838,6 +847,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
|
||||
|
||||
atomic_set(&kfd->sram_ecc_flag, 0);
|
||||
|
||||
kfd_smi_event_update_gpu_reset(kfd, true);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1245,6 +1256,12 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
|
||||
WARN_ONCE(count < 0, "Compute profile ref. count error");
|
||||
}
|
||||
|
||||
/*
 * kgd2kfd_smi_event_throttle - Forward a thermal throttle bitmask to the
 * KFD SMI event code.
 *
 * @kfd: KFD device; may be NULL, in which case the call is a no-op.
 * @throttle_bitmask: passed through unchanged to
 *                    kfd_smi_event_update_thermal_throttling().
 */
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
|
||||
{
|
||||
/* Silently ignore calls made without a KFD device. */
if (kfd)
|
||||
kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
|
||||
/* This function will send a package to HIQ to hang the HWS
|
||||
|
@@ -153,30 +153,6 @@ static void decrement_queue_count(struct device_queue_manager *dqm,
|
||||
dqm->active_cp_queue_count--;
|
||||
}
|
||||
|
||||
/*
 * read_sdma_queue_counter - Read the SDMA activity counter that follows a
 * queue's read pointer in user memory.
 *
 * @q_rptr: user-space address of the queue's RPTR, passed as an integer.
 * @val: output; written only when the user read succeeds.
 *
 * Returns -EINVAL if @val is NULL, -EFAULT if access_ok() rejects the
 * counter address, otherwise the result of get_user().
 */
int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val)
|
||||
{
|
||||
int ret;
|
||||
uint64_t tmp = 0;
|
||||
|
||||
if (!val)
|
||||
return -EINVAL;
|
||||
/*
|
||||
* SDMA activity counter is stored at queue's RPTR + 0x8 location.
|
||||
*/
|
||||
/* Validate the 8-byte counter slot located sizeof(uint64_t) past q_rptr. */
if (!access_ok((const void __user *)(q_rptr +
|
||||
sizeof(uint64_t)), sizeof(uint64_t))) {
|
||||
pr_err("Can't access sdma queue activity counter\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
/* Read into a local first so *val is untouched if get_user() fails. */
ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
|
||||
if (!ret) {
|
||||
*val = tmp;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
|
||||
{
|
||||
struct kfd_dev *dev = qpd->dqm->dev;
|
||||
@@ -215,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
|
||||
}
|
||||
|
||||
q->properties.doorbell_off =
|
||||
kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
|
||||
kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
|
||||
q->doorbell_id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -552,7 +527,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
/* Get the SDMA queue stats */
|
||||
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
|
||||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
|
||||
retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
|
||||
retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
|
||||
&sdma_val);
|
||||
if (retval)
|
||||
pr_err("Failed to read SDMA queue counter for queue: %d\n",
|
||||
@@ -674,9 +649,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
goto out;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
pr_info_ratelimited("Evicting PASID 0x%x queues\n",
|
||||
pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
|
||||
pdd->process->pasid);
|
||||
|
||||
pdd->last_evict_timestamp = get_jiffies_64();
|
||||
/* Mark all queues as evicted. Deactivate all active queues on
|
||||
* the qpd.
|
||||
*/
|
||||
@@ -724,7 +700,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
goto out;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
pr_info_ratelimited("Evicting PASID 0x%x queues\n",
|
||||
pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
|
||||
pdd->process->pasid);
|
||||
|
||||
/* Mark all queues as evicted. Deactivate all active queues on
|
||||
@@ -738,6 +714,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
q->properties.is_active = false;
|
||||
decrement_queue_count(dqm, q->properties.type);
|
||||
}
|
||||
pdd->last_evict_timestamp = get_jiffies_64();
|
||||
retval = execute_queues_cpsch(dqm,
|
||||
qpd->is_debug ?
|
||||
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
|
||||
@@ -756,6 +733,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
struct mqd_manager *mqd_mgr;
|
||||
struct kfd_process_device *pdd;
|
||||
uint64_t pd_base;
|
||||
uint64_t eviction_duration;
|
||||
int retval, ret = 0;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
@@ -770,7 +748,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_info_ratelimited("Restoring PASID 0x%x queues\n",
|
||||
pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
|
||||
pdd->process->pasid);
|
||||
|
||||
/* Update PD Base in QPD */
|
||||
@@ -823,6 +801,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
ret = retval;
|
||||
}
|
||||
qpd->evicted = 0;
|
||||
eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
|
||||
atomic64_add(eviction_duration, &pdd->evict_duration_counter);
|
||||
out:
|
||||
if (mm)
|
||||
mmput(mm);
|
||||
@@ -836,6 +816,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
struct queue *q;
|
||||
struct kfd_process_device *pdd;
|
||||
uint64_t pd_base;
|
||||
uint64_t eviction_duration;
|
||||
int retval = 0;
|
||||
|
||||
pdd = qpd_to_pdd(qpd);
|
||||
@@ -850,7 +831,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_info_ratelimited("Restoring PASID 0x%x queues\n",
|
||||
pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
|
||||
pdd->process->pasid);
|
||||
|
||||
/* Update PD Base in QPD */
|
||||
@@ -869,6 +850,8 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
retval = execute_queues_cpsch(dqm,
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
|
||||
qpd->evicted = 0;
|
||||
eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
|
||||
atomic64_add(eviction_duration, &pdd->evict_duration_counter);
|
||||
out:
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
@@ -1475,7 +1458,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
/* Get the SDMA queue stats */
|
||||
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
|
||||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
|
||||
retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
|
||||
retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
|
||||
&sdma_val);
|
||||
if (retval)
|
||||
pr_err("Failed to read SDMA queue counter for queue: %d\n",
|
||||
@@ -1989,6 +1972,7 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
|
||||
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
|
||||
pdd = kfd_get_process_device_data(dqm->dev, p);
|
||||
if (pdd)
|
||||
ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
|
||||
|
@@ -251,5 +251,11 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
|
||||
mutex_unlock(&dqm->lock_hidden);
|
||||
}
|
||||
|
||||
int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val);
|
||||
/*
 * read_sdma_queue_counter - Read the SDMA activity counter that follows a
 * queue's read pointer in user memory.
 *
 * @q_rptr: user-space pointer to the queue's RPTR.
 * @val: output location handed directly to get_user(); not NULL-checked here.
 *
 * Returns the result of get_user().
 */
static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val)
|
||||
{
|
||||
/*
|
||||
* SDMA activity counter is stored at queue's RPTR + 0x8 location.
|
||||
*/
|
||||
/* q_rptr + 1 advances by one uint64_t, i.e. the 0x8 offset noted above. */
return get_user(*val, q_rptr + 1);
|
||||
}
|
||||
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
|
||||
|
@@ -61,8 +61,8 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
|
||||
qpd->sh_mem_config =
|
||||
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
|
||||
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
|
||||
if (amdgpu_noretry &&
|
||||
!dqm->dev->device_info->needs_iommu_device)
|
||||
if (dqm->dev->noretry &&
|
||||
!dqm->dev->use_iommu_v2)
|
||||
qpd->sh_mem_config |=
|
||||
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
|
||||
|
||||
|
@@ -31,9 +31,6 @@
|
||||
* kernel queues using the first doorbell page reserved for the kernel.
|
||||
*/
|
||||
|
||||
static DEFINE_IDA(doorbell_ida);
|
||||
static unsigned int max_doorbell_slices;
|
||||
|
||||
/*
|
||||
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
|
||||
* receives 32-bit writes that are passed to queues as wptr values.
|
||||
@@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
|
||||
else
|
||||
return -ENOSPC;
|
||||
|
||||
if (!max_doorbell_slices ||
|
||||
doorbell_process_limit < max_doorbell_slices)
|
||||
max_doorbell_slices = doorbell_process_limit;
|
||||
if (!kfd->max_doorbell_slices ||
|
||||
doorbell_process_limit < kfd->max_doorbell_slices)
|
||||
kfd->max_doorbell_slices = doorbell_process_limit;
|
||||
|
||||
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
|
||||
doorbell_start_offset;
|
||||
@@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
phys_addr_t address;
|
||||
struct kfd_process_device *pdd;
|
||||
|
||||
/*
|
||||
* For simplicity we only allow mapping of the entire doorbell
|
||||
@@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
|
||||
if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
|
||||
return -EINVAL;
|
||||
|
||||
/* Calculate physical address of doorbell */
|
||||
address = kfd_get_process_doorbells(dev, process);
|
||||
pdd = kfd_get_process_device_data(dev, process);
|
||||
if (!pdd)
|
||||
return -EINVAL;
|
||||
|
||||
/* Calculate physical address of doorbell */
|
||||
address = kfd_get_process_doorbells(pdd);
|
||||
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
|
||||
VM_DONTDUMP | VM_PFNMAP;
|
||||
|
||||
@@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
|
||||
}
|
||||
|
||||
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
|
||||
struct kfd_process *process,
|
||||
struct kfd_process_device *pdd,
|
||||
unsigned int doorbell_id)
|
||||
{
|
||||
/*
|
||||
@@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
|
||||
* units regardless of the ASIC-dependent doorbell size.
|
||||
*/
|
||||
return kfd->doorbell_base_dw_offset +
|
||||
process->doorbell_index
|
||||
pdd->doorbell_index
|
||||
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
|
||||
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
|
||||
}
|
||||
@@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
|
||||
|
||||
}
|
||||
|
||||
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
|
||||
struct kfd_process *process)
|
||||
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
|
||||
{
|
||||
return dev->doorbell_base +
|
||||
process->doorbell_index * kfd_doorbell_process_slice(dev);
|
||||
return pdd->dev->doorbell_base +
|
||||
pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
|
||||
}
|
||||
|
||||
int kfd_alloc_process_doorbells(struct kfd_process *process)
|
||||
int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
|
||||
{
|
||||
int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
|
||||
int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
|
||||
GFP_KERNEL);
|
||||
if (r > 0)
|
||||
process->doorbell_index = r;
|
||||
*doorbell_index = r;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void kfd_free_process_doorbells(struct kfd_process *process)
|
||||
void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
|
||||
{
|
||||
if (process->doorbell_index)
|
||||
ida_simple_remove(&doorbell_ida, process->doorbell_index);
|
||||
if (doorbell_index)
|
||||
ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
|
||||
}
|
||||
|
@@ -321,7 +321,7 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
|
||||
pdd->lds_base = MAKE_LDS_APP_BASE_VI();
|
||||
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
|
||||
|
||||
if (!pdd->dev->device_info->needs_iommu_device) {
|
||||
if (!pdd->dev->use_iommu_v2) {
|
||||
/* dGPUs: SVM aperture starting at 0
|
||||
* with small reserved space for kernel.
|
||||
* Set them to CANONICAL addresses.
|
||||
@@ -425,7 +425,7 @@ int kfd_init_apertures(struct kfd_process *process)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!dev->device_info->needs_iommu_device) {
|
||||
if (!dev->use_iommu_v2) {
|
||||
/* dGPUs: the reserved space for kernel
|
||||
* before SVM
|
||||
*/
|
||||
|
@@ -41,7 +41,7 @@ int kfd_iommu_check_device(struct kfd_dev *kfd)
|
||||
struct amd_iommu_device_info iommu_info;
|
||||
int err;
|
||||
|
||||
if (!kfd->device_info->needs_iommu_device)
|
||||
if (!kfd->use_iommu_v2)
|
||||
return -ENODEV;
|
||||
|
||||
iommu_info.flags = 0;
|
||||
@@ -63,7 +63,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
|
||||
unsigned int pasid_limit;
|
||||
int err;
|
||||
|
||||
if (!kfd->device_info->needs_iommu_device)
|
||||
if (!kfd->use_iommu_v2)
|
||||
return 0;
|
||||
|
||||
iommu_info.flags = 0;
|
||||
@@ -109,7 +109,7 @@ int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
|
||||
struct kfd_process *p = pdd->process;
|
||||
int err;
|
||||
|
||||
if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND)
|
||||
if (!dev->use_iommu_v2 || pdd->bound == PDD_BOUND)
|
||||
return 0;
|
||||
|
||||
if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
|
||||
@@ -284,7 +284,7 @@ static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
|
||||
*/
|
||||
void kfd_iommu_suspend(struct kfd_dev *kfd)
|
||||
{
|
||||
if (!kfd->device_info->needs_iommu_device)
|
||||
if (!kfd->use_iommu_v2)
|
||||
return;
|
||||
|
||||
kfd_unbind_processes_from_device(kfd);
|
||||
@@ -304,7 +304,7 @@ int kfd_iommu_resume(struct kfd_dev *kfd)
|
||||
unsigned int pasid_limit;
|
||||
int err;
|
||||
|
||||
if (!kfd->device_info->needs_iommu_device)
|
||||
if (!kfd->use_iommu_v2)
|
||||
return 0;
|
||||
|
||||
pasid_limit = kfd_get_pasid_limit();
|
||||
|
@@ -70,6 +70,7 @@ err_create_wq:
|
||||
err_topology:
|
||||
kfd_chardev_exit();
|
||||
err_ioctl:
|
||||
pr_err("KFD is disabled due to module initialization failure\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@@ -297,6 +297,9 @@ struct kfd_dev {
|
||||
|
||||
bool pci_atomic_requested;
|
||||
|
||||
/* Use IOMMU v2 flag */
|
||||
bool use_iommu_v2;
|
||||
|
||||
/* SRAM ECC flag */
|
||||
atomic_t sram_ecc_flag;
|
||||
|
||||
@@ -309,6 +312,13 @@ struct kfd_dev {
|
||||
/* Clients watching SMI events */
|
||||
struct list_head smi_clients;
|
||||
spinlock_t smi_lock;
|
||||
|
||||
uint32_t reset_seq_num;
|
||||
|
||||
struct ida doorbell_ida;
|
||||
unsigned int max_doorbell_slices;
|
||||
|
||||
int noretry;
|
||||
};
|
||||
|
||||
enum kfd_mempool {
|
||||
@@ -626,7 +636,7 @@ enum kfd_pdd_bound {
|
||||
PDD_BOUND_SUSPENDED,
|
||||
};
|
||||
|
||||
#define MAX_SYSFS_FILENAME_LEN 11
|
||||
#define MAX_SYSFS_FILENAME_LEN 15
|
||||
|
||||
/*
|
||||
* SDMA counter runs at 100MHz frequency.
|
||||
@@ -687,6 +697,39 @@ struct kfd_process_device {
|
||||
uint64_t sdma_past_activity_counter;
|
||||
struct attribute attr_sdma;
|
||||
char sdma_filename[MAX_SYSFS_FILENAME_LEN];
|
||||
|
||||
/* Eviction activity tracking */
|
||||
uint64_t last_evict_timestamp;
|
||||
atomic64_t evict_duration_counter;
|
||||
struct attribute attr_evict;
|
||||
|
||||
struct kobject *kobj_stats;
|
||||
unsigned int doorbell_index;
|
||||
|
||||
/*
|
||||
* @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
|
||||
* that is associated with device encoded by "this" struct instance. The
|
||||
* value reflects CU usage by all of the waves launched by this process
|
||||
* on this device. A very important property of occupancy parameter is
|
||||
* that its value is a snapshot of current use.
|
||||
*
|
||||
* Following is to be noted regarding how this parameter is reported:
|
||||
*
|
||||
* The number of waves that a CU can launch is limited by couple of
|
||||
* parameters. These are encoded by struct amdgpu_cu_info instance
|
||||
* that is part of every device definition. For GFX9 devices this
|
||||
* translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves
|
||||
* do not use scratch memory and 32 waves (max_scratch_slots_per_cu)
|
||||
* when they do use scratch memory. This could change for future
|
||||
* devices and therefore this example should be considered as a guide.
|
||||
*
|
||||
* All CU's of a device are available for the process. This may not be true
|
||||
* under certain conditions - e.g. CU masking.
|
||||
*
|
||||
* Finally number of CU's that are occupied by a process is affected by both
|
||||
* number of CU's a device has along with number of other competing processes
|
||||
*/
|
||||
struct attribute attr_cu_occupancy;
|
||||
};
|
||||
|
||||
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
|
||||
@@ -724,7 +767,6 @@ struct kfd_process {
|
||||
struct mmu_notifier mmu_notifier;
|
||||
|
||||
u32 pasid;
|
||||
unsigned int doorbell_index;
|
||||
|
||||
/*
|
||||
* List of kfd_process_device structures,
|
||||
@@ -857,13 +899,13 @@ u32 read_kernel_doorbell(u32 __iomem *db);
|
||||
void write_kernel_doorbell(void __iomem *db, u32 value);
|
||||
void write_kernel_doorbell64(void __iomem *db, u64 value);
|
||||
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
|
||||
struct kfd_process *process,
|
||||
struct kfd_process_device *pdd,
|
||||
unsigned int doorbell_id);
|
||||
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
|
||||
struct kfd_process *process);
|
||||
int kfd_alloc_process_doorbells(struct kfd_process *process);
|
||||
void kfd_free_process_doorbells(struct kfd_process *process);
|
||||
|
||||
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
|
||||
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
|
||||
unsigned int *doorbell_index);
|
||||
void kfd_free_process_doorbells(struct kfd_dev *kfd,
|
||||
unsigned int doorbell_index);
|
||||
/* GTT Sub-Allocator */
|
||||
|
||||
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
|
||||
@@ -892,6 +934,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
|
||||
struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
|
||||
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
|
||||
int kfd_numa_node_to_apic_id(int numa_node_id);
|
||||
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
|
||||
|
||||
/* Interrupts */
|
||||
int kfd_interrupt_init(struct kfd_dev *dev);
|
||||
|
@@ -87,7 +87,7 @@ struct kfd_sdma_activity_handler_workarea {
|
||||
};
|
||||
|
||||
struct temp_sdma_queue_list {
|
||||
uint64_t rptr;
|
||||
uint64_t __user *rptr;
|
||||
uint64_t sdma_val;
|
||||
unsigned int queue_id;
|
||||
struct list_head list;
|
||||
@@ -159,7 +159,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&sdma_q->list);
|
||||
sdma_q->rptr = (uint64_t)q->properties.read_ptr;
|
||||
sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
|
||||
sdma_q->queue_id = q->properties.queue_id;
|
||||
list_add_tail(&sdma_q->list, &sdma_q_list.list);
|
||||
}
|
||||
@@ -218,7 +218,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work)
|
||||
continue;
|
||||
|
||||
list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
|
||||
if (((uint64_t)q->properties.read_ptr == sdma_q->rptr) &&
|
||||
if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
|
||||
(sdma_q->queue_id == q->properties.queue_id)) {
|
||||
list_del(&sdma_q->list);
|
||||
kfree(sdma_q);
|
||||
@@ -249,6 +249,52 @@ cleanup:
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device
|
||||
* by current process. Translates acquired wave count into number of compute units
|
||||
* that are occupied.
|
||||
*
|
||||
* @atr: Handle of attribute that allows reporting of wave count. The attribute
|
||||
* handle encapsulates GPU device it is associated with, thereby allowing collection
|
||||
* of waves in flight, etc
|
||||
*
|
||||
* @buffer: Handle of user provided buffer updated with wave count
|
||||
*
|
||||
* Return: Number of bytes written to user buffer or an error value
|
||||
*/
|
||||
static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
|
||||
{
|
||||
int cu_cnt;
|
||||
int wave_cnt;
|
||||
int max_waves_per_cu;
|
||||
struct kfd_dev *dev = NULL;
|
||||
struct kfd_process *proc = NULL;
|
||||
struct kfd_process_device *pdd = NULL;
|
||||
|
||||
pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
|
||||
dev = pdd->dev;
|
||||
if (dev->kfd2kgd->get_cu_occupancy == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
cu_cnt = 0;
|
||||
proc = pdd->process;
|
||||
if (pdd->qpd.queue_count == 0) {
|
||||
pr_debug("Gpu-Id: %d has no active queues for process %d\n",
|
||||
dev->id, proc->pasid);
|
||||
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
|
||||
}
|
||||
|
||||
/* Collect wave count from device if it supports */
|
||||
wave_cnt = 0;
|
||||
max_waves_per_cu = 0;
|
||||
dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
|
||||
&max_waves_per_cu);
|
||||
|
||||
/* Translate wave count to number of compute units */
|
||||
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
|
||||
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
|
||||
}
|
||||
|
||||
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
|
||||
char *buffer)
|
||||
{
|
||||
@@ -270,6 +316,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
|
||||
kfd_sdma_activity_worker);
|
||||
|
||||
sdma_activity_work_handler.pdd = pdd;
|
||||
sdma_activity_work_handler.sdma_activity_counter = 0;
|
||||
|
||||
schedule_work(&sdma_activity_work_handler.sdma_activity_work);
|
||||
|
||||
@@ -344,6 +391,32 @@ static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
|
||||
struct attribute *attr, char *buffer)
|
||||
{
|
||||
if (strcmp(attr->name, "evicted_ms") == 0) {
|
||||
struct kfd_process_device *pdd = container_of(attr,
|
||||
struct kfd_process_device,
|
||||
attr_evict);
|
||||
uint64_t evict_jiffies;
|
||||
|
||||
evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
|
||||
|
||||
return snprintf(buffer,
|
||||
PAGE_SIZE,
|
||||
"%llu\n",
|
||||
jiffies64_to_msecs(evict_jiffies));
|
||||
|
||||
/* Sysfs handle that gets CU occupancy is per device */
|
||||
} else if (strcmp(attr->name, "cu_occupancy") == 0) {
|
||||
return kfd_get_cu_occupancy(attr, buffer);
|
||||
} else {
|
||||
pr_err("Invalid attribute");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct attribute attr_queue_size = {
|
||||
.name = "size",
|
||||
.mode = KFD_SYSFS_FILE_MODE
|
||||
@@ -375,6 +448,19 @@ static struct kobj_type procfs_queue_type = {
|
||||
.default_attrs = procfs_queue_attrs,
|
||||
};
|
||||
|
||||
static const struct sysfs_ops procfs_stats_ops = {
|
||||
.show = kfd_procfs_stats_show,
|
||||
};
|
||||
|
||||
static struct attribute *procfs_stats_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct kobj_type procfs_stats_type = {
|
||||
.sysfs_ops = &procfs_stats_ops,
|
||||
.default_attrs = procfs_stats_attrs,
|
||||
};
|
||||
|
||||
int kfd_procfs_add_queue(struct queue *q)
|
||||
{
|
||||
struct kfd_process *proc;
|
||||
@@ -416,6 +502,72 @@ static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kfd_process_device *pdd;
|
||||
char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
|
||||
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
|
||||
if (!p->kobj)
|
||||
return -EFAULT;
|
||||
|
||||
/*
|
||||
* Create sysfs files for each GPU:
|
||||
* - proc/<pid>/stats_<gpuid>/
|
||||
* - proc/<pid>/stats_<gpuid>/evicted_ms
|
||||
* - proc/<pid>/stats_<gpuid>/cu_occupancy
|
||||
*/
|
||||
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
|
||||
struct kobject *kobj_stats;
|
||||
|
||||
snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
|
||||
"stats_%u", pdd->dev->id);
|
||||
kobj_stats = kfd_alloc_struct(kobj_stats);
|
||||
if (!kobj_stats)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = kobject_init_and_add(kobj_stats,
|
||||
&procfs_stats_type,
|
||||
p->kobj,
|
||||
stats_dir_filename);
|
||||
|
||||
if (ret) {
|
||||
pr_warn("Creating KFD proc/stats_%s folder failed",
|
||||
stats_dir_filename);
|
||||
kobject_put(kobj_stats);
|
||||
goto err;
|
||||
}
|
||||
|
||||
pdd->kobj_stats = kobj_stats;
|
||||
pdd->attr_evict.name = "evicted_ms";
|
||||
pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
|
||||
sysfs_attr_init(&pdd->attr_evict);
|
||||
ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
|
||||
if (ret)
|
||||
pr_warn("Creating eviction stats for gpuid %d failed",
|
||||
(int)pdd->dev->id);
|
||||
|
||||
/* Add sysfs file to report compute unit occupancy */
|
||||
if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
|
||||
pdd->attr_cu_occupancy.name = "cu_occupancy";
|
||||
pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
|
||||
sysfs_attr_init(&pdd->attr_cu_occupancy);
|
||||
ret = sysfs_create_file(kobj_stats,
|
||||
&pdd->attr_cu_occupancy);
|
||||
if (ret)
|
||||
pr_warn("Creating %s failed for gpuid: %d",
|
||||
pdd->attr_cu_occupancy.name,
|
||||
(int)pdd->dev->id);
|
||||
}
|
||||
}
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
|
||||
{
|
||||
int ret = 0;
|
||||
@@ -451,7 +603,6 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
void kfd_procfs_del_queue(struct queue *q)
|
||||
{
|
||||
if (!q)
|
||||
@@ -659,6 +810,11 @@ struct kfd_process *kfd_create_process(struct file *filep)
|
||||
if (!process->kobj_queues)
|
||||
pr_warn("Creating KFD proc/queues folder failed");
|
||||
|
||||
ret = kfd_procfs_add_sysfs_stats(process);
|
||||
if (ret)
|
||||
pr_warn("Creating sysfs stats dir for pid %d failed",
|
||||
(int)process->lead_thread->pid);
|
||||
|
||||
ret = kfd_procfs_add_sysfs_files(process);
|
||||
if (ret)
|
||||
pr_warn("Creating sysfs usage file for pid %d failed",
|
||||
@@ -780,6 +936,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
|
||||
kfree(pdd->qpd.doorbell_bitmap);
|
||||
idr_destroy(&pdd->alloc_idr);
|
||||
|
||||
kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
|
||||
|
||||
/*
|
||||
* before destroying pdd, make sure to report availability
|
||||
* for auto suspend
|
||||
@@ -815,6 +973,12 @@ static void kfd_process_wq_release(struct work_struct *work)
|
||||
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
|
||||
sysfs_remove_file(p->kobj, &pdd->attr_vram);
|
||||
sysfs_remove_file(p->kobj, &pdd->attr_sdma);
|
||||
sysfs_remove_file(p->kobj, &pdd->attr_evict);
|
||||
if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL)
|
||||
sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy);
|
||||
kobject_del(pdd->kobj_stats);
|
||||
kobject_put(pdd->kobj_stats);
|
||||
pdd->kobj_stats = NULL;
|
||||
}
|
||||
|
||||
kobject_del(p->kobj);
|
||||
@@ -832,8 +996,6 @@ static void kfd_process_wq_release(struct work_struct *work)
|
||||
kfd_event_free_process(p);
|
||||
|
||||
kfd_pasid_free(p->pasid);
|
||||
kfd_free_process_doorbells(p);
|
||||
|
||||
mutex_destroy(&p->mutex);
|
||||
|
||||
put_task_struct(p->lead_thread);
|
||||
@@ -1011,9 +1173,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
|
||||
if (process->pasid == 0)
|
||||
goto err_alloc_pasid;
|
||||
|
||||
if (kfd_alloc_process_doorbells(process) < 0)
|
||||
goto err_alloc_doorbells;
|
||||
|
||||
err = pqm_init(&process->pqm, process);
|
||||
if (err != 0)
|
||||
goto err_process_pqm_init;
|
||||
@@ -1041,8 +1200,6 @@ err_register_notifier:
|
||||
err_init_apertures:
|
||||
pqm_uninit(&process->pqm);
|
||||
err_process_pqm_init:
|
||||
kfd_free_process_doorbells(process);
|
||||
err_alloc_doorbells:
|
||||
kfd_pasid_free(process->pasid);
|
||||
err_alloc_pasid:
|
||||
mutex_destroy(&process->mutex);
|
||||
@@ -1105,10 +1262,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
if (!pdd)
|
||||
return NULL;
|
||||
|
||||
if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
|
||||
pr_err("Failed to alloc doorbell for pdd\n");
|
||||
goto err_free_pdd;
|
||||
}
|
||||
|
||||
if (init_doorbell_bitmap(&pdd->qpd, dev)) {
|
||||
pr_err("Failed to init doorbell for process\n");
|
||||
kfree(pdd);
|
||||
return NULL;
|
||||
goto err_free_pdd;
|
||||
}
|
||||
|
||||
pdd->dev = dev;
|
||||
@@ -1124,12 +1285,17 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
pdd->runtime_inuse = false;
|
||||
pdd->vram_usage = 0;
|
||||
pdd->sdma_past_activity_counter = 0;
|
||||
atomic64_set(&pdd->evict_duration_counter, 0);
|
||||
list_add(&pdd->per_device_list, &p->per_device_data);
|
||||
|
||||
/* Init idr used for memory handle translation */
|
||||
idr_init(&pdd->alloc_idr);
|
||||
|
||||
return pdd;
|
||||
|
||||
err_free_pdd:
|
||||
kfree(pdd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1487,6 +1653,7 @@ void kfd_suspend_all_processes(void)
|
||||
unsigned int temp;
|
||||
int idx = srcu_read_lock(&kfd_processes_srcu);
|
||||
|
||||
WARN(debug_evictions, "Evicting all processes");
|
||||
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
|
||||
cancel_delayed_work_sync(&p->eviction_work);
|
||||
cancel_delayed_work_sync(&p->restore_work);
|
||||
|
@@ -24,6 +24,7 @@
|
||||
#include <linux/wait.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_vm.h"
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_smi_events.h"
|
||||
@@ -148,15 +149,94 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
|
||||
char *event_msg, int len)
|
||||
{
|
||||
struct kfd_smi_client *client;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
|
||||
if (!(READ_ONCE(client->events) &
|
||||
KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
|
||||
continue;
|
||||
spin_lock(&client->lock);
|
||||
if (kfifo_avail(&client->fifo) >= len) {
|
||||
kfifo_in(&client->fifo, event_msg, len);
|
||||
wake_up_all(&client->wait_queue);
|
||||
} else {
|
||||
pr_debug("smi_event(EventID: %u): no space left\n",
|
||||
smi_event);
|
||||
}
|
||||
spin_unlock(&client->lock);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
 * Emit a GPU pre-reset or post-reset SMI event to the clients of @dev.
 *
 * The message is "<event> <seq>\n" in hex. The sequence number is bumped
 * for every pre-reset message, so a post-reset message carries the same
 * number as the pre-reset message that preceded it.
 *
 * Buffer budget: 1 byte event + 1 byte space + 8 bytes seq num +
 * 1 byte \n + 1 byte \0 = 12.
 */
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
{
	char fifo_in[12];
	unsigned int event;
	int len;

	/* Nobody is listening: skip formatting (and the sequence bump). */
	if (list_empty(&dev->smi_clients))
		return;

	memset(fifo_in, 0x0, sizeof(fifo_in));

	event = post_reset ? KFD_SMI_EVENT_GPU_POST_RESET :
			     KFD_SMI_EVENT_GPU_PRE_RESET;
	if (!post_reset)
		++(dev->reset_seq_num);

	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
		       dev->reset_seq_num);

	add_event_to_kfifo(dev, event, fifo_in, len);
}
|
||||
|
||||
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
|
||||
uint32_t throttle_bitmask)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
|
||||
/*
|
||||
* ThermalThrottle msg = throttle_bitmask(8):
|
||||
* thermal_interrupt_count(16):
|
||||
* 1 byte event + 1 byte space + 8 byte throttle_bitmask +
|
||||
* 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
|
||||
* 1 byte \0 = 29
|
||||
*/
|
||||
char fifo_in[29];
|
||||
int len;
|
||||
|
||||
if (list_empty(&dev->smi_clients))
|
||||
return;
|
||||
|
||||
len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
|
||||
KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
|
||||
atomic64_read(&adev->smu.throttle_int_counter));
|
||||
|
||||
add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
|
||||
}
|
||||
|
||||
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
|
||||
struct amdgpu_task_info task_info;
|
||||
/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
|
||||
/* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
|
||||
/* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
|
||||
* 1 byte \0 = 29
|
||||
*/
|
||||
char fifo_in[43];
|
||||
struct kfd_smi_client *client;
|
||||
char fifo_in[29];
|
||||
int len;
|
||||
|
||||
if (list_empty(&dev->smi_clients))
|
||||
@@ -168,25 +248,10 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
|
||||
if (!task_info.pid)
|
||||
return;
|
||||
|
||||
len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
|
||||
len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
|
||||
task_info.pid, task_info.task_name);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
|
||||
if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT))
|
||||
continue;
|
||||
spin_lock(&client->lock);
|
||||
if (kfifo_avail(&client->fifo) >= len) {
|
||||
kfifo_in(&client->fifo, fifo_in, len);
|
||||
wake_up_all(&client->wait_queue);
|
||||
}
|
||||
else
|
||||
pr_debug("smi_event(vmfault): no space left\n");
|
||||
spin_unlock(&client->lock);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
|
||||
}
|
||||
|
||||
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
|
||||
|
@@ -25,5 +25,8 @@
|
||||
|
||||
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
|
||||
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
|
||||
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
|
||||
uint32_t throttle_bitmask);
|
||||
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
|
||||
|
||||
#endif
|
||||
|
@@ -446,7 +446,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
|
||||
sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
|
||||
dev->node_props.cpu_cores_count);
|
||||
sysfs_show_32bit_prop(buffer, offs, "simd_count",
|
||||
dev->node_props.simd_count);
|
||||
dev->gpu ? dev->node_props.simd_count : 0);
|
||||
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
|
||||
dev->node_props.mem_banks_count);
|
||||
sysfs_show_32bit_prop(buffer, offs, "caches_count",
|
||||
@@ -1139,7 +1139,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
|
||||
/* Discrete GPUs need their own topology device list
|
||||
* entries. Don't assign them to CPU/APU nodes.
|
||||
*/
|
||||
if (!gpu->device_info->needs_iommu_device &&
|
||||
if (!gpu->use_iommu_v2 &&
|
||||
dev->node_props.cpu_cores_count)
|
||||
continue;
|
||||
|
||||
@@ -1239,7 +1239,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||
void *crat_image = NULL;
|
||||
size_t image_size = 0;
|
||||
int proximity_domain;
|
||||
struct amdgpu_ras *ctx;
|
||||
struct amdgpu_device *adev;
|
||||
|
||||
INIT_LIST_HEAD(&temp_topology_device_list);
|
||||
|
||||
@@ -1388,7 +1388,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||
* Overwrite ATS capability according to needs_iommu_device to fix
|
||||
* potential missing corresponding bit in CRAT of BIOS.
|
||||
*/
|
||||
if (dev->gpu->device_info->needs_iommu_device)
|
||||
if (dev->gpu->use_iommu_v2)
|
||||
dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
|
||||
else
|
||||
dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
|
||||
@@ -1404,19 +1404,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||
dev->node_props.max_waves_per_simd = 10;
|
||||
}
|
||||
|
||||
ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd));
|
||||
if (ctx) {
|
||||
/* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */
|
||||
dev->node_props.capability |=
|
||||
(((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) ||
|
||||
((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ?
|
||||
HSA_CAP_SRAM_EDCSUPPORTED : 0;
|
||||
dev->node_props.capability |= ((ctx->features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
|
||||
HSA_CAP_MEM_EDCSUPPORTED : 0;
|
||||
adev = (struct amdgpu_device *)(dev->gpu->kgd);
|
||||
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
|
||||
dev->node_props.capability |=
|
||||
((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
|
||||
HSA_CAP_SRAM_EDCSUPPORTED : 0;
|
||||
dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
|
||||
HSA_CAP_MEM_EDCSUPPORTED : 0;
|
||||
|
||||
dev->node_props.capability |= (ctx->features != 0) ?
|
||||
if (adev->asic_type != CHIP_VEGA10)
|
||||
dev->node_props.capability |= (adev->ras_features != 0) ?
|
||||
HSA_CAP_RASEVENTNOTIFY : 0;
|
||||
}
|
||||
|
||||
kfd_debug_print_topology();
|
||||
|
||||
@@ -1515,6 +1513,29 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
|
||||
return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
|
||||
}
|
||||
|
||||
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
|
||||
{
|
||||
struct kfd_topology_device *dev;
|
||||
|
||||
gpu->use_iommu_v2 = false;
|
||||
|
||||
if (!gpu->device_info->needs_iommu_device)
|
||||
return;
|
||||
|
||||
down_read(&topology_lock);
|
||||
|
||||
/* Only use IOMMUv2 if there is an APU topology node with no GPU
|
||||
* assigned yet. This GPU will be assigned to it.
|
||||
*/
|
||||
list_for_each_entry(dev, &topology_device_list, list)
|
||||
if (dev->node_props.cpu_cores_count &&
|
||||
dev->node_props.simd_count &&
|
||||
!dev->gpu)
|
||||
gpu->use_iommu_v2 = true;
|
||||
|
||||
up_read(&topology_lock);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
|
||||
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
|
||||
|
Reference in New Issue
Block a user