Merge tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "Not a major amount of change, the i915 trees got split into display
  and gt trees to better facilitate higher level review, and there's a
  major refactoring of i915 GEM locking to use more core kernel concepts
  (like ww-mutexes). msm gets per-process pagetables, older AMD SI cards
  get DC support, nouveau got a bump in displayport support with common
  code extraction from i915.
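
   As a rough illustration (a minimal sketch of the wound/wait locking
   pattern under stated assumptions, not the actual i915 code), taking
   two object locks with ww-mutexes and backing off on -EDEADLK looks
   like this:

      #include <linux/ww_mutex.h>

      static DEFINE_WW_CLASS(demo_ww_class);

      struct demo_obj {
              struct ww_mutex lock; /* ww_mutex_init()ed with demo_ww_class */
      };

      /* Lock a then b; if we lose the age race on b, drop a, sleep on b,
       * then retake a under the same acquire context. A full version
       * would loop until no -EDEADLK remains. */
      static int demo_lock_pair(struct demo_obj *a, struct demo_obj *b)
      {
              struct ww_acquire_ctx ctx;
              int ret;

              ww_acquire_init(&ctx, &demo_ww_class);

              ret = ww_mutex_lock(&a->lock, &ctx); /* first lock never deadlocks */
              if (ret)
                      goto out;

              ret = ww_mutex_lock(&b->lock, &ctx);
              if (ret == -EDEADLK) {
                      ww_mutex_unlock(&a->lock);
                      ww_mutex_lock_slow(&b->lock, &ctx); /* sleep, then own b */
                      ret = ww_mutex_lock(&a->lock, &ctx);
                      if (ret) {
                              ww_mutex_unlock(&b->lock);
                              goto out;
                      }
              }
              ww_acquire_done(&ctx); /* all locks held; none will be added */

              /* ... operate on both objects ... */

              ww_mutex_unlock(&a->lock);
              ww_mutex_unlock(&b->lock);
      out:
              ww_acquire_fini(&ctx);
              return ret;
      }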

  Outside of drm this contains a couple of patches for hexint
  moduleparams which you've acked, and a virtio common code tree that
   you should also get via its regular path.
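
   (For illustration, a hedged sketch of what declaring a parameter with
   the new hexint type looks like -- amdgpu's ppfeaturemask is the
   in-tree user:)

      #include <linux/moduleparam.h>

      static unsigned int demo_mask = 0xffffffff;
      /* parsed and printed as hex, e.g. in
       * /sys/module/<module>/parameters/demo_mask */
      module_param(demo_mask, hexint, 0444);
      MODULE_PARM_DESC(demo_mask, "demo feature mask (hex)");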

  New driver:
   - Cadence MHDP8546 DisplayPort bridge driver

  core:
   - cross-driver scatterlist cleanups
   - devm_drm conversions
   - remove drm_dev_init
   - devm_drm_dev_alloc conversion

  ttm:
   - lots of refactoring and cleanups

  bridges:
   - chained bridge support in more drivers

  panel:
   - misc new panels

  scheduler:
   - cleanup priority levels

  displayport:
   - refactor i915 code into helpers for nouveau

  i915:
   - split into display and GT trees
   - WW locking refactoring in GEM
   - execbuf2 extension mechanism
   - syncobj timeline support
   - GEN 12 HOBL display powersaving
   - Rocket Lake display additions
   - Disable FBC on Tigerlake
   - Tigerlake Type-C + DP improvements
   - Hotplug interrupt refactoring

  amdgpu:
   - Sienna Cichlid updates
   - Navy Flounder updates
   - DCE6 (SI) support for DC
   - Plane rotation enabled
   - TMZ state info ioctl
   - PCIe DPC recovery support
   - DC interrupt handling refactor
   - OLED panel fixes

  amdkfd:
   - add SMI events for thermal throttling
   - SMI interface events ioctl update
   - process eviction counters

  radeon:
   - move to dma_ for allocations
   - expose sclk via sysfs

  msm:
   - DSI support for sm8150/sm8250
   - per-process GPU pagetable support
   - Displayport support

  mediatek:
   - move HDMI phy driver to PHY
   - convert mtk-dpi to bridge API
   - disable mt2701 tmds

  tegra:
   - bridge support

  exynos:
   - misc cleanups

  vc4:
   - dual display cleanups

  ast:
   - cleanups

  gma500:
   - conversion to GPIOd API

  hisilicon:
   - misc reworks

  ingenic:
   - clock handling and format improvements

  mcde:
   - DSI support

  mgag200:
   - desktop g200 support

  mxsfb:
   - i.MX7 + i.MX8M
   - alpha plane support

  panfrost:
   - devfreq support
   - amlogic SoC support

  ps8640:
   - EDID from eDP retrieval

  tidss:
   - AM65xx YUV workaround

  virtio:
   - virtio-gpu exported resources

  rcar-du:
   - R8A7742, R8A774E1 and R8A77961 support
   - YUV planar format fixes
   - non-visible plane handling
   - VSP device reference count fix
   - Kconfig fix to avoid displaying disabled options in .config"

* tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm: (1494 commits)
  drm/ingenic: Fix bad revert
  drm/amdgpu: Fix invalid number of character '{' in amdgpu_acpi_init
  drm/amdgpu: Remove warning for virtual_display
  drm/amdgpu: kfd_initialized can be static
  drm/amd/pm: setup APU dpm clock table in SMU HW initialization
  drm/amdgpu: prevent spurious warning
  drm/amdgpu/swsmu: fix ARC build errors
  drm/amd/display: Fix OPTC_DATA_FORMAT programming
  drm/amd/display: Don't allow pstate if no support in blank
  drm/panfrost: increase readl_relaxed_poll_timeout values
  MAINTAINERS: Update entry for st7703 driver after the rename
  Revert "gpu/drm: ingenic: Add option to mmap GEM buffers cached"
  drm/amd/display: HDMI remote sink need mode validation for Linux
  drm/amd/display: Change to correct unit on audio rate
  drm/amd/display: Avoid set zero in the requested clk
  drm/amdgpu: align frag_end to covered address space
  drm/amdgpu: fix NULL pointer dereference for Renoir
  drm/vmwgfx: fix regression in thp code due to ttm init refactor.
  drm/amdgpu/swsmu: add interrupt work handler for smu11 parts
  drm/amdgpu/swsmu: add interrupt work function
  ...
Committed by Linus Torvalds on 2020-10-15 10:46:16 -07:00
1295 files changed, 65076 insertions(+), 20247 deletions(-)

View File

@@ -911,7 +911,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x705d0000, 0x807c817c,
0x8070ff70, 0x00000080,
0xbf0a7b7c, 0xbf85fff8,
0xbf82014f, 0xbef4037e,
0xbf820151, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
@@ -1024,61 +1024,62 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbe883108, 0xbe8a310a,
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
0xb9782a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0xbef603ff, 0x01000000,
0xf4211bfa, 0xf0000000,
0x80788478, 0xf4211b3a,
0xba80f801, 0x00000000,
0xbf8a0000, 0xb9782a05,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
0xb96e1e06, 0x8f6e8a6e,
0x80786e78, 0x8078ff78,
0x00000200, 0xbef603ff,
0x01000000, 0xf4211bfa,
0xf0000000, 0x80788478,
0xf4211b7a, 0xf0000000,
0x80788478, 0xf4211c3a,
0xf4211b3a, 0xf0000000,
0x80788478, 0xf4211b7a,
0xf0000000, 0x80788478,
0xf4211c7a, 0xf0000000,
0x80788478, 0xf4211eba,
0xf4211c3a, 0xf0000000,
0x80788478, 0xf4211c7a,
0xf0000000, 0x80788478,
0xf4211efa, 0xf0000000,
0x80788478, 0xf4211e7a,
0xf4211eba, 0xf0000000,
0x80788478, 0xf4211efa,
0xf0000000, 0x80788478,
0xf4211cfa, 0xf0000000,
0x80788478, 0xf4211bba,
0xf4211e7a, 0xf0000000,
0x80788478, 0xf4211cfa,
0xf0000000, 0x80788478,
0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
0xb9eef815, 0xbefc036f,
0xbefe0370, 0xbeff0371,
0x876f7bff, 0x000003ff,
0xb9ef4803, 0xb9f9f816,
0x876f7bff, 0xfffff800,
0x906f8b6f, 0xb9efa2c3,
0xb9f3f801, 0xb96e2a05,
0x806e816e, 0xbf0d9972,
0xbf850002, 0x8f6e896e,
0xbf820001, 0x8f6e8a6e,
0x806eff6e, 0x00000200,
0x806e746e, 0x826f8075,
0x876fff6f, 0x0000ffff,
0xf4091c37, 0xfa000050,
0xf4091d37, 0xfa000060,
0xf4011e77, 0xfa000074,
0xbf8cc07f, 0x876fff6d,
0xfc000000, 0x906f9a6f,
0x8f6f906f, 0xbeee0380,
0xb9eef814, 0xf4211bba,
0xf0000000, 0x80788478,
0xbf8cc07f, 0xb9eef815,
0xbefc036f, 0xbefe0370,
0xbeff0371, 0x876f7bff,
0x000003ff, 0xb9ef4803,
0xb9f9f816, 0x876f7bff,
0xfffff800, 0x906f8b6f,
0xb9efa2c3, 0xb9f3f801,
0xb96e2a05, 0x806e816e,
0xbf0d9972, 0xbf850002,
0x8f6e896e, 0xbf820001,
0x8f6e8a6e, 0x806eff6e,
0x00000200, 0x806e746e,
0x826f8075, 0x876fff6f,
0x0000ffff, 0xf4091c37,
0xfa000050, 0xf4091d37,
0xfa000060, 0xf4011e77,
0xfa000074, 0xbf8cc07f,
0x876fff6d, 0xfc000000,
0x906f9a6f, 0x8f6f906f,
0xbeee0380, 0x886e6f6e,
0x876fff6d, 0x02000000,
0x906f996f, 0x8f6f8f6f,
0x886e6f6e, 0x876fff6d,
0x02000000, 0x906f996f,
0x8f6f8f6f, 0x886e6f6e,
0x876fff6d, 0x01000000,
0x906f986f, 0x8f6f996f,
0x886e6f6e, 0x876fff7a,
0x00800000, 0x906f976f,
0xb9eef807, 0x876dff6d,
0x0000ffff, 0x87fe7e7e,
0x87ea6a6a, 0xb9faf802,
0xbf8a0000, 0xbe80226c,
0x01000000, 0x906f986f,
0x8f6f996f, 0x886e6f6e,
0x876fff7a, 0x00800000,
0x906f976f, 0xb9eef807,
0x876dff6d, 0x0000ffff,
0x87fe7e7e, 0x87ea6a6a,
0xb9faf802, 0xbe80226c,
0xbf810000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
@@ -1807,7 +1808,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x705d0000, 0x807c817c,
0x8070ff70, 0x00000080,
0xbf0a7b7c, 0xbf85fff8,
0xbf82013a, 0xbef4037e,
0xbf82013c, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
@@ -1920,50 +1921,51 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbe883108, 0xbe8a310a,
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
0xb9782a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0xbef603ff, 0x01000000,
0xf4211bfa, 0xf0000000,
0x80788478, 0xf4211b3a,
0xba80f801, 0x00000000,
0xbf8a0000, 0xb9782a05,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
0xb96e1e06, 0x8f6e8a6e,
0x80786e78, 0x8078ff78,
0x00000200, 0xbef603ff,
0x01000000, 0xf4211bfa,
0xf0000000, 0x80788478,
0xf4211b7a, 0xf0000000,
0x80788478, 0xf4211c3a,
0xf4211b3a, 0xf0000000,
0x80788478, 0xf4211b7a,
0xf0000000, 0x80788478,
0xf4211c7a, 0xf0000000,
0x80788478, 0xf4211eba,
0xf4211c3a, 0xf0000000,
0x80788478, 0xf4211c7a,
0xf0000000, 0x80788478,
0xf4211efa, 0xf0000000,
0x80788478, 0xf4211e7a,
0xf4211eba, 0xf0000000,
0x80788478, 0xf4211efa,
0xf0000000, 0x80788478,
0xf4211cfa, 0xf0000000,
0x80788478, 0xf4211bba,
0xf4211e7a, 0xf0000000,
0x80788478, 0xf4211cfa,
0xf0000000, 0x80788478,
0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
0xb9eef815, 0xbefc036f,
0xbefe0370, 0xbeff0371,
0x876f7bff, 0x000003ff,
0xb9ef4803, 0x876f7bff,
0xfffff800, 0x906f8b6f,
0xb9efa2c3, 0xb9f3f801,
0xb96e2a05, 0x806e816e,
0xbf0d9972, 0xbf850002,
0x8f6e896e, 0xbf820001,
0x8f6e8a6e, 0x806eff6e,
0x00000200, 0x806e746e,
0x826f8075, 0x876fff6f,
0x0000ffff, 0xf4091c37,
0xfa000050, 0xf4091d37,
0xfa000060, 0xf4011e77,
0xfa000074, 0xbf8cc07f,
0x876dff6d, 0x0000ffff,
0x87fe7e7e, 0x87ea6a6a,
0xb9faf802, 0xbf8a0000,
0xb9eef814, 0xf4211bba,
0xf0000000, 0x80788478,
0xbf8cc07f, 0xb9eef815,
0xbefc036f, 0xbefe0370,
0xbeff0371, 0x876f7bff,
0x000003ff, 0xb9ef4803,
0x876f7bff, 0xfffff800,
0x906f8b6f, 0xb9efa2c3,
0xb9f3f801, 0xb96e2a05,
0x806e816e, 0xbf0d9972,
0xbf850002, 0x8f6e896e,
0xbf820001, 0x8f6e8a6e,
0x806eff6e, 0x00000200,
0x806e746e, 0x826f8075,
0x876fff6f, 0x0000ffff,
0xf4091c37, 0xfa000050,
0xf4091d37, 0xfa000060,
0xf4011e77, 0xfa000074,
0xbf8cc07f, 0x876dff6d,
0x0000ffff, 0x87fe7e7e,
0x87ea6a6a, 0xb9faf802,
0xbe80226c, 0xbf810000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,

View File

@@ -894,6 +894,11 @@ L_RESTORE_SGPR:
s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) ? 1 : 0
s_cbranch_scc0 L_RESTORE_SGPR_LOOP
// s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception.
// Clear DEBUG_EN before and restore MODE after the barrier.
s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0
s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG
/* restore HW registers */
L_RESTORE_HWREG:
// HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
@@ -976,8 +981,6 @@ L_RESTORE_HWREG:
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
L_END_PGM:

View File

@@ -97,6 +97,7 @@ void kfd_chardev_exit(void)
device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
class_destroy(kfd_class);
unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
kfd_device = NULL;
}
struct device *kfd_chardev(void)
@@ -1254,7 +1255,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
return true;
}
if (dev->device_info->needs_iommu_device)
if (dev->use_iommu_v2)
return false;
amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
@@ -1290,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return -EINVAL;
}
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (args->size != kfd_doorbell_process_slice(dev))
return -EINVAL;
offset = kfd_get_process_doorbells(dev, p);
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE)
return -EINVAL;
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
if (!offset)
return -ENOMEM;
}
mutex_lock(&p->mutex);
pdd = kfd_bind_process_to_device(dev, p);
@@ -1310,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
goto err_unlock;
}
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (args->size != kfd_doorbell_process_slice(dev)) {
err = -EINVAL;
goto err_unlock;
}
offset = kfd_get_process_doorbells(pdd);
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE) {
err = -EINVAL;
goto err_unlock;
}
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
if (!offset) {
err = -ENOMEM;
goto err_unlock;
}
}
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->kgd, args->va_addr, args->size,
pdd->vm, (struct kgd_mem **) &mem, &offset,

View File

@@ -742,6 +742,22 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
return 0;
}
static bool kfd_ignore_crat(void)
{
bool ret;
if (ignore_crat)
return true;
#ifndef KFD_SUPPORT_IOMMU_V2
ret = true;
#else
ret = false;
#endif
return ret;
}
/*
* kfd_create_crat_image_acpi - Allocates memory for CRAT image and
* copies CRAT from ACPI (if available).
@@ -776,12 +792,13 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
return -EINVAL;
}
if (ignore_crat) {
if (kfd_ignore_crat()) {
pr_info("CRAT table disabled by module option\n");
return -ENODATA;
}
pcrat_image = kmemdup(crat_table, crat_table->length, GFP_KERNEL);
pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
memcpy(pcrat_image, crat_table, crat_table->length);
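/* Editor's note: the !pcrat_image check below should come before this
 * memcpy(); as shown, a failed kvmalloc() would be dereferenced here. */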
if (!pcrat_image)
return -ENOMEM;
@@ -793,11 +810,10 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
/* Memory required to create Virtual CRAT.
* Since there is no easy way to predict the amount of memory required, the
* following amount are allocated for CPU and GPU Virtual CRAT. This is
* following amount is allocated for GPU Virtual CRAT. This is
* expected to cover all known conditions. But to be safe additional check
* is put in the code to ensure we don't overwrite.
*/
#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE)
#define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE)
/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
@@ -948,7 +964,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
#endif
int ret = 0;
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
if (!pcrat_image)
return -EINVAL;
/* Fill in CRAT Header.
@@ -1348,30 +1364,37 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
uint32_t proximity_domain)
{
void *pcrat_image = NULL;
int ret = 0;
int ret = 0, num_nodes;
size_t dyn_size;
if (!crat_image)
return -EINVAL;
*crat_image = NULL;
/* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and
* VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover
* all the current conditions. A check is put not to overwrite beyond
* allocated size
/* Allocate the CPU Virtual CRAT size based on the number of online
* nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
* This should cover all the current conditions. A check is put not
* to overwrite beyond allocated size for GPUs
*/
switch (flags) {
case COMPUTE_UNIT_CPU:
pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL);
num_nodes = num_online_nodes();
dyn_size = sizeof(struct crat_header) +
num_nodes * (sizeof(struct crat_subtype_computeunit) +
sizeof(struct crat_subtype_memory) +
(num_nodes - 1) * sizeof(struct crat_subtype_iolink));
pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
if (!pcrat_image)
return -ENOMEM;
*size = VCRAT_SIZE_FOR_CPU;
*size = dyn_size;
pr_debug("CRAT size is %ld", dyn_size);
ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
break;
case COMPUTE_UNIT_GPU:
if (!kdev)
return -EINVAL;
pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
if (!pcrat_image)
return -ENOMEM;
*size = VCRAT_SIZE_FOR_GPU;
@@ -1390,7 +1413,7 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
if (!ret)
*crat_image = pcrat_image;
else
kfree(pcrat_image);
kvfree(pcrat_image);
return ret;
}
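
(Editor's sketch of the dynamic CPU VCRAT sizing above: each online NUMA
node contributes one compute-unit subtype, one memory subtype, and an I/O
link to each of the other num_nodes - 1 nodes. With made-up struct sizes:)

	/* hypothetical sizes: header 40, CU 40, memory 40, iolink 32 bytes */
	size_t vcrat_size_example(int num_nodes)
	{
		return 40 + num_nodes * (40 + 40 + (num_nodes - 1) * 32);
	}
	/* num_nodes = 2  ->  40 + 2 * (80 + 1 * 32) = 264 bytes */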

View File

@@ -29,6 +29,7 @@
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#define MQD_SIZE_ALIGNED 768
@@ -115,6 +116,7 @@ static const struct kfd_device_info carrizo_device_info = {
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
#endif
static const struct kfd_device_info raven_device_info = {
.asic_family = CHIP_RAVEN,
@@ -133,7 +135,6 @@ static const struct kfd_device_info raven_device_info = {
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
#endif
static const struct kfd_device_info hawaii_device_info = {
.asic_family = CHIP_HAWAII,
@@ -502,8 +503,8 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
#ifdef KFD_SUPPORT_IOMMU_V2
[CHIP_KAVERI] = {&kaveri_device_info, NULL},
[CHIP_CARRIZO] = {&carrizo_device_info, NULL},
[CHIP_RAVEN] = {&raven_device_info, NULL},
#endif
[CHIP_RAVEN] = {&raven_device_info, NULL},
[CHIP_HAWAII] = {&hawaii_device_info, NULL},
[CHIP_TONGA] = {&tonga_device_info, NULL},
[CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info},
@@ -582,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
atomic_set(&kfd->sram_ecc_flag, 0);
ida_init(&kfd->doorbell_ida);
return kfd;
}
@@ -711,11 +714,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_doorbell_error;
}
if (kfd->kfd2kgd->get_hive_id)
kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);
kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd);
if (kfd->kfd2kgd->get_unique_id)
kfd->unique_id = kfd->kfd2kgd->get_unique_id(kfd->kgd);
kfd->unique_id = amdgpu_amdkfd_get_unique_id(kfd->kgd);
kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd);
if (kfd_interrupt_init(kfd)) {
dev_err(kfd_device, "Error initializing interrupts\n");
@@ -737,6 +740,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto gws_error;
}
/* If CRAT is broken, won't set iommu enabled */
kfd_double_confirm_iommu_support(kfd);
if (kfd_iommu_device_init(kfd)) {
dev_err(kfd_device, "Error initializing iommuv2\n");
goto device_iommu_error;
@@ -796,6 +802,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
if (kfd->gws)
@@ -810,6 +817,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
kfd_smi_event_update_gpu_reset(kfd, false);
kfd->dqm->ops.pre_reset(kfd->dqm);
kgd2kfd_suspend(kfd, false);
@@ -838,6 +847,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
atomic_set(&kfd->sram_ecc_flag, 0);
kfd_smi_event_update_gpu_reset(kfd, true);
return 0;
}
@@ -1245,6 +1256,12 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
{
if (kfd)
kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
}
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS

View File

@@ -153,30 +153,6 @@ static void decrement_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count--;
}
int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val)
{
int ret;
uint64_t tmp = 0;
if (!val)
return -EINVAL;
/*
* SDMA activity counter is stored at queue's RPTR + 0x8 location.
*/
if (!access_ok((const void __user *)(q_rptr +
sizeof(uint64_t)), sizeof(uint64_t))) {
pr_err("Can't access sdma queue activity counter\n");
return -EFAULT;
}
ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
if (!ret) {
*val = tmp;
}
return ret;
}
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
struct kfd_dev *dev = qpd->dqm->dev;
@@ -215,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
}
q->properties.doorbell_off =
kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
q->doorbell_id);
return 0;
}
@@ -552,7 +527,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
&sdma_val);
if (retval)
pr_err("Failed to read SDMA queue counter for queue: %d\n",
@@ -674,9 +649,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
pdd = qpd_to_pdd(qpd);
pr_info_ratelimited("Evicting PASID 0x%x queues\n",
pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
pdd->process->pasid);
pdd->last_evict_timestamp = get_jiffies_64();
/* Mark all queues as evicted. Deactivate all active queues on
* the qpd.
*/
@@ -724,7 +700,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
goto out;
pdd = qpd_to_pdd(qpd);
pr_info_ratelimited("Evicting PASID 0x%x queues\n",
pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
pdd->process->pasid);
/* Mark all queues as evicted. Deactivate all active queues on
@@ -738,6 +714,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
q->properties.is_active = false;
decrement_queue_count(dqm, q->properties.type);
}
pdd->last_evict_timestamp = get_jiffies_64();
retval = execute_queues_cpsch(dqm,
qpd->is_debug ?
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
@@ -756,6 +733,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
struct kfd_process_device *pdd;
uint64_t pd_base;
uint64_t eviction_duration;
int retval, ret = 0;
pdd = qpd_to_pdd(qpd);
@@ -770,7 +748,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
}
pr_info_ratelimited("Restoring PASID 0x%x queues\n",
pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
pdd->process->pasid);
/* Update PD Base in QPD */
@@ -823,6 +801,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
ret = retval;
}
qpd->evicted = 0;
eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
if (mm)
mmput(mm);
@@ -836,6 +816,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
struct queue *q;
struct kfd_process_device *pdd;
uint64_t pd_base;
uint64_t eviction_duration;
int retval = 0;
pdd = qpd_to_pdd(qpd);
@@ -850,7 +831,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
goto out;
}
pr_info_ratelimited("Restoring PASID 0x%x queues\n",
pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
pdd->process->pasid);
/* Update PD Base in QPD */
@@ -869,6 +850,8 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
qpd->evicted = 0;
eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
dqm_unlock(dqm);
return retval;
@@ -1475,7 +1458,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
&sdma_val);
if (retval)
pr_err("Failed to read SDMA queue counter for queue: %d\n",
@@ -1989,6 +1972,7 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
if (!p)
return -EINVAL;
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
pdd = kfd_get_process_device_data(dqm->dev, p);
if (pdd)
ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);

View File

@@ -251,5 +251,11 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
mutex_unlock(&dqm->lock_hidden);
}
int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val);
static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val)
{
/*
* SDMA activity counter is stored at queue's RPTR + 0x8 location.
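* Since q_rptr is a uint64_t __user pointer, q_rptr + 1 is exactly that
* +0x8 offset; get_user() also performs the access_ok() check that the
* old out-of-line helper did explicitly.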
*/
return get_user(*val, q_rptr + 1);
}
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */

View File

@@ -61,8 +61,8 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
if (amdgpu_noretry &&
!dqm->dev->device_info->needs_iommu_device)
if (dqm->dev->noretry &&
!dqm->dev->use_iommu_v2)
qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;

View File

@@ -31,9 +31,6 @@
* kernel queues using the first doorbell page reserved for the kernel.
*/
static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;
/*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
* receives 32-bit writes that are passed to queues as wptr values.
@@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
else
return -ENOSPC;
if (!max_doorbell_slices ||
doorbell_process_limit < max_doorbell_slices)
max_doorbell_slices = doorbell_process_limit;
if (!kfd->max_doorbell_slices ||
doorbell_process_limit < kfd->max_doorbell_slices)
kfd->max_doorbell_slices = doorbell_process_limit;
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset;
@@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
phys_addr_t address;
struct kfd_process_device *pdd;
/*
* For simplicitly we only allow mapping of the entire doorbell
@@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
return -EINVAL;
/* Calculate physical address of doorbell */
address = kfd_get_process_doorbells(dev, process);
pdd = kfd_get_process_device_data(dev, process);
if (!pdd)
return -EINVAL;
/* Calculate physical address of doorbell */
address = kfd_get_process_doorbells(pdd);
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
VM_DONTDUMP | VM_PFNMAP;
@@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process,
struct kfd_process_device *pdd,
unsigned int doorbell_id)
{
/*
@@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
* units regardless of the ASIC-dependent doorbell size.
*/
return kfd->doorbell_base_dw_offset +
process->doorbell_index
pdd->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
}
@@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
}
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process)
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
{
return dev->doorbell_base +
process->doorbell_index * kfd_doorbell_process_slice(dev);
return pdd->dev->doorbell_base +
pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
}
int kfd_alloc_process_doorbells(struct kfd_process *process)
int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
{
int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
GFP_KERNEL);
if (r > 0)
process->doorbell_index = r;
*doorbell_index = r;
return r;
}
void kfd_free_process_doorbells(struct kfd_process *process)
void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
{
if (process->doorbell_index)
ida_simple_remove(&doorbell_ida, process->doorbell_index);
if (doorbell_index)
ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
}
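
(Editor's worked example for kfd_get_doorbell_dw_offset_in_bar() above,
with made-up values: doorbell_base_dw_offset = 0, a 4096-byte process
doorbell slice, 8-byte doorbells, doorbell_index = 2, doorbell_id = 3:)

	0 + 2 * 4096 / 4 + 3 * 8 / 4 = 2048 + 6 = 2054 dwords,
	i.e. byte offset 8216 into the doorbell BAR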

View File

@@ -321,7 +321,7 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
pdd->lds_base = MAKE_LDS_APP_BASE_VI();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
if (!pdd->dev->device_info->needs_iommu_device) {
if (!pdd->dev->use_iommu_v2) {
/* dGPUs: SVM aperture starting at 0
* with small reserved space for kernel.
* Set them to CANONICAL addresses.
@@ -425,7 +425,7 @@ int kfd_init_apertures(struct kfd_process *process)
return -EINVAL;
}
if (!dev->device_info->needs_iommu_device) {
if (!dev->use_iommu_v2) {
/* dGPUs: the reserved space for kernel
* before SVM
*/

View File

@@ -41,7 +41,7 @@ int kfd_iommu_check_device(struct kfd_dev *kfd)
struct amd_iommu_device_info iommu_info;
int err;
if (!kfd->device_info->needs_iommu_device)
if (!kfd->use_iommu_v2)
return -ENODEV;
iommu_info.flags = 0;
@@ -63,7 +63,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
unsigned int pasid_limit;
int err;
if (!kfd->device_info->needs_iommu_device)
if (!kfd->use_iommu_v2)
return 0;
iommu_info.flags = 0;
@@ -109,7 +109,7 @@ int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
struct kfd_process *p = pdd->process;
int err;
if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND)
if (!dev->use_iommu_v2 || pdd->bound == PDD_BOUND)
return 0;
if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
@@ -284,7 +284,7 @@ static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
*/
void kfd_iommu_suspend(struct kfd_dev *kfd)
{
if (!kfd->device_info->needs_iommu_device)
if (!kfd->use_iommu_v2)
return;
kfd_unbind_processes_from_device(kfd);
@@ -304,7 +304,7 @@ int kfd_iommu_resume(struct kfd_dev *kfd)
unsigned int pasid_limit;
int err;
if (!kfd->device_info->needs_iommu_device)
if (!kfd->use_iommu_v2)
return 0;
pasid_limit = kfd_get_pasid_limit();

View File

@@ -70,6 +70,7 @@ err_create_wq:
err_topology:
kfd_chardev_exit();
err_ioctl:
pr_err("KFD is disabled due to module initialization failure\n");
return err;
}

View File

@@ -297,6 +297,9 @@ struct kfd_dev {
bool pci_atomic_requested;
/* Use IOMMU v2 flag */
bool use_iommu_v2;
/* SRAM ECC flag */
atomic_t sram_ecc_flag;
@@ -309,6 +312,13 @@ struct kfd_dev {
/* Clients watching SMI events */
struct list_head smi_clients;
spinlock_t smi_lock;
uint32_t reset_seq_num;
struct ida doorbell_ida;
unsigned int max_doorbell_slices;
int noretry;
};
enum kfd_mempool {
@@ -626,7 +636,7 @@ enum kfd_pdd_bound {
PDD_BOUND_SUSPENDED,
};
#define MAX_SYSFS_FILENAME_LEN 11
#define MAX_SYSFS_FILENAME_LEN 15
/*
* SDMA counter runs at 100MHz frequency.
@@ -687,6 +697,39 @@ struct kfd_process_device {
uint64_t sdma_past_activity_counter;
struct attribute attr_sdma;
char sdma_filename[MAX_SYSFS_FILENAME_LEN];
/* Eviction activity tracking */
uint64_t last_evict_timestamp;
atomic64_t evict_duration_counter;
struct attribute attr_evict;
struct kobject *kobj_stats;
unsigned int doorbell_index;
/*
* @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
* that is associated with device encoded by "this" struct instance. The
* value reflects CU usage by all of the waves launched by this process
* on this device. A very important property of occupancy parameter is
* that its value is a snapshot of current use.
*
* Following is to be noted regarding how this parameter is reported:
*
* The number of waves that a CU can launch is limited by couple of
* parameters. These are encoded by struct amdgpu_cu_info instance
* that is part of every device definition. For GFX9 devices this
* translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves
* do not use scratch memory and 32 waves (max_scratch_slots_per_cu)
* when they do use scratch memory. This could change for future
* devices and therefore this example should be considered as a guide.
*
* All CU's of a device are available for the process. This may not be true
* under certain conditions - e.g. CU masking.
*
* Finally number of CU's that are occupied by a process is affected by both
* number of CU's a device has along with number of other competing processes
*/
struct attribute attr_cu_occupancy;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -724,7 +767,6 @@ struct kfd_process {
struct mmu_notifier mmu_notifier;
u32 pasid;
unsigned int doorbell_index;
/*
* List of kfd_process_device structures,
@@ -857,13 +899,13 @@ u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process,
struct kfd_process_device *pdd,
unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process);
int kfd_alloc_process_doorbells(struct kfd_process *process);
void kfd_free_process_doorbells(struct kfd_process *process);
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
unsigned int *doorbell_index);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
unsigned int doorbell_index);
/* GTT Sub-Allocator */
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
@@ -892,6 +934,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);

View File

@@ -87,7 +87,7 @@ struct kfd_sdma_activity_handler_workarea {
};
struct temp_sdma_queue_list {
uint64_t rptr;
uint64_t __user *rptr;
uint64_t sdma_val;
unsigned int queue_id;
struct list_head list;
@@ -159,7 +159,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work)
}
INIT_LIST_HEAD(&sdma_q->list);
sdma_q->rptr = (uint64_t)q->properties.read_ptr;
sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
sdma_q->queue_id = q->properties.queue_id;
list_add_tail(&sdma_q->list, &sdma_q_list.list);
}
@@ -218,7 +218,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work)
continue;
list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
if (((uint64_t)q->properties.read_ptr == sdma_q->rptr) &&
if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
(sdma_q->queue_id == q->properties.queue_id)) {
list_del(&sdma_q->list);
kfree(sdma_q);
@@ -249,6 +249,52 @@ cleanup:
}
}
/**
* @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device
* by current process. Translates acquired wave count into number of compute units
* that are occupied.
*
* @atr: Handle of attribute that allows reporting of wave count. The attribute
* handle encapsulates GPU device it is associated with, thereby allowing collection
* of waves in flight, etc
*
* @buffer: Handle of user provided buffer updated with wave count
*
* Return: Number of bytes written to user buffer or an error value
*/
static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
{
int cu_cnt;
int wave_cnt;
int max_waves_per_cu;
struct kfd_dev *dev = NULL;
struct kfd_process *proc = NULL;
struct kfd_process_device *pdd = NULL;
pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
dev = pdd->dev;
if (dev->kfd2kgd->get_cu_occupancy == NULL)
return -EINVAL;
cu_cnt = 0;
proc = pdd->process;
if (pdd->qpd.queue_count == 0) {
pr_debug("Gpu-Id: %d has no active queues for process %d\n",
dev->id, proc->pasid);
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
/* Collect wave count from device if it supports */
wave_cnt = 0;
max_waves_per_cu = 0;
dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
&max_waves_per_cu);
/* Translate wave count to number of compute units */
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
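/* Editor's note: the translation above is integer ceiling division,
 * ceil(wave_cnt / max_waves_per_cu); e.g. wave_cnt = 81 with
 * max_waves_per_cu = 40 gives (81 + 39) / 40 = 3 occupied CUs. */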
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
@@ -270,6 +316,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
kfd_sdma_activity_worker);
sdma_activity_work_handler.pdd = pdd;
sdma_activity_work_handler.sdma_activity_counter = 0;
schedule_work(&sdma_activity_work_handler.sdma_activity_work);
@@ -344,6 +391,32 @@ static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
return 0;
}
static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
struct attribute *attr, char *buffer)
{
if (strcmp(attr->name, "evicted_ms") == 0) {
struct kfd_process_device *pdd = container_of(attr,
struct kfd_process_device,
attr_evict);
uint64_t evict_jiffies;
evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
return snprintf(buffer,
PAGE_SIZE,
"%llu\n",
jiffies64_to_msecs(evict_jiffies));
/* Sysfs handle that gets CU occupancy is per device */
} else if (strcmp(attr->name, "cu_occupancy") == 0) {
return kfd_get_cu_occupancy(attr, buffer);
} else {
pr_err("Invalid attribute");
}
return 0;
}
static struct attribute attr_queue_size = {
.name = "size",
.mode = KFD_SYSFS_FILE_MODE
@@ -375,6 +448,19 @@ static struct kobj_type procfs_queue_type = {
.default_attrs = procfs_queue_attrs,
};
static const struct sysfs_ops procfs_stats_ops = {
.show = kfd_procfs_stats_show,
};
static struct attribute *procfs_stats_attrs[] = {
NULL
};
static struct kobj_type procfs_stats_type = {
.sysfs_ops = &procfs_stats_ops,
.default_attrs = procfs_stats_attrs,
};
int kfd_procfs_add_queue(struct queue *q)
{
struct kfd_process *proc;
@@ -416,6 +502,72 @@ static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
return ret;
}
static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
{
int ret = 0;
struct kfd_process_device *pdd;
char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
if (!p)
return -EINVAL;
if (!p->kobj)
return -EFAULT;
/*
* Create sysfs files for each GPU:
* - proc/<pid>/stats_<gpuid>/
* - proc/<pid>/stats_<gpuid>/evicted_ms
* - proc/<pid>/stats_<gpuid>/cu_occupancy
*/
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
struct kobject *kobj_stats;
snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
"stats_%u", pdd->dev->id);
kobj_stats = kfd_alloc_struct(kobj_stats);
if (!kobj_stats)
return -ENOMEM;
ret = kobject_init_and_add(kobj_stats,
&procfs_stats_type,
p->kobj,
stats_dir_filename);
if (ret) {
pr_warn("Creating KFD proc/stats_%s folder failed",
stats_dir_filename);
kobject_put(kobj_stats);
goto err;
}
pdd->kobj_stats = kobj_stats;
pdd->attr_evict.name = "evicted_ms";
pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(&pdd->attr_evict);
ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
if (ret)
pr_warn("Creating eviction stats for gpuid %d failed",
(int)pdd->dev->id);
/* Add sysfs file to report compute unit occupancy */
if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
pdd->attr_cu_occupancy.name = "cu_occupancy";
pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(&pdd->attr_cu_occupancy);
ret = sysfs_create_file(kobj_stats,
&pdd->attr_cu_occupancy);
if (ret)
pr_warn("Creating %s failed for gpuid: %d",
pdd->attr_cu_occupancy.name,
(int)pdd->dev->id);
}
}
err:
return ret;
}
static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
{
int ret = 0;
@@ -451,7 +603,6 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
return ret;
}
void kfd_procfs_del_queue(struct queue *q)
{
if (!q)
@@ -659,6 +810,11 @@ struct kfd_process *kfd_create_process(struct file *filep)
if (!process->kobj_queues)
pr_warn("Creating KFD proc/queues folder failed");
ret = kfd_procfs_add_sysfs_stats(process);
if (ret)
pr_warn("Creating sysfs stats dir for pid %d failed",
(int)process->lead_thread->pid);
ret = kfd_procfs_add_sysfs_files(process);
if (ret)
pr_warn("Creating sysfs usage file for pid %d failed",
@@ -780,6 +936,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfree(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
/*
* before destroying pdd, make sure to report availability
* for auto suspend
@@ -815,6 +973,12 @@ static void kfd_process_wq_release(struct work_struct *work)
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
sysfs_remove_file(p->kobj, &pdd->attr_vram);
sysfs_remove_file(p->kobj, &pdd->attr_sdma);
sysfs_remove_file(p->kobj, &pdd->attr_evict);
if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL)
sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy);
kobject_del(pdd->kobj_stats);
kobject_put(pdd->kobj_stats);
pdd->kobj_stats = NULL;
}
kobject_del(p->kobj);
@@ -832,8 +996,6 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_event_free_process(p);
kfd_pasid_free(p->pasid);
kfd_free_process_doorbells(p);
mutex_destroy(&p->mutex);
put_task_struct(p->lead_thread);
@@ -1011,9 +1173,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (process->pasid == 0)
goto err_alloc_pasid;
if (kfd_alloc_process_doorbells(process) < 0)
goto err_alloc_doorbells;
err = pqm_init(&process->pqm, process);
if (err != 0)
goto err_process_pqm_init;
@@ -1041,8 +1200,6 @@ err_register_notifier:
err_init_apertures:
pqm_uninit(&process->pqm);
err_process_pqm_init:
kfd_free_process_doorbells(process);
err_alloc_doorbells:
kfd_pasid_free(process->pasid);
err_alloc_pasid:
mutex_destroy(&process->mutex);
@@ -1105,10 +1262,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd)
return NULL;
if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
pr_err("Failed to alloc doorbell for pdd\n");
goto err_free_pdd;
}
if (init_doorbell_bitmap(&pdd->qpd, dev)) {
pr_err("Failed to init doorbell for process\n");
kfree(pdd);
return NULL;
goto err_free_pdd;
}
pdd->dev = dev;
@@ -1124,12 +1285,17 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->runtime_inuse = false;
pdd->vram_usage = 0;
pdd->sdma_past_activity_counter = 0;
atomic64_set(&pdd->evict_duration_counter, 0);
list_add(&pdd->per_device_list, &p->per_device_data);
/* Init idr used for memory handle translation */
idr_init(&pdd->alloc_idr);
return pdd;
err_free_pdd:
kfree(pdd);
return NULL;
}
/**
@@ -1487,6 +1653,7 @@ void kfd_suspend_all_processes(void)
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
WARN(debug_evictions, "Evicting all processes");
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);

View File

@@ -24,6 +24,7 @@
#include <linux/wait.h>
#include <linux/anon_inodes.h>
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
@@ -148,15 +149,94 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
return 0;
}
static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
char *event_msg, int len)
{
struct kfd_smi_client *client;
rcu_read_lock();
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
if (!(READ_ONCE(client->events) &
KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
continue;
spin_lock(&client->lock);
if (kfifo_avail(&client->fifo) >= len) {
kfifo_in(&client->fifo, event_msg, len);
wake_up_all(&client->wait_queue);
} else {
pr_debug("smi_event(EventID: %u): no space left\n",
smi_event);
}
spin_unlock(&client->lock);
}
rcu_read_unlock();
}
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
{
/*
* GpuReset msg = Reset seq number (incremented for
* every reset message sent before GPU reset).
* 1 byte event + 1 byte space + 8 bytes seq num +
* 1 byte \n + 1 byte \0 = 12
*/
char fifo_in[12];
int len;
unsigned int event;
if (list_empty(&dev->smi_clients))
return;
memset(fifo_in, 0x0, sizeof(fifo_in));
if (post_reset) {
event = KFD_SMI_EVENT_GPU_POST_RESET;
} else {
event = KFD_SMI_EVENT_GPU_PRE_RESET;
++(dev->reset_seq_num);
}
len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
dev->reset_seq_num);
add_event_to_kfifo(dev, event, fifo_in, len);
}
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint32_t throttle_bitmask)
{
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
/*
* ThermalThrottle msg = throttle_bitmask(8):
* thermal_interrupt_count(16):
* 1 byte event + 1 byte space + 8 byte throttle_bitmask +
* 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
* 1 byte \0 = 29
*/
char fifo_in[29];
int len;
if (list_empty(&dev->smi_clients))
return;
len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
atomic64_read(&adev->smu.throttle_int_counter));
add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
}
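/* Editor's sketch: a userspace consumer of the SMI event fd could parse
 * the throttle line emitted above with the matching format, e.g.:
 *
 *     unsigned int event, bitmask;
 *     unsigned long long count;
 *     if (sscanf(line, "%x %x:%llx", &event, &bitmask, &count) == 3 &&
 *         event == KFD_SMI_EVENT_THERMAL_THROTTLE)
 *             handle_throttle(bitmask, count);
 *
 * (line and handle_throttle() are hypothetical.)
 */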
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
struct amdgpu_task_info task_info;
/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
/* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
/* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
* 1 byte \0 = 29
*/
char fifo_in[43];
struct kfd_smi_client *client;
char fifo_in[29];
int len;
if (list_empty(&dev->smi_clients))
@@ -168,25 +248,10 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
if (!task_info.pid)
return;
len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
task_info.pid, task_info.task_name);
rcu_read_lock();
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT))
continue;
spin_lock(&client->lock);
if (kfifo_avail(&client->fifo) >= len) {
kfifo_in(&client->fifo, fifo_in, len);
wake_up_all(&client->wait_queue);
}
else
pr_debug("smi_event(vmfault): no space left\n");
spin_unlock(&client->lock);
}
rcu_read_unlock();
add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
}
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)

View File

@@ -25,5 +25,8 @@
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint32_t throttle_bitmask);
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
#endif

View File

@@ -446,7 +446,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, offs, "simd_count",
dev->node_props.simd_count);
dev->gpu ? dev->node_props.simd_count : 0);
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, offs, "caches_count",
@@ -1139,7 +1139,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
/* Discrete GPUs need their own topology device list
* entries. Don't assign them to CPU/APU nodes.
*/
if (!gpu->device_info->needs_iommu_device &&
if (!gpu->use_iommu_v2 &&
dev->node_props.cpu_cores_count)
continue;
@@ -1239,7 +1239,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
void *crat_image = NULL;
size_t image_size = 0;
int proximity_domain;
struct amdgpu_ras *ctx;
struct amdgpu_device *adev;
INIT_LIST_HEAD(&temp_topology_device_list);
@@ -1388,7 +1388,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* Overwrite ATS capability according to needs_iommu_device to fix
* potential missing corresponding bit in CRAT of BIOS.
*/
if (dev->gpu->device_info->needs_iommu_device)
if (dev->gpu->use_iommu_v2)
dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
else
dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
@@ -1404,19 +1404,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.max_waves_per_simd = 10;
}
ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd));
if (ctx) {
/* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */
dev->node_props.capability |=
(((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) ||
((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ?
HSA_CAP_SRAM_EDCSUPPORTED : 0;
dev->node_props.capability |= ((ctx->features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
HSA_CAP_MEM_EDCSUPPORTED : 0;
adev = (struct amdgpu_device *)(dev->gpu->kgd);
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
dev->node_props.capability |=
((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
HSA_CAP_SRAM_EDCSUPPORTED : 0;
dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
HSA_CAP_MEM_EDCSUPPORTED : 0;
dev->node_props.capability |= (ctx->features != 0) ?
if (adev->asic_type != CHIP_VEGA10)
dev->node_props.capability |= (adev->ras_features != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
}
kfd_debug_print_topology();
@@ -1515,6 +1513,29 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
}
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
{
struct kfd_topology_device *dev;
gpu->use_iommu_v2 = false;
if (!gpu->device_info->needs_iommu_device)
return;
down_read(&topology_lock);
/* Only use IOMMUv2 if there is an APU topology node with no GPU
* assigned yet. This GPU will be assigned to it.
*/
list_for_each_entry(dev, &topology_device_list, list)
if (dev->node_props.cpu_cores_count &&
dev->node_props.simd_count &&
!dev->gpu)
gpu->use_iommu_v2 = true;
up_read(&topology_lock);
}
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)