Merge tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "Core:
   - shared fencing staging removal
   - drop transactional atomic helpers and move helpers to new location
   - DP/MST atomic cleanup
   - Leasing cleanups and drop EXPORT_SYMBOL
   - Convert drivers to atomic helpers and generic fbdev.
   - removed deprecated obj_ref/unref in favour of get/put
   - Improve dumb callback documentation
   - MODESET_LOCK_BEGIN/END helpers
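     A minimal usage sketch of the new lock helpers, assuming the
     DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, flags, ret) /
     DRM_MODESET_LOCK_ALL_END(ctx, ret) form from drm_modeset_lock.h merged
     this cycle ("dev" here is the driver's struct drm_device):

       struct drm_modeset_acquire_ctx ctx;
       int ret;

       /* Acquires all modeset locks, retrying internally on -EDEADLK. */
       DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE, ret);

       /* ... inspect or update mode-setting state under the locks ... */

       /* Backs off if needed, then drops the locks and finishes the ctx. */
       DRM_MODESET_LOCK_ALL_END(ctx, ret);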

  panels:
   - CDTech panels, Banana Pi Panel, DLC1010GIG,
   - Olimex LCD-O-LinuXino, Samsung S6D16D0, Truly NT35597 WQXGA,
   - Himax HX8357D, simulated RTSM AEMv8.
   - GPD Win2 panel
   - AUO G101EVN010

  vgem:
   - render node support

  ttm:
   - move global init out of drivers
   - fix LRU handling for ghost objects
   - Support for simultaneous submissions to multiple engines

  scheduler:
   - timeout/fault handling changes to help GPU recovery
   - helpers for hw with preemption support
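     For context, the scheduler's timeout handling reaches a driver through
     the timedout_job hook of struct drm_sched_backend_ops; a hedged sketch
     of how a driver might wire its recovery path into that hook (all
     mydrv_* names are hypothetical):

       #include <drm/gpu_scheduler.h>

       /* Called by the scheduler when a queued job exceeds its timeout;
        * the driver is expected to kick off its GPU reset/recovery path. */
       static void mydrv_timedout_job(struct drm_sched_job *sched_job)
       {
               struct mydrv_job *job = to_mydrv_job(sched_job);  /* hypothetical wrapper */

               pr_err("ring %s: job timed out, starting GPU recovery\n",
                      sched_job->sched->name);
               mydrv_gpu_recover(job->mdev);  /* hypothetical reset entry point */
       }

       static const struct drm_sched_backend_ops mydrv_sched_ops = {
               .dependency   = mydrv_job_dependency,  /* hypothetical */
               .run_job      = mydrv_run_job,         /* hypothetical */
               .timedout_job = mydrv_timedout_job,
               .free_job     = mydrv_free_job,        /* hypothetical */
       };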

  i915:
   - Scaler/Watermark fixes
   - DP MST + powerwell fixes
   - PSR fixes
   - Break long get/put shmemfs pages
   - Icelake fixes
   - Icelake DSI video mode enablement
   - Engine workaround improvements

  amdgpu:
   - freesync support
   - GPU reset enabled on CI, VI, SOC15 dGPUs
   - ABM support in DC
   - KFD support for vega12/polaris12
   - SDMA paging queue on vega
   - More amdkfd code sharing
   - DCC scanout on GFX9
   - DC kerneldoc
   - Updated SMU firmware for GFX8 chips
   - XGMI PSP + hive reset support
   - GPU reset
   - DC trace support
   - Powerplay updates for newer Polaris
   - Cursor plane update fast path
   - kfd dma-buf support

  virtio-gpu:
   - add EDID support

  vmwgfx:
   - pageflip with damage support

  nouveau:
   - Initial Turing TU104/TU106 modesetting support

  msm:
   - a2xx gpu support for apq8060 and imx5
   - a2xx gpummu support
   - mdp4 display support for apq8060
   - DPU fixes and cleanups
   - enhanced profiling support
   - debug object naming interface
   - get_iova/page pinning decoupling

  tegra:
   - Tegra194 host1x, VIC and display support enabled
   - Audio over HDMI for Tegra186 and Tegra194

  exynos:
   - DMA/IOMMU refactoring
   - plane alpha + blend mode support
   - Color format fixes for mixer driver

  rcar-du:
   - R8A7744 and R8A77470 support
   - R8A77965 LVDS support

  imx:
   - fbdev emulation fix
   - multi-tiled scaling fixes
   - SPDX identifiers

  rockchip:
   - dw_hdmi support
   - dw-mipi-dsi + dual dsi support
   - mailbox read size fix

  qxl:
   - fix cursor pinning

  vc4:
   - YUV support (scaling + cursor)

  v3d:
   - enable TFU (Texture Formatting Unit)

  mali-dp:
   - add support for linear tiled formats

  sun4i:
   - Display Engine 3 support
   - H6 DE3 mixer 0 support
   - H6 display engine support
   - dw-hdmi support
   - H6 HDMI phy support
   - implicit fence waiting
   - BGRX8888 support

  meson:
   - Overlay plane support
   - implicit fence waiting
   - HDMI 1.4 4k modes

  bridge:
   - i2c fixes for sii902x"

* tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm: (1403 commits)
  drm/amd/display: Add fast path for cursor plane updates
  drm/amdgpu: Enable GPU recovery by default for CI
  drm/amd/display: Fix duplicating scaling/underscan connector state
  drm/amd/display: Fix unintialized max_bpc state values
  Revert "drm/amd/display: Set RMX_ASPECT as default"
  drm/amdgpu: Fix stub function name
  drm/msm/dpu: Fix clock issue after bind failure
  drm/msm/dpu: Clean up dpu_media_info.h static inline functions
  drm/msm/dpu: Further cleanups for static inline functions
  drm/msm/dpu: Cleanup the debugfs functions
  drm/msm/dpu: Remove dpu_irq and unused functions
  drm/msm: Make irq_postinstall optional
  drm/msm/dpu: Cleanup callers of dpu_hw_blk_init
  drm/msm/dpu: Remove unused functions
  drm/msm/dpu: Remove dpu_crtc_is_enabled()
  drm/msm/dpu: Remove dpu_crtc_get_mixer_height
  drm/msm/dpu: Remove dpu_dbg
  drm/msm: dpu: Remove crtc_lock
  drm/msm: dpu: Remove vblank_requested flag from dpu_crtc
  drm/msm: dpu: Separate crtc assignment from vblank enable
  ...
Committed by Linus Torvalds on 2018-12-25 11:48:26 -08:00
1030 changed files with 47149 additions and 26578 deletions

@@ -53,7 +53,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
amdgpu_gmc.o amdgpu_xgmi.o
amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -105,6 +105,7 @@ amdgpu-y += \
# add GFX block
amdgpu-y += \
amdgpu_gfx.o \
amdgpu_rlc.o \
gfx_v8_0.o \
gfx_v9_0.o


@@ -75,11 +75,14 @@
#include "amdgpu_sdma.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_csa.h"
#include "amdgpu_gart.h"
#include "amdgpu_debugfs.h"
#include "amdgpu_job.h"
#include "amdgpu_bo_list.h"
#include "amdgpu_gem.h"
#include "amdgpu_doorbell.h"
#include "amdgpu_amdkfd.h"
#define MAX_GPU_INSTANCE 16
@@ -161,6 +164,7 @@ extern int amdgpu_si_support;
extern int amdgpu_cik_support;
#endif
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
@@ -359,123 +363,6 @@ struct amdgpu_sa_bo {
int amdgpu_fence_slab_init(void);
void amdgpu_fence_slab_fini(void);
/*
* GPU doorbell structures, functions & helpers
*/
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
{
AMDGPU_DOORBELL_KIQ = 0x000,
AMDGPU_DOORBELL_HIQ = 0x001,
AMDGPU_DOORBELL_DIQ = 0x002,
AMDGPU_DOORBELL_MEC_RING0 = 0x010,
AMDGPU_DOORBELL_MEC_RING1 = 0x011,
AMDGPU_DOORBELL_MEC_RING2 = 0x012,
AMDGPU_DOORBELL_MEC_RING3 = 0x013,
AMDGPU_DOORBELL_MEC_RING4 = 0x014,
AMDGPU_DOORBELL_MEC_RING5 = 0x015,
AMDGPU_DOORBELL_MEC_RING6 = 0x016,
AMDGPU_DOORBELL_MEC_RING7 = 0x017,
AMDGPU_DOORBELL_GFX_RING0 = 0x020,
AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0,
AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1,
AMDGPU_DOORBELL_IH = 0x1E8,
AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF,
AMDGPU_DOORBELL_INVALID = 0xFFFF
} AMDGPU_DOORBELL_ASSIGNMENT;
struct amdgpu_doorbell {
/* doorbell mmio */
resource_size_t base;
resource_size_t size;
u32 __iomem *ptr;
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
};
/*
* 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
*/
typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
{
/*
* All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in
* a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
* Compute related doorbells are allocated from 0x00 to 0x8a
*/
/* kernel scheduling */
AMDGPU_DOORBELL64_KIQ = 0x00,
/* HSA interface queue and debug queue */
AMDGPU_DOORBELL64_HIQ = 0x01,
AMDGPU_DOORBELL64_DIQ = 0x02,
/* Compute engines */
AMDGPU_DOORBELL64_MEC_RING0 = 0x03,
AMDGPU_DOORBELL64_MEC_RING1 = 0x04,
AMDGPU_DOORBELL64_MEC_RING2 = 0x05,
AMDGPU_DOORBELL64_MEC_RING3 = 0x06,
AMDGPU_DOORBELL64_MEC_RING4 = 0x07,
AMDGPU_DOORBELL64_MEC_RING5 = 0x08,
AMDGPU_DOORBELL64_MEC_RING6 = 0x09,
AMDGPU_DOORBELL64_MEC_RING7 = 0x0a,
/* User queue doorbell range (128 doorbells) */
AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a,
/* Graphics engine */
AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
/*
* Other graphics doorbells can be allocated here: from 0x8c to 0xdf
* Graphics voltage island aperture 1
* default non-graphics QWORD index is 0xe0 - 0xFF inclusive
*/
/* sDMA engines reserved from 0xe0 -oxef */
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1,
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xE9,
/* For vega10 sriov, the sdma doorbell must be fixed as follow
* to keep the same setting with host driver, or it will
* happen conflicts
*/
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 = 0xF0,
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 = 0xF2,
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
/* Interrupt handler */
AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */
AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */
/* VCN engine use 32 bits doorbell */
AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
AMDGPU_DOORBELL64_VCN2_3 = 0xF9,
AMDGPU_DOORBELL64_VCN4_5 = 0xFA,
AMDGPU_DOORBELL64_VCN6_7 = 0xFB,
/* overlap the doorbell assignment with VCN as they are mutually exclusive
* VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
*/
AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8,
AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9,
AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA,
AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB,
AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC,
AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD,
AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
AMDGPU_DOORBELL64_INVALID = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;
/*
* IRQS.
*/
@@ -653,6 +540,8 @@ struct amdgpu_asic_funcs {
struct amdgpu_ring *ring);
/* check if the asic needs a full reset of if soft reset will work */
bool (*need_full_reset)(struct amdgpu_device *adev);
/* initialize doorbell layout for specific asic*/
void (*init_doorbell_index)(struct amdgpu_device *adev);
};
/*
@@ -831,7 +720,6 @@ struct amdgpu_device {
bool need_dma32;
bool need_swiotlb;
bool accel_working;
struct work_struct reset_work;
struct notifier_block acpi_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
@@ -976,6 +864,9 @@ struct amdgpu_device {
/* GDS */
struct amdgpu_gds gds;
/* KFD */
struct amdgpu_kfd_dev kfd;
/* display related functionality */
struct amdgpu_display_manager dm;
@@ -989,9 +880,6 @@ struct amdgpu_device {
atomic64_t visible_pin_size;
atomic64_t gart_pin_size;
/* amdkfd interface */
struct kfd_dev *kfd;
/* soc15 register offset based on ip, instance and segment */
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
@@ -1023,6 +911,10 @@ struct amdgpu_device {
unsigned long last_mm_index;
bool in_gpu_reset;
struct mutex lock_reset;
struct amdgpu_doorbell_index doorbell_index;
int asic_reset_res;
struct work_struct xgmi_reset_work;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1047,11 +939,6 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
@@ -1113,11 +1000,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg))
#define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v))
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
@@ -1159,6 +1041,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1219,12 +1102,6 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
/*
* functions used by amdgpu_xgmi.c
*/
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
/*
* functions used by amdgpu_encoder.c
*/
@@ -1252,6 +1129,9 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade
int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
u8 perf_req, bool advertise);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
struct amdgpu_dm_backlight_caps *caps);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }


@@ -41,28 +41,21 @@ struct amdgpu_atif_notification_cfg {
};
struct amdgpu_atif_notifications {
bool display_switch;
bool expansion_mode_change;
bool thermal_state;
bool forced_power_state;
bool system_power_state;
bool display_conf_change;
bool px_gfx_switch;
bool brightness_change;
bool dgpu_display_event;
bool gpu_package_power_limit;
};
struct amdgpu_atif_functions {
bool system_params;
bool sbios_requests;
bool select_active_disp;
bool lid_state;
bool get_tv_standard;
bool set_tv_standard;
bool get_panel_expansion_mode;
bool set_panel_expansion_mode;
bool temperature_change;
bool graphics_device_types;
bool query_backlight_transfer_characteristics;
bool ready_to_undock;
bool external_gpu_information;
};
struct amdgpu_atif {
@@ -72,6 +65,7 @@ struct amdgpu_atif {
struct amdgpu_atif_functions functions;
struct amdgpu_atif_notification_cfg notification_cfg;
struct amdgpu_encoder *encoder_for_bl;
struct amdgpu_dm_backlight_caps backlight_caps;
};
/* Call the ATIF method
@@ -137,15 +131,12 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
*/
static void amdgpu_atif_parse_notification(struct amdgpu_atif_notifications *n, u32 mask)
{
n->display_switch = mask & ATIF_DISPLAY_SWITCH_REQUEST_SUPPORTED;
n->expansion_mode_change = mask & ATIF_EXPANSION_MODE_CHANGE_REQUEST_SUPPORTED;
n->thermal_state = mask & ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED;
n->forced_power_state = mask & ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED;
n->system_power_state = mask & ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED;
n->display_conf_change = mask & ATIF_DISPLAY_CONF_CHANGE_REQUEST_SUPPORTED;
n->px_gfx_switch = mask & ATIF_PX_GFX_SWITCH_REQUEST_SUPPORTED;
n->brightness_change = mask & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED;
n->dgpu_display_event = mask & ATIF_DGPU_DISPLAY_EVENT_SUPPORTED;
n->gpu_package_power_limit = mask & ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED;
}
/**
@@ -162,14 +153,11 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
{
f->system_params = mask & ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED;
f->sbios_requests = mask & ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED;
f->select_active_disp = mask & ATIF_SELECT_ACTIVE_DISPLAYS_SUPPORTED;
f->lid_state = mask & ATIF_GET_LID_STATE_SUPPORTED;
f->get_tv_standard = mask & ATIF_GET_TV_STANDARD_FROM_CMOS_SUPPORTED;
f->set_tv_standard = mask & ATIF_SET_TV_STANDARD_IN_CMOS_SUPPORTED;
f->get_panel_expansion_mode = mask & ATIF_GET_PANEL_EXPANSION_MODE_FROM_CMOS_SUPPORTED;
f->set_panel_expansion_mode = mask & ATIF_SET_PANEL_EXPANSION_MODE_IN_CMOS_SUPPORTED;
f->temperature_change = mask & ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED;
f->graphics_device_types = mask & ATIF_GET_GRAPHICS_DEVICE_TYPES_SUPPORTED;
f->query_backlight_transfer_characteristics =
mask & ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED;
f->ready_to_undock = mask & ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED;
f->external_gpu_information = mask & ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED;
}
/**
@@ -310,6 +298,65 @@ out:
return err;
}
/**
* amdgpu_atif_query_backlight_caps - get min and max backlight input signal
*
* @handle: acpi handle
*
* Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
* to determine the acceptable range of backlight values
*
* Backlight_caps.caps_valid will be set to true if the query is successful
*
* The input signals are in range 0-255
*
* This function assumes the display with backlight is the first LCD
*
* Returns 0 on success, error on failure.
*/
static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
{
union acpi_object *info;
struct atif_qbtc_output characteristics;
struct atif_qbtc_arguments arguments;
struct acpi_buffer params;
size_t size;
int err = 0;
arguments.size = sizeof(arguments);
arguments.requested_display = ATIF_QBTC_REQUEST_LCD1;
params.length = sizeof(arguments);
params.pointer = (void *)&arguments;
info = amdgpu_atif_call(atif,
ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS,
&params);
if (!info) {
err = -EIO;
goto out;
}
size = *(u16 *) info->buffer.pointer;
if (size < 10) {
err = -EINVAL;
goto out;
}
memset(&characteristics, 0, sizeof(characteristics));
size = min(sizeof(characteristics), size);
memcpy(&characteristics, info->buffer.pointer, size);
atif->backlight_caps.caps_valid = true;
atif->backlight_caps.min_input_signal =
characteristics.min_input_signal;
atif->backlight_caps.max_input_signal =
characteristics.max_input_signal;
out:
kfree(info);
return err;
}
/**
* amdgpu_atif_get_sbios_requests - get requested sbios event
*
@@ -799,6 +846,17 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
}
}
if (atif->functions.query_backlight_transfer_characteristics) {
ret = amdgpu_atif_query_backlight_caps(atif);
if (ret) {
DRM_DEBUG_DRIVER("Call to QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS failed: %d\n",
ret);
atif->backlight_caps.caps_valid = false;
}
} else {
atif->backlight_caps.caps_valid = false;
}
out:
adev->acpi_nb.notifier_call = amdgpu_acpi_event;
register_acpi_notifier(&adev->acpi_nb);
@@ -806,6 +864,18 @@ out:
return ret;
}
void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
struct amdgpu_dm_backlight_caps *caps)
{
if (!adev->atif) {
caps->caps_valid = false;
return;
}
caps->caps_valid = adev->atif->backlight_caps.caps_valid;
caps->min_input_signal = adev->atif->backlight_caps.min_input_signal;
caps->max_input_signal = adev->atif->backlight_caps.max_input_signal;
}
/**
* amdgpu_acpi_fini - tear down driver acpi support
*
@@ -816,6 +886,5 @@ out:
void amdgpu_acpi_fini(struct amdgpu_device *adev)
{
unregister_acpi_notifier(&adev->acpi_nb);
if (adev->atif)
kfree(adev->atif);
kfree(adev->atif);
}


@@ -26,15 +26,26 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
const struct kgd2kfd_calls *kgd2kfd;
static const unsigned int compute_vmid_bitmap = 0xFF00;
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
*/
uint64_t amdgpu_amdkfd_total_mem_size;
int amdgpu_amdkfd_init(void)
{
struct sysinfo si;
int ret;
si_meminfo(&si);
amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
#ifdef CONFIG_HSA_AMD
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret)
@@ -73,9 +84,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
@@ -85,8 +98,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
return;
}
adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
adev->pdev, kfd2kgd);
adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev,
adev->pdev, kfd2kgd);
if (adev->kfd.dev)
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
}
/**
@@ -126,7 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i, n;
int last_valid_bit;
if (adev->kfd) {
if (adev->kfd.dev) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = compute_vmid_bitmap,
.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
@@ -144,7 +161,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
KGD_MAX_QUEUES);
/* remove the KIQ bit as well */
if (adev->gfx.kiq.ring.ready)
if (adev->gfx.kiq.ring.sched.ready)
clear_bit(amdgpu_gfx_queue_to_bit(adev,
adev->gfx.kiq.ring.me - 1,
adev->gfx.kiq.ring.pipe,
@@ -165,7 +182,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_start_offset);
if (adev->asic_type < CHIP_VEGA10) {
kgd2kfd->device_init(adev->kfd, &gpu_resources);
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
return;
}
@@ -179,25 +196,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
* process in case of 64-bit doorbells so we
* can use each doorbell assignment twice.
*/
if (adev->asic_type == CHIP_VEGA10) {
gpu_resources.sdma_doorbell[0][i] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
gpu_resources.sdma_doorbell[0][i+1] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
gpu_resources.sdma_doorbell[1][i] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
gpu_resources.sdma_doorbell[1][i+1] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
} else {
gpu_resources.sdma_doorbell[0][i] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
gpu_resources.sdma_doorbell[0][i+1] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
gpu_resources.sdma_doorbell[1][i] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
gpu_resources.sdma_doorbell[1][i+1] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
}
gpu_resources.sdma_doorbell[0][i] =
adev->doorbell_index.sdma_engine0 + (i >> 1);
gpu_resources.sdma_doorbell[0][i+1] =
adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1);
gpu_resources.sdma_doorbell[1][i] =
adev->doorbell_index.sdma_engine1 + (i >> 1);
gpu_resources.sdma_doorbell[1][i+1] =
adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1);
}
/* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
* SDMA, IH and VCN. So don't use them for the CP.
@@ -205,37 +211,37 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
gpu_resources.reserved_doorbell_mask = 0x1e0;
gpu_resources.reserved_doorbell_val = 0x0e0;
kgd2kfd->device_init(adev->kfd, &gpu_resources);
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
}
}
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
{
if (adev->kfd) {
kgd2kfd->device_exit(adev->kfd);
adev->kfd = NULL;
if (adev->kfd.dev) {
kgd2kfd->device_exit(adev->kfd.dev);
adev->kfd.dev = NULL;
}
}
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry)
{
if (adev->kfd)
kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
if (adev->kfd.dev)
kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry);
}
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
{
if (adev->kfd)
kgd2kfd->suspend(adev->kfd);
if (adev->kfd.dev)
kgd2kfd->suspend(adev->kfd.dev);
}
int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
{
int r = 0;
if (adev->kfd)
r = kgd2kfd->resume(adev->kfd);
if (adev->kfd.dev)
r = kgd2kfd->resume(adev->kfd.dev);
return r;
}
@@ -244,8 +250,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
int r = 0;
if (adev->kfd)
r = kgd2kfd->pre_reset(adev->kfd);
if (adev->kfd.dev)
r = kgd2kfd->pre_reset(adev->kfd.dev);
return r;
}
@@ -254,8 +260,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
int r = 0;
if (adev->kfd)
r = kgd2kfd->post_reset(adev->kfd);
if (adev->kfd.dev)
r = kgd2kfd->post_reset(adev->kfd.dev);
return r;
}
@@ -268,9 +274,9 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
amdgpu_device_gpu_recover(adev, NULL);
}
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9)
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
@@ -340,7 +346,7 @@ allocate_mem_reserve_bo_failed:
return r;
}
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
@@ -351,8 +357,8 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
amdgpu_bo_unref(&(bo));
}
void get_local_mem_info(struct kgd_dev *kgd,
struct kfd_local_mem_info *mem_info)
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
struct kfd_local_mem_info *mem_info)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
@@ -383,7 +389,7 @@ void get_local_mem_info(struct kgd_dev *kgd,
mem_info->mem_clk_max = 100;
}
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@@ -392,7 +398,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
return 0;
}
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@@ -405,7 +411,7 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
return 100;
}
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
@@ -428,6 +434,62 @@ void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
cu_info->lds_size = acu_info.lds_size;
}
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
struct kgd_dev **dma_buf_kgd,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
uint32_t *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct dma_buf *dma_buf;
struct drm_gem_object *obj;
struct amdgpu_bo *bo;
uint64_t metadata_flags;
int r = -EINVAL;
dma_buf = dma_buf_get(dma_buf_fd);
if (IS_ERR(dma_buf))
return PTR_ERR(dma_buf);
if (dma_buf->ops != &amdgpu_dmabuf_ops)
/* Can't handle non-graphics buffers */
goto out_put;
obj = dma_buf->priv;
if (obj->dev->driver != adev->ddev->driver)
/* Can't handle buffers from different drivers */
goto out_put;
adev = obj->dev->dev_private;
bo = gem_to_amdgpu_bo(obj);
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT)))
/* Only VRAM and GTT BOs are supported */
goto out_put;
r = 0;
if (dma_buf_kgd)
*dma_buf_kgd = (struct kgd_dev *)adev;
if (bo_size)
*bo_size = amdgpu_bo_size(bo);
if (metadata_size)
*metadata_size = bo->metadata_size;
if (metadata_buffer)
r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
metadata_size, &metadata_flags);
if (flags) {
*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
*flags |= ALLOC_MEM_FLAGS_PUBLIC;
}
out_put:
dma_buf_put(dma_buf);
return r;
}
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@@ -510,7 +572,7 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
if (adev->kfd) {
if (adev->kfd.dev) {
if ((1 << vmid) & compute_vmid_bitmap)
return true;
}
@@ -524,7 +586,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
return false;
}
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
{
}


@@ -27,7 +27,6 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/workqueue.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
@@ -35,6 +34,7 @@
#include "amdgpu_vm.h"
extern const struct kgd2kfd_calls *kgd2kfd;
extern uint64_t amdgpu_amdkfd_total_mem_size;
struct amdgpu_device;
@@ -77,6 +77,11 @@ struct amdgpu_amdkfd_fence {
char timeline_name[TASK_COMM_LEN];
};
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
};
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm);
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
@@ -134,16 +139,21 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
void get_local_mem_info(struct kgd_dev *kgd,
struct kfd_local_mem_info *mem_info);
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
struct kfd_local_mem_info *mem_info);
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
struct kgd_dev **dmabuf_kgd,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
uint32_t *flags);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
@@ -195,7 +205,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
struct dma_buf *dmabuf,
uint64_t va, void *vm,
struct kgd_mem **mem, uint64_t *size,
uint64_t *mmap_offset);
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
#endif /* AMDGPU_AMDKFD_H_INCLUDED */


@@ -23,6 +23,7 @@
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <linux/mmu_context.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
@@ -173,13 +174,6 @@ static int get_tile_config(struct kgd_dev *kgd,
}
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
.get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_pasid_alloc,
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_interrupts = kgd_init_interrupts,
@@ -200,28 +194,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)


@@ -24,6 +24,7 @@
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <linux/mmu_context.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
@@ -128,13 +129,6 @@ static int get_tile_config(struct kgd_dev *kgd,
}
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
.get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_pasid_alloc,
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_interrupts = kgd_init_interrupts,
@@ -157,27 +151,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)


@@ -26,6 +26,7 @@
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <linux/mmu_context.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
@@ -46,38 +47,9 @@
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "mmhub_v1_0.h"
#include "gfxhub_v1_0.h"
/* HACK: MMHUB and GC both have VM-related register with the same
* names but different offsets. Define the MMHUB register we need here
* with a prefix. A proper solution would be to move the functions
* programming these registers into gfx_v9_0.c and mmhub_v1_0.c
* respectively.
*/
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
#define V9_PIPE_PER_MEC (4)
#define V9_QUEUES_PER_PIPE_MEC (8)
@@ -167,13 +139,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
}
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
.get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_pasid_alloc,
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_interrupts = kgd_init_interrupts,
@@ -196,26 +161,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = amdgpu_amdkfd_get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
@@ -785,15 +733,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
uint32_t req = (1 << vmid) |
(0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
mutex_lock(&adev->srbm_mutex);
/* Use legacy mode tlb invalidation.
*
@@ -810,34 +749,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
* TODO 2: support range-based invalidation, requires kfg2kgd
* interface change
*/
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
0xffffffff);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
0x0000001f);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
0xffffffff);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
0x0000001f);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
req);
while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
(1 << vmid)))
cpu_relax();
while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
(1 << vmid)))
cpu_relax();
mutex_unlock(&adev->srbm_mutex);
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
}
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
@@ -876,7 +788,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
if (adev->in_gpu_reset)
return -EIO;
if (ring->ready)
if (ring->sched.ready)
return invalidate_tlbs_with_kiq(adev, pasid);
for (vmid = 0; vmid < 16; vmid++) {
@@ -1016,7 +928,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint64_t base = page_table_base | AMDGPU_PTE_VALID;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
@@ -1028,25 +939,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
* now, all processes share the same address space size, like
* on GFX8 and older.
*/
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}


@@ -25,6 +25,7 @@
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/dma-buf.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -46,9 +47,9 @@
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
uint64_t max_userptr_mem_limit;
uint64_t max_ttm_mem_limit;
int64_t system_mem_used;
int64_t userptr_mem_used;
int64_t ttm_mem_used;
spinlock_t mem_limit_lock;
} kfd_mem_limit;
@@ -90,8 +91,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
}
/* Set memory usage limits. Current, limits are
* System (kernel) memory - 3/8th System RAM
* Userptr memory - 3/4th System RAM
* System (TTM + userptr) memory - 3/4th System RAM
* TTM memory - 3/8th System RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -103,48 +104,61 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_userptr_mem_limit >> 20));
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain)
static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain, bool sg)
{
size_t acc_size;
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
int ret = 0;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
sizeof(struct amdgpu_bo));
spin_lock(&kfd_mem_limit.mem_limit_lock);
vram_needed = 0;
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
if (kfd_mem_limit.system_mem_used + (acc_size + size) >
kfd_mem_limit.max_system_mem_limit) {
ret = -ENOMEM;
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += (acc_size + size);
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
if ((kfd_mem_limit.system_mem_used + acc_size >
kfd_mem_limit.max_system_mem_limit) ||
(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
kfd_mem_limit.max_userptr_mem_limit)) {
ret = -ENOMEM;
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += acc_size;
kfd_mem_limit.userptr_mem_used += size;
/* TTM GTT memory */
system_mem_needed = acc_size + size;
ttm_mem_needed = acc_size + size;
} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
/* Userptr */
system_mem_needed = acc_size + size;
ttm_mem_needed = acc_size;
} else {
/* VRAM and SG */
system_mem_needed = acc_size;
ttm_mem_needed = acc_size;
if (domain == AMDGPU_GEM_DOMAIN_VRAM)
vram_needed = size;
}
err_no_mem:
spin_lock(&kfd_mem_limit.mem_limit_lock);
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
kfd_mem_limit.max_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
(adev->kfd.vram_used + vram_needed >
adev->gmc.real_vram_size - reserved_for_pt)) {
ret = -ENOMEM;
} else {
kfd_mem_limit.system_mem_used += system_mem_needed;
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
adev->kfd.vram_used += vram_needed;
}
spin_unlock(&kfd_mem_limit.mem_limit_lock);
return ret;
}
static void unreserve_system_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain)
static void unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain, bool sg)
{
size_t acc_size;
@@ -154,35 +168,39 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
kfd_mem_limit.ttm_mem_used -= (acc_size + size);
} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
kfd_mem_limit.ttm_mem_used -= acc_size;
} else {
kfd_mem_limit.system_mem_used -= acc_size;
kfd_mem_limit.userptr_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= acc_size;
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
adev->kfd.vram_used -= size;
WARN_ONCE(adev->kfd.vram_used < 0,
"kfd VRAM memory accounting unbalanced");
}
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
"kfd userptr memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
"kfd TTM memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
u32 domain = bo->preferred_domains;
bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -=
(bo->tbo.acc_size + amdgpu_bo_size(bo));
domain = AMDGPU_GEM_DOMAIN_CPU;
sg = false;
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
"kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
}
@@ -395,23 +413,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return 0;
}
static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_fence *f)
{
int ret = amdgpu_sync_fence(adev, sync, f, false);
/* Sync objects can't handle multiple GPUs (contexts) updating
* sync->last_vm_update. Fortunately we don't need it for
* KFD's purposes, so we can just drop that fence.
*/
if (sync->last_vm_update) {
dma_fence_put(sync->last_vm_update);
sync->last_vm_update = NULL;
}
return ret;
}
static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
struct amdgpu_bo *pd = vm->root.base.bo;
@@ -422,7 +423,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
return sync_vm_fence(adev, sync, vm->last_update);
return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
}
/* add_bo_to_vm - Add a BO to a VM
@@ -536,7 +537,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
struct amdgpu_bo *bo = mem->bo;
INIT_LIST_HEAD(&entry->head);
entry->shared = true;
entry->num_shared = 1;
entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
if (userptr)
@@ -677,7 +678,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true;
ctx->kfd_bo.tv.num_shared = 1;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
@@ -741,7 +742,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true;
ctx->kfd_bo.tv.num_shared = 1;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
@@ -826,7 +827,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
/* Add the eviction fence back */
amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
sync_vm_fence(adev, sync, bo_va->last_pt_update);
amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
return 0;
}
@@ -851,7 +852,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
return ret;
}
return sync_vm_fence(adev, sync, bo_va->last_pt_update);
return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -886,6 +887,24 @@ update_gpuvm_pte_failed:
return ret;
}
static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
{
struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
if (!sg)
return NULL;
if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
kfree(sg);
return NULL;
}
sg->sgl->dma_address = addr;
sg->sgl->length = size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->sgl->dma_length = size;
#endif
return sg;
}
static int process_validate_vms(struct amdkfd_process_info *process_info)
{
struct amdgpu_vm *peer_vm;
@@ -901,6 +920,26 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
return 0;
}
static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
struct amdgpu_sync *sync)
{
struct amdgpu_vm *peer_vm;
int ret;
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
struct amdgpu_bo *pd = peer_vm->root.base.bo;
ret = amdgpu_sync_resv(NULL,
sync, pd->tbo.resv,
AMDGPU_FENCE_OWNER_UNDEFINED, false);
if (ret)
return ret;
}
return 0;
}
static int process_update_pds(struct amdkfd_process_info *process_info,
struct amdgpu_sync *sync)
{
@@ -1149,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
enum ttm_bo_type bo_type = ttm_bo_type_device;
struct sg_table *sg = NULL;
uint64_t user_addr = 0;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
@@ -1177,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (!offset || !*offset)
return -EINVAL;
user_addr = *offset;
} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
bo_type = ttm_bo_type_sg;
alloc_flags = 0;
if (size > UINT_MAX)
return -EINVAL;
sg = create_doorbell_sg(*offset, size);
if (!sg)
return -ENOMEM;
} else {
return -EINVAL;
}
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem)
return -ENOMEM;
if (!*mem) {
ret = -ENOMEM;
goto err;
}
INIT_LIST_HEAD(&(*mem)->bo_va_list);
mutex_init(&(*mem)->lock);
(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
@@ -1199,7 +1252,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
byte_align = (adev->family == AMDGPU_FAMILY_VI &&
adev->asic_type != CHIP_FIJI &&
adev->asic_type != CHIP_POLARIS10 &&
adev->asic_type != CHIP_POLARIS11) ?
adev->asic_type != CHIP_POLARIS11 &&
adev->asic_type != CHIP_POLARIS12) ?
VI_BO_SIZE_ALIGN : 1;
mapping_flags = AMDGPU_VM_PAGE_READABLE;
@@ -1215,10 +1269,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
amdgpu_sync_create(&(*mem)->sync);
ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
if (ret) {
pr_debug("Insufficient system memory\n");
goto err_reserve_system_mem;
goto err_reserve_limit;
}
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1229,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bp.byte_align = byte_align;
bp.domain = alloc_domain;
bp.flags = alloc_flags;
bp.type = ttm_bo_type_device;
bp.type = bo_type;
bp.resv = NULL;
ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret) {
@@ -1237,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
domain_string(alloc_domain), ret);
goto err_bo_create;
}
if (bo_type == ttm_bo_type_sg) {
bo->tbo.sg = sg;
bo->tbo.ttm->sg = sg;
}
bo->kfd_bo = *mem;
(*mem)->bo = bo;
if (user_addr)
@@ -1266,12 +1324,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
allocate_init_user_pages_failed:
amdgpu_bo_unref(&bo);
/* Don't unreserve system mem limit twice */
goto err_reserve_system_mem;
goto err_reserve_limit;
err_bo_create:
unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
unreserve_mem_limit(adev, size, alloc_domain, !!sg);
err_reserve_limit:
mutex_destroy(&(*mem)->lock);
kfree(*mem);
err:
if (sg) {
sg_free_table(sg);
kfree(sg);
}
return ret;
}
@@ -1341,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the sync object */
amdgpu_sync_free(&mem->sync);
/* If the SG is not NULL, it's one we created for a doorbell
* BO. We need to free it.
*/
if (mem->bo->tbo.sg) {
sg_free_table(mem->bo->tbo.sg);
kfree(mem->bo->tbo.sg);
}
/* Free the BO*/
amdgpu_bo_unref(&mem->bo);
mutex_destroy(&mem->lock);
@@ -1405,7 +1476,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
* the queues are still stopped and we can leave mapping for
* the next restore worker
*/
if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
if (check_if_add_bo_to_vm(avm, mem)) {
@@ -1642,6 +1714,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
return 0;
}
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
struct dma_buf *dma_buf,
uint64_t va, void *vm,
struct kgd_mem **mem, uint64_t *size,
uint64_t *mmap_offset)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct drm_gem_object *obj;
struct amdgpu_bo *bo;
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
if (dma_buf->ops != &amdgpu_dmabuf_ops)
/* Can't handle non-graphics buffers */
return -EINVAL;
obj = dma_buf->priv;
if (obj->dev->dev_private != adev)
/* Can't handle buffers from other devices */
return -EINVAL;
bo = gem_to_amdgpu_bo(obj);
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT)))
/* Only VRAM and GTT BOs are supported */
return -EINVAL;
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem)
return -ENOMEM;
if (size)
*size = amdgpu_bo_size(bo);
if (mmap_offset)
*mmap_offset = amdgpu_bo_mmap_offset(bo);
INIT_LIST_HEAD(&(*mem)->bo_va_list);
mutex_init(&(*mem)->lock);
(*mem)->mapping_flags =
AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
(*mem)->bo = amdgpu_bo_ref(bo);
(*mem)->va = va;
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
amdgpu_sync_create(&(*mem)->sync);
return 0;
}
/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
@@ -1808,7 +1934,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
validate_list.head) {
list_add_tail(&mem->resv_list.head, &resv_list);
mem->resv_list.bo = mem->validate_list.bo;
mem->resv_list.shared = mem->validate_list.shared;
mem->resv_list.num_shared = mem->validate_list.num_shared;
}
/* Reserve all BOs and page tables for validation */
@@ -2027,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
list_add_tail(&mem->resv_list.head, &ctx.list);
mem->resv_list.bo = mem->validate_list.bo;
mem->resv_list.shared = mem->validate_list.shared;
mem->resv_list.num_shared = mem->validate_list.num_shared;
}
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
@@ -2044,13 +2170,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
if (ret)
goto validate_map_fail;
/* Wait for PD/PTs validate to finish */
/* FIXME: I think this isn't needed */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
struct amdgpu_bo *bo = peer_vm->root.base.bo;
ttm_bo_wait(&bo->tbo, false, false);
ret = process_sync_pds_resv(process_info, &sync_obj);
if (ret) {
pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
goto validate_map_fail;
}
/* Validate BOs and map them to GPUVM (update VM page tables). */
@@ -2066,7 +2189,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
goto validate_map_fail;
}
ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
}
list_for_each_entry(bo_va_entry, &mem->bo_va_list,
bo_list) {
ret = update_gpuvm_pte((struct amdgpu_device *)
@@ -2087,6 +2214,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
goto validate_map_fail;
}
/* Wait for validate and PT updates to finish */
amdgpu_sync_wait(&sync_obj, false);
/* Release old eviction fence and create new one, because fence only
@@ -2105,10 +2233,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
process_info->eviction_fence = new_fence;
*ef = dma_fence_get(&new_fence->base);
/* Wait for validate to finish and attach new eviction fence */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list.head)
ttm_bo_wait(&mem->bo->tbo, false, false);
/* Attach new eviction fence to all BOs */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list.head)
amdgpu_bo_fence(mem->bo,

@@ -118,7 +118,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
entry->priority = min(info[i].bo_priority,
AMDGPU_BO_LIST_MAX_PRIORITY);
entry->tv.bo = &bo->tbo;
entry->tv.shared = !bo->prime_shared_count;
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
list->gds_obj = bo;

@@ -50,7 +50,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
p->uf_entry.priority = 0;
p->uf_entry.tv.bo = &bo->tbo;
p->uf_entry.tv.shared = true;
/* One for TTM and one for the CS job */
p->uf_entry.tv.num_shared = 2;
p->uf_entry.user_pages = NULL;
drm_gem_object_put_unlocked(gobj);
@@ -598,6 +599,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
return r;
}
/* One for TTM and one for the CS job */
amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->tv.num_shared = 2;
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
if (p->bo_list->first_userptr != p->bo_list->num_entries)
p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
@@ -717,8 +722,14 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
gws = p->bo_list->gws_obj;
oa = p->bo_list->oa_obj;
amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
/* Make sure we use the exclusive slot for shared BOs */
if (bo->prime_shared_count)
e->tv.num_shared = 0;
e->bo_va = amdgpu_vm_bo_find(vm, bo);
}
if (gds) {
p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
@@ -955,10 +966,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
if (r)
return r;
p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
if (amdgpu_vm_debug) {
@@ -1104,7 +1111,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
{
int r;
struct dma_fence *fence;
r = drm_syncobj_find_fence(p->filp, handle, 0, &fence);
r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
if (r)
return r;
@@ -1193,7 +1200,7 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
int i;
for (i = 0; i < p->num_post_dep_syncobjs; ++i)
drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
}
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
@@ -1260,8 +1267,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
return 0;
error_abort:
dma_fence_put(&job->base.s_fence->finished);
job->base.s_fence = NULL;
drm_sched_job_cleanup(&job->base);
amdgpu_mn_unlock(p->mn);
error_unlock:
@@ -1285,7 +1291,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_parser_init(&parser, data);
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
DRM_ERROR("Failed to initialize parser %d!\n", r);
goto out;
}

@@ -0,0 +1,117 @@
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Monk.liu@amd.com
*/
#include "amdgpu.h"
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
addr -= AMDGPU_VA_RESERVED_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
return addr;
}
int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
u32 domain, uint32_t size)
{
int r;
void *ptr;
r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
domain, bo,
NULL, &ptr);
if (!*bo)
return -ENOMEM;
memset(ptr, 0, size);
return 0;
}
void amdgpu_free_static_csa(struct amdgpu_bo **bo)
{
amdgpu_bo_free_kernel(bo, NULL, NULL);
}
/*
* amdgpu_map_static_csa should be called during amdgpu_vm_init
* it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
* submission of GFX should use this virtual address within META_DATA init
* package to support SRIOV gfx preemption.
*/
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size)
{
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;
struct ttm_validate_buffer csa_tv;
int r;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&csa_tv.head);
csa_tv.bo = &bo->tbo;
csa_tv.num_shared = 1;
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;
}
*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
if (!*bo_va) {
ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("failed to create bo_va for static CSA\n");
return -ENOMEM;
}
r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
size);
if (r) {
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
AMDGPU_PTE_EXECUTABLE);
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
ttm_eu_backoff_reservation(&ticket, &list);
return 0;
}
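
Editor's note: amdgpu_csa_vaddr() above places the per-VM context save area just below the top of the GPU virtual address range and then canonicalizes the result. The standalone sketch below reproduces that arithmetic under stated assumptions (48-bit VA space, 1 MiB reserved at the top, sign extension modeled as filling bits 63:48 when bit 47 is set); the real constants and helper live in the driver headers.

#include <stdint.h>
#include <stdio.h>

/* Model of the address canonicalization: when bit 47 is set, fill the
 * upper 16 bits so the address lands in the upper half of the range.
 * This is only a stand-in for the driver's own sign-extend helper. */
static uint64_t sign_extend_48(uint64_t addr)
{
	return (addr & (1ULL << 47)) ? (addr | 0xFFFF000000000000ULL) : addr;
}

int main(void)
{
	uint64_t page_shift = 12;            /* 4 KiB GPU pages */
	uint64_t max_pfn    = 1ULL << 36;    /* assumed 48-bit VA space */
	uint64_t reserved   = 1ULL << 20;    /* assumed 1 MiB reserved at the top */

	uint64_t csa_vaddr = (max_pfn << page_shift) - reserved;

	csa_vaddr = sign_extend_48(csa_vaddr);

	/* With these assumptions the CSA sits just below the top of the range. */
	printf("CSA virtual address: 0x%016llx\n", (unsigned long long)csa_vaddr);
	return 0;
}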

@@ -0,0 +1,39 @@
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Monk.liu@amd.com
*/
#ifndef AMDGPU_CSA_MANAGER_H
#define AMDGPU_CSA_MANAGER_H
#define AMDGPU_CSA_SIZE (128 * 1024)
uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev);
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
u32 domain, uint32_t size);
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size);
void amdgpu_free_static_csa(struct amdgpu_bo **bo);
#endif

@@ -248,7 +248,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
return -ENOMEM;
mutex_lock(&mgr->lock);
r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
if (r < 0) {
mutex_unlock(&mgr->lock);
kfree(ctx);

@@ -59,6 +59,8 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"
#include "amdgpu_xgmi.h"
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -513,6 +515,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
*/
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{
/* No doorbell on SI hardware generation */
if (adev->asic_type < CHIP_BONAIRE) {
adev->doorbell.base = 0;
@@ -525,15 +528,26 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
return -EINVAL;
amdgpu_asic_init_doorbell_index(adev);
/* doorbell bar mapping */
adev->doorbell.base = pci_resource_start(adev->pdev, 2);
adev->doorbell.size = pci_resource_len(adev->pdev, 2);
adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
AMDGPU_DOORBELL_MAX_ASSIGNMENT+1);
adev->doorbell_index.max_assignment+1);
if (adev->doorbell.num_doorbells == 0)
return -EINVAL;
/* For Vega, reserve and map two pages on doorbell BAR since SDMA
* paging queue doorbell use the second page. The
* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
* doorbells are in the first page. So with paging queue enabled,
* the max num_doorbells should + 1 page (0x400 in dword)
*/
if (adev->asic_type >= CHIP_VEGA10)
adev->doorbell.num_doorbells += 0x400;
adev->doorbell.ptr = ioremap(adev->doorbell.base,
adev->doorbell.num_doorbells *
sizeof(u32));
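
Editor's note: the hunk above sizes the doorbell mapping by carving the BAR into 32-bit slots, capping it at the driver's highest reserved index, and then mapping one extra 4 KiB page (0x400 dword slots) on Vega10 and newer so the SDMA paging queue doorbell in the second page is reachable. A minimal standalone sketch of that arithmetic follows; the 2 MiB BAR size and the 0x18F cap are illustrative assumptions, not values taken from the driver.

#include <stdint.h>
#include <stdio.h>

#define DOORBELL_DWORD   sizeof(uint32_t)   /* each doorbell slot is 32 bits wide */
#define DWORDS_PER_PAGE  0x400              /* 4 KiB page / 4 bytes = 0x400 dword slots */

int main(void)
{
	uint64_t bar_size         = 2 * 1024 * 1024; /* assumed 2 MiB doorbell BAR */
	uint32_t max_assignment   = 0x18F;           /* assumed highest reserved index */
	int      is_vega_or_newer = 1;

	/* Reserve no more slots than the BAR can hold or the driver assigns. */
	uint32_t num_doorbells = bar_size / DOORBELL_DWORD;
	if (num_doorbells > max_assignment + 1)
		num_doorbells = max_assignment + 1;

	/* The SDMA paging queue doorbell lives in the second 4 KiB page, so
	 * map one extra page worth of dword slots on Vega and newer. */
	if (is_vega_or_newer)
		num_doorbells += DWORDS_PER_PAGE;

	printf("mapping %u doorbell slots (%zu bytes)\n",
	       num_doorbells, num_doorbells * DOORBELL_DWORD);
	return 0;
}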
@@ -1656,7 +1670,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
/* right after GMC hw init, we create CSA */
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_allocate_static_csa(adev);
r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("allocate CSA failed %d\n", r);
return r;
@@ -1681,7 +1697,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
return r;
amdgpu_xgmi_add_device(adev);
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
if (amdgpu_sriov_vf(adev))
@@ -1848,6 +1865,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
int i, r;
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_remove_device(adev);
amdgpu_amdkfd_device_fini(adev);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
@@ -1890,7 +1910,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
amdgpu_ucode_free_bo(adev);
amdgpu_free_static_csa(adev);
amdgpu_free_static_csa(&adev->virt.csa_obj);
amdgpu_device_wb_fini(adev);
amdgpu_device_vram_scratch_fini(adev);
}
@@ -2337,6 +2357,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
return amdgpu_device_asic_has_dc_support(adev->asic_type);
}
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
struct amdgpu_device *adev =
container_of(__work, struct amdgpu_device, xgmi_reset_work);
adev->asic_reset_res = amdgpu_asic_reset(adev);
if (adev->asic_reset_res)
DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
adev->asic_reset_res, adev->ddev->unique);
}
/**
* amdgpu_device_init - initialize the driver
*
@@ -2435,6 +2468,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
amdgpu_device_delay_enable_gfx_off);
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
adev->gfx.gfx_off_req_count = 1;
adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
@@ -2455,9 +2490,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
/* doorbell bar mapping */
amdgpu_device_doorbell_init(adev);
/* io port mapping */
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
@@ -2476,6 +2508,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
/* this will fail for cards that aren't VGA class devices, just
* ignore it */
@@ -3148,86 +3183,6 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
return 0;
}
/**
* amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
*
* @adev: amdgpu device pointer
*
* attempt to do soft-reset or full-reset and reinitialize Asic
* return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset(struct amdgpu_device *adev)
{
bool need_full_reset, vram_lost = 0;
int r;
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
if (!need_full_reset) {
amdgpu_device_ip_pre_soft_reset(adev);
r = amdgpu_device_ip_soft_reset(adev);
amdgpu_device_ip_post_soft_reset(adev);
if (r || amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("soft reset failed, will fallback to full reset!\n");
need_full_reset = true;
}
}
if (need_full_reset) {
r = amdgpu_device_ip_suspend(adev);
retry:
r = amdgpu_asic_reset(adev);
/* post card */
amdgpu_atom_asic_init(adev->mode_info.atom_context);
if (!r) {
dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
r = amdgpu_device_ip_resume_phase1(adev);
if (r)
goto out;
vram_lost = amdgpu_device_check_vram_lost(adev);
if (vram_lost) {
DRM_ERROR("VRAM is lost!\n");
atomic_inc(&adev->vram_lost_counter);
}
r = amdgpu_gtt_mgr_recover(
&adev->mman.bdev.man[TTM_PL_TT]);
if (r)
goto out;
r = amdgpu_device_fw_loading(adev);
if (r)
return r;
r = amdgpu_device_ip_resume_phase2(adev);
if (r)
goto out;
if (vram_lost)
amdgpu_device_fill_reset_magic(adev);
}
}
out:
if (!r) {
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
if (r) {
dev_err(adev->dev, "ib ring test failed (%d).\n", r);
r = amdgpu_device_ip_suspend(adev);
need_full_reset = true;
goto retry;
}
}
if (!r)
r = amdgpu_device_recover_vram(adev);
return r;
}
/**
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
@@ -3295,40 +3250,46 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
return false;
}
if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
!amdgpu_sriov_vf(adev))) {
DRM_INFO("GPU recovery disabled.\n");
return false;
if (amdgpu_gpu_recovery == 0)
goto disabled;
if (amdgpu_sriov_vf(adev))
return true;
if (amdgpu_gpu_recovery == -1) {
switch (adev->asic_type) {
case CHIP_BONAIRE:
case CHIP_HAWAII:
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_VEGA20:
case CHIP_VEGA10:
case CHIP_VEGA12:
break;
default:
goto disabled;
}
}
return true;
disabled:
DRM_INFO("GPU recovery disabled.\n");
return false;
}
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
*
* Attempt to reset the GPU if it has hung (all asics).
* Returns 0 for success or an error on failure.
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_job *job,
bool *need_full_reset_arg)
{
int i, r, resched;
dev_info(adev->dev, "GPU reset begin!\n");
mutex_lock(&adev->lock_reset);
atomic_inc(&adev->gpu_reset_counter);
adev->in_gpu_reset = 1;
/* Block kfd */
amdgpu_amdkfd_pre_reset(adev);
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
int i, r = 0;
bool need_full_reset = *need_full_reset_arg;
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -3348,10 +3309,144 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_fence_driver_force_completion(ring);
}
if (amdgpu_sriov_vf(adev))
r = amdgpu_device_reset_sriov(adev, job ? false : true);
else
r = amdgpu_device_reset(adev);
if (!amdgpu_sriov_vf(adev)) {
if (!need_full_reset)
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
if (!need_full_reset) {
amdgpu_device_ip_pre_soft_reset(adev);
r = amdgpu_device_ip_soft_reset(adev);
amdgpu_device_ip_post_soft_reset(adev);
if (r || amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("soft reset failed, will fallback to full reset!\n");
need_full_reset = true;
}
}
if (need_full_reset)
r = amdgpu_device_ip_suspend(adev);
*need_full_reset_arg = need_full_reset;
}
return r;
}
static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
struct list_head *device_list_handle,
bool *need_full_reset_arg)
{
struct amdgpu_device *tmp_adev = NULL;
bool need_full_reset = *need_full_reset_arg, vram_lost = false;
int r = 0;
/*
* ASIC reset has to be done on all HGMI hive nodes ASAP
* to allow proper links negotiation in FW (within 1 sec)
*/
if (need_full_reset) {
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
r = -EALREADY;
} else
r = amdgpu_asic_reset(tmp_adev);
if (r) {
DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s",
r, tmp_adev->ddev->unique);
break;
}
}
/* For XGMI wait for all PSP resets to complete before proceed */
if (!r) {
list_for_each_entry(tmp_adev, device_list_handle,
gmc.xgmi.head) {
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
flush_work(&tmp_adev->xgmi_reset_work);
r = tmp_adev->asic_reset_res;
if (r)
break;
}
}
}
}
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (need_full_reset) {
/* post card */
if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
DRM_WARN("asic atom init failed!");
if (!r) {
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
r = amdgpu_device_ip_resume_phase1(tmp_adev);
if (r)
goto out;
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
if (vram_lost) {
DRM_ERROR("VRAM is lost!\n");
atomic_inc(&tmp_adev->vram_lost_counter);
}
r = amdgpu_gtt_mgr_recover(
&tmp_adev->mman.bdev.man[TTM_PL_TT]);
if (r)
goto out;
r = amdgpu_device_fw_loading(tmp_adev);
if (r)
return r;
r = amdgpu_device_ip_resume_phase2(tmp_adev);
if (r)
goto out;
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
/* Update PSP FW topology after reset */
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
}
}
out:
if (!r) {
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
r = amdgpu_device_ip_suspend(tmp_adev);
need_full_reset = true;
r = -EAGAIN;
goto end;
}
}
if (!r)
r = amdgpu_device_recover_vram(tmp_adev);
else
tmp_adev->asic_reset_res = r;
}
end:
*need_full_reset_arg = need_full_reset;
return r;
}
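
Editor's note: for an XGMI hive the function above fans the ASIC resets out onto system_highpri_wq (one xgmi_reset_work per node) and then flush_work()s each one, because the firmware's link renegotiation window is about a second. The sketch below models only that fan-out/join shape with POSIX threads; the node count and result handling are illustrative and not the driver's implementation.

#include <pthread.h>
#include <stdio.h>

#define NUM_NODES 4   /* assumed hive size, purely illustrative */

struct node {
	int id;
	int reset_res;
};

/* Stand-in for the per-device reset work item. */
static void *node_reset(void *arg)
{
	struct node *n = arg;

	n->reset_res = 0;   /* pretend the ASIC reset succeeded */
	printf("node %d reset done\n", n->id);
	return NULL;
}

int main(void)
{
	struct node nodes[NUM_NODES];
	pthread_t threads[NUM_NODES];
	int i, r = 0;

	/* Fan out: kick every node's reset at once so all hive links can
	 * renegotiate within the firmware's time window. */
	for (i = 0; i < NUM_NODES; i++) {
		nodes[i].id = i;
		pthread_create(&threads[i], NULL, node_reset, &nodes[i]);
	}

	/* Join: wait for all resets, then pick up per-node results,
	 * mirroring the flush_work() + asic_reset_res pattern. */
	for (i = 0; i < NUM_NODES; i++) {
		pthread_join(threads[i], NULL);
		if (nodes[i].reset_res)
			r = nodes[i].reset_res;
	}

	printf("hive reset %s\n", r ? "failed" : "succeeded");
	return 0;
}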
static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -3363,7 +3458,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* or all rings (in the case @job is NULL)
* after above amdgpu_reset accomplished
*/
if ((!job || job->base.sched == &ring->sched) && !r)
if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
drm_sched_job_recovery(&ring->sched);
kthread_unpark(ring->sched.thread);
@@ -3373,21 +3468,142 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
drm_helper_resume_force_mode(adev->ddev);
}
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
adev->asic_reset_res = 0;
}
if (r) {
/* bad news, how to tell it to userspace ? */
dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
} else {
dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
}
static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
{
mutex_lock(&adev->lock_reset);
atomic_inc(&adev->gpu_reset_counter);
adev->in_gpu_reset = 1;
/* Block kfd */
amdgpu_amdkfd_pre_reset(adev);
}
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
/*unlock kfd */
amdgpu_amdkfd_post_reset(adev);
amdgpu_vf_error_trans_all(adev);
adev->in_gpu_reset = 0;
mutex_unlock(&adev->lock_reset);
}
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
*
* Attempt to reset the GPU if it has hung (all asics).
* Attempt to do soft-reset or full-reset and reinitialize Asic
* Returns 0 for success or an error on failure.
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
int r;
struct amdgpu_hive_info *hive = NULL;
bool need_full_reset = false;
struct amdgpu_device *tmp_adev = NULL;
struct list_head device_list, *device_list_handle = NULL;
INIT_LIST_HEAD(&device_list);
dev_info(adev->dev, "GPU reset begin!\n");
/*
* In case of XGMI hive disallow concurrent resets to be triggered
* by different nodes. No point also since the one node already executing
* reset will also reset all the other nodes in the hive.
*/
hive = amdgpu_get_xgmi_hive(adev);
if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
!mutex_trylock(&hive->hive_lock))
return 0;
/* Start with adev pre asic reset first for soft reset check.*/
amdgpu_device_lock_adev(adev);
r = amdgpu_device_pre_asic_reset(adev,
job,
&need_full_reset);
if (r) {
/*TODO Should we stop ?*/
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
r, adev->ddev->unique);
adev->asic_reset_res = r;
}
/* Build list of devices to reset */
if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
if (!hive) {
amdgpu_device_unlock_adev(adev);
return -ENODEV;
}
/*
* In case we are in XGMI hive mode device reset is done for all the
* nodes in the hive to retrain all XGMI links and hence the reset
* sequence is executed in loop on all nodes.
*/
device_list_handle = &hive->device_list;
} else {
list_add_tail(&adev->gmc.xgmi.head, &device_list);
device_list_handle = &device_list;
}
retry: /* Rest of adevs pre asic reset from XGMI hive. */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (tmp_adev == adev)
continue;
amdgpu_device_lock_adev(tmp_adev);
r = amdgpu_device_pre_asic_reset(tmp_adev,
NULL,
&need_full_reset);
/*TODO Should we stop ?*/
if (r) {
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
r, tmp_adev->ddev->unique);
tmp_adev->asic_reset_res = r;
}
}
/* Actual ASIC resets if needed.*/
/* TODO Implement XGMI hive reset logic for SRIOV */
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_device_reset_sriov(adev, job ? false : true);
if (r)
adev->asic_reset_res = r;
} else {
r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
if (r && r == -EAGAIN)
goto retry;
}
/* Post ASIC reset for all devs .*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
if (r) {
/* bad news, how to tell it to userspace ? */
dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
} else {
dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
}
amdgpu_device_unlock_adev(tmp_adev);
}
if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
mutex_unlock(&hive->hive_lock);
if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
return r;
}

@@ -631,6 +631,11 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
if (!adev->mode_info.max_bpc_property)
return -ENOMEM;
adev->mode_info.abm_level_property =
drm_property_create_range(adev->ddev, 0,
"abm level", 0, 4);
if (!adev->mode_info.abm_level_property)
return -ENOMEM;
}
return 0;
@@ -857,7 +862,12 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
/* Inside "upper part" of vblank area? Apply corrective offset if so: */
if (in_vbl && (*vpos >= vbl_start)) {
vtotal = mode->crtc_vtotal;
*vpos = *vpos - vtotal;
/* With variable refresh rate displays the vpos can exceed
* the vtotal value. Clamp to 0 to return -vbl_end instead
* of guessing the remaining number of lines until scanout.
*/
*vpos = (*vpos < vtotal) ? (*vpos - vtotal) : 0;
}
/* Correct for shifted end of vbl at vbl_end. */
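
Editor's note: with variable refresh rate the scanout position can sit past vtotal while the front porch is stretched, so the hunk above clamps the corrected value to 0 instead of returning a misleading positive offset. A tiny standalone illustration of the before/after arithmetic, with made-up line counts:

#include <stdio.h>

int main(void)
{
	int vtotal = 1125;   /* assumed nominal total scanlines */
	int vpos   = 1180;   /* VRR front porch extended past vtotal */

	int old_result = vpos - vtotal;                       /* 55: wrongly looks like active area */
	int new_result = (vpos < vtotal) ? vpos - vtotal : 0; /* clamped as in the hunk above */

	printf("old: %d, clamped: %d\n", old_result, new_result);
	return 0;
}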

@@ -0,0 +1,243 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
/*
* GPU doorbell structures, functions & helpers
*/
struct amdgpu_doorbell {
/* doorbell mmio */
resource_size_t base;
resource_size_t size;
u32 __iomem *ptr;
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
};
/* Reserved doorbells for amdgpu (including multimedia).
* KFD can use all the rest in the 2M doorbell bar.
* For asic before vega10, doorbell is 32-bit, so the
* index/offset is in dword. For vega10 and after, doorbell
* can be 64-bit, so the index defined is in qword.
*/
struct amdgpu_doorbell_index {
uint32_t kiq;
uint32_t mec_ring0;
uint32_t mec_ring1;
uint32_t mec_ring2;
uint32_t mec_ring3;
uint32_t mec_ring4;
uint32_t mec_ring5;
uint32_t mec_ring6;
uint32_t mec_ring7;
uint32_t userqueue_start;
uint32_t userqueue_end;
uint32_t gfx_ring0;
uint32_t sdma_engine0;
uint32_t sdma_engine1;
uint32_t sdma_engine2;
uint32_t sdma_engine3;
uint32_t sdma_engine4;
uint32_t sdma_engine5;
uint32_t sdma_engine6;
uint32_t sdma_engine7;
uint32_t ih;
union {
struct {
uint32_t vcn_ring0_1;
uint32_t vcn_ring2_3;
uint32_t vcn_ring4_5;
uint32_t vcn_ring6_7;
} vcn;
struct {
uint32_t uvd_ring0_1;
uint32_t uvd_ring2_3;
uint32_t uvd_ring4_5;
uint32_t uvd_ring6_7;
uint32_t vce_ring0_1;
uint32_t vce_ring2_3;
uint32_t vce_ring4_5;
uint32_t vce_ring6_7;
} uvd_vce;
};
uint32_t max_assignment;
};
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
{
AMDGPU_DOORBELL_KIQ = 0x000,
AMDGPU_DOORBELL_HIQ = 0x001,
AMDGPU_DOORBELL_DIQ = 0x002,
AMDGPU_DOORBELL_MEC_RING0 = 0x010,
AMDGPU_DOORBELL_MEC_RING1 = 0x011,
AMDGPU_DOORBELL_MEC_RING2 = 0x012,
AMDGPU_DOORBELL_MEC_RING3 = 0x013,
AMDGPU_DOORBELL_MEC_RING4 = 0x014,
AMDGPU_DOORBELL_MEC_RING5 = 0x015,
AMDGPU_DOORBELL_MEC_RING6 = 0x016,
AMDGPU_DOORBELL_MEC_RING7 = 0x017,
AMDGPU_DOORBELL_GFX_RING0 = 0x020,
AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0,
AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1,
AMDGPU_DOORBELL_IH = 0x1E8,
AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF,
AMDGPU_DOORBELL_INVALID = 0xFFFF
} AMDGPU_DOORBELL_ASSIGNMENT;
typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
{
/* Compute + GFX: 0~255 */
AMDGPU_VEGA20_DOORBELL_KIQ = 0x000,
AMDGPU_VEGA20_DOORBELL_HIQ = 0x001,
AMDGPU_VEGA20_DOORBELL_DIQ = 0x002,
AMDGPU_VEGA20_DOORBELL_MEC_RING0 = 0x003,
AMDGPU_VEGA20_DOORBELL_MEC_RING1 = 0x004,
AMDGPU_VEGA20_DOORBELL_MEC_RING2 = 0x005,
AMDGPU_VEGA20_DOORBELL_MEC_RING3 = 0x006,
AMDGPU_VEGA20_DOORBELL_MEC_RING4 = 0x007,
AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008,
AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009,
AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A,
AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B,
AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A,
AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B,
/* SDMA:256~335*/
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1 = 0x10A,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2 = 0x114,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3 = 0x11E,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4 = 0x128,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5 = 0x132,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6 = 0x13C,
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7 = 0x146,
/* IH: 376~391 */
AMDGPU_VEGA20_DOORBELL_IH = 0x178,
/* MMSCH: 392~407
* overlap the doorbell assignment with VCN as they are mutually exclusive
* VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
*/
AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189,
AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A,
AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B,
AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188,
AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189,
AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A,
AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7 = 0x18B,
AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1 = 0x18C,
AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D,
AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E,
AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F,
AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
/*
* 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
*/
typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
{
/*
* All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in
* a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
* Compute related doorbells are allocated from 0x00 to 0x8a
*/
/* kernel scheduling */
AMDGPU_DOORBELL64_KIQ = 0x00,
/* HSA interface queue and debug queue */
AMDGPU_DOORBELL64_HIQ = 0x01,
AMDGPU_DOORBELL64_DIQ = 0x02,
/* Compute engines */
AMDGPU_DOORBELL64_MEC_RING0 = 0x03,
AMDGPU_DOORBELL64_MEC_RING1 = 0x04,
AMDGPU_DOORBELL64_MEC_RING2 = 0x05,
AMDGPU_DOORBELL64_MEC_RING3 = 0x06,
AMDGPU_DOORBELL64_MEC_RING4 = 0x07,
AMDGPU_DOORBELL64_MEC_RING5 = 0x08,
AMDGPU_DOORBELL64_MEC_RING6 = 0x09,
AMDGPU_DOORBELL64_MEC_RING7 = 0x0a,
/* User queue doorbell range (128 doorbells) */
AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a,
/* Graphics engine */
AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
/*
* Other graphics doorbells can be allocated here: from 0x8c to 0xdf
* Graphics voltage island aperture 1
* default non-graphics QWORD index is 0xe0 - 0xFF inclusive
*/
/* For vega10 sriov, the sdma doorbell must be fixed as follow
* to keep the same setting with host driver, or it will
* happen conflicts
*/
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
/* Interrupt handler */
AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */
AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */
/* VCN engine use 32 bits doorbell */
AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
AMDGPU_DOORBELL64_VCN2_3 = 0xF9,
AMDGPU_DOORBELL64_VCN4_5 = 0xFA,
AMDGPU_DOORBELL64_VCN6_7 = 0xFB,
/* overlap the doorbell assignment with VCN as they are mutually exclusive
* VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
*/
AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8,
AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9,
AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA,
AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB,
AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC,
AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD,
AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
AMDGPU_DOORBELL64_INVALID = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
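
Editor's note: as the comment at the top of this header says, pre-Vega10 parts index 32-bit doorbells (dword granularity) while Vega10 and later can use 64-bit doorbells indexed in qwords. A small standalone sketch of the resulting byte offsets, using the SDMA engine 0 indices defined above (0x1E0 legacy, 0xF0 64-bit) purely as examples:

#include <stdint.h>
#include <stdio.h>

/* Byte offset of a doorbell inside the doorbell BAR.
 * Legacy indices count 32-bit slots, 64-bit indices count 64-bit slots. */
static uint64_t doorbell_byte_offset(uint32_t index, int is_64bit_doorbell)
{
	return (uint64_t)index * (is_64bit_doorbell ? 8 : 4);
}

int main(void)
{
	/* With these particular indices both land at byte offset 0x780. */
	printf("legacy SDMA0 doorbell at 0x%llx\n",
	       (unsigned long long)doorbell_byte_offset(0x1E0, 0));
	printf("64-bit SDMA0 doorbell at 0x%llx\n",
	       (unsigned long long)doorbell_byte_offset(0xF0, 1));
	return 0;
}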

@@ -454,9 +454,10 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
/**
* DOC: param_buf_per_se (int)
* Override the size of Off-Chip Pramater Cache per Shader Engine in Byte. The default is 0 (depending on gfx).
* Override the size of Off-Chip Parameter Cache per Shader Engine in Byte.
* The default is 0 (depending on gfx).
*/
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)");
module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
/**
@@ -1227,9 +1228,6 @@ static struct drm_driver kms_driver = {
.patchlevel = KMS_DRIVER_PATCHLEVEL,
};
static struct drm_driver *driver;
static struct pci_driver *pdriver;
static struct pci_driver amdgpu_kms_pci_driver = {
.name = DRIVER_NAME,
.id_table = pciidlist,
@@ -1259,16 +1257,14 @@ static int __init amdgpu_init(void)
goto error_fence;
DRM_INFO("amdgpu kernel modesetting enabled.\n");
driver = &kms_driver;
pdriver = &amdgpu_kms_pci_driver;
driver->num_ioctls = amdgpu_max_kms_ioctl;
kms_driver.num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler();
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
amdgpu_amdkfd_init();
/* let modprobe override vga console setting */
return pci_register_driver(pdriver);
return pci_register_driver(&amdgpu_kms_pci_driver);
error_fence:
amdgpu_sync_fini();
@@ -1280,7 +1276,7 @@ error_sync:
static void __exit amdgpu_exit(void)
{
amdgpu_amdkfd_fini();
pci_unregister_driver(pdriver);
pci_unregister_driver(&amdgpu_kms_pci_driver);
amdgpu_unregister_atpx_handler();
amdgpu_sync_fini();
amdgpu_fence_slab_fini();

@@ -398,9 +398,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
ring->fence_drv.irq_type = irq_type;
ring->fence_drv.initialized = true;
dev_dbg(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, "
"cpu addr 0x%p\n", ring->idx,
ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr "
"0x%016llx, cpu addr 0x%p\n", ring->name,
ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
return 0;
}

@@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
}
mb();
amdgpu_asic_flush_hdp(adev, NULL);
amdgpu_gmc_flush_gpu_tlb(adev, 0);
amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
return 0;
}
@@ -259,6 +259,8 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
* @offset: offset into the GPU's gart aperture
* @pages: number of pages to bind
* @dma_addr: DMA addresses of pages
* @flags: page table entry flags
* @dst: CPU address of the gart table
*
* Map the dma_addresses into GART entries (all asics).
* Returns 0 for success, -EINVAL for failure.
@@ -331,7 +333,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
mb();
amdgpu_asic_flush_hdp(adev, NULL);
amdgpu_gmc_flush_gpu_tlb(adev, 0);
amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
return 0;
}

@@ -41,6 +41,7 @@ struct amdgpu_bo;
struct amdgpu_gart {
struct amdgpu_bo *bo;
/* CPU kmapped address of gart table */
void *ptr;
unsigned num_gpu_pages;
unsigned num_cpu_pages;

@@ -169,7 +169,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
INIT_LIST_HEAD(&duplicates);
tv.bo = &bo->tbo;
tv.shared = true;
tv.num_shared = 1;
list_add(&tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
@@ -604,7 +604,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
abo = gem_to_amdgpu_bo(gobj);
tv.bo = &abo->tbo;
tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
tv.num_shared = 1;
else
tv.num_shared = 0;
list_add(&tv.head, &list);
} else {
gobj = NULL;

@@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
/*
* GEM objects.
*/

@@ -25,6 +25,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
@@ -249,7 +250,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
ring->doorbell_index = adev->doorbell_index.kiq;
r = amdgpu_gfx_kiq_acquire(adev, ring);
if (r)

@@ -29,6 +29,7 @@
*/
#include "clearstate_defs.h"
#include "amdgpu_ring.h"
#include "amdgpu_rlc.h"
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -37,59 +38,6 @@
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
struct amdgpu_rlc_funcs {
void (*enter_safe_mode)(struct amdgpu_device *adev);
void (*exit_safe_mode)(struct amdgpu_device *adev);
};
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
volatile uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
volatile uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
volatile uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
bool in_safe_mode;
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
u32 save_and_restore_offset;
u32 clear_state_descriptor_offset;
u32 avail_scratch_ram_locations;
u32 reg_restore_list_size;
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
u32 reg_list_format_direct_reg_list_length;
u32 save_restore_list_cntl_size_bytes;
u32 save_restore_list_gpm_size_bytes;
u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
u8 *save_restore_list_cntl;
u8 *save_restore_list_gpm;
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
struct amdgpu_mec {

@@ -64,7 +64,7 @@ struct amdgpu_vmhub {
struct amdgpu_gmc_funcs {
/* flush the vm tlb via mmio */
void (*flush_gpu_tlb)(struct amdgpu_device *adev,
uint32_t vmid);
uint32_t vmid, uint32_t flush_type);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
@@ -89,7 +89,7 @@ struct amdgpu_gmc_funcs {
struct amdgpu_xgmi {
/* from psp */
u64 device_id;
u64 node_id;
u64 hive_id;
/* fixed per family */
u64 node_segment_size;
@@ -99,6 +99,7 @@ struct amdgpu_xgmi {
unsigned num_physical_nodes;
/* gpu list in the same hive */
struct list_head head;
bool supported;
};
struct amdgpu_gmc {
@@ -151,7 +152,7 @@ struct amdgpu_gmc {
struct amdgpu_xgmi xgmi;
};
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))

@@ -146,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
fence_ctx = 0;
}
if (!ring->ready) {
if (!ring->sched.ready) {
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
return -EINVAL;
}
@@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
continue;
amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
need_ctx_switch);
amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch);
need_ctx_switch = false;
}
@@ -347,19 +346,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
}
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
for (i = 0; i < adev->num_rings; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
long tmo;
if (!ring || !ring->ready)
continue;
/* skip IB tests for KIQ in general for the below reasons:
* 1. We never submit IBs to the KIQ
* 2. KIQ doesn't use the EOP interrupts,
* we use some other CP interrupt.
/* KIQ rings don't have an IB test because we never submit IBs
* to them and they have no interrupt support.
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
if (!ring->sched.ready || !ring->funcs->test_ib)
continue;
/* MM engine need more time */
@@ -374,20 +368,23 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
tmo = tmo_gfx;
r = amdgpu_ring_test_ib(ring, tmo);
if (r) {
ring->ready = false;
if (!r) {
DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
ring->name);
continue;
}
if (ring == &adev->gfx.gfx_ring[0]) {
/* oh, oh, that's really bad */
DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r);
adev->accel_working = false;
return r;
ring->sched.ready = false;
DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n",
ring->name, r);
} else {
/* still not good, but we can live with it */
DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
ret = r;
}
if (ring == &adev->gfx.gfx_ring[0]) {
/* oh, oh, that's really bad */
adev->accel_working = false;
return r;
} else {
ret = r;
}
}
return ret;
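
Editor's note: the reworked loop above keeps testing the remaining rings after a failure; a failing ring is marked not ready and the error recorded, and only a failure on the first GFX ring disables acceleration and aborts. A minimal standalone sketch of that continue-on-error policy, with invented per-ring results:

#include <stdio.h>

int main(void)
{
	int results[5] = { 0, 0, -22, 0, -5 };   /* assumed per-ring IB test results */
	int i, ret = 0;

	for (i = 0; i < 5; i++) {
		if (!results[i])
			continue;                /* this ring passed */

		printf("IB test failed on ring %d (%d)\n", i, results[i]);
		ret = results[i];

		if (i == 0) {                    /* ring 0 stands in for the GFX ring */
			printf("critical ring failed, stopping\n");
			break;
		}
		/* non-critical ring: remember the error, keep testing the rest */
	}
	return ret ? 1 : 0;
}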

@@ -51,14 +51,12 @@ struct amdgpu_ih_ring {
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev);
bool (*prescreen_iv)(struct amdgpu_device *adev);
void (*decode_iv)(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
void (*set_rptr)(struct amdgpu_device *adev);
};
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))

@@ -93,23 +93,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
drm_helper_hpd_irq_event(dev);
}
/**
* amdgpu_irq_reset_work_func - execute GPU reset
*
* @work: work struct pointer
*
* Execute scheduled GPU reset (Cayman+).
* This function is called when the IRQ handler thinks we need a GPU reset.
*/
static void amdgpu_irq_reset_work_func(struct work_struct *work)
{
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work);
if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
}
/**
* amdgpu_irq_disable_all - disable *all* interrupts
*
@@ -162,13 +145,6 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev,
u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry;
/* Prescreening of high-frequency interrupts */
if (!amdgpu_ih_prescreen_iv(adev))
return;
/* Before dispatching irq to IP blocks, send it to amdkfd */
amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]);
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
amdgpu_ih_decode_iv(adev, &entry);
@@ -262,15 +238,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
amdgpu_hotplug_work_func);
}
INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func);
adev->irq.installed = true;
r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
if (r) {
adev->irq.installed = false;
if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
return r;
}
adev->ddev->max_vblank_count = 0x00ffffff;
@@ -299,7 +272,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
pci_disable_msi(adev->pdev);
if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
}
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
@@ -392,39 +364,38 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
unsigned client_id = entry->client_id;
unsigned src_id = entry->src_id;
struct amdgpu_irq_src *src;
bool handled = false;
int r;
trace_amdgpu_iv(entry);
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
return;
}
if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
} else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
return;
}
if (adev->irq.virq[src_id]) {
} else if (adev->irq.virq[src_id]) {
generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id));
} else {
if (!adev->irq.client[client_id].sources) {
DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
client_id, src_id);
return;
}
src = adev->irq.client[client_id].sources[src_id];
if (!src) {
DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
return;
}
} else if (!adev->irq.client[client_id].sources) {
DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
client_id, src_id);
} else if ((src = adev->irq.client[client_id].sources[src_id])) {
r = src->funcs->process(adev, src, entry);
if (r)
if (r < 0)
DRM_ERROR("error processing interrupt (%d)\n", r);
else if (r)
handled = true;
} else {
DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
}
/* Send it to amdkfd as well if it isn't already handled */
if (!handled)
amdgpu_amdkfd_interrupt(adev, entry->iv_entry);
}
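
Editor's note: the restructured dispatch above treats a positive return from the source's process() callback as "handled", a negative one as an error, and forwards anything left unhandled to amdkfd. A standalone sketch of that convention, with a dummy handler standing in for the per-source callback:

#include <stdio.h>

/* Return value convention: <0 error, 0 not handled, >0 handled. */
static int sample_process(int src_id)
{
	return src_id == 42 ? 1 : 0;   /* pretend only source 42 is ours */
}

static void dispatch(int src_id)
{
	int handled = 0;
	int r = sample_process(src_id);

	if (r < 0)
		printf("error processing interrupt (%d)\n", r);
	else if (r)
		handled = 1;

	/* Anything the driver did not consume is passed on to the
	 * compute stack as a fallback consumer. */
	if (!handled)
		printf("forwarding src %d to fallback handler\n", src_id);
}

int main(void)
{
	dispatch(42);
	dispatch(7);
	return 0;
}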
/**

@@ -112,6 +112,8 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
drm_sched_job_cleanup(s_job);
amdgpu_ring_priority_put(ring, s_job->s_priority);
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);

@@ -33,6 +33,8 @@
#define to_amdgpu_job(sched_job) \
container_of((sched_job), struct amdgpu_job, base)
#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)
struct amdgpu_fence;
struct amdgpu_job {

@@ -336,7 +336,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
if (adev->gfx.gfx_ring[i].ready)
if (adev->gfx.gfx_ring[i].sched.ready)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
@@ -344,7 +344,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
if (adev->gfx.compute_ring[i].ready)
if (adev->gfx.compute_ring[i].sched.ready)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
@@ -352,7 +352,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
if (adev->sdma.instance[i].ring.ready)
if (adev->sdma.instance[i].ring.sched.ready)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
@@ -363,7 +363,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
if (adev->uvd.inst[i].ring.ready)
if (adev->uvd.inst[i].ring.sched.ready)
++num_rings;
}
ib_start_alignment = 64;
@@ -372,7 +372,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
if (adev->vce.ring[i].ready)
if (adev->vce.ring[i].sched.ready)
++num_rings;
ib_start_alignment = 4;
ib_size_alignment = 1;
@@ -384,7 +384,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
if (adev->uvd.inst[i].ring_enc[j].ready)
if (adev->uvd.inst[i].ring_enc[j].sched.ready)
++num_rings;
}
ib_start_alignment = 64;
@@ -392,7 +392,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
if (adev->vcn.ring_dec.ready)
if (adev->vcn.ring_dec.sched.ready)
++num_rings;
ib_start_alignment = 16;
ib_size_alignment = 16;
@@ -400,14 +400,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_enc_rings; i++)
if (adev->vcn.ring_enc[i].ready)
if (adev->vcn.ring_enc[i].sched.ready)
++num_rings;
ib_start_alignment = 64;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
if (adev->vcn.ring_jpeg.ready)
if (adev->vcn.ring_jpeg.sched.ready)
++num_rings;
ib_start_alignment = 16;
ib_size_alignment = 16;
@@ -978,7 +978,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
}
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
&fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE);
if (r)
goto error_vm;
}
@@ -1048,8 +1051,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pasid = fpriv->vm.pasid;
pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
amdgpu_vm_fini(adev, &fpriv->vm);
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_vm_fini(adev, &fpriv->vm);
if (pasid)
amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);

@@ -38,7 +38,6 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_plane_helper.h>
#include <drm/drm_fb_helper.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
#include <linux/hrtimer.h>
@@ -57,7 +56,6 @@ struct amdgpu_hpd;
#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
#define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base)
#define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base)
#define to_amdgpu_plane(x) container_of(x, struct amdgpu_plane, base)
#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base);
@@ -295,13 +293,6 @@ struct amdgpu_display_funcs {
uint16_t connector_object_id,
struct amdgpu_hpd *hpd,
struct amdgpu_router *router);
/* it is used to enter or exit into free sync mode */
int (*notify_freesync)(struct drm_device *dev, void *data,
struct drm_file *filp);
/* it is used to allow enablement of freesync mode */
int (*set_freesync_property)(struct drm_connector *connector,
struct drm_property *property,
uint64_t val);
};
@@ -325,7 +316,7 @@ struct amdgpu_mode_info {
struct card_info *atom_card_info;
bool mode_config_initialized;
struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS];
struct amdgpu_plane *planes[AMDGPU_MAX_PLANES];
struct drm_plane *planes[AMDGPU_MAX_PLANES];
struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS];
/* DVI-I properties */
struct drm_property *coherent_mode_property;
@@ -341,6 +332,8 @@ struct amdgpu_mode_info {
struct drm_property *dither_property;
/* maximum number of bits per channel for monitor color */
struct drm_property *max_bpc_property;
/* Adaptive Backlight Modulation (power feature) */
struct drm_property *abm_level_property;
/* hardcoded DFP edid from BIOS */
struct edid *bios_hardcoded_edid;
int bios_hardcoded_edid_size;
@@ -436,11 +429,6 @@ struct amdgpu_crtc {
struct drm_pending_vblank_event *event;
};
struct amdgpu_plane {
struct drm_plane base;
enum drm_plane_type plane_type;
};
struct amdgpu_encoder_atom_dig {
bool linkb;
/* atom dig */

View File

@@ -81,7 +81,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
amdgpu_bo_subtract_pin_size(bo);
if (bo->kfd_bo)
amdgpu_amdkfd_unreserve_system_memory_limit(bo);
amdgpu_amdkfd_unreserve_memory_limit(bo);
amdgpu_bo_kunmap(bo);
@@ -607,53 +607,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
}
/**
* amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object
* @adev: amdgpu device object
* @ring: amdgpu_ring for the engine handling the buffer operations
* @bo: &amdgpu_bo buffer to be backed up
* @resv: reservation object with embedded fence
* @fence: dma_fence associated with the operation
* @direct: whether to submit the job directly
*
* Copies an &amdgpu_bo buffer object to its shadow object.
* Not used for now.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct reservation_object *resv,
struct dma_fence **fence,
bool direct)
{
struct amdgpu_bo *shadow = bo->shadow;
uint64_t bo_addr, shadow_addr;
int r;
if (!shadow)
return -EINVAL;
bo_addr = amdgpu_bo_gpu_offset(bo);
shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
goto err;
r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
amdgpu_bo_size(bo), resv, fence,
direct, false);
if (!r)
amdgpu_bo_fence(bo, *fence, true);
err:
return r;
}
/**
* amdgpu_bo_validate - validate an &amdgpu_bo buffer object
* @bo: pointer to the buffer object

View File

@@ -267,11 +267,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
bool shared);
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct reservation_object *resv,
struct dma_fence **fence, bool direct);
int amdgpu_bo_validate(struct amdgpu_bo *bo);
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
struct dma_fence **fence);


@@ -33,6 +33,8 @@
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <linux/nospec.h>
#include "hwmgr.h"
#define WIDTH_4K 3840
static int amdgpu_debugfs_pm_init(struct amdgpu_device *adev);
@@ -1642,6 +1644,19 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
return 0;
/* Skip fan attributes on APU */
if ((adev->flags & AMD_IS_APU) &&
(attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
return 0;
/* Skip limit attributes if DPM is not enabled */
if (!adev->pm.dpm_enabled &&
(attr == &sensor_dev_attr_temp1_crit.dev_attr.attr ||
@@ -1956,6 +1971,17 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
mutex_unlock(&adev->pm.mutex);
}
/* enable/disable Low Memory PState for UVD (4k videos) */
if (adev->asic_type == CHIP_STONEY &&
adev->uvd.decode_image_width >= WIDTH_4K) {
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
if (hwmgr && hwmgr->hwmgr_func &&
hwmgr->hwmgr_func->update_nbdpm_pstate)
hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr,
!enable,
true);
}
}
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
@@ -2129,7 +2155,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (ring && ring->ready)
if (ring && ring->sched.ready)
amdgpu_fence_wait_empty(ring);
}


@@ -39,8 +39,6 @@
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
static const struct dma_buf_ops amdgpu_dmabuf_ops;
/**
* amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
* implementation
@@ -332,15 +330,13 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
}
static const struct dma_buf_ops amdgpu_dmabuf_ops = {
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_gem_map_attach,
.detach = amdgpu_gem_map_detach,
.map_dma_buf = drm_gem_map_dma_buf,
.unmap_dma_buf = drm_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
.map = drm_gem_dmabuf_kmap,
.unmap = drm_gem_dmabuf_kunmap,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,

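With amdgpu_dmabuf_ops no longer static, other parts of the driver can tell whether an imported dma-buf was originally exported by amdgpu itself, which is useful for self-import paths such as the KFD dma-buf work. A minimal sketch of such a check, assuming only the exported symbol above; the helper name is invented for illustration:

#include <linux/dma-buf.h>
#include "amdgpu.h"

extern const struct dma_buf_ops amdgpu_dmabuf_ops;

/* Sketch: return the amdgpu_bo behind a dma-buf only if the buffer was
 * exported by amdgpu; otherwise return NULL and let the caller take the
 * normal foreign-import path. */
static struct amdgpu_bo *amdgpu_bo_from_dmabuf_sketch(struct dma_buf *dma_buf)
{
        struct drm_gem_object *obj;

        if (dma_buf->ops != &amdgpu_dmabuf_ops)
                return NULL;

        obj = dma_buf->priv;    /* GEM prime exports store the object here */
        return gem_to_amdgpu_bo(obj);
}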

@@ -90,6 +90,8 @@ static int psp_sw_fini(void *handle)
adev->psp.sos_fw = NULL;
release_firmware(adev->psp.asd_fw);
adev->psp.asd_fw = NULL;
release_firmware(adev->psp.ta_fw);
adev->psp.ta_fw = NULL;
return 0;
}
@@ -118,22 +120,26 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
static int
psp_cmd_submit_buf(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr,
int index)
struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr)
{
int ret;
int index;
memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
index = atomic_inc_return(&psp->fence_value);
ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr,
fence_mc_addr, index);
while (*((unsigned int *)psp->fence_buf) != index) {
msleep(1);
if (ret) {
atomic_dec(&psp->fence_value);
return ret;
}
while (*((unsigned int *)psp->fence_buf) != index)
msleep(1);
/* the status field must be 0 after FW is loaded */
if (ucode && psp->cmd_buf_mem->resp.status) {
DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
@@ -149,10 +155,22 @@ psp_cmd_submit_buf(struct psp_context *psp,
return ret;
}
static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
bool psp_support_vmr_ring(struct psp_context *psp)
{
if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045)
return true;
else
return false;
}
static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
struct psp_gfx_cmd_resp *cmd,
uint64_t tmr_mc, uint32_t size)
{
cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
if (psp_support_vmr_ring(psp))
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
else
cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
cmd->cmd.cmd_setup_tmr.buf_size = size;
@@ -186,12 +204,12 @@ static int psp_tmr_load(struct psp_context *psp)
if (!cmd)
return -ENOMEM;
psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n",
PSP_TMR_SIZE, psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr, 1);
psp->fence_buf_mc_addr);
if (ret)
goto failed;
@@ -258,13 +276,194 @@ static int psp_asd_load(struct psp_context *psp)
psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr, 2);
psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
uint32_t xgmi_ta_size, uint32_t shared_size)
{
cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc);
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc);
cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size;
cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared);
cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared);
cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
}
static int psp_xgmi_init_shared_buf(struct psp_context *psp)
{
int ret;
/*
* Allocate 16k memory aligned to 4k from Frame Buffer (local
* physical) for xgmi ta <-> Driver
*/
ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&psp->xgmi_context.xgmi_shared_bo,
&psp->xgmi_context.xgmi_shared_mc_addr,
&psp->xgmi_context.xgmi_shared_buf);
return ret;
}
static int psp_xgmi_load(struct psp_context *psp)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
/*
* TODO: bypass the loading in sriov for now
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size);
psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
psp->xgmi_context.xgmi_shared_mc_addr,
psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
if (!ret) {
psp->xgmi_context.initialized = 1;
psp->xgmi_context.session_id = cmd->resp.session_id;
}
kfree(cmd);
return ret;
}
static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint32_t xgmi_session_id)
{
cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id;
}
static int psp_xgmi_unload(struct psp_context *psp)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
/*
* TODO: bypass the unloading in sriov for now
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint32_t ta_cmd_id,
uint32_t xgmi_session_id)
{
cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id;
cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
/* Note: cmd_invoke_cmd.buf is not used for now */
}
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
/*
* TODO: bypass the loading in sriov for now
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id,
psp->xgmi_context.session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
static int psp_xgmi_terminate(struct psp_context *psp)
{
int ret;
if (!psp->xgmi_context.initialized)
return 0;
ret = psp_xgmi_unload(psp);
if (ret)
return ret;
psp->xgmi_context.initialized = 0;
/* free xgmi shared memory */
amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo,
&psp->xgmi_context.xgmi_shared_mc_addr,
&psp->xgmi_context.xgmi_shared_buf);
return 0;
}
static int psp_xgmi_initialize(struct psp_context *psp)
{
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
if (!psp->xgmi_context.initialized) {
ret = psp_xgmi_init_shared_buf(psp);
if (ret)
return ret;
}
/* Load XGMI TA */
ret = psp_xgmi_load(psp);
if (ret)
return ret;
/* Initialize XGMI session */
xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf);
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
return ret;
}
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -292,6 +491,15 @@ static int psp_hw_start(struct psp_context *psp)
if (ret)
return ret;
if (adev->gmc.xgmi.num_physical_nodes > 1) {
ret = psp_xgmi_initialize(psp);
/* Warn on XGMI session initialization failure
* instead of stopping driver initialization
*/
if (ret)
dev_err(psp->adev->dev,
"XGMI: Failed to initialize XGMI session\n");
}
return 0;
}
@@ -321,7 +529,7 @@ static int psp_np_fw_load(struct psp_context *psp)
return ret;
ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
psp->fence_buf_mc_addr, i + 3);
psp->fence_buf_mc_addr);
if (ret)
return ret;
@@ -340,8 +548,10 @@ static int psp_load_fw(struct amdgpu_device *adev)
int ret;
struct psp_context *psp = &adev->psp;
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset != 0)
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
goto skip_memalloc;
}
psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!psp->cmd)
@@ -452,6 +662,10 @@ static int psp_hw_fini(void *handle)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
psp->xgmi_context.initialized == 1)
psp_xgmi_terminate(psp);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@@ -479,6 +693,15 @@ static int psp_suspend(void *handle)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
psp->xgmi_context.initialized == 1) {
ret = psp_xgmi_terminate(psp);
if (ret) {
DRM_ERROR("Failed to terminate xgmi ta\n");
return ret;
}
}
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
if (ret) {
DRM_ERROR("PSP ring stop failed\n");

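psp_cmd_submit_buf() now owns its fence index: it takes the next value from an atomic counter, rolls the counter back if submission fails, and only then polls the fence buffer. A condensed, hedged sketch of that handshake with the PSP specifics stripped out (all names here are invented for illustration):

#include <linux/atomic.h>
#include <linux/delay.h>

/* Sketch: submit a command tagged with a fresh sequence number, then wait
 * until the firmware echoes that number back into a shared fence buffer. */
static int submit_and_wait_sketch(atomic_t *fence_value,
                                  volatile unsigned int *fence_buf,
                                  int (*submit)(int index))
{
        int index = atomic_inc_return(fence_value);
        int ret = submit(index);

        if (ret) {
                /* keep the counter in step with what the firmware last saw */
                atomic_dec(fence_value);
                return ret;
        }

        while (*fence_buf != index)
                msleep(1);

        return 0;
}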

@@ -27,14 +27,17 @@
#include "amdgpu.h"
#include "psp_gfx_if.h"
#include "ta_xgmi_if.h"
#define PSP_FENCE_BUFFER_SIZE 0x1000
#define PSP_CMD_BUFFER_SIZE 0x1000
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_XGMI_SHARED_MEM_SIZE 0x4000
#define PSP_1_MEG 0x100000
#define PSP_TMR_SIZE 0x400000
struct psp_context;
struct psp_xgmi_node_info;
struct psp_xgmi_topology_info;
enum psp_ring_type
@@ -80,12 +83,20 @@ struct psp_funcs
enum AMDGPU_UCODE_ID ucode_type);
bool (*smu_reload_quirk)(struct psp_context *psp);
int (*mode1_reset)(struct psp_context *psp);
uint64_t (*xgmi_get_device_id)(struct psp_context *psp);
uint64_t (*xgmi_get_node_id)(struct psp_context *psp);
uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
struct psp_xgmi_topology_info *topology);
struct psp_xgmi_topology_info *topology);
int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
struct psp_xgmi_topology_info *topology);
struct psp_xgmi_topology_info *topology);
};
struct psp_xgmi_context {
uint8_t initialized;
uint32_t session_id;
struct amdgpu_bo *xgmi_shared_bo;
uint64_t xgmi_shared_mc_addr;
void *xgmi_shared_buf;
};
struct psp_context
@@ -96,7 +107,7 @@ struct psp_context
const struct psp_funcs *funcs;
/* fence buffer */
/* firmware buffer */
struct amdgpu_bo *fw_pri_bo;
uint64_t fw_pri_mc_addr;
void *fw_pri_buf;
@@ -134,6 +145,16 @@ struct psp_context
struct amdgpu_bo *cmd_buf_bo;
uint64_t cmd_buf_mc_addr;
struct psp_gfx_cmd_resp *cmd_buf_mem;
/* fence value associated with cmd buffer */
atomic_t fence_value;
/* xgmi ta firmware and buffer */
const struct firmware *ta_fw;
uint32_t ta_xgmi_ucode_version;
uint32_t ta_xgmi_ucode_size;
uint8_t *ta_xgmi_start_addr;
struct psp_xgmi_context xgmi_context;
};
struct amdgpu_psp_funcs {
@@ -141,21 +162,17 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID);
};
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
struct psp_xgmi_node_info {
uint64_t node_id;
uint8_t num_hops;
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
};
struct psp_xgmi_topology_info {
/* Generated by PSP to identify the GPU instance within xgmi connection */
uint64_t device_id;
/*
* If all bits are set to 0, the driver indicates it wants to retrieve the xgmi
* connection vector topology, but not access-enable the connections.
* If some or all bits are set to 1, the driver indicates it wants to retrieve the
* current xgmi topology and access-enable the link to GPU[i] associated
* with the bit position in the vector.
* On return, bits indicate which xgmi links are present/active depending
* on the value passed in. The relative bit offset for the relative GPU index
* within the hive is always marked active.
*/
uint32_t connection_mask;
uint32_t reserved; /* must be 0 */
uint32_t num_nodes;
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};
#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
@@ -177,8 +194,8 @@ struct psp_xgmi_topology_info {
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
#define psp_mode1_reset(psp) \
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
#define psp_xgmi_get_device_id(psp) \
((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0)
#define psp_xgmi_get_node_id(psp) \
((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp)) : 0)
#define psp_xgmi_get_hive_id(psp) \
((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
#define psp_xgmi_get_topology_info(psp, num_device, topology) \
@@ -199,6 +216,9 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
int psp_gpu_reset(struct amdgpu_device *adev);
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
bool psp_support_vmr_ring(struct psp_context *psp);
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
#endif

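Once the XGMI TA session is up, a consumer can fill a struct psp_xgmi_topology_info through the psp_xgmi_get_topology_info() wrapper above and walk the per-node entries. A rough sketch of that pattern (the function name is made up and error handling is trimmed):

#include "amdgpu.h"
#include "amdgpu_psp.h"

/* Sketch: log the hop count reported by the XGMI TA for every peer node. */
static void xgmi_dump_topology_sketch(struct psp_context *psp, int num_devices)
{
        struct psp_xgmi_topology_info topology = {};
        uint32_t i;

        if (psp_xgmi_get_topology_info(psp, num_devices, &topology))
                return;

        for (i = 0; i < topology.num_nodes; i++)
                DRM_INFO("XGMI node 0x%llx: %u hop(s), sharing %s\n",
                         topology.nodes[i].node_id,
                         topology.nodes[i].num_hops,
                         topology.nodes[i].is_sharing_enabled ?
                         "enabled" : "disabled");
}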

@@ -338,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
*/
void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
ring->ready = false;
ring->sched.ready = false;
/* Not to finish a ring which is not initialized */
if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
@@ -397,7 +397,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
{
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
if (!ring->funcs->soft_recovery)
if (!ring->funcs->soft_recovery || !fence)
return false;
atomic_inc(&ring->adev->gpu_reset_counter);
@@ -500,3 +500,29 @@ static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
debugfs_remove(ring->ent);
#endif
}
/**
* amdgpu_ring_test_helper - tests the ring and sets the scheduler readiness status
*
* @ring: ring to test
*
* Tests the ring and sets the scheduler readiness status accordingly.
*
* Returns 0 on success, error on failure.
*/
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
int r;
r = amdgpu_ring_test_ring(ring);
if (r)
DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
ring->name, r);
else
DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
ring->name);
ring->sched.ready = !r;
return r;
}

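amdgpu_ring_test_helper() folds the "test the ring, log the result, flip the readiness flag" sequence that individual IP blocks used to open-code, and it is what keeps ring->sched.ready in sync now that the old ring->ready field is gone. A hedged sketch of how a hw_init path might use it; the IP block and ring choice are placeholders:

#include "amdgpu.h"

/* Sketch: after programming the ring registers, let the helper run the
 * ring test and set ring->sched.ready based on the outcome. */
static int example_ip_hw_init_sketch(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];      /* assumed ring */

        /* ...generation-specific ring setup goes here... */

        return amdgpu_ring_test_helper(ring);
}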

@@ -129,8 +129,9 @@ struct amdgpu_ring_funcs {
unsigned emit_ib_size;
/* command emit functions */
void (*emit_ib)(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch);
bool ctx_switch);
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
uint64_t seq, unsigned flags);
void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
@@ -189,7 +190,6 @@ struct amdgpu_ring {
uint64_t gpu_addr;
uint64_t ptr_mask;
uint32_t buf_mask;
bool ready;
u32 idx;
u32 me;
u32 pipe;
@@ -229,7 +229,7 @@ struct amdgpu_ring {
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c)))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
@@ -313,4 +313,6 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
ring->count_dw -= count_dw;
}
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
#endif

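Passing the whole amdgpu_job into emit_ib lets a backend pull the VMID (and any other per-job state) from the job itself; internal submissions such as IB tests pass a NULL job, so the accessor has to tolerate that. A sketch of an updated backend under those assumptions; the packet layout is a placeholder, not any real engine's format:

#include "amdgpu.h"

/* Sketch: ring backend emit_ib with the new prototype. */
static void example_ring_emit_ib_sketch(struct amdgpu_ring *ring,
                                        struct amdgpu_job *job,
                                        struct amdgpu_ib *ib,
                                        bool ctx_switch)
{
        unsigned int vmid = job ? job->vmid : 0;

        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, vmid);  /* engine-specific packet assumed */
}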

@@ -0,0 +1,282 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
* Copyright 2008 Red Hat Inc.
* Copyright 2009 Jerome Glisse.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
/**
* amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
*
* @adev: amdgpu_device pointer
*
* Put the RLC into safe mode if it is enabled and not already in safe mode.
*/
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
{
if (adev->gfx.rlc.in_safe_mode)
return;
/* if RLC is not enabled, do nothing */
if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
return;
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
adev->gfx.rlc.funcs->set_safe_mode(adev);
adev->gfx.rlc.in_safe_mode = true;
}
}
/**
* amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
*
* @adev: amdgpu_device pointer
*
* Take the RLC out of safe mode if it is enabled and has entered safe mode.
*/
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
{
if (!(adev->gfx.rlc.in_safe_mode))
return;
/* if RLC is not enabled, do nothing */
if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
return;
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
adev->gfx.rlc.funcs->unset_safe_mode(adev);
adev->gfx.rlc.in_safe_mode = false;
}
}
/**
* amdgpu_gfx_rlc_init_sr - Init save restore block
*
* @adev: amdgpu_device pointer
* @dws: the size of save restore block
*
* Allocate and set up the RLC save/restore block.
* Returns 0 on success or a negative error code if allocation failed.
*/
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
{
const u32 *src_ptr;
volatile u32 *dst_ptr;
u32 i;
int r;
/* allocate save restore block */
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
amdgpu_gfx_rlc_fini(adev);
return r;
}
/* write the sr buffer */
src_ptr = adev->gfx.rlc.reg_list;
dst_ptr = adev->gfx.rlc.sr_ptr;
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
return 0;
}
/**
* amdgpu_gfx_rlc_init_csb - Init clear state block
*
* @adev: amdgpu_device pointer
*
* Allocate and set up the RLC clear state block.
* Returns 0 on success or a negative error code if allocation failed.
*/
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
{
volatile u32 *dst_ptr;
u32 dws;
int r;
/* allocate clear state block */
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r);
amdgpu_gfx_rlc_fini(adev);
return r;
}
/* set up the cs buffer */
dst_ptr = adev->gfx.rlc.cs_ptr;
adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
return 0;
}
/**
* amdgpu_gfx_rlc_init_cpt - Init cp table
*
* @adev: amdgpu_device pointer
*
* Allocate and set up the RLC CP table.
* Returns 0 on success or a negative error code if allocation failed.
*/
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
{
int r;
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to create cp table bo\n", r);
amdgpu_gfx_rlc_fini(adev);
return r;
}
/* set up the cp table */
amdgpu_gfx_rlc_setup_cp_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
return 0;
}
/**
* amdgpu_gfx_rlc_setup_cp_table - set up the CP table buffer
*
* @adev: amdgpu_device pointer
*
* Write cp firmware data into cp table.
*/
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
{
const __le32 *fw_data;
volatile u32 *dst_ptr;
int me, i, max_me;
u32 bo_offset = 0;
u32 table_offset, table_size;
max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev);
/* write the cp table buffer */
dst_ptr = adev->gfx.rlc.cp_table_ptr;
for (me = 0; me < max_me; me++) {
if (me == 0) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
fw_data = (const __le32 *)
(adev->gfx.ce_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 1) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
fw_data = (const __le32 *)
(adev->gfx.pfp_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 2) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
fw_data = (const __le32 *)
(adev->gfx.me_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 3) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 4) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec2_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
}
for (i = 0; i < table_size; i ++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
bo_offset += table_size;
}
}
/**
* amdgpu_gfx_rlc_fini - Free BO which used for RLC
*
* @adev: amdgpu_device pointer
*
* Free the three BOs used for the RLC save/restore block, clear state block
* and jump table block.
*/
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
{
/* save restore block */
if (adev->gfx.rlc.save_restore_obj) {
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
}
/* clear state block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
/* jump table block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
}

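These helpers centralize the enter/exit safe-mode dance that each gfx generation used to duplicate; a chip-specific driver only has to supply the amdgpu_rlc_funcs callbacks. A sketch of the typical call pattern around a clockgating update, with the register programming left as a placeholder:

#include "amdgpu.h"

/* Sketch: wrap a CG state update in RLC safe mode. The helpers are no-ops
 * when the RLC is disabled or none of CGCG/MGCG/3D_CGCG is advertised in
 * adev->cg_flags. */
static void example_update_cg_sketch(struct amdgpu_device *adev, bool enable)
{
        amdgpu_gfx_rlc_enter_safe_mode(adev);

        /* ...program the generation-specific CG registers here, using
         * 'enable' to pick the on/off sequence... */

        amdgpu_gfx_rlc_exit_safe_mode(adev);
}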

@@ -0,0 +1,98 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_RLC_H__
#define __AMDGPU_RLC_H__
#include "clearstate_defs.h"
struct amdgpu_rlc_funcs {
bool (*is_rlc_enabled)(struct amdgpu_device *adev);
void (*set_safe_mode)(struct amdgpu_device *adev);
void (*unset_safe_mode)(struct amdgpu_device *adev);
int (*init)(struct amdgpu_device *adev);
u32 (*get_csb_size)(struct amdgpu_device *adev);
void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
int (*get_cp_table_num)(struct amdgpu_device *adev);
int (*resume)(struct amdgpu_device *adev);
void (*stop)(struct amdgpu_device *adev);
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
};
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
volatile uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
volatile uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
volatile uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
bool in_safe_mode;
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
u32 save_and_restore_offset;
u32 clear_state_descriptor_offset;
u32 avail_scratch_ram_locations;
u32 reg_restore_list_size;
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
u32 reg_list_format_direct_reg_list_length;
u32 save_restore_list_cntl_size_bytes;
u32 save_restore_list_gpm_size_bytes;
u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
u8 *save_restore_list_cntl;
u8 *save_restore_list_gpm;
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
};
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev);
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
#endif


@@ -28,17 +28,31 @@
* GPU SDMA IP block helpers function.
*/
struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
if (&adev->sdma.instance[i].ring == ring)
break;
if (ring == &adev->sdma.instance[i].ring ||
ring == &adev->sdma.instance[i].page)
return &adev->sdma.instance[i];
if (i < AMDGPU_MAX_SDMA_INSTANCES)
return &adev->sdma.instance[i];
else
return NULL;
return NULL;
}
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
{
struct amdgpu_device *adev = ring->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
if (ring == &adev->sdma.instance[i].ring ||
ring == &adev->sdma.instance[i].page) {
*index = i;
return 0;
}
}
return -EINVAL;
}

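With each SDMA instance now exposing a paging ring next to its main ring, the lookup helpers map either ring back to its instance or index. A sketch of how a ring callback might use them; the callback itself is hypothetical:

#include "amdgpu.h"
#include "amdgpu_sdma.h"

/* Sketch: fetch per-instance SDMA state from whichever ring (main or page)
 * the callback was invoked on. */
static void example_sdma_ring_cb_sketch(struct amdgpu_ring *ring)
{
        struct amdgpu_sdma_instance *sdma =
                amdgpu_sdma_get_instance_from_ring(ring);
        uint32_t index;

        if (!sdma)
                return;         /* not an SDMA ring */

        if (!amdgpu_sdma_get_index_from_ring(ring, &index))
                DRM_DEBUG("SDMA instance %u, burst_nop=%d\n",
                          index, sdma->burst_nop);
}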

@@ -41,6 +41,7 @@ struct amdgpu_sdma_instance {
uint32_t feature_version;
struct amdgpu_ring ring;
struct amdgpu_ring page;
bool burst_nop;
};
@@ -50,6 +51,7 @@ struct amdgpu_sdma {
struct amdgpu_irq_src illegal_inst_irq;
int num_instances;
uint32_t srbm_soft_reset;
bool has_page_queue;
};
/*
@@ -92,6 +94,7 @@ struct amdgpu_buffer_funcs {
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
struct amdgpu_sdma_instance *
amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
#endif


@@ -218,6 +218,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_ARGS(vm, ring, job),
TP_STRUCT__entry(
__field(u32, pasid)
__string(ring, ring->name)
__field(u32, ring)
__field(u32, vmid)
__field(u32, vm_hub)
@@ -227,14 +228,14 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_fast_assign(
__entry->pasid = vm->pasid;
__entry->ring = ring->idx;
__assign_str(ring, ring->name)
__entry->vmid = job->vmid;
__entry->vm_hub = ring->funcs->vmhub,
__entry->pd_addr = job->vm_pd_addr;
__entry->needs_flush = job->vm_needs_flush;
),
TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
__entry->pasid, __entry->ring, __entry->vmid,
TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
__entry->pasid, __get_str(ring), __entry->vmid,
__entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
);
@@ -366,20 +367,20 @@ TRACE_EVENT(amdgpu_vm_flush,
uint64_t pd_addr),
TP_ARGS(ring, vmid, pd_addr),
TP_STRUCT__entry(
__field(u32, ring)
__string(ring, ring->name)
__field(u32, vmid)
__field(u32, vm_hub)
__field(u64, pd_addr)
),
TP_fast_assign(
__entry->ring = ring->idx;
__assign_str(ring, ring->name)
__entry->vmid = vmid;
__entry->vm_hub = ring->funcs->vmhub;
__entry->pd_addr = pd_addr;
),
TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx",
__entry->ring, __entry->vmid,
TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
__get_str(ring), __entry->vmid,
__entry->vm_hub,__entry->pd_addr)
);


@@ -61,100 +61,6 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
/*
* Global memory.
*/
/**
* amdgpu_ttm_mem_global_init - Initialize and acquire reference to
* memory object
*
* @ref: Object for initialization.
*
* This is called by drm_global_item_ref() when an object is being
* initialized.
*/
static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
{
return ttm_mem_global_init(ref->object);
}
/**
* amdgpu_ttm_mem_global_release - Drop reference to a memory object
*
* @ref: Object being removed
*
* This is called by drm_global_item_unref() when an object is being
* released.
*/
static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
{
ttm_mem_global_release(ref->object);
}
/**
* amdgpu_ttm_global_init - Initialize global TTM memory reference structures.
*
* @adev: AMDGPU device for which the global structures need to be registered.
*
* This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
* during bring up.
*/
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
struct drm_global_reference *global_ref;
int r;
/* ensure reference is false in case init fails */
adev->mman.mem_global_referenced = false;
global_ref = &adev->mman.mem_global_ref;
global_ref->global_type = DRM_GLOBAL_TTM_MEM;
global_ref->size = sizeof(struct ttm_mem_global);
global_ref->init = &amdgpu_ttm_mem_global_init;
global_ref->release = &amdgpu_ttm_mem_global_release;
r = drm_global_item_ref(global_ref);
if (r) {
DRM_ERROR("Failed setting up TTM memory accounting "
"subsystem.\n");
goto error_mem;
}
adev->mman.bo_global_ref.mem_glob =
adev->mman.mem_global_ref.object;
global_ref = &adev->mman.bo_global_ref.ref;
global_ref->global_type = DRM_GLOBAL_TTM_BO;
global_ref->size = sizeof(struct ttm_bo_global);
global_ref->init = &ttm_bo_global_init;
global_ref->release = &ttm_bo_global_release;
r = drm_global_item_ref(global_ref);
if (r) {
DRM_ERROR("Failed setting up TTM BO subsystem.\n");
goto error_bo;
}
mutex_init(&adev->mman.gtt_window_lock);
adev->mman.mem_global_referenced = true;
return 0;
error_bo:
drm_global_item_unref(&adev->mman.mem_global_ref);
error_mem:
return r;
}
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
if (adev->mman.mem_global_referenced) {
mutex_destroy(&adev->mman.gtt_window_lock);
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
drm_global_item_unref(&adev->mman.mem_global_ref);
adev->mman.mem_global_referenced = false;
}
}
static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
{
return 0;
@@ -1758,14 +1664,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
int r;
u64 vis_vram_limit;
/* initialize global references for vram/gtt */
r = amdgpu_ttm_global_init(adev);
if (r) {
return r;
}
mutex_init(&adev->mman.gtt_window_lock);
/* No others user of address space so set it to 0 */
r = ttm_bo_device_init(&adev->mman.bdev,
adev->mman.bo_global_ref.ref.object,
&amdgpu_bo_driver,
adev->ddev->anon_inode->i_mapping,
DRM_FILE_PAGE_OFFSET,
@@ -1922,7 +1824,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_bo_device_release(&adev->mman.bdev);
amdgpu_ttm_global_fini(adev);
adev->mman.initialized = false;
DRM_INFO("amdgpu: ttm finalized\n");
}
@@ -2069,7 +1970,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
unsigned i;
int r;
if (direct_submit && !ring->ready) {
if (direct_submit && !ring->sched.ready) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}


@@ -39,8 +39,6 @@
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
struct amdgpu_mman {
struct ttm_bo_global_ref bo_global_ref;
struct drm_global_reference mem_global_ref;
struct ttm_bo_device bdev;
bool mem_global_referenced;
bool initialized;


@@ -57,6 +57,17 @@ struct psp_firmware_header_v1_0 {
uint32_t sos_size_bytes;
};
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
uint32_t ta_xgmi_ucode_version;
uint32_t ta_xgmi_offset_bytes;
uint32_t ta_xgmi_size_bytes;
uint32_t ta_ras_ucode_version;
uint32_t ta_ras_offset_bytes;
uint32_t ta_ras_size_bytes;
};
/* version_major=1, version_minor=0 */
struct gfx_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -170,6 +181,7 @@ union amdgpu_firmware_header {
struct mc_firmware_header_v1_0 mc;
struct smc_firmware_header_v1_0 smc;
struct psp_firmware_header_v1_0 psp;
struct ta_firmware_header_v1_0 ta;
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;


@@ -692,6 +692,8 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
buf_sizes[0x1] = dpb_size;
buf_sizes[0x2] = image_size;
buf_sizes[0x4] = min_ctx_size;
/* store image width to adjust nb memory pstate */
adev->uvd.decode_image_width = width;
return 0;
}
@@ -1243,30 +1245,20 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence;
long r;
uint32_t ip_instance = ring->me;
r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
if (r)
goto error;
}
r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
} else {
DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
else if (r > 0)
r = 0;
}
dma_fence_put(fence);


@@ -65,6 +65,8 @@ struct amdgpu_uvd {
struct drm_sched_entity entity;
struct delayed_work idle_work;
unsigned harvest_config;
/* store image width to adjust nb memory pstate */
unsigned decode_image_width;
};
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);


@@ -1032,8 +1032,10 @@ out:
* @ib: the IB to execute
*
*/
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
amdgpu_ring_write(ring, VCE_CMD_IB);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
@@ -1079,11 +1081,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
return 0;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, VCE_CMD_END);
amdgpu_ring_commit(ring);
@@ -1093,14 +1093,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed\n",
ring->idx);
if (i >= timeout)
r = -ETIMEDOUT;
}
return r;
}
@@ -1121,27 +1115,19 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
return 0;
r = amdgpu_vce_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
if (r)
goto error;
}
r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else if (r > 0)
r = 0;
}
error:
dma_fence_put(fence);
return r;


@@ -65,8 +65,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
struct amdgpu_ib *ib, bool ctx_switch);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags);
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);


@@ -425,11 +425,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -441,14 +439,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@@ -570,30 +563,20 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
if (r)
goto error;
}
r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else if (r > 0)
r = 0;
}
dma_fence_put(fence);
error:
return r;
}
@@ -606,11 +589,9 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, VCN_ENC_CMD_END);
amdgpu_ring_commit(ring);
@@ -620,14 +601,8 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed\n",
ring->idx);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
}
return r;
}
@@ -742,27 +717,19 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
if (r)
goto error;
}
r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else if (r > 0)
r = 0;
}
error:
dma_fence_put(fence);
return r;
@@ -778,11 +745,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
@@ -796,14 +760,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@@ -856,21 +814,18 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r = 0;
r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
goto error;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto error;
} else
} else {
r = 0;
}
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
@@ -879,15 +834,10 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout)
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else {
DRM_ERROR("ib test failed (0x%08X)\n", tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
dma_fence_put(fence);
error:
return r;
}


@@ -23,16 +23,6 @@
#include "amdgpu.h"
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
addr -= AMDGPU_VA_RESERVED_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
return addr;
}
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
/* By now all MMIO pages except mailbox are blocked */
@@ -41,88 +31,6 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
return RREG32_NO_KIQ(0xc040) == 0xffffffff;
}
int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
{
int r;
void *ptr;
r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj,
&adev->virt.csa_vmid0_addr, &ptr);
if (r)
return r;
memset(ptr, 0, AMDGPU_CSA_SIZE);
return 0;
}
void amdgpu_free_static_csa(struct amdgpu_device *adev) {
amdgpu_bo_free_kernel(&adev->virt.csa_obj,
&adev->virt.csa_vmid0_addr,
NULL);
}
/*
* amdgpu_map_static_csa should be called during amdgpu_vm_init
* it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
* submission of GFX should use this virtual address within META_DATA init
* package to support SRIOV gfx preemption.
*/
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va)
{
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;
struct ttm_validate_buffer csa_tv;
int r;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&csa_tv.head);
csa_tv.bo = &adev->virt.csa_obj->tbo;
csa_tv.shared = true;
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;
}
*bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
if (!*bo_va) {
ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("failed to create bo_va for static CSA\n");
return -ENOMEM;
}
r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
AMDGPU_PTE_EXECUTABLE);
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
ttm_eu_backoff_reservation(&ticket, &list);
return 0;
}
void amdgpu_virt_init_setting(struct amdgpu_device *adev)
{
/* enable virtual display */
@@ -162,9 +70,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_read;
if (in_interrupt())
might_sleep();
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@@ -210,9 +116,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_write;
if (in_interrupt())
might_sleep();
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
@@ -228,6 +132,46 @@ failed_kiq_write:
pr_err("failed to write reg:%x\n", reg);
}
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
/* don't wait anymore for IRQ context */
if (r < 1 && in_interrupt())
goto failed_kiq;
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq;
return;
failed_kiq:
pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
}
/**
* amdgpu_virt_request_full_gpu() - request full gpu access
* @amdgpu: amdgpu device.
@@ -390,7 +334,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
if (adev->fw_vram_usage.va != NULL) {
adev->virt.fw_reserve.p_pf2vf =
(struct amdgim_pf2vf_info_header *)(
(struct amd_sriov_msg_pf2vf_info_header *)(
adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET);
AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size);
AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum);

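amdgpu_virt_kiq_reg_write_reg_wait() emits a register write plus a wait-for-register as a single KIQ submission, so an SR-IOV guest gets the pair executed back to back on the GPU side (a VM invalidation request followed by polling its ack is the typical use). A hedged sketch of a caller; the register offsets, reference value and mask are placeholders, not real definitions:

#include "amdgpu.h"

/* Sketch: push a write to req_reg and a wait on ack_reg through the KIQ so
 * no other VF work can slip in between the two. req_reg/ack_reg stand in
 * for the real hub register offsets. */
static void example_req_ack_sketch(struct amdgpu_device *adev,
                                   uint32_t req_reg, uint32_t ack_reg,
                                   uint32_t ref, uint32_t mask)
{
        if (!amdgpu_sriov_runtime(adev))
                return;         /* bare-metal paths program the MMIO directly */

        amdgpu_virt_kiq_reg_write_reg_wait(adev, req_reg, ack_reg, ref, mask);
}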

@@ -63,8 +63,8 @@ struct amdgpu_virt_ops {
* Firmware Reserve Frame buffer
*/
struct amdgpu_virt_fw_reserve {
struct amdgim_pf2vf_info_header *p_pf2vf;
struct amdgim_vf2pf_info_header *p_vf2pf;
struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
unsigned int checksum_key;
};
/*
@@ -85,15 +85,17 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
};
struct amdgim_pf2vf_info_header {
struct amd_sriov_msg_pf2vf_info_header {
/* the total structure size in byte. */
uint32_t size;
/* version of this structure, written by the GIM */
uint32_t version;
/* reserved */
uint32_t reserved[2];
} __aligned(4);
struct amdgim_pf2vf_info_v1 {
/* header contains size and version */
struct amdgim_pf2vf_info_header header;
struct amd_sriov_msg_pf2vf_info_header header;
/* max_width * max_height */
unsigned int uvd_enc_max_pixels_count;
/* 16x16 pixels/sec, codec independent */
@@ -112,7 +114,7 @@ struct amdgim_pf2vf_info_v1 {
struct amdgim_pf2vf_info_v2 {
/* header contains size and version */
struct amdgim_pf2vf_info_header header;
struct amd_sriov_msg_pf2vf_info_header header;
/* use private key from mailbox 2 to create checksum */
uint32_t checksum;
/* The features flags of the GIM driver supports. */
@@ -137,20 +139,22 @@ struct amdgim_pf2vf_info_v2 {
uint64_t vcefw_kboffset;
/* VCE FW size in KB */
uint32_t vcefw_ksize;
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)];
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 3)];
} __aligned(4);
struct amdgim_vf2pf_info_header {
struct amd_sriov_msg_vf2pf_info_header {
/* the total structure size in byte. */
uint32_t size;
/*version of this structure, written by the guest */
uint32_t version;
/* reserved */
uint32_t reserved[2];
} __aligned(4);
struct amdgim_vf2pf_info_v1 {
/* header contains size and version */
struct amdgim_vf2pf_info_header header;
struct amd_sriov_msg_vf2pf_info_header header;
/* driver version */
char driver_version[64];
/* driver certification, 1=WHQL, 0=None */
@@ -180,7 +184,7 @@ struct amdgim_vf2pf_info_v1 {
struct amdgim_vf2pf_info_v2 {
/* header contains size and version */
struct amdgim_vf2pf_info_header header;
struct amd_sriov_msg_vf2pf_info_header header;
uint32_t checksum;
/* driver version */
uint8_t driver_version[64];
@@ -206,7 +210,7 @@ struct amdgim_vf2pf_info_v2 {
uint32_t uvd_enc_usage;
/* guest uvd engine usage percentage. 0xffff means N/A. */
uint32_t uvd_enc_health;
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)];
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)];
} __aligned(4);
#define AMDGPU_FW_VRAM_VF2PF_VER 2
@@ -238,7 +242,6 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ;
struct amdgpu_virt {
uint32_t caps;
struct amdgpu_bo *csa_obj;
uint64_t csa_vmid0_addr;
bool chained_ib_support;
uint32_t reg_val_offs;
struct amdgpu_irq_src ack_irq;
@@ -251,8 +254,6 @@ struct amdgpu_virt {
uint32_t gim_feature;
};
#define AMDGPU_CSA_SIZE (8 * 1024)
#define amdgpu_sriov_enabled(adev) \
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
@@ -277,17 +278,13 @@ static inline bool is_virtual_machine(void)
#endif
}
struct amdgpu_vm;
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va);
void amdgpu_free_static_csa(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
uint32_t ref, uint32_t mask);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);


@@ -617,7 +617,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
{
entry->priority = 0;
entry->tv.bo = &vm->root.base.bo->tbo;
entry->tv.shared = true;
/* One for the VM updates, one for TTM and one for the CS job */
entry->tv.num_shared = 3;
entry->user_pages = NULL;
list_add(&entry->tv.head, validated);
}
@@ -773,10 +774,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
return r;
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
goto error;
@@ -1844,10 +1841,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (r)
goto error_free;
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
if (r)
goto error_free;
r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
if (r)
goto error_free;
@@ -3028,6 +3021,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
goto error_free_root;
r = reservation_object_reserve_shared(root->tbo.resv, 1);
if (r)
goto error_unreserve;
r = amdgpu_vm_clear_bo(adev, vm, root,
adev->vm_manager.root_level,
vm->pte_support_ats);
@@ -3057,7 +3054,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
INIT_KFIFO(vm->faults);
vm->fault_credit = 16;
return 0;
@@ -3269,42 +3265,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_vmid_free_reserved(adev, vm, i);
}
/**
* amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
*
* @adev: amdgpu_device pointer
* @pasid: PASID do identify the VM
*
* This function is expected to be called in interrupt context.
*
* Returns:
* True if there was fault credit, false otherwise
*/
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid)
{
struct amdgpu_vm *vm;
spin_lock(&adev->vm_manager.pasid_lock);
vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
if (!vm) {
/* VM not found, can't track fault credit */
spin_unlock(&adev->vm_manager.pasid_lock);
return true;
}
/* No lock needed. only accessed by IRQ handler */
if (!vm->fault_credit) {
/* Too many faults in this VM */
spin_unlock(&adev->vm_manager.pasid_lock);
return false;
}
vm->fault_credit--;
spin_unlock(&adev->vm_manager.pasid_lock);
return true;
}
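The function removed above implemented a small per-VM rate limit: the interrupt prescreen charged one credit per retry fault and started dropping faults once the budget (16 at init time, per the hunk below) was exhausted. As a rough userspace illustration of that throttling pattern only — the struct, names and pthread mutex here are hypothetical stand-ins, not the kernel's spinlock-protected state:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the per-VM state; not the kernel struct. */
struct fault_limiter {
        pthread_mutex_t lock;
        unsigned int credit;    /* remaining faults we are willing to service */
};

static void fault_limiter_init(struct fault_limiter *fl, unsigned int credit)
{
        pthread_mutex_init(&fl->lock, NULL);
        fl->credit = credit;
}

/* Returns true if the fault should be processed, false once credit is gone. */
static bool fault_limiter_charge(struct fault_limiter *fl)
{
        bool ok;

        pthread_mutex_lock(&fl->lock);
        ok = fl->credit > 0;
        if (ok)
                fl->credit--;
        pthread_mutex_unlock(&fl->lock);
        return ok;
}

int main(void)
{
        struct fault_limiter fl;
        int i;

        fault_limiter_init(&fl, 16);    /* mirrors vm->fault_credit = 16 */
        for (i = 0; i < 20; i++)
                printf("fault %2d: %s\n", i,
                       fault_limiter_charge(&fl) ? "handled" : "dropped");
        return 0;
}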
/**
* amdgpu_vm_manager_init - init the VM manager
*


@@ -229,9 +229,6 @@ struct amdgpu_vm {
/* Up to 128 pending retry page faults */
DECLARE_KFIFO(faults, u64, 128);
/* Limit non-retry fault storms */
unsigned int fault_credit;
/* Points to the KFD process VM info */
struct amdkfd_process_info *process_info;
@@ -299,8 +296,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid);
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid);
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
struct list_head *validated,
struct amdgpu_bo_list_entry *entry);


@@ -23,7 +23,7 @@
*/
#include <linux/list.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_xgmi.h"
static DEFINE_MUTEX(xgmi_mutex);
@@ -31,15 +31,16 @@ static DEFINE_MUTEX(xgmi_mutex);
#define AMDGPU_MAX_XGMI_HIVE 8
#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
struct amdgpu_hive_info {
uint64_t hive_id;
struct list_head device_list;
};
static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
static unsigned hive_count = 0;
static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
{
return &hive->device_list;
}
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
{
int i;
struct amdgpu_hive_info *tmp;
@@ -58,62 +59,99 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
tmp = &xgmi_hives[hive_count++];
tmp->hive_id = adev->gmc.xgmi.hive_id;
INIT_LIST_HEAD(&tmp->device_list);
mutex_init(&tmp->hive_lock);
return tmp;
}
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
{
int ret = -EINVAL;
/* Each psp need to set the latest topology */
ret = psp_xgmi_set_topology_info(&adev->psp,
hive->number_devices,
&hive->topology_info);
if (ret)
dev_err(adev->dev,
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
else
dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n",
adev->gmc.xgmi.physical_node_id,
adev->gmc.xgmi.hive_id);
return ret;
}
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE];
struct psp_xgmi_topology_info *hive_topology;
struct amdgpu_hive_info *hive;
struct amdgpu_xgmi *entry;
struct amdgpu_device *tmp_adev;
struct amdgpu_device *tmp_adev = NULL;
int count = 0, ret = -EINVAL;
if ((adev->asic_type < CHIP_VEGA20) ||
(adev->flags & AMD_IS_APU) )
if (!adev->gmc.xgmi.supported)
return 0;
adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp);
adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
memset(&tmp_topology[0], 0, sizeof(tmp_topology));
mutex_lock(&xgmi_mutex);
hive = amdgpu_get_xgmi_hive(adev);
if (!hive)
goto exit;
hive_topology = &hive->topology_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
list_for_each_entry(entry, &hive->device_list, head)
tmp_topology[count++].device_id = entry->device_id;
hive_topology->nodes[count++].node_id = entry->node_id;
hive->number_devices = count;
ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology);
if (ret) {
dev_err(adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.device_id,
adev->gmc.xgmi.hive_id, ret);
goto exit;
}
/* Each psp need to set the latest topology */
/* Each psp need to get the latest topology */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
if (ret) {
dev_err(tmp_adev->dev,
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
tmp_adev->gmc.xgmi.device_id,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
tmp_adev->gmc.xgmi.node_id,
tmp_adev->gmc.xgmi.hive_id, ret);
/* To do : continue with some node failed or disable the whole hive */
/* To do : continue with some node failed or disable the whole hive */
break;
}
}
if (!ret)
dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
adev->gmc.xgmi.physical_node_id,
adev->gmc.xgmi.hive_id);
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
if (ret)
break;
}
exit:
mutex_unlock(&xgmi_mutex);
return ret;
}
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
{
struct amdgpu_hive_info *hive;
if (!adev->gmc.xgmi.supported)
return;
mutex_lock(&xgmi_mutex);
hive = amdgpu_get_xgmi_hive(adev);
if (!hive)
goto exit;
if (!(hive->number_devices--))
mutex_destroy(&hive->hive_lock);
exit:
mutex_unlock(&xgmi_mutex);
}
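amdgpu_get_xgmi_hive() above is a get-or-create lookup over a small static table protected by one mutex: scan for a matching hive_id, otherwise claim the next free slot and initialise it. A minimal sketch of that registry pattern, with hypothetical types and a pthread mutex standing in for xgmi_mutex:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_HIVES 8

struct hive {                           /* stand-in for struct amdgpu_hive_info */
        uint64_t hive_id;
        int members;
};

static struct hive hives[MAX_HIVES];
static unsigned int hive_count;
static pthread_mutex_t hive_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Find the hive for @id, creating it if it does not exist yet. */
static struct hive *get_hive(uint64_t id)
{
        struct hive *h = NULL;
        unsigned int i;

        pthread_mutex_lock(&hive_mutex);
        for (i = 0; i < hive_count; i++) {
                if (hives[i].hive_id == id) {
                        h = &hives[i];
                        goto out;
                }
        }
        if (hive_count < MAX_HIVES) {
                h = &hives[hive_count++];
                h->hive_id = id;
                h->members = 0;
        }
out:
        pthread_mutex_unlock(&hive_mutex);
        return h;
}

int main(void)
{
        get_hive(0x1234)->members++;
        get_hive(0x1234)->members++;    /* second device joins the same hive */
        printf("hive 0x1234 has %d member(s)\n", get_hive(0x1234)->members);
        return 0;
}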


@@ -0,0 +1,40 @@
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __AMDGPU_XGMI_H__
#define __AMDGPU_XGMI_H__
#include "amdgpu_psp.h"
struct amdgpu_hive_info {
uint64_t hive_id;
struct list_head device_list;
struct psp_xgmi_topology_info topology_info;
int number_devices;
struct mutex hive_lock;
};
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
#endif


@@ -743,19 +743,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)
if (pi->caps_sq_ramping || pi->caps_db_ramping ||
pi->caps_td_ramping || pi->caps_tcp_ramping) {
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (enable) {
ret = ci_program_pt_config_registers(adev, didt_config_ci);
if (ret) {
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
return ret;
}
}
ci_do_enable_didt(adev, enable);
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
return 0;


@@ -1755,6 +1755,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.flush_hdp = &cik_flush_hdp,
.invalidate_hdp = &cik_invalidate_hdp,
.need_full_reset = &cik_need_full_reset,
.init_doorbell_index = &legacy_doorbell_index_init,
};
static int cik_common_early_init(void *handle)


@@ -30,4 +30,5 @@ void cik_srbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
int cik_set_ip_blocks(struct amdgpu_device *adev);
void legacy_doorbell_index_init(struct amdgpu_device *adev);
#endif


@@ -228,34 +228,6 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
* [127:96] - reserved
*/
/**
* cik_ih_prescreen_iv - prescreen an interrupt vector
*
* @adev: amdgpu_device pointer
*
* Returns true if the interrupt vector should be further processed.
*/
static bool cik_ih_prescreen_iv(struct amdgpu_device *adev)
{
u32 ring_index = adev->irq.ih.rptr >> 2;
u16 pasid;
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
case 146:
case 147:
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
return true;
break;
default:
/* Not a VM fault */
return true;
}
adev->irq.ih.rptr += 16;
return false;
}
/**
* cik_ih_decode_iv - decode an interrupt vector
*
@@ -461,7 +433,6 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = {
static const struct amdgpu_ih_funcs cik_ih_funcs = {
.get_wptr = cik_ih_get_wptr,
.prescreen_iv = cik_ih_prescreen_iv,
.decode_iv = cik_ih_decode_iv,
.set_rptr = cik_ih_set_rptr
};


@@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
int i;
for (i = 0; i < count; i++)
@@ -218,9 +218,11 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (CIK).
*/
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 extra_bits = vmid & 0xf;
/* IB packet must end on a 8 DW boundary */
@@ -316,8 +318,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
}
sdma0->ready = false;
sdma1->ready = false;
sdma0->sched.ready = false;
sdma1->sched.ready = false;
}
/**
@@ -494,18 +496,16 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
ring->ready = true;
ring->sched.ready = true;
}
cik_sdma_enable(adev, true);
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -618,21 +618,17 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
if (r)
goto error_free_wb;
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
@@ -647,15 +643,11 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
amdgpu_device_wb_free(adev, index);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_wb:
amdgpu_device_wb_free(adev, index);
return r;
}
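This hunk, like most of the ring/IB test rewrites in the series, replaces duplicated free-and-return error paths with a single exit label so the writeback slot is released on exactly one path. The idiom itself, as a small self-contained C sketch with made-up resources:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

/* Acquire two resources; on any failure, unwind only what was taken so far. */
static int run_test(size_t len)
{
        char *buf = NULL;
        FILE *log = NULL;
        int r = 0;

        buf = calloc(1, len);
        if (!buf)
                return -ENOMEM;         /* nothing to unwind yet */

        log = tmpfile();
        if (!log) {
                r = -EIO;
                goto error_free_buf;    /* one label per acquired resource */
        }

        if (fwrite(buf, 1, len, log) != len)
                r = -EIO;

        fclose(log);
error_free_buf:
        free(buf);
        return r;
}

int main(void)
{
        printf("run_test: %d\n", run_test(64));
        return 0;
}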
@@ -678,20 +670,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err0;
}
ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
@@ -706,21 +694,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
else
r = -EINVAL;
}
err1:
amdgpu_ib_free(adev, &ib, NULL);
@@ -822,7 +805,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
*/
static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
u32 pad_count;
int i;
@@ -1214,8 +1197,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
u8 instance_id;
DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
instance_id = (entry->ring_id & 0x3) >> 0;
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
return 0;
}
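The illegal-instruction handler above no longer schedules a device-wide reset work item; it decodes which SDMA instance raised the interrupt from the low bits of ring_id and reports the fault to that ring's scheduler, which owns timeout and recovery. A rough sketch of that per-ring dispatch shape — the types and the report_fault() hook are hypothetical, not the DRM scheduler API:

#include <stdio.h>
#include <stdint.h>

#define NUM_INSTANCES 2

struct ring {
        const char *name;
        void (*fault)(struct ring *ring);       /* per-ring recovery hook */
};

static void report_fault(struct ring *ring)
{
        printf("fault reported on %s, its scheduler will recover it\n",
               ring->name);
}

static struct ring rings[NUM_INSTANCES] = {
        { "sdma0", report_fault },
        { "sdma1", report_fault },
};

/* IRQ-style entry point: route the event to the instance that raised it. */
static void illegal_inst_irq(uint32_t ring_id)
{
        uint8_t instance = ring_id & 0x3;       /* same decode as the hunk above */

        if (instance < NUM_INSTANCES)
                rings[instance].fault(&rings[instance]);
}

int main(void)
{
        illegal_inst_irq(1);
        return 0;
}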


@@ -207,34 +207,6 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
return (wptr & adev->irq.ih.ptr_mask);
}
/**
* cz_ih_prescreen_iv - prescreen an interrupt vector
*
* @adev: amdgpu_device pointer
*
* Returns true if the interrupt vector should be further processed.
*/
static bool cz_ih_prescreen_iv(struct amdgpu_device *adev)
{
u32 ring_index = adev->irq.ih.rptr >> 2;
u16 pasid;
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
case 146:
case 147:
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
return true;
break;
default:
/* Not a VM fault */
return true;
}
adev->irq.ih.rptr += 16;
return false;
}
/**
* cz_ih_decode_iv - decode an interrupt vector
*
@@ -442,7 +414,6 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = {
static const struct amdgpu_ih_funcs cz_ih_funcs = {
.get_wptr = cz_ih_get_wptr,
.prescreen_iv = cz_ih_prescreen_iv,
.decode_iv = cz_ih_decode_iv,
.set_rptr = cz_ih_set_rptr
};


@@ -1775,18 +1775,15 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -1798,13 +1795,11 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
ring->idx, scratch, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -1845,9 +1840,11 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
}
static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
@@ -1892,17 +1889,15 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START));
ib.ptr[2] = 0xDEADBEEF;
@@ -1914,22 +1909,16 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
scratch, tmp);
else
r = -EINVAL;
}
err2:
amdgpu_ib_free(adev, &ib, NULL);
@@ -1950,9 +1939,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
CP_ME_CNTL__CE_HALT_MASK));
WREG32(mmSCRATCH_UMSK, 0);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].ready = false;
adev->gfx.gfx_ring[i].sched.ready = false;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].ready = false;
adev->gfx.compute_ring[i].sched.ready = false;
}
udelay(50);
}
@@ -2124,12 +2113,9 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the rings */
gfx_v6_0_cp_gfx_start(adev);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
return 0;
}
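Across these files the open-coded sequence "set ready, run amdgpu_ring_test_ring(), clear ready on failure" collapses into amdgpu_ring_test_helper(). Its body is not part of this diff; the sketch below only shows the shape such a wrapper presumably has, using hypothetical userspace types:

#include <stdbool.h>
#include <stdio.h>

struct sched { bool ready; };
struct ring {
        const char *name;
        struct sched sched;
        int (*test)(struct ring *ring);         /* returns 0 on success */
};

/* Run the ring's self test and keep sched.ready consistent with the result. */
static int ring_test_helper(struct ring *ring)
{
        int r = ring->test(ring);

        ring->sched.ready = (r == 0);
        if (r)
                fprintf(stderr, "ring %s test failed (%d)\n", ring->name, r);
        return r;
}

static int fake_test_ok(struct ring *ring) { (void)ring; return 0; }

int main(void)
{
        struct ring r = { "gfx", { false }, fake_test_ok };

        if (!ring_test_helper(&r))
                printf("%s ready=%d\n", r.name, r.sched.ready);
        return 0;
}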
@@ -2227,14 +2213,11 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB2_CNTL, tmp);
WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8);
adev->gfx.compute_ring[0].ready = false;
adev->gfx.compute_ring[1].ready = false;
for (i = 0; i < 2; i++) {
r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]);
r = amdgpu_ring_test_helper(&adev->gfx.compute_ring[i]);
if (r)
return r;
adev->gfx.compute_ring[i].ready = true;
}
return 0;
@@ -2368,18 +2351,11 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
{
const u32 *src_ptr;
volatile u32 *dst_ptr;
u32 dws, i;
u32 dws;
u64 reg_list_mc_addr;
const struct cs_section_def *cs_data;
int r;
@@ -2394,26 +2370,10 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
cs_data = adev->gfx.rlc.cs_data;
if (src_ptr) {
/* save restore block */
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n",
r);
gfx_v6_0_rlc_fini(adev);
/* init save restore block */
r = amdgpu_gfx_rlc_init_sr(adev, dws);
if (r)
return r;
}
/* write the sr buffer */
dst_ptr = adev->gfx.rlc.sr_ptr;
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
}
if (cs_data) {
@@ -2428,7 +2388,7 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
gfx_v6_0_rlc_fini(adev);
amdgpu_gfx_rlc_fini(adev);
return r;
}
@@ -2549,8 +2509,8 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
if (!adev->gfx.rlc_fw)
return -EINVAL;
gfx_v6_0_rlc_stop(adev);
gfx_v6_0_rlc_reset(adev);
adev->gfx.rlc.funcs->stop(adev);
adev->gfx.rlc.funcs->reset(adev);
gfx_v6_0_init_pg(adev);
gfx_v6_0_init_cg(adev);
@@ -2578,7 +2538,7 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_UCODE_ADDR, 0);
gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev));
gfx_v6_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@@ -3075,6 +3035,14 @@ static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v6_0_select_me_pipe_q
};
static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
.init = gfx_v6_0_rlc_init,
.resume = gfx_v6_0_rlc_resume,
.stop = gfx_v6_0_rlc_stop,
.reset = gfx_v6_0_rlc_reset,
.start = gfx_v6_0_rlc_start
};
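The new amdgpu_rlc_funcs entries above turn the per-generation gfx_v6_0_rlc_* helpers into an ops table, so shared code can call adev->gfx.rlc.funcs->init()/resume()/stop() without knowing which RLC variant it drives. The function-pointer-table pattern on its own, as a standalone sketch with invented gen6 backends:

#include <stdio.h>

struct device;

struct rlc_funcs {                      /* per-generation operations */
        int  (*init)(struct device *dev);
        void (*start)(struct device *dev);
        void (*stop)(struct device *dev);
};

struct device {
        const char *name;
        const struct rlc_funcs *rlc;
};

static int  gen6_init(struct device *d)  { printf("%s: gen6 init\n", d->name); return 0; }
static void gen6_start(struct device *d) { printf("%s: gen6 start\n", d->name); }
static void gen6_stop(struct device *d)  { printf("%s: gen6 stop\n", d->name); }

static const struct rlc_funcs gen6_rlc = {
        .init  = gen6_init,
        .start = gen6_start,
        .stop  = gen6_stop,
};

/* Generic code only ever talks to the ops table. */
static int bring_up(struct device *d)
{
        int r = d->rlc->init(d);

        if (!r)
                d->rlc->start(d);
        return r;
}

int main(void)
{
        struct device dev = { "gfx6", &gen6_rlc };

        if (!bring_up(&dev))
                dev.rlc->stop(&dev);
        return 0;
}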
static int gfx_v6_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3082,6 +3050,7 @@ static int gfx_v6_0_early_init(void *handle)
adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v6_0_gfx_funcs;
adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs;
gfx_v6_0_set_ring_funcs(adev);
gfx_v6_0_set_irq_funcs(adev);
@@ -3114,7 +3083,7 @@ static int gfx_v6_0_sw_init(void *handle)
return r;
}
r = gfx_v6_0_rlc_init(adev);
r = adev->gfx.rlc.funcs->init(adev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@@ -3165,7 +3134,7 @@ static int gfx_v6_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
gfx_v6_0_rlc_fini(adev);
amdgpu_gfx_rlc_fini(adev);
return 0;
}
@@ -3177,7 +3146,7 @@ static int gfx_v6_0_hw_init(void *handle)
gfx_v6_0_constants_init(adev);
r = gfx_v6_0_rlc_resume(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -3195,7 +3164,7 @@ static int gfx_v6_0_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfx_v6_0_cp_enable(adev, false);
gfx_v6_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
gfx_v6_0_fini_pg(adev);
return 0;
@@ -3393,12 +3362,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v6_0_fault(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
struct amdgpu_ring *ring;
switch (entry->ring_id) {
case 0:
ring = &adev->gfx.gfx_ring[0];
break;
case 1:
case 2:
ring = &adev->gfx.compute_ring[entry->ring_id - 1];
break;
default:
return;
}
drm_sched_fault(&ring->sched);
}
static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work);
gfx_v6_0_fault(adev, entry);
return 0;
}
@@ -3407,7 +3395,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal instruction in command stream\n");
schedule_work(&adev->reset_work);
gfx_v6_0_fault(adev, entry);
return 0;
}


@@ -882,7 +882,6 @@ static const u32 kalindi_rlc_save_restore_register_list[] =
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev);
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
@@ -2064,17 +2063,14 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -2086,13 +2082,10 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
ring->idx, scratch, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -2233,9 +2226,11 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
* on the gfx ring for execution by the GPU.
*/
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
@@ -2262,9 +2257,11 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
}
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
@@ -2316,17 +2313,15 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
ib.ptr[2] = 0xDEADBEEF;
@@ -2338,22 +2333,16 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
scratch, tmp);
else
r = -EINVAL;
}
err2:
amdgpu_ib_free(adev, &ib, NULL);
@@ -2403,7 +2392,7 @@ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
} else {
WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].ready = false;
adev->gfx.gfx_ring[i].sched.ready = false;
}
udelay(50);
}
@@ -2613,12 +2602,9 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v7_0_cp_gfx_start(adev);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
return 0;
}
@@ -2675,7 +2661,7 @@ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
} else {
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].ready = false;
adev->gfx.compute_ring[i].sched.ready = false;
}
udelay(50);
}
@@ -2781,7 +2767,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
* GFX7_MEC_HPD_SIZE * 2;
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -3106,10 +3092,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
amdgpu_ring_test_helper(ring);
}
return 0;
@@ -3268,18 +3251,10 @@ static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
* The RLC is a multi-purpose microengine that handles a
* variety of functions.
*/
static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
{
const u32 *src_ptr;
volatile u32 *dst_ptr;
u32 dws, i;
u32 dws;
const struct cs_section_def *cs_data;
int r;
@@ -3306,66 +3281,23 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
cs_data = adev->gfx.rlc.cs_data;
if (src_ptr) {
/* save restore block */
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r);
gfx_v7_0_rlc_fini(adev);
/* init save restore block */
r = amdgpu_gfx_rlc_init_sr(adev, dws);
if (r)
return r;
}
/* write the sr buffer */
dst_ptr = adev->gfx.rlc.sr_ptr;
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
}
if (cs_data) {
/* clear state block */
adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
gfx_v7_0_rlc_fini(adev);
/* init clear state block */
r = amdgpu_gfx_rlc_init_csb(adev);
if (r)
return r;
}
/* set up the cs buffer */
dst_ptr = adev->gfx.rlc.cs_ptr;
gfx_v7_0_get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
if (adev->gfx.rlc.cp_table_size) {
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
gfx_v7_0_rlc_fini(adev);
r = amdgpu_gfx_rlc_init_cpt(adev);
if (r)
return r;
}
gfx_v7_0_init_cp_pg_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
}
return 0;
@@ -3446,7 +3378,12 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
return orig;
}
static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
{
return true;
}
static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
{
u32 tmp, i, mask;
@@ -3468,7 +3405,7 @@ static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
}
}
static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
{
u32 tmp;
@@ -3545,13 +3482,13 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
adev->gfx.rlc_feature_version = le32_to_cpu(
hdr->ucode_feature_version);
gfx_v7_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
/* disable CG */
tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
gfx_v7_0_rlc_reset(adev);
adev->gfx.rlc.funcs->reset(adev);
gfx_v7_0_init_pg(adev);
@@ -3582,7 +3519,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_BONAIRE)
WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
gfx_v7_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@@ -3784,72 +3721,12 @@ static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
WREG32(mmRLC_PG_CNTL, data);
}
static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev)
static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
{
const __le32 *fw_data;
volatile u32 *dst_ptr;
int me, i, max_me = 4;
u32 bo_offset = 0;
u32 table_offset, table_size;
if (adev->asic_type == CHIP_KAVERI)
max_me = 5;
if (adev->gfx.rlc.cp_table_ptr == NULL)
return;
/* write the cp table buffer */
dst_ptr = adev->gfx.rlc.cp_table_ptr;
for (me = 0; me < max_me; me++) {
if (me == 0) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
fw_data = (const __le32 *)
(adev->gfx.ce_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 1) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
fw_data = (const __le32 *)
(adev->gfx.pfp_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 2) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
fw_data = (const __le32 *)
(adev->gfx.me_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 3) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec2_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
}
for (i = 0; i < table_size; i ++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
bo_offset += table_size;
}
return 5;
else
return 4;
}
static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
@@ -4288,8 +4165,17 @@ static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
};
static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
.is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
.set_safe_mode = gfx_v7_0_set_safe_mode,
.unset_safe_mode = gfx_v7_0_unset_safe_mode,
.init = gfx_v7_0_rlc_init,
.get_csb_size = gfx_v7_0_get_csb_size,
.get_csb_buffer = gfx_v7_0_get_csb_buffer,
.get_cp_table_num = gfx_v7_0_cp_pg_table_num,
.resume = gfx_v7_0_rlc_resume,
.stop = gfx_v7_0_rlc_stop,
.reset = gfx_v7_0_rlc_reset,
.start = gfx_v7_0_rlc_start
};
static int gfx_v7_0_early_init(void *handle)
@@ -4477,7 +4363,7 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
@@ -4540,7 +4426,7 @@ static int gfx_v7_0_sw_init(void *handle)
return r;
}
r = gfx_v7_0_rlc_init(adev);
r = adev->gfx.rlc.funcs->init(adev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@@ -4604,7 +4490,7 @@ static int gfx_v7_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
gfx_v7_0_cp_compute_fini(adev);
gfx_v7_0_rlc_fini(adev);
amdgpu_gfx_rlc_fini(adev);
gfx_v7_0_mec_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
@@ -4627,7 +4513,7 @@ static int gfx_v7_0_hw_init(void *handle)
gfx_v7_0_constants_init(adev);
/* init rlc */
r = gfx_v7_0_rlc_resume(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -4645,7 +4531,7 @@ static int gfx_v7_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
gfx_v7_0_cp_enable(adev, false);
gfx_v7_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
gfx_v7_0_fini_pg(adev);
return 0;
@@ -4730,7 +4616,7 @@ static int gfx_v7_0_soft_reset(void *handle)
gfx_v7_0_update_cg(adev, false);
/* stop the rlc */
gfx_v7_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
/* Disable GFX parsing/prefetching */
WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
@@ -4959,12 +4845,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v7_0_fault(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
struct amdgpu_ring *ring;
u8 me_id, pipe_id;
int i;
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
switch (me_id) {
case 0:
drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if ((ring->me == me_id) && (ring->pipe == pipe_id))
drm_sched_fault(&ring->sched);
}
break;
}
}
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work);
gfx_v7_0_fault(adev, entry);
return 0;
}
@@ -4974,7 +4884,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
{
DRM_ERROR("Illegal instruction in command stream\n");
// XXX soft reset the gfx block only
schedule_work(&adev->reset_work);
gfx_v7_0_fault(adev, entry);
return 0;
}


@@ -44,7 +44,6 @@
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_enum.h"
#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"
@@ -54,7 +53,7 @@
#include "ivsrcid/ivsrcid_vislands30.h"
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048
#define GFX8_MEC_HPD_SIZE 4096
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -839,18 +838,14 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -862,14 +857,11 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
ring->idx, scratch, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -886,19 +878,16 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 16, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
ib.ptr[2] = lower_32_bits(gpu_addr);
@@ -912,22 +901,17 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
tmp = adev->wb.wb[index];
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("ib test on ring %d failed\n", ring->idx);
else
r = -EINVAL;
}
err2:
amdgpu_ib_free(adev, &ib, NULL);
@@ -1298,81 +1282,16 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
buffer[count++] = cpu_to_le32(0);
}
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
{
const __le32 *fw_data;
volatile u32 *dst_ptr;
int me, i, max_me = 4;
u32 bo_offset = 0;
u32 table_offset, table_size;
if (adev->asic_type == CHIP_CARRIZO)
max_me = 5;
/* write the cp table buffer */
dst_ptr = adev->gfx.rlc.cp_table_ptr;
for (me = 0; me < max_me; me++) {
if (me == 0) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
fw_data = (const __le32 *)
(adev->gfx.ce_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 1) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
fw_data = (const __le32 *)
(adev->gfx.pfp_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 2) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
fw_data = (const __le32 *)
(adev->gfx.me_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 3) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 4) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec2_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
}
for (i = 0; i < table_size; i ++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
bo_offset += table_size;
}
}
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
return 5;
else
return 4;
}
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
volatile u32 *dst_ptr;
u32 dws;
const struct cs_section_def *cs_data;
int r;
@@ -1381,44 +1300,18 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
cs_data = adev->gfx.rlc.cs_data;
if (cs_data) {
/* clear state block */
adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
gfx_v8_0_rlc_fini(adev);
/* init clear state block */
r = amdgpu_gfx_rlc_init_csb(adev);
if (r)
return r;
}
/* set up the cs buffer */
dst_ptr = adev->gfx.rlc.cs_ptr;
gfx_v8_0_get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
if ((adev->asic_type == CHIP_CARRIZO) ||
(adev->asic_type == CHIP_STONEY)) {
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
r = amdgpu_gfx_rlc_init_cpt(adev);
if (r)
return r;
}
cz_init_cp_jump_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
}
return 0;
@@ -1443,7 +1336,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -1629,7 +1522,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
return 0;
/* bail if the compute ring is not ready */
if (!ring->ready)
if (!ring->sched.ready)
return 0;
tmp = RREG32(mmGB_EDC_MODE);
@@ -1997,7 +1890,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX8_MEC_HPD_SIZE);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
@@ -2088,7 +1981,7 @@ static int gfx_v8_0_sw_init(void *handle)
return r;
}
r = gfx_v8_0_rlc_init(adev);
r = adev->gfx.rlc.funcs->init(adev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@@ -2108,7 +2001,7 @@ static int gfx_v8_0_sw_init(void *handle)
/* no gfx doorbells on iceland */
if (adev->asic_type != CHIP_TOPAZ) {
ring->use_doorbell = true;
ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
ring->doorbell_index = adev->doorbell_index.gfx_ring0;
}
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
@@ -2181,7 +2074,7 @@ static int gfx_v8_0_sw_fini(void *handle)
amdgpu_gfx_kiq_fini(adev);
gfx_v8_0_mec_fini(adev);
gfx_v8_0_rlc_fini(adev);
amdgpu_gfx_rlc_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
@@ -4175,10 +4068,15 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
gfx_v8_0_rlc_stop(adev);
gfx_v8_0_rlc_reset(adev);
if (amdgpu_sriov_vf(adev)) {
gfx_v8_0_init_csb(adev);
return 0;
}
adev->gfx.rlc.funcs->stop(adev);
adev->gfx.rlc.funcs->reset(adev);
gfx_v8_0_init_pg(adev);
gfx_v8_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@@ -4197,7 +4095,7 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].ready = false;
adev->gfx.gfx_ring[i].sched.ready = false;
}
WREG32(mmCP_ME_CNTL, tmp);
udelay(50);
@@ -4322,7 +4220,7 @@ static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu
tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
DOORBELL_RANGE_LOWER,
AMDGPU_DOORBELL_GFX_RING0);
adev->doorbell_index.gfx_ring0);
WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
@@ -4379,10 +4277,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
amdgpu_ring_clear_ring(ring);
gfx_v8_0_cp_gfx_start(adev);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
ring->sched.ready = true;
r = amdgpu_ring_test_helper(ring);
return r;
}
@@ -4396,8 +4292,8 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
} else {
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].ready = false;
adev->gfx.kiq.ring.ready = false;
adev->gfx.compute_ring[i].sched.ready = false;
adev->gfx.kiq.ring.sched.ready = false;
}
udelay(50);
}
@@ -4473,11 +4369,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
r = amdgpu_ring_test_ring(kiq_ring);
if (r) {
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ enable failed\n");
kiq_ring->ready = false;
}
return r;
}
@@ -4755,8 +4649,8 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
if (adev->asic_type > CHIP_TONGA) {
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
}
/* enable doorbells */
WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
@@ -4781,7 +4675,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
ring->ready = true;
ring->sched.ready = true;
return 0;
}
@@ -4820,10 +4714,7 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
*/
for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
ring = &adev->gfx.compute_ring[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
}
done:
@@ -4867,7 +4758,7 @@ static int gfx_v8_0_hw_init(void *handle)
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
r = gfx_v8_0_rlc_resume(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -4899,7 +4790,7 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
}
r = amdgpu_ring_test_ring(kiq_ring);
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ disable failed\n");
@@ -4973,16 +4864,16 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (!gfx_v8_0_wait_for_idle(adev))
gfx_v8_0_cp_enable(adev, false);
else
pr_err("cp is busy, skip halt cp\n");
if (!gfx_v8_0_wait_for_rlc_idle(adev))
gfx_v8_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
else
pr_err("rlc is busy, skip halt rlc\n");
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
return 0;
}
@@ -5061,17 +4952,16 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
static int gfx_v8_0_pre_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
(!adev->gfx.srbm_soft_reset))
return 0;
grbm_soft_reset = adev->gfx.grbm_soft_reset;
srbm_soft_reset = adev->gfx.srbm_soft_reset;
/* stop the rlc */
gfx_v8_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
@@ -5165,14 +5055,13 @@ static int gfx_v8_0_soft_reset(void *handle)
static int gfx_v8_0_post_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
(!adev->gfx.srbm_soft_reset))
return 0;
grbm_soft_reset = adev->gfx.grbm_soft_reset;
srbm_soft_reset = adev->gfx.srbm_soft_reset;
if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
@@ -5197,7 +5086,7 @@ static int gfx_v8_0_post_soft_reset(void *handle)
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
gfx_v8_0_cp_gfx_resume(adev);
gfx_v8_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@@ -5445,7 +5334,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
switch (adev->asic_type) {
case CHIP_CARRIZO:
case CHIP_STONEY:
@@ -5499,7 +5388,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
return 0;
}
@@ -5593,57 +5482,53 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
{
u32 data;
uint32_t rlc_setting;
rlc_setting = RREG32(mmRLC_CNTL);
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
return false;
return true;
}
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
uint32_t data;
unsigned i;
data = RREG32(mmRLC_CNTL);
if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
return;
data |= RLC_SAFE_MODE__CMD_MASK;
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
WREG32(mmRLC_SAFE_MODE, data);
if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
data |= RLC_SAFE_MODE__CMD_MASK;
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
WREG32(mmRLC_SAFE_MODE, data);
for (i = 0; i < adev->usec_timeout; i++) {
if ((RREG32(mmRLC_GPM_STAT) &
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
break;
udelay(1);
}
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
adev->gfx.rlc.in_safe_mode = true;
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
if ((RREG32(mmRLC_GPM_STAT) &
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
break;
udelay(1);
}
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
}
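gfx_v8_0_set_safe_mode() above now requests safe mode unconditionally and busy-polls the status registers until the RLC acknowledges, bounded by usec_timeout. The generic poll-until-ack-or-timeout loop looks roughly like the sketch below, where usleep() stands in for udelay() and a plain variable stands in for the MMIO register:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static volatile unsigned int fake_status;       /* stands in for an MMIO register */

/* Poll @done until it reports true or roughly @timeout_us microseconds pass. */
static bool poll_until(bool (*done)(void), unsigned int timeout_us)
{
        unsigned int i;

        for (i = 0; i < timeout_us; i++) {
                if (done())
                        return true;
                usleep(1);              /* kernel code would use udelay(1) */
        }
        return false;
}

static bool safe_mode_acked(void)
{
        return (fake_status & 0x3) == 0x3;      /* both status bits set */
}

int main(void)
{
        fake_status = 0x3;      /* pretend the hardware acked immediately */
        printf("safe mode %s\n",
               poll_until(safe_mode_acked, 1000) ? "entered" : "timed out");
        return 0;
}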
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
u32 data = 0;
uint32_t data;
unsigned i;
data = RREG32(mmRLC_CNTL);
if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
return;
if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
if (adev->gfx.rlc.in_safe_mode) {
data |= RLC_SAFE_MODE__CMD_MASK;
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
WREG32(mmRLC_SAFE_MODE, data);
adev->gfx.rlc.in_safe_mode = false;
}
}
data |= RLC_SAFE_MODE__CMD_MASK;
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
WREG32(mmRLC_SAFE_MODE, data);
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
@@ -5653,8 +5538,17 @@ static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
}
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
.enter_safe_mode = iceland_enter_rlc_safe_mode,
.exit_safe_mode = iceland_exit_rlc_safe_mode
.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
.set_safe_mode = gfx_v8_0_set_safe_mode,
.unset_safe_mode = gfx_v8_0_unset_safe_mode,
.init = gfx_v8_0_rlc_init,
.get_csb_size = gfx_v8_0_get_csb_size,
.get_csb_buffer = gfx_v8_0_get_csb_buffer,
.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
.resume = gfx_v8_0_rlc_resume,
.stop = gfx_v8_0_rlc_stop,
.reset = gfx_v8_0_rlc_reset,
.start = gfx_v8_0_rlc_start
};
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -5662,7 +5556,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t temp, data;
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
@@ -5758,7 +5652,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
gfx_v8_0_wait_for_rlc_serdes(adev);
}
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5768,7 +5662,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
@@ -5851,7 +5745,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
gfx_v8_0_wait_for_rlc_serdes(adev);
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
@@ -6131,9 +6025,11 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
}
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
if (ib->flags & AMDGPU_IB_FLAG_CE)
@@ -6161,9 +6057,11 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
}
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
@@ -6738,12 +6636,39 @@ static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v8_0_fault(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
int i;
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
switch (me_id) {
case 0:
drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if (ring->me == me_id && ring->pipe == pipe_id &&
ring->queue == queue_id)
drm_sched_fault(&ring->sched);
}
break;
}
}
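/*
 * Worked example (illustrative ring_id value, not taken from the patch): for
 * entry->ring_id == 0x25 the masks above give me_id = 1, pipe_id = 1 and
 * queue_id = 2, so the fault is reported to the matching compute ring's
 * scheduler via drm_sched_fault().
 */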
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work);
gfx_v8_0_fault(adev, entry);
return 0;
}
@@ -6752,7 +6677,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal instruction in command stream\n");
schedule_work(&adev->reset_work);
gfx_v8_0_fault(adev, entry);
return 0;
}
@@ -6976,10 +6901,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
17 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_rreg = gfx_v8_0_ring_emit_rreg,


@@ -41,7 +41,7 @@
#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 2048
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -86,6 +86,7 @@ MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
@@ -396,18 +397,14 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -419,14 +416,11 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
ring->idx, scratch, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -443,19 +437,16 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 16, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
ib.ptr[2] = lower_32_bits(gpu_addr);
@@ -469,22 +460,17 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
tmp = adev->wb.wb[index];
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
r = 0;
} else {
DRM_ERROR("ib test on ring %d failed\n", ring->idx);
r = -EINVAL;
}
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err2:
amdgpu_ib_free(adev, &ib, NULL);
@@ -660,7 +646,20 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
/*
* For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
* instead of picasso_rlc.bin.
* Judgment method:
* PCO AM4: revision >= 0xC8 && revision <= 0xCF
* or revision >= 0xD8 && revision <= 0xDF
* otherwise is PCO FP5
*/
if (!strcmp(chip_name, "picasso") &&
(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
else
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -1065,85 +1064,13 @@ static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}
static void rv_init_cp_jump_table(struct amdgpu_device *adev)
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
const __le32 *fw_data;
volatile u32 *dst_ptr;
int me, i, max_me = 5;
u32 bo_offset = 0;
u32 table_offset, table_size;
/* write the cp table buffer */
dst_ptr = adev->gfx.rlc.cp_table_ptr;
for (me = 0; me < max_me; me++) {
if (me == 0) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
fw_data = (const __le32 *)
(adev->gfx.ce_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 1) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
fw_data = (const __le32 *)
(adev->gfx.pfp_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 2) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
fw_data = (const __le32 *)
(adev->gfx.me_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 3) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 4) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec2_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
}
for (i = 0; i < table_size; i ++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
bo_offset += table_size;
}
}
static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
{
/* clear state block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
/* jump table block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
return 5;
}
static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
volatile u32 *dst_ptr;
u32 dws;
const struct cs_section_def *cs_data;
int r;
@@ -1152,45 +1079,18 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
cs_data = adev->gfx.rlc.cs_data;
if (cs_data) {
/* clear state block */
adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to create rlc csb bo\n",
r);
gfx_v9_0_rlc_fini(adev);
/* init clear state block */
r = amdgpu_gfx_rlc_init_csb(adev);
if (r)
return r;
}
/* set up the cs buffer */
dst_ptr = adev->gfx.rlc.cs_ptr;
gfx_v9_0_get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
if (adev->asic_type == CHIP_RAVEN) {
/* TODO: double check the cp_table_size for RV */
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
if (r) {
dev_err(adev->dev,
"(%d) failed to create cp table bo\n", r);
gfx_v9_0_rlc_fini(adev);
r = amdgpu_gfx_rlc_init_cpt(adev);
if (r)
return r;
}
rv_init_cp_jump_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
}
switch (adev->asic_type) {
@@ -1264,7 +1164,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -1635,8 +1535,8 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
/* Clear GDS reserved memory */
r = amdgpu_ring_alloc(ring, 17);
if (r) {
DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
ring->idx, r);
DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
ring->name, r);
return r;
}
@@ -1680,7 +1580,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1;
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX9_MEC_HPD_SIZE);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
@@ -1748,7 +1648,7 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
r = gfx_v9_0_rlc_init(adev);
r = adev->gfx.rlc.funcs->init(adev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@@ -1769,7 +1669,7 @@ static int gfx_v9_0_sw_init(void *handle)
else
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
r = amdgpu_ring_init(adev, ring, 1024,
&adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
if (r)
@@ -2499,12 +2399,12 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
return 0;
}
gfx_v9_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
/* disable CG */
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
gfx_v9_0_rlc_reset(adev);
adev->gfx.rlc.funcs->reset(adev);
gfx_v9_0_init_pg(adev);
@@ -2515,15 +2415,24 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
if (adev->asic_type == CHIP_RAVEN ||
adev->asic_type == CHIP_VEGA20) {
if (amdgpu_lbpw != 0)
switch (adev->asic_type) {
case CHIP_RAVEN:
if (amdgpu_lbpw == 0)
gfx_v9_0_enable_lbpw(adev, false);
else
gfx_v9_0_enable_lbpw(adev, true);
break;
case CHIP_VEGA20:
if (amdgpu_lbpw > 0)
gfx_v9_0_enable_lbpw(adev, true);
else
gfx_v9_0_enable_lbpw(adev, false);
break;
default:
break;
}
gfx_v9_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@@ -2538,7 +2447,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
if (!enable) {
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].ready = false;
adev->gfx.gfx_ring[i].sched.ready = false;
}
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
udelay(50);
@@ -2728,7 +2637,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v9_0_cp_gfx_start(adev);
ring->ready = true;
ring->sched.ready = true;
return 0;
}
@@ -2743,8 +2652,8 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].ready = false;
adev->gfx.kiq.ring.ready = false;
adev->gfx.compute_ring[i].sched.ready = false;
adev->gfx.kiq.ring.sched.ready = false;
}
udelay(50);
}
@@ -2867,11 +2776,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
r = amdgpu_ring_test_ring(kiq_ring);
if (r) {
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ enable failed\n");
kiq_ring->ready = false;
}
return r;
}
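/*
 * The KCQ enable/disable and ring-resume hunks in this file switch from
 * open-coded amdgpu_ring_test_ring() calls to amdgpu_ring_test_helper().
 * A minimal sketch of what such a helper is assumed to do, inferred from
 * how it is used in these hunks (run the ring test and record the outcome
 * in ring->sched.ready); not the verbatim kernel implementation:
 */
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	int r = amdgpu_ring_test_ring(ring);

	/* the scheduler may only use the ring if the test passed */
	ring->sched.ready = !r;

	return r;
}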
@@ -3089,9 +2996,9 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
/* enable the doorbell if requested */
if (ring->use_doorbell) {
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
(AMDGPU_DOORBELL64_KIQ *2) << 2);
(adev->doorbell_index.kiq * 2) << 2);
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
(AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
(adev->doorbell_index.userqueue_end * 2) << 2);
}
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
@@ -3250,7 +3157,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
ring->ready = true;
ring->sched.ready = true;
return 0;
}
@@ -3315,19 +3222,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r;
ring = &adev->gfx.gfx_ring[0];
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
amdgpu_ring_test_helper(ring);
}
gfx_v9_0_enable_gui_idle_interrupt(adev, true);
@@ -3354,7 +3255,7 @@ static int gfx_v9_0_hw_init(void *handle)
if (r)
return r;
r = gfx_v9_0_rlc_resume(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -3392,7 +3293,7 @@ static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
}
r = amdgpu_ring_test_ring(kiq_ring);
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ disable failed\n");
@@ -3434,7 +3335,7 @@ static int gfx_v9_0_hw_fini(void *handle)
}
gfx_v9_0_cp_enable(adev, false);
gfx_v9_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
gfx_v9_0_csb_vram_unpin(adev);
@@ -3509,7 +3410,7 @@ static int gfx_v9_0_soft_reset(void *handle)
if (grbm_soft_reset) {
/* stop the rlc */
gfx_v9_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
/* Disable GFX parsing/prefetching */
gfx_v9_0_cp_gfx_enable(adev, false);
@@ -3608,64 +3509,47 @@ static int gfx_v9_0_late_init(void *handle)
return 0;
}
static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
{
uint32_t rlc_setting, data;
unsigned i;
if (adev->gfx.rlc.in_safe_mode)
return;
uint32_t rlc_setting;
/* if RLC is not enabled, do nothing */
rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
return;
return false;
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
return true;
}
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
adev->gfx.rlc.in_safe_mode = true;
static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
{
uint32_t data;
unsigned i;
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
}
static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
{
uint32_t rlc_setting, data;
uint32_t data;
if (!adev->gfx.rlc.in_safe_mode)
return;
/* if RLC is not enabled, do nothing */
rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
return;
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
/*
* Try to exit safe mode only if it is already in safe
* mode.
*/
data = RLC_SAFE_MODE__CMD_MASK;
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
adev->gfx.rlc.in_safe_mode = false;
}
data = RLC_SAFE_MODE__CMD_MASK;
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
}
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
gfx_v9_0_enter_rlc_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -3676,7 +3560,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
gfx_v9_0_exit_rlc_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -3774,7 +3658,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
{
uint32_t data, def;
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
/* Enable 3D CGCG/CGLS */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
@@ -3814,7 +3698,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
}
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -3822,7 +3706,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t def, data;
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
@@ -3862,7 +3746,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
}
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
@@ -3891,8 +3775,17 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
}
static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
.enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
.exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
.set_safe_mode = gfx_v9_0_set_safe_mode,
.unset_safe_mode = gfx_v9_0_unset_safe_mode,
.init = gfx_v9_0_rlc_init,
.get_csb_size = gfx_v9_0_get_csb_size,
.get_csb_buffer = gfx_v9_0_get_csb_buffer,
.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
.resume = gfx_v9_0_rlc_resume,
.stop = gfx_v9_0_rlc_stop,
.reset = gfx_v9_0_rlc_reset,
.start = gfx_v9_0_rlc_start
};
static int gfx_v9_0_set_powergating_state(void *handle,
@@ -4073,9 +3966,11 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
if (ib->flags & AMDGPU_IB_FLAG_CE)
@@ -4104,20 +3999,22 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
}
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
(2 << 0) |
#endif
lower_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, control);
}
static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
@@ -4696,12 +4593,39 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v9_0_fault(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
int i;
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
switch (me_id) {
case 0:
drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if (ring->me == me_id && ring->pipe == pipe_id &&
ring->queue == queue_id)
drm_sched_fault(&ring->sched);
}
break;
}
}
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work);
gfx_v9_0_fault(adev, entry);
return 0;
}
@@ -4710,7 +4634,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal instruction in command stream\n");
schedule_work(&adev->reset_work);
gfx_v9_0_fault(adev, entry);
return 0;
}
@@ -4837,10 +4761,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_rreg = gfx_v9_0_ring_emit_rreg,


@@ -35,20 +35,25 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24;
}
static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
/* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value));
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
upper_32_bits(value));
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
}
static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
gfxhub_v1_0_init_gart_pt_regs(adev);
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));


@@ -30,5 +30,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value);
void gfxhub_v1_0_init(struct amdgpu_device *adev);
u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev);
void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base);
#endif


@@ -359,7 +359,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
return 0;
}
static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid, uint32_t flush_type)
{
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}
@@ -581,7 +582,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
else
gmc_v6_0_set_fault_enable_default(adev, true);
gmc_v6_0_flush_gpu_tlb(adev, 0);
gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);


@@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
*
* Flush the TLB for the requested page table (CIK).
*/
static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid, uint32_t flush_type)
{
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@@ -698,7 +699,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmCHUB_CONTROL, tmp);
}
gmc_v7_0_flush_gpu_tlb(adev, 0);
gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);


@@ -633,7 +633,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
* Flush the TLB for the requested page table (CIK).
*/
static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid)
uint32_t vmid, uint32_t flush_type)
{
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@@ -942,7 +942,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
else
gmc_v8_0_set_fault_enable_default(adev, true);
gmc_v8_0_flush_gpu_tlb(adev, 0);
gmc_v8_0_flush_gpu_tlb(adev, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);


@@ -244,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
return 0;
}
/**
* vega10_ih_prescreen_iv - prescreen an interrupt vector
*
* @adev: amdgpu_device pointer
*
* Returns true if the interrupt vector should be further processed.
*/
static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry,
uint64_t addr)
{
struct amdgpu_vm *vm;
u64 key;
int r;
/* No PASID, can't identify faulting process */
if (!entry->pasid)
return true;
/* Not a retry fault */
if (!(entry->src_data[1] & 0x80))
return true;
/* Track retry faults in per-VM fault FIFO. */
spin_lock(&adev->vm_manager.pasid_lock);
vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid);
if (!vm) {
/* VM not found, process it normally */
spin_unlock(&adev->vm_manager.pasid_lock);
return true;
}
key = AMDGPU_VM_FAULT(entry->pasid, addr);
r = amdgpu_vm_add_fault(vm->fault_hash, key);
/* Hash table is full or the fault is already being processed,
* ignore further page faults
*/
if (r != 0) {
spin_unlock(&adev->vm_manager.pasid_lock);
return false;
}
/* No locking required with single writer and single reader */
r = kfifo_put(&vm->faults, key);
if (!r) {
/* FIFO is full. Ignore it until there is space */
amdgpu_vm_clear_fault(vm->fault_hash, key);
spin_unlock(&adev->vm_manager.pasid_lock);
return false;
}
spin_unlock(&adev->vm_manager.pasid_lock);
/* It's the first fault for this address, process it normally */
return true;
}
static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -255,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
if (!gmc_v9_0_prescreen_iv(adev, entry, addr))
return 1; /* This also prevents sending it to KFD */
if (!amdgpu_sriov_vf(adev)) {
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
@@ -293,14 +352,14 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
}
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;
/* invalidate using legacy mode on vmid*/
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
@@ -312,48 +371,6 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
return req;
}
static signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
/* don't wait anymore for IRQ context */
if (r < 1 && in_interrupt())
goto failed_kiq;
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq;
return 0;
failed_kiq:
pr_err("failed to invalidate tlb with kiq\n");
return r;
}
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -362,64 +379,50 @@ failed_kiq:
*/
/**
* gmc_v9_0_flush_gpu_tlb - gart tlb flush callback
* gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
*
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
* @flush_type: the flush type
*
* Flush the TLB for the requested page table.
* Flush the TLB for the requested page table using certain type.
*/
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid)
uint32_t vmid, uint32_t flush_type)
{
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned i, j;
int r;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vmhub *hub = &adev->vmhub[i];
u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
if (adev->gfx.kiq.ring.ready &&
/* This is necessary for a HW workaround under SRIOV as well
* as GFXOFF under bare metal
*/
if (adev->gfx.kiq.ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
!adev->in_gpu_reset) {
r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
if (!r)
continue;
uint32_t req = hub->vm_inv_eng0_req + eng;
uint32_t ack = hub->vm_inv_eng0_ack + eng;
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
1 << vmid);
continue;
}
spin_lock(&adev->gmc.invalidate_lock);
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
/* Busy wait for ACK.*/
for (j = 0; j < 100; j++) {
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
tmp &= 1 << vmid;
if (tmp)
break;
cpu_relax();
}
if (j < 100) {
spin_unlock(&adev->gmc.invalidate_lock);
continue;
}
/* Wait for ACK with a delay.*/
for (j = 0; j < adev->usec_timeout; j++) {
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
tmp &= 1 << vmid;
if (tmp)
if (tmp & (1 << vmid))
break;
udelay(1);
}
if (j < adev->usec_timeout) {
spin_unlock(&adev->gmc.invalidate_lock);
continue;
}
spin_unlock(&adev->gmc.invalidate_lock);
if (j < adev->usec_timeout)
continue;
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
}
@@ -429,7 +432,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
@@ -739,9 +742,8 @@ static int gmc_v9_0_late_init(void *handle)
unsigned vmhub = ring->funcs->vmhub;
ring->vm_inv_eng = vm_inv_eng[vmhub]++;
dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
ring->idx, ring->name, ring->vm_inv_eng,
ring->funcs->vmhub);
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
}
/* Engine 16 is used for KFD and 17 for GART flushes */
@@ -959,6 +961,9 @@ static int gmc_v9_0_sw_init(void *handle)
/* This interrupt is VMC page fault.*/
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
if (r)
return r;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
@@ -991,7 +996,7 @@ static int gmc_v9_0_sw_init(void *handle)
}
adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
if (adev->asic_type == CHIP_VEGA20) {
if (adev->gmc.xgmi.supported) {
r = gfxhub_v1_1_get_xgmi_info(adev);
if (r)
return r;
@@ -1122,7 +1127,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
gfxhub_v1_0_set_fault_enable_default(adev, value);
mmhub_v1_0_set_fault_enable_default(adev, value);
gmc_v9_0_flush_gpu_tlb(adev, 0);
gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),


@@ -207,34 +207,6 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
return (wptr & adev->irq.ih.ptr_mask);
}
/**
* iceland_ih_prescreen_iv - prescreen an interrupt vector
*
* @adev: amdgpu_device pointer
*
* Returns true if the interrupt vector should be further processed.
*/
static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev)
{
u32 ring_index = adev->irq.ih.rptr >> 2;
u16 pasid;
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
case 146:
case 147:
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
return true;
break;
default:
/* Not a VM fault */
return true;
}
adev->irq.ih.rptr += 16;
return false;
}
/**
* iceland_ih_decode_iv - decode an interrupt vector
*
@@ -440,7 +412,6 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = {
static const struct amdgpu_ih_funcs iceland_ih_funcs = {
.get_wptr = iceland_ih_get_wptr,
.prescreen_iv = iceland_ih_prescreen_iv,
.decode_iv = iceland_ih_decode_iv,
.set_rptr = iceland_ih_set_rptr
};


@@ -508,19 +508,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable)
pi->caps_db_ramping ||
pi->caps_td_ramping ||
pi->caps_tcp_ramping) {
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (enable) {
ret = kv_program_pt_config_registers(adev, didt_config_kv);
if (ret) {
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
return ret;
}
}
kv_do_enable_didt(adev, enable);
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
return 0;


@@ -52,20 +52,25 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
return base;
}
static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
/* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value));
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
upper_32_bits(value));
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
}
static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
mmhub_v1_0_init_gart_pt_regs(adev);
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
mmhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));


@@ -34,5 +34,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
bool enable);
void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base);
#endif


@@ -37,7 +37,6 @@
#include "gmc/gmc_8_2_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"
#include "smu/smu_7_1_3_d.h"


@@ -43,6 +43,8 @@ enum psp_gfx_crtl_cmd_id
GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */
GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */
GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */
GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */
GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */
GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */
};
@@ -89,7 +91,8 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */
GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */
GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
};


@@ -240,12 +240,9 @@ static int psp_v10_0_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring;
unsigned int psp_ring_reg = 0;
struct amdgpu_device *adev = psp->adev;
ring = &psp->km_ring;
/* Write the ring destroy command to C2PMSG_64 */
psp_ring_reg = 3 << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);


@@ -34,6 +34,7 @@
#include "nbio/nbio_7_4_offset.h"
MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
@@ -98,7 +99,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
const char *chip_name;
char fw_name[30];
int err = 0;
const struct psp_firmware_header_v1_0 *hdr;
const struct psp_firmware_header_v1_0 *sos_hdr;
const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
@@ -119,16 +121,32 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
if (err)
goto out;
hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version);
adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version);
adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes);
adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) -
le32_to_cpu(hdr->sos_size_bytes);
adev->psp.sys_start_addr = (uint8_t *)hdr +
le32_to_cpu(hdr->header.ucode_array_offset_bytes);
sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version);
adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes);
adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) -
le32_to_cpu(sos_hdr->sos_size_bytes);
adev->psp.sys_start_addr = (uint8_t *)sos_hdr +
le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(hdr->sos_offset_bytes);
le32_to_cpu(sos_hdr->sos_offset_bytes);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
if (err)
goto out;
err = amdgpu_ucode_validate(adev->psp.ta_fw);
if (err)
goto out;
ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes);
adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr +
le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
return 0;
out:
if (err) {
@@ -153,8 +171,11 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
* are already been loaded.
*/
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
if (sol_reg)
if (sol_reg) {
psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
printk("sos fw version = 0x%x.\n", psp->sos_fw_version);
return 0;
}
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
@@ -167,7 +188,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
/* Copy PSP System Driver binary to memory */
memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
/* Provide the sys driver to bootrom */
/* Provide the sys driver to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 1 << 16;
@@ -208,7 +229,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
/* Copy Secure OS binary to PSP memory */
memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
/* Provide the PSP secure OS to bootrom */
/* Provide the PSP secure OS to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 2 << 16;
@@ -278,26 +299,47 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
struct psp_ring *ring = &psp->km_ring;
struct amdgpu_device *adev = psp->adev;
/* Write low address of the ring to C2PMSG_69 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
/* Write high address of the ring to C2PMSG_70 */
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
/* Write size of ring to C2PMSG_71 */
psp_ring_reg = ring->ring_size;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
/* Write the ring initialization command to C2PMSG_64 */
psp_ring_reg = ring_type;
psp_ring_reg = psp_ring_reg << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
if (psp_support_vmr_ring(psp)) {
/* Write low address of the ring to C2PMSG_102 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
/* Write high address of the ring to C2PMSG_103 */
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Write the ring initialization command to C2PMSG_101 */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x8000FFFF, false);
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
0x80000000, 0x8000FFFF, false);
} else {
/* Write low address of the ring to C2PMSG_69 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
/* Write high address of the ring to C2PMSG_70 */
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
/* Write size of ring to C2PMSG_71 */
psp_ring_reg = ring->ring_size;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
/* Write the ring initialization command to C2PMSG_64 */
psp_ring_reg = ring_type;
psp_ring_reg = psp_ring_reg << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x8000FFFF, false);
}
return ret;
}
@@ -308,15 +350,24 @@ static int psp_v11_0_ring_stop(struct psp_context *psp,
int ret = 0;
struct amdgpu_device *adev = psp->adev;
/* Write the ring destroy command to C2PMSG_64 */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS);
/* Write the ring destroy command*/
if (psp_support_vmr_ring(psp))
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
else
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
GFX_CTRL_CMD_ID_DESTROY_RINGS);
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x80000000, false);
/* Wait for response flag (bit 31) */
if (psp_support_vmr_ring(psp))
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
0x80000000, 0x80000000, false);
else
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x80000000, false);
return ret;
}
@@ -355,7 +406,10 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp,
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
/* KM (GPCOM) prepare write pointer */
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
if (psp_support_vmr_ring(psp))
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
else
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
/* Update KM RB frame pointer to new frame */
/* write_frame ptr increments by size of rb_frame in bytes */
@@ -384,7 +438,11 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp,
/* Update the write Pointer in DWORDs */
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
if (psp_support_vmr_ring(psp)) {
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
} else
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
return 0;
}
@@ -529,7 +587,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
/*send the mode 1 reset command*/
WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
mdelay(1000);
msleep(500);
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
@@ -552,24 +610,110 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp,
int number_devices, struct psp_xgmi_topology_info *topology)
{
struct ta_xgmi_shared_memory *xgmi_cmd;
struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
struct ta_xgmi_cmd_get_topology_info_output *topology_info_output;
int i;
int ret;
if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
return -EINVAL;
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
/* Fill in the shared memory with topology information as input */
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
topology_info_input->num_nodes = number_devices;
for (i = 0; i < topology_info_input->num_nodes; i++) {
topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
}
/* Invoke xgmi ta to get the topology information */
ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
if (ret)
return ret;
/* Read the output topology information from the shared memory */
topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info;
topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes;
for (i = 0; i < topology->num_nodes; i++) {
topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled;
topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine;
}
return 0;
}
static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp,
int number_devices, struct psp_xgmi_topology_info *topology)
{
return 0;
struct ta_xgmi_shared_memory *xgmi_cmd;
struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
int i;
if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
return -EINVAL;
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO;
topology_info_input->num_nodes = number_devices;
for (i = 0; i < topology_info_input->num_nodes; i++) {
topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
}
/* Invoke xgmi ta to set topology information */
return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO);
}
static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp)
{
u64 hive_id = 0;
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
/* Remove me when we can get correct hive_id through PSP */
if (psp->adev->gmc.xgmi.num_physical_nodes)
hive_id = 0x123456789abcdef;
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
return hive_id;
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;
/* Invoke xgmi ta to get hive id */
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
if (ret)
return 0;
else
return xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
}
static u64 psp_v11_0_xgmi_get_node_id(struct psp_context *psp)
{
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID;
/* Invoke xgmi ta to get the node id */
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
if (ret)
return 0;
else
return xgmi_cmd->xgmi_out_message.get_node_id.node_id;
}
static const struct psp_funcs psp_v11_0_funcs = {
@@ -587,6 +731,7 @@ static const struct psp_funcs psp_v11_0_funcs = {
.xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
.xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info,
.xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id,
.xgmi_get_node_id = psp_v11_0_xgmi_get_node_id,
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)


@@ -194,7 +194,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
/* Copy PSP System Driver binary to memory */
memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
/* Provide the sys driver to bootrom */
/* Provide the sys driver to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 1 << 16;
@@ -254,7 +254,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
/* Copy Secure OS binary to PSP memory */
memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
/* Provide the PSP secure OS to bootrom */
/* Provide the PSP secure OS to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 2 << 16;
@@ -356,12 +356,9 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring;
unsigned int psp_ring_reg = 0;
struct amdgpu_device *adev = psp->adev;
ring = &psp->km_ring;
/* Write the ring destroy command to C2PMSG_64 */
psp_ring_reg = 3 << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
@@ -593,9 +590,9 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
}
/*send the mode 1 reset command*/
WREG32(offset, 0x70000);
WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
mdelay(1000);
msleep(500);
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);


@@ -225,7 +225,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
int i;
for (i = 0; i < count; i++)
@@ -245,9 +245,12 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (VI).
*/
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
@@ -349,8 +352,8 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
}
sdma0->ready = false;
sdma1->ready = false;
sdma0->sched.ready = false;
sdma1->sched.ready = false;
}
/**
@@ -471,17 +474,15 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
ring->ready = true;
ring->sched.ready = true;
}
sdma_v2_4_enable(adev, true);
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -550,21 +551,16 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
if (r)
goto error_free_wb;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@@ -581,15 +577,11 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
amdgpu_device_wb_free(adev, index);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_wb:
amdgpu_device_wb_free(adev, index);
return r;
}
@@ -612,20 +604,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@@ -644,21 +632,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
else
r = -EINVAL;
}
err1:
amdgpu_ib_free(adev, &ib, NULL);
@@ -760,7 +743,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
*/
static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
u32 pad_count;
int i;
@@ -1105,8 +1088,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
u8 instance_id, queue_id;
DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
instance_id = (entry->ring_id & 0x3) >> 0;
queue_id = (entry->ring_id & 0xc) >> 2;
if (instance_id <= 1 && queue_id == 0)
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
return 0;
}
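Two conversions recur throughout the SDMA/UVD/VCE hunks in this series: per-ring readiness moves from ring->ready into ring->sched.ready and is handled by amdgpu_ring_test_helper(), and illegal-instruction interrupts now decode the SDMA instance/queue from the IV entry and report the fault to the GPU scheduler via drm_sched_fault() instead of scheduling a device-wide adev->reset_work. The helper below is a sketch inferred only from how these call sites use it; the exact logging in the real helper is an assumption.

	/* Sketch, not the upstream implementation: run the ring test, log a
	 * failure, and fold the result into the scheduler's ready flag, which
	 * is what every converted call site above relies on.
	 */
	int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
	{
		int r = amdgpu_ring_test_ring(ring);

		if (r)
			DRM_ERROR("amdgpu: ring %s test failed (%d)\n",
				  ring->name, r);

		ring->sched.ready = !r;
		return r;
	}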


@@ -399,7 +399,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
int i;
for (i = 0; i < count; i++)
@@ -419,9 +419,12 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (VI).
*/
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
@@ -523,8 +526,8 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
}
sdma0->ready = false;
sdma1->ready = false;
sdma0->sched.ready = false;
sdma1->sched.ready = false;
}
/**
@@ -739,7 +742,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
ring->ready = true;
ring->sched.ready = true;
}
/* unhalt the MEs */
@@ -749,11 +752,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -822,21 +823,16 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
if (r)
goto error_free_wb;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@@ -853,15 +849,11 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
amdgpu_device_wb_free(adev, index);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_wb:
amdgpu_device_wb_free(adev, index);
return r;
}
@@ -884,20 +876,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@@ -916,21 +904,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
else
r = -EINVAL;
}
err1:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
@@ -1031,7 +1014,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
*/
static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
u32 pad_count;
int i;
@@ -1163,7 +1146,7 @@ static int sdma_v3_0_sw_init(void *handle)
if (!amdgpu_sriov_vf(adev)) {
ring->use_doorbell = true;
ring->doorbell_index = (i == 0) ?
AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;
adev->doorbell_index.sdma_engine0 : adev->doorbell_index.sdma_engine1;
} else {
ring->use_pollmem = true;
}
@@ -1440,8 +1423,14 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
u8 instance_id, queue_id;
DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
instance_id = (entry->ring_id & 0x3) >> 0;
queue_id = (entry->ring_id & 0xc) >> 2;
if (instance_id <= 1 && queue_id == 0)
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
return 0;
}

File diff suppressed because it is too large.


@@ -61,9 +61,11 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
}
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
* Pad as necessary with NOPs.
*/
@@ -122,7 +124,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, false);
ring->ready = false;
ring->sched.ready = false;
}
}
@@ -175,13 +177,11 @@ static int si_dma_start(struct amdgpu_device *adev)
WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
ring->ready = true;
ring->sched.ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -209,21 +209,16 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 4);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
if (r)
goto error_free_wb;
amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
@@ -238,15 +233,11 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
amdgpu_device_wb_free(adev, index);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_wb:
amdgpu_device_wb_free(adev, index);
return r;
}
@@ -269,20 +260,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err0;
}
ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
ib.ptr[1] = lower_32_bits(gpu_addr);
@@ -295,21 +282,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
else
r = -EINVAL;
}
err1:
amdgpu_ib_free(adev, &ib, NULL);
@@ -658,15 +640,6 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
return 0;
}
static int si_dma_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
@@ -781,15 +754,10 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
.process = si_dma_process_trap_irq,
};
static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
.process = si_dma_process_illegal_inst_irq,
};
static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
{
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
}
/**


@@ -118,19 +118,6 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev)
return (wptr & adev->irq.ih.ptr_mask);
}
/**
* si_ih_prescreen_iv - prescreen an interrupt vector
*
* @adev: amdgpu_device pointer
*
* Returns true if the interrupt vector should be further processed.
*/
static bool si_ih_prescreen_iv(struct amdgpu_device *adev)
{
/* Process all interrupts */
return true;
}
static void si_ih_decode_iv(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
@@ -301,7 +288,6 @@ static const struct amd_ip_funcs si_ih_ip_funcs = {
static const struct amdgpu_ih_funcs si_ih_funcs = {
.get_wptr = si_ih_get_wptr,
.prescreen_iv = si_ih_prescreen_iv,
.decode_iv = si_ih_decode_iv,
.set_rptr = si_ih_set_rptr
};


@@ -507,6 +507,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
return -EINVAL;
}
if (adev->asic_type == CHIP_VEGA20)
adev->gmc.xgmi.supported = true;
if (adev->flags & AMD_IS_APU)
adev->nbio_funcs = &nbio_v7_0_funcs;
else if (adev->asic_type == CHIP_VEGA20)
@@ -613,6 +616,24 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.flush_hdp = &soc15_flush_hdp,
.invalidate_hdp = &soc15_invalidate_hdp,
.need_full_reset = &soc15_need_full_reset,
.init_doorbell_index = &vega10_doorbell_index_init,
};
static const struct amdgpu_asic_funcs vega20_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
.read_bios_from_rom = &soc15_read_bios_from_rom,
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
.set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
.set_vce_clocks = &soc15_set_vce_clocks,
.get_config_memsize = &soc15_get_config_memsize,
.flush_hdp = &soc15_flush_hdp,
.invalidate_hdp = &soc15_invalidate_hdp,
.need_full_reset = &soc15_need_full_reset,
.init_doorbell_index = &vega20_doorbell_index_init,
};
static int soc15_common_early_init(void *handle)
@@ -632,11 +653,11 @@ static int soc15_common_early_init(void *handle)
adev->se_cac_rreg = &soc15_se_cac_rreg;
adev->se_cac_wreg = &soc15_se_cac_wreg;
adev->asic_funcs = &soc15_asic_funcs;
adev->external_rev_id = 0xFF;
switch (adev->asic_type) {
case CHIP_VEGA10:
adev->asic_funcs = &soc15_asic_funcs;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_RLC_LS |
@@ -660,6 +681,7 @@ static int soc15_common_early_init(void *handle)
adev->external_rev_id = 0x1;
break;
case CHIP_VEGA12:
adev->asic_funcs = &soc15_asic_funcs;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CGCG |
@@ -682,6 +704,7 @@ static int soc15_common_early_init(void *handle)
adev->external_rev_id = adev->rev_id + 0x14;
break;
case CHIP_VEGA20:
adev->asic_funcs = &vega20_asic_funcs;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CGCG |
@@ -704,6 +727,7 @@ static int soc15_common_early_init(void *handle)
adev->external_rev_id = adev->rev_id + 0x28;
break;
case CHIP_RAVEN:
adev->asic_funcs = &soc15_asic_funcs;
if (adev->rev_id >= 0x8)
adev->external_rev_id = adev->rev_id + 0x81;
else if (adev->pdev->device == 0x15d8)


@@ -58,4 +58,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
int vega10_reg_base_init(struct amdgpu_device *adev);
int vega20_reg_base_init(struct amdgpu_device *adev);
void vega10_doorbell_index_init(struct amdgpu_device *adev);
void vega20_doorbell_index_init(struct amdgpu_device *adev);
#endif


@@ -0,0 +1,130 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef _TA_XGMI_IF_H
#define _TA_XGMI_IF_H
/* Responses have bit 31 set */
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
enum ta_command_xgmi {
TA_COMMAND_XGMI__INITIALIZE = 0x00,
TA_COMMAND_XGMI__GET_NODE_ID = 0x01,
TA_COMMAND_XGMI__GET_HIVE_ID = 0x02,
TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03,
TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04
};
/* XGMI related enumerations */
/**********************************************************/;
enum ta_xgmi_connected_nodes {
TA_XGMI__MAX_CONNECTED_NODES = 64
};
enum ta_xgmi_status {
TA_XGMI_STATUS__SUCCESS = 0x00,
TA_XGMI_STATUS__GENERIC_FAILURE = 0x01,
TA_XGMI_STATUS__NULL_POINTER = 0x02,
TA_XGMI_STATUS__INVALID_PARAMETER = 0x03,
TA_XGMI_STATUS__NOT_INITIALIZED = 0x04,
TA_XGMI_STATUS__INVALID_NODE_NUM = 0x05,
TA_XGMI_STATUS__INVALID_NODE_ID = 0x06,
TA_XGMI_STATUS__INVALID_TOPOLOGY = 0x07,
TA_XGMI_STATUS__FAILED_ID_GEN = 0x08,
TA_XGMI_STATUS__FAILED_TOPOLOGY_INIT = 0x09,
TA_XGMI_STATUS__SET_SHARING_ERROR = 0x0A
};
enum ta_xgmi_assigned_sdma_engine {
TA_XGMI_ASSIGNED_SDMA_ENGINE__NOT_ASSIGNED = -1,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA0 = 0,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA1 = 1,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA2 = 2,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA3 = 3,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA4 = 4,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA5 = 5
};
/* input/output structures for XGMI commands */
/**********************************************************/
struct ta_xgmi_node_info {
uint64_t node_id;
uint8_t num_hops;
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
};
struct ta_xgmi_cmd_initialize_output {
uint32_t status;
};
struct ta_xgmi_cmd_get_node_id_output {
uint64_t node_id;
};
struct ta_xgmi_cmd_get_hive_id_output {
uint64_t hive_id;
};
struct ta_xgmi_cmd_get_topology_info_input {
uint32_t num_nodes;
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
struct ta_xgmi_cmd_get_topology_info_output {
uint32_t num_nodes;
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
struct ta_xgmi_cmd_set_topology_info_input {
uint32_t num_nodes;
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
/**********************************************************/
/* Common input structure for XGMI callbacks */
union ta_xgmi_cmd_input {
struct ta_xgmi_cmd_get_topology_info_input get_topology_info;
struct ta_xgmi_cmd_set_topology_info_input set_topology_info;
};
/* Common output structure for XGMI callbacks */
union ta_xgmi_cmd_output {
struct ta_xgmi_cmd_initialize_output initialize;
struct ta_xgmi_cmd_get_node_id_output get_node_id;
struct ta_xgmi_cmd_get_hive_id_output get_hive_id;
struct ta_xgmi_cmd_get_topology_info_output get_topology_info;
};
/**********************************************************/
struct ta_xgmi_shared_memory {
uint32_t cmd_id;
uint32_t resp_id;
enum ta_xgmi_status xgmi_status;
uint32_t reserved;
union ta_xgmi_cmd_input xgmi_in_message;
union ta_xgmi_cmd_output xgmi_out_message;
};
#endif //_TA_XGMI_IF_H
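The new XGMI TA interface above is a simple shared-memory mailbox: the driver fills cmd_id and xgmi_in_message, and the trusted application replies through resp_id, xgmi_status and xgmi_out_message, where RSP_ID() marks a response by setting bit 31 of the originating command ID. Below is a hypothetical caller-side check (not part of the patch), built only from the definitions in this header.

	/* Hypothetical helper: confirm the TA answered the command that was
	 * issued and reported success, using the RSP_ID() convention and the
	 * status enum declared above.
	 */
	static bool ta_xgmi_cmd_succeeded(const struct ta_xgmi_shared_memory *mem,
					  enum ta_command_xgmi cmd)
	{
		return mem->resp_id == RSP_ID(cmd) &&
		       mem->xgmi_status == TA_XGMI_STATUS__SUCCESS;
	}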


@@ -218,34 +218,6 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev)
return (wptr & adev->irq.ih.ptr_mask);
}
/**
* tonga_ih_prescreen_iv - prescreen an interrupt vector
*
* @adev: amdgpu_device pointer
*
* Returns true if the interrupt vector should be further processed.
*/
static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev)
{
u32 ring_index = adev->irq.ih.rptr >> 2;
u16 pasid;
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
case 146:
case 147:
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
return true;
break;
default:
/* Not a VM fault */
return true;
}
adev->irq.ih.rptr += 16;
return false;
}
/**
* tonga_ih_decode_iv - decode an interrupt vector
*
@@ -322,7 +294,7 @@ static int tonga_ih_sw_init(void *handle)
return r;
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = AMDGPU_DOORBELL_IH;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih;
r = amdgpu_irq_init(adev);
@@ -506,7 +478,6 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = {
static const struct amdgpu_ih_funcs tonga_ih_funcs = {
.get_wptr = tonga_ih_get_wptr,
.prescreen_iv = tonga_ih_prescreen_iv,
.decode_iv = tonga_ih_decode_iv,
.set_rptr = tonga_ih_set_rptr
};


@@ -116,16 +116,16 @@ static int uvd_v4_2_sw_init(void *handle)
if (r)
return r;
r = amdgpu_uvd_resume(adev);
if (r)
return r;
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
if (r)
return r;
r = amdgpu_uvd_resume(adev);
if (r)
return r;
r = amdgpu_uvd_entity_init(adev);
return r;
@@ -162,12 +162,9 @@ static int uvd_v4_2_hw_init(void *handle)
uvd_v4_2_enable_mgcg(adev, true);
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
@@ -218,7 +215,7 @@ static int uvd_v4_2_hw_fini(void *handle)
if (RREG32(mmUVD_STATUS) != 0)
uvd_v4_2_stop(adev);
ring->ready = false;
ring->sched.ready = false;
return 0;
}
@@ -484,11 +481,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@@ -499,14 +494,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@@ -519,8 +509,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
* Write ring commands to execute the indirect buffer
*/
static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
amdgpu_ring_write(ring, ib->gpu_addr);


@@ -113,16 +113,16 @@ static int uvd_v5_0_sw_init(void *handle)
if (r)
return r;
r = amdgpu_uvd_resume(adev);
if (r)
return r;
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
if (r)
return r;
r = amdgpu_uvd_resume(adev);
if (r)
return r;
r = amdgpu_uvd_entity_init(adev);
return r;
@@ -158,12 +158,9 @@ static int uvd_v5_0_hw_init(void *handle)
uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
uvd_v5_0_enable_mgcg(adev, true);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
@@ -215,7 +212,7 @@ static int uvd_v5_0_hw_fini(void *handle)
if (RREG32(mmUVD_STATUS) != 0)
uvd_v5_0_stop(adev);
ring->ready = false;
ring->sched.ready = false;
return 0;
}
@@ -500,11 +497,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@@ -515,14 +509,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@@ -535,8 +524,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
* Write ring commands to execute the indirect buffer
*/
static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));


@@ -175,11 +175,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
amdgpu_ring_commit(ring);
@@ -189,14 +186,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed\n",
ring->idx);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
}
return r;
}
@@ -336,31 +327,24 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
if (r)
goto error;
}
r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else if (r > 0)
r = 0;
}
error:
dma_fence_put(fence);
return r;
}
static int uvd_v6_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -416,16 +400,16 @@ static int uvd_v6_0_sw_init(void *handle)
DRM_INFO("UVD ENC is disabled\n");
}
r = amdgpu_uvd_resume(adev);
if (r)
return r;
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
if (r)
return r;
r = amdgpu_uvd_resume(adev);
if (r)
return r;
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = &adev->uvd.inst->ring_enc[i];
@@ -476,12 +460,9 @@ static int uvd_v6_0_hw_init(void *handle)
uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
uvd_v6_0_enable_mgcg(adev, true);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
@@ -513,12 +494,9 @@ static int uvd_v6_0_hw_init(void *handle)
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = &adev->uvd.inst->ring_enc[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
}
}
@@ -548,7 +526,7 @@ static int uvd_v6_0_hw_fini(void *handle)
if (RREG32(mmUVD_STATUS) != 0)
uvd_v6_0_stop(adev);
ring->ready = false;
ring->sched.ready = false;
return 0;
}
@@ -969,11 +947,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@@ -984,14 +960,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@@ -1004,9 +975,12 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
* Write ring commands to execute the indirect buffer
*/
static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0));
amdgpu_ring_write(ring, vmid);
@@ -1027,8 +1001,12 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
* Write enc ring commands to execute the indirect buffer
*/
static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));


@@ -183,11 +183,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
return 0;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n",
ring->me, ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
amdgpu_ring_commit(ring);
@@ -197,14 +194,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
ring->me, ring->idx, i);
} else {
DRM_ERROR("amdgpu: (%d)ring %d test failed\n",
ring->me, ring->idx);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
}
return r;
}
@@ -343,27 +334,19 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r);
if (r)
goto error;
}
r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me);
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r);
} else {
DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx);
else if (r > 0)
r = 0;
}
error:
dma_fence_put(fence);
return r;
@@ -447,10 +430,6 @@ static int uvd_v7_0_sw_init(void *handle)
DRM_INFO("PSP loading UVD firmware\n");
}
r = amdgpu_uvd_resume(adev);
if (r)
return r;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
if (adev->uvd.harvest_config & (1 << j))
continue;
@@ -472,9 +451,9 @@ static int uvd_v7_0_sw_init(void *handle)
* sriov, so set unused location for other unused rings.
*/
if (i == 0)
ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring0_1 * 2;
else
ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring2_3 * 2 + 1;
}
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
if (r)
@@ -482,6 +461,10 @@ static int uvd_v7_0_sw_init(void *handle)
}
}
r = amdgpu_uvd_resume(adev);
if (r)
return r;
r = amdgpu_uvd_entity_init(adev);
if (r)
return r;
@@ -540,12 +523,9 @@ static int uvd_v7_0_hw_init(void *handle)
ring = &adev->uvd.inst[j].ring;
if (!amdgpu_sriov_vf(adev)) {
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
@@ -582,12 +562,9 @@ static int uvd_v7_0_hw_init(void *handle)
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = &adev->uvd.inst[j].ring_enc[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
}
}
done:
@@ -619,7 +596,7 @@ static int uvd_v7_0_hw_fini(void *handle)
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
if (adev->uvd.harvest_config & (1 << i))
continue;
adev->uvd.inst[i].ring.ready = false;
adev->uvd.inst[i].ring.sched.ready = false;
}
return 0;
@@ -1235,11 +1212,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
ring->me, ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -1251,14 +1226,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
ring->me, ring->idx, i);
} else {
DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n",
ring->me, ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@@ -1300,10 +1270,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
* Write ring commands to execute the indirect buffer
*/
static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
@@ -1329,8 +1301,12 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
* Write enc ring commands to execute the indirect buffer
*/
static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));


@@ -463,15 +463,11 @@ static int vce_v2_0_hw_init(void *handle)
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
vce_v2_0_enable_mgcg(adev, true, false);
for (i = 0; i < adev->vce.num_rings; i++)
adev->vce.ring[i].ready = false;
for (i = 0; i < adev->vce.num_rings; i++) {
r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
if (r)
return r;
else
adev->vce.ring[i].ready = true;
}
DRM_INFO("VCE initialized successfully.\n");


@@ -37,7 +37,6 @@
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "ivsrcid/ivsrcid_vislands30.h"
@@ -474,15 +473,10 @@ static int vce_v3_0_hw_init(void *handle)
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
for (i = 0; i < adev->vce.num_rings; i++)
adev->vce.ring[i].ready = false;
for (i = 0; i < adev->vce.num_rings; i++) {
r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
if (r)
return r;
else
adev->vce.ring[i].ready = true;
}
DRM_INFO("VCE initialized successfully.\n");
@@ -838,8 +832,12 @@ out:
}
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, VCE_CMD_IB_VM);
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
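The emit_ib callbacks across these engine files are re-typed the same way: the explicit vmid argument is dropped, the amdgpu_job pointer is passed through instead, and the VMID is recovered with AMDGPU_JOB_GET_VMID(). From the way the converted callbacks use it, the macro presumably just tolerates job-less (direct) submissions by falling back to VMID 0; the definition below is an assumption, not quoted from the patch.

	/* Assumed definition, inferred from the converted callbacks: direct
	 * submissions arrive with job == NULL and must map to VMID 0.
	 */
	#define AMDGPU_JOB_GET_VMID(job)	((job) ? (job)->vmid : 0)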

Some files were not shown because too many files have changed in this diff.