Merge tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Core: - shared fencing staging removal - drop transactional atomic helpers and move helpers to new location - DP/MST atomic cleanup - Leasing cleanups and drop EXPORT_SYMBOL - Convert drivers to atomic helpers and generic fbdev. - removed deprecated obj_ref/unref in favour of get/put - Improve dumb callback documentation - MODESET_LOCK_BEGIN/END helpers panels: - CDTech panels, Banana Pi Panel, DLC1010GIG, - Olimex LCD-O-LinuXino, Samsung S6D16D0, Truly NT35597 WQXGA, - Himax HX8357D, simulated RTSM AEMv8. - GPD Win2 panel - AUO G101EVN010 vgem: - render node support ttm: - move global init out of drivers - fix LRU handling for ghost objects - Support for simultaneous submissions to multiple engines scheduler: - timeout/fault handling changes to help GPU recovery - helpers for hw with preemption support i915: - Scaler/Watermark fixes - DP MST + powerwell fixes - PSR fixes - Break long get/put shmemfs pages - Icelake fixes - Icelake DSI video mode enablement - Engine workaround improvements amdgpu: - freesync support - GPU reset enabled on CI, VI, SOC15 dGPUs - ABM support in DC - KFD support for vega12/polaris12 - SDMA paging queue on vega - More amdkfd code sharing - DCC scanout on GFX9 - DC kerneldoc - Updated SMU firmware for GFX8 chips - XGMI PSP + hive reset support - GPU reset - DC trace support - Powerplay updates for newer Polaris - Cursor plane update fast path - kfd dma-buf support virtio-gpu: - add EDID support vmwgfx: - pageflip with damage support nouveau: - Initial Turing TU104/TU106 modesetting support msm: - a2xx gpu support for apq8060 and imx5 - a2xx gpummu support - mdp4 display support for apq8060 - DPU fixes and cleanups - enhanced profiling support - debug object naming interface - get_iova/page pinning decoupling tegra: - Tegra194 host1x, VIC and display support enabled - Audio over HDMI for Tegra186 and Tegra194 exynos: - DMA/IOMMU refactoring - plane alpha + blend mode support - Color format fixes for 
mixer driver rcar-du: - R8A7744 and R8A77470 support - R8A77965 LVDS support imx: - fbdev emulation fix - multi-tiled scalling fixes - SPDX identifiers rockchip - dw_hdmi support - dw-mipi-dsi + dual dsi support - mailbox read size fix qxl: - fix cursor pinning vc4: - YUV support (scaling + cursor) v3d: - enable TFU (Texture Formatting Unit) mali-dp: - add support for linear tiled formats sun4i: - Display Engine 3 support - H6 DE3 mixer 0 support - H6 display engine support - dw-hdmi support - H6 HDMI phy support - implicit fence waiting - BGRX8888 support meson: - Overlay plane support - implicit fence waiting - HDMI 1.4 4k modes bridge: - i2c fixes for sii902x" * tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm: (1403 commits) drm/amd/display: Add fast path for cursor plane updates drm/amdgpu: Enable GPU recovery by default for CI drm/amd/display: Fix duplicating scaling/underscan connector state drm/amd/display: Fix unintialized max_bpc state values Revert "drm/amd/display: Set RMX_ASPECT as default" drm/amdgpu: Fix stub function name drm/msm/dpu: Fix clock issue after bind failure drm/msm/dpu: Clean up dpu_media_info.h static inline functions drm/msm/dpu: Further cleanups for static inline functions drm/msm/dpu: Cleanup the debugfs functions drm/msm/dpu: Remove dpu_irq and unused functions drm/msm: Make irq_postinstall optional drm/msm/dpu: Cleanup callers of dpu_hw_blk_init drm/msm/dpu: Remove unused functions drm/msm/dpu: Remove dpu_crtc_is_enabled() drm/msm/dpu: Remove dpu_crtc_get_mixer_height drm/msm/dpu: Remove dpu_dbg drm/msm: dpu: Remove crtc_lock drm/msm: dpu: Remove vblank_requested flag from dpu_crtc drm/msm: dpu: Separate crtc assignment from vblank enable ...
This commit is contained in:
@@ -53,7 +53,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
|
||||
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
|
||||
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
|
||||
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
|
||||
amdgpu_gmc.o amdgpu_xgmi.o
|
||||
amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o
|
||||
|
||||
# add asic specific block
|
||||
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
|
||||
@@ -105,6 +105,7 @@ amdgpu-y += \
|
||||
# add GFX block
|
||||
amdgpu-y += \
|
||||
amdgpu_gfx.o \
|
||||
amdgpu_rlc.o \
|
||||
gfx_v8_0.o \
|
||||
gfx_v9_0.o
|
||||
|
||||
|
@@ -75,11 +75,14 @@
|
||||
#include "amdgpu_sdma.h"
|
||||
#include "amdgpu_dm.h"
|
||||
#include "amdgpu_virt.h"
|
||||
#include "amdgpu_csa.h"
|
||||
#include "amdgpu_gart.h"
|
||||
#include "amdgpu_debugfs.h"
|
||||
#include "amdgpu_job.h"
|
||||
#include "amdgpu_bo_list.h"
|
||||
#include "amdgpu_gem.h"
|
||||
#include "amdgpu_doorbell.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
#define MAX_GPU_INSTANCE 16
|
||||
|
||||
@@ -161,6 +164,7 @@ extern int amdgpu_si_support;
|
||||
extern int amdgpu_cik_support;
|
||||
#endif
|
||||
|
||||
#define AMDGPU_VM_MAX_NUM_CTX 4096
|
||||
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
|
||||
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
|
||||
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
|
||||
@@ -359,123 +363,6 @@ struct amdgpu_sa_bo {
|
||||
int amdgpu_fence_slab_init(void);
|
||||
void amdgpu_fence_slab_fini(void);
|
||||
|
||||
/*
|
||||
* GPU doorbell structures, functions & helpers
|
||||
*/
|
||||
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
|
||||
{
|
||||
AMDGPU_DOORBELL_KIQ = 0x000,
|
||||
AMDGPU_DOORBELL_HIQ = 0x001,
|
||||
AMDGPU_DOORBELL_DIQ = 0x002,
|
||||
AMDGPU_DOORBELL_MEC_RING0 = 0x010,
|
||||
AMDGPU_DOORBELL_MEC_RING1 = 0x011,
|
||||
AMDGPU_DOORBELL_MEC_RING2 = 0x012,
|
||||
AMDGPU_DOORBELL_MEC_RING3 = 0x013,
|
||||
AMDGPU_DOORBELL_MEC_RING4 = 0x014,
|
||||
AMDGPU_DOORBELL_MEC_RING5 = 0x015,
|
||||
AMDGPU_DOORBELL_MEC_RING6 = 0x016,
|
||||
AMDGPU_DOORBELL_MEC_RING7 = 0x017,
|
||||
AMDGPU_DOORBELL_GFX_RING0 = 0x020,
|
||||
AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0,
|
||||
AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1,
|
||||
AMDGPU_DOORBELL_IH = 0x1E8,
|
||||
AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF,
|
||||
AMDGPU_DOORBELL_INVALID = 0xFFFF
|
||||
} AMDGPU_DOORBELL_ASSIGNMENT;
|
||||
|
||||
struct amdgpu_doorbell {
|
||||
/* doorbell mmio */
|
||||
resource_size_t base;
|
||||
resource_size_t size;
|
||||
u32 __iomem *ptr;
|
||||
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
|
||||
};
|
||||
|
||||
/*
|
||||
* 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
|
||||
*/
|
||||
typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
|
||||
{
|
||||
/*
|
||||
* All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in
|
||||
* a contiguous range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
|
||||
* Compute related doorbells are allocated from 0x00 to 0x8a
|
||||
*/
|
||||
|
||||
|
||||
/* kernel scheduling */
|
||||
AMDGPU_DOORBELL64_KIQ = 0x00,
|
||||
|
||||
/* HSA interface queue and debug queue */
|
||||
AMDGPU_DOORBELL64_HIQ = 0x01,
|
||||
AMDGPU_DOORBELL64_DIQ = 0x02,
|
||||
|
||||
/* Compute engines */
|
||||
AMDGPU_DOORBELL64_MEC_RING0 = 0x03,
|
||||
AMDGPU_DOORBELL64_MEC_RING1 = 0x04,
|
||||
AMDGPU_DOORBELL64_MEC_RING2 = 0x05,
|
||||
AMDGPU_DOORBELL64_MEC_RING3 = 0x06,
|
||||
AMDGPU_DOORBELL64_MEC_RING4 = 0x07,
|
||||
AMDGPU_DOORBELL64_MEC_RING5 = 0x08,
|
||||
AMDGPU_DOORBELL64_MEC_RING6 = 0x09,
|
||||
AMDGPU_DOORBELL64_MEC_RING7 = 0x0a,
|
||||
|
||||
/* User queue doorbell range (128 doorbells) */
|
||||
AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
|
||||
AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a,
|
||||
|
||||
/* Graphics engine */
|
||||
AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
|
||||
|
||||
/*
|
||||
* Other graphics doorbells can be allocated here: from 0x8c to 0xdf
|
||||
* Graphics voltage island aperture 1
|
||||
* default non-graphics QWORD index is 0xe0 - 0xFF inclusive
|
||||
*/
|
||||
|
||||
/* sDMA engines reserved from 0xe0 - 0xef */
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0,
|
||||
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1,
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8,
|
||||
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xE9,
|
||||
|
||||
/* For vega10 sriov, the sdma doorbell must be fixed as follow
|
||||
* to keep the same setting with host driver, or it will
|
||||
* happen conflicts
|
||||
*/
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 = 0xF0,
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 = 0xF2,
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
|
||||
|
||||
/* Interrupt handler */
|
||||
AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
|
||||
AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */
|
||||
AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */
|
||||
|
||||
/* VCN engine use 32 bits doorbell */
|
||||
AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
|
||||
AMDGPU_DOORBELL64_VCN2_3 = 0xF9,
|
||||
AMDGPU_DOORBELL64_VCN4_5 = 0xFA,
|
||||
AMDGPU_DOORBELL64_VCN6_7 = 0xFB,
|
||||
|
||||
/* overlap the doorbell assignment with VCN as they are mutually exclusive
|
||||
* VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
|
||||
*/
|
||||
AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8,
|
||||
AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9,
|
||||
AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA,
|
||||
AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB,
|
||||
|
||||
AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC,
|
||||
AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD,
|
||||
AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
|
||||
AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
|
||||
|
||||
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
|
||||
AMDGPU_DOORBELL64_INVALID = 0xFFFF
|
||||
} AMDGPU_DOORBELL64_ASSIGNMENT;
|
||||
|
||||
/*
|
||||
* IRQS.
|
||||
*/
|
||||
@@ -653,6 +540,8 @@ struct amdgpu_asic_funcs {
|
||||
struct amdgpu_ring *ring);
|
||||
/* check if the asic needs a full reset or if a soft reset will work */
|
||||
bool (*need_full_reset)(struct amdgpu_device *adev);
|
||||
/* initialize doorbell layout for specific asic*/
|
||||
void (*init_doorbell_index)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -831,7 +720,6 @@ struct amdgpu_device {
|
||||
bool need_dma32;
|
||||
bool need_swiotlb;
|
||||
bool accel_working;
|
||||
struct work_struct reset_work;
|
||||
struct notifier_block acpi_nb;
|
||||
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
|
||||
struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
|
||||
@@ -976,6 +864,9 @@ struct amdgpu_device {
|
||||
/* GDS */
|
||||
struct amdgpu_gds gds;
|
||||
|
||||
/* KFD */
|
||||
struct amdgpu_kfd_dev kfd;
|
||||
|
||||
/* display related functionality */
|
||||
struct amdgpu_display_manager dm;
|
||||
|
||||
@@ -989,9 +880,6 @@ struct amdgpu_device {
|
||||
atomic64_t visible_pin_size;
|
||||
atomic64_t gart_pin_size;
|
||||
|
||||
/* amdkfd interface */
|
||||
struct kfd_dev *kfd;
|
||||
|
||||
/* soc15 register offset based on ip, instance and segment */
|
||||
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
|
||||
|
||||
@@ -1023,6 +911,10 @@ struct amdgpu_device {
|
||||
unsigned long last_mm_index;
|
||||
bool in_gpu_reset;
|
||||
struct mutex lock_reset;
|
||||
struct amdgpu_doorbell_index doorbell_index;
|
||||
|
||||
int asic_reset_res;
|
||||
struct work_struct xgmi_reset_work;
|
||||
};
|
||||
|
||||
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
|
||||
@@ -1047,11 +939,6 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
|
||||
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
|
||||
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
|
||||
|
||||
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
|
||||
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
|
||||
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
|
||||
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
|
||||
|
||||
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
|
||||
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
|
||||
|
||||
@@ -1113,11 +1000,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
|
||||
#define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg))
|
||||
#define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v))
|
||||
|
||||
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
|
||||
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
|
||||
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
|
||||
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
|
||||
|
||||
#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
|
||||
#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
|
||||
|
||||
@@ -1159,6 +1041,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
|
||||
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
|
||||
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
|
||||
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
|
||||
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
|
||||
|
||||
/* Common functions */
|
||||
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
|
||||
@@ -1219,12 +1102,6 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
|
||||
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long arg);
|
||||
|
||||
|
||||
/*
|
||||
* functions used by amdgpu_xgmi.c
|
||||
*/
|
||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
|
||||
|
||||
/*
|
||||
* functions used by amdgpu_encoder.c
|
||||
*/
|
||||
@@ -1252,6 +1129,9 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade
|
||||
int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
|
||||
u8 perf_req, bool advertise);
|
||||
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
|
||||
|
||||
void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
|
||||
struct amdgpu_dm_backlight_caps *caps);
|
||||
#else
|
||||
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
|
||||
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
|
||||
|
@@ -41,28 +41,21 @@ struct amdgpu_atif_notification_cfg {
|
||||
};
|
||||
|
||||
struct amdgpu_atif_notifications {
|
||||
bool display_switch;
|
||||
bool expansion_mode_change;
|
||||
bool thermal_state;
|
||||
bool forced_power_state;
|
||||
bool system_power_state;
|
||||
bool display_conf_change;
|
||||
bool px_gfx_switch;
|
||||
bool brightness_change;
|
||||
bool dgpu_display_event;
|
||||
bool gpu_package_power_limit;
|
||||
};
|
||||
|
||||
struct amdgpu_atif_functions {
|
||||
bool system_params;
|
||||
bool sbios_requests;
|
||||
bool select_active_disp;
|
||||
bool lid_state;
|
||||
bool get_tv_standard;
|
||||
bool set_tv_standard;
|
||||
bool get_panel_expansion_mode;
|
||||
bool set_panel_expansion_mode;
|
||||
bool temperature_change;
|
||||
bool graphics_device_types;
|
||||
bool query_backlight_transfer_characteristics;
|
||||
bool ready_to_undock;
|
||||
bool external_gpu_information;
|
||||
};
|
||||
|
||||
struct amdgpu_atif {
|
||||
@@ -72,6 +65,7 @@ struct amdgpu_atif {
|
||||
struct amdgpu_atif_functions functions;
|
||||
struct amdgpu_atif_notification_cfg notification_cfg;
|
||||
struct amdgpu_encoder *encoder_for_bl;
|
||||
struct amdgpu_dm_backlight_caps backlight_caps;
|
||||
};
|
||||
|
||||
/* Call the ATIF method
|
||||
@@ -137,15 +131,12 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
|
||||
*/
|
||||
static void amdgpu_atif_parse_notification(struct amdgpu_atif_notifications *n, u32 mask)
|
||||
{
|
||||
n->display_switch = mask & ATIF_DISPLAY_SWITCH_REQUEST_SUPPORTED;
|
||||
n->expansion_mode_change = mask & ATIF_EXPANSION_MODE_CHANGE_REQUEST_SUPPORTED;
|
||||
n->thermal_state = mask & ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED;
|
||||
n->forced_power_state = mask & ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED;
|
||||
n->system_power_state = mask & ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED;
|
||||
n->display_conf_change = mask & ATIF_DISPLAY_CONF_CHANGE_REQUEST_SUPPORTED;
|
||||
n->px_gfx_switch = mask & ATIF_PX_GFX_SWITCH_REQUEST_SUPPORTED;
|
||||
n->brightness_change = mask & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED;
|
||||
n->dgpu_display_event = mask & ATIF_DGPU_DISPLAY_EVENT_SUPPORTED;
|
||||
n->gpu_package_power_limit = mask & ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -162,14 +153,11 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
|
||||
{
|
||||
f->system_params = mask & ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED;
|
||||
f->sbios_requests = mask & ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED;
|
||||
f->select_active_disp = mask & ATIF_SELECT_ACTIVE_DISPLAYS_SUPPORTED;
|
||||
f->lid_state = mask & ATIF_GET_LID_STATE_SUPPORTED;
|
||||
f->get_tv_standard = mask & ATIF_GET_TV_STANDARD_FROM_CMOS_SUPPORTED;
|
||||
f->set_tv_standard = mask & ATIF_SET_TV_STANDARD_IN_CMOS_SUPPORTED;
|
||||
f->get_panel_expansion_mode = mask & ATIF_GET_PANEL_EXPANSION_MODE_FROM_CMOS_SUPPORTED;
|
||||
f->set_panel_expansion_mode = mask & ATIF_SET_PANEL_EXPANSION_MODE_IN_CMOS_SUPPORTED;
|
||||
f->temperature_change = mask & ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED;
|
||||
f->graphics_device_types = mask & ATIF_GET_GRAPHICS_DEVICE_TYPES_SUPPORTED;
|
||||
f->query_backlight_transfer_characteristics =
|
||||
mask & ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED;
|
||||
f->ready_to_undock = mask & ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED;
|
||||
f->external_gpu_information = mask & ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -310,6 +298,65 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_atif_query_backlight_caps - get min and max backlight input signal
|
||||
*
|
||||
* @handle: acpi handle
|
||||
*
|
||||
* Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
|
||||
* to determine the acceptable range of backlight values
|
||||
*
|
||||
* Backlight_caps.caps_valid will be set to true if the query is successful
|
||||
*
|
||||
* The input signals are in range 0-255
|
||||
*
|
||||
* This function assumes the display with backlight is the first LCD
|
||||
*
|
||||
* Returns 0 on success, error on failure.
|
||||
*/
|
||||
static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
|
||||
{
|
||||
union acpi_object *info;
|
||||
struct atif_qbtc_output characteristics;
|
||||
struct atif_qbtc_arguments arguments;
|
||||
struct acpi_buffer params;
|
||||
size_t size;
|
||||
int err = 0;
|
||||
|
||||
arguments.size = sizeof(arguments);
|
||||
arguments.requested_display = ATIF_QBTC_REQUEST_LCD1;
|
||||
|
||||
params.length = sizeof(arguments);
|
||||
params.pointer = (void *)&arguments;
|
||||
|
||||
info = amdgpu_atif_call(atif,
|
||||
ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS,
|
||||
&params);
|
||||
if (!info) {
|
||||
err = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
size = *(u16 *) info->buffer.pointer;
|
||||
if (size < 10) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(&characteristics, 0, sizeof(characteristics));
|
||||
size = min(sizeof(characteristics), size);
|
||||
memcpy(&characteristics, info->buffer.pointer, size);
|
||||
|
||||
atif->backlight_caps.caps_valid = true;
|
||||
atif->backlight_caps.min_input_signal =
|
||||
characteristics.min_input_signal;
|
||||
atif->backlight_caps.max_input_signal =
|
||||
characteristics.max_input_signal;
|
||||
out:
|
||||
kfree(info);
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_atif_get_sbios_requests - get requested sbios event
|
||||
*
|
||||
@@ -799,6 +846,17 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
|
||||
}
|
||||
}
|
||||
|
||||
if (atif->functions.query_backlight_transfer_characteristics) {
|
||||
ret = amdgpu_atif_query_backlight_caps(atif);
|
||||
if (ret) {
|
||||
DRM_DEBUG_DRIVER("Call to QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS failed: %d\n",
|
||||
ret);
|
||||
atif->backlight_caps.caps_valid = false;
|
||||
}
|
||||
} else {
|
||||
atif->backlight_caps.caps_valid = false;
|
||||
}
|
||||
|
||||
out:
|
||||
adev->acpi_nb.notifier_call = amdgpu_acpi_event;
|
||||
register_acpi_notifier(&adev->acpi_nb);
|
||||
@@ -806,6 +864,18 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
|
||||
struct amdgpu_dm_backlight_caps *caps)
|
||||
{
|
||||
if (!adev->atif) {
|
||||
caps->caps_valid = false;
|
||||
return;
|
||||
}
|
||||
caps->caps_valid = adev->atif->backlight_caps.caps_valid;
|
||||
caps->min_input_signal = adev->atif->backlight_caps.min_input_signal;
|
||||
caps->max_input_signal = adev->atif->backlight_caps.max_input_signal;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_acpi_fini - tear down driver acpi support
|
||||
*
|
||||
@@ -816,6 +886,5 @@ out:
|
||||
void amdgpu_acpi_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
unregister_acpi_notifier(&adev->acpi_nb);
|
||||
if (adev->atif)
|
||||
kfree(adev->atif);
|
||||
kfree(adev->atif);
|
||||
}
|
||||
|
@@ -26,15 +26,26 @@
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include <linux/module.h>
|
||||
#include <linux/dma-buf.h>
|
||||
|
||||
const struct kgd2kfd_calls *kgd2kfd;
|
||||
|
||||
static const unsigned int compute_vmid_bitmap = 0xFF00;
|
||||
|
||||
/* Total memory size in system memory and all GPU VRAM. Used to
|
||||
* estimate worst case amount of memory to reserve for page tables
|
||||
*/
|
||||
uint64_t amdgpu_amdkfd_total_mem_size;
|
||||
|
||||
int amdgpu_amdkfd_init(void)
|
||||
{
|
||||
struct sysinfo si;
|
||||
int ret;
|
||||
|
||||
si_meminfo(&si);
|
||||
amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
|
||||
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
|
||||
|
||||
#ifdef CONFIG_HSA_AMD
|
||||
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
|
||||
if (ret)
|
||||
@@ -73,9 +84,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_RAVEN:
|
||||
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
|
||||
@@ -85,8 +98,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
|
||||
return;
|
||||
}
|
||||
|
||||
adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
|
||||
adev->pdev, kfd2kgd);
|
||||
adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev,
|
||||
adev->pdev, kfd2kgd);
|
||||
|
||||
if (adev->kfd.dev)
|
||||
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -126,7 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int i, n;
|
||||
int last_valid_bit;
|
||||
if (adev->kfd) {
|
||||
|
||||
if (adev->kfd.dev) {
|
||||
struct kgd2kfd_shared_resources gpu_resources = {
|
||||
.compute_vmid_bitmap = compute_vmid_bitmap,
|
||||
.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
|
||||
@@ -144,7 +161,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||
KGD_MAX_QUEUES);
|
||||
|
||||
/* remove the KIQ bit as well */
|
||||
if (adev->gfx.kiq.ring.ready)
|
||||
if (adev->gfx.kiq.ring.sched.ready)
|
||||
clear_bit(amdgpu_gfx_queue_to_bit(adev,
|
||||
adev->gfx.kiq.ring.me - 1,
|
||||
adev->gfx.kiq.ring.pipe,
|
||||
@@ -165,7 +182,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||
&gpu_resources.doorbell_start_offset);
|
||||
|
||||
if (adev->asic_type < CHIP_VEGA10) {
|
||||
kgd2kfd->device_init(adev->kfd, &gpu_resources);
|
||||
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -179,25 +196,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||
* process in case of 64-bit doorbells so we
|
||||
* can use each doorbell assignment twice.
|
||||
*/
|
||||
if (adev->asic_type == CHIP_VEGA10) {
|
||||
gpu_resources.sdma_doorbell[0][i] =
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[0][i+1] =
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i] =
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i+1] =
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
|
||||
} else {
|
||||
gpu_resources.sdma_doorbell[0][i] =
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[0][i+1] =
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i] =
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i+1] =
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
|
||||
}
|
||||
gpu_resources.sdma_doorbell[0][i] =
|
||||
adev->doorbell_index.sdma_engine0 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[0][i+1] =
|
||||
adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i] =
|
||||
adev->doorbell_index.sdma_engine1 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i+1] =
|
||||
adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1);
|
||||
}
|
||||
/* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
|
||||
* SDMA, IH and VCN. So don't use them for the CP.
|
||||
@@ -205,37 +211,37 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||
gpu_resources.reserved_doorbell_mask = 0x1e0;
|
||||
gpu_resources.reserved_doorbell_val = 0x0e0;
|
||||
|
||||
kgd2kfd->device_init(adev->kfd, &gpu_resources);
|
||||
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->kfd) {
|
||||
kgd2kfd->device_exit(adev->kfd);
|
||||
adev->kfd = NULL;
|
||||
if (adev->kfd.dev) {
|
||||
kgd2kfd->device_exit(adev->kfd.dev);
|
||||
adev->kfd.dev = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
|
||||
const void *ih_ring_entry)
|
||||
{
|
||||
if (adev->kfd)
|
||||
kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
|
||||
if (adev->kfd.dev)
|
||||
kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry);
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->kfd)
|
||||
kgd2kfd->suspend(adev->kfd);
|
||||
if (adev->kfd.dev)
|
||||
kgd2kfd->suspend(adev->kfd.dev);
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
if (adev->kfd)
|
||||
r = kgd2kfd->resume(adev->kfd);
|
||||
if (adev->kfd.dev)
|
||||
r = kgd2kfd->resume(adev->kfd.dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -244,8 +250,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
if (adev->kfd)
|
||||
r = kgd2kfd->pre_reset(adev->kfd);
|
||||
if (adev->kfd.dev)
|
||||
r = kgd2kfd->pre_reset(adev->kfd.dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -254,8 +260,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
if (adev->kfd)
|
||||
r = kgd2kfd->post_reset(adev->kfd);
|
||||
if (adev->kfd.dev)
|
||||
r = kgd2kfd->post_reset(adev->kfd.dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -268,9 +274,9 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
|
||||
amdgpu_device_gpu_recover(adev, NULL);
|
||||
}
|
||||
|
||||
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr, bool mqd_gfx9)
|
||||
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr, bool mqd_gfx9)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct amdgpu_bo *bo = NULL;
|
||||
@@ -340,7 +346,7 @@ allocate_mem_reserve_bo_failed:
|
||||
return r;
|
||||
}
|
||||
|
||||
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
|
||||
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
|
||||
{
|
||||
struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
|
||||
|
||||
@@ -351,8 +357,8 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
|
||||
amdgpu_bo_unref(&(bo));
|
||||
}
|
||||
|
||||
void get_local_mem_info(struct kgd_dev *kgd,
|
||||
struct kfd_local_mem_info *mem_info)
|
||||
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
|
||||
struct kfd_local_mem_info *mem_info)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
|
||||
@@ -383,7 +389,7 @@ void get_local_mem_info(struct kgd_dev *kgd,
|
||||
mem_info->mem_clk_max = 100;
|
||||
}
|
||||
|
||||
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
|
||||
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
|
||||
@@ -392,7 +398,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
|
||||
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
|
||||
@@ -405,7 +411,7 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
|
||||
return 100;
|
||||
}
|
||||
|
||||
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
|
||||
void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
|
||||
@@ -428,6 +434,62 @@ void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
|
||||
cu_info->lds_size = acu_info.lds_size;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
|
||||
struct kgd_dev **dma_buf_kgd,
|
||||
uint64_t *bo_size, void *metadata_buffer,
|
||||
size_t buffer_size, uint32_t *metadata_size,
|
||||
uint32_t *flags)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct dma_buf *dma_buf;
|
||||
struct drm_gem_object *obj;
|
||||
struct amdgpu_bo *bo;
|
||||
uint64_t metadata_flags;
|
||||
int r = -EINVAL;
|
||||
|
||||
dma_buf = dma_buf_get(dma_buf_fd);
|
||||
if (IS_ERR(dma_buf))
|
||||
return PTR_ERR(dma_buf);
|
||||
|
||||
if (dma_buf->ops != &amdgpu_dmabuf_ops)
|
||||
/* Can't handle non-graphics buffers */
|
||||
goto out_put;
|
||||
|
||||
obj = dma_buf->priv;
|
||||
if (obj->dev->driver != adev->ddev->driver)
|
||||
/* Can't handle buffers from different drivers */
|
||||
goto out_put;
|
||||
|
||||
adev = obj->dev->dev_private;
|
||||
bo = gem_to_amdgpu_bo(obj);
|
||||
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
|
||||
AMDGPU_GEM_DOMAIN_GTT)))
|
||||
/* Only VRAM and GTT BOs are supported */
|
||||
goto out_put;
|
||||
|
||||
r = 0;
|
||||
if (dma_buf_kgd)
|
||||
*dma_buf_kgd = (struct kgd_dev *)adev;
|
||||
if (bo_size)
|
||||
*bo_size = amdgpu_bo_size(bo);
|
||||
if (metadata_size)
|
||||
*metadata_size = bo->metadata_size;
|
||||
if (metadata_buffer)
|
||||
r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
|
||||
metadata_size, &metadata_flags);
|
||||
if (flags) {
|
||||
*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
|
||||
ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
|
||||
|
||||
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
|
||||
*flags |= ALLOC_MEM_FLAGS_PUBLIC;
|
||||
}
|
||||
|
||||
out_put:
|
||||
dma_buf_put(dma_buf);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
@@ -510,7 +572,7 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
|
||||
|
||||
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
|
||||
{
|
||||
if (adev->kfd) {
|
||||
if (adev->kfd.dev) {
|
||||
if ((1 << vmid) & compute_vmid_bitmap)
|
||||
return true;
|
||||
}
|
||||
@@ -524,7 +586,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
|
||||
return false;
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
|
||||
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
|
||||
{
|
||||
}
|
||||
|
||||
|
@@ -27,7 +27,6 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <kgd_kfd_interface.h>
|
||||
#include <drm/ttm/ttm_execbuf_util.h>
|
||||
@@ -35,6 +34,7 @@
|
||||
#include "amdgpu_vm.h"
|
||||
|
||||
extern const struct kgd2kfd_calls *kgd2kfd;
|
||||
extern uint64_t amdgpu_amdkfd_total_mem_size;
|
||||
|
||||
struct amdgpu_device;
|
||||
|
||||
@@ -77,6 +77,11 @@ struct amdgpu_amdkfd_fence {
|
||||
char timeline_name[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
/* Per-device KFD state, accessed in this file as adev->kfd. */
struct amdgpu_kfd_dev {
|
||||
/* KFD device handle; tested for non-NULL before treating a VMID as KFD's. */
struct kfd_dev *dev;
|
||||
/*
 * Amount of VRAM currently accounted to KFD allocations; raised when a
 * VRAM reservation succeeds and lowered when it is unreserved.
 * NOTE(review): this is unsigned, so the "vram_used < 0" imbalance
 * warning in the unreserve path cannot trigger - confirm whether a
 * signed type was intended.
 */
uint64_t vram_used;
|
||||
};
|
||||
|
||||
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
|
||||
struct mm_struct *mm);
|
||||
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
|
||||
@@ -134,16 +139,21 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
|
||||
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
|
||||
|
||||
/* Shared API */
|
||||
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr, bool mqd_gfx9);
|
||||
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
|
||||
void get_local_mem_info(struct kgd_dev *kgd,
|
||||
struct kfd_local_mem_info *mem_info);
|
||||
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
|
||||
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr, bool mqd_gfx9);
|
||||
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
|
||||
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
|
||||
struct kfd_local_mem_info *mem_info);
|
||||
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
|
||||
|
||||
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
|
||||
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
|
||||
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
|
||||
void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
|
||||
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
|
||||
struct kgd_dev **dmabuf_kgd,
|
||||
uint64_t *bo_size, void *metadata_buffer,
|
||||
size_t buffer_size, uint32_t *metadata_size,
|
||||
uint32_t *flags);
|
||||
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
|
||||
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
|
||||
|
||||
@@ -195,7 +205,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
|
||||
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
|
||||
struct kfd_vm_fault_info *info);
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
|
||||
struct dma_buf *dmabuf,
|
||||
uint64_t va, void *vm,
|
||||
struct kgd_mem **mem, uint64_t *size,
|
||||
uint64_t *mmap_offset);
|
||||
|
||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
|
||||
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
|
||||
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
|
||||
|
||||
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
|
||||
|
@@ -23,6 +23,7 @@
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
@@ -173,13 +174,6 @@ static int get_tile_config(struct kgd_dev *kgd,
|
||||
}
|
||||
|
||||
static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.init_gtt_mem_allocation = alloc_gtt_mem,
|
||||
.free_gtt_mem = free_gtt_mem,
|
||||
.get_local_mem_info = get_local_mem_info,
|
||||
.get_gpu_clock_counter = get_gpu_clock_counter,
|
||||
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
|
||||
.alloc_pasid = amdgpu_pasid_alloc,
|
||||
.free_pasid = amdgpu_pasid_free,
|
||||
.program_sh_mem_settings = kgd_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
|
||||
.init_interrupts = kgd_init_interrupts,
|
||||
@@ -200,28 +194,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.get_fw_version = get_fw_version,
|
||||
.set_scratch_backing_va = set_scratch_backing_va,
|
||||
.get_tile_config = get_tile_config,
|
||||
.get_cu_info = get_cu_info,
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
|
||||
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
|
||||
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
|
||||
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
|
||||
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
|
||||
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
|
||||
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
|
||||
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
|
||||
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
|
||||
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
|
||||
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
|
||||
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
|
||||
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
|
||||
.gpu_recover = amdgpu_amdkfd_gpu_reset,
|
||||
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
|
||||
|
@@ -24,6 +24,7 @@
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
@@ -128,13 +129,6 @@ static int get_tile_config(struct kgd_dev *kgd,
|
||||
}
|
||||
|
||||
static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.init_gtt_mem_allocation = alloc_gtt_mem,
|
||||
.free_gtt_mem = free_gtt_mem,
|
||||
.get_local_mem_info = get_local_mem_info,
|
||||
.get_gpu_clock_counter = get_gpu_clock_counter,
|
||||
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
|
||||
.alloc_pasid = amdgpu_pasid_alloc,
|
||||
.free_pasid = amdgpu_pasid_free,
|
||||
.program_sh_mem_settings = kgd_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
|
||||
.init_interrupts = kgd_init_interrupts,
|
||||
@@ -157,27 +151,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.get_fw_version = get_fw_version,
|
||||
.set_scratch_backing_va = set_scratch_backing_va,
|
||||
.get_tile_config = get_tile_config,
|
||||
.get_cu_info = get_cu_info,
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
|
||||
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
|
||||
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
|
||||
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
|
||||
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
|
||||
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
|
||||
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
|
||||
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
|
||||
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
|
||||
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
|
||||
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
|
||||
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
|
||||
.gpu_recover = amdgpu_amdkfd_gpu_reset,
|
||||
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
|
||||
|
@@ -26,6 +26,7 @@
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
@@ -46,38 +47,9 @@
|
||||
#include "v9_structs.h"
|
||||
#include "soc15.h"
|
||||
#include "soc15d.h"
|
||||
#include "mmhub_v1_0.h"
|
||||
#include "gfxhub_v1_0.h"
|
||||
|
||||
/* HACK: MMHUB and GC both have VM-related register with the same
|
||||
* names but different offsets. Define the MMHUB register we need here
|
||||
* with a prefix. A proper solution would be to move the functions
|
||||
* programming these registers into gfx_v9_0.c and mmhub_v1_0.c
|
||||
* respectively.
|
||||
*/
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
|
||||
|
||||
#define V9_PIPE_PER_MEC (4)
|
||||
#define V9_QUEUES_PER_PIPE_MEC (8)
|
||||
@@ -167,13 +139,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
|
||||
}
|
||||
|
||||
static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.init_gtt_mem_allocation = alloc_gtt_mem,
|
||||
.free_gtt_mem = free_gtt_mem,
|
||||
.get_local_mem_info = get_local_mem_info,
|
||||
.get_gpu_clock_counter = get_gpu_clock_counter,
|
||||
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
|
||||
.alloc_pasid = amdgpu_pasid_alloc,
|
||||
.free_pasid = amdgpu_pasid_free,
|
||||
.program_sh_mem_settings = kgd_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
|
||||
.init_interrupts = kgd_init_interrupts,
|
||||
@@ -196,26 +161,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.get_fw_version = get_fw_version,
|
||||
.set_scratch_backing_va = set_scratch_backing_va,
|
||||
.get_tile_config = amdgpu_amdkfd_get_tile_config,
|
||||
.get_cu_info = get_cu_info,
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
|
||||
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
|
||||
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
|
||||
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
|
||||
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
|
||||
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
|
||||
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
|
||||
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
|
||||
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
|
||||
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
|
||||
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
|
||||
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
.gpu_recover = amdgpu_amdkfd_gpu_reset,
|
||||
.set_compute_idle = amdgpu_amdkfd_set_compute_idle,
|
||||
.get_hive_id = amdgpu_amdkfd_get_hive_id,
|
||||
};
|
||||
|
||||
@@ -785,15 +733,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
||||
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
uint32_t req = (1 << vmid) |
|
||||
(0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
|
||||
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
|
||||
/* Use legacy mode tlb invalidation.
|
||||
*
|
||||
@@ -810,34 +749,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
|
||||
* TODO 2: support range-based invalidation, requires kfd2kgd
|
||||
* interface change
|
||||
*/
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
|
||||
0xffffffff);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
|
||||
0x0000001f);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
|
||||
0xffffffff);
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
|
||||
0x0000001f);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
|
||||
req);
|
||||
|
||||
while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
|
||||
(1 << vmid)))
|
||||
cpu_relax();
|
||||
|
||||
while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
|
||||
(1 << vmid)))
|
||||
cpu_relax();
|
||||
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
|
||||
}
|
||||
|
||||
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
|
||||
@@ -876,7 +788,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||
if (adev->in_gpu_reset)
|
||||
return -EIO;
|
||||
|
||||
if (ring->ready)
|
||||
if (ring->sched.ready)
|
||||
return invalidate_tlbs_with_kiq(adev, pasid);
|
||||
|
||||
for (vmid = 0; vmid < 16; vmid++) {
|
||||
@@ -1016,7 +928,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint64_t page_table_base)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
uint64_t base = page_table_base | AMDGPU_PTE_VALID;
|
||||
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
|
||||
pr_err("trying to set page table base for wrong VMID %u\n",
|
||||
@@ -1028,25 +939,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
* now, all processes share the same address space size, like
|
||||
* on GFX8 and older.
|
||||
*/
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
|
||||
mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
|
||||
lower_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
|
||||
upper_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
|
||||
lower_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
|
||||
upper_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
|
||||
gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
|
||||
}
|
||||
|
@@ -25,6 +25,7 @@
|
||||
#include <linux/list.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu_object.h"
|
||||
#include "amdgpu_vm.h"
|
||||
@@ -46,9 +47,9 @@
|
||||
/* Impose limit on how much memory KFD can use */
|
||||
static struct {
|
||||
uint64_t max_system_mem_limit;
|
||||
uint64_t max_userptr_mem_limit;
|
||||
uint64_t max_ttm_mem_limit;
|
||||
int64_t system_mem_used;
|
||||
int64_t userptr_mem_used;
|
||||
int64_t ttm_mem_used;
|
||||
spinlock_t mem_limit_lock;
|
||||
} kfd_mem_limit;
|
||||
|
||||
@@ -90,8 +91,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
|
||||
}
|
||||
|
||||
/* Set memory usage limits. Currently, limits are
|
||||
* System (kernel) memory - 3/8th System RAM
|
||||
* Userptr memory - 3/4th System RAM
|
||||
* System (TTM + userptr) memory - 3/4th System RAM
|
||||
* TTM memory - 3/8th System RAM
|
||||
*/
|
||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
|
||||
{
|
||||
@@ -103,48 +104,61 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
|
||||
mem *= si.mem_unit;
|
||||
|
||||
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
|
||||
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
|
||||
kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
|
||||
pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
|
||||
kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
|
||||
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
|
||||
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
|
||||
(kfd_mem_limit.max_system_mem_limit >> 20),
|
||||
(kfd_mem_limit.max_userptr_mem_limit >> 20));
|
||||
(kfd_mem_limit.max_ttm_mem_limit >> 20));
|
||||
}
|
||||
|
||||
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 domain)
|
||||
static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 domain, bool sg)
|
||||
{
|
||||
size_t acc_size;
|
||||
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
|
||||
uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
|
||||
int ret = 0;
|
||||
|
||||
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
|
||||
sizeof(struct amdgpu_bo));
|
||||
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
vram_needed = 0;
|
||||
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
|
||||
if (kfd_mem_limit.system_mem_used + (acc_size + size) >
|
||||
kfd_mem_limit.max_system_mem_limit) {
|
||||
ret = -ENOMEM;
|
||||
goto err_no_mem;
|
||||
}
|
||||
kfd_mem_limit.system_mem_used += (acc_size + size);
|
||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
|
||||
if ((kfd_mem_limit.system_mem_used + acc_size >
|
||||
kfd_mem_limit.max_system_mem_limit) ||
|
||||
(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
|
||||
kfd_mem_limit.max_userptr_mem_limit)) {
|
||||
ret = -ENOMEM;
|
||||
goto err_no_mem;
|
||||
}
|
||||
kfd_mem_limit.system_mem_used += acc_size;
|
||||
kfd_mem_limit.userptr_mem_used += size;
|
||||
/* TTM GTT memory */
|
||||
system_mem_needed = acc_size + size;
|
||||
ttm_mem_needed = acc_size + size;
|
||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
|
||||
/* Userptr */
|
||||
system_mem_needed = acc_size + size;
|
||||
ttm_mem_needed = acc_size;
|
||||
} else {
|
||||
/* VRAM and SG */
|
||||
system_mem_needed = acc_size;
|
||||
ttm_mem_needed = acc_size;
|
||||
if (domain == AMDGPU_GEM_DOMAIN_VRAM)
|
||||
vram_needed = size;
|
||||
}
|
||||
err_no_mem:
|
||||
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
|
||||
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
|
||||
kfd_mem_limit.max_system_mem_limit) ||
|
||||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
|
||||
kfd_mem_limit.max_ttm_mem_limit) ||
|
||||
(adev->kfd.vram_used + vram_needed >
|
||||
adev->gmc.real_vram_size - reserved_for_pt)) {
|
||||
ret = -ENOMEM;
|
||||
} else {
|
||||
kfd_mem_limit.system_mem_used += system_mem_needed;
|
||||
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
|
||||
adev->kfd.vram_used += vram_needed;
|
||||
}
|
||||
|
||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void unreserve_system_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 domain)
|
||||
static void unreserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 domain, bool sg)
|
||||
{
|
||||
size_t acc_size;
|
||||
|
||||
@@ -154,35 +168,39 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
|
||||
kfd_mem_limit.system_mem_used -= (acc_size + size);
|
||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
|
||||
kfd_mem_limit.ttm_mem_used -= (acc_size + size);
|
||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
|
||||
kfd_mem_limit.system_mem_used -= (acc_size + size);
|
||||
kfd_mem_limit.ttm_mem_used -= acc_size;
|
||||
} else {
|
||||
kfd_mem_limit.system_mem_used -= acc_size;
|
||||
kfd_mem_limit.userptr_mem_used -= size;
|
||||
kfd_mem_limit.ttm_mem_used -= acc_size;
|
||||
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
|
||||
adev->kfd.vram_used -= size;
|
||||
WARN_ONCE(adev->kfd.vram_used < 0,
|
||||
"kfd VRAM memory accounting unbalanced");
|
||||
}
|
||||
}
|
||||
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
||||
"kfd system memory accounting unbalanced");
|
||||
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
|
||||
"kfd userptr memory accounting unbalanced");
|
||||
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
|
||||
"kfd TTM memory accounting unbalanced");
|
||||
|
||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
|
||||
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
|
||||
{
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
u32 domain = bo->preferred_domains;
|
||||
bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
|
||||
|
||||
if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
|
||||
kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
|
||||
kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
|
||||
} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
|
||||
kfd_mem_limit.system_mem_used -=
|
||||
(bo->tbo.acc_size + amdgpu_bo_size(bo));
|
||||
domain = AMDGPU_GEM_DOMAIN_CPU;
|
||||
sg = false;
|
||||
}
|
||||
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
||||
"kfd system memory accounting unbalanced");
|
||||
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
|
||||
"kfd userptr memory accounting unbalanced");
|
||||
|
||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||
unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
|
||||
}
|
||||
|
||||
|
||||
@@ -395,23 +413,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * sync_vm_fence - add fence @f to @sync and discard the sync object's
 * last_vm_update fence.
 *
 * Returns the result of amdgpu_sync_fence(). The last_vm_update reference
 * is dropped whether or not adding the fence succeeded.
 */
static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
|
||||
struct dma_fence *f)
|
||||
{
|
||||
int ret = amdgpu_sync_fence(adev, sync, f, false);
|
||||
|
||||
/* Sync objects can't handle multiple GPUs (contexts) updating
|
||||
* sync->last_vm_update. Fortunately we don't need it for
|
||||
* KFD's purposes, so we can just drop that fence.
|
||||
*/
|
||||
if (sync->last_vm_update) {
|
||||
dma_fence_put(sync->last_vm_update);
|
||||
sync->last_vm_update = NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
|
||||
{
|
||||
struct amdgpu_bo *pd = vm->root.base.bo;
|
||||
@@ -422,7 +423,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return sync_vm_fence(adev, sync, vm->last_update);
|
||||
return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
|
||||
}
|
||||
|
||||
/* add_bo_to_vm - Add a BO to a VM
|
||||
@@ -536,7 +537,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
|
||||
struct amdgpu_bo *bo = mem->bo;
|
||||
|
||||
INIT_LIST_HEAD(&entry->head);
|
||||
entry->shared = true;
|
||||
entry->num_shared = 1;
|
||||
entry->bo = &bo->tbo;
|
||||
mutex_lock(&process_info->lock);
|
||||
if (userptr)
|
||||
@@ -677,7 +678,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
|
||||
|
||||
ctx->kfd_bo.priority = 0;
|
||||
ctx->kfd_bo.tv.bo = &bo->tbo;
|
||||
ctx->kfd_bo.tv.shared = true;
|
||||
ctx->kfd_bo.tv.num_shared = 1;
|
||||
ctx->kfd_bo.user_pages = NULL;
|
||||
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
||||
|
||||
@@ -741,7 +742,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
|
||||
|
||||
ctx->kfd_bo.priority = 0;
|
||||
ctx->kfd_bo.tv.bo = &bo->tbo;
|
||||
ctx->kfd_bo.tv.shared = true;
|
||||
ctx->kfd_bo.tv.num_shared = 1;
|
||||
ctx->kfd_bo.user_pages = NULL;
|
||||
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
||||
|
||||
@@ -826,7 +827,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
|
||||
/* Add the eviction fence back */
|
||||
amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
|
||||
|
||||
sync_vm_fence(adev, sync, bo_va->last_pt_update);
|
||||
amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -851,7 +852,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
return sync_vm_fence(adev, sync, bo_va->last_pt_update);
|
||||
return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
|
||||
}
|
||||
|
||||
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
|
||||
@@ -886,6 +887,24 @@ update_gpuvm_pte_failed:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * create_doorbell_sg - build a one-entry scatter/gather table describing a
 * single range of @size bytes at DMA address @addr.
 *
 * Both the table struct and its entry array are allocated with GFP_KERNEL.
 * Returns the new table, or NULL if either allocation fails. The callers
 * visible in this file release it with sg_free_table() followed by kfree().
 */
static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
|
||||
{
|
||||
struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
|
||||
|
||||
if (!sg)
|
||||
return NULL;
|
||||
if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
|
||||
/* Entry array allocation failed; don't leak the table struct. */
kfree(sg);
|
||||
return NULL;
|
||||
}
|
||||
/* The address is written straight into the entry; no page is attached. */
sg->sgl->dma_address = addr;
|
||||
sg->sgl->length = size;
|
||||
/* dma_length only exists as a separate field on some configurations. */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
|
||||
sg->sgl->dma_length = size;
|
||||
#endif
|
||||
return sg;
|
||||
}
|
||||
|
||||
static int process_validate_vms(struct amdkfd_process_info *process_info)
|
||||
{
|
||||
struct amdgpu_vm *peer_vm;
|
||||
@@ -901,6 +920,26 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * process_sync_pds_resv - add the reservation-object fences of every VM's
 * root page directory to @sync.
 *
 * Walks all VMs on @process_info->vm_list_head and calls amdgpu_sync_resv()
 * on the reservation object of each VM's root BO. Stops at, and returns,
 * the first non-zero result; returns 0 if every call succeeded.
 */
static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
|
||||
struct amdgpu_sync *sync)
|
||||
{
|
||||
struct amdgpu_vm *peer_vm;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry(peer_vm, &process_info->vm_list_head,
|
||||
vm_list_node) {
|
||||
struct amdgpu_bo *pd = peer_vm->root.base.bo;
|
||||
|
||||
/* No device is passed (NULL) and the owner is left undefined. */
ret = amdgpu_sync_resv(NULL,
|
||||
sync, pd->tbo.resv,
|
||||
AMDGPU_FENCE_OWNER_UNDEFINED, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int process_update_pds(struct amdkfd_process_info *process_info,
|
||||
struct amdgpu_sync *sync)
|
||||
{
|
||||
@@ -1149,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
|
||||
enum ttm_bo_type bo_type = ttm_bo_type_device;
|
||||
struct sg_table *sg = NULL;
|
||||
uint64_t user_addr = 0;
|
||||
struct amdgpu_bo *bo;
|
||||
struct amdgpu_bo_param bp;
|
||||
@@ -1177,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
if (!offset || !*offset)
|
||||
return -EINVAL;
|
||||
user_addr = *offset;
|
||||
} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
|
||||
domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
|
||||
bo_type = ttm_bo_type_sg;
|
||||
alloc_flags = 0;
|
||||
if (size > UINT_MAX)
|
||||
return -EINVAL;
|
||||
sg = create_doorbell_sg(*offset, size);
|
||||
if (!sg)
|
||||
return -ENOMEM;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
|
||||
if (!*mem)
|
||||
return -ENOMEM;
|
||||
if (!*mem) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
INIT_LIST_HEAD(&(*mem)->bo_va_list);
|
||||
mutex_init(&(*mem)->lock);
|
||||
(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
|
||||
@@ -1199,7 +1252,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
byte_align = (adev->family == AMDGPU_FAMILY_VI &&
|
||||
adev->asic_type != CHIP_FIJI &&
|
||||
adev->asic_type != CHIP_POLARIS10 &&
|
||||
adev->asic_type != CHIP_POLARIS11) ?
|
||||
adev->asic_type != CHIP_POLARIS11 &&
|
||||
adev->asic_type != CHIP_POLARIS12) ?
|
||||
VI_BO_SIZE_ALIGN : 1;
|
||||
|
||||
mapping_flags = AMDGPU_VM_PAGE_READABLE;
|
||||
@@ -1215,10 +1269,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
|
||||
amdgpu_sync_create(&(*mem)->sync);
|
||||
|
||||
ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
|
||||
ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
|
||||
if (ret) {
|
||||
pr_debug("Insufficient system memory\n");
|
||||
goto err_reserve_system_mem;
|
||||
goto err_reserve_limit;
|
||||
}
|
||||
|
||||
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
|
||||
@@ -1229,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
bp.byte_align = byte_align;
|
||||
bp.domain = alloc_domain;
|
||||
bp.flags = alloc_flags;
|
||||
bp.type = ttm_bo_type_device;
|
||||
bp.type = bo_type;
|
||||
bp.resv = NULL;
|
||||
ret = amdgpu_bo_create(adev, &bp, &bo);
|
||||
if (ret) {
|
||||
@@ -1237,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
domain_string(alloc_domain), ret);
|
||||
goto err_bo_create;
|
||||
}
|
||||
if (bo_type == ttm_bo_type_sg) {
|
||||
bo->tbo.sg = sg;
|
||||
bo->tbo.ttm->sg = sg;
|
||||
}
|
||||
bo->kfd_bo = *mem;
|
||||
(*mem)->bo = bo;
|
||||
if (user_addr)
|
||||
@@ -1266,12 +1324,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
allocate_init_user_pages_failed:
|
||||
amdgpu_bo_unref(&bo);
|
||||
/* Don't unreserve system mem limit twice */
|
||||
goto err_reserve_system_mem;
|
||||
goto err_reserve_limit;
|
||||
err_bo_create:
|
||||
unreserve_system_mem_limit(adev, size, alloc_domain);
|
||||
err_reserve_system_mem:
|
||||
unreserve_mem_limit(adev, size, alloc_domain, !!sg);
|
||||
err_reserve_limit:
|
||||
mutex_destroy(&(*mem)->lock);
|
||||
kfree(*mem);
|
||||
err:
|
||||
if (sg) {
|
||||
sg_free_table(sg);
|
||||
kfree(sg);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1341,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
||||
/* Free the sync object */
|
||||
amdgpu_sync_free(&mem->sync);
|
||||
|
||||
/* If the SG is not NULL, it's one we created for a doorbell
|
||||
* BO. We need to free it.
|
||||
*/
|
||||
if (mem->bo->tbo.sg) {
|
||||
sg_free_table(mem->bo->tbo.sg);
|
||||
kfree(mem->bo->tbo.sg);
|
||||
}
|
||||
|
||||
/* Free the BO*/
|
||||
amdgpu_bo_unref(&mem->bo);
|
||||
mutex_destroy(&mem->lock);
|
||||
@@ -1405,7 +1476,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
|
||||
* the queues are still stopped and we can leave mapping for
|
||||
* the next restore worker
|
||||
*/
|
||||
if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
|
||||
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
|
||||
bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
|
||||
is_invalid_userptr = true;
|
||||
|
||||
if (check_if_add_bo_to_vm(avm, mem)) {
|
||||
@@ -1642,6 +1714,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
|
||||
struct dma_buf *dma_buf,
|
||||
uint64_t va, void *vm,
|
||||
struct kgd_mem **mem, uint64_t *size,
|
||||
uint64_t *mmap_offset)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct drm_gem_object *obj;
|
||||
struct amdgpu_bo *bo;
|
||||
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
|
||||
|
||||
if (dma_buf->ops != &amdgpu_dmabuf_ops)
|
||||
/* Can't handle non-graphics buffers */
|
||||
return -EINVAL;
|
||||
|
||||
obj = dma_buf->priv;
|
||||
if (obj->dev->dev_private != adev)
|
||||
/* Can't handle buffers from other devices */
|
||||
return -EINVAL;
|
||||
|
||||
bo = gem_to_amdgpu_bo(obj);
|
||||
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
|
||||
AMDGPU_GEM_DOMAIN_GTT)))
|
||||
/* Only VRAM and GTT BOs are supported */
|
||||
return -EINVAL;
|
||||
|
||||
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
|
||||
if (!*mem)
|
||||
return -ENOMEM;
|
||||
|
||||
if (size)
|
||||
*size = amdgpu_bo_size(bo);
|
||||
|
||||
if (mmap_offset)
|
||||
*mmap_offset = amdgpu_bo_mmap_offset(bo);
|
||||
|
||||
INIT_LIST_HEAD(&(*mem)->bo_va_list);
|
||||
mutex_init(&(*mem)->lock);
|
||||
(*mem)->mapping_flags =
|
||||
AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
|
||||
AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
|
||||
|
||||
(*mem)->bo = amdgpu_bo_ref(bo);
|
||||
(*mem)->va = va;
|
||||
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
|
||||
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
|
||||
(*mem)->mapped_to_gpu_memory = 0;
|
||||
(*mem)->process_info = avm->process_info;
|
||||
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
|
||||
amdgpu_sync_create(&(*mem)->sync);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Evict a userptr BO by stopping the queues if necessary
|
||||
*
|
||||
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
|
||||
@@ -1808,7 +1934,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
||||
validate_list.head) {
|
||||
list_add_tail(&mem->resv_list.head, &resv_list);
|
||||
mem->resv_list.bo = mem->validate_list.bo;
|
||||
mem->resv_list.shared = mem->validate_list.shared;
|
||||
mem->resv_list.num_shared = mem->validate_list.num_shared;
|
||||
}
|
||||
|
||||
/* Reserve all BOs and page tables for validation */
|
||||
@@ -2027,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
||||
|
||||
list_add_tail(&mem->resv_list.head, &ctx.list);
|
||||
mem->resv_list.bo = mem->validate_list.bo;
|
||||
mem->resv_list.shared = mem->validate_list.shared;
|
||||
mem->resv_list.num_shared = mem->validate_list.num_shared;
|
||||
}
|
||||
|
||||
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
|
||||
@@ -2044,13 +2170,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
||||
if (ret)
|
||||
goto validate_map_fail;
|
||||
|
||||
/* Wait for PD/PTs validate to finish */
|
||||
/* FIXME: I think this isn't needed */
|
||||
list_for_each_entry(peer_vm, &process_info->vm_list_head,
|
||||
vm_list_node) {
|
||||
struct amdgpu_bo *bo = peer_vm->root.base.bo;
|
||||
|
||||
ttm_bo_wait(&bo->tbo, false, false);
|
||||
ret = process_sync_pds_resv(process_info, &sync_obj);
|
||||
if (ret) {
|
||||
pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
|
||||
goto validate_map_fail;
|
||||
}
|
||||
|
||||
/* Validate BOs and map them to GPUVM (update VM page tables). */
|
||||
@@ -2066,7 +2189,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
||||
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
|
||||
goto validate_map_fail;
|
||||
}
|
||||
|
||||
ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
|
||||
if (ret) {
|
||||
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
|
||||
goto validate_map_fail;
|
||||
}
|
||||
list_for_each_entry(bo_va_entry, &mem->bo_va_list,
|
||||
bo_list) {
|
||||
ret = update_gpuvm_pte((struct amdgpu_device *)
|
||||
@@ -2087,6 +2214,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
||||
goto validate_map_fail;
|
||||
}
|
||||
|
||||
/* Wait for validate and PT updates to finish */
|
||||
amdgpu_sync_wait(&sync_obj, false);
|
||||
|
||||
/* Release old eviction fence and create new one, because fence only
|
||||
@@ -2105,10 +2233,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
||||
process_info->eviction_fence = new_fence;
|
||||
*ef = dma_fence_get(&new_fence->base);
|
||||
|
||||
/* Wait for validate to finish and attach new eviction fence */
|
||||
list_for_each_entry(mem, &process_info->kfd_bo_list,
|
||||
validate_list.head)
|
||||
ttm_bo_wait(&mem->bo->tbo, false, false);
|
||||
/* Attach new eviction fence to all BOs */
|
||||
list_for_each_entry(mem, &process_info->kfd_bo_list,
|
||||
validate_list.head)
|
||||
amdgpu_bo_fence(mem->bo,
|
||||
|
@@ -118,7 +118,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
|
||||
entry->priority = min(info[i].bo_priority,
|
||||
AMDGPU_BO_LIST_MAX_PRIORITY);
|
||||
entry->tv.bo = &bo->tbo;
|
||||
entry->tv.shared = !bo->prime_shared_count;
|
||||
|
||||
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
|
||||
list->gds_obj = bo;
|
||||
|
@@ -50,7 +50,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
|
||||
bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
|
||||
p->uf_entry.priority = 0;
|
||||
p->uf_entry.tv.bo = &bo->tbo;
|
||||
p->uf_entry.tv.shared = true;
|
||||
/* One for TTM and one for the CS job */
|
||||
p->uf_entry.tv.num_shared = 2;
|
||||
p->uf_entry.user_pages = NULL;
|
||||
|
||||
drm_gem_object_put_unlocked(gobj);
|
||||
@@ -598,6 +599,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
||||
return r;
|
||||
}
|
||||
|
||||
/* One for TTM and one for the CS job */
|
||||
amdgpu_bo_list_for_each_entry(e, p->bo_list)
|
||||
e->tv.num_shared = 2;
|
||||
|
||||
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
|
||||
if (p->bo_list->first_userptr != p->bo_list->num_entries)
|
||||
p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
|
||||
@@ -717,8 +722,14 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
||||
gws = p->bo_list->gws_obj;
|
||||
oa = p->bo_list->oa_obj;
|
||||
|
||||
amdgpu_bo_list_for_each_entry(e, p->bo_list)
|
||||
e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
|
||||
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
|
||||
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
|
||||
|
||||
/* Make sure we use the exclusive slot for shared BOs */
|
||||
if (bo->prime_shared_count)
|
||||
e->tv.num_shared = 0;
|
||||
e->bo_va = amdgpu_vm_bo_find(vm, bo);
|
||||
}
|
||||
|
||||
if (gds) {
|
||||
p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
|
||||
@@ -955,10 +966,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
|
||||
|
||||
if (amdgpu_vm_debug) {
|
||||
@@ -1104,7 +1111,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
|
||||
{
|
||||
int r;
|
||||
struct dma_fence *fence;
|
||||
r = drm_syncobj_find_fence(p->filp, handle, 0, &fence);
|
||||
r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@@ -1193,7 +1200,7 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < p->num_post_dep_syncobjs; ++i)
|
||||
drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
|
||||
drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
|
||||
}
|
||||
|
||||
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||
@@ -1260,8 +1267,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||
return 0;
|
||||
|
||||
error_abort:
|
||||
dma_fence_put(&job->base.s_fence->finished);
|
||||
job->base.s_fence = NULL;
|
||||
drm_sched_job_cleanup(&job->base);
|
||||
amdgpu_mn_unlock(p->mn);
|
||||
|
||||
error_unlock:
|
||||
@@ -1285,7 +1291,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
|
||||
r = amdgpu_cs_parser_init(&parser, data);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to initialize parser !\n");
|
||||
DRM_ERROR("Failed to initialize parser %d!\n", r);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
117
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
Normal file
117
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright 2016 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
* Author: Monk.liu@amd.com
|
||||
*/
|
||||
|
||||
#include "amdgpu.h"
|
||||
|
||||
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
|
||||
{
|
||||
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
|
||||
|
||||
addr -= AMDGPU_VA_RESERVED_SIZE;
|
||||
addr = amdgpu_gmc_sign_extend(addr);
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
|
||||
u32 domain, uint32_t size)
|
||||
{
|
||||
int r;
|
||||
void *ptr;
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
|
||||
domain, bo,
|
||||
NULL, &ptr);
|
||||
if (!*bo)
|
||||
return -ENOMEM;
|
||||
|
||||
memset(ptr, 0, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Free the static CSA buffer object referenced by @bo.
 *
 * Thin wrapper around amdgpu_bo_free_kernel(); NULL is passed for its two
 * remaining arguments.
 */
void amdgpu_free_static_csa(struct amdgpu_bo **bo)
|
||||
{
|
||||
amdgpu_bo_free_kernel(bo, NULL, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* amdgpu_map_static_csa should be called during amdgpu_vm_init
|
||||
* it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
|
||||
* submission of GFX should use this virtual address within META_DATA init
|
||||
* package to support SRIOV gfx preemption.
|
||||
*/
|
||||
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
|
||||
uint64_t csa_addr, uint32_t size)
|
||||
{
|
||||
struct ww_acquire_ctx ticket;
|
||||
struct list_head list;
|
||||
struct amdgpu_bo_list_entry pd;
|
||||
struct ttm_validate_buffer csa_tv;
|
||||
int r;
|
||||
|
||||
INIT_LIST_HEAD(&list);
|
||||
INIT_LIST_HEAD(&csa_tv.head);
|
||||
csa_tv.bo = &bo->tbo;
|
||||
csa_tv.num_shared = 1;
|
||||
|
||||
list_add(&csa_tv.head, &list);
|
||||
amdgpu_vm_get_pd_bo(vm, &list, &pd);
|
||||
|
||||
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
|
||||
if (!*bo_va) {
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
DRM_ERROR("failed to create bo_va for static CSA\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
|
||||
size);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
|
||||
amdgpu_vm_bo_rmv(adev, *bo_va);
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
|
||||
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
|
||||
AMDGPU_PTE_EXECUTABLE);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
|
||||
amdgpu_vm_bo_rmv(adev, *bo_va);
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return r;
|
||||
}
|
||||
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return 0;
|
||||
}
|
39
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
Normal file
39
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright 2016 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Author: Monk.liu@amd.com
|
||||
*/
|
||||
|
||||
/* Interface of the static CSA helpers implemented in amdgpu_csa.c. */
#ifndef AMDGPU_CSA_MANAGER_H
|
||||
#define AMDGPU_CSA_MANAGER_H
|
||||
|
||||
/* Size in bytes of the static CSA buffer (128 KiB). */
#define AMDGPU_CSA_SIZE (128 * 1024)
|
||||
|
||||
/* NOTE(review): no definition of this function is visible next to the others in amdgpu_csa.c - confirm it exists. */
uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev);
|
||||
/* GPU virtual address at which the CSA is mapped. */
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
|
||||
/* Create a zero-filled kernel BO of @size bytes in @domain; returns 0 or a negative error code. */
int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
|
||||
u32 domain, uint32_t size);
|
||||
/* Map @bo at @csa_addr in @vm and store the mapping in @bo_va; returns 0 or a negative error code. */
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
|
||||
uint64_t csa_addr, uint32_t size);
|
||||
/* Free the BO created by amdgpu_allocate_static_csa(). */
void amdgpu_free_static_csa(struct amdgpu_bo **bo);
|
||||
|
||||
#endif
|
@@ -248,7 +248,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&mgr->lock);
|
||||
r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
|
||||
r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
|
||||
if (r < 0) {
|
||||
mutex_unlock(&mgr->lock);
|
||||
kfree(ctx);
|
||||
|
@@ -59,6 +59,8 @@
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "amdgpu_pm.h"
|
||||
|
||||
#include "amdgpu_xgmi.h"
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
|
||||
@@ -513,6 +515,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
|
||||
*/
|
||||
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
|
||||
{
|
||||
|
||||
/* No doorbell on SI hardware generation */
|
||||
if (adev->asic_type < CHIP_BONAIRE) {
|
||||
adev->doorbell.base = 0;
|
||||
@@ -525,15 +528,26 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
|
||||
if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
|
||||
return -EINVAL;
|
||||
|
||||
amdgpu_asic_init_doorbell_index(adev);
|
||||
|
||||
/* doorbell bar mapping */
|
||||
adev->doorbell.base = pci_resource_start(adev->pdev, 2);
|
||||
adev->doorbell.size = pci_resource_len(adev->pdev, 2);
|
||||
|
||||
adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
|
||||
AMDGPU_DOORBELL_MAX_ASSIGNMENT+1);
|
||||
adev->doorbell_index.max_assignment+1);
|
||||
if (adev->doorbell.num_doorbells == 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* For Vega, reserve and map two pages on doorbell BAR since SDMA
|
||||
* paging queue doorbell use the second page. The
|
||||
* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
|
||||
* doorbells are in the first page. So with paging queue enabled,
|
||||
* the max num_doorbells should + 1 page (0x400 in dword)
|
||||
*/
|
||||
if (adev->asic_type >= CHIP_VEGA10)
|
||||
adev->doorbell.num_doorbells += 0x400;
|
||||
|
||||
adev->doorbell.ptr = ioremap(adev->doorbell.base,
|
||||
adev->doorbell.num_doorbells *
|
||||
sizeof(u32));
|
||||
@@ -1656,7 +1670,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
||||
|
||||
/* right after GMC hw init, we create CSA */
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
r = amdgpu_allocate_static_csa(adev);
|
||||
r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
AMDGPU_CSA_SIZE);
|
||||
if (r) {
|
||||
DRM_ERROR("allocate CSA failed %d\n", r);
|
||||
return r;
|
||||
@@ -1681,7 +1697,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
amdgpu_xgmi_add_device(adev);
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
amdgpu_xgmi_add_device(adev);
|
||||
amdgpu_amdkfd_device_init(adev);
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
@@ -1848,6 +1865,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
int i, r;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
amdgpu_xgmi_remove_device(adev);
|
||||
|
||||
amdgpu_amdkfd_device_fini(adev);
|
||||
|
||||
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
|
||||
@@ -1890,7 +1910,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
|
||||
|
||||
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
|
||||
amdgpu_ucode_free_bo(adev);
|
||||
amdgpu_free_static_csa(adev);
|
||||
amdgpu_free_static_csa(&adev->virt.csa_obj);
|
||||
amdgpu_device_wb_fini(adev);
|
||||
amdgpu_device_vram_scratch_fini(adev);
|
||||
}
|
||||
@@ -2337,6 +2357,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
|
||||
return amdgpu_device_asic_has_dc_support(adev->asic_type);
|
||||
}
|
||||
|
||||
|
||||
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
|
||||
{
|
||||
struct amdgpu_device *adev =
|
||||
container_of(__work, struct amdgpu_device, xgmi_reset_work);
|
||||
|
||||
adev->asic_reset_res = amdgpu_asic_reset(adev);
|
||||
if (adev->asic_reset_res)
|
||||
DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
|
||||
adev->asic_reset_res, adev->ddev->unique);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* amdgpu_device_init - initialize the driver
|
||||
*
|
||||
@@ -2435,6 +2468,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
|
||||
amdgpu_device_delay_enable_gfx_off);
|
||||
|
||||
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
|
||||
|
||||
adev->gfx.gfx_off_req_count = 1;
|
||||
adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
|
||||
|
||||
@@ -2455,9 +2490,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
|
||||
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
|
||||
|
||||
/* doorbell bar mapping */
|
||||
amdgpu_device_doorbell_init(adev);
|
||||
|
||||
/* io port mapping */
|
||||
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
|
||||
if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
|
||||
@@ -2476,6 +2508,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* doorbell bar mapping and doorbell index init*/
|
||||
amdgpu_device_doorbell_init(adev);
|
||||
|
||||
/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
|
||||
/* this will fail for cards that aren't VGA class devices, just
|
||||
* ignore it */
|
||||
@@ -3148,86 +3183,6 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
*
|
||||
* attempt to do soft-reset or full-reset and reinitialize Asic
|
||||
* return 0 means succeeded otherwise failed
|
||||
*/
|
||||
static int amdgpu_device_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
bool need_full_reset, vram_lost = 0;
|
||||
int r;
|
||||
|
||||
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
|
||||
|
||||
if (!need_full_reset) {
|
||||
amdgpu_device_ip_pre_soft_reset(adev);
|
||||
r = amdgpu_device_ip_soft_reset(adev);
|
||||
amdgpu_device_ip_post_soft_reset(adev);
|
||||
if (r || amdgpu_device_ip_check_soft_reset(adev)) {
|
||||
DRM_INFO("soft reset failed, will fallback to full reset!\n");
|
||||
need_full_reset = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (need_full_reset) {
|
||||
r = amdgpu_device_ip_suspend(adev);
|
||||
|
||||
retry:
|
||||
r = amdgpu_asic_reset(adev);
|
||||
/* post card */
|
||||
amdgpu_atom_asic_init(adev->mode_info.atom_context);
|
||||
|
||||
if (!r) {
|
||||
dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
|
||||
r = amdgpu_device_ip_resume_phase1(adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
vram_lost = amdgpu_device_check_vram_lost(adev);
|
||||
if (vram_lost) {
|
||||
DRM_ERROR("VRAM is lost!\n");
|
||||
atomic_inc(&adev->vram_lost_counter);
|
||||
}
|
||||
|
||||
r = amdgpu_gtt_mgr_recover(
|
||||
&adev->mman.bdev.man[TTM_PL_TT]);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
r = amdgpu_device_fw_loading(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_device_ip_resume_phase2(adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
if (vram_lost)
|
||||
amdgpu_device_fill_reset_magic(adev);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (!r) {
|
||||
amdgpu_irq_gpu_reset_resume_helper(adev);
|
||||
r = amdgpu_ib_ring_tests(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "ib ring test failed (%d).\n", r);
|
||||
r = amdgpu_device_ip_suspend(adev);
|
||||
need_full_reset = true;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
if (!r)
|
||||
r = amdgpu_device_recover_vram(adev);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
|
||||
@@ -3295,40 +3250,46 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
|
||||
!amdgpu_sriov_vf(adev))) {
|
||||
DRM_INFO("GPU recovery disabled.\n");
|
||||
return false;
|
||||
if (amdgpu_gpu_recovery == 0)
|
||||
goto disabled;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return true;
|
||||
|
||||
if (amdgpu_gpu_recovery == -1) {
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_BONAIRE:
|
||||
case CHIP_HAWAII:
|
||||
case CHIP_TOPAZ:
|
||||
case CHIP_TONGA:
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
break;
|
||||
default:
|
||||
goto disabled;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
disabled:
|
||||
DRM_INFO("GPU recovery disabled.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
* @job: which job trigger hang
|
||||
*
|
||||
* Attempt to reset the GPU if it has hung (all asics).
|
||||
* Returns 0 for success or an error on failure.
|
||||
*/
|
||||
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job)
|
||||
|
||||
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job,
|
||||
bool *need_full_reset_arg)
|
||||
{
|
||||
int i, r, resched;
|
||||
|
||||
dev_info(adev->dev, "GPU reset begin!\n");
|
||||
|
||||
mutex_lock(&adev->lock_reset);
|
||||
atomic_inc(&adev->gpu_reset_counter);
|
||||
adev->in_gpu_reset = 1;
|
||||
|
||||
/* Block kfd */
|
||||
amdgpu_amdkfd_pre_reset(adev);
|
||||
|
||||
/* block TTM */
|
||||
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
|
||||
int i, r = 0;
|
||||
bool need_full_reset = *need_full_reset_arg;
|
||||
|
||||
/* block all schedulers and reset given job's ring */
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
@@ -3348,10 +3309,144 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
amdgpu_fence_driver_force_completion(ring);
|
||||
}
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
r = amdgpu_device_reset_sriov(adev, job ? false : true);
|
||||
else
|
||||
r = amdgpu_device_reset(adev);
|
||||
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
|
||||
if (!need_full_reset)
|
||||
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
|
||||
|
||||
if (!need_full_reset) {
|
||||
amdgpu_device_ip_pre_soft_reset(adev);
|
||||
r = amdgpu_device_ip_soft_reset(adev);
|
||||
amdgpu_device_ip_post_soft_reset(adev);
|
||||
if (r || amdgpu_device_ip_check_soft_reset(adev)) {
|
||||
DRM_INFO("soft reset failed, will fallback to full reset!\n");
|
||||
need_full_reset = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (need_full_reset)
|
||||
r = amdgpu_device_ip_suspend(adev);
|
||||
|
||||
*need_full_reset_arg = need_full_reset;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
|
||||
struct list_head *device_list_handle,
|
||||
bool *need_full_reset_arg)
|
||||
{
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
bool need_full_reset = *need_full_reset_arg, vram_lost = false;
|
||||
int r = 0;
|
||||
|
||||
/*
|
||||
* ASIC reset has to be done on all HGMI hive nodes ASAP
|
||||
* to allow proper links negotiation in FW (within 1 sec)
|
||||
*/
|
||||
if (need_full_reset) {
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
/* For XGMI run all resets in parallel to speed up the process */
|
||||
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
|
||||
r = -EALREADY;
|
||||
} else
|
||||
r = amdgpu_asic_reset(tmp_adev);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s",
|
||||
r, tmp_adev->ddev->unique);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* For XGMI wait for all PSP resets to complete before proceed */
|
||||
if (!r) {
|
||||
list_for_each_entry(tmp_adev, device_list_handle,
|
||||
gmc.xgmi.head) {
|
||||
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
flush_work(&tmp_adev->xgmi_reset_work);
|
||||
r = tmp_adev->asic_reset_res;
|
||||
if (r)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
if (need_full_reset) {
|
||||
/* post card */
|
||||
if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
|
||||
DRM_WARN("asic atom init failed!");
|
||||
|
||||
if (!r) {
|
||||
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
|
||||
r = amdgpu_device_ip_resume_phase1(tmp_adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
|
||||
if (vram_lost) {
|
||||
DRM_ERROR("VRAM is lost!\n");
|
||||
atomic_inc(&tmp_adev->vram_lost_counter);
|
||||
}
|
||||
|
||||
r = amdgpu_gtt_mgr_recover(
|
||||
&tmp_adev->mman.bdev.man[TTM_PL_TT]);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
r = amdgpu_device_fw_loading(tmp_adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_device_ip_resume_phase2(tmp_adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
if (vram_lost)
|
||||
amdgpu_device_fill_reset_magic(tmp_adev);
|
||||
|
||||
/* Update PSP FW topology after reset */
|
||||
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
out:
|
||||
if (!r) {
|
||||
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
|
||||
r = amdgpu_ib_ring_tests(tmp_adev);
|
||||
if (r) {
|
||||
dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
|
||||
r = amdgpu_device_ip_suspend(tmp_adev);
|
||||
need_full_reset = true;
|
||||
r = -EAGAIN;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
if (!r)
|
||||
r = amdgpu_device_recover_vram(tmp_adev);
|
||||
else
|
||||
tmp_adev->asic_reset_res = r;
|
||||
}
|
||||
|
||||
end:
|
||||
*need_full_reset_arg = need_full_reset;
|
||||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
@@ -3363,7 +3458,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
* or all rings (in the case @job is NULL)
|
||||
* after above amdgpu_reset accomplished
|
||||
*/
|
||||
if ((!job || job->base.sched == &ring->sched) && !r)
|
||||
if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
|
||||
drm_sched_job_recovery(&ring->sched);
|
||||
|
||||
kthread_unpark(ring->sched.thread);
|
||||
@@ -3373,21 +3468,142 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
drm_helper_resume_force_mode(adev->ddev);
|
||||
}
|
||||
|
||||
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
|
||||
adev->asic_reset_res = 0;
|
||||
}
|
||||
|
||||
if (r) {
|
||||
/* bad news, how to tell it to userspace ? */
|
||||
dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
|
||||
} else {
|
||||
dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
|
||||
}
|
||||
static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
|
||||
{
|
||||
mutex_lock(&adev->lock_reset);
|
||||
atomic_inc(&adev->gpu_reset_counter);
|
||||
adev->in_gpu_reset = 1;
|
||||
/* Block kfd */
|
||||
amdgpu_amdkfd_pre_reset(adev);
|
||||
}
|
||||
|
||||
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
|
||||
{
|
||||
/*unlock kfd */
|
||||
amdgpu_amdkfd_post_reset(adev);
|
||||
amdgpu_vf_error_trans_all(adev);
|
||||
adev->in_gpu_reset = 0;
|
||||
mutex_unlock(&adev->lock_reset);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
* @job: which job trigger hang
|
||||
*
|
||||
* Attempt to reset the GPU if it has hung (all asics).
|
||||
* Attempt to do soft-reset or full-reset and reinitialize Asic
|
||||
* Returns 0 for success or an error on failure.
|
||||
*/
|
||||
|
||||
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job)
|
||||
{
|
||||
int r;
|
||||
struct amdgpu_hive_info *hive = NULL;
|
||||
bool need_full_reset = false;
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
struct list_head device_list, *device_list_handle = NULL;
|
||||
|
||||
INIT_LIST_HEAD(&device_list);
|
||||
|
||||
dev_info(adev->dev, "GPU reset begin!\n");
|
||||
|
||||
/*
|
||||
* In case of XGMI hive disallow concurrent resets to be triggered
|
||||
* by different nodes. No point also since the one node already executing
|
||||
* reset will also reset all the other nodes in the hive.
|
||||
*/
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
!mutex_trylock(&hive->hive_lock))
|
||||
return 0;
|
||||
|
||||
/* Start with adev pre asic reset first for soft reset check.*/
|
||||
amdgpu_device_lock_adev(adev);
|
||||
r = amdgpu_device_pre_asic_reset(adev,
|
||||
job,
|
||||
&need_full_reset);
|
||||
if (r) {
|
||||
/*TODO Should we stop ?*/
|
||||
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
|
||||
r, adev->ddev->unique);
|
||||
adev->asic_reset_res = r;
|
||||
}
|
||||
|
||||
/* Build list of devices to reset */
|
||||
if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
if (!hive) {
|
||||
amdgpu_device_unlock_adev(adev);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/*
|
||||
* In case we are in XGMI hive mode device reset is done for all the
|
||||
* nodes in the hive to retrain all XGMI links and hence the reset
|
||||
* sequence is executed in loop on all nodes.
|
||||
*/
|
||||
device_list_handle = &hive->device_list;
|
||||
} else {
|
||||
list_add_tail(&adev->gmc.xgmi.head, &device_list);
|
||||
device_list_handle = &device_list;
|
||||
}
|
||||
|
||||
retry: /* Rest of adevs pre asic reset from XGMI hive. */
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
|
||||
if (tmp_adev == adev)
|
||||
continue;
|
||||
|
||||
amdgpu_device_lock_adev(tmp_adev);
|
||||
r = amdgpu_device_pre_asic_reset(tmp_adev,
|
||||
NULL,
|
||||
&need_full_reset);
|
||||
/*TODO Should we stop ?*/
|
||||
if (r) {
|
||||
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
|
||||
r, tmp_adev->ddev->unique);
|
||||
tmp_adev->asic_reset_res = r;
|
||||
}
|
||||
}
|
||||
|
||||
/* Actual ASIC resets if needed.*/
|
||||
/* TODO Implement XGMI hive reset logic for SRIOV */
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
r = amdgpu_device_reset_sriov(adev, job ? false : true);
|
||||
if (r)
|
||||
adev->asic_reset_res = r;
|
||||
} else {
|
||||
r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
|
||||
if (r && r == -EAGAIN)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* Post ASIC reset for all devs .*/
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
|
||||
|
||||
if (r) {
|
||||
/* bad news, how to tell it to userspace ? */
|
||||
dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
|
||||
amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
|
||||
} else {
|
||||
dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
|
||||
}
|
||||
|
||||
amdgpu_device_unlock_adev(tmp_adev);
|
||||
}
|
||||
|
||||
if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
|
||||
if (r)
|
||||
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@@ -631,6 +631,11 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
|
||||
drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
|
||||
if (!adev->mode_info.max_bpc_property)
|
||||
return -ENOMEM;
|
||||
adev->mode_info.abm_level_property =
|
||||
drm_property_create_range(adev->ddev, 0,
|
||||
"abm level", 0, 4);
|
||||
if (!adev->mode_info.abm_level_property)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -857,7 +862,12 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
|
||||
/* Inside "upper part" of vblank area? Apply corrective offset if so: */
|
||||
if (in_vbl && (*vpos >= vbl_start)) {
|
||||
vtotal = mode->crtc_vtotal;
|
||||
*vpos = *vpos - vtotal;
|
||||
|
||||
/* With variable refresh rate displays the vpos can exceed
|
||||
* the vtotal value. Clamp to 0 to return -vbl_end instead
|
||||
* of guessing the remaining number of lines until scanout.
|
||||
*/
|
||||
*vpos = (*vpos < vtotal) ? (*vpos - vtotal) : 0;
|
||||
}
|
||||
|
||||
/* Correct for shifted end of vbl at vbl_end. */
|
||||
|
243
drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
Normal file
243
drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
Normal file
@@ -0,0 +1,243 @@
|
||||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* GPU doorbell structures, functions & helpers
|
||||
*/
|
||||
struct amdgpu_doorbell {
|
||||
/* doorbell mmio */
|
||||
resource_size_t base;
|
||||
resource_size_t size;
|
||||
u32 __iomem *ptr;
|
||||
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
|
||||
};
|
||||
|
||||
/*
 * Reserved doorbells for amdgpu (including multimedia).
 * KFD can use all the rest in the 2M doorbell bar.
 *
 * For ASICs before vega10 the doorbell is 32-bit, so the index/offset is
 * in dwords.  For vega10 and later the doorbell can be 64-bit, so the
 * index defined here is in qwords.
 */
struct amdgpu_doorbell_index {
	/* KIQ and the eight MEC (compute) rings */
	uint32_t kiq;
	uint32_t mec_ring0;
	uint32_t mec_ring1;
	uint32_t mec_ring2;
	uint32_t mec_ring3;
	uint32_t mec_ring4;
	uint32_t mec_ring5;
	uint32_t mec_ring6;
	uint32_t mec_ring7;

	/* User queue doorbell range */
	uint32_t userqueue_start;
	uint32_t userqueue_end;

	/* Graphics ring */
	uint32_t gfx_ring0;

	/* SDMA engines */
	uint32_t sdma_engine0;
	uint32_t sdma_engine1;
	uint32_t sdma_engine2;
	uint32_t sdma_engine3;
	uint32_t sdma_engine4;
	uint32_t sdma_engine5;
	uint32_t sdma_engine6;
	uint32_t sdma_engine7;

	/* Interrupt handler */
	uint32_t ih;

	/* Multimedia doorbells: the VCN view and the UVD/VCE view share storage */
	union {
		struct {
			uint32_t vcn_ring0_1;
			uint32_t vcn_ring2_3;
			uint32_t vcn_ring4_5;
			uint32_t vcn_ring6_7;
		} vcn;
		struct {
			uint32_t uvd_ring0_1;
			uint32_t uvd_ring2_3;
			uint32_t uvd_ring4_5;
			uint32_t uvd_ring6_7;
			uint32_t vce_ring0_1;
			uint32_t vce_ring2_3;
			uint32_t vce_ring4_5;
			uint32_t vce_ring6_7;
		} uvd_vce;
	};

	uint32_t max_assignment;
};
|
||||
|
||||
/* Fixed doorbell index assignment; the IDs below are enumerator values only. */
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
{
	/* KIQ, HIQ and DIQ */
	AMDGPU_DOORBELL_KIQ                     = 0x000,
	AMDGPU_DOORBELL_HIQ                     = 0x001,
	AMDGPU_DOORBELL_DIQ                     = 0x002,

	/* MEC rings */
	AMDGPU_DOORBELL_MEC_RING0               = 0x010,
	AMDGPU_DOORBELL_MEC_RING1               = 0x011,
	AMDGPU_DOORBELL_MEC_RING2               = 0x012,
	AMDGPU_DOORBELL_MEC_RING3               = 0x013,
	AMDGPU_DOORBELL_MEC_RING4               = 0x014,
	AMDGPU_DOORBELL_MEC_RING5               = 0x015,
	AMDGPU_DOORBELL_MEC_RING6               = 0x016,
	AMDGPU_DOORBELL_MEC_RING7               = 0x017,

	/* GFX ring */
	AMDGPU_DOORBELL_GFX_RING0               = 0x020,

	/* SDMA engines */
	AMDGPU_DOORBELL_sDMA_ENGINE0            = 0x1E0,
	AMDGPU_DOORBELL_sDMA_ENGINE1            = 0x1E1,

	/* Interrupt handler */
	AMDGPU_DOORBELL_IH                      = 0x1E8,

	AMDGPU_DOORBELL_MAX_ASSIGNMENT          = 0x3FF,
	AMDGPU_DOORBELL_INVALID                 = 0xFFFF
} AMDGPU_DOORBELL_ASSIGNMENT;
|
||||
|
||||
/* Doorbell index assignment for Vega20. */
typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
{
	/* Compute + GFX: 0~255 */
	AMDGPU_VEGA20_DOORBELL_KIQ                     = 0x000,
	AMDGPU_VEGA20_DOORBELL_HIQ                     = 0x001,
	AMDGPU_VEGA20_DOORBELL_DIQ                     = 0x002,
	AMDGPU_VEGA20_DOORBELL_MEC_RING0               = 0x003,
	AMDGPU_VEGA20_DOORBELL_MEC_RING1               = 0x004,
	AMDGPU_VEGA20_DOORBELL_MEC_RING2               = 0x005,
	AMDGPU_VEGA20_DOORBELL_MEC_RING3               = 0x006,
	AMDGPU_VEGA20_DOORBELL_MEC_RING4               = 0x007,
	AMDGPU_VEGA20_DOORBELL_MEC_RING5               = 0x008,
	AMDGPU_VEGA20_DOORBELL_MEC_RING6               = 0x009,
	AMDGPU_VEGA20_DOORBELL_MEC_RING7               = 0x00A,
	AMDGPU_VEGA20_DOORBELL_USERQUEUE_START         = 0x00B,
	AMDGPU_VEGA20_DOORBELL_USERQUEUE_END           = 0x08A,
	AMDGPU_VEGA20_DOORBELL_GFX_RING0               = 0x08B,

	/* SDMA: 256~335 */
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0            = 0x100,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1            = 0x10A,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2            = 0x114,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3            = 0x11E,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4            = 0x128,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5            = 0x132,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6            = 0x13C,
	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7            = 0x146,

	/* IH: 376~391 */
	AMDGPU_VEGA20_DOORBELL_IH                      = 0x178,

	/*
	 * MMSCH: 392~407
	 * The UVD/VCE doorbell assignment overlaps the VCN one as they are
	 * mutually exclusive.  A VCE engine's doorbell is 32 bit and two VCE
	 * rings share one QWORD.
	 */
	/* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
	AMDGPU_VEGA20_DOORBELL64_VCN0_1                = 0x188,
	AMDGPU_VEGA20_DOORBELL64_VCN2_3                = 0x189,
	AMDGPU_VEGA20_DOORBELL64_VCN4_5                = 0x18A,
	AMDGPU_VEGA20_DOORBELL64_VCN6_7                = 0x18B,

	AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1           = 0x188,
	AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3           = 0x189,
	AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5           = 0x18A,
	AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7           = 0x18B,

	AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1           = 0x18C,
	AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3           = 0x18D,
	AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5           = 0x18E,
	AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7           = 0x18F,

	AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT          = 0x18F,
	AMDGPU_VEGA20_DOORBELL_INVALID                 = 0xFFFF
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
|
||||
|
||||
/*
 * 64-bit doorbells: offsets are in QWORDs and occupy 2KB of doorbell space.
 */
typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
{
	/*
	 * All compute related doorbells (kiq, hiq, diq, traditional compute
	 * queue, user queue) should be located in a contiguous range so that
	 * programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
	 * Compute related doorbells are allocated from 0x00 to 0x8a.
	 */

	/* kernel scheduling */
	AMDGPU_DOORBELL64_KIQ                     = 0x00,

	/* HSA interface queue and debug queue */
	AMDGPU_DOORBELL64_HIQ                     = 0x01,
	AMDGPU_DOORBELL64_DIQ                     = 0x02,

	/* Compute engines */
	AMDGPU_DOORBELL64_MEC_RING0               = 0x03,
	AMDGPU_DOORBELL64_MEC_RING1               = 0x04,
	AMDGPU_DOORBELL64_MEC_RING2               = 0x05,
	AMDGPU_DOORBELL64_MEC_RING3               = 0x06,
	AMDGPU_DOORBELL64_MEC_RING4               = 0x07,
	AMDGPU_DOORBELL64_MEC_RING5               = 0x08,
	AMDGPU_DOORBELL64_MEC_RING6               = 0x09,
	AMDGPU_DOORBELL64_MEC_RING7               = 0x0a,

	/* User queue doorbell range (128 doorbells) */
	AMDGPU_DOORBELL64_USERQUEUE_START         = 0x0b,
	AMDGPU_DOORBELL64_USERQUEUE_END           = 0x8a,

	/* Graphics engine */
	AMDGPU_DOORBELL64_GFX_RING0               = 0x8b,

	/*
	 * Other graphics doorbells can be allocated here: from 0x8c to 0xdf
	 * Graphics voltage island aperture 1
	 * default non-graphics QWORD index is 0xe0 - 0xFF inclusive
	 */

	/*
	 * For vega10 SR-IOV the SDMA doorbells must stay fixed as follows to
	 * keep the same setting as the host driver; otherwise conflicts will
	 * occur.
	 */
	AMDGPU_DOORBELL64_sDMA_ENGINE0            = 0xF0,
	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xF1,
	AMDGPU_DOORBELL64_sDMA_ENGINE1            = 0xF2,
	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xF3,

	/* Interrupt handler */
	AMDGPU_DOORBELL64_IH                      = 0xF4,  /* For legacy interrupt ring buffer */
	AMDGPU_DOORBELL64_IH_RING1                = 0xF5,  /* For page migration request log */
	AMDGPU_DOORBELL64_IH_RING2                = 0xF6,  /* For page migration translation/invalidation log */

	/* VCN engines use 32-bit doorbells */
	AMDGPU_DOORBELL64_VCN0_1                  = 0xF8,  /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
	AMDGPU_DOORBELL64_VCN2_3                  = 0xF9,
	AMDGPU_DOORBELL64_VCN4_5                  = 0xFA,
	AMDGPU_DOORBELL64_VCN6_7                  = 0xFB,

	/*
	 * The UVD/VCE doorbell assignment overlaps the VCN one as they are
	 * mutually exclusive.  A VCE engine's doorbell is 32 bit and two VCE
	 * rings share one QWORD.
	 */
	AMDGPU_DOORBELL64_UVD_RING0_1             = 0xF8,
	AMDGPU_DOORBELL64_UVD_RING2_3             = 0xF9,
	AMDGPU_DOORBELL64_UVD_RING4_5             = 0xFA,
	AMDGPU_DOORBELL64_UVD_RING6_7             = 0xFB,

	AMDGPU_DOORBELL64_VCE_RING0_1             = 0xFC,
	AMDGPU_DOORBELL64_VCE_RING2_3             = 0xFD,
	AMDGPU_DOORBELL64_VCE_RING4_5             = 0xFE,
	AMDGPU_DOORBELL64_VCE_RING6_7             = 0xFF,

	AMDGPU_DOORBELL64_MAX_ASSIGNMENT          = 0xFF,
	AMDGPU_DOORBELL64_INVALID                 = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;
|
||||
|
||||
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
|
||||
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
|
||||
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
|
||||
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
|
||||
|
||||
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
|
||||
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
|
||||
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
|
||||
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
|
||||
|
@@ -454,9 +454,10 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
|
||||
|
||||
/**
|
||||
* DOC: param_buf_per_se (int)
|
||||
* Override the size of Off-Chip Pramater Cache per Shader Engine in Byte. The default is 0 (depending on gfx).
|
||||
* Override the size of Off-Chip Parameter Cache per Shader Engine in Byte.
|
||||
* The default is 0 (depending on gfx).
|
||||
*/
|
||||
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
|
||||
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)");
|
||||
module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
|
||||
|
||||
/**
|
||||
@@ -1227,9 +1228,6 @@ static struct drm_driver kms_driver = {
|
||||
.patchlevel = KMS_DRIVER_PATCHLEVEL,
|
||||
};
|
||||
|
||||
static struct drm_driver *driver;
|
||||
static struct pci_driver *pdriver;
|
||||
|
||||
static struct pci_driver amdgpu_kms_pci_driver = {
|
||||
.name = DRIVER_NAME,
|
||||
.id_table = pciidlist,
|
||||
@@ -1259,16 +1257,14 @@ static int __init amdgpu_init(void)
|
||||
goto error_fence;
|
||||
|
||||
DRM_INFO("amdgpu kernel modesetting enabled.\n");
|
||||
driver = &kms_driver;
|
||||
pdriver = &amdgpu_kms_pci_driver;
|
||||
driver->num_ioctls = amdgpu_max_kms_ioctl;
|
||||
kms_driver.num_ioctls = amdgpu_max_kms_ioctl;
|
||||
amdgpu_register_atpx_handler();
|
||||
|
||||
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
|
||||
amdgpu_amdkfd_init();
|
||||
|
||||
/* let modprobe override vga console setting */
|
||||
return pci_register_driver(pdriver);
|
||||
return pci_register_driver(&amdgpu_kms_pci_driver);
|
||||
|
||||
error_fence:
|
||||
amdgpu_sync_fini();
|
||||
@@ -1280,7 +1276,7 @@ error_sync:
|
||||
static void __exit amdgpu_exit(void)
|
||||
{
|
||||
amdgpu_amdkfd_fini();
|
||||
pci_unregister_driver(pdriver);
|
||||
pci_unregister_driver(&amdgpu_kms_pci_driver);
|
||||
amdgpu_unregister_atpx_handler();
|
||||
amdgpu_sync_fini();
|
||||
amdgpu_fence_slab_fini();
|
||||
|
@@ -398,9 +398,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
|
||||
ring->fence_drv.irq_type = irq_type;
|
||||
ring->fence_drv.initialized = true;
|
||||
|
||||
dev_dbg(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, "
|
||||
"cpu addr 0x%p\n", ring->idx,
|
||||
ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
|
||||
DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr "
|
||||
"0x%016llx, cpu addr 0x%p\n", ring->name,
|
||||
ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
|
||||
}
|
||||
mb();
|
||||
amdgpu_asic_flush_hdp(adev, NULL);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -259,6 +259,8 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
|
||||
* @offset: offset into the GPU's gart aperture
|
||||
* @pages: number of pages to bind
|
||||
* @dma_addr: DMA addresses of pages
|
||||
* @flags: page table entry flags
|
||||
* @dst: CPU address of the gart table
|
||||
*
|
||||
* Map the dma_addresses into GART entries (all asics).
|
||||
* Returns 0 for success, -EINVAL for failure.
|
||||
@@ -331,7 +333,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
|
||||
|
||||
mb();
|
||||
amdgpu_asic_flush_hdp(adev, NULL);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -41,6 +41,7 @@ struct amdgpu_bo;
|
||||
|
||||
struct amdgpu_gart {
|
||||
struct amdgpu_bo *bo;
|
||||
/* CPU kmapped address of gart table */
|
||||
void *ptr;
|
||||
unsigned num_gpu_pages;
|
||||
unsigned num_cpu_pages;
|
||||
|
@@ -169,7 +169,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
|
||||
INIT_LIST_HEAD(&duplicates);
|
||||
|
||||
tv.bo = &bo->tbo;
|
||||
tv.shared = true;
|
||||
tv.num_shared = 1;
|
||||
list_add(&tv.head, &list);
|
||||
|
||||
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
|
||||
@@ -604,7 +604,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
|
||||
return -ENOENT;
|
||||
abo = gem_to_amdgpu_bo(gobj);
|
||||
tv.bo = &abo->tbo;
|
||||
tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
|
||||
if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
|
||||
tv.num_shared = 1;
|
||||
else
|
||||
tv.num_shared = 0;
|
||||
list_add(&tv.head, &list);
|
||||
} else {
|
||||
gobj = NULL;
|
||||
|
@@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
|
||||
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
|
||||
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
|
||||
|
||||
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
|
||||
|
||||
/*
|
||||
* GEM objects.
|
||||
*/
|
||||
|
@@ -25,6 +25,7 @@
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include "amdgpu_rlc.h"
|
||||
|
||||
/* delay 0.1 second to enable gfx off feature */
|
||||
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
|
||||
@@ -249,7 +250,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
|
||||
ring->adev = NULL;
|
||||
ring->ring_obj = NULL;
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
|
||||
ring->doorbell_index = adev->doorbell_index.kiq;
|
||||
|
||||
r = amdgpu_gfx_kiq_acquire(adev, ring);
|
||||
if (r)
|
||||
|
@@ -29,6 +29,7 @@
|
||||
*/
|
||||
#include "clearstate_defs.h"
|
||||
#include "amdgpu_ring.h"
|
||||
#include "amdgpu_rlc.h"
|
||||
|
||||
/* GFX current status */
|
||||
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
|
||||
@@ -37,59 +38,6 @@
|
||||
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
|
||||
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
|
||||
|
||||
|
||||
struct amdgpu_rlc_funcs {
|
||||
void (*enter_safe_mode)(struct amdgpu_device *adev);
|
||||
void (*exit_safe_mode)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_rlc {
|
||||
/* for power gating */
|
||||
struct amdgpu_bo *save_restore_obj;
|
||||
uint64_t save_restore_gpu_addr;
|
||||
volatile uint32_t *sr_ptr;
|
||||
const u32 *reg_list;
|
||||
u32 reg_list_size;
|
||||
/* for clear state */
|
||||
struct amdgpu_bo *clear_state_obj;
|
||||
uint64_t clear_state_gpu_addr;
|
||||
volatile uint32_t *cs_ptr;
|
||||
const struct cs_section_def *cs_data;
|
||||
u32 clear_state_size;
|
||||
/* for cp tables */
|
||||
struct amdgpu_bo *cp_table_obj;
|
||||
uint64_t cp_table_gpu_addr;
|
||||
volatile uint32_t *cp_table_ptr;
|
||||
u32 cp_table_size;
|
||||
|
||||
/* safe mode for updating CG/PG state */
|
||||
bool in_safe_mode;
|
||||
const struct amdgpu_rlc_funcs *funcs;
|
||||
|
||||
/* for firmware data */
|
||||
u32 save_and_restore_offset;
|
||||
u32 clear_state_descriptor_offset;
|
||||
u32 avail_scratch_ram_locations;
|
||||
u32 reg_restore_list_size;
|
||||
u32 reg_list_format_start;
|
||||
u32 reg_list_format_separate_start;
|
||||
u32 starting_offsets_start;
|
||||
u32 reg_list_format_size_bytes;
|
||||
u32 reg_list_size_bytes;
|
||||
u32 reg_list_format_direct_reg_list_length;
|
||||
u32 save_restore_list_cntl_size_bytes;
|
||||
u32 save_restore_list_gpm_size_bytes;
|
||||
u32 save_restore_list_srm_size_bytes;
|
||||
|
||||
u32 *register_list_format;
|
||||
u32 *register_restore;
|
||||
u8 *save_restore_list_cntl;
|
||||
u8 *save_restore_list_gpm;
|
||||
u8 *save_restore_list_srm;
|
||||
|
||||
bool is_rlc_v2_1;
|
||||
};
|
||||
|
||||
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
|
||||
|
||||
struct amdgpu_mec {
|
||||
|
@@ -64,7 +64,7 @@ struct amdgpu_vmhub {
|
||||
struct amdgpu_gmc_funcs {
|
||||
/* flush the vm tlb via mmio */
|
||||
void (*flush_gpu_tlb)(struct amdgpu_device *adev,
|
||||
uint32_t vmid);
|
||||
uint32_t vmid, uint32_t flush_type);
|
||||
/* flush the vm tlb via ring */
|
||||
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
|
||||
uint64_t pd_addr);
|
||||
@@ -89,7 +89,7 @@ struct amdgpu_gmc_funcs {
|
||||
|
||||
struct amdgpu_xgmi {
|
||||
/* from psp */
|
||||
u64 device_id;
|
||||
u64 node_id;
|
||||
u64 hive_id;
|
||||
/* fixed per family */
|
||||
u64 node_segment_size;
|
||||
@@ -99,6 +99,7 @@ struct amdgpu_xgmi {
|
||||
unsigned num_physical_nodes;
|
||||
/* gpu list in the same hive */
|
||||
struct list_head head;
|
||||
bool supported;
|
||||
};
|
||||
|
||||
struct amdgpu_gmc {
|
||||
@@ -151,7 +152,7 @@ struct amdgpu_gmc {
|
||||
struct amdgpu_xgmi xgmi;
|
||||
};
|
||||
|
||||
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
|
||||
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
|
||||
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
|
||||
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
|
||||
#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
|
||||
|
@@ -146,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
||||
fence_ctx = 0;
|
||||
}
|
||||
|
||||
if (!ring->ready) {
|
||||
if (!ring->sched.ready) {
|
||||
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
||||
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
|
||||
continue;
|
||||
|
||||
amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
|
||||
need_ctx_switch);
|
||||
amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch);
|
||||
need_ctx_switch = false;
|
||||
}
|
||||
|
||||
@@ -347,19 +346,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
|
||||
tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
|
||||
}
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
for (i = 0; i < adev->num_rings; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
long tmo;
|
||||
|
||||
if (!ring || !ring->ready)
|
||||
continue;
|
||||
|
||||
/* skip IB tests for KIQ in general for the below reasons:
|
||||
* 1. We never submit IBs to the KIQ
|
||||
* 2. KIQ doesn't use the EOP interrupts,
|
||||
* we use some other CP interrupt.
|
||||
/* KIQ rings don't have an IB test because we never submit IBs
|
||||
* to them and they have no interrupt support.
|
||||
*/
|
||||
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
|
||||
if (!ring->sched.ready || !ring->funcs->test_ib)
|
||||
continue;
|
||||
|
||||
/* MM engine need more time */
|
||||
@@ -374,20 +368,23 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
|
||||
tmo = tmo_gfx;
|
||||
|
||||
r = amdgpu_ring_test_ib(ring, tmo);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
if (!r) {
|
||||
DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
|
||||
ring->name);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ring == &adev->gfx.gfx_ring[0]) {
|
||||
/* oh, oh, that's really bad */
|
||||
DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r);
|
||||
adev->accel_working = false;
|
||||
return r;
|
||||
ring->sched.ready = false;
|
||||
DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n",
|
||||
ring->name, r);
|
||||
|
||||
} else {
|
||||
/* still not good, but we can live with it */
|
||||
DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
|
||||
ret = r;
|
||||
}
|
||||
if (ring == &adev->gfx.gfx_ring[0]) {
|
||||
/* oh, oh, that's really bad */
|
||||
adev->accel_working = false;
|
||||
return r;
|
||||
|
||||
} else {
|
||||
ret = r;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
@@ -51,14 +51,12 @@ struct amdgpu_ih_ring {
|
||||
struct amdgpu_ih_funcs {
|
||||
/* ring read/write ptr handling, called from interrupt context */
|
||||
u32 (*get_wptr)(struct amdgpu_device *adev);
|
||||
bool (*prescreen_iv)(struct amdgpu_device *adev);
|
||||
void (*decode_iv)(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
void (*set_rptr)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
|
||||
#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
|
||||
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
|
||||
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
|
||||
|
||||
|
@@ -93,23 +93,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
|
||||
drm_helper_hpd_irq_event(dev);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_irq_reset_work_func - execute GPU reset
|
||||
*
|
||||
* @work: work struct pointer
|
||||
*
|
||||
* Execute scheduled GPU reset (Cayman+).
|
||||
* This function is called when the IRQ handler thinks we need a GPU reset.
|
||||
*/
|
||||
static void amdgpu_irq_reset_work_func(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
|
||||
reset_work);
|
||||
|
||||
if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
|
||||
amdgpu_device_gpu_recover(adev, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_irq_disable_all - disable *all* interrupts
|
||||
*
|
||||
@@ -162,13 +145,6 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev,
|
||||
u32 ring_index = ih->rptr >> 2;
|
||||
struct amdgpu_iv_entry entry;
|
||||
|
||||
/* Prescreening of high-frequency interrupts */
|
||||
if (!amdgpu_ih_prescreen_iv(adev))
|
||||
return;
|
||||
|
||||
/* Before dispatching irq to IP blocks, send it to amdkfd */
|
||||
amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]);
|
||||
|
||||
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
|
||||
amdgpu_ih_decode_iv(adev, &entry);
|
||||
|
||||
@@ -262,15 +238,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
|
||||
amdgpu_hotplug_work_func);
|
||||
}
|
||||
|
||||
INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func);
|
||||
|
||||
adev->irq.installed = true;
|
||||
r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
|
||||
if (r) {
|
||||
adev->irq.installed = false;
|
||||
if (!amdgpu_device_has_dc_support(adev))
|
||||
flush_work(&adev->hotplug_work);
|
||||
cancel_work_sync(&adev->reset_work);
|
||||
return r;
|
||||
}
|
||||
adev->ddev->max_vblank_count = 0x00ffffff;
|
||||
@@ -299,7 +272,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
|
||||
pci_disable_msi(adev->pdev);
|
||||
if (!amdgpu_device_has_dc_support(adev))
|
||||
flush_work(&adev->hotplug_work);
|
||||
cancel_work_sync(&adev->reset_work);
|
||||
}
|
||||
|
||||
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
|
||||
@@ -392,39 +364,38 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
|
||||
unsigned client_id = entry->client_id;
|
||||
unsigned src_id = entry->src_id;
|
||||
struct amdgpu_irq_src *src;
|
||||
bool handled = false;
|
||||
int r;
|
||||
|
||||
trace_amdgpu_iv(entry);
|
||||
|
||||
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
|
||||
DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
|
||||
return;
|
||||
}
|
||||
|
||||
if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
|
||||
} else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
|
||||
DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
|
||||
return;
|
||||
}
|
||||
|
||||
if (adev->irq.virq[src_id]) {
|
||||
} else if (adev->irq.virq[src_id]) {
|
||||
generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id));
|
||||
} else {
|
||||
if (!adev->irq.client[client_id].sources) {
|
||||
DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
|
||||
client_id, src_id);
|
||||
return;
|
||||
}
|
||||
|
||||
src = adev->irq.client[client_id].sources[src_id];
|
||||
if (!src) {
|
||||
DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
|
||||
return;
|
||||
}
|
||||
} else if (!adev->irq.client[client_id].sources) {
|
||||
DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
|
||||
client_id, src_id);
|
||||
|
||||
} else if ((src = adev->irq.client[client_id].sources[src_id])) {
|
||||
r = src->funcs->process(adev, src, entry);
|
||||
if (r)
|
||||
if (r < 0)
|
||||
DRM_ERROR("error processing interrupt (%d)\n", r);
|
||||
else if (r)
|
||||
handled = true;
|
||||
|
||||
} else {
|
||||
DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
|
||||
}
|
||||
|
||||
/* Send it to amdkfd as well if it isn't already handled */
|
||||
if (!handled)
|
||||
amdgpu_amdkfd_interrupt(adev, entry->iv_entry);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -112,6 +112,8 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
|
||||
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
|
||||
struct amdgpu_job *job = to_amdgpu_job(s_job);
|
||||
|
||||
drm_sched_job_cleanup(s_job);
|
||||
|
||||
amdgpu_ring_priority_put(ring, s_job->s_priority);
|
||||
dma_fence_put(job->fence);
|
||||
amdgpu_sync_free(&job->sync);
|
||||
|
@@ -33,6 +33,8 @@
|
||||
#define to_amdgpu_job(sched_job) \
|
||||
container_of((sched_job), struct amdgpu_job, base)
|
||||
|
||||
#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)
|
||||
|
||||
struct amdgpu_fence;
|
||||
|
||||
struct amdgpu_job {
|
||||
|
@@ -336,7 +336,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
case AMDGPU_HW_IP_GFX:
|
||||
type = AMD_IP_BLOCK_TYPE_GFX;
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
if (adev->gfx.gfx_ring[i].ready)
|
||||
if (adev->gfx.gfx_ring[i].sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 32;
|
||||
ib_size_alignment = 32;
|
||||
@@ -344,7 +344,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
case AMDGPU_HW_IP_COMPUTE:
|
||||
type = AMD_IP_BLOCK_TYPE_GFX;
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
if (adev->gfx.compute_ring[i].ready)
|
||||
if (adev->gfx.compute_ring[i].sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 32;
|
||||
ib_size_alignment = 32;
|
||||
@@ -352,7 +352,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
case AMDGPU_HW_IP_DMA:
|
||||
type = AMD_IP_BLOCK_TYPE_SDMA;
|
||||
for (i = 0; i < adev->sdma.num_instances; i++)
|
||||
if (adev->sdma.instance[i].ring.ready)
|
||||
if (adev->sdma.instance[i].ring.sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 256;
|
||||
ib_size_alignment = 4;
|
||||
@@ -363,7 +363,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
if (adev->uvd.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
if (adev->uvd.inst[i].ring.ready)
|
||||
if (adev->uvd.inst[i].ring.sched.ready)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 64;
|
||||
@@ -372,7 +372,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
case AMDGPU_HW_IP_VCE:
|
||||
type = AMD_IP_BLOCK_TYPE_VCE;
|
||||
for (i = 0; i < adev->vce.num_rings; i++)
|
||||
if (adev->vce.ring[i].ready)
|
||||
if (adev->vce.ring[i].sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 4;
|
||||
ib_size_alignment = 1;
|
||||
@@ -384,7 +384,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
continue;
|
||||
|
||||
for (j = 0; j < adev->uvd.num_enc_rings; j++)
|
||||
if (adev->uvd.inst[i].ring_enc[j].ready)
|
||||
if (adev->uvd.inst[i].ring_enc[j].sched.ready)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 64;
|
||||
@@ -392,7 +392,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
break;
|
||||
case AMDGPU_HW_IP_VCN_DEC:
|
||||
type = AMD_IP_BLOCK_TYPE_VCN;
|
||||
if (adev->vcn.ring_dec.ready)
|
||||
if (adev->vcn.ring_dec.sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 16;
|
||||
ib_size_alignment = 16;
|
||||
@@ -400,14 +400,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
case AMDGPU_HW_IP_VCN_ENC:
|
||||
type = AMD_IP_BLOCK_TYPE_VCN;
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; i++)
|
||||
if (adev->vcn.ring_enc[i].ready)
|
||||
if (adev->vcn.ring_enc[i].sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 64;
|
||||
ib_size_alignment = 1;
|
||||
break;
|
||||
case AMDGPU_HW_IP_VCN_JPEG:
|
||||
type = AMD_IP_BLOCK_TYPE_VCN;
|
||||
if (adev->vcn.ring_jpeg.ready)
|
||||
if (adev->vcn.ring_jpeg.sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 16;
|
||||
ib_size_alignment = 16;
|
||||
@@ -978,7 +978,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
|
||||
}
|
||||
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
|
||||
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
|
||||
|
||||
r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
|
||||
&fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE);
|
||||
if (r)
|
||||
goto error_vm;
|
||||
}
|
||||
@@ -1048,8 +1051,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
|
||||
pasid = fpriv->vm.pasid;
|
||||
pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
|
||||
|
||||
amdgpu_vm_fini(adev, &fpriv->vm);
|
||||
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
|
||||
amdgpu_vm_fini(adev, &fpriv->vm);
|
||||
|
||||
if (pasid)
|
||||
amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
|
||||
|
@@ -38,7 +38,6 @@
|
||||
#include <drm/drm_crtc_helper.h>
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#include <drm/drm_plane_helper.h>
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#include <linux/i2c.h>
|
||||
#include <linux/i2c-algo-bit.h>
|
||||
#include <linux/hrtimer.h>
|
||||
@@ -57,7 +56,6 @@ struct amdgpu_hpd;
|
||||
#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
|
||||
#define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base)
|
||||
#define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base)
|
||||
#define to_amdgpu_plane(x) container_of(x, struct amdgpu_plane, base)
|
||||
|
||||
#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base);
|
||||
|
||||
@@ -295,13 +293,6 @@ struct amdgpu_display_funcs {
|
||||
uint16_t connector_object_id,
|
||||
struct amdgpu_hpd *hpd,
|
||||
struct amdgpu_router *router);
|
||||
/* it is used to enter or exit into free sync mode */
|
||||
int (*notify_freesync)(struct drm_device *dev, void *data,
|
||||
struct drm_file *filp);
|
||||
/* it is used to allow enablement of freesync mode */
|
||||
int (*set_freesync_property)(struct drm_connector *connector,
|
||||
struct drm_property *property,
|
||||
uint64_t val);
|
||||
|
||||
|
||||
};
|
||||
@@ -325,7 +316,7 @@ struct amdgpu_mode_info {
|
||||
struct card_info *atom_card_info;
|
||||
bool mode_config_initialized;
|
||||
struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS];
|
||||
struct amdgpu_plane *planes[AMDGPU_MAX_PLANES];
|
||||
struct drm_plane *planes[AMDGPU_MAX_PLANES];
|
||||
struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS];
|
||||
/* DVI-I properties */
|
||||
struct drm_property *coherent_mode_property;
|
||||
@@ -341,6 +332,8 @@ struct amdgpu_mode_info {
|
||||
struct drm_property *dither_property;
|
||||
/* maximum number of bits per channel for monitor color */
|
||||
struct drm_property *max_bpc_property;
|
||||
/* Adaptive Backlight Modulation (power feature) */
|
||||
struct drm_property *abm_level_property;
|
||||
/* hardcoded DFP edid from BIOS */
|
||||
struct edid *bios_hardcoded_edid;
|
||||
int bios_hardcoded_edid_size;
|
||||
@@ -436,11 +429,6 @@ struct amdgpu_crtc {
|
||||
struct drm_pending_vblank_event *event;
|
||||
};
|
||||
|
||||
struct amdgpu_plane {
|
||||
struct drm_plane base;
|
||||
enum drm_plane_type plane_type;
|
||||
};
|
||||
|
||||
struct amdgpu_encoder_atom_dig {
|
||||
bool linkb;
|
||||
/* atom dig */
|
||||
|
@@ -81,7 +81,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
|
||||
amdgpu_bo_subtract_pin_size(bo);
|
||||
|
||||
if (bo->kfd_bo)
|
||||
amdgpu_amdkfd_unreserve_system_memory_limit(bo);
|
||||
amdgpu_amdkfd_unreserve_memory_limit(bo);
|
||||
|
||||
amdgpu_bo_kunmap(bo);
|
||||
|
||||
@@ -607,53 +607,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object
|
||||
* @adev: amdgpu device object
|
||||
* @ring: amdgpu_ring for the engine handling the buffer operations
|
||||
* @bo: &amdgpu_bo buffer to be backed up
|
||||
* @resv: reservation object with embedded fence
|
||||
* @fence: dma_fence associated with the operation
|
||||
* @direct: whether to submit the job directly
|
||||
*
|
||||
* Copies an &amdgpu_bo buffer object to its shadow object.
|
||||
* Not used for now.
|
||||
*
|
||||
* Returns:
|
||||
* 0 for success or a negative error code on failure.
|
||||
*/
|
||||
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring,
|
||||
struct amdgpu_bo *bo,
|
||||
struct reservation_object *resv,
|
||||
struct dma_fence **fence,
|
||||
bool direct)
|
||||
|
||||
{
|
||||
struct amdgpu_bo *shadow = bo->shadow;
|
||||
uint64_t bo_addr, shadow_addr;
|
||||
int r;
|
||||
|
||||
if (!shadow)
|
||||
return -EINVAL;
|
||||
|
||||
bo_addr = amdgpu_bo_gpu_offset(bo);
|
||||
shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
|
||||
|
||||
r = reservation_object_reserve_shared(bo->tbo.resv);
|
||||
if (r)
|
||||
goto err;
|
||||
|
||||
r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
|
||||
amdgpu_bo_size(bo), resv, fence,
|
||||
direct, false);
|
||||
if (!r)
|
||||
amdgpu_bo_fence(bo, *fence, true);
|
||||
|
||||
err:
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_bo_validate - validate an &amdgpu_bo buffer object
|
||||
* @bo: pointer to the buffer object
|
||||
|
@@ -267,11 +267,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
|
||||
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
|
||||
bool shared);
|
||||
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
|
||||
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring,
|
||||
struct amdgpu_bo *bo,
|
||||
struct reservation_object *resv,
|
||||
struct dma_fence **fence, bool direct);
|
||||
int amdgpu_bo_validate(struct amdgpu_bo *bo);
|
||||
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
|
||||
struct dma_fence **fence);
|
||||
|
@@ -33,6 +33,8 @@
|
||||
#include <linux/hwmon.h>
|
||||
#include <linux/hwmon-sysfs.h>
|
||||
#include <linux/nospec.h>
|
||||
#include "hwmgr.h"
|
||||
#define WIDTH_4K 3840
|
||||
|
||||
static int amdgpu_debugfs_pm_init(struct amdgpu_device *adev);
|
||||
|
||||
@@ -1642,6 +1644,19 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
|
||||
attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
|
||||
return 0;
|
||||
|
||||
/* Skip fan attributes on APU */
|
||||
if ((adev->flags & AMD_IS_APU) &&
|
||||
(attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
|
||||
return 0;
|
||||
|
||||
/* Skip limit attributes if DPM is not enabled */
|
||||
if (!adev->pm.dpm_enabled &&
|
||||
(attr == &sensor_dev_attr_temp1_crit.dev_attr.attr ||
|
||||
@@ -1956,6 +1971,17 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
|
||||
mutex_unlock(&adev->pm.mutex);
|
||||
}
|
||||
/* enable/disable Low Memory PState for UVD (4k videos) */
|
||||
if (adev->asic_type == CHIP_STONEY &&
|
||||
adev->uvd.decode_image_width >= WIDTH_4K) {
|
||||
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
|
||||
|
||||
if (hwmgr && hwmgr->hwmgr_func &&
|
||||
hwmgr->hwmgr_func->update_nbdpm_pstate)
|
||||
hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr,
|
||||
!enable,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
|
||||
@@ -2129,7 +2155,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
if (ring && ring->ready)
|
||||
if (ring && ring->sched.ready)
|
||||
amdgpu_fence_wait_empty(ring);
|
||||
}
|
||||
|
||||
|
@@ -39,8 +39,6 @@
|
||||
#include <drm/amdgpu_drm.h>
|
||||
#include <linux/dma-buf.h>
|
||||
|
||||
static const struct dma_buf_ops amdgpu_dmabuf_ops;
|
||||
|
||||
/**
|
||||
* amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
|
||||
* implementation
|
||||
@@ -332,15 +330,13 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct dma_buf_ops amdgpu_dmabuf_ops = {
|
||||
const struct dma_buf_ops amdgpu_dmabuf_ops = {
|
||||
.attach = amdgpu_gem_map_attach,
|
||||
.detach = amdgpu_gem_map_detach,
|
||||
.map_dma_buf = drm_gem_map_dma_buf,
|
||||
.unmap_dma_buf = drm_gem_unmap_dma_buf,
|
||||
.release = drm_gem_dmabuf_release,
|
||||
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
|
||||
.map = drm_gem_dmabuf_kmap,
|
||||
.unmap = drm_gem_dmabuf_kunmap,
|
||||
.mmap = drm_gem_dmabuf_mmap,
|
||||
.vmap = drm_gem_dmabuf_vmap,
|
||||
.vunmap = drm_gem_dmabuf_vunmap,
|
||||
|
@@ -90,6 +90,8 @@ static int psp_sw_fini(void *handle)
|
||||
adev->psp.sos_fw = NULL;
|
||||
release_firmware(adev->psp.asd_fw);
|
||||
adev->psp.asd_fw = NULL;
|
||||
release_firmware(adev->psp.ta_fw);
|
||||
adev->psp.ta_fw = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -118,22 +120,26 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
|
||||
static int
|
||||
psp_cmd_submit_buf(struct psp_context *psp,
|
||||
struct amdgpu_firmware_info *ucode,
|
||||
struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr,
|
||||
int index)
|
||||
struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr)
|
||||
{
|
||||
int ret;
|
||||
int index;
|
||||
|
||||
memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
|
||||
|
||||
memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
|
||||
|
||||
index = atomic_inc_return(&psp->fence_value);
|
||||
ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr,
|
||||
fence_mc_addr, index);
|
||||
|
||||
while (*((unsigned int *)psp->fence_buf) != index) {
|
||||
msleep(1);
|
||||
if (ret) {
|
||||
atomic_dec(&psp->fence_value);
|
||||
return ret;
|
||||
}
|
||||
|
||||
while (*((unsigned int *)psp->fence_buf) != index)
|
||||
msleep(1);
|
||||
|
||||
/* the status field must be 0 after FW is loaded */
|
||||
if (ucode && psp->cmd_buf_mem->resp.status) {
|
||||
DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
|
||||
@@ -149,10 +155,22 @@ psp_cmd_submit_buf(struct psp_context *psp,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||
/**
 * psp_support_vmr_ring - decide whether the VMR command variant applies
 *
 * @psp: PSP context; psp->adev and psp->sos_fw_version are read
 *
 * Returns true only when amdgpu_sriov_vf() holds for the device and the SOS
 * firmware version is greater than 0x80045; false in every other case.
 * psp_prep_tmr_cmd_buf() uses the result to select GFX_CMD_ID_SETUP_VMR
 * instead of GFX_CMD_ID_SETUP_TMR.
 */
bool psp_support_vmr_ring(struct psp_context *psp)
|
||||
{
|
||||
if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
|
||||
struct psp_gfx_cmd_resp *cmd,
|
||||
uint64_t tmr_mc, uint32_t size)
|
||||
{
|
||||
cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
|
||||
if (psp_support_vmr_ring(psp))
|
||||
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
|
||||
else
|
||||
cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
|
||||
cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
|
||||
cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
|
||||
cmd->cmd.cmd_setup_tmr.buf_size = size;
|
||||
@@ -186,12 +204,12 @@ static int psp_tmr_load(struct psp_context *psp)
|
||||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
|
||||
psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
|
||||
DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n",
|
||||
PSP_TMR_SIZE, psp->tmr_mc_addr);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr, 1);
|
||||
psp->fence_buf_mc_addr);
|
||||
if (ret)
|
||||
goto failed;
|
||||
|
||||
@@ -258,13 +276,194 @@ static int psp_asd_load(struct psp_context *psp)
|
||||
psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr, 2);
|
||||
psp->fence_buf_mc_addr);
|
||||
|
||||
kfree(cmd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * psp_prep_xgmi_ta_load_cmd_buf - fill a GFX_CMD_ID_LOAD_TA command
 *
 * @cmd: command structure to fill in
 * @xgmi_ta_mc: 64-bit address of the TA image, stored as lo/hi 32-bit halves
 * @xgmi_mc_shared: 64-bit address of the shared buffer, stored as lo/hi halves
 * @xgmi_ta_size: value written to cmd_load_ta.app_len
 * @shared_size: value written to cmd_load_ta.cmd_buf_len
 *
 * Only writes fields of @cmd; nothing is submitted here.
 */
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
|
||||
uint32_t xgmi_ta_size, uint32_t shared_size)
|
||||
{
|
||||
cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
|
||||
/* TA image location and length */
cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc);
|
||||
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc);
|
||||
cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size;
|
||||
|
||||
/* shared command buffer location and length */
cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared);
|
||||
cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared);
|
||||
cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
|
||||
}
|
||||
|
||||
/**
 * psp_xgmi_init_shared_buf - allocate the buffer shared with the XGMI TA
 *
 * @psp: PSP context; the buffer object, its address and its CPU pointer are
 *       stored in psp->xgmi_context
 *
 * Returns whatever amdgpu_bo_create_kernel() returns.
 */
static int psp_xgmi_init_shared_buf(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Allocate 16k memory aligned to 4k from Frame Buffer (local
|
||||
* physical) for xgmi ta <-> Driver
|
||||
*/
|
||||
ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&psp->xgmi_context.xgmi_shared_bo,
|
||||
&psp->xgmi_context.xgmi_shared_mc_addr,
|
||||
&psp->xgmi_context.xgmi_shared_buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * psp_xgmi_load - stage the XGMI TA image and submit a load command for it
 *
 * @psp: PSP context
 *
 * Returns 0 without doing anything when amdgpu_sriov_vf() holds, -ENOMEM if
 * the command structure cannot be allocated, otherwise the result of
 * psp_cmd_submit_buf(). On success xgmi_context.initialized is set to 1 and
 * xgmi_context.session_id is recorded.
 */
static int psp_xgmi_load(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
struct psp_gfx_cmd_resp *cmd;
|
||||
|
||||
/*
|
||||
* TODO: bypass the loading in sriov for now
|
||||
*/
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
return 0;
|
||||
|
||||
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
 * Clear the firmware buffer, then copy the TA image into it.
 * NOTE(review): nothing here checks ta_xgmi_ucode_size against PSP_1_MEG;
 * confirm the size is validated where the firmware is parsed.
 */
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
|
||||
memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size);
|
||||
|
||||
psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
|
||||
psp->xgmi_context.xgmi_shared_mc_addr,
|
||||
psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr);
|
||||
|
||||
if (!ret) {
|
||||
psp->xgmi_context.initialized = 1;
|
||||
/*
 * NOTE(review): the id is read from the local cmd, while
 * psp_cmd_submit_buf() copies cmd into psp->cmd_buf_mem before
 * submitting; confirm the response is copied back into cmd.
 */
psp->xgmi_context.session_id = cmd->resp.session_id;
|
||||
}
|
||||
|
||||
kfree(cmd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * psp_prep_xgmi_ta_unload_cmd_buf - fill a GFX_CMD_ID_UNLOAD_TA command
 *
 * @cmd: command structure to fill in
 * @xgmi_session_id: session id written to cmd_unload_ta.session_id
 */
static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||
uint32_t xgmi_session_id)
|
||||
{
|
||||
cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
|
||||
cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id;
|
||||
}
|
||||
|
||||
/**
 * psp_xgmi_unload - submit an unload command for the stored XGMI session
 *
 * @psp: PSP context; psp->xgmi_context.session_id identifies the session
 *
 * Returns 0 without doing anything when amdgpu_sriov_vf() holds, -ENOMEM if
 * the command structure cannot be allocated, otherwise the result of
 * psp_cmd_submit_buf(). The xgmi_context state is not modified here.
 */
static int psp_xgmi_unload(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
struct psp_gfx_cmd_resp *cmd;
|
||||
|
||||
/*
|
||||
* TODO: bypass the unloading in sriov for now
|
||||
*/
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
return 0;
|
||||
|
||||
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr);
|
||||
|
||||
/* cmd is only a staging copy; release it whatever the outcome */
kfree(cmd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * psp_prep_xgmi_ta_invoke_cmd_buf - fill a GFX_CMD_ID_INVOKE_CMD command
 *
 * @cmd: command structure to fill in
 * @ta_cmd_id: value written to cmd_invoke_cmd.ta_cmd_id
 * @xgmi_session_id: value written to cmd_invoke_cmd.session_id
 */
static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||
uint32_t ta_cmd_id,
|
||||
uint32_t xgmi_session_id)
|
||||
{
|
||||
cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
|
||||
cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id;
|
||||
cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
|
||||
/* Note: cmd_invoke_cmd.buf is not used for now */
|
||||
}
|
||||
|
||||
/**
 * psp_xgmi_invoke - submit an invoke command for the stored XGMI session
 *
 * @psp: PSP context; psp->xgmi_context.session_id identifies the session
 * @ta_cmd_id: TA command id placed in the invoke command
 *
 * Returns 0 without doing anything when amdgpu_sriov_vf() holds, -ENOMEM if
 * the command structure cannot be allocated, otherwise the result of
 * psp_cmd_submit_buf().
 */
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
|
||||
{
|
||||
int ret;
|
||||
struct psp_gfx_cmd_resp *cmd;
|
||||
|
||||
/*
|
||||
* TODO: bypass the invoke in sriov for now
|
||||
*/
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
return 0;
|
||||
|
||||
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id,
|
||||
psp->xgmi_context.session_id);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr);
|
||||
|
||||
/* cmd is only a staging copy; release it whatever the outcome */
kfree(cmd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * psp_xgmi_terminate - unload the XGMI TA and free its shared buffer
 *
 * @psp: PSP context
 *
 * Returns 0 immediately when xgmi_context.initialized is not set. If
 * psp_xgmi_unload() fails its error is returned and neither the flag nor the
 * shared buffer is touched. Otherwise the flag is cleared, the shared buffer
 * is freed and 0 is returned.
 */
static int psp_xgmi_terminate(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!psp->xgmi_context.initialized)
|
||||
return 0;
|
||||
|
||||
ret = psp_xgmi_unload(psp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
psp->xgmi_context.initialized = 0;
|
||||
|
||||
/* free xgmi shared memory */
|
||||
amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo,
|
||||
&psp->xgmi_context.xgmi_shared_mc_addr,
|
||||
&psp->xgmi_context.xgmi_shared_buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * psp_xgmi_initialize - allocate the shared buffer, load the XGMI TA and
 * send it TA_COMMAND_XGMI__INITIALIZE
 *
 * @psp: PSP context
 *
 * The shared buffer is only allocated when xgmi_context.initialized is not
 * set. Returns the first error from psp_xgmi_init_shared_buf() or
 * psp_xgmi_load(), otherwise the result of psp_xgmi_invoke().
 *
 * NOTE(review): when psp_xgmi_load() or psp_xgmi_invoke() fails after the
 * allocation, the shared buffer is not freed here, and psp_xgmi_terminate()
 * returns early while initialized is 0. psp_xgmi_load() also returns 0
 * without setting initialized when amdgpu_sriov_vf() holds. Confirm the
 * buffer is released on those paths.
 */
static int psp_xgmi_initialize(struct psp_context *psp)
|
||||
{
|
||||
struct ta_xgmi_shared_memory *xgmi_cmd;
|
||||
int ret;
|
||||
|
||||
if (!psp->xgmi_context.initialized) {
|
||||
ret = psp_xgmi_init_shared_buf(psp);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Load XGMI TA */
|
||||
ret = psp_xgmi_load(psp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Initialize XGMI session */
|
||||
xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf);
|
||||
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
|
||||
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
|
||||
|
||||
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_hw_start(struct psp_context *psp)
|
||||
{
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
@@ -292,6 +491,15 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
ret = psp_xgmi_initialize(psp);
|
||||
/* Warn about the XGMI session initialization failure
|
||||
* instead of stopping driver initialization
|
||||
*/
|
||||
if (ret)
|
||||
dev_err(psp->adev->dev,
|
||||
"XGMI: Failed to initialize XGMI session\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -321,7 +529,7 @@ static int psp_np_fw_load(struct psp_context *psp)
|
||||
return ret;
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
|
||||
psp->fence_buf_mc_addr, i + 3);
|
||||
psp->fence_buf_mc_addr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -340,8 +548,10 @@ static int psp_load_fw(struct amdgpu_device *adev)
|
||||
int ret;
|
||||
struct psp_context *psp = &adev->psp;
|
||||
|
||||
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset != 0)
|
||||
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
|
||||
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
|
||||
goto skip_memalloc;
|
||||
}
|
||||
|
||||
psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!psp->cmd)
|
||||
@@ -452,6 +662,10 @@ static int psp_hw_fini(void *handle)
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
psp->xgmi_context.initialized == 1)
|
||||
psp_xgmi_terminate(psp);
|
||||
|
||||
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
|
||||
|
||||
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
|
||||
@@ -479,6 +693,15 @@ static int psp_suspend(void *handle)
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
psp->xgmi_context.initialized == 1) {
|
||||
ret = psp_xgmi_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate xgmi ta\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP ring stop failed\n");
|
||||
|
@@ -27,14 +27,17 @@
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "psp_gfx_if.h"
|
||||
#include "ta_xgmi_if.h"
|
||||
|
||||
#define PSP_FENCE_BUFFER_SIZE 0x1000
|
||||
#define PSP_CMD_BUFFER_SIZE 0x1000
|
||||
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
|
||||
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
|
||||
#define PSP_XGMI_SHARED_MEM_SIZE 0x4000
|
||||
#define PSP_1_MEG 0x100000
|
||||
#define PSP_TMR_SIZE 0x400000
|
||||
|
||||
struct psp_context;
|
||||
struct psp_xgmi_node_info;
|
||||
struct psp_xgmi_topology_info;
|
||||
|
||||
enum psp_ring_type
|
||||
@@ -80,12 +83,20 @@ struct psp_funcs
|
||||
enum AMDGPU_UCODE_ID ucode_type);
|
||||
bool (*smu_reload_quirk)(struct psp_context *psp);
|
||||
int (*mode1_reset)(struct psp_context *psp);
|
||||
uint64_t (*xgmi_get_device_id)(struct psp_context *psp);
|
||||
uint64_t (*xgmi_get_node_id)(struct psp_context *psp);
|
||||
uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
|
||||
int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
|
||||
struct psp_xgmi_topology_info *topology);
|
||||
struct psp_xgmi_topology_info *topology);
|
||||
int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
|
||||
struct psp_xgmi_topology_info *topology);
|
||||
struct psp_xgmi_topology_info *topology);
|
||||
};
|
||||
|
||||
/* State of the XGMI TA session kept inside struct psp_context. */
struct psp_xgmi_context {
|
||||
/* set to 1 by psp_xgmi_load() on success, cleared by psp_xgmi_terminate() */
uint8_t initialized;
|
||||
/* session id stored by psp_xgmi_load(); passed to unload/invoke commands */
uint32_t session_id;
|
||||
/* buffer object, address and CPU pointer filled in by psp_xgmi_init_shared_buf() */
struct amdgpu_bo *xgmi_shared_bo;
|
||||
uint64_t xgmi_shared_mc_addr;
|
||||
void *xgmi_shared_buf;
|
||||
};
|
||||
|
||||
struct psp_context
|
||||
@@ -96,7 +107,7 @@ struct psp_context
|
||||
|
||||
const struct psp_funcs *funcs;
|
||||
|
||||
/* fence buffer */
|
||||
/* firmware buffer */
|
||||
struct amdgpu_bo *fw_pri_bo;
|
||||
uint64_t fw_pri_mc_addr;
|
||||
void *fw_pri_buf;
|
||||
@@ -134,6 +145,16 @@ struct psp_context
|
||||
struct amdgpu_bo *cmd_buf_bo;
|
||||
uint64_t cmd_buf_mc_addr;
|
||||
struct psp_gfx_cmd_resp *cmd_buf_mem;
|
||||
|
||||
/* fence value associated with cmd buffer */
|
||||
atomic_t fence_value;
|
||||
|
||||
/* xgmi ta firmware and buffer */
|
||||
const struct firmware *ta_fw;
|
||||
uint32_t ta_xgmi_ucode_version;
|
||||
uint32_t ta_xgmi_ucode_size;
|
||||
uint8_t *ta_xgmi_start_addr;
|
||||
struct psp_xgmi_context xgmi_context;
|
||||
};
|
||||
|
||||
struct amdgpu_psp_funcs {
|
||||
@@ -141,21 +162,17 @@ struct amdgpu_psp_funcs {
|
||||
enum AMDGPU_UCODE_ID);
|
||||
};
|
||||
|
||||
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
|
||||
/*
 * Element type of psp_xgmi_topology_info.nodes[].
 * NOTE(review): field meanings are not established by the code in view;
 * confirm them against ta_xgmi_if.h before documenting them further.
 */
struct psp_xgmi_node_info {
|
||||
uint64_t node_id;
|
||||
uint8_t num_hops;
|
||||
uint8_t is_sharing_enabled;
|
||||
enum ta_xgmi_assigned_sdma_engine sdma_engine;
|
||||
};
|
||||
|
||||
struct psp_xgmi_topology_info {
|
||||
/* Generated by PSP to identify the GPU instance within xgmi connection */
|
||||
uint64_t device_id;
|
||||
/*
|
||||
* If all bits set to 0 , driver indicates it wants to retrieve the xgmi
|
||||
* connection vector topology, but not access enable the connections
|
||||
* if some or all bits are set to 1, driver indicates it want to retrieve the
|
||||
* current xgmi topology and access enable the link to GPU[i] associated
|
||||
* with the bit position in the vector.
|
||||
* On return, the bits indicate which xgmi links are present/active depending
|
||||
* on the value passed in. The relative bit offset for the relative GPU index
|
||||
* within the hive is always marked active.
|
||||
*/
|
||||
uint32_t connection_mask;
|
||||
uint32_t reserved; /* must be 0 */
|
||||
uint32_t num_nodes;
|
||||
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
|
||||
};
|
||||
|
||||
#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
|
||||
@@ -177,8 +194,8 @@ struct psp_xgmi_topology_info {
|
||||
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
|
||||
#define psp_mode1_reset(psp) \
|
||||
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
|
||||
#define psp_xgmi_get_device_id(psp) \
|
||||
((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0)
|
||||
#define psp_xgmi_get_node_id(psp) \
|
||||
((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp)) : 0)
|
||||
#define psp_xgmi_get_hive_id(psp) \
|
||||
((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
|
||||
#define psp_xgmi_get_topology_info(psp, num_device, topology) \
|
||||
@@ -199,6 +216,9 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
|
||||
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
|
||||
|
||||
int psp_gpu_reset(struct amdgpu_device *adev);
|
||||
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
|
||||
bool psp_support_vmr_ring(struct psp_context *psp);
|
||||
|
||||
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
|
||||
|
||||
#endif
|
||||
|
@@ -338,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
||||
*/
|
||||
void amdgpu_ring_fini(struct amdgpu_ring *ring)
|
||||
{
|
||||
ring->ready = false;
|
||||
ring->sched.ready = false;
|
||||
|
||||
/* Not to finish a ring which is not initialized */
|
||||
if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
|
||||
@@ -397,7 +397,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
|
||||
{
|
||||
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
|
||||
|
||||
if (!ring->funcs->soft_recovery)
|
||||
if (!ring->funcs->soft_recovery || !fence)
|
||||
return false;
|
||||
|
||||
atomic_inc(&ring->adev->gpu_reset_counter);
|
||||
@@ -500,3 +500,29 @@ static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
|
||||
debugfs_remove(ring->ent);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ring_test_helper - test a ring and set its scheduler readiness status
|
||||
*
|
||||
* @ring: ring to test
|
||||
*
|
||||
* Runs amdgpu_ring_test_ring() on @ring, logs the outcome, and sets
* ring->sched.ready to true only when the test returned 0.
|
||||
*
|
||||
* Returns 0 on success, error on failure.
|
||||
*/
|
||||
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
int r;
|
||||
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r)
|
||||
DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
|
||||
ring->name, r);
|
||||
else
|
||||
DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
|
||||
ring->name);
|
||||
|
||||
/* any non-zero test result marks the ring's scheduler as not ready */
ring->sched.ready = !r;
|
||||
return r;
|
||||
}
|
||||
|
@@ -129,8 +129,9 @@ struct amdgpu_ring_funcs {
|
||||
unsigned emit_ib_size;
|
||||
/* command emit functions */
|
||||
void (*emit_ib)(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch);
|
||||
bool ctx_switch);
|
||||
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
|
||||
uint64_t seq, unsigned flags);
|
||||
void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
|
||||
@@ -189,7 +190,6 @@ struct amdgpu_ring {
|
||||
uint64_t gpu_addr;
|
||||
uint64_t ptr_mask;
|
||||
uint32_t buf_mask;
|
||||
bool ready;
|
||||
u32 idx;
|
||||
u32 me;
|
||||
u32 pipe;
|
||||
@@ -229,7 +229,7 @@ struct amdgpu_ring {
|
||||
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
|
||||
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
|
||||
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
|
||||
#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
|
||||
#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c)))
|
||||
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
|
||||
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
|
||||
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
|
||||
@@ -313,4 +313,6 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
|
||||
ring->count_dw -= count_dw;
|
||||
}
|
||||
|
||||
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
|
||||
|
||||
#endif
|
||||
|
282
drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
Normal file
282
drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
Normal file
@@ -0,0 +1,282 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
* Copyright 2008 Red Hat Inc.
|
||||
* Copyright 2009 Jerome Glisse.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <linux/firmware.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include "amdgpu_rlc.h"
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Set RLC enter into safe mode if RLC is enabled and haven't in safe mode.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->gfx.rlc.in_safe_mode)
|
||||
return;
|
||||
|
||||
/* if RLC is not enabled, do nothing */
|
||||
if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
|
||||
return;
|
||||
|
||||
if (adev->cg_flags &
|
||||
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
|
||||
adev->gfx.rlc.funcs->set_safe_mode(adev);
|
||||
adev->gfx.rlc.in_safe_mode = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Set RLC exit safe mode if RLC is enabled and have entered into safe mode.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
if (!(adev->gfx.rlc.in_safe_mode))
|
||||
return;
|
||||
|
||||
/* if RLC is not enabled, do nothing */
|
||||
if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
|
||||
return;
|
||||
|
||||
if (adev->cg_flags &
|
||||
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
|
||||
adev->gfx.rlc.funcs->unset_safe_mode(adev);
|
||||
adev->gfx.rlc.in_safe_mode = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_init_sr - Init save restore block
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @dws: the size of save restore block
|
||||
*
|
||||
* Allocate and setup value to save restore block of rlc.
|
||||
* Returns 0 on succeess or negative error code if allocate failed.
|
||||
*/
|
||||
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
|
||||
{
|
||||
const u32 *src_ptr;
|
||||
volatile u32 *dst_ptr;
|
||||
u32 i;
|
||||
int r;
|
||||
|
||||
/* allocate save restore block */
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.save_restore_obj,
|
||||
&adev->gfx.rlc.save_restore_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.sr_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* write the sr buffer */
|
||||
src_ptr = adev->gfx.rlc.reg_list;
|
||||
dst_ptr = adev->gfx.rlc.sr_ptr;
|
||||
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
|
||||
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_init_csb - Init clear state block
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Allocate and setup value to clear state block of rlc.
|
||||
* Returns 0 on succeess or negative error code if allocate failed.
|
||||
*/
|
||||
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
|
||||
{
|
||||
volatile u32 *dst_ptr;
|
||||
u32 dws;
|
||||
int r;
|
||||
|
||||
/* allocate clear state block */
|
||||
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* set up the cs buffer */
|
||||
dst_ptr = adev->gfx.rlc.cs_ptr;
|
||||
adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
|
||||
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_init_cpt - Init cp table
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Allocate and setup value to cp table of rlc.
|
||||
* Returns 0 on succeess or negative error code if allocate failed.
|
||||
*/
|
||||
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.cp_table_obj,
|
||||
&adev->gfx.rlc.cp_table_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cp_table_ptr);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to create cp table bo\n", r);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* set up the cp table */
|
||||
amdgpu_gfx_rlc_setup_cp_table(adev);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_setup_cp_table - setup cp the buffer of cp table
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Write cp firmware data into cp table.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
|
||||
{
|
||||
const __le32 *fw_data;
|
||||
volatile u32 *dst_ptr;
|
||||
int me, i, max_me;
|
||||
u32 bo_offset = 0;
|
||||
u32 table_offset, table_size;
|
||||
|
||||
max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev);
|
||||
|
||||
/* write the cp table buffer */
|
||||
dst_ptr = adev->gfx.rlc.cp_table_ptr;
|
||||
for (me = 0; me < max_me; me++) {
|
||||
if (me == 0) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.ce_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 1) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.pfp_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 2) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.me_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 3) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 4) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec2_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
}
|
||||
|
||||
for (i = 0; i < table_size; i ++) {
|
||||
dst_ptr[bo_offset + i] =
|
||||
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
|
||||
}
|
||||
|
||||
bo_offset += table_size;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_fini - Free BO which used for RLC
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Free three BO which is used for rlc_save_restore_block, rlc_clear_state_block
|
||||
* and rlc_jump_table_block.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
/* save restore block */
|
||||
if (adev->gfx.rlc.save_restore_obj) {
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj,
|
||||
&adev->gfx.rlc.save_restore_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.sr_ptr);
|
||||
}
|
||||
|
||||
/* clear state block */
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
|
||||
/* jump table block */
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
|
||||
&adev->gfx.rlc.cp_table_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cp_table_ptr);
|
||||
}
|
98
drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
Normal file
98
drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __AMDGPU_RLC_H__
|
||||
#define __AMDGPU_RLC_H__
|
||||
|
||||
#include "clearstate_defs.h"
|
||||
|
||||
struct amdgpu_rlc_funcs {
|
||||
bool (*is_rlc_enabled)(struct amdgpu_device *adev);
|
||||
void (*set_safe_mode)(struct amdgpu_device *adev);
|
||||
void (*unset_safe_mode)(struct amdgpu_device *adev);
|
||||
int (*init)(struct amdgpu_device *adev);
|
||||
u32 (*get_csb_size)(struct amdgpu_device *adev);
|
||||
void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
|
||||
int (*get_cp_table_num)(struct amdgpu_device *adev);
|
||||
int (*resume)(struct amdgpu_device *adev);
|
||||
void (*stop)(struct amdgpu_device *adev);
|
||||
void (*reset)(struct amdgpu_device *adev);
|
||||
void (*start)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_rlc {
|
||||
/* for power gating */
|
||||
struct amdgpu_bo *save_restore_obj;
|
||||
uint64_t save_restore_gpu_addr;
|
||||
volatile uint32_t *sr_ptr;
|
||||
const u32 *reg_list;
|
||||
u32 reg_list_size;
|
||||
/* for clear state */
|
||||
struct amdgpu_bo *clear_state_obj;
|
||||
uint64_t clear_state_gpu_addr;
|
||||
volatile uint32_t *cs_ptr;
|
||||
const struct cs_section_def *cs_data;
|
||||
u32 clear_state_size;
|
||||
/* for cp tables */
|
||||
struct amdgpu_bo *cp_table_obj;
|
||||
uint64_t cp_table_gpu_addr;
|
||||
volatile uint32_t *cp_table_ptr;
|
||||
u32 cp_table_size;
|
||||
|
||||
/* safe mode for updating CG/PG state */
|
||||
bool in_safe_mode;
|
||||
const struct amdgpu_rlc_funcs *funcs;
|
||||
|
||||
/* for firmware data */
|
||||
u32 save_and_restore_offset;
|
||||
u32 clear_state_descriptor_offset;
|
||||
u32 avail_scratch_ram_locations;
|
||||
u32 reg_restore_list_size;
|
||||
u32 reg_list_format_start;
|
||||
u32 reg_list_format_separate_start;
|
||||
u32 starting_offsets_start;
|
||||
u32 reg_list_format_size_bytes;
|
||||
u32 reg_list_size_bytes;
|
||||
u32 reg_list_format_direct_reg_list_length;
|
||||
u32 save_restore_list_cntl_size_bytes;
|
||||
u32 save_restore_list_gpm_size_bytes;
|
||||
u32 save_restore_list_srm_size_bytes;
|
||||
|
||||
u32 *register_list_format;
|
||||
u32 *register_restore;
|
||||
u8 *save_restore_list_cntl;
|
||||
u8 *save_restore_list_gpm;
|
||||
u8 *save_restore_list_srm;
|
||||
|
||||
bool is_rlc_v2_1;
|
||||
};
|
||||
|
||||
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev);
|
||||
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
|
||||
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
|
||||
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
@@ -28,17 +28,31 @@
|
||||
* GPU SDMA IP block helpers function.
|
||||
*/
|
||||
|
||||
struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
|
||||
struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++)
|
||||
if (&adev->sdma.instance[i].ring == ring)
|
||||
break;
|
||||
if (ring == &adev->sdma.instance[i].ring ||
|
||||
ring == &adev->sdma.instance[i].page)
|
||||
return &adev->sdma.instance[i];
|
||||
|
||||
if (i < AMDGPU_MAX_SDMA_INSTANCES)
|
||||
return &adev->sdma.instance[i];
|
||||
else
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * amdgpu_sdma_get_index_from_ring - look up the SDMA instance index of a ring
 *
 * @ring: ring to look up
 * @index: where the instance index is stored on a match
 *
 * Walks the SDMA instances of the ring's device and looks for the one
 * whose "ring" or "page" member is @ring.
 *
 * Returns 0 with *@index set on a match, or -EINVAL with *@index left
 * untouched when @ring belongs to no SDMA instance.
 */
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_sdma_instance *inst;
	int idx;

	for (idx = 0; idx < adev->sdma.num_instances; idx++) {
		inst = &adev->sdma.instance[idx];

		/* neither of this instance's rings: keep searching */
		if (ring != &inst->ring && ring != &inst->page)
			continue;

		*index = idx;
		return 0;
	}

	return -EINVAL;
}
|
||||
|
@@ -41,6 +41,7 @@ struct amdgpu_sdma_instance {
|
||||
uint32_t feature_version;
|
||||
|
||||
struct amdgpu_ring ring;
|
||||
struct amdgpu_ring page;
|
||||
bool burst_nop;
|
||||
};
|
||||
|
||||
@@ -50,6 +51,7 @@ struct amdgpu_sdma {
|
||||
struct amdgpu_irq_src illegal_inst_irq;
|
||||
int num_instances;
|
||||
uint32_t srbm_soft_reset;
|
||||
bool has_page_queue;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -92,6 +94,7 @@ struct amdgpu_buffer_funcs {
|
||||
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
|
||||
|
||||
struct amdgpu_sdma_instance *
|
||||
amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
|
||||
amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
|
||||
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
|
||||
|
||||
#endif
|
||||
|
@@ -218,6 +218,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
|
||||
TP_ARGS(vm, ring, job),
|
||||
TP_STRUCT__entry(
|
||||
__field(u32, pasid)
|
||||
__string(ring, ring->name)
|
||||
__field(u32, ring)
|
||||
__field(u32, vmid)
|
||||
__field(u32, vm_hub)
|
||||
@@ -227,14 +228,14 @@ TRACE_EVENT(amdgpu_vm_grab_id,
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->pasid = vm->pasid;
|
||||
__entry->ring = ring->idx;
|
||||
__assign_str(ring, ring->name)
|
||||
__entry->vmid = job->vmid;
|
||||
__entry->vm_hub = ring->funcs->vmhub,
|
||||
__entry->pd_addr = job->vm_pd_addr;
|
||||
__entry->needs_flush = job->vm_needs_flush;
|
||||
),
|
||||
TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
|
||||
__entry->pasid, __entry->ring, __entry->vmid,
|
||||
TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
|
||||
__entry->pasid, __get_str(ring), __entry->vmid,
|
||||
__entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
|
||||
);
|
||||
|
||||
@@ -366,20 +367,20 @@ TRACE_EVENT(amdgpu_vm_flush,
|
||||
uint64_t pd_addr),
|
||||
TP_ARGS(ring, vmid, pd_addr),
|
||||
TP_STRUCT__entry(
|
||||
__field(u32, ring)
|
||||
__string(ring, ring->name)
|
||||
__field(u32, vmid)
|
||||
__field(u32, vm_hub)
|
||||
__field(u64, pd_addr)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->ring = ring->idx;
|
||||
__assign_str(ring, ring->name)
|
||||
__entry->vmid = vmid;
|
||||
__entry->vm_hub = ring->funcs->vmhub;
|
||||
__entry->pd_addr = pd_addr;
|
||||
),
|
||||
TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx",
|
||||
__entry->ring, __entry->vmid,
|
||||
TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
|
||||
__get_str(ring), __entry->vmid,
|
||||
__entry->vm_hub,__entry->pd_addr)
|
||||
);
|
||||
|
||||
|
@@ -61,100 +61,6 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
|
||||
static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
|
||||
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
|
||||
|
||||
/*
|
||||
* Global memory.
|
||||
*/
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_mem_global_init - Initialize and acquire reference to
|
||||
* memory object
|
||||
*
|
||||
* @ref: Object for initialization.
|
||||
*
|
||||
* This is called by drm_global_item_ref() when an object is being
|
||||
* initialized.
|
||||
*/
|
||||
static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
|
||||
{
|
||||
return ttm_mem_global_init(ref->object);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_mem_global_release - Drop reference to a memory object
|
||||
*
|
||||
* @ref: Object being removed
|
||||
*
|
||||
* This is called by drm_global_item_unref() when an object is being
|
||||
* released.
|
||||
*/
|
||||
static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
|
||||
{
|
||||
ttm_mem_global_release(ref->object);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_global_init - Initialize global TTM memory reference structures.
|
||||
*
|
||||
* @adev: AMDGPU device for which the global structures need to be registered.
|
||||
*
|
||||
* This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
|
||||
* during bring up.
|
||||
*/
|
||||
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct drm_global_reference *global_ref;
|
||||
int r;
|
||||
|
||||
/* ensure reference is false in case init fails */
|
||||
adev->mman.mem_global_referenced = false;
|
||||
|
||||
global_ref = &adev->mman.mem_global_ref;
|
||||
global_ref->global_type = DRM_GLOBAL_TTM_MEM;
|
||||
global_ref->size = sizeof(struct ttm_mem_global);
|
||||
global_ref->init = &amdgpu_ttm_mem_global_init;
|
||||
global_ref->release = &amdgpu_ttm_mem_global_release;
|
||||
r = drm_global_item_ref(global_ref);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed setting up TTM memory accounting "
|
||||
"subsystem.\n");
|
||||
goto error_mem;
|
||||
}
|
||||
|
||||
adev->mman.bo_global_ref.mem_glob =
|
||||
adev->mman.mem_global_ref.object;
|
||||
global_ref = &adev->mman.bo_global_ref.ref;
|
||||
global_ref->global_type = DRM_GLOBAL_TTM_BO;
|
||||
global_ref->size = sizeof(struct ttm_bo_global);
|
||||
global_ref->init = &ttm_bo_global_init;
|
||||
global_ref->release = &ttm_bo_global_release;
|
||||
r = drm_global_item_ref(global_ref);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed setting up TTM BO subsystem.\n");
|
||||
goto error_bo;
|
||||
}
|
||||
|
||||
mutex_init(&adev->mman.gtt_window_lock);
|
||||
|
||||
adev->mman.mem_global_referenced = true;
|
||||
|
||||
return 0;
|
||||
|
||||
error_bo:
|
||||
drm_global_item_unref(&adev->mman.mem_global_ref);
|
||||
error_mem:
|
||||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->mman.mem_global_referenced) {
|
||||
mutex_destroy(&adev->mman.gtt_window_lock);
|
||||
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
|
||||
drm_global_item_unref(&adev->mman.mem_global_ref);
|
||||
adev->mman.mem_global_referenced = false;
|
||||
}
|
||||
}
|
||||
|
||||
static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
|
||||
{
|
||||
return 0;
|
||||
@@ -1758,14 +1664,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
||||
int r;
|
||||
u64 vis_vram_limit;
|
||||
|
||||
/* initialize global references for vram/gtt */
|
||||
r = amdgpu_ttm_global_init(adev);
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
mutex_init(&adev->mman.gtt_window_lock);
|
||||
|
||||
/* No others user of address space so set it to 0 */
|
||||
r = ttm_bo_device_init(&adev->mman.bdev,
|
||||
adev->mman.bo_global_ref.ref.object,
|
||||
&amdgpu_bo_driver,
|
||||
adev->ddev->anon_inode->i_mapping,
|
||||
DRM_FILE_PAGE_OFFSET,
|
||||
@@ -1922,7 +1824,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
|
||||
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
|
||||
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
|
||||
ttm_bo_device_release(&adev->mman.bdev);
|
||||
amdgpu_ttm_global_fini(adev);
|
||||
adev->mman.initialized = false;
|
||||
DRM_INFO("amdgpu: ttm finalized\n");
|
||||
}
|
||||
@@ -2069,7 +1970,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
|
||||
unsigned i;
|
||||
int r;
|
||||
|
||||
if (direct_submit && !ring->ready) {
|
||||
if (direct_submit && !ring->sched.ready) {
|
||||
DRM_ERROR("Trying to move memory with ring turned off.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@@ -39,8 +39,6 @@
|
||||
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
|
||||
|
||||
struct amdgpu_mman {
|
||||
struct ttm_bo_global_ref bo_global_ref;
|
||||
struct drm_global_reference mem_global_ref;
|
||||
struct ttm_bo_device bdev;
|
||||
bool mem_global_referenced;
|
||||
bool initialized;
|
||||
|
@@ -57,6 +57,17 @@ struct psp_firmware_header_v1_0 {
|
||||
uint32_t sos_size_bytes;
|
||||
};
|
||||
|
||||
/* version_major=1, version_minor=0 */
|
||||
struct ta_firmware_header_v1_0 {
|
||||
struct common_firmware_header header;
|
||||
uint32_t ta_xgmi_ucode_version;
|
||||
uint32_t ta_xgmi_offset_bytes;
|
||||
uint32_t ta_xgmi_size_bytes;
|
||||
uint32_t ta_ras_ucode_version;
|
||||
uint32_t ta_ras_offset_bytes;
|
||||
uint32_t ta_ras_size_bytes;
|
||||
};
|
||||
|
||||
/* version_major=1, version_minor=0 */
|
||||
struct gfx_firmware_header_v1_0 {
|
||||
struct common_firmware_header header;
|
||||
@@ -170,6 +181,7 @@ union amdgpu_firmware_header {
|
||||
struct mc_firmware_header_v1_0 mc;
|
||||
struct smc_firmware_header_v1_0 smc;
|
||||
struct psp_firmware_header_v1_0 psp;
|
||||
struct ta_firmware_header_v1_0 ta;
|
||||
struct gfx_firmware_header_v1_0 gfx;
|
||||
struct rlc_firmware_header_v1_0 rlc;
|
||||
struct rlc_firmware_header_v2_0 rlc_v2_0;
|
||||
|
@@ -692,6 +692,8 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
|
||||
buf_sizes[0x1] = dpb_size;
|
||||
buf_sizes[0x2] = image_size;
|
||||
buf_sizes[0x4] = min_ctx_size;
|
||||
/* store image width to adjust nb memory pstate */
|
||||
adev->uvd.decode_image_width = width;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1243,30 +1245,20 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
{
|
||||
struct dma_fence *fence;
|
||||
long r;
|
||||
uint32_t ip_instance = ring->me;
|
||||
|
||||
r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
dma_fence_put(fence);
|
||||
|
||||
|
@@ -65,6 +65,8 @@ struct amdgpu_uvd {
|
||||
struct drm_sched_entity entity;
|
||||
struct delayed_work idle_work;
|
||||
unsigned harvest_config;
|
||||
/* store image width to adjust nb memory state */
|
||||
unsigned decode_image_width;
|
||||
};
|
||||
|
||||
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
|
||||
|
@@ -1032,8 +1032,10 @@ out:
|
||||
* @ib: the IB to execute
|
||||
*
|
||||
*/
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
{
|
||||
amdgpu_ring_write(ring, VCE_CMD_IB);
|
||||
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
|
||||
@@ -1079,11 +1081,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
|
||||
return 0;
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 16);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, VCE_CMD_END);
|
||||
amdgpu_ring_commit(ring);
|
||||
|
||||
@@ -1093,14 +1093,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed\n",
|
||||
ring->idx);
|
||||
if (i >= timeout)
|
||||
r = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -1121,27 +1115,19 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
return 0;
|
||||
|
||||
r = amdgpu_vce_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
error:
|
||||
dma_fence_put(fence);
|
||||
return r;
|
||||
|
@@ -65,8 +65,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
|
||||
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
|
||||
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
|
||||
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch);
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib, bool ctx_switch);
|
||||
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
|
||||
unsigned flags);
|
||||
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
|
||||
|
@@ -425,11 +425,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring,
|
||||
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
@@ -441,14 +439,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -570,30 +563,20 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
dma_fence_put(fence);
|
||||
|
||||
error:
|
||||
return r;
|
||||
}
|
||||
@@ -606,11 +589,9 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
|
||||
int r;
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 16);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, VCN_ENC_CMD_END);
|
||||
amdgpu_ring_commit(ring);
|
||||
|
||||
@@ -620,14 +601,8 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed\n",
|
||||
ring->idx);
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -742,27 +717,19 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
error:
|
||||
dma_fence_put(fence);
|
||||
return r;
|
||||
@@ -778,11 +745,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
|
||||
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring,
|
||||
PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
|
||||
@@ -796,14 +760,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -856,21 +814,18 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r = 0;
|
||||
|
||||
r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto error;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto error;
|
||||
} else
|
||||
} else {
|
||||
r = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
|
||||
@@ -879,15 +834,10 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout)
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
else {
|
||||
DRM_ERROR("ib test failed (0x%08X)\n", tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
dma_fence_put(fence);
|
||||
|
||||
error:
|
||||
return r;
|
||||
}
|
||||
|
@@ -23,16 +23,6 @@
|
||||
|
||||
#include "amdgpu.h"
|
||||
|
||||
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
|
||||
{
|
||||
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
|
||||
|
||||
addr -= AMDGPU_VA_RESERVED_SIZE;
|
||||
addr = amdgpu_gmc_sign_extend(addr);
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
|
||||
{
|
||||
/* By now all MMIO pages except mailbox are blocked */
|
||||
@@ -41,88 +31,6 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
|
||||
return RREG32_NO_KIQ(0xc040) == 0xffffffff;
|
||||
}
|
||||
|
||||
int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
void *ptr;
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj,
|
||||
&adev->virt.csa_vmid0_addr, &ptr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
memset(ptr, 0, AMDGPU_CSA_SIZE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_free_static_csa(struct amdgpu_device *adev) {
|
||||
amdgpu_bo_free_kernel(&adev->virt.csa_obj,
|
||||
&adev->virt.csa_vmid0_addr,
|
||||
NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* amdgpu_map_static_csa should be called during amdgpu_vm_init
|
||||
* it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
|
||||
* submission of GFX should use this virtual address within META_DATA init
|
||||
* package to support SRIOV gfx preemption.
|
||||
*/
|
||||
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va **bo_va)
|
||||
{
|
||||
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
|
||||
struct ww_acquire_ctx ticket;
|
||||
struct list_head list;
|
||||
struct amdgpu_bo_list_entry pd;
|
||||
struct ttm_validate_buffer csa_tv;
|
||||
int r;
|
||||
|
||||
INIT_LIST_HEAD(&list);
|
||||
INIT_LIST_HEAD(&csa_tv.head);
|
||||
csa_tv.bo = &adev->virt.csa_obj->tbo;
|
||||
csa_tv.shared = true;
|
||||
|
||||
list_add(&csa_tv.head, &list);
|
||||
amdgpu_vm_get_pd_bo(vm, &list, &pd);
|
||||
|
||||
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
*bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
|
||||
if (!*bo_va) {
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
DRM_ERROR("failed to create bo_va for static CSA\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
|
||||
AMDGPU_CSA_SIZE);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
|
||||
amdgpu_vm_bo_rmv(adev, *bo_va);
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
|
||||
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
|
||||
AMDGPU_PTE_EXECUTABLE);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
|
||||
amdgpu_vm_bo_rmv(adev, *bo_va);
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return r;
|
||||
}
|
||||
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_virt_init_setting(struct amdgpu_device *adev)
|
||||
{
|
||||
/* enable virtual display */
|
||||
@@ -162,9 +70,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
|
||||
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
|
||||
goto failed_kiq_read;
|
||||
|
||||
if (in_interrupt())
|
||||
might_sleep();
|
||||
|
||||
might_sleep();
|
||||
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
|
||||
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
@@ -210,9 +116,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
|
||||
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
|
||||
goto failed_kiq_write;
|
||||
|
||||
if (in_interrupt())
|
||||
might_sleep();
|
||||
|
||||
might_sleep();
|
||||
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
|
||||
|
||||
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
|
||||
@@ -228,6 +132,46 @@ failed_kiq_write:
|
||||
pr_err("failed to write reg:%x\n", reg);
|
||||
}
|
||||
|
||||
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
|
||||
uint32_t reg0, uint32_t reg1,
|
||||
uint32_t ref, uint32_t mask)
|
||||
{
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_ring *ring = &kiq->ring;
|
||||
signed long r, cnt = 0;
|
||||
unsigned long flags;
|
||||
uint32_t seq;
|
||||
|
||||
spin_lock_irqsave(&kiq->ring_lock, flags);
|
||||
amdgpu_ring_alloc(ring, 32);
|
||||
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
|
||||
ref, mask);
|
||||
amdgpu_fence_emit_polling(ring, &seq);
|
||||
amdgpu_ring_commit(ring);
|
||||
spin_unlock_irqrestore(&kiq->ring_lock, flags);
|
||||
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
|
||||
/* don't wait anymore for IRQ context */
|
||||
if (r < 1 && in_interrupt())
|
||||
goto failed_kiq;
|
||||
|
||||
might_sleep();
|
||||
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
|
||||
|
||||
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
}
|
||||
|
||||
if (cnt > MAX_KIQ_REG_TRY)
|
||||
goto failed_kiq;
|
||||
|
||||
return;
|
||||
|
||||
failed_kiq:
|
||||
pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_virt_request_full_gpu() - request full gpu access
|
||||
* @amdgpu: amdgpu device.
|
||||
@@ -390,7 +334,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
|
||||
|
||||
if (adev->fw_vram_usage.va != NULL) {
|
||||
adev->virt.fw_reserve.p_pf2vf =
|
||||
(struct amdgim_pf2vf_info_header *)(
|
||||
(struct amd_sriov_msg_pf2vf_info_header *)(
|
||||
adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET);
|
||||
AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size);
|
||||
AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum);
|
||||
|
@@ -63,8 +63,8 @@ struct amdgpu_virt_ops {
|
||||
* Firmware Reserve Frame buffer
|
||||
*/
|
||||
struct amdgpu_virt_fw_reserve {
|
||||
struct amdgim_pf2vf_info_header *p_pf2vf;
|
||||
struct amdgim_vf2pf_info_header *p_vf2pf;
|
||||
struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
|
||||
struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
|
||||
unsigned int checksum_key;
|
||||
};
|
||||
/*
|
||||
@@ -85,15 +85,17 @@ enum AMDGIM_FEATURE_FLAG {
|
||||
AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
|
||||
};
|
||||
|
||||
struct amdgim_pf2vf_info_header {
|
||||
struct amd_sriov_msg_pf2vf_info_header {
|
||||
/* the total structure size in byte. */
|
||||
uint32_t size;
|
||||
/* version of this structure, written by the GIM */
|
||||
uint32_t version;
|
||||
/* reserved */
|
||||
uint32_t reserved[2];
|
||||
} __aligned(4);
|
||||
struct amdgim_pf2vf_info_v1 {
|
||||
/* header contains size and version */
|
||||
struct amdgim_pf2vf_info_header header;
|
||||
struct amd_sriov_msg_pf2vf_info_header header;
|
||||
/* max_width * max_height */
|
||||
unsigned int uvd_enc_max_pixels_count;
|
||||
/* 16x16 pixels/sec, codec independent */
|
||||
@@ -112,7 +114,7 @@ struct amdgim_pf2vf_info_v1 {
|
||||
|
||||
struct amdgim_pf2vf_info_v2 {
|
||||
/* header contains size and version */
|
||||
struct amdgim_pf2vf_info_header header;
|
||||
struct amd_sriov_msg_pf2vf_info_header header;
|
||||
/* use private key from mailbox 2 to create chueksum */
|
||||
uint32_t checksum;
|
||||
/* The features flags of the GIM driver supports. */
|
||||
@@ -137,20 +139,22 @@ struct amdgim_pf2vf_info_v2 {
|
||||
uint64_t vcefw_kboffset;
|
||||
/* VCE FW size in KB */
|
||||
uint32_t vcefw_ksize;
|
||||
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)];
|
||||
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 3)];
|
||||
} __aligned(4);
|
||||
|
||||
|
||||
struct amdgim_vf2pf_info_header {
|
||||
struct amd_sriov_msg_vf2pf_info_header {
|
||||
/* the total structure size in byte. */
|
||||
uint32_t size;
|
||||
/*version of this structure, written by the guest */
|
||||
uint32_t version;
|
||||
/* reserved */
|
||||
uint32_t reserved[2];
|
||||
} __aligned(4);
|
||||
|
||||
struct amdgim_vf2pf_info_v1 {
|
||||
/* header contains size and version */
|
||||
struct amdgim_vf2pf_info_header header;
|
||||
struct amd_sriov_msg_vf2pf_info_header header;
|
||||
/* driver version */
|
||||
char driver_version[64];
|
||||
/* driver certification, 1=WHQL, 0=None */
|
||||
@@ -180,7 +184,7 @@ struct amdgim_vf2pf_info_v1 {
|
||||
|
||||
struct amdgim_vf2pf_info_v2 {
|
||||
/* header contains size and version */
|
||||
struct amdgim_vf2pf_info_header header;
|
||||
struct amd_sriov_msg_vf2pf_info_header header;
|
||||
uint32_t checksum;
|
||||
/* driver version */
|
||||
uint8_t driver_version[64];
|
||||
@@ -206,7 +210,7 @@ struct amdgim_vf2pf_info_v2 {
|
||||
uint32_t uvd_enc_usage;
|
||||
/* guest uvd engine usage percentage. 0xffff means N/A. */
|
||||
uint32_t uvd_enc_health;
|
||||
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)];
|
||||
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)];
|
||||
} __aligned(4);
|
||||
|
||||
#define AMDGPU_FW_VRAM_VF2PF_VER 2
|
||||
@@ -238,7 +242,6 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ;
|
||||
struct amdgpu_virt {
|
||||
uint32_t caps;
|
||||
struct amdgpu_bo *csa_obj;
|
||||
uint64_t csa_vmid0_addr;
|
||||
bool chained_ib_support;
|
||||
uint32_t reg_val_offs;
|
||||
struct amdgpu_irq_src ack_irq;
|
||||
@@ -251,8 +254,6 @@ struct amdgpu_virt {
|
||||
uint32_t gim_feature;
|
||||
};
|
||||
|
||||
#define AMDGPU_CSA_SIZE (8 * 1024)
|
||||
|
||||
#define amdgpu_sriov_enabled(adev) \
|
||||
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
|
||||
|
||||
@@ -277,17 +278,13 @@ static inline bool is_virtual_machine(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
struct amdgpu_vm;
|
||||
|
||||
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
|
||||
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
|
||||
int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
|
||||
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va **bo_va);
|
||||
void amdgpu_free_static_csa(struct amdgpu_device *adev);
|
||||
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
|
||||
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
|
||||
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
|
||||
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
|
||||
uint32_t reg0, uint32_t rreg1,
|
||||
uint32_t ref, uint32_t mask);
|
||||
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
|
||||
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
|
||||
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
|
||||
|
@@ -617,7 +617,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
|
||||
{
|
||||
entry->priority = 0;
|
||||
entry->tv.bo = &vm->root.base.bo->tbo;
|
||||
entry->tv.shared = true;
|
||||
/* One for the VM updates, one for TTM and one for the CS job */
|
||||
entry->tv.num_shared = 3;
|
||||
entry->user_pages = NULL;
|
||||
list_add(&entry->tv.head, validated);
|
||||
}
|
||||
@@ -773,10 +774,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
||||
|
||||
ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
|
||||
|
||||
r = reservation_object_reserve_shared(bo->tbo.resv);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
if (r)
|
||||
goto error;
|
||||
@@ -1844,10 +1841,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
||||
if (r)
|
||||
goto error_free;
|
||||
|
||||
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
|
||||
if (r)
|
||||
goto error_free;
|
||||
|
||||
r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags);
|
||||
if (r)
|
||||
goto error_free;
|
||||
@@ -3028,6 +3021,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
if (r)
|
||||
goto error_free_root;
|
||||
|
||||
r = reservation_object_reserve_shared(root->tbo.resv, 1);
|
||||
if (r)
|
||||
goto error_unreserve;
|
||||
|
||||
r = amdgpu_vm_clear_bo(adev, vm, root,
|
||||
adev->vm_manager.root_level,
|
||||
vm->pte_support_ats);
|
||||
@@ -3057,7 +3054,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
}
|
||||
|
||||
INIT_KFIFO(vm->faults);
|
||||
vm->fault_credit = 16;
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -3269,42 +3265,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
|
||||
amdgpu_vmid_free_reserved(adev, vm, i);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @pasid: PASID do identify the VM
|
||||
*
|
||||
* This function is expected to be called in interrupt context.
|
||||
*
|
||||
* Returns:
|
||||
* True if there was fault credit, false otherwise
|
||||
*/
|
||||
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
|
||||
unsigned int pasid)
|
||||
{
|
||||
struct amdgpu_vm *vm;
|
||||
|
||||
spin_lock(&adev->vm_manager.pasid_lock);
|
||||
vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
|
||||
if (!vm) {
|
||||
/* VM not found, can't track fault credit */
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* No lock needed. only accessed by IRQ handler */
|
||||
if (!vm->fault_credit) {
|
||||
/* Too many faults in this VM */
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
vm->fault_credit--;
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vm_manager_init - init the VM manager
|
||||
*
|
||||
|
@@ -229,9 +229,6 @@ struct amdgpu_vm {
|
||||
/* Up to 128 pending retry page faults */
|
||||
DECLARE_KFIFO(faults, u64, 128);
|
||||
|
||||
/* Limit non-retry fault storms */
|
||||
unsigned int fault_credit;
|
||||
|
||||
/* Points to the KFD process VM info */
|
||||
struct amdkfd_process_info *process_info;
|
||||
|
||||
@@ -299,8 +296,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid);
|
||||
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
|
||||
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
|
||||
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
|
||||
unsigned int pasid);
|
||||
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
|
||||
struct list_head *validated,
|
||||
struct amdgpu_bo_list_entry *entry);
|
||||
|
@@ -23,7 +23,7 @@
|
||||
*/
|
||||
#include <linux/list.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_psp.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
|
||||
|
||||
static DEFINE_MUTEX(xgmi_mutex);
|
||||
@@ -31,15 +31,16 @@ static DEFINE_MUTEX(xgmi_mutex);
|
||||
#define AMDGPU_MAX_XGMI_HIVE 8
|
||||
#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
|
||||
|
||||
struct amdgpu_hive_info {
|
||||
uint64_t hive_id;
|
||||
struct list_head device_list;
|
||||
};
|
||||
|
||||
static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
|
||||
static unsigned hive_count = 0;
|
||||
|
||||
static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
|
||||
|
||||
void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
|
||||
{
|
||||
return &hive->device_list;
|
||||
}
|
||||
|
||||
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
struct amdgpu_hive_info *tmp;
|
||||
@@ -58,62 +59,99 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
|
||||
tmp = &xgmi_hives[hive_count++];
|
||||
tmp->hive_id = adev->gmc.xgmi.hive_id;
|
||||
INIT_LIST_HEAD(&tmp->device_list);
|
||||
mutex_init(&tmp->hive_lock);
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
|
||||
/* Each psp need to set the latest topology */
|
||||
ret = psp_xgmi_set_topology_info(&adev->psp,
|
||||
hive->number_devices,
|
||||
&hive->topology_info);
|
||||
if (ret)
|
||||
dev_err(adev->dev,
|
||||
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
|
||||
adev->gmc.xgmi.node_id,
|
||||
adev->gmc.xgmi.hive_id, ret);
|
||||
else
|
||||
dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n",
|
||||
adev->gmc.xgmi.physical_node_id,
|
||||
adev->gmc.xgmi.hive_id);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
||||
{
|
||||
struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE];
|
||||
struct psp_xgmi_topology_info *hive_topology;
|
||||
struct amdgpu_hive_info *hive;
|
||||
struct amdgpu_xgmi *entry;
|
||||
struct amdgpu_device *tmp_adev;
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
|
||||
int count = 0, ret = -EINVAL;
|
||||
|
||||
if ((adev->asic_type < CHIP_VEGA20) ||
|
||||
(adev->flags & AMD_IS_APU) )
|
||||
if (!adev->gmc.xgmi.supported)
|
||||
return 0;
|
||||
adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp);
|
||||
|
||||
adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
|
||||
adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
|
||||
|
||||
memset(&tmp_topology[0], 0, sizeof(tmp_topology));
|
||||
mutex_lock(&xgmi_mutex);
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (!hive)
|
||||
goto exit;
|
||||
|
||||
hive_topology = &hive->topology_info;
|
||||
|
||||
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
|
||||
list_for_each_entry(entry, &hive->device_list, head)
|
||||
tmp_topology[count++].device_id = entry->device_id;
|
||||
hive_topology->nodes[count++].node_id = entry->node_id;
|
||||
hive->number_devices = count;
|
||||
|
||||
ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology);
|
||||
if (ret) {
|
||||
dev_err(adev->dev,
|
||||
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
|
||||
adev->gmc.xgmi.device_id,
|
||||
adev->gmc.xgmi.hive_id, ret);
|
||||
goto exit;
|
||||
}
|
||||
/* Each psp need to set the latest topology */
|
||||
/* Each psp need to get the latest topology */
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
|
||||
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
|
||||
if (ret) {
|
||||
dev_err(tmp_adev->dev,
|
||||
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
|
||||
tmp_adev->gmc.xgmi.device_id,
|
||||
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
|
||||
tmp_adev->gmc.xgmi.node_id,
|
||||
tmp_adev->gmc.xgmi.hive_id, ret);
|
||||
/* To do : continue with some node failed or disable the whole hive */
|
||||
/* To do : continue with some node failed or disable the whole hive */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!ret)
|
||||
dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
|
||||
adev->gmc.xgmi.physical_node_id,
|
||||
adev->gmc.xgmi.hive_id);
|
||||
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
exit:
|
||||
mutex_unlock(&xgmi_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_hive_info *hive;
|
||||
|
||||
if (!adev->gmc.xgmi.supported)
|
||||
return;
|
||||
|
||||
mutex_lock(&xgmi_mutex);
|
||||
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (!hive)
|
||||
goto exit;
|
||||
|
||||
if (!(hive->number_devices--))
|
||||
mutex_destroy(&hive->hive_lock);
|
||||
|
||||
exit:
|
||||
mutex_unlock(&xgmi_mutex);
|
||||
}
|
||||
|
40
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
Normal file
40
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright 2016 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __AMDGPU_XGMI_H__
|
||||
#define __AMDGPU_XGMI_H__
|
||||
|
||||
#include "amdgpu_psp.h"
|
||||
|
||||
struct amdgpu_hive_info {
|
||||
uint64_t hive_id;
|
||||
struct list_head device_list;
|
||||
struct psp_xgmi_topology_info topology_info;
|
||||
int number_devices;
|
||||
struct mutex hive_lock;
|
||||
};
|
||||
|
||||
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
|
||||
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
|
||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
|
||||
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
@@ -743,19 +743,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)
|
||||
|
||||
if (pi->caps_sq_ramping || pi->caps_db_ramping ||
|
||||
pi->caps_td_ramping || pi->caps_tcp_ramping) {
|
||||
adev->gfx.rlc.funcs->enter_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
|
||||
if (enable) {
|
||||
ret = ci_program_pt_config_registers(adev, didt_config_ci);
|
||||
if (ret) {
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ci_do_enable_didt(adev, enable);
|
||||
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@@ -1755,6 +1755,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
|
||||
.flush_hdp = &cik_flush_hdp,
|
||||
.invalidate_hdp = &cik_invalidate_hdp,
|
||||
.need_full_reset = &cik_need_full_reset,
|
||||
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||
};
|
||||
|
||||
static int cik_common_early_init(void *handle)
|
||||
|
@@ -30,4 +30,5 @@ void cik_srbm_select(struct amdgpu_device *adev,
|
||||
u32 me, u32 pipe, u32 queue, u32 vmid);
|
||||
int cik_set_ip_blocks(struct amdgpu_device *adev);
|
||||
|
||||
void legacy_doorbell_index_init(struct amdgpu_device *adev);
|
||||
#endif
|
||||
|
@@ -228,34 +228,6 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* [127:96] - reserved
|
||||
*/
|
||||
|
||||
/**
|
||||
* cik_ih_prescreen_iv - prescreen an interrupt vector
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Returns true if the interrupt vector should be further processed.
|
||||
*/
|
||||
static bool cik_ih_prescreen_iv(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u16 pasid;
|
||||
|
||||
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
|
||||
case 146:
|
||||
case 147:
|
||||
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
|
||||
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
/* Not a VM fault */
|
||||
return true;
|
||||
}
|
||||
|
||||
adev->irq.ih.rptr += 16;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* cik_ih_decode_iv - decode an interrupt vector
|
||||
*
|
||||
@@ -461,7 +433,6 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = {
|
||||
|
||||
static const struct amdgpu_ih_funcs cik_ih_funcs = {
|
||||
.get_wptr = cik_ih_get_wptr,
|
||||
.prescreen_iv = cik_ih_prescreen_iv,
|
||||
.decode_iv = cik_ih_decode_iv,
|
||||
.set_rptr = cik_ih_set_rptr
|
||||
};
|
||||
|
@@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
|
||||
|
||||
static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
{
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
@@ -218,9 +218,11 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
* Schedule an IB in the DMA ring (CIK).
|
||||
*/
|
||||
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 extra_bits = vmid & 0xf;
|
||||
|
||||
/* IB packet must end on a 8 DW boundary */
|
||||
@@ -316,8 +318,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
|
||||
WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
|
||||
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
|
||||
}
|
||||
sdma0->ready = false;
|
||||
sdma1->ready = false;
|
||||
sdma0->sched.ready = false;
|
||||
sdma1->sched.ready = false;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -494,18 +496,16 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
|
||||
/* enable DMA IBs */
|
||||
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
|
||||
|
||||
ring->ready = true;
|
||||
ring->sched.ready = true;
|
||||
}
|
||||
|
||||
cik_sdma_enable(adev, true);
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (adev->mman.buffer_funcs_ring == ring)
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, true);
|
||||
@@ -618,21 +618,17 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
|
||||
u64 gpu_addr;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 5);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_wb;
|
||||
|
||||
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
|
||||
amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
|
||||
amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
|
||||
@@ -647,15 +643,11 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_wb:
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -678,20 +670,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 256, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err0;
|
||||
}
|
||||
|
||||
ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
|
||||
SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
|
||||
@@ -706,21 +694,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err1;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err1;
|
||||
}
|
||||
tmp = le32_to_cpu(adev->wb.wb[index]);
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
|
||||
else
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
err1:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
@@ -822,7 +805,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
|
||||
*/
|
||||
static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
|
||||
{
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
|
||||
u32 pad_count;
|
||||
int i;
|
||||
|
||||
@@ -1214,8 +1197,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
u8 instance_id;
|
||||
|
||||
DRM_ERROR("Illegal instruction in SDMA command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
instance_id = (entry->ring_id & 0x3) >> 0;
|
||||
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -207,34 +207,6 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* cz_ih_prescreen_iv - prescreen an interrupt vector
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Returns true if the interrupt vector should be further processed.
|
||||
*/
|
||||
static bool cz_ih_prescreen_iv(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u16 pasid;
|
||||
|
||||
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
|
||||
case 146:
|
||||
case 147:
|
||||
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
|
||||
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
/* Not a VM fault */
|
||||
return true;
|
||||
}
|
||||
|
||||
adev->irq.ih.rptr += 16;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* cz_ih_decode_iv - decode an interrupt vector
|
||||
*
|
||||
@@ -442,7 +414,6 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = {
|
||||
|
||||
static const struct amdgpu_ih_funcs cz_ih_funcs = {
|
||||
.get_wptr = cz_ih_get_wptr,
|
||||
.prescreen_iv = cz_ih_prescreen_iv,
|
||||
.decode_iv = cz_ih_decode_iv,
|
||||
.set_rptr = cz_ih_set_rptr
|
||||
};
|
||||
|
@@ -1775,18 +1775,15 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
int r;
|
||||
|
||||
r = amdgpu_gfx_scratch_get(adev, &scratch);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
WREG32(scratch, 0xCAFEDEAD);
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_scratch;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
@@ -1798,13 +1795,11 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
break;
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
|
||||
ring->idx, scratch, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_scratch:
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
@@ -1845,9 +1840,11 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
||||
}
|
||||
|
||||
static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 header, control = 0;
|
||||
|
||||
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
|
||||
@@ -1892,17 +1889,15 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_gfx_scratch_get(adev, &scratch);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
WREG32(scratch, 0xCAFEDEAD);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 256, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err1;
|
||||
}
|
||||
|
||||
ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
|
||||
ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START));
|
||||
ib.ptr[2] = 0xDEADBEEF;
|
||||
@@ -1914,22 +1909,16 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err2;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err2;
|
||||
}
|
||||
tmp = RREG32(scratch);
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
|
||||
scratch, tmp);
|
||||
else
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
err2:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
@@ -1950,9 +1939,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
|
||||
CP_ME_CNTL__CE_HALT_MASK));
|
||||
WREG32(mmSCRATCH_UMSK, 0);
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
adev->gfx.gfx_ring[i].ready = false;
|
||||
adev->gfx.gfx_ring[i].sched.ready = false;
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
adev->gfx.compute_ring[i].ready = false;
|
||||
adev->gfx.compute_ring[i].sched.ready = false;
|
||||
}
|
||||
udelay(50);
|
||||
}
|
||||
@@ -2124,12 +2113,9 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
|
||||
|
||||
/* start the rings */
|
||||
gfx_v6_0_cp_gfx_start(adev);
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2227,14 +2213,11 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
|
||||
WREG32(mmCP_RB2_CNTL, tmp);
|
||||
WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8);
|
||||
|
||||
adev->gfx.compute_ring[0].ready = false;
|
||||
adev->gfx.compute_ring[1].ready = false;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]);
|
||||
r = amdgpu_ring_test_helper(&adev->gfx.compute_ring[i]);
|
||||
if (r)
|
||||
return r;
|
||||
adev->gfx.compute_ring[i].ready = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -2368,18 +2351,11 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
|
||||
amdgpu_ring_write(ring, val);
|
||||
}
|
||||
|
||||
static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
|
||||
}
|
||||
|
||||
static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
|
||||
{
|
||||
const u32 *src_ptr;
|
||||
volatile u32 *dst_ptr;
|
||||
u32 dws, i;
|
||||
u32 dws;
|
||||
u64 reg_list_mc_addr;
|
||||
const struct cs_section_def *cs_data;
|
||||
int r;
|
||||
@@ -2394,26 +2370,10 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
|
||||
cs_data = adev->gfx.rlc.cs_data;
|
||||
|
||||
if (src_ptr) {
|
||||
/* save restore block */
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.save_restore_obj,
|
||||
&adev->gfx.rlc.save_restore_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.sr_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n",
|
||||
r);
|
||||
gfx_v6_0_rlc_fini(adev);
|
||||
/* init save restore block */
|
||||
r = amdgpu_gfx_rlc_init_sr(adev, dws);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* write the sr buffer */
|
||||
dst_ptr = adev->gfx.rlc.sr_ptr;
|
||||
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
|
||||
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
|
||||
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
|
||||
}
|
||||
|
||||
if (cs_data) {
|
||||
@@ -2428,7 +2388,7 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
|
||||
gfx_v6_0_rlc_fini(adev);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -2549,8 +2509,8 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
|
||||
if (!adev->gfx.rlc_fw)
|
||||
return -EINVAL;
|
||||
|
||||
gfx_v6_0_rlc_stop(adev);
|
||||
gfx_v6_0_rlc_reset(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
adev->gfx.rlc.funcs->reset(adev);
|
||||
gfx_v6_0_init_pg(adev);
|
||||
gfx_v6_0_init_cg(adev);
|
||||
|
||||
@@ -2578,7 +2538,7 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
|
||||
WREG32(mmRLC_UCODE_ADDR, 0);
|
||||
|
||||
gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev));
|
||||
gfx_v6_0_rlc_start(adev);
|
||||
adev->gfx.rlc.funcs->start(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -3075,6 +3035,14 @@ static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
|
||||
.select_me_pipe_q = &gfx_v6_0_select_me_pipe_q
|
||||
};
|
||||
|
||||
static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
|
||||
.init = gfx_v6_0_rlc_init,
|
||||
.resume = gfx_v6_0_rlc_resume,
|
||||
.stop = gfx_v6_0_rlc_stop,
|
||||
.reset = gfx_v6_0_rlc_reset,
|
||||
.start = gfx_v6_0_rlc_start
|
||||
};
|
||||
|
||||
static int gfx_v6_0_early_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
@@ -3082,6 +3050,7 @@ static int gfx_v6_0_early_init(void *handle)
|
||||
adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
|
||||
adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS;
|
||||
adev->gfx.funcs = &gfx_v6_0_gfx_funcs;
|
||||
adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs;
|
||||
gfx_v6_0_set_ring_funcs(adev);
|
||||
gfx_v6_0_set_irq_funcs(adev);
|
||||
|
||||
@@ -3114,7 +3083,7 @@ static int gfx_v6_0_sw_init(void *handle)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = gfx_v6_0_rlc_init(adev);
|
||||
r = adev->gfx.rlc.funcs->init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to init rlc BOs!\n");
|
||||
return r;
|
||||
@@ -3165,7 +3134,7 @@ static int gfx_v6_0_sw_fini(void *handle)
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
|
||||
|
||||
gfx_v6_0_rlc_fini(adev);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -3177,7 +3146,7 @@ static int gfx_v6_0_hw_init(void *handle)
|
||||
|
||||
gfx_v6_0_constants_init(adev);
|
||||
|
||||
r = gfx_v6_0_rlc_resume(adev);
|
||||
r = adev->gfx.rlc.funcs->resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@@ -3195,7 +3164,7 @@ static int gfx_v6_0_hw_fini(void *handle)
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
gfx_v6_0_cp_enable(adev, false);
|
||||
gfx_v6_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
gfx_v6_0_fini_pg(adev);
|
||||
|
||||
return 0;
|
||||
@@ -3393,12 +3362,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gfx_v6_0_fault(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
switch (entry->ring_id) {
|
||||
case 0:
|
||||
ring = &adev->gfx.gfx_ring[0];
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
ring = &adev->gfx.compute_ring[entry->ring_id - 1];
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
|
||||
static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal register access in command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v6_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3407,7 +3395,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal instruction in command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v6_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -882,7 +882,6 @@ static const u32 kalindi_rlc_save_restore_register_list[] =
|
||||
|
||||
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
|
||||
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
|
||||
static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev);
|
||||
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
|
||||
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
|
||||
|
||||
@@ -2064,17 +2063,14 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
int r;
|
||||
|
||||
r = amdgpu_gfx_scratch_get(adev, &scratch);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
WREG32(scratch, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_scratch;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
|
||||
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
@@ -2086,13 +2082,10 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
break;
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
|
||||
ring->idx, scratch, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_scratch:
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
@@ -2233,9 +2226,11 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
|
||||
* on the gfx ring for execution by the GPU.
|
||||
*/
|
||||
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 header, control = 0;
|
||||
|
||||
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
|
||||
@@ -2262,9 +2257,11 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
}
|
||||
|
||||
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
|
||||
@@ -2316,17 +2313,15 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_gfx_scratch_get(adev, &scratch);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
WREG32(scratch, 0xCAFEDEAD);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 256, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err1;
|
||||
}
|
||||
|
||||
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
|
||||
ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
|
||||
ib.ptr[2] = 0xDEADBEEF;
|
||||
@@ -2338,22 +2333,16 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err2;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err2;
|
||||
}
|
||||
tmp = RREG32(scratch);
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
|
||||
scratch, tmp);
|
||||
else
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
err2:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
@@ -2403,7 +2392,7 @@ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
|
||||
} else {
|
||||
WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
adev->gfx.gfx_ring[i].ready = false;
|
||||
adev->gfx.gfx_ring[i].sched.ready = false;
|
||||
}
|
||||
udelay(50);
|
||||
}
|
||||
@@ -2613,12 +2602,9 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
|
||||
|
||||
/* start the ring */
|
||||
gfx_v7_0_cp_gfx_start(adev);
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2675,7 +2661,7 @@ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
|
||||
} else {
|
||||
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
adev->gfx.compute_ring[i].ready = false;
|
||||
adev->gfx.compute_ring[i].sched.ready = false;
|
||||
}
|
||||
udelay(50);
|
||||
}
|
||||
@@ -2781,7 +2767,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
|
||||
* GFX7_MEC_HPD_SIZE * 2;
|
||||
|
||||
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.mec.hpd_eop_obj,
|
||||
&adev->gfx.mec.hpd_eop_gpu_addr,
|
||||
(void **)&hpd);
|
||||
@@ -3106,10 +3092,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
|
||||
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r)
|
||||
ring->ready = false;
|
||||
amdgpu_ring_test_helper(ring);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -3268,18 +3251,10 @@ static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
|
||||
* The RLC is a multi-purpose microengine that handles a
|
||||
* variety of functions.
|
||||
*/
|
||||
static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
|
||||
}
|
||||
|
||||
static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
|
||||
{
|
||||
const u32 *src_ptr;
|
||||
volatile u32 *dst_ptr;
|
||||
u32 dws, i;
|
||||
u32 dws;
|
||||
const struct cs_section_def *cs_data;
|
||||
int r;
|
||||
|
||||
@@ -3306,66 +3281,23 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
|
||||
cs_data = adev->gfx.rlc.cs_data;
|
||||
|
||||
if (src_ptr) {
|
||||
/* save restore block */
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.save_restore_obj,
|
||||
&adev->gfx.rlc.save_restore_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.sr_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r);
|
||||
gfx_v7_0_rlc_fini(adev);
|
||||
/* init save restore block */
|
||||
r = amdgpu_gfx_rlc_init_sr(adev, dws);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* write the sr buffer */
|
||||
dst_ptr = adev->gfx.rlc.sr_ptr;
|
||||
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
|
||||
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
|
||||
}
|
||||
|
||||
if (cs_data) {
|
||||
/* clear state block */
|
||||
adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev);
|
||||
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
|
||||
gfx_v7_0_rlc_fini(adev);
|
||||
/* init clear state block */
|
||||
r = amdgpu_gfx_rlc_init_csb(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* set up the cs buffer */
|
||||
dst_ptr = adev->gfx.rlc.cs_ptr;
|
||||
gfx_v7_0_get_csb_buffer(adev, dst_ptr);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
|
||||
}
|
||||
|
||||
if (adev->gfx.rlc.cp_table_size) {
|
||||
|
||||
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.cp_table_obj,
|
||||
&adev->gfx.rlc.cp_table_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cp_table_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
|
||||
gfx_v7_0_rlc_fini(adev);
|
||||
r = amdgpu_gfx_rlc_init_cpt(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gfx_v7_0_init_cp_pg_table(adev);
|
||||
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -3446,7 +3378,12 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
|
||||
return orig;
|
||||
}
|
||||
|
||||
static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
|
||||
static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp, i, mask;
|
||||
|
||||
@@ -3468,7 +3405,7 @@ static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
|
||||
static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp;
|
||||
|
||||
@@ -3545,13 +3482,13 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
|
||||
adev->gfx.rlc_feature_version = le32_to_cpu(
|
||||
hdr->ucode_feature_version);
|
||||
|
||||
gfx_v7_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
|
||||
/* disable CG */
|
||||
tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
|
||||
WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
|
||||
|
||||
gfx_v7_0_rlc_reset(adev);
|
||||
adev->gfx.rlc.funcs->reset(adev);
|
||||
|
||||
gfx_v7_0_init_pg(adev);
|
||||
|
||||
@@ -3582,7 +3519,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
|
||||
if (adev->asic_type == CHIP_BONAIRE)
|
||||
WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
|
||||
|
||||
gfx_v7_0_rlc_start(adev);
|
||||
adev->gfx.rlc.funcs->start(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -3784,72 +3721,12 @@ static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
|
||||
WREG32(mmRLC_PG_CNTL, data);
|
||||
}
|
||||
|
||||
static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev)
|
||||
static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
|
||||
{
|
||||
const __le32 *fw_data;
|
||||
volatile u32 *dst_ptr;
|
||||
int me, i, max_me = 4;
|
||||
u32 bo_offset = 0;
|
||||
u32 table_offset, table_size;
|
||||
|
||||
if (adev->asic_type == CHIP_KAVERI)
|
||||
max_me = 5;
|
||||
|
||||
if (adev->gfx.rlc.cp_table_ptr == NULL)
|
||||
return;
|
||||
|
||||
/* write the cp table buffer */
|
||||
dst_ptr = adev->gfx.rlc.cp_table_ptr;
|
||||
for (me = 0; me < max_me; me++) {
|
||||
if (me == 0) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.ce_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 1) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.pfp_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 2) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.me_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 3) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec2_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
}
|
||||
|
||||
for (i = 0; i < table_size; i ++) {
|
||||
dst_ptr[bo_offset + i] =
|
||||
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
|
||||
}
|
||||
|
||||
bo_offset += table_size;
|
||||
}
|
||||
return 5;
|
||||
else
|
||||
return 4;
|
||||
}
|
||||
|
||||
static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
|
||||
@@ -4288,8 +4165,17 @@ static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
|
||||
};
|
||||
|
||||
static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
|
||||
.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
|
||||
.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
|
||||
.is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
|
||||
.set_safe_mode = gfx_v7_0_set_safe_mode,
|
||||
.unset_safe_mode = gfx_v7_0_unset_safe_mode,
|
||||
.init = gfx_v7_0_rlc_init,
|
||||
.get_csb_size = gfx_v7_0_get_csb_size,
|
||||
.get_csb_buffer = gfx_v7_0_get_csb_buffer,
|
||||
.get_cp_table_num = gfx_v7_0_cp_pg_table_num,
|
||||
.resume = gfx_v7_0_rlc_resume,
|
||||
.stop = gfx_v7_0_rlc_stop,
|
||||
.reset = gfx_v7_0_rlc_reset,
|
||||
.start = gfx_v7_0_rlc_start
|
||||
};
|
||||
|
||||
static int gfx_v7_0_early_init(void *handle)
|
||||
@@ -4477,7 +4363,7 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
|
||||
|
||||
ring->ring_obj = NULL;
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
|
||||
ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
|
||||
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
|
||||
|
||||
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
|
||||
@@ -4540,7 +4426,7 @@ static int gfx_v7_0_sw_init(void *handle)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = gfx_v7_0_rlc_init(adev);
|
||||
r = adev->gfx.rlc.funcs->init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to init rlc BOs!\n");
|
||||
return r;
|
||||
@@ -4604,7 +4490,7 @@ static int gfx_v7_0_sw_fini(void *handle)
|
||||
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
|
||||
|
||||
gfx_v7_0_cp_compute_fini(adev);
|
||||
gfx_v7_0_rlc_fini(adev);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
gfx_v7_0_mec_fini(adev);
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
@@ -4627,7 +4513,7 @@ static int gfx_v7_0_hw_init(void *handle)
|
||||
gfx_v7_0_constants_init(adev);
|
||||
|
||||
/* init rlc */
|
||||
r = gfx_v7_0_rlc_resume(adev);
|
||||
r = adev->gfx.rlc.funcs->resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@@ -4645,7 +4531,7 @@ static int gfx_v7_0_hw_fini(void *handle)
|
||||
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
|
||||
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
|
||||
gfx_v7_0_cp_enable(adev, false);
|
||||
gfx_v7_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
gfx_v7_0_fini_pg(adev);
|
||||
|
||||
return 0;
|
||||
@@ -4730,7 +4616,7 @@ static int gfx_v7_0_soft_reset(void *handle)
|
||||
gfx_v7_0_update_cg(adev, false);
|
||||
|
||||
/* stop the rlc */
|
||||
gfx_v7_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
|
||||
/* Disable GFX parsing/prefetching */
|
||||
WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
|
||||
@@ -4959,12 +4845,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gfx_v7_0_fault(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
struct amdgpu_ring *ring;
|
||||
u8 me_id, pipe_id;
|
||||
int i;
|
||||
|
||||
me_id = (entry->ring_id & 0x0c) >> 2;
|
||||
pipe_id = (entry->ring_id & 0x03) >> 0;
|
||||
switch (me_id) {
|
||||
case 0:
|
||||
drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
if ((ring->me == me_id) && (ring->pipe == pipe_id))
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal register access in command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v7_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4974,7 +4884,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
|
||||
{
|
||||
DRM_ERROR("Illegal instruction in command stream\n");
|
||||
// XXX soft reset the gfx block only
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v7_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -44,7 +44,6 @@
|
||||
#include "gca/gfx_8_0_d.h"
|
||||
#include "gca/gfx_8_0_enum.h"
|
||||
#include "gca/gfx_8_0_sh_mask.h"
|
||||
#include "gca/gfx_8_0_enum.h"
|
||||
|
||||
#include "dce/dce_10_0_d.h"
|
||||
#include "dce/dce_10_0_sh_mask.h"
|
||||
@@ -54,7 +53,7 @@
|
||||
#include "ivsrcid/ivsrcid_vislands30.h"
|
||||
|
||||
#define GFX8_NUM_GFX_RINGS 1
|
||||
#define GFX8_MEC_HPD_SIZE 2048
|
||||
#define GFX8_MEC_HPD_SIZE 4096
|
||||
|
||||
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
|
||||
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
|
||||
@@ -839,18 +838,14 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
int r;
|
||||
|
||||
r = amdgpu_gfx_scratch_get(adev, &scratch);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
WREG32(scratch, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_scratch;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
|
||||
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
@@ -862,14 +857,11 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
break;
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
|
||||
ring->idx, scratch, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_scratch:
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
@@ -886,19 +878,16 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 16, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err1;
|
||||
}
|
||||
|
||||
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
|
||||
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
|
||||
ib.ptr[2] = lower_32_bits(gpu_addr);
|
||||
@@ -912,22 +901,17 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err2;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err2;
|
||||
}
|
||||
|
||||
tmp = adev->wb.wb[index];
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("ib test on ring %d failed\n", ring->idx);
|
||||
else
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
err2:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
@@ -1298,81 +1282,16 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
|
||||
buffer[count++] = cpu_to_le32(0);
|
||||
}
|
||||
|
||||
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
|
||||
static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
|
||||
{
|
||||
const __le32 *fw_data;
|
||||
volatile u32 *dst_ptr;
|
||||
int me, i, max_me = 4;
|
||||
u32 bo_offset = 0;
|
||||
u32 table_offset, table_size;
|
||||
|
||||
if (adev->asic_type == CHIP_CARRIZO)
|
||||
max_me = 5;
|
||||
|
||||
/* write the cp table buffer */
|
||||
dst_ptr = adev->gfx.rlc.cp_table_ptr;
|
||||
for (me = 0; me < max_me; me++) {
|
||||
if (me == 0) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.ce_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 1) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.pfp_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 2) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.me_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 3) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 4) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec2_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
}
|
||||
|
||||
for (i = 0; i < table_size; i ++) {
|
||||
dst_ptr[bo_offset + i] =
|
||||
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
|
||||
}
|
||||
|
||||
bo_offset += table_size;
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
|
||||
return 5;
|
||||
else
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
|
||||
{
|
||||
volatile u32 *dst_ptr;
|
||||
u32 dws;
|
||||
const struct cs_section_def *cs_data;
|
||||
int r;
|
||||
|
||||
@@ -1381,44 +1300,18 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
|
||||
cs_data = adev->gfx.rlc.cs_data;
|
||||
|
||||
if (cs_data) {
|
||||
/* clear state block */
|
||||
adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
|
||||
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
|
||||
gfx_v8_0_rlc_fini(adev);
|
||||
/* init clear state block */
|
||||
r = amdgpu_gfx_rlc_init_csb(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* set up the cs buffer */
|
||||
dst_ptr = adev->gfx.rlc.cs_ptr;
|
||||
gfx_v8_0_get_csb_buffer(adev, dst_ptr);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
|
||||
}
|
||||
|
||||
if ((adev->asic_type == CHIP_CARRIZO) ||
|
||||
(adev->asic_type == CHIP_STONEY)) {
|
||||
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
|
||||
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.cp_table_obj,
|
||||
&adev->gfx.rlc.cp_table_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cp_table_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
|
||||
r = amdgpu_gfx_rlc_init_cpt(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
cz_init_cp_jump_table(adev);
|
||||
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1443,7 +1336,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
|
||||
mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
|
||||
|
||||
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.mec.hpd_eop_obj,
|
||||
&adev->gfx.mec.hpd_eop_gpu_addr,
|
||||
(void **)&hpd);
|
||||
@@ -1629,7 +1522,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
|
||||
return 0;
|
||||
|
||||
/* bail if the compute ring is not ready */
|
||||
if (!ring->ready)
|
||||
if (!ring->sched.ready)
|
||||
return 0;
|
||||
|
||||
tmp = RREG32(mmGB_EDC_MODE);
|
||||
@@ -1997,7 +1890,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
|
||||
|
||||
ring->ring_obj = NULL;
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
|
||||
ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
|
||||
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
|
||||
+ (ring_id * GFX8_MEC_HPD_SIZE);
|
||||
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
|
||||
@@ -2088,7 +1981,7 @@ static int gfx_v8_0_sw_init(void *handle)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = gfx_v8_0_rlc_init(adev);
|
||||
r = adev->gfx.rlc.funcs->init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to init rlc BOs!\n");
|
||||
return r;
|
||||
@@ -2108,7 +2001,7 @@ static int gfx_v8_0_sw_init(void *handle)
|
||||
/* no gfx doorbells on iceland */
|
||||
if (adev->asic_type != CHIP_TOPAZ) {
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
|
||||
ring->doorbell_index = adev->doorbell_index.gfx_ring0;
|
||||
}
|
||||
|
||||
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
|
||||
@@ -2181,7 +2074,7 @@ static int gfx_v8_0_sw_fini(void *handle)
|
||||
amdgpu_gfx_kiq_fini(adev);
|
||||
|
||||
gfx_v8_0_mec_fini(adev);
|
||||
gfx_v8_0_rlc_fini(adev);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
@@ -4175,10 +4068,15 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
|
||||
|
||||
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
|
||||
{
|
||||
gfx_v8_0_rlc_stop(adev);
|
||||
gfx_v8_0_rlc_reset(adev);
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
gfx_v8_0_init_csb(adev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
adev->gfx.rlc.funcs->reset(adev);
|
||||
gfx_v8_0_init_pg(adev);
|
||||
gfx_v8_0_rlc_start(adev);
|
||||
adev->gfx.rlc.funcs->start(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -4197,7 +4095,7 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
|
||||
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
|
||||
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
adev->gfx.gfx_ring[i].ready = false;
|
||||
adev->gfx.gfx_ring[i].sched.ready = false;
|
||||
}
|
||||
WREG32(mmCP_ME_CNTL, tmp);
|
||||
udelay(50);
|
||||
@@ -4322,7 +4220,7 @@ static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu
|
||||
|
||||
tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
|
||||
DOORBELL_RANGE_LOWER,
|
||||
AMDGPU_DOORBELL_GFX_RING0);
|
||||
adev->doorbell_index.gfx_ring0);
|
||||
WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
|
||||
|
||||
WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
|
||||
@@ -4379,10 +4277,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
|
||||
/* start the ring */
|
||||
amdgpu_ring_clear_ring(ring);
|
||||
gfx_v8_0_cp_gfx_start(adev);
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r)
|
||||
ring->ready = false;
|
||||
ring->sched.ready = true;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -4396,8 +4292,8 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
|
||||
} else {
|
||||
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
adev->gfx.compute_ring[i].ready = false;
|
||||
adev->gfx.kiq.ring.ready = false;
|
||||
adev->gfx.compute_ring[i].sched.ready = false;
|
||||
adev->gfx.kiq.ring.sched.ready = false;
|
||||
}
|
||||
udelay(50);
|
||||
}
|
||||
@@ -4473,11 +4369,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
|
||||
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
|
||||
}
|
||||
|
||||
r = amdgpu_ring_test_ring(kiq_ring);
|
||||
if (r) {
|
||||
r = amdgpu_ring_test_helper(kiq_ring);
|
||||
if (r)
|
||||
DRM_ERROR("KCQ enable failed\n");
|
||||
kiq_ring->ready = false;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -4755,8 +4649,8 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
|
||||
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->asic_type > CHIP_TONGA) {
|
||||
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
|
||||
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
|
||||
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
|
||||
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
|
||||
}
|
||||
/* enable doorbells */
|
||||
WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
|
||||
@@ -4781,7 +4675,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
|
||||
amdgpu_bo_kunmap(ring->mqd_obj);
|
||||
ring->mqd_ptr = NULL;
|
||||
amdgpu_bo_unreserve(ring->mqd_obj);
|
||||
ring->ready = true;
|
||||
ring->sched.ready = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4820,10 +4714,7 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
|
||||
*/
|
||||
for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r)
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
}
|
||||
|
||||
done:
|
||||
@@ -4867,7 +4758,7 @@ static int gfx_v8_0_hw_init(void *handle)
|
||||
gfx_v8_0_init_golden_registers(adev);
|
||||
gfx_v8_0_constants_init(adev);
|
||||
|
||||
r = gfx_v8_0_rlc_resume(adev);
|
||||
r = adev->gfx.rlc.funcs->resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@@ -4899,7 +4790,7 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
|
||||
amdgpu_ring_write(kiq_ring, 0);
|
||||
amdgpu_ring_write(kiq_ring, 0);
|
||||
}
|
||||
r = amdgpu_ring_test_ring(kiq_ring);
|
||||
r = amdgpu_ring_test_helper(kiq_ring);
|
||||
if (r)
|
||||
DRM_ERROR("KCQ disable failed\n");
|
||||
|
||||
@@ -4973,16 +4864,16 @@ static int gfx_v8_0_hw_fini(void *handle)
|
||||
pr_debug("For SRIOV client, shouldn't do anything.\n");
|
||||
return 0;
|
||||
}
|
||||
adev->gfx.rlc.funcs->enter_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
if (!gfx_v8_0_wait_for_idle(adev))
|
||||
gfx_v8_0_cp_enable(adev, false);
|
||||
else
|
||||
pr_err("cp is busy, skip halt cp\n");
|
||||
if (!gfx_v8_0_wait_for_rlc_idle(adev))
|
||||
gfx_v8_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
else
|
||||
pr_err("rlc is busy, skip halt rlc\n");
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -5061,17 +4952,16 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
|
||||
static int gfx_v8_0_pre_soft_reset(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
|
||||
u32 grbm_soft_reset = 0;
|
||||
|
||||
if ((!adev->gfx.grbm_soft_reset) &&
|
||||
(!adev->gfx.srbm_soft_reset))
|
||||
return 0;
|
||||
|
||||
grbm_soft_reset = adev->gfx.grbm_soft_reset;
|
||||
srbm_soft_reset = adev->gfx.srbm_soft_reset;
|
||||
|
||||
/* stop the rlc */
|
||||
gfx_v8_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
|
||||
if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
|
||||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
|
||||
@@ -5165,14 +5055,13 @@ static int gfx_v8_0_soft_reset(void *handle)
|
||||
static int gfx_v8_0_post_soft_reset(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
|
||||
u32 grbm_soft_reset = 0;
|
||||
|
||||
if ((!adev->gfx.grbm_soft_reset) &&
|
||||
(!adev->gfx.srbm_soft_reset))
|
||||
return 0;
|
||||
|
||||
grbm_soft_reset = adev->gfx.grbm_soft_reset;
|
||||
srbm_soft_reset = adev->gfx.srbm_soft_reset;
|
||||
|
||||
if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
|
||||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
|
||||
@@ -5197,7 +5086,7 @@ static int gfx_v8_0_post_soft_reset(void *handle)
|
||||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
|
||||
gfx_v8_0_cp_gfx_resume(adev);
|
||||
|
||||
gfx_v8_0_rlc_start(adev);
|
||||
adev->gfx.rlc.funcs->start(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -5445,7 +5334,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
|
||||
AMD_PG_SUPPORT_RLC_SMU_HS |
|
||||
AMD_PG_SUPPORT_CP |
|
||||
AMD_PG_SUPPORT_GFX_DMG))
|
||||
adev->gfx.rlc.funcs->enter_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_CARRIZO:
|
||||
case CHIP_STONEY:
|
||||
@@ -5499,7 +5388,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
|
||||
AMD_PG_SUPPORT_RLC_SMU_HS |
|
||||
AMD_PG_SUPPORT_CP |
|
||||
AMD_PG_SUPPORT_GFX_DMG))
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -5593,57 +5482,53 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
|
||||
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
|
||||
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
|
||||
|
||||
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
|
||||
static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 data;
|
||||
uint32_t rlc_setting;
|
||||
|
||||
rlc_setting = RREG32(mmRLC_CNTL);
|
||||
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
uint32_t data;
|
||||
unsigned i;
|
||||
|
||||
data = RREG32(mmRLC_CNTL);
|
||||
if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
|
||||
return;
|
||||
data |= RLC_SAFE_MODE__CMD_MASK;
|
||||
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
|
||||
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
|
||||
WREG32(mmRLC_SAFE_MODE, data);
|
||||
|
||||
if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
|
||||
data |= RLC_SAFE_MODE__CMD_MASK;
|
||||
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
|
||||
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
|
||||
WREG32(mmRLC_SAFE_MODE, data);
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if ((RREG32(mmRLC_GPM_STAT) &
|
||||
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
|
||||
RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
|
||||
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
|
||||
RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
adev->gfx.rlc.in_safe_mode = true;
|
||||
/* wait for RLC_SAFE_MODE */
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if ((RREG32(mmRLC_GPM_STAT) &
|
||||
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
|
||||
RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
|
||||
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
|
||||
RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
}
|
||||
|
||||
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
|
||||
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 data = 0;
|
||||
uint32_t data;
|
||||
unsigned i;
|
||||
|
||||
data = RREG32(mmRLC_CNTL);
|
||||
if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
|
||||
return;
|
||||
|
||||
if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
|
||||
if (adev->gfx.rlc.in_safe_mode) {
|
||||
data |= RLC_SAFE_MODE__CMD_MASK;
|
||||
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
|
||||
WREG32(mmRLC_SAFE_MODE, data);
|
||||
adev->gfx.rlc.in_safe_mode = false;
|
||||
}
|
||||
}
|
||||
data |= RLC_SAFE_MODE__CMD_MASK;
|
||||
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
|
||||
WREG32(mmRLC_SAFE_MODE, data);
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
|
||||
@@ -5653,8 +5538,17 @@ static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
|
||||
.enter_safe_mode = iceland_enter_rlc_safe_mode,
|
||||
.exit_safe_mode = iceland_exit_rlc_safe_mode
|
||||
.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
|
||||
.set_safe_mode = gfx_v8_0_set_safe_mode,
|
||||
.unset_safe_mode = gfx_v8_0_unset_safe_mode,
|
||||
.init = gfx_v8_0_rlc_init,
|
||||
.get_csb_size = gfx_v8_0_get_csb_size,
|
||||
.get_csb_buffer = gfx_v8_0_get_csb_buffer,
|
||||
.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
|
||||
.resume = gfx_v8_0_rlc_resume,
|
||||
.stop = gfx_v8_0_rlc_stop,
|
||||
.reset = gfx_v8_0_rlc_reset,
|
||||
.start = gfx_v8_0_rlc_start
|
||||
};
|
||||
|
||||
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
|
||||
@@ -5662,7 +5556,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
|
||||
{
|
||||
uint32_t temp, data;
|
||||
|
||||
adev->gfx.rlc.funcs->enter_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
|
||||
/* It is disabled by HW by default */
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
|
||||
@@ -5758,7 +5652,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
|
||||
gfx_v8_0_wait_for_rlc_serdes(adev);
|
||||
}
|
||||
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
}
|
||||
|
||||
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
|
||||
@@ -5768,7 +5662,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
|
||||
|
||||
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
|
||||
|
||||
adev->gfx.rlc.funcs->enter_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
|
||||
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
|
||||
@@ -5851,7 +5745,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
|
||||
|
||||
gfx_v8_0_wait_for_rlc_serdes(adev);
|
||||
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
}
|
||||
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
@@ -6131,9 +6025,11 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
|
||||
}
|
||||
|
||||
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 header, control = 0;
|
||||
|
||||
if (ib->flags & AMDGPU_IB_FLAG_CE)
|
||||
@@ -6161,9 +6057,11 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
}
|
||||
|
||||
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
|
||||
@@ -6738,12 +6636,39 @@ static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * gfx_v8_0_fault - report a fault to the scheduler of the ring named by an
 * interrupt vector entry.
 *
 * @adev:  amdgpu device whose gfx/compute rings are searched
 * @entry: interrupt vector entry; only entry->ring_id is read
 *
 * entry->ring_id is decoded into me_id (mask 0x0c, shifted right by 2),
 * pipe_id (mask 0x03) and queue_id (mask 0x70, shifted right by 4).  For
 * me_id 0, drm_sched_fault() is called on the scheduler of gfx_ring[0].
 * For me_id 1 or 2, drm_sched_fault() is called for each compute ring whose
 * ->me, ->pipe and ->queue all match the decoded values.  Any other me_id
 * value falls through the switch and nothing is done.
 *
 * NOTE(review): drm_sched_fault() presumably kicks the scheduler's
 * timeout/recovery handling -- confirm against the DRM scheduler docs.
 */
static void gfx_v8_0_fault(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
u8 me_id, pipe_id, queue_id;
|
||||
struct amdgpu_ring *ring;
|
||||
int i;
|
||||
|
||||
me_id = (entry->ring_id & 0x0c) >> 2; /* ring_id bits 3:2 */
|
||||
pipe_id = (entry->ring_id & 0x03) >> 0; /* ring_id bits 1:0 */
|
||||
queue_id = (entry->ring_id & 0x70) >> 4; /* ring_id bits 6:4 */
|
||||
|
||||
switch (me_id) {
|
||||
case 0:
|
||||
drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal register access in command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v8_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -6752,7 +6677,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal instruction in command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v8_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -6976,10 +6901,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
|
||||
17 + /* gfx_v8_0_ring_emit_vm_flush */
|
||||
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
|
||||
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
|
||||
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
|
||||
.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
|
||||
.test_ring = gfx_v8_0_ring_test_ring,
|
||||
.test_ib = gfx_v8_0_ring_test_ib,
|
||||
.insert_nop = amdgpu_ring_insert_nop,
|
||||
.pad_ib = amdgpu_ring_generic_pad_ib,
|
||||
.emit_rreg = gfx_v8_0_ring_emit_rreg,
|
||||
|
@@ -41,7 +41,7 @@
|
||||
#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
|
||||
|
||||
#define GFX9_NUM_GFX_RINGS 1
|
||||
#define GFX9_MEC_HPD_SIZE 2048
|
||||
#define GFX9_MEC_HPD_SIZE 4096
|
||||
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
|
||||
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
|
||||
|
||||
@@ -86,6 +86,7 @@ MODULE_FIRMWARE("amdgpu/picasso_me.bin");
|
||||
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
|
||||
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
|
||||
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
|
||||
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
|
||||
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
|
||||
@@ -396,18 +397,14 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
int r;
|
||||
|
||||
r = amdgpu_gfx_scratch_get(adev, &scratch);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
WREG32(scratch, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_scratch;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
|
||||
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
@@ -419,14 +416,11 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
break;
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
|
||||
ring->idx, scratch, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_scratch:
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
@@ -443,19 +437,16 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 16, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err1;
|
||||
}
|
||||
|
||||
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
|
||||
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
|
||||
ib.ptr[2] = lower_32_bits(gpu_addr);
|
||||
@@ -469,22 +460,17 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err2;
|
||||
r = -ETIMEDOUT;
|
||||
goto err2;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err2;
|
||||
goto err2;
|
||||
}
|
||||
|
||||
tmp = adev->wb.wb[index];
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("ib test on ring %d failed\n", ring->idx);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
else
|
||||
r = -EINVAL;
|
||||
|
||||
err2:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
@@ -660,7 +646,20 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
|
||||
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
|
||||
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
|
||||
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
|
||||
/*
|
||||
* For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
|
||||
* instead of picasso_rlc.bin.
|
||||
* Judgment method:
|
||||
* PCO AM4: revision >= 0xC8 && revision <= 0xCF
|
||||
* or revision >= 0xD8 && revision <= 0xDF
|
||||
* otherwise is PCO FP5
|
||||
*/
|
||||
if (!strcmp(chip_name, "picasso") &&
|
||||
(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
|
||||
((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
|
||||
else
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
|
||||
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
|
||||
if (err)
|
||||
goto out;
|
||||
@@ -1065,85 +1064,13 @@ static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
|
||||
WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
|
||||
}
|
||||
|
||||
static void rv_init_cp_jump_table(struct amdgpu_device *adev)
|
||||
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
|
||||
{
|
||||
const __le32 *fw_data;
|
||||
volatile u32 *dst_ptr;
|
||||
int me, i, max_me = 5;
|
||||
u32 bo_offset = 0;
|
||||
u32 table_offset, table_size;
|
||||
|
||||
/* write the cp table buffer */
|
||||
dst_ptr = adev->gfx.rlc.cp_table_ptr;
|
||||
for (me = 0; me < max_me; me++) {
|
||||
if (me == 0) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.ce_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 1) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.pfp_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 2) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.me_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 3) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 4) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec2_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
}
|
||||
|
||||
for (i = 0; i < table_size; i ++) {
|
||||
dst_ptr[bo_offset + i] =
|
||||
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
|
||||
}
|
||||
|
||||
bo_offset += table_size;
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
/* clear state block */
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
|
||||
/* jump table block */
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
|
||||
&adev->gfx.rlc.cp_table_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cp_table_ptr);
|
||||
return 5;
|
||||
}
|
||||
|
||||
static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
|
||||
{
|
||||
volatile u32 *dst_ptr;
|
||||
u32 dws;
|
||||
const struct cs_section_def *cs_data;
|
||||
int r;
|
||||
|
||||
@@ -1152,45 +1079,18 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
|
||||
cs_data = adev->gfx.rlc.cs_data;
|
||||
|
||||
if (cs_data) {
|
||||
/* clear state block */
|
||||
adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev);
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to create rlc csb bo\n",
|
||||
r);
|
||||
gfx_v9_0_rlc_fini(adev);
|
||||
/* init clear state block */
|
||||
r = amdgpu_gfx_rlc_init_csb(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
/* set up the cs buffer */
|
||||
dst_ptr = adev->gfx.rlc.cs_ptr;
|
||||
gfx_v9_0_get_csb_buffer(adev, dst_ptr);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
|
||||
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
|
||||
}
|
||||
|
||||
if (adev->asic_type == CHIP_RAVEN) {
|
||||
/* TODO: double check the cp_table_size for RV */
|
||||
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
|
||||
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.cp_table_obj,
|
||||
&adev->gfx.rlc.cp_table_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cp_table_ptr);
|
||||
if (r) {
|
||||
dev_err(adev->dev,
|
||||
"(%d) failed to create cp table bo\n", r);
|
||||
gfx_v9_0_rlc_fini(adev);
|
||||
r = amdgpu_gfx_rlc_init_cpt(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
rv_init_cp_jump_table(adev);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
|
||||
}
|
||||
|
||||
switch (adev->asic_type) {
|
||||
@@ -1264,7 +1164,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
|
||||
mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
|
||||
|
||||
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.mec.hpd_eop_obj,
|
||||
&adev->gfx.mec.hpd_eop_gpu_addr,
|
||||
(void **)&hpd);
|
||||
@@ -1635,8 +1535,8 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
|
||||
/* Clear GDS reserved memory */
|
||||
r = amdgpu_ring_alloc(ring, 17);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
|
||||
ring->name, r);
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -1680,7 +1580,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
|
||||
|
||||
ring->ring_obj = NULL;
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1;
|
||||
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
|
||||
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
|
||||
+ (ring_id * GFX9_MEC_HPD_SIZE);
|
||||
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
|
||||
@@ -1748,7 +1648,7 @@ static int gfx_v9_0_sw_init(void *handle)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = gfx_v9_0_rlc_init(adev);
|
||||
r = adev->gfx.rlc.funcs->init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to init rlc BOs!\n");
|
||||
return r;
|
||||
@@ -1769,7 +1669,7 @@ static int gfx_v9_0_sw_init(void *handle)
|
||||
else
|
||||
sprintf(ring->name, "gfx_%d", i);
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
|
||||
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
|
||||
r = amdgpu_ring_init(adev, ring, 1024,
|
||||
&adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
|
||||
if (r)
|
||||
@@ -2499,12 +2399,12 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
gfx_v9_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
|
||||
/* disable CG */
|
||||
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
|
||||
|
||||
gfx_v9_0_rlc_reset(adev);
|
||||
adev->gfx.rlc.funcs->reset(adev);
|
||||
|
||||
gfx_v9_0_init_pg(adev);
|
||||
|
||||
@@ -2515,15 +2415,24 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (adev->asic_type == CHIP_RAVEN ||
|
||||
adev->asic_type == CHIP_VEGA20) {
|
||||
if (amdgpu_lbpw != 0)
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_RAVEN:
|
||||
if (amdgpu_lbpw == 0)
|
||||
gfx_v9_0_enable_lbpw(adev, false);
|
||||
else
|
||||
gfx_v9_0_enable_lbpw(adev, true);
|
||||
break;
|
||||
case CHIP_VEGA20:
|
||||
if (amdgpu_lbpw > 0)
|
||||
gfx_v9_0_enable_lbpw(adev, true);
|
||||
else
|
||||
gfx_v9_0_enable_lbpw(adev, false);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
gfx_v9_0_rlc_start(adev);
|
||||
adev->gfx.rlc.funcs->start(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2538,7 +2447,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
|
||||
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
|
||||
if (!enable) {
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
adev->gfx.gfx_ring[i].ready = false;
|
||||
adev->gfx.gfx_ring[i].sched.ready = false;
|
||||
}
|
||||
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
|
||||
udelay(50);
|
||||
@@ -2728,7 +2637,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
|
||||
|
||||
/* start the ring */
|
||||
gfx_v9_0_cp_gfx_start(adev);
|
||||
ring->ready = true;
|
||||
ring->sched.ready = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2743,8 +2652,8 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
|
||||
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
|
||||
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
adev->gfx.compute_ring[i].ready = false;
|
||||
adev->gfx.kiq.ring.ready = false;
|
||||
adev->gfx.compute_ring[i].sched.ready = false;
|
||||
adev->gfx.kiq.ring.sched.ready = false;
|
||||
}
|
||||
udelay(50);
|
||||
}
|
||||
@@ -2867,11 +2776,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
|
||||
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
|
||||
}
|
||||
|
||||
r = amdgpu_ring_test_ring(kiq_ring);
|
||||
if (r) {
|
||||
r = amdgpu_ring_test_helper(kiq_ring);
|
||||
if (r)
|
||||
DRM_ERROR("KCQ enable failed\n");
|
||||
kiq_ring->ready = false;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -3089,9 +2996,9 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
|
||||
/* enable the doorbell if requested */
|
||||
if (ring->use_doorbell) {
|
||||
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
|
||||
(AMDGPU_DOORBELL64_KIQ *2) << 2);
|
||||
(adev->doorbell_index.kiq * 2) << 2);
|
||||
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
|
||||
(AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
|
||||
(adev->doorbell_index.userqueue_end * 2) << 2);
|
||||
}
|
||||
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
|
||||
@@ -3250,7 +3157,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
|
||||
amdgpu_bo_kunmap(ring->mqd_obj);
|
||||
ring->mqd_ptr = NULL;
|
||||
amdgpu_bo_unreserve(ring->mqd_obj);
|
||||
ring->ready = true;
|
||||
ring->sched.ready = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3315,19 +3222,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
|
||||
return r;
|
||||
|
||||
ring = &adev->gfx.gfx_ring[0];
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r)
|
||||
ring->ready = false;
|
||||
amdgpu_ring_test_helper(ring);
|
||||
}
|
||||
|
||||
gfx_v9_0_enable_gui_idle_interrupt(adev, true);
|
||||
@@ -3354,7 +3255,7 @@ static int gfx_v9_0_hw_init(void *handle)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = gfx_v9_0_rlc_resume(adev);
|
||||
r = adev->gfx.rlc.funcs->resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@@ -3392,7 +3293,7 @@ static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
|
||||
amdgpu_ring_write(kiq_ring, 0);
|
||||
amdgpu_ring_write(kiq_ring, 0);
|
||||
}
|
||||
r = amdgpu_ring_test_ring(kiq_ring);
|
||||
r = amdgpu_ring_test_helper(kiq_ring);
|
||||
if (r)
|
||||
DRM_ERROR("KCQ disable failed\n");
|
||||
|
||||
@@ -3434,7 +3335,7 @@ static int gfx_v9_0_hw_fini(void *handle)
|
||||
}
|
||||
|
||||
gfx_v9_0_cp_enable(adev, false);
|
||||
gfx_v9_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
|
||||
gfx_v9_0_csb_vram_unpin(adev);
|
||||
|
||||
@@ -3509,7 +3410,7 @@ static int gfx_v9_0_soft_reset(void *handle)
|
||||
|
||||
if (grbm_soft_reset) {
|
||||
/* stop the rlc */
|
||||
gfx_v9_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
|
||||
/* Disable GFX parsing/prefetching */
|
||||
gfx_v9_0_cp_gfx_enable(adev, false);
|
||||
@@ -3608,64 +3509,47 @@ static int gfx_v9_0_late_init(void *handle)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
|
||||
static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
|
||||
{
|
||||
uint32_t rlc_setting, data;
|
||||
unsigned i;
|
||||
|
||||
if (adev->gfx.rlc.in_safe_mode)
|
||||
return;
|
||||
uint32_t rlc_setting;
|
||||
|
||||
/* if RLC is not enabled, do nothing */
|
||||
rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
|
||||
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
|
||||
return;
|
||||
return false;
|
||||
|
||||
if (adev->cg_flags &
|
||||
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
|
||||
data = RLC_SAFE_MODE__CMD_MASK;
|
||||
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
|
||||
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* wait for RLC_SAFE_MODE */
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
adev->gfx.rlc.in_safe_mode = true;
|
||||
static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
uint32_t data;
|
||||
unsigned i;
|
||||
|
||||
data = RLC_SAFE_MODE__CMD_MASK;
|
||||
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
|
||||
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
|
||||
|
||||
/* wait for RLC_SAFE_MODE */
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
|
||||
static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
uint32_t rlc_setting, data;
|
||||
uint32_t data;
|
||||
|
||||
if (!adev->gfx.rlc.in_safe_mode)
|
||||
return;
|
||||
|
||||
/* if RLC is not enabled, do nothing */
|
||||
rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
|
||||
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
|
||||
return;
|
||||
|
||||
if (adev->cg_flags &
|
||||
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
|
||||
/*
|
||||
* Try to exit safe mode only if it is already in safe
|
||||
* mode.
|
||||
*/
|
||||
data = RLC_SAFE_MODE__CMD_MASK;
|
||||
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
|
||||
adev->gfx.rlc.in_safe_mode = false;
|
||||
}
|
||||
data = RLC_SAFE_MODE__CMD_MASK;
|
||||
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
{
|
||||
gfx_v9_0_enter_rlc_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
|
||||
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
|
||||
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
|
||||
@@ -3676,7 +3560,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
|
||||
gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
|
||||
}
|
||||
|
||||
gfx_v9_0_exit_rlc_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
|
||||
@@ -3774,7 +3658,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
|
||||
{
|
||||
uint32_t data, def;
|
||||
|
||||
adev->gfx.rlc.funcs->enter_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
|
||||
/* Enable 3D CGCG/CGLS */
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
|
||||
@@ -3814,7 +3698,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
|
||||
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
|
||||
}
|
||||
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
|
||||
@@ -3822,7 +3706,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
|
||||
{
|
||||
uint32_t def, data;
|
||||
|
||||
adev->gfx.rlc.funcs->enter_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
|
||||
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
|
||||
@@ -3862,7 +3746,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
|
||||
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
|
||||
}
|
||||
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
}
|
||||
|
||||
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
|
||||
@@ -3891,8 +3775,17 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
|
||||
.enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
|
||||
.exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
|
||||
.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
|
||||
.set_safe_mode = gfx_v9_0_set_safe_mode,
|
||||
.unset_safe_mode = gfx_v9_0_unset_safe_mode,
|
||||
.init = gfx_v9_0_rlc_init,
|
||||
.get_csb_size = gfx_v9_0_get_csb_size,
|
||||
.get_csb_buffer = gfx_v9_0_get_csb_buffer,
|
||||
.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
|
||||
.resume = gfx_v9_0_rlc_resume,
|
||||
.stop = gfx_v9_0_rlc_stop,
|
||||
.reset = gfx_v9_0_rlc_reset,
|
||||
.start = gfx_v9_0_rlc_start
|
||||
};
|
||||
|
||||
static int gfx_v9_0_set_powergating_state(void *handle,
|
||||
@@ -4073,9 +3966,11 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
|
||||
}
|
||||
|
||||
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 header, control = 0;
|
||||
|
||||
if (ib->flags & AMDGPU_IB_FLAG_CE)
|
||||
@@ -4104,20 +3999,22 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
}
|
||||
|
||||
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
{
|
||||
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
|
||||
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
|
||||
amdgpu_ring_write(ring,
|
||||
amdgpu_ring_write(ring,
|
||||
#ifdef __BIG_ENDIAN
|
||||
(2 << 0) |
|
||||
(2 << 0) |
|
||||
#endif
|
||||
lower_32_bits(ib->gpu_addr));
|
||||
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
|
||||
amdgpu_ring_write(ring, control);
|
||||
lower_32_bits(ib->gpu_addr));
|
||||
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
|
||||
amdgpu_ring_write(ring, control);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
||||
@@ -4696,12 +4593,39 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gfx_v9_0_fault(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
u8 me_id, pipe_id, queue_id;
|
||||
struct amdgpu_ring *ring;
|
||||
int i;
|
||||
|
||||
me_id = (entry->ring_id & 0x0c) >> 2;
|
||||
pipe_id = (entry->ring_id & 0x03) >> 0;
|
||||
queue_id = (entry->ring_id & 0x70) >> 4;
|
||||
|
||||
switch (me_id) {
|
||||
case 0:
|
||||
drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal register access in command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v9_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4710,7 +4634,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal instruction in command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v9_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4837,10 +4761,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
|
||||
2 + /* gfx_v9_0_ring_emit_vm_flush */
|
||||
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
|
||||
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
|
||||
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
|
||||
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
|
||||
.test_ring = gfx_v9_0_ring_test_ring,
|
||||
.test_ib = gfx_v9_0_ring_test_ib,
|
||||
.insert_nop = amdgpu_ring_insert_nop,
|
||||
.pad_ib = amdgpu_ring_generic_pad_ib,
|
||||
.emit_rreg = gfx_v9_0_ring_emit_rreg,
|
||||
|
@@ -35,20 +35,25 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
|
||||
return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24;
|
||||
}
|
||||
|
||||
static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
|
||||
void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
|
||||
uint64_t page_table_base)
|
||||
{
|
||||
uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
|
||||
/* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
|
||||
int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
|
||||
- mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
|
||||
|
||||
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
|
||||
lower_32_bits(value));
|
||||
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
|
||||
offset * vmid, lower_32_bits(page_table_base));
|
||||
|
||||
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
|
||||
upper_32_bits(value));
|
||||
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
|
||||
offset * vmid, upper_32_bits(page_table_base));
|
||||
}
|
||||
|
||||
static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
|
||||
{
|
||||
gfxhub_v1_0_init_gart_pt_regs(adev);
|
||||
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
|
||||
|
||||
gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
|
||||
|
||||
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
|
||||
(u32)(adev->gmc.gart_start >> 12));
|
||||
|
@@ -30,5 +30,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
|
||||
bool value);
|
||||
void gfxhub_v1_0_init(struct amdgpu_device *adev);
|
||||
u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev);
|
||||
void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
|
||||
uint64_t page_table_base);
|
||||
|
||||
#endif
|
||||
|
@@ -359,7 +359,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
|
||||
static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
|
||||
uint32_t vmid, uint32_t flush_type)
|
||||
{
|
||||
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
|
||||
}
|
||||
@@ -581,7 +582,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
|
||||
else
|
||||
gmc_v6_0_set_fault_enable_default(adev, true);
|
||||
|
||||
gmc_v6_0_flush_gpu_tlb(adev, 0);
|
||||
gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
|
||||
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
|
||||
(unsigned)(adev->gmc.gart_size >> 20),
|
||||
(unsigned long long)table_addr);
|
||||
|
@@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
|
||||
*
|
||||
* Flush the TLB for the requested page table (CIK).
|
||||
*/
|
||||
static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
|
||||
static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
|
||||
uint32_t vmid, uint32_t flush_type)
|
||||
{
|
||||
/* bits 0-15 are the VM contexts0-15 */
|
||||
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
|
||||
@@ -698,7 +699,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
|
||||
WREG32(mmCHUB_CONTROL, tmp);
|
||||
}
|
||||
|
||||
gmc_v7_0_flush_gpu_tlb(adev, 0);
|
||||
gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
|
||||
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
|
||||
(unsigned)(adev->gmc.gart_size >> 20),
|
||||
(unsigned long long)table_addr);
|
||||
|
@@ -633,7 +633,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
|
||||
* Flush the TLB for the requested page table (CIK).
|
||||
*/
|
||||
static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
|
||||
uint32_t vmid)
|
||||
uint32_t vmid, uint32_t flush_type)
|
||||
{
|
||||
/* bits 0-15 are the VM contexts0-15 */
|
||||
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
|
||||
@@ -942,7 +942,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
|
||||
else
|
||||
gmc_v8_0_set_fault_enable_default(adev, true);
|
||||
|
||||
gmc_v8_0_flush_gpu_tlb(adev, 0);
|
||||
gmc_v8_0_flush_gpu_tlb(adev, 0, 0);
|
||||
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
|
||||
(unsigned)(adev->gmc.gart_size >> 20),
|
||||
(unsigned long long)table_addr);
|
||||
|
@@ -244,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* vega10_ih_prescreen_iv - prescreen an interrupt vector
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Returns true if the interrupt vector should be further processed.
|
||||
*/
|
||||
static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry,
|
||||
uint64_t addr)
|
||||
{
|
||||
struct amdgpu_vm *vm;
|
||||
u64 key;
|
||||
int r;
|
||||
|
||||
/* No PASID, can't identify faulting process */
|
||||
if (!entry->pasid)
|
||||
return true;
|
||||
|
||||
/* Not a retry fault */
|
||||
if (!(entry->src_data[1] & 0x80))
|
||||
return true;
|
||||
|
||||
/* Track retry faults in per-VM fault FIFO. */
|
||||
spin_lock(&adev->vm_manager.pasid_lock);
|
||||
vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid);
|
||||
if (!vm) {
|
||||
/* VM not found, process it normally */
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
key = AMDGPU_VM_FAULT(entry->pasid, addr);
|
||||
r = amdgpu_vm_add_fault(vm->fault_hash, key);
|
||||
|
||||
/* Hash table is full or the fault is already being processed,
|
||||
* ignore further page faults
|
||||
*/
|
||||
if (r != 0) {
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
return false;
|
||||
}
|
||||
/* No locking required with single writer and single reader */
|
||||
r = kfifo_put(&vm->faults, key);
|
||||
if (!r) {
|
||||
/* FIFO is full. Ignore it until there is space */
|
||||
amdgpu_vm_clear_fault(vm->fault_hash, key);
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
/* It's the first fault for this address, process it normally */
|
||||
return true;
|
||||
}
|
||||
|
||||
static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
@@ -255,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
|
||||
addr = (u64)entry->src_data[0] << 12;
|
||||
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
|
||||
|
||||
if (!gmc_v9_0_prescreen_iv(adev, entry, addr))
|
||||
return 1; /* This also prevents sending it to KFD */
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
status = RREG32(hub->vm_l2_pro_fault_status);
|
||||
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
|
||||
@@ -293,14 +352,14 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
|
||||
adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
|
||||
}
|
||||
|
||||
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
|
||||
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
|
||||
uint32_t flush_type)
|
||||
{
|
||||
u32 req = 0;
|
||||
|
||||
/* invalidate using legacy mode on vmid*/
|
||||
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
|
||||
PER_VMID_INVALIDATE_REQ, 1 << vmid);
|
||||
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
|
||||
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
|
||||
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
|
||||
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
|
||||
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
|
||||
@@ -312,48 +371,6 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
|
||||
return req;
|
||||
}
|
||||
|
||||
static signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
|
||||
uint32_t reg0, uint32_t reg1,
|
||||
uint32_t ref, uint32_t mask)
|
||||
{
|
||||
signed long r, cnt = 0;
|
||||
unsigned long flags;
|
||||
uint32_t seq;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_ring *ring = &kiq->ring;
|
||||
|
||||
spin_lock_irqsave(&kiq->ring_lock, flags);
|
||||
|
||||
amdgpu_ring_alloc(ring, 32);
|
||||
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
|
||||
ref, mask);
|
||||
amdgpu_fence_emit_polling(ring, &seq);
|
||||
amdgpu_ring_commit(ring);
|
||||
spin_unlock_irqrestore(&kiq->ring_lock, flags);
|
||||
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
|
||||
/* don't wait anymore for IRQ context */
|
||||
if (r < 1 && in_interrupt())
|
||||
goto failed_kiq;
|
||||
|
||||
might_sleep();
|
||||
|
||||
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
|
||||
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
}
|
||||
|
||||
if (cnt > MAX_KIQ_REG_TRY)
|
||||
goto failed_kiq;
|
||||
|
||||
return 0;
|
||||
|
||||
failed_kiq:
|
||||
pr_err("failed to invalidate tlb with kiq\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* GART
|
||||
* VMID 0 is the physical GPU addresses as used by the kernel.
|
||||
@@ -362,64 +379,50 @@ failed_kiq:
|
||||
*/
|
||||
|
||||
/**
|
||||
* gmc_v9_0_flush_gpu_tlb - gart tlb flush callback
|
||||
* gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @vmid: vm instance to flush
|
||||
* @flush_type: the flush type
|
||||
*
|
||||
* Flush the TLB for the requested page table.
|
||||
* Flush the TLB for the requested page table using certain type.
|
||||
*/
|
||||
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
|
||||
uint32_t vmid)
|
||||
uint32_t vmid, uint32_t flush_type)
|
||||
{
|
||||
/* Use register 17 for GART */
|
||||
const unsigned eng = 17;
|
||||
unsigned i, j;
|
||||
int r;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[i];
|
||||
u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
|
||||
u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
|
||||
|
||||
if (adev->gfx.kiq.ring.ready &&
|
||||
/* This is necessary for a HW workaround under SRIOV as well
|
||||
* as GFXOFF under bare metal
|
||||
*/
|
||||
if (adev->gfx.kiq.ring.sched.ready &&
|
||||
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
|
||||
!adev->in_gpu_reset) {
|
||||
r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
|
||||
hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
|
||||
if (!r)
|
||||
continue;
|
||||
uint32_t req = hub->vm_inv_eng0_req + eng;
|
||||
uint32_t ack = hub->vm_inv_eng0_ack + eng;
|
||||
|
||||
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
|
||||
1 << vmid);
|
||||
continue;
|
||||
}
|
||||
|
||||
spin_lock(&adev->gmc.invalidate_lock);
|
||||
|
||||
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
|
||||
|
||||
/* Busy wait for ACK.*/
|
||||
for (j = 0; j < 100; j++) {
|
||||
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
|
||||
tmp &= 1 << vmid;
|
||||
if (tmp)
|
||||
break;
|
||||
cpu_relax();
|
||||
}
|
||||
if (j < 100) {
|
||||
spin_unlock(&adev->gmc.invalidate_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Wait for ACK with a delay.*/
|
||||
for (j = 0; j < adev->usec_timeout; j++) {
|
||||
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
|
||||
tmp &= 1 << vmid;
|
||||
if (tmp)
|
||||
if (tmp & (1 << vmid))
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
if (j < adev->usec_timeout) {
|
||||
spin_unlock(&adev->gmc.invalidate_lock);
|
||||
continue;
|
||||
}
|
||||
spin_unlock(&adev->gmc.invalidate_lock);
|
||||
if (j < adev->usec_timeout)
|
||||
continue;
|
||||
|
||||
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
|
||||
}
|
||||
}
|
||||
@@ -429,7 +432,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
|
||||
uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
|
||||
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
|
||||
unsigned eng = ring->vm_inv_eng;
|
||||
|
||||
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
|
||||
@@ -739,9 +742,8 @@ static int gmc_v9_0_late_init(void *handle)
|
||||
unsigned vmhub = ring->funcs->vmhub;
|
||||
|
||||
ring->vm_inv_eng = vm_inv_eng[vmhub]++;
|
||||
dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
|
||||
ring->idx, ring->name, ring->vm_inv_eng,
|
||||
ring->funcs->vmhub);
|
||||
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
|
||||
ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
|
||||
}
|
||||
|
||||
/* Engine 16 is used for KFD and 17 for GART flushes */
|
||||
@@ -959,6 +961,9 @@ static int gmc_v9_0_sw_init(void *handle)
|
||||
/* This interrupt is VMC page fault.*/
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
|
||||
&adev->gmc.vm_fault);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
|
||||
&adev->gmc.vm_fault);
|
||||
|
||||
@@ -991,7 +996,7 @@ static int gmc_v9_0_sw_init(void *handle)
|
||||
}
|
||||
adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
|
||||
|
||||
if (adev->asic_type == CHIP_VEGA20) {
|
||||
if (adev->gmc.xgmi.supported) {
|
||||
r = gfxhub_v1_1_get_xgmi_info(adev);
|
||||
if (r)
|
||||
return r;
|
||||
@@ -1122,7 +1127,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
|
||||
|
||||
gfxhub_v1_0_set_fault_enable_default(adev, value);
|
||||
mmhub_v1_0_set_fault_enable_default(adev, value);
|
||||
gmc_v9_0_flush_gpu_tlb(adev, 0);
|
||||
gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
|
||||
|
||||
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
|
||||
(unsigned)(adev->gmc.gart_size >> 20),
|
||||
|
@@ -207,34 +207,6 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* iceland_ih_prescreen_iv - prescreen an interrupt vector
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Returns true if the interrupt vector should be further processed.
|
||||
*/
|
||||
static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u16 pasid;
|
||||
|
||||
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
|
||||
case 146:
|
||||
case 147:
|
||||
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
|
||||
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
/* Not a VM fault */
|
||||
return true;
|
||||
}
|
||||
|
||||
adev->irq.ih.rptr += 16;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* iceland_ih_decode_iv - decode an interrupt vector
|
||||
*
|
||||
@@ -440,7 +412,6 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = {
|
||||
|
||||
static const struct amdgpu_ih_funcs iceland_ih_funcs = {
|
||||
.get_wptr = iceland_ih_get_wptr,
|
||||
.prescreen_iv = iceland_ih_prescreen_iv,
|
||||
.decode_iv = iceland_ih_decode_iv,
|
||||
.set_rptr = iceland_ih_set_rptr
|
||||
};
|
||||
|
@@ -508,19 +508,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable)
|
||||
pi->caps_db_ramping ||
|
||||
pi->caps_td_ramping ||
|
||||
pi->caps_tcp_ramping) {
|
||||
adev->gfx.rlc.funcs->enter_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
|
||||
if (enable) {
|
||||
ret = kv_program_pt_config_registers(adev, didt_config_kv);
|
||||
if (ret) {
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
kv_do_enable_didt(adev, enable);
|
||||
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@@ -52,20 +52,25 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
|
||||
return base;
|
||||
}
|
||||
|
||||
static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
|
||||
void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
|
||||
uint64_t page_table_base)
|
||||
{
|
||||
uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
|
||||
/* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
|
||||
int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
|
||||
- mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
|
||||
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
|
||||
lower_32_bits(value));
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
|
||||
offset * vmid, lower_32_bits(page_table_base));
|
||||
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
|
||||
upper_32_bits(value));
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
|
||||
offset * vmid, upper_32_bits(page_table_base));
|
||||
}
|
||||
|
||||
static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
|
||||
{
|
||||
mmhub_v1_0_init_gart_pt_regs(adev);
|
||||
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
|
||||
|
||||
mmhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
|
||||
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
|
||||
(u32)(adev->gmc.gart_start >> 12));
|
||||
|
@@ -34,5 +34,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
|
||||
void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
|
||||
void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
|
||||
bool enable);
|
||||
void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
|
||||
uint64_t page_table_base);
|
||||
|
||||
#endif
|
||||
|
@@ -37,7 +37,6 @@
|
||||
#include "gmc/gmc_8_2_sh_mask.h"
|
||||
#include "oss/oss_3_0_d.h"
|
||||
#include "oss/oss_3_0_sh_mask.h"
|
||||
#include "gca/gfx_8_0_sh_mask.h"
|
||||
#include "dce/dce_10_0_d.h"
|
||||
#include "dce/dce_10_0_sh_mask.h"
|
||||
#include "smu/smu_7_1_3_d.h"
|
||||
|
@@ -43,6 +43,8 @@ enum psp_gfx_crtl_cmd_id
|
||||
GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */
|
||||
GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */
|
||||
GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */
|
||||
GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */
|
||||
GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */
|
||||
|
||||
GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */
|
||||
};
|
||||
@@ -89,7 +91,8 @@ enum psp_gfx_cmd_id
|
||||
GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */
|
||||
GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */
|
||||
GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
|
||||
|
||||
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
|
||||
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
|
||||
};
|
||||
|
||||
|
||||
|
@@ -240,12 +240,9 @@ static int psp_v10_0_ring_stop(struct psp_context *psp,
|
||||
enum psp_ring_type ring_type)
|
||||
{
|
||||
int ret = 0;
|
||||
struct psp_ring *ring;
|
||||
unsigned int psp_ring_reg = 0;
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
|
||||
ring = &psp->km_ring;
|
||||
|
||||
/* Write the ring destroy command to C2PMSG_64 */
|
||||
psp_ring_reg = 3 << 16;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||
|
@@ -34,6 +34,7 @@
|
||||
#include "nbio/nbio_7_4_offset.h"
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
|
||||
MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
|
||||
|
||||
/* address block */
|
||||
#define smnMP1_FIRMWARE_FLAGS 0x3010024
|
||||
@@ -98,7 +99,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
|
||||
const char *chip_name;
|
||||
char fw_name[30];
|
||||
int err = 0;
|
||||
const struct psp_firmware_header_v1_0 *hdr;
|
||||
const struct psp_firmware_header_v1_0 *sos_hdr;
|
||||
const struct ta_firmware_header_v1_0 *ta_hdr;
|
||||
|
||||
DRM_DEBUG("\n");
|
||||
|
||||
@@ -119,16 +121,32 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
|
||||
adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version);
|
||||
adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version);
|
||||
adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes);
|
||||
adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) -
|
||||
le32_to_cpu(hdr->sos_size_bytes);
|
||||
adev->psp.sys_start_addr = (uint8_t *)hdr +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes);
|
||||
sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
|
||||
adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
|
||||
adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version);
|
||||
adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes);
|
||||
adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) -
|
||||
le32_to_cpu(sos_hdr->sos_size_bytes);
|
||||
adev->psp.sys_start_addr = (uint8_t *)sos_hdr +
|
||||
le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
|
||||
adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
|
||||
le32_to_cpu(hdr->sos_offset_bytes);
|
||||
le32_to_cpu(sos_hdr->sos_offset_bytes);
|
||||
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
|
||||
err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = amdgpu_ucode_validate(adev->psp.ta_fw);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
|
||||
adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
|
||||
adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes);
|
||||
adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr +
|
||||
le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
if (err) {
|
||||
@@ -153,8 +171,11 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
|
||||
* are already been loaded.
|
||||
*/
|
||||
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
|
||||
if (sol_reg)
|
||||
if (sol_reg) {
|
||||
psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
|
||||
printk("sos fw version = 0x%x.\n", psp->sos_fw_version);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
|
||||
@@ -167,7 +188,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
|
||||
/* Copy PSP System Driver binary to memory */
|
||||
memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
|
||||
|
||||
/* Provide the sys driver to bootrom */
|
||||
/* Provide the sys driver to bootloader */
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
|
||||
(uint32_t)(psp->fw_pri_mc_addr >> 20));
|
||||
psp_gfxdrv_command_reg = 1 << 16;
|
||||
@@ -208,7 +229,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
|
||||
/* Copy Secure OS binary to PSP memory */
|
||||
memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
|
||||
|
||||
/* Provide the PSP secure OS to bootrom */
|
||||
/* Provide the PSP secure OS to bootloader */
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
|
||||
(uint32_t)(psp->fw_pri_mc_addr >> 20));
|
||||
psp_gfxdrv_command_reg = 2 << 16;
|
||||
@@ -278,26 +299,47 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
|
||||
struct psp_ring *ring = &psp->km_ring;
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
|
||||
/* Write low address of the ring to C2PMSG_69 */
|
||||
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
|
||||
/* Write high address of the ring to C2PMSG_70 */
|
||||
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
|
||||
/* Write size of ring to C2PMSG_71 */
|
||||
psp_ring_reg = ring->ring_size;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
|
||||
/* Write the ring initialization command to C2PMSG_64 */
|
||||
psp_ring_reg = ring_type;
|
||||
psp_ring_reg = psp_ring_reg << 16;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||
if (psp_support_vmr_ring(psp)) {
|
||||
/* Write low address of the ring to C2PMSG_102 */
|
||||
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
|
||||
/* Write high address of the ring to C2PMSG_103 */
|
||||
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
|
||||
|
||||
/* there might be handshake issue with hardware which needs delay */
|
||||
mdelay(20);
|
||||
/* Write the ring initialization command to C2PMSG_101 */
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
|
||||
GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
|
||||
|
||||
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
||||
0x80000000, 0x8000FFFF, false);
|
||||
/* there might be handshake issue with hardware which needs delay */
|
||||
mdelay(20);
|
||||
|
||||
/* Wait for response flag (bit 31) in C2PMSG_101 */
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
|
||||
0x80000000, 0x8000FFFF, false);
|
||||
|
||||
} else {
|
||||
/* Write low address of the ring to C2PMSG_69 */
|
||||
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
|
||||
/* Write high address of the ring to C2PMSG_70 */
|
||||
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
|
||||
/* Write size of ring to C2PMSG_71 */
|
||||
psp_ring_reg = ring->ring_size;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
|
||||
/* Write the ring initialization command to C2PMSG_64 */
|
||||
psp_ring_reg = ring_type;
|
||||
psp_ring_reg = psp_ring_reg << 16;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||
|
||||
/* there might be handshake issue with hardware which needs delay */
|
||||
mdelay(20);
|
||||
|
||||
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
||||
0x80000000, 0x8000FFFF, false);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -308,15 +350,24 @@ static int psp_v11_0_ring_stop(struct psp_context *psp,
|
||||
int ret = 0;
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
|
||||
/* Write the ring destroy command to C2PMSG_64 */
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS);
|
||||
/* Write the ring destroy command*/
|
||||
if (psp_support_vmr_ring(psp))
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
|
||||
GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
|
||||
else
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
|
||||
GFX_CTRL_CMD_ID_DESTROY_RINGS);
|
||||
|
||||
/* there might be handshake issue with hardware which needs delay */
|
||||
mdelay(20);
|
||||
|
||||
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
||||
0x80000000, 0x80000000, false);
|
||||
/* Wait for response flag (bit 31) */
|
||||
if (psp_support_vmr_ring(psp))
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
|
||||
0x80000000, 0x80000000, false);
|
||||
else
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
||||
0x80000000, 0x80000000, false);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -355,7 +406,10 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp,
|
||||
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
|
||||
|
||||
/* KM (GPCOM) prepare write pointer */
|
||||
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
|
||||
if (psp_support_vmr_ring(psp))
|
||||
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
|
||||
else
|
||||
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
|
||||
|
||||
/* Update KM RB frame pointer to new frame */
|
||||
/* write_frame ptr increments by size of rb_frame in bytes */
|
||||
@@ -384,7 +438,11 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp,
|
||||
|
||||
/* Update the write Pointer in DWORDs */
|
||||
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
|
||||
if (psp_support_vmr_ring(psp)) {
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
|
||||
} else
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -529,7 +587,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
|
||||
/*send the mode 1 reset command*/
|
||||
WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
|
||||
|
||||
mdelay(1000);
|
||||
msleep(500);
|
||||
|
||||
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
|
||||
|
||||
@@ -552,24 +610,110 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
|
||||
static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp,
|
||||
int number_devices, struct psp_xgmi_topology_info *topology)
|
||||
{
|
||||
struct ta_xgmi_shared_memory *xgmi_cmd;
|
||||
struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
|
||||
struct ta_xgmi_cmd_get_topology_info_output *topology_info_output;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
|
||||
return -EINVAL;
|
||||
|
||||
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
|
||||
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
|
||||
|
||||
/* Fill in the shared memory with topology information as input */
|
||||
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
|
||||
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
|
||||
topology_info_input->num_nodes = number_devices;
|
||||
|
||||
for (i = 0; i < topology_info_input->num_nodes; i++) {
|
||||
topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
|
||||
topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
|
||||
topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
|
||||
topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
|
||||
}
|
||||
|
||||
/* Invoke xgmi ta to get the topology information */
|
||||
ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Read the output topology information from the shared memory */
|
||||
topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info;
|
||||
topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes;
|
||||
for (i = 0; i < topology->num_nodes; i++) {
|
||||
topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
|
||||
topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
|
||||
topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled;
|
||||
topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp,
|
||||
int number_devices, struct psp_xgmi_topology_info *topology)
|
||||
{
|
||||
return 0;
|
||||
struct ta_xgmi_shared_memory *xgmi_cmd;
|
||||
struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
|
||||
int i;
|
||||
|
||||
if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
|
||||
return -EINVAL;
|
||||
|
||||
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
|
||||
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
|
||||
|
||||
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
|
||||
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO;
|
||||
topology_info_input->num_nodes = number_devices;
|
||||
|
||||
for (i = 0; i < topology_info_input->num_nodes; i++) {
|
||||
topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
|
||||
topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
|
||||
topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
|
||||
topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
|
||||
}
|
||||
|
||||
/* Invoke xgmi ta to set topology information */
|
||||
return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO);
|
||||
}
|
||||
|
||||
static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp)
|
||||
{
|
||||
u64 hive_id = 0;
|
||||
struct ta_xgmi_shared_memory *xgmi_cmd;
|
||||
int ret;
|
||||
|
||||
/* Remove me when we can get correct hive_id through PSP */
|
||||
if (psp->adev->gmc.xgmi.num_physical_nodes)
|
||||
hive_id = 0x123456789abcdef;
|
||||
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
|
||||
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
|
||||
|
||||
return hive_id;
|
||||
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;
|
||||
|
||||
/* Invoke xgmi ta to get hive id */
|
||||
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
|
||||
if (ret)
|
||||
return 0;
|
||||
else
|
||||
return xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
|
||||
}
|
||||
|
||||
static u64 psp_v11_0_xgmi_get_node_id(struct psp_context *psp)
|
||||
{
|
||||
struct ta_xgmi_shared_memory *xgmi_cmd;
|
||||
int ret;
|
||||
|
||||
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
|
||||
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
|
||||
|
||||
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID;
|
||||
|
||||
/* Invoke xgmi ta to get the node id */
|
||||
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
|
||||
if (ret)
|
||||
return 0;
|
||||
else
|
||||
return xgmi_cmd->xgmi_out_message.get_node_id.node_id;
|
||||
}
|
||||
|
||||
static const struct psp_funcs psp_v11_0_funcs = {
|
||||
@@ -587,6 +731,7 @@ static const struct psp_funcs psp_v11_0_funcs = {
|
||||
.xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
|
||||
.xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info,
|
||||
.xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id,
|
||||
.xgmi_get_node_id = psp_v11_0_xgmi_get_node_id,
|
||||
};
|
||||
|
||||
void psp_v11_0_set_psp_funcs(struct psp_context *psp)
|
||||
|
@@ -194,7 +194,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
|
||||
/* Copy PSP System Driver binary to memory */
|
||||
memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
|
||||
|
||||
/* Provide the sys driver to bootrom */
|
||||
/* Provide the sys driver to bootloader */
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
|
||||
(uint32_t)(psp->fw_pri_mc_addr >> 20));
|
||||
psp_gfxdrv_command_reg = 1 << 16;
|
||||
@@ -254,7 +254,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
|
||||
/* Copy Secure OS binary to PSP memory */
|
||||
memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
|
||||
|
||||
/* Provide the PSP secure OS to bootrom */
|
||||
/* Provide the PSP secure OS to bootloader */
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
|
||||
(uint32_t)(psp->fw_pri_mc_addr >> 20));
|
||||
psp_gfxdrv_command_reg = 2 << 16;
|
||||
@@ -356,12 +356,9 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
|
||||
enum psp_ring_type ring_type)
|
||||
{
|
||||
int ret = 0;
|
||||
struct psp_ring *ring;
|
||||
unsigned int psp_ring_reg = 0;
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
|
||||
ring = &psp->km_ring;
|
||||
|
||||
/* Write the ring destroy command to C2PMSG_64 */
|
||||
psp_ring_reg = 3 << 16;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||
@@ -593,9 +590,9 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
|
||||
}
|
||||
|
||||
/*send the mode 1 reset command*/
|
||||
WREG32(offset, 0x70000);
|
||||
WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
|
||||
|
||||
mdelay(1000);
|
||||
msleep(500);
|
||||
|
||||
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
|
||||
|
||||
|
@@ -225,7 +225,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
|
||||
|
||||
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
{
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
@@ -245,9 +245,12 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
* Schedule an IB in the DMA ring (VI).
|
||||
*/
|
||||
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
/* IB packet must end on a 8 DW boundary */
|
||||
sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
|
||||
|
||||
@@ -349,8 +352,8 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
|
||||
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
|
||||
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
|
||||
}
|
||||
sdma0->ready = false;
|
||||
sdma1->ready = false;
|
||||
sdma0->sched.ready = false;
|
||||
sdma1->sched.ready = false;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -471,17 +474,15 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
|
||||
/* enable DMA IBs */
|
||||
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
|
||||
|
||||
ring->ready = true;
|
||||
ring->sched.ready = true;
|
||||
}
|
||||
|
||||
sdma_v2_4_enable(adev, true);
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (adev->mman.buffer_funcs_ring == ring)
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, true);
|
||||
@@ -550,21 +551,16 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
|
||||
u64 gpu_addr;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 5);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_wb;
|
||||
|
||||
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
|
||||
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
|
||||
@@ -581,15 +577,11 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_wb:
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -612,20 +604,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 256, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err0;
|
||||
}
|
||||
|
||||
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
|
||||
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
|
||||
@@ -644,21 +632,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err1;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err1;
|
||||
}
|
||||
tmp = le32_to_cpu(adev->wb.wb[index]);
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
|
||||
else
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
err1:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
@@ -760,7 +743,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
|
||||
*/
|
||||
static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
|
||||
{
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
|
||||
u32 pad_count;
|
||||
int i;
|
||||
|
||||
@@ -1105,8 +1088,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
u8 instance_id, queue_id;
|
||||
|
||||
DRM_ERROR("Illegal instruction in SDMA command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
instance_id = (entry->ring_id & 0x3) >> 0;
|
||||
queue_id = (entry->ring_id & 0xc) >> 2;
|
||||
|
||||
if (instance_id <= 1 && queue_id == 0)
|
||||
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -399,7 +399,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
|
||||
|
||||
static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
{
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
@@ -419,9 +419,12 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
* Schedule an IB in the DMA ring (VI).
|
||||
*/
|
||||
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
/* IB packet must end on a 8 DW boundary */
|
||||
sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
|
||||
|
||||
@@ -523,8 +526,8 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
|
||||
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
|
||||
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
|
||||
}
|
||||
sdma0->ready = false;
|
||||
sdma1->ready = false;
|
||||
sdma0->sched.ready = false;
|
||||
sdma1->sched.ready = false;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -739,7 +742,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
|
||||
/* enable DMA IBs */
|
||||
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
|
||||
|
||||
ring->ready = true;
|
||||
ring->sched.ready = true;
|
||||
}
|
||||
|
||||
/* unhalt the MEs */
|
||||
@@ -749,11 +752,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (adev->mman.buffer_funcs_ring == ring)
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, true);
|
||||
@@ -822,21 +823,16 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
u64 gpu_addr;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 5);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_wb;
|
||||
|
||||
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
|
||||
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
|
||||
@@ -853,15 +849,11 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_wb:
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -884,20 +876,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 256, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err0;
|
||||
}
|
||||
|
||||
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
|
||||
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
|
||||
@@ -916,21 +904,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err1;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err1;
|
||||
}
|
||||
tmp = le32_to_cpu(adev->wb.wb[index]);
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
|
||||
else
|
||||
r = -EINVAL;
|
||||
}
|
||||
err1:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
dma_fence_put(f);
|
||||
@@ -1031,7 +1014,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
|
||||
*/
|
||||
static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
|
||||
{
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
|
||||
u32 pad_count;
|
||||
int i;
|
||||
|
||||
@@ -1163,7 +1146,7 @@ static int sdma_v3_0_sw_init(void *handle)
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = (i == 0) ?
|
||||
AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;
|
||||
adev->doorbell_index.sdma_engine0 : adev->doorbell_index.sdma_engine1;
|
||||
} else {
|
||||
ring->use_pollmem = true;
|
||||
}
|
||||
@@ -1440,8 +1423,14 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
u8 instance_id, queue_id;
|
||||
|
||||
DRM_ERROR("Illegal instruction in SDMA command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
instance_id = (entry->ring_id & 0x3) >> 0;
|
||||
queue_id = (entry->ring_id & 0xc) >> 2;
|
||||
|
||||
if (instance_id <= 1 && queue_id == 0)
|
||||
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -61,9 +61,11 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
|
||||
}
|
||||
|
||||
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
|
||||
* Pad as necessary with NOPs.
|
||||
*/
|
||||
@@ -122,7 +124,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
|
||||
|
||||
if (adev->mman.buffer_funcs_ring == ring)
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, false);
|
||||
ring->ready = false;
|
||||
ring->sched.ready = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,13 +177,11 @@ static int si_dma_start(struct amdgpu_device *adev)
|
||||
WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
|
||||
WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
|
||||
|
||||
ring->ready = true;
|
||||
ring->sched.ready = true;
|
||||
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (adev->mman.buffer_funcs_ring == ring)
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, true);
|
||||
@@ -209,21 +209,16 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
|
||||
u64 gpu_addr;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 4);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_wb;
|
||||
|
||||
amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
|
||||
amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
|
||||
@@ -238,15 +233,11 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_wb:
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -269,20 +260,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 256, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err0;
|
||||
}
|
||||
|
||||
ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
|
||||
ib.ptr[1] = lower_32_bits(gpu_addr);
|
||||
@@ -295,21 +282,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err1;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err1;
|
||||
}
|
||||
tmp = le32_to_cpu(adev->wb.wb[index]);
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
|
||||
else
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
err1:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
@@ -658,15 +640,6 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal instruction in SDMA command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int si_dma_set_clockgating_state(void *handle,
|
||||
enum amd_clockgating_state state)
|
||||
{
|
||||
@@ -781,15 +754,10 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
|
||||
.process = si_dma_process_trap_irq,
|
||||
};
|
||||
|
||||
static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
|
||||
.process = si_dma_process_illegal_inst_irq,
|
||||
};
|
||||
|
||||
static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
|
||||
adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -118,19 +118,6 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev)
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* si_ih_prescreen_iv - prescreen an interrupt vector
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Returns true if the interrupt vector should be further processed.
|
||||
*/
|
||||
static bool si_ih_prescreen_iv(struct amdgpu_device *adev)
|
||||
{
|
||||
/* Process all interrupts */
|
||||
return true;
|
||||
}
|
||||
|
||||
static void si_ih_decode_iv(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
@@ -301,7 +288,6 @@ static const struct amd_ip_funcs si_ih_ip_funcs = {
|
||||
|
||||
static const struct amdgpu_ih_funcs si_ih_funcs = {
|
||||
.get_wptr = si_ih_get_wptr,
|
||||
.prescreen_iv = si_ih_prescreen_iv,
|
||||
.decode_iv = si_ih_decode_iv,
|
||||
.set_rptr = si_ih_set_rptr
|
||||
};
|
||||
|
@@ -507,6 +507,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (adev->asic_type == CHIP_VEGA20)
|
||||
adev->gmc.xgmi.supported = true;
|
||||
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
adev->nbio_funcs = &nbio_v7_0_funcs;
|
||||
else if (adev->asic_type == CHIP_VEGA20)
|
||||
@@ -613,6 +616,24 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
||||
.flush_hdp = &soc15_flush_hdp,
|
||||
.invalidate_hdp = &soc15_invalidate_hdp,
|
||||
.need_full_reset = &soc15_need_full_reset,
|
||||
.init_doorbell_index = &vega10_doorbell_index_init,
|
||||
};
|
||||
|
||||
static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &soc15_read_disabled_bios,
|
||||
.read_bios_from_rom = &soc15_read_bios_from_rom,
|
||||
.read_register = &soc15_read_register,
|
||||
.reset = &soc15_asic_reset,
|
||||
.set_vga_state = &soc15_vga_set_state,
|
||||
.get_xclk = &soc15_get_xclk,
|
||||
.set_uvd_clocks = &soc15_set_uvd_clocks,
|
||||
.set_vce_clocks = &soc15_set_vce_clocks,
|
||||
.get_config_memsize = &soc15_get_config_memsize,
|
||||
.flush_hdp = &soc15_flush_hdp,
|
||||
.invalidate_hdp = &soc15_invalidate_hdp,
|
||||
.need_full_reset = &soc15_need_full_reset,
|
||||
.init_doorbell_index = &vega20_doorbell_index_init,
|
||||
};
|
||||
|
||||
static int soc15_common_early_init(void *handle)
|
||||
@@ -632,11 +653,11 @@ static int soc15_common_early_init(void *handle)
|
||||
adev->se_cac_rreg = &soc15_se_cac_rreg;
|
||||
adev->se_cac_wreg = &soc15_se_cac_wreg;
|
||||
|
||||
adev->asic_funcs = &soc15_asic_funcs;
|
||||
|
||||
adev->external_rev_id = 0xFF;
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10:
|
||||
adev->asic_funcs = &soc15_asic_funcs;
|
||||
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_MGLS |
|
||||
AMD_CG_SUPPORT_GFX_RLC_LS |
|
||||
@@ -660,6 +681,7 @@ static int soc15_common_early_init(void *handle)
|
||||
adev->external_rev_id = 0x1;
|
||||
break;
|
||||
case CHIP_VEGA12:
|
||||
adev->asic_funcs = &soc15_asic_funcs;
|
||||
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_MGLS |
|
||||
AMD_CG_SUPPORT_GFX_CGCG |
|
||||
@@ -682,6 +704,7 @@ static int soc15_common_early_init(void *handle)
|
||||
adev->external_rev_id = adev->rev_id + 0x14;
|
||||
break;
|
||||
case CHIP_VEGA20:
|
||||
adev->asic_funcs = &vega20_asic_funcs;
|
||||
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_MGLS |
|
||||
AMD_CG_SUPPORT_GFX_CGCG |
|
||||
@@ -704,6 +727,7 @@ static int soc15_common_early_init(void *handle)
|
||||
adev->external_rev_id = adev->rev_id + 0x28;
|
||||
break;
|
||||
case CHIP_RAVEN:
|
||||
adev->asic_funcs = &soc15_asic_funcs;
|
||||
if (adev->rev_id >= 0x8)
|
||||
adev->external_rev_id = adev->rev_id + 0x81;
|
||||
else if (adev->pdev->device == 0x15d8)
|
||||
|
@@ -58,4 +58,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
|
||||
int vega10_reg_base_init(struct amdgpu_device *adev);
|
||||
int vega20_reg_base_init(struct amdgpu_device *adev);
|
||||
|
||||
void vega10_doorbell_index_init(struct amdgpu_device *adev);
|
||||
void vega20_doorbell_index_init(struct amdgpu_device *adev);
|
||||
#endif
|
||||
|
130
drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
Normal file
130
drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
Normal file
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _TA_XGMI_IF_H
|
||||
#define _TA_XGMI_IF_H
|
||||
|
||||
/* Responses have bit 31 set */
|
||||
#define RSP_ID_MASK (1U << 31)
|
||||
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
|
||||
|
||||
enum ta_command_xgmi {
|
||||
TA_COMMAND_XGMI__INITIALIZE = 0x00,
|
||||
TA_COMMAND_XGMI__GET_NODE_ID = 0x01,
|
||||
TA_COMMAND_XGMI__GET_HIVE_ID = 0x02,
|
||||
TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03,
|
||||
TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04
|
||||
};
|
||||
|
||||
/* XGMI related enumerations */
|
||||
/**********************************************************/;
|
||||
enum ta_xgmi_connected_nodes {
|
||||
TA_XGMI__MAX_CONNECTED_NODES = 64
|
||||
};
|
||||
|
||||
enum ta_xgmi_status {
|
||||
TA_XGMI_STATUS__SUCCESS = 0x00,
|
||||
TA_XGMI_STATUS__GENERIC_FAILURE = 0x01,
|
||||
TA_XGMI_STATUS__NULL_POINTER = 0x02,
|
||||
TA_XGMI_STATUS__INVALID_PARAMETER = 0x03,
|
||||
TA_XGMI_STATUS__NOT_INITIALIZED = 0x04,
|
||||
TA_XGMI_STATUS__INVALID_NODE_NUM = 0x05,
|
||||
TA_XGMI_STATUS__INVALID_NODE_ID = 0x06,
|
||||
TA_XGMI_STATUS__INVALID_TOPOLOGY = 0x07,
|
||||
TA_XGMI_STATUS__FAILED_ID_GEN = 0x08,
|
||||
TA_XGMI_STATUS__FAILED_TOPOLOGY_INIT = 0x09,
|
||||
TA_XGMI_STATUS__SET_SHARING_ERROR = 0x0A
|
||||
};
|
||||
|
||||
enum ta_xgmi_assigned_sdma_engine {
|
||||
TA_XGMI_ASSIGNED_SDMA_ENGINE__NOT_ASSIGNED = -1,
|
||||
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA0 = 0,
|
||||
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA1 = 1,
|
||||
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA2 = 2,
|
||||
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA3 = 3,
|
||||
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA4 = 4,
|
||||
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA5 = 5
|
||||
};
|
||||
|
||||
/* input/output structures for XGMI commands */
|
||||
/**********************************************************/
|
||||
struct ta_xgmi_node_info {
|
||||
uint64_t node_id;
|
||||
uint8_t num_hops;
|
||||
uint8_t is_sharing_enabled;
|
||||
enum ta_xgmi_assigned_sdma_engine sdma_engine;
|
||||
};
|
||||
|
||||
struct ta_xgmi_cmd_initialize_output {
|
||||
uint32_t status;
|
||||
};
|
||||
|
||||
struct ta_xgmi_cmd_get_node_id_output {
|
||||
uint64_t node_id;
|
||||
};
|
||||
|
||||
struct ta_xgmi_cmd_get_hive_id_output {
|
||||
uint64_t hive_id;
|
||||
};
|
||||
|
||||
struct ta_xgmi_cmd_get_topology_info_input {
|
||||
uint32_t num_nodes;
|
||||
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
|
||||
};
|
||||
|
||||
struct ta_xgmi_cmd_get_topology_info_output {
|
||||
uint32_t num_nodes;
|
||||
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
|
||||
};
|
||||
|
||||
struct ta_xgmi_cmd_set_topology_info_input {
|
||||
uint32_t num_nodes;
|
||||
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
|
||||
};
|
||||
|
||||
/**********************************************************/
|
||||
/* Common input structure for XGMI callbacks */
|
||||
union ta_xgmi_cmd_input {
|
||||
struct ta_xgmi_cmd_get_topology_info_input get_topology_info;
|
||||
struct ta_xgmi_cmd_set_topology_info_input set_topology_info;
|
||||
};
|
||||
|
||||
/* Common output structure for XGMI callbacks */
|
||||
union ta_xgmi_cmd_output {
|
||||
struct ta_xgmi_cmd_initialize_output initialize;
|
||||
struct ta_xgmi_cmd_get_node_id_output get_node_id;
|
||||
struct ta_xgmi_cmd_get_hive_id_output get_hive_id;
|
||||
struct ta_xgmi_cmd_get_topology_info_output get_topology_info;
|
||||
};
|
||||
/**********************************************************/
|
||||
|
||||
struct ta_xgmi_shared_memory {
|
||||
uint32_t cmd_id;
|
||||
uint32_t resp_id;
|
||||
enum ta_xgmi_status xgmi_status;
|
||||
uint32_t reserved;
|
||||
union ta_xgmi_cmd_input xgmi_in_message;
|
||||
union ta_xgmi_cmd_output xgmi_out_message;
|
||||
};
|
||||
|
||||
#endif //_TA_XGMI_IF_H
|
@@ -218,34 +218,6 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev)
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* tonga_ih_prescreen_iv - prescreen an interrupt vector
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Returns true if the interrupt vector should be further processed.
|
||||
*/
|
||||
static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u16 pasid;
|
||||
|
||||
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
|
||||
case 146:
|
||||
case 147:
|
||||
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
|
||||
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
/* Not a VM fault */
|
||||
return true;
|
||||
}
|
||||
|
||||
adev->irq.ih.rptr += 16;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* tonga_ih_decode_iv - decode an interrupt vector
|
||||
*
|
||||
@@ -322,7 +294,7 @@ static int tonga_ih_sw_init(void *handle)
|
||||
return r;
|
||||
|
||||
adev->irq.ih.use_doorbell = true;
|
||||
adev->irq.ih.doorbell_index = AMDGPU_DOORBELL_IH;
|
||||
adev->irq.ih.doorbell_index = adev->doorbell_index.ih;
|
||||
|
||||
r = amdgpu_irq_init(adev);
|
||||
|
||||
@@ -506,7 +478,6 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = {
|
||||
|
||||
static const struct amdgpu_ih_funcs tonga_ih_funcs = {
|
||||
.get_wptr = tonga_ih_get_wptr,
|
||||
.prescreen_iv = tonga_ih_prescreen_iv,
|
||||
.decode_iv = tonga_ih_decode_iv,
|
||||
.set_rptr = tonga_ih_set_rptr
|
||||
};
|
||||
|
@@ -116,16 +116,16 @@ static int uvd_v4_2_sw_init(void *handle)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_uvd_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
ring = &adev->uvd.inst->ring;
|
||||
sprintf(ring->name, "uvd");
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_uvd_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_uvd_entity_init(adev);
|
||||
|
||||
return r;
|
||||
@@ -162,12 +162,9 @@ static int uvd_v4_2_hw_init(void *handle)
|
||||
uvd_v4_2_enable_mgcg(adev, true);
|
||||
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
|
||||
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
goto done;
|
||||
}
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 10);
|
||||
if (r) {
|
||||
@@ -218,7 +215,7 @@ static int uvd_v4_2_hw_fini(void *handle)
|
||||
if (RREG32(mmUVD_STATUS) != 0)
|
||||
uvd_v4_2_stop(adev);
|
||||
|
||||
ring->ready = false;
|
||||
ring->sched.ready = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -484,11 +481,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
|
||||
|
||||
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
amdgpu_ring_commit(ring);
|
||||
@@ -499,14 +494,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -519,8 +509,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
|
||||
* Write ring commands to execute the indirect buffer
|
||||
*/
|
||||
static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
|
||||
amdgpu_ring_write(ring, ib->gpu_addr);
|
||||
|
@@ -113,16 +113,16 @@ static int uvd_v5_0_sw_init(void *handle)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_uvd_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
ring = &adev->uvd.inst->ring;
|
||||
sprintf(ring->name, "uvd");
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_uvd_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_uvd_entity_init(adev);
|
||||
|
||||
return r;
|
||||
@@ -158,12 +158,9 @@ static int uvd_v5_0_hw_init(void *handle)
|
||||
uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
|
||||
uvd_v5_0_enable_mgcg(adev, true);
|
||||
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
goto done;
|
||||
}
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 10);
|
||||
if (r) {
|
||||
@@ -215,7 +212,7 @@ static int uvd_v5_0_hw_fini(void *handle)
|
||||
if (RREG32(mmUVD_STATUS) != 0)
|
||||
uvd_v5_0_stop(adev);
|
||||
|
||||
ring->ready = false;
|
||||
ring->sched.ready = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -500,11 +497,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
|
||||
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
amdgpu_ring_commit(ring);
|
||||
@@ -515,14 +509,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -535,8 +524,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
* Write ring commands to execute the indirect buffer
|
||||
*/
|
||||
static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
|
||||
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
|
||||
|
@@ -175,11 +175,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
|
||||
int r;
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 16);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
|
||||
amdgpu_ring_commit(ring);
|
||||
|
||||
@@ -189,14 +186,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed\n",
|
||||
ring->idx);
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -336,31 +327,24 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
error:
|
||||
dma_fence_put(fence);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int uvd_v6_0_early_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
@@ -416,16 +400,16 @@ static int uvd_v6_0_sw_init(void *handle)
|
||||
DRM_INFO("UVD ENC is disabled\n");
|
||||
}
|
||||
|
||||
r = amdgpu_uvd_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
ring = &adev->uvd.inst->ring;
|
||||
sprintf(ring->name, "uvd");
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_uvd_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (uvd_v6_0_enc_support(adev)) {
|
||||
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
|
||||
ring = &adev->uvd.inst->ring_enc[i];
|
||||
@@ -476,12 +460,9 @@ static int uvd_v6_0_hw_init(void *handle)
|
||||
uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
|
||||
uvd_v6_0_enable_mgcg(adev, true);
|
||||
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
goto done;
|
||||
}
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 10);
|
||||
if (r) {
|
||||
@@ -513,12 +494,9 @@ static int uvd_v6_0_hw_init(void *handle)
|
||||
if (uvd_v6_0_enc_support(adev)) {
|
||||
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
|
||||
ring = &adev->uvd.inst->ring_enc[i];
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -548,7 +526,7 @@ static int uvd_v6_0_hw_fini(void *handle)
|
||||
if (RREG32(mmUVD_STATUS) != 0)
|
||||
uvd_v6_0_stop(adev);
|
||||
|
||||
ring->ready = false;
|
||||
ring->sched.ready = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -969,11 +947,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
|
||||
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
amdgpu_ring_commit(ring);
|
||||
@@ -984,14 +960,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -1004,9 +975,12 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
* Write ring commands to execute the indirect buffer
|
||||
*/
|
||||
static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0));
|
||||
amdgpu_ring_write(ring, vmid);
|
||||
|
||||
@@ -1027,8 +1001,12 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
* Write enc ring commands to execute the indirect buffer
|
||||
*/
|
||||
static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
|
||||
amdgpu_ring_write(ring, vmid);
|
||||
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
|
||||
|
@@ -183,11 +183,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
|
||||
return 0;
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 16);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n",
|
||||
ring->me, ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
|
||||
amdgpu_ring_commit(ring);
|
||||
|
||||
@@ -197,14 +194,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
|
||||
ring->me, ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: (%d)ring %d test failed\n",
|
||||
ring->me, ring->idx);
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -343,27 +334,19 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
long r;
|
||||
|
||||
r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me);
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
error:
|
||||
dma_fence_put(fence);
|
||||
return r;
|
||||
@@ -447,10 +430,6 @@ static int uvd_v7_0_sw_init(void *handle)
|
||||
DRM_INFO("PSP loading UVD firmware\n");
|
||||
}
|
||||
|
||||
r = amdgpu_uvd_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
|
||||
if (adev->uvd.harvest_config & (1 << j))
|
||||
continue;
|
||||
@@ -472,9 +451,9 @@ static int uvd_v7_0_sw_init(void *handle)
|
||||
* sriov, so set unused location for other unused rings.
|
||||
*/
|
||||
if (i == 0)
|
||||
ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
|
||||
ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring0_1 * 2;
|
||||
else
|
||||
ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
|
||||
ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring2_3 * 2 + 1;
|
||||
}
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
|
||||
if (r)
|
||||
@@ -482,6 +461,10 @@ static int uvd_v7_0_sw_init(void *handle)
|
||||
}
|
||||
}
|
||||
|
||||
r = amdgpu_uvd_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_uvd_entity_init(adev);
|
||||
if (r)
|
||||
return r;
|
||||
@@ -540,12 +523,9 @@ static int uvd_v7_0_hw_init(void *handle)
|
||||
ring = &adev->uvd.inst[j].ring;
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
goto done;
|
||||
}
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 10);
|
||||
if (r) {
|
||||
@@ -582,12 +562,9 @@ static int uvd_v7_0_hw_init(void *handle)
|
||||
|
||||
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
|
||||
ring = &adev->uvd.inst[j].ring_enc[i];
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
}
|
||||
done:
|
||||
@@ -619,7 +596,7 @@ static int uvd_v7_0_hw_fini(void *handle)
|
||||
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
|
||||
if (adev->uvd.harvest_config & (1 << i))
|
||||
continue;
|
||||
adev->uvd.inst[i].ring.ready = false;
|
||||
adev->uvd.inst[i].ring.sched.ready = false;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1235,11 +1212,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
|
||||
WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
|
||||
ring->me, ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring,
|
||||
PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
@@ -1251,14 +1226,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
|
||||
ring->me, ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->me, ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -1300,10 +1270,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
|
||||
* Write ring commands to execute the indirect buffer
|
||||
*/
|
||||
static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
amdgpu_ring_write(ring,
|
||||
PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
|
||||
@@ -1329,8 +1301,12 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
* Write enc ring commands to execute the indirect buffer
|
||||
*/
|
||||
static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
|
||||
amdgpu_ring_write(ring, vmid);
|
||||
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
|
||||
|
@@ -463,15 +463,11 @@ static int vce_v2_0_hw_init(void *handle)
|
||||
|
||||
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
|
||||
vce_v2_0_enable_mgcg(adev, true, false);
|
||||
for (i = 0; i < adev->vce.num_rings; i++)
|
||||
adev->vce.ring[i].ready = false;
|
||||
|
||||
for (i = 0; i < adev->vce.num_rings; i++) {
|
||||
r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
|
||||
r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
|
||||
if (r)
|
||||
return r;
|
||||
else
|
||||
adev->vce.ring[i].ready = true;
|
||||
}
|
||||
|
||||
DRM_INFO("VCE initialized successfully.\n");
|
||||
|
@@ -37,7 +37,6 @@
|
||||
#include "gca/gfx_8_0_d.h"
|
||||
#include "smu/smu_7_1_2_d.h"
|
||||
#include "smu/smu_7_1_2_sh_mask.h"
|
||||
#include "gca/gfx_8_0_d.h"
|
||||
#include "gca/gfx_8_0_sh_mask.h"
|
||||
#include "ivsrcid/ivsrcid_vislands30.h"
|
||||
|
||||
@@ -474,15 +473,10 @@ static int vce_v3_0_hw_init(void *handle)
|
||||
|
||||
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
|
||||
|
||||
for (i = 0; i < adev->vce.num_rings; i++)
|
||||
adev->vce.ring[i].ready = false;
|
||||
|
||||
for (i = 0; i < adev->vce.num_rings; i++) {
|
||||
r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
|
||||
r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
|
||||
if (r)
|
||||
return r;
|
||||
else
|
||||
adev->vce.ring[i].ready = true;
|
||||
}
|
||||
|
||||
DRM_INFO("VCE initialized successfully.\n");
|
||||
@@ -838,8 +832,12 @@ out:
|
||||
}
|
||||
|
||||
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
amdgpu_ring_write(ring, VCE_CMD_IB_VM);
|
||||
amdgpu_ring_write(ring, vmid);
|
||||
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user