drm/amdgpu: Implement concurrent asic reset for XGMI.
Use a per-hive workqueue (wq) to concurrently send reset commands to all nodes in the hive.

v2:
- Switch to system_highpri_wq after dropping the dedicated queue.
- Fix a KASAN error in the non-XGMI code path.
- Stop the per-node hive reset loop if the reset fails on any of the nodes.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:

committed by
Alex Deucher

parent
a82400b57a
commit
d4535e2c01
@@ -910,7 +910,9 @@ struct amdgpu_device {
|
||||
bool in_gpu_reset;
|
||||
struct mutex lock_reset;
|
||||
struct amdgpu_doorbell_index doorbell_index;
|
||||
|
||||
int asic_reset_res;
|
||||
struct work_struct xgmi_reset_work;
|
||||
};
|
||||
|
||||
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
|
||||
|
Reference in New Issue
Block a user