drm/radeon/kms: simplify & improve GPU reset V2
This simplify and improve GPU reset for R1XX-R6XX hw, it's not 100% reliable here are result: - R1XX/R2XX works bunch of time in a row, sometimes it seems it can work indifinitly - R3XX/R3XX the most unreliable one, sometimes you will be able to reset few times, sometimes not even once - R5XX more reliable than previous hw, seems to work most of the times but once in a while it fails for no obvious reasons (same status than previous reset just no same happy ending) - R6XX/R7XX are lot more reliable with this patch, still it seems that it can fail after a bunch (reset every 2sec for 3hour bring down the GPU & computer) This have been tested on various hw, for some odd reasons i wasn't able to lockup RS480/RS690 (while they use to love locking up). Note that on R1XX-R5XX the cursor will disapear after lockup haven't checked why, switch to console and back to X will restore cursor. Next step is to record the bogus command that leaded to the lockup. V2 Fix r6xx resume path to avoid reinitializing blit module, use the gpu_lockup boolean to avoid entering inifinite waiting loop on fence while reiniting the GPU Signed-off-by: Jerome Glisse <jglisse@redhat.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:

committed by
Dave Airlie

parent
a2d07b7438
commit
90aca4d274
@@ -151,6 +151,10 @@ void rv370_pcie_gart_disable(struct radeon_device *rdev)
|
||||
u32 tmp;
|
||||
int r;
|
||||
|
||||
WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, 0);
|
||||
WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, 0);
|
||||
WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
|
||||
WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
|
||||
tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
|
||||
tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
|
||||
WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
|
||||
@@ -323,7 +327,6 @@ void r300_gpu_init(struct radeon_device *rdev)
|
||||
{
|
||||
uint32_t gb_tile_config, tmp;
|
||||
|
||||
r100_hdp_reset(rdev);
|
||||
/* FIXME: rv380 one pipes ? */
|
||||
if ((rdev->family == CHIP_R300 && rdev->pdev->device != 0x4144) ||
|
||||
(rdev->family == CHIP_R350)) {
|
||||
@@ -376,57 +379,6 @@ void r300_gpu_init(struct radeon_device *rdev)
|
||||
rdev->num_gb_pipes, rdev->num_z_pipes);
|
||||
}
|
||||
|
||||
int r300_ga_reset(struct radeon_device *rdev)
|
||||
{
|
||||
uint32_t tmp;
|
||||
bool reinit_cp;
|
||||
int i;
|
||||
|
||||
reinit_cp = rdev->cp.ready;
|
||||
rdev->cp.ready = false;
|
||||
for (i = 0; i < rdev->usec_timeout; i++) {
|
||||
WREG32(RADEON_CP_CSQ_MODE, 0);
|
||||
WREG32(RADEON_CP_CSQ_CNTL, 0);
|
||||
WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
|
||||
(void)RREG32(RADEON_RBBM_SOFT_RESET);
|
||||
udelay(200);
|
||||
WREG32(RADEON_RBBM_SOFT_RESET, 0);
|
||||
/* Wait to prevent race in RBBM_STATUS */
|
||||
mdelay(1);
|
||||
tmp = RREG32(RADEON_RBBM_STATUS);
|
||||
if (tmp & ((1 << 20) | (1 << 26))) {
|
||||
DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
|
||||
/* GA still busy soft reset it */
|
||||
WREG32(0x429C, 0x200);
|
||||
WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
|
||||
WREG32(R300_RE_SCISSORS_TL, 0);
|
||||
WREG32(R300_RE_SCISSORS_BR, 0);
|
||||
WREG32(0x24AC, 0);
|
||||
}
|
||||
/* Wait to prevent race in RBBM_STATUS */
|
||||
mdelay(1);
|
||||
tmp = RREG32(RADEON_RBBM_STATUS);
|
||||
if (!(tmp & ((1 << 20) | (1 << 26)))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < rdev->usec_timeout; i++) {
|
||||
tmp = RREG32(RADEON_RBBM_STATUS);
|
||||
if (!(tmp & ((1 << 20) | (1 << 26)))) {
|
||||
DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
|
||||
tmp);
|
||||
if (reinit_cp) {
|
||||
return r100_cp_init(rdev, rdev->cp.ring_size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
tmp = RREG32(RADEON_RBBM_STATUS);
|
||||
DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool r300_gpu_is_lockup(struct radeon_device *rdev)
|
||||
{
|
||||
u32 rbbm_status;
|
||||
@@ -451,37 +403,69 @@ bool r300_gpu_is_lockup(struct radeon_device *rdev)
|
||||
|
||||
int r300_asic_reset(struct radeon_device *rdev)
|
||||
{
|
||||
uint32_t status;
|
||||
struct r100_mc_save save;
|
||||
u32 status, tmp;
|
||||
|
||||
/* reset order likely matter */
|
||||
status = RREG32(RADEON_RBBM_STATUS);
|
||||
r100_mc_stop(rdev, &save);
|
||||
status = RREG32(R_000E40_RBBM_STATUS);
|
||||
if (!G_000E40_GUI_ACTIVE(status)) {
|
||||
return 0;
|
||||
}
|
||||
status = RREG32(R_000E40_RBBM_STATUS);
|
||||
dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
|
||||
/* reset HDP */
|
||||
r100_hdp_reset(rdev);
|
||||
/* reset rb2d */
|
||||
if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
|
||||
r100_rb2d_reset(rdev);
|
||||
}
|
||||
/* reset GA */
|
||||
if (status & ((1 << 20) | (1 << 26))) {
|
||||
r300_ga_reset(rdev);
|
||||
}
|
||||
/* reset CP */
|
||||
status = RREG32(RADEON_RBBM_STATUS);
|
||||
if (status & (1 << 16)) {
|
||||
r100_cp_reset(rdev);
|
||||
}
|
||||
/* stop CP */
|
||||
WREG32(RADEON_CP_CSQ_CNTL, 0);
|
||||
tmp = RREG32(RADEON_CP_RB_CNTL);
|
||||
WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
|
||||
WREG32(RADEON_CP_RB_RPTR_WR, 0);
|
||||
WREG32(RADEON_CP_RB_WPTR, 0);
|
||||
WREG32(RADEON_CP_RB_CNTL, tmp);
|
||||
/* save PCI state */
|
||||
pci_save_state(rdev->pdev);
|
||||
/* disable bus mastering */
|
||||
r100_bm_disable(rdev);
|
||||
WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_VAP(1) |
|
||||
S_0000F0_SOFT_RESET_GA(1));
|
||||
RREG32(R_0000F0_RBBM_SOFT_RESET);
|
||||
mdelay(500);
|
||||
WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
|
||||
mdelay(1);
|
||||
status = RREG32(R_000E40_RBBM_STATUS);
|
||||
dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
|
||||
/* resetting the CP seems to be problematic sometimes it end up
|
||||
* hard locking the computer, but it's necessary for successfull
|
||||
* reset more test & playing is needed on R3XX/R4XX to find a
|
||||
* reliable (if any solution)
|
||||
*/
|
||||
WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
|
||||
RREG32(R_0000F0_RBBM_SOFT_RESET);
|
||||
mdelay(500);
|
||||
WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
|
||||
mdelay(1);
|
||||
status = RREG32(R_000E40_RBBM_STATUS);
|
||||
dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
|
||||
/* reset MC */
|
||||
WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_MC(1));
|
||||
RREG32(R_0000F0_RBBM_SOFT_RESET);
|
||||
mdelay(500);
|
||||
WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
|
||||
mdelay(1);
|
||||
status = RREG32(R_000E40_RBBM_STATUS);
|
||||
dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
|
||||
/* restore PCI & busmastering */
|
||||
pci_restore_state(rdev->pdev);
|
||||
r100_enable_bm(rdev);
|
||||
/* Check if GPU is idle */
|
||||
status = RREG32(RADEON_RBBM_STATUS);
|
||||
if (status & RADEON_RBBM_ACTIVE) {
|
||||
DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
|
||||
if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) {
|
||||
dev_err(rdev->dev, "failed to reset GPU\n");
|
||||
rdev->gpu_lockup = true;
|
||||
return -1;
|
||||
}
|
||||
DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
|
||||
r100_mc_resume(rdev, &save);
|
||||
dev_info(rdev->dev, "GPU reset succeed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* r300,r350,rv350,rv380 VRAM info
|
||||
*/
|
||||
|
Reference in New Issue
Block a user