[SCSI] improved eh timeout handler

When a command runs into a timeout we need to send an 'ABORT TASK'
TMF. This is typically done by the 'eh_abort_handler' LLDD callback.

Conceptually, however, this function is a normal SCSI command, so
there is no need to enter the error handler.

This patch implements a new scsi_abort_command() function which
schedules an asynchronous function scmd_eh_abort_handler() to
abort the command via the usual 'eh_abort_handler'.

If the abort succeeds the command is either retried or terminated,
depending on the number of allowed retries. However, 'eh_eflags'
records the abort, so if the retry fails again the
command is pushed onto the error handler without trying to
abort it (again); it will be cleaned up by SCSI EH.

[hare: smatch detected stray switch fixed]
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
This commit is contained in:
Hannes Reinecke
2013-11-11 13:44:54 +01:00
committed by James Bottomley
parent 2451079bc2
commit e494f6a728
6 changed files with 167 additions and 14 deletions

View File

@@ -53,6 +53,8 @@ static void scsi_eh_done(struct scsi_cmnd *scmd);
#define HOST_RESET_SETTLE_TIME (10)
static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
static int scsi_try_to_abort_cmd(struct scsi_host_template *,
struct scsi_cmnd *);
/* called with shost->host_lock held */
void scsi_eh_wakeup(struct Scsi_Host *shost)
@@ -99,6 +101,116 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
return 1;
}
/**
 * scmd_eh_abort_handler - Handle command aborts
 * @work:	work item embedded in the command to be aborted
 *		(the command is recovered via its abort_work.work member).
 *
 * Unless scsi_host_eh_past_deadline() reports that the host is past its
 * deadline, the command is aborted through scsi_try_to_abort_cmd().
 *
 * When the abort returns SUCCESS the host byte of the result is set to
 * DID_TIME_OUT and the command is either requeued (if it may be retried
 * and has retries left) or finished.
 *
 * When no abort was attempted, or the abort did not succeed, the command
 * is passed to scsi_eh_scmd_add(); if that returns 0 the command is
 * finished with a DID_TIME_OUT host byte.
 */
void
scmd_eh_abort_handler(struct work_struct *work)
{
	struct scsi_cmnd *scmd =
		container_of(work, struct scsi_cmnd, abort_work.work);
	struct scsi_device *sdev = scmd->device;
	unsigned long flags;
	int rtn;

	/* The deadline check is done with the host lock held. */
	spin_lock_irqsave(sdev->host->host_lock, flags);
	if (scsi_host_eh_past_deadline(sdev->host)) {
		spin_unlock_irqrestore(sdev->host->host_lock, flags);
		SCSI_LOG_ERROR_RECOVERY(3,
			scmd_printk(KERN_INFO, scmd,
				    "scmd %p eh timeout, not aborting\n",
				    scmd));
	} else {
		spin_unlock_irqrestore(sdev->host->host_lock, flags);
		SCSI_LOG_ERROR_RECOVERY(3,
			scmd_printk(KERN_INFO, scmd,
				    "aborting command %p\n", scmd));
		rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd);
		if (rtn == SUCCESS) {
			/*
			 * Replace the host byte (bits 16-23) instead of
			 * OR-ing into it: if the result already carries a
			 * non-zero host byte at this point (presumably
			 * possible when the driver completed the command
			 * while the abort was running - verify against the
			 * LLDDs), an OR would merge two DID_* codes into an
			 * unrelated value and mislead scsi_noretry_cmd().
			 */
			scmd->result = (scmd->result & 0xff00ffff) |
				       (DID_TIME_OUT << 16);
			if (!scsi_noretry_cmd(scmd) &&
			    (++scmd->retries <= scmd->allowed)) {
				SCSI_LOG_ERROR_RECOVERY(3,
					scmd_printk(KERN_WARNING, scmd,
						    "scmd %p retry "
						    "aborted command\n", scmd));
				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
			} else {
				SCSI_LOG_ERROR_RECOVERY(3,
					scmd_printk(KERN_WARNING, scmd,
						    "scmd %p finish "
						    "aborted command\n", scmd));
				scsi_finish_command(scmd);
			}
			return;
		}
		SCSI_LOG_ERROR_RECOVERY(3,
			scmd_printk(KERN_INFO, scmd,
				    "scmd %p abort failed, rtn %d\n",
				    scmd, rtn));
	}

	/*
	 * Either no abort was attempted or it failed: hand the command to
	 * the error handler. If it cannot be added, complete it here.
	 */
	if (!scsi_eh_scmd_add(scmd, 0)) {
		SCSI_LOG_ERROR_RECOVERY(3,
			scmd_printk(KERN_WARNING, scmd,
				    "scmd %p terminate "
				    "aborted command\n", scmd));
		/* Same reasoning as above: set, do not OR, the host byte. */
		scmd->result = (scmd->result & 0xff00ffff) |
			       (DID_TIME_OUT << 16);
		scsi_finish_command(scmd);
	}
}
/**
 * scsi_abort_command - schedule a command abort
 * @scmd:	scmd to abort.
 *
 * We only need to abort commands after a command timeout.
 *
 * Returns FAILED without scheduling anything when an abort was already
 * scheduled for this command (SCSI_EH_ABORT_SCHEDULED is set in
 * eh_eflags) or when the host is already in recovery. Otherwise marks
 * the command with SCSI_EH_ABORT_SCHEDULED, queues its abort_work on the
 * host's tmf_work_q and returns SUCCESS.
 */
static int
scsi_abort_command(struct scsi_cmnd *scmd)
{
	struct Scsi_Host *host = scmd->device->host;
	unsigned long irq_flags;
	int in_recovery;

	if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) {
		/*
		 * An abort was scheduled for this command before and it
		 * came back here anyway: escalate to the next level.
		 */
		SCSI_LOG_ERROR_RECOVERY(3,
			scmd_printk(KERN_INFO, scmd,
				    "scmd %p previous abort failed\n", scmd));
		cancel_delayed_work(&scmd->abort_work);
		return FAILED;
	}

	/*
	 * Sample the recovery state and, if an abort is going to be
	 * scheduled, record the start time for the eh deadline - both
	 * under the host lock.
	 */
	spin_lock_irqsave(host->host_lock, irq_flags);
	in_recovery = scsi_host_in_recovery(host);
	if (!in_recovery && host->eh_deadline && !host->last_reset)
		host->last_reset = jiffies;
	spin_unlock_irqrestore(host->host_lock, irq_flags);

	/* Do not try a command abort once SCSI EH has already started. */
	if (in_recovery) {
		SCSI_LOG_ERROR_RECOVERY(3,
			scmd_printk(KERN_INFO, scmd,
				    "scmd %p not aborting, host in recovery\n",
				    scmd));
		return FAILED;
	}

	scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;
	SCSI_LOG_ERROR_RECOVERY(3,
		scmd_printk(KERN_INFO, scmd,
			    "scmd %p abort scheduled\n", scmd));
	queue_delayed_work(host->tmf_work_q, &scmd->abort_work, HZ / 100);
	return SUCCESS;
}
/**
* scsi_eh_scmd_add - add scsi cmd to error handling.
* @scmd: scmd to run eh on.
@@ -125,6 +237,8 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
shost->last_reset = jiffies;
ret = 1;
if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED)
eh_flag &= ~SCSI_EH_CANCEL_CMD;
scmd->eh_eflags |= eh_flag;
list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
shost->host_failed++;
@@ -161,6 +275,10 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
else if (host->hostt->eh_timed_out)
rtn = host->hostt->eh_timed_out(scmd);
if (rtn == BLK_EH_NOT_HANDLED && !host->hostt->no_async_abort)
if (scsi_abort_command(scmd) == SUCCESS)
return BLK_EH_NOT_HANDLED;
scmd->result |= DID_TIME_OUT << 16;
if (unlikely(rtn == BLK_EH_NOT_HANDLED &&
@@ -1577,7 +1695,7 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q,
}
/**
* scsi_noretry_cmd - determinte if command should be failed fast
* scsi_noretry_cmd - determine if command should be failed fast
* @scmd: SCSI cmd to examine.
*/
int scsi_noretry_cmd(struct scsi_cmnd *scmd)
@@ -1585,6 +1703,8 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)
switch (host_byte(scmd->result)) {
case DID_OK:
break;
case DID_TIME_OUT:
goto check_type;
case DID_BUS_BUSY:
return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT);
case DID_PARITY:
@@ -1598,18 +1718,19 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)
return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);
}
switch (status_byte(scmd->result)) {
case CHECK_CONDITION:
/*
* assume caller has checked sense and determinted
* the check condition was retryable.
*/
if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
scmd->request->cmd_type == REQ_TYPE_BLOCK_PC)
return 1;
}
if (status_byte(scmd->result) != CHECK_CONDITION)
return 0;
return 0;
check_type:
/*
* assume caller has checked sense and determined
* the check condition was retryable.
*/
if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
scmd->request->cmd_type == REQ_TYPE_BLOCK_PC)
return 1;
else
return 0;
}
/**
@@ -1659,9 +1780,13 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
* looks good. drop through, and check the next byte.
*/
break;
case DID_ABORT:
if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) {
scmd->result |= DID_TIME_OUT << 16;
return SUCCESS;
}
case DID_NO_CONNECT:
case DID_BAD_TARGET:
case DID_ABORT:
/*
* note - this means that we just report the status back
* to the top level driver, not that we actually think