i40e: Detection and recovery of TX queue hung logic moved to service_task from tx_timeout

This patch contains following changes:
   - detection and recovery logic (issue SW interrupt) has been moved to
     service_task from timeout function.
   - added some more debug info from tx_timeout.

Logic to detect and recover TX queue hung is now two step process:
  - service_task detects TX queue hung and sets a bit(hung_detected) if
    it was not set.
  - if bit was set (means this is back-back hung condition detected),
    issue SW interrupt and clear the bit.
  - napi_poll clears the bit unconditionally since it cleans TX/RX queues.

Change-ID: Ieed03a48927c845a988b3ff375090bf37caeb903
Signed-off-by: Kiran Patil <kiran.patil@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
This commit is contained in:
Kiran Patil
2015-11-06 15:26:02 -08:00
کامیت شده توسط Jeff Kirsher
والد 05281eb8e1
کامیت 9c6c12595b
5فایلهای تغییر یافته به همراه64 افزوده شده و 13 حذف شده

مشاهده پرونده

@@ -579,6 +579,9 @@ struct i40e_q_vector {
u8 num_ringpairs; /* total number of ring pairs in vector */
#define I40E_Q_VECTOR_HUNG_DETECT 0 /* Bit Index for hung detection logic */
unsigned long hung_detected; /* Set/Reset for hung_detection logic */
cpumask_t affinity_mask;
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];

مشاهده پرونده

@@ -4368,17 +4368,41 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi)
else
val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
/* Bail out if interrupts are disabled because napi_poll
* execution in-progress or will get scheduled soon.
* napi_poll cleans TX and RX queues and updates 'next_to_clean'.
*/
if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))
return;
head = i40e_get_head(tx_ring);
tx_pending = i40e_get_tx_pending(tx_ring);
/* Interrupts are disabled and TX pending is non-zero,
* trigger the SW interrupt (don't wait). Worst case
* there will be one extra interrupt which may result
* into not cleaning any queues because queues are cleaned.
/* HW is done executing descriptors, updated HEAD write back,
* but SW hasn't processed those descriptors. If interrupt is
* not generated from this point ON, it could result into
* dev_watchdog detecting timeout on those netdev_queue,
* hence proactively trigger SW interrupt.
*/
if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)))
i40e_force_wb(vsi, tx_ring->q_vector);
if (tx_pending) {
/* NAPI Poll didn't run and clear since it was set */
if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT,
&tx_ring->q_vector->hung_detected)) {
netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n",
vsi->seid, q_idx, tx_pending,
tx_ring->next_to_clean, head,
tx_ring->next_to_use,
readl(tx_ring->tail));
netdev_info(vsi->netdev, "VSI_seid %d, Issuing force_wb for TX queue %d, Interrupt Reg: 0x%x\n",
vsi->seid, q_idx, val);
i40e_force_wb(vsi, tx_ring->q_vector);
} else {
/* First Chance - detected possible hung */
set_bit(I40E_Q_VECTOR_HUNG_DETECT,
&tx_ring->q_vector->hung_detected);
}
}
}
/**

مشاهده پرونده

@@ -1891,6 +1891,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
return 0;
}
/* Clear hung_detected bit */
clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/