IB/hfi1: Resolve kernel panics by reference counting receive contexts
Base receive contexts can be used by sub contexts. Because of this,
resources for the context cannot be completely freed until all sub
contexts are done using the base context.
Introduce a reference count so that the base receive context can be
freed only when all sub contexts are done with it.
Use the provided function call for setting default send context
integrity rather than the manual method.
The cleanup path does not set all variables back to NULL after freeing
resources. Since the cleanup code can get called more than once
(e.g. during context close and on the error path), it is necessary to
make sure that all the variables are NULLed.
Possible crashes are:
BUG: unable to handle kernel paging request at 0000000001908900
IP: read_csr+0x24/0x30 [hfi1]
RIP: 0010:read_csr+0x24/0x30 [hfi1]
Call Trace:
sc_disable+0x40/0x110 [hfi1]
hfi1_file_close+0x16f/0x360 [hfi1]
__fput+0xe7/0x210
____fput+0xe/0x10
or
kernel BUG at mm/slub.c:3877!
RIP: 0010:kfree+0x14f/0x170
Call Trace:
hfi1_free_ctxtdata+0x19a/0x2b0 [hfi1]
? hfi1_user_exp_rcv_grp_free+0x73/0x80 [hfi1]
hfi1_file_close+0x20f/0x360 [hfi1]
__fput+0xe7/0x210
____fput+0xe/0x10
Fixes: Commit 62239fc6e5
("IB/hfi1: Clean up on context initialization failure")
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:

committed by
Doug Ledford

parent
fe4e74eeb2
commit
f683c80ca6
@@ -191,15 +191,45 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
|
||||
nomem:
|
||||
ret = -ENOMEM;
|
||||
|
||||
if (dd->rcd) {
|
||||
for (i = 0; i < dd->num_rcv_contexts; ++i)
|
||||
hfi1_free_ctxtdata(dd, dd->rcd[i]);
|
||||
}
|
||||
for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i)
|
||||
hfi1_rcd_put(dd->rcd[i]);
|
||||
|
||||
/* All the contexts should be freed, free the array */
|
||||
kfree(dd->rcd);
|
||||
dd->rcd = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper routines for the receive context reference count (rcd and uctxt)
|
||||
*/
|
||||
static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd)
|
||||
{
|
||||
kref_init(&rcd->kref);
|
||||
}
|
||||
|
||||
static void hfi1_rcd_free(struct kref *kref)
|
||||
{
|
||||
struct hfi1_ctxtdata *rcd =
|
||||
container_of(kref, struct hfi1_ctxtdata, kref);
|
||||
|
||||
hfi1_free_ctxtdata(rcd->dd, rcd);
|
||||
kfree(rcd);
|
||||
}
|
||||
|
||||
int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
|
||||
{
|
||||
if (rcd)
|
||||
return kref_put(&rcd->kref, hfi1_rcd_free);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
|
||||
{
|
||||
kref_get(&rcd->kref);
|
||||
}
|
||||
|
||||
/*
|
||||
* Common code for user and kernel context setup.
|
||||
*/
|
||||
@@ -332,6 +362,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
|
||||
if (!rcd->opstats)
|
||||
goto bail;
|
||||
}
|
||||
|
||||
hfi1_rcd_init(rcd);
|
||||
}
|
||||
return rcd;
|
||||
bail:
|
||||
@@ -931,14 +963,11 @@ static void shutdown_device(struct hfi1_devdata *dd)
|
||||
* @rcd: the ctxtdata structure
|
||||
*
|
||||
* free up any allocated data for a context
|
||||
* This should not touch anything that would affect a simultaneous
|
||||
* re-allocation of context data, because it is called after hfi1_mutex
|
||||
* is released (and can be called from reinit as well).
|
||||
* It should never change any chip state, or global driver state.
|
||||
*/
|
||||
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
|
||||
{
|
||||
unsigned e;
|
||||
u32 e;
|
||||
|
||||
if (!rcd)
|
||||
return;
|
||||
@@ -957,6 +986,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
|
||||
|
||||
/* all the RcvArray entries should have been cleared by now */
|
||||
kfree(rcd->egrbufs.rcvtids);
|
||||
rcd->egrbufs.rcvtids = NULL;
|
||||
|
||||
for (e = 0; e < rcd->egrbufs.alloced; e++) {
|
||||
if (rcd->egrbufs.buffers[e].dma)
|
||||
@@ -966,13 +996,21 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
|
||||
rcd->egrbufs.buffers[e].dma);
|
||||
}
|
||||
kfree(rcd->egrbufs.buffers);
|
||||
rcd->egrbufs.alloced = 0;
|
||||
rcd->egrbufs.buffers = NULL;
|
||||
|
||||
sc_free(rcd->sc);
|
||||
rcd->sc = NULL;
|
||||
|
||||
vfree(rcd->subctxt_uregbase);
|
||||
vfree(rcd->subctxt_rcvegrbuf);
|
||||
vfree(rcd->subctxt_rcvhdr_base);
|
||||
kfree(rcd->opstats);
|
||||
kfree(rcd);
|
||||
|
||||
rcd->subctxt_uregbase = NULL;
|
||||
rcd->subctxt_rcvegrbuf = NULL;
|
||||
rcd->subctxt_rcvhdr_base = NULL;
|
||||
rcd->opstats = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1366,7 +1404,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
|
||||
tmp[ctxt] = NULL; /* debugging paranoia */
|
||||
if (rcd) {
|
||||
hfi1_clear_tids(rcd);
|
||||
hfi1_free_ctxtdata(dd, rcd);
|
||||
hfi1_rcd_put(rcd);
|
||||
}
|
||||
}
|
||||
kfree(tmp);
|
||||
|
Reference in New Issue
Block a user