gru: allow users to specify gru chiplet 2

Add support to the GRU driver to allow users to specify the blade &
chiplet for allocation of GRU contexts.  Add new statistics for context
loading/unloading/retargeting.  Also deleted a few GRU stats that were no
longer being used.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Jack Steiner
2009-12-15 16:48:05 -08:00
committed by Linus Torvalds
parent 518e5cd4aa
commit 55484c45db
6 changed files with 109 additions and 80 deletions

View File

@@ -684,6 +684,40 @@ static int gru_retarget_intr(struct gru_thread_state *gts)
return gru_update_cch(gts, 0);
}
/*
 * Check that a loaded GRU context lives on the blade/chiplet it is supposed
 * to use. A context can end up misplaced when the process migrates to a
 * different blade or when the user changes the selected blade/chiplet.
 *
 * Nothing is returned. A misplaced context is unloaded here; a correctly
 * placed one gets its interrupt retargeted via gru_retarget_intr().
 */
void gru_check_context_placement(struct gru_thread_state *gts)
{
	struct gru_state *gru = gts->ts_gru;
	int want_blade, want_chiplet;
	int misplaced;

	/* No context is currently loaded: nothing to verify. */
	if (!gru)
		return;

	/*
	 * Placement is verified only when the current task is the context
	 * owner. Non-owner references (pthread apps use these to reach the
	 * CBRs) skip the test entirely.
	 */
	if (gts->ts_tgid_owner != current->tgid)
		return;

	/* A negative user blade id means "use the blade we are running on". */
	want_blade = gts->ts_user_blade_id;
	if (want_blade < 0)
		want_blade = uv_numa_blade_id();
	want_chiplet = gts->ts_user_chiplet_id;

	/* The chiplet is compared only when the user selected one (>= 0). */
	misplaced = gru->gs_blade_id != want_blade;
	if (!misplaced && want_chiplet >= 0)
		misplaced = want_chiplet != gru->gs_chiplet_id;

	if (misplaced) {
		STAT(check_context_unload);
		gru_unload_context(gts, 1);
		return;
	}

	if (gru_retarget_intr(gts)) {
		STAT(check_context_retarget_intr);
	}
}
/*
* Insufficient GRU resources available on the local blade. Steal a context from
@@ -714,13 +748,17 @@ static void gts_stolen(struct gru_thread_state *gts,
}
}
void gru_steal_context(struct gru_thread_state *gts, int blade_id)
void gru_steal_context(struct gru_thread_state *gts)
{
struct gru_blade_state *blade;
struct gru_state *gru, *gru0;
struct gru_thread_state *ngts = NULL;
int ctxnum, ctxnum0, flag = 0, cbr, dsr;
int blade_id = gts->ts_user_blade_id;
int chiplet_id = gts->ts_user_chiplet_id;
if (blade_id < 0)
blade_id = uv_numa_blade_id();
cbr = gts->ts_cbr_au_count;
dsr = gts->ts_dsr_au_count;
@@ -731,35 +769,39 @@ void gru_steal_context(struct gru_thread_state *gts, int blade_id)
gru = blade->bs_lru_gru;
if (ctxnum == 0)
gru = next_gru(blade, gru);
blade->bs_lru_gru = gru;
blade->bs_lru_ctxnum = ctxnum;
ctxnum0 = ctxnum;
gru0 = gru;
while (1) {
if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
break;
spin_lock(&gru->gs_lock);
for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
if (flag && gru == gru0 && ctxnum == ctxnum0)
if (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id) {
if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
break;
ngts = gru->gs_gts[ctxnum];
/*
* We are grabbing locks out of order, so trylock is
* needed. GTSs are usually not locked, so the odds of
* success are high. If trylock fails, try to steal a
* different GSEG.
*/
if (ngts && is_gts_stealable(ngts, blade))
spin_lock(&gru->gs_lock);
for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
if (flag && gru == gru0 && ctxnum == ctxnum0)
break;
ngts = gru->gs_gts[ctxnum];
/*
* We are grabbing locks out of order, so trylock is
* needed. GTSs are usually not locked, so the odds of
* success are high. If trylock fails, try to steal a
* different GSEG.
*/
if (ngts && is_gts_stealable(ngts, blade))
break;
ngts = NULL;
}
spin_unlock(&gru->gs_lock);
if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
break;
ngts = NULL;
flag = 1;
}
spin_unlock(&gru->gs_lock);
if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
if (flag && gru == gru0)
break;
flag = 1;
ctxnum = 0;
gru = next_gru(blade, gru);
}
blade->bs_lru_gru = gru;
blade->bs_lru_ctxnum = ctxnum;
spin_unlock(&blade->bs_lock);
if (ngts) {
@@ -777,20 +819,36 @@ void gru_steal_context(struct gru_thread_state *gts, int blade_id)
hweight64(gru->gs_dsr_map));
}
/*
 * Claim a context number on the given GRU: pick the first clear bit in
 * gs_context_map (searching GRU_NUM_CCH bits), mark it in use and return
 * its index.
 *
 * NOTE(review): no check is made here for a full map; presumably callers
 * have already confirmed that a free context exists - verify against the
 * call sites.
 */
static int gru_assign_context_number(struct gru_state *gru)
{
	int free_ctx = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);

	__set_bit(free_ctx, &gru->gs_context_map);
	return free_ctx;
}
/*
* Scan the GRUs on the local blade & assign a GRU context.
*/
struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts,
int blade)
struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
{
struct gru_state *gru, *grux;
int i, max_active_contexts;
int blade_id = gts->ts_user_blade_id;
int chiplet_id = gts->ts_user_chiplet_id;
if (blade_id < 0)
blade_id = uv_numa_blade_id();
again:
gru = NULL;
max_active_contexts = GRU_NUM_CCH;
for_each_gru_on_blade(grux, blade, i) {
for_each_gru_on_blade(grux, blade_id, i) {
if (chiplet_id >= 0 && chiplet_id != grux->gs_chiplet_id)
continue;
if (check_gru_resources(grux, gts->ts_cbr_au_count,
gts->ts_dsr_au_count,
max_active_contexts)) {
@@ -811,12 +869,9 @@ again:
reserve_gru_resources(gru, gts);
gts->ts_gru = gru;
gts->ts_blade = gru->gs_blade_id;
gts->ts_ctxnum =
find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
gts->ts_ctxnum = gru_assign_context_number(gru);
atomic_inc(&gts->ts_refcnt);
gru->gs_gts[gts->ts_ctxnum] = gts;
__set_bit(gts->ts_ctxnum, &gru->gs_context_map);
spin_unlock(&gru->gs_lock);
STAT(assign_context);
@@ -844,7 +899,6 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct gru_thread_state *gts;
unsigned long paddr, vaddr;
int blade_id;
vaddr = (unsigned long)vmf->virtual_address;
gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
@@ -859,28 +913,18 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
again:
mutex_lock(&gts->ts_ctxlock);
preempt_disable();
blade_id = uv_numa_blade_id();
if (gts->ts_gru) {
if (gts->ts_gru->gs_blade_id != blade_id) {
STAT(migrated_nopfn_unload);
gru_unload_context(gts, 1);
} else {
if (gru_retarget_intr(gts))
STAT(migrated_nopfn_retarget);
}
}
gru_check_context_placement(gts);
if (!gts->ts_gru) {
STAT(load_user_context);
if (!gru_assign_gru_context(gts, blade_id)) {
if (!gru_assign_gru_context(gts)) {
preempt_enable();
mutex_unlock(&gts->ts_ctxlock);
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
blade_id = uv_numa_blade_id();
if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
gru_steal_context(gts, blade_id);
gru_steal_context(gts);
goto again;
}
gru_load_context(gts);