x86/mm: Rework lazy TLB to track the actual loaded mm

Lazy TLB state is currently managed in a rather baroque manner.
AFAICT, there are three possible states:

 - Non-lazy.  This means that we're running a user thread or a
   kernel thread that has called use_mm().  current->mm ==
   current->active_mm == cpu_tlbstate.active_mm and
   cpu_tlbstate.state == TLBSTATE_OK.

 - Lazy with user mm.  We're running a kernel thread without an mm
   and we're borrowing an mm_struct.  We have current->mm == NULL,
   current->active_mm == cpu_tlbstate.active_mm, cpu_tlbstate.state
   != TLBSTATE_OK (i.e. TLBSTATE_LAZY or 0).  The current cpu is set
   in mm_cpumask(current->active_mm).  CR3 points to
   current->active_mm->pgd.  The TLB is up to date.

 - Lazy with init_mm.  This happens when we call leave_mm().  We
   have current->mm == NULL, current->active_mm ==
   cpu_tlbstate.active_mm, but that mm is only relelvant insofar as
   the scheduler is tracking it for refcounting.  cpu_tlbstate.state
   != TLBSTATE_OK.  The current cpu is clear in
   mm_cpumask(current->active_mm).  CR3 points to swapper_pg_dir,
   i.e. init_mm->pgd.

This patch simplifies the situation.  Other than perf, x86 stops
caring about current->active_mm at all.  We have
cpu_tlbstate.loaded_mm pointing to the mm that CR3 references.  The
TLB is always up to date for that mm.  leave_mm() just switches us
to init_mm.  There are no longer any special cases for mm_cpumask,
and switch_mm() switches mms without worrying about laziness.

After this patch, cpu_tlbstate.state serves only to tell the TLB
flush code whether it may switch to init_mm instead of doing a
normal flush.

This makes fairly extensive changes to xen_exit_mmap(), which used
to look a bit like black magic.

Perf is unchanged.  With or without this change, perf may behave a bit
erratically if it tries to read user memory in kernel thread context.
We should build on this patch to teach perf to never look at user
memory when cpu_tlbstate.loaded_mm != current->mm.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Andy Lutomirski
2017-05-28 10:00:15 -07:00
committed by Ingo Molnar
parent ce4a4e565f
commit 3d28ebceaf
6 changed files with 148 additions and 145 deletions

View File

@@ -975,37 +975,32 @@ static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
spin_unlock(&mm->page_table_lock);
}
#ifdef CONFIG_SMP
/* Another cpu may still have their %cr3 pointing at the pagetable, so
we need to repoint it somewhere else before we can unpin it. */
static void drop_other_mm_ref(void *info)
static void drop_mm_ref_this_cpu(void *info)
{
struct mm_struct *mm = info;
struct mm_struct *active_mm;
active_mm = this_cpu_read(cpu_tlbstate.active_mm);
if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
leave_mm(smp_processor_id());
/* If this cpu still has a stale cr3 reference, then make sure
it has been flushed. */
/*
* If this cpu still has a stale cr3 reference, then make sure
* it has been flushed.
*/
if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
load_cr3(swapper_pg_dir);
xen_mc_flush();
}
#ifdef CONFIG_SMP
/*
* Another cpu may still have their %cr3 pointing at the pagetable, so
* we need to repoint it somewhere else before we can unpin it.
*/
static void xen_drop_mm_ref(struct mm_struct *mm)
{
cpumask_var_t mask;
unsigned cpu;
if (current->active_mm == mm) {
if (current->mm == mm)
load_cr3(swapper_pg_dir);
else
leave_mm(smp_processor_id());
}
drop_mm_ref_this_cpu(mm);
/* Get the "official" set of cpus referring to our pagetable. */
if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
@@ -1013,31 +1008,31 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
&& per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
continue;
smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
}
return;
}
cpumask_copy(mask, mm_cpumask(mm));
/* It's possible that a vcpu may have a stale reference to our
cr3, because its in lazy mode, and it hasn't yet flushed
its set of pending hypercalls yet. In this case, we can
look at its actual current cr3 value, and force it to flush
if needed. */
/*
* It's possible that a vcpu may have a stale reference to our
* cr3, because its in lazy mode, and it hasn't yet flushed
* its set of pending hypercalls yet. In this case, we can
* look at its actual current cr3 value, and force it to flush
* if needed.
*/
for_each_online_cpu(cpu) {
if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
cpumask_set_cpu(cpu, mask);
}
if (!cpumask_empty(mask))
smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1);
free_cpumask_var(mask);
}
#else
static void xen_drop_mm_ref(struct mm_struct *mm)
{
if (current->active_mm == mm)
load_cr3(swapper_pg_dir);
drop_mm_ref_this_cpu(mm);
}
#endif