
Git commit 050eef364a
"[S390] fix tlb flushing vs. concurrent
/proc accesses" introduced the attach counter to avoid using the
mm_users value to decide between IPTE for every PTE and lazy TLB
flushing with IDTE. That fixed the problem with mm_users but it
introduced another subtle race, fortunately one that is very hard
to hit.
The background is the requirement of the architecture that a valid
PTE may not be changed while it can be used concurrently by another
cpu. The decision between IPTE and lazy TLB flushing needs to be
done while the PTE is still valid. Now if the virtual cpu is
temporarily stopped after the decision to use lazy TLB flushing but
before the invalid bit of the PTE has been set, another cpu can attach
the mm, find that flush_mm is set, do the IDTE, return to userspace,
and recreate a TLB entry that uses the PTE in question. When the first,
stopped cpu continues, it will change the PTE while the mm is attached on
another cpu. The first cpu will do another IDTE shortly after the
modification of the PTE, which makes the race window quite short.
To fix this race the CPU that wants to attach the address space of a
user space thread needs to wait for the end of the PTE modification.
The number of concurrent TLB flushers for an mm is tracked in the
upper 16 bits of the attach_count and finish_arch_post_lock_switch
is used to wait for the end of the flush operation if required.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
112 lines
2.8 KiB
C
112 lines
2.8 KiB
C
/*
 *  S390 version
 *
 *  Derived from "include/asm-i386/mmu_context.h"
 */
|
|
|
|
#ifndef __S390_MMU_CONTEXT_H
|
|
#define __S390_MMU_CONTEXT_H
|
|
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/uaccess.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/ctl_reg.h>
|
|
|
|
static inline int init_new_context(struct task_struct *tsk,
|
|
struct mm_struct *mm)
|
|
{
|
|
atomic_set(&mm->context.attach_count, 0);
|
|
mm->context.flush_mm = 0;
|
|
mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
|
|
#ifdef CONFIG_64BIT
|
|
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
|
|
#endif
|
|
mm->context.has_pgste = 0;
|
|
mm->context.asce_limit = STACK_TOP_MAX;
|
|
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
|
|
return 0;
|
|
}
|
|
|
|
/* Nothing to tear down for an mm context here: expands to an empty statement. */
#define destroy_context(mm) do { } while (0)
|
|
|
|
/*
 * Mnemonic of the load-control instruction used by update_mm():
 * "lctl" without CONFIG_64BIT, "lctlg" with it.
 */
#ifndef CONFIG_64BIT
#define LCTL_OPCODE "lctl"
#else
#define LCTL_OPCODE "lctlg"
#endif
|
|
|
|
static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
|
|
{
|
|
pgd_t *pgd = mm->pgd;
|
|
|
|
S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
|
|
/* Load primary space page table origin. */
|
|
asm volatile(LCTL_OPCODE" 1,1,%0\n" : : "m" (S390_lowcore.user_asce));
|
|
set_fs(current->thread.mm_segment);
|
|
}
|
|
|
|
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
|
struct task_struct *tsk)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
|
|
if (prev == next)
|
|
return;
|
|
if (atomic_inc_return(&next->context.attach_count) >> 16) {
|
|
/* Delay update_mm until all TLB flushes are done. */
|
|
set_tsk_thread_flag(tsk, TIF_TLB_WAIT);
|
|
} else {
|
|
cpumask_set_cpu(cpu, mm_cpumask(next));
|
|
update_mm(next, tsk);
|
|
if (next->context.flush_mm)
|
|
/* Flush pending TLBs */
|
|
__tlb_flush_mm(next);
|
|
}
|
|
atomic_dec(&prev->context.attach_count);
|
|
WARN_ON(atomic_read(&prev->context.attach_count) < 0);
|
|
}
|
|
|
|
#define finish_arch_post_lock_switch finish_arch_post_lock_switch
|
|
static inline void finish_arch_post_lock_switch(void)
|
|
{
|
|
struct task_struct *tsk = current;
|
|
struct mm_struct *mm = tsk->mm;
|
|
|
|
if (!test_tsk_thread_flag(tsk, TIF_TLB_WAIT))
|
|
return;
|
|
preempt_disable();
|
|
clear_tsk_thread_flag(tsk, TIF_TLB_WAIT);
|
|
while (atomic_read(&mm->context.attach_count) >> 16)
|
|
cpu_relax();
|
|
|
|
cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
|
|
update_mm(mm, tsk);
|
|
if (mm->context.flush_mm)
|
|
__tlb_flush_mm(mm);
|
|
preempt_enable();
|
|
}
|
|
|
|
/* Both hooks are no-ops in this header: each expands to an empty statement. */
#define enter_lazy_tlb(mm,tsk)	do { } while (0)
#define deactivate_mm(tsk,mm)	do { } while (0)
|
|
|
|
static inline void activate_mm(struct mm_struct *prev,
|
|
struct mm_struct *next)
|
|
{
|
|
switch_mm(prev, next, current);
|
|
}
|
|
|
|
/*
 * Architecture hook for duplicating an address space from @oldmm into @mm.
 *
 * With CONFIG_64BIT: if the ASCE limit of @oldmm is smaller than that of
 * @mm, the table of @mm is downgraded to the old limit via
 * crst_table_downgrade(). Without CONFIG_64BIT this does nothing.
 */
static inline void arch_dup_mmap(struct mm_struct *oldmm,
				 struct mm_struct *mm)
{
#ifdef CONFIG_64BIT
	if (oldmm->context.asce_limit < mm->context.asce_limit)
		crst_table_downgrade(mm, oldmm->context.asce_limit);
#endif
}
|
|
|
|
/* Architecture hook for tearing down the mappings of @mm; nothing to do here. */
static inline void arch_exit_mmap(struct mm_struct *mm)
{
}
|
|
|
|
#endif /* __S390_MMU_CONTEXT_H */
|