ANDROID: fix mmu_notifier race caused by not taking mmap_lock during SPF

When pagefaults are handled speculatively, the pair of
mmu_notifier_invalidate_range_start/mmu_notifier_invalidate_range_end
calls happen without mmap_lock being taken. This enables the following
race:

mmu_notifier_invalidate_range_start
                                       mmap_write_lock
                                       mmu_notifier_register
                                       mmap_write_unlock
mmu_notifier_invalidate_range_end

In this case, mmu_notifier_invalidate_range_end will see a new
subscriber that was not present at the time of
mmu_notifier_invalidate_range_start and will call
ops->invalidate_range_end for that subscriber without the matching
ops->invalidate_range_start, creating an imbalance.
Fix this by introducing a new mm->mmu_notifier_lock percpu_rw_semaphore
to synchronize mmu_notifier_invalidate_range_start/
mmu_notifier_invalidate_range_end with mmu_notifier_register when
handling pagefaults speculatively without holding mmap_lock.
percpu_rw_semaphore is used instead of rw_semaphore to prevent cache
line bouncing in the pagefault path.
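
In other words, the locking protocol is the following (a condensed
sketch of the helpers added below, not extra code in this patch):

    /* speculative fault path, mmap_lock not held */
    if (!mmu_notifier_trylock(mm))        /* percpu_down_read_trylock() */
            return VM_FAULT_RETRY;        /* fall back to the mmap_lock path */
    /* ...invalidate_range_start/end pairs may run here... */
    mmu_notifier_unlock(mm);              /* percpu_up_read() */

    /* __mmu_notifier_register(), mmap_lock held for write */
    mmu_notifier_write_lock(mm);          /* percpu_down_write(), waits for readers */
    /* ...publish the new subscriber... */
    mmu_notifier_write_unlock(mm);        /* percpu_up_write() */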

Fixes: 86ee4a531e ("FROMLIST: x86/mm: add speculative pagefault handling")

Bug: 161210518
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: I9c363b2348efcad19818f93b010abf956870ab55

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h

@@ -404,6 +404,7 @@ struct core_state {
 };
 
 struct kioctx_table;
+struct percpu_rw_semaphore;
 struct mm_struct {
 	struct {
 		struct vm_area_struct *mmap;		/* list of VMAs */
@@ -561,6 +562,9 @@ struct mm_struct {
 		struct file __rcu *exe_file;
 #ifdef CONFIG_MMU_NOTIFIER
 		struct mmu_notifier_subscriptions *notifier_subscriptions;
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+		struct percpu_rw_semaphore *mmu_notifier_lock;
+#endif
 #endif
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 		pgtable_t pmd_huge_pte; /* protected by page_table_lock */

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h

@@ -6,6 +6,8 @@
 #include <linux/spinlock.h>
 #include <linux/mm_types.h>
 #include <linux/mmap_lock.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/slab.h>
 #include <linux/srcu.h>
 #include <linux/interval_tree.h>
 #include <linux/android_kabi.h>
@@ -502,9 +504,50 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
 		__mmu_notifier_invalidate_range(mm, start, end);
 }
 
-static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm)
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+
+static inline bool mmu_notifier_subscriptions_init(struct mm_struct *mm)
 {
+	mm->mmu_notifier_lock = kzalloc(sizeof(struct percpu_rw_semaphore), GFP_KERNEL);
+	if (!mm->mmu_notifier_lock)
+		return false;
+
+	percpu_init_rwsem(mm->mmu_notifier_lock);
 	mm->notifier_subscriptions = NULL;
+	return true;
+}
+
+static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
+{
+	if (mm_has_notifiers(mm))
+		__mmu_notifier_subscriptions_destroy(mm);
+
+	if (in_atomic()) {
+		percpu_rwsem_async_destroy(mm->mmu_notifier_lock);
+	} else {
+		percpu_free_rwsem(mm->mmu_notifier_lock);
+		kfree(mm->mmu_notifier_lock);
+	}
+	mm->mmu_notifier_lock = NULL;
+}
+
+static inline bool mmu_notifier_trylock(struct mm_struct *mm)
+{
+	return percpu_down_read_trylock(mm->mmu_notifier_lock);
+}
+
+static inline void mmu_notifier_unlock(struct mm_struct *mm)
+{
+	percpu_up_read(mm->mmu_notifier_lock);
+}
+
+#else /* CONFIG_SPECULATIVE_PAGE_FAULT */
+
+static inline bool mmu_notifier_subscriptions_init(struct mm_struct *mm)
+{
+	mm->notifier_subscriptions = NULL;
+	return true;
+}
+
 static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
@@ -513,6 +556,16 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
 		__mmu_notifier_subscriptions_destroy(mm);
 }
 
+static inline bool mmu_notifier_trylock(struct mm_struct *mm)
+{
+	return true;
+}
+
+static inline void mmu_notifier_unlock(struct mm_struct *mm)
+{
+}
+
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
 
 static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
 					   enum mmu_notifier_event event,
@@ -727,14 +780,23 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
 {
 }
 
-static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm)
+static inline bool mmu_notifier_subscriptions_init(struct mm_struct *mm)
 {
+	return true;
 }
 
 static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
 {
 }
 
+static inline void mmu_notifier_lock(struct mm_struct *mm)
+{
+}
+
+static inline void mmu_notifier_unlock(struct mm_struct *mm)
+{
+}
+
 #define mmu_notifier_range_update_to_read_only(r) false
 
 #define ptep_clear_flush_young_notify ptep_clear_flush_young

diff --git a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c
+++ b/kernel/fork.c

@@ -1072,7 +1072,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_owner(mm, p);
 	mm_init_pasid(mm);
 	RCU_INIT_POINTER(mm->exe_file, NULL);
-	mmu_notifier_subscriptions_init(mm);
+	if (!mmu_notifier_subscriptions_init(mm))
+		goto fail_nopgd;
 	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;

diff --git a/mm/memory.c b/mm/memory.c
--- a/mm/memory.c
+++ b/mm/memory.c

@@ -4717,8 +4717,19 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 		goto unlock;
 	}
 	if (vmf->flags & FAULT_FLAG_WRITE) {
-		if (!pte_write(entry))
-			return do_wp_page(vmf);
+		if (!pte_write(entry)) {
+			if (!(vmf->flags & FAULT_FLAG_SPECULATIVE))
+				return do_wp_page(vmf);
+
+			if (!mmu_notifier_trylock(vmf->vma->vm_mm)) {
+				ret = VM_FAULT_RETRY;
+				goto unlock;
+			}
+
+			ret = do_wp_page(vmf);
+			mmu_notifier_unlock(vmf->vma->vm_mm);
+			return ret;
+		}
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
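
Note: a trylock is used here because handle_pte_fault() still holds the
page table spinlock at this point (the goto unlock path drops it), so
the speculative path cannot sleep on the semaphore and instead asks for
a retry. From the arch fault handler's point of view this amounts to
roughly the following (illustrative sketch only; the function names are
placeholders, not symbols from this patch):

    ret = speculative_fault(mm, address, flags);     /* FAULT_FLAG_SPECULATIVE set */
    if (ret & VM_FAULT_RETRY)
            ret = classic_fault(mm, address, flags); /* retried with mmap_lock held */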

diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c

@@ -621,6 +621,25 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
 	srcu_read_unlock(&srcu, id);
 }
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+
+static inline void mmu_notifier_write_lock(struct mm_struct *mm)
+{
+	percpu_down_write(mm->mmu_notifier_lock);
+}
+
+static inline void mmu_notifier_write_unlock(struct mm_struct *mm)
+{
+	percpu_up_write(mm->mmu_notifier_lock);
+}
+
+#else /* CONFIG_SPECULATIVE_PAGE_FAULT */
+
+static inline void mmu_notifier_write_lock(struct mm_struct *mm) {}
+static inline void mmu_notifier_write_unlock(struct mm_struct *mm) {}
+
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
+
 /*
  * Same as mmu_notifier_register but here the caller must hold the mmap_lock in
  * write mode. A NULL mn signals the notifier is being registered for itree
@@ -661,9 +680,13 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
 		INIT_HLIST_HEAD(&subscriptions->deferred_list);
 	}
 
+	mmu_notifier_write_lock(mm);
+
 	ret = mm_take_all_locks(mm);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
+		mmu_notifier_write_unlock(mm);
 		goto out_clean;
+	}
 
 	/*
 	 * Serialize the update against mmu_notifier_unregister. A
@@ -698,6 +721,7 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
 		mm->notifier_subscriptions->has_itree = true;
 
 	mm_drop_all_locks(mm);
+	mmu_notifier_write_unlock(mm);
 	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 	return 0;
 