ANDROID: userfaultfd: allow SPF for UFFD_FEATURE_SIGBUS on private+anon
Currently we bail out of the speculative page fault (SPF) path when we detect that the fault address is in a userfaultfd-registered vma. However, if userfaultfd is being used with the UFFD_FEATURE_SIGBUS feature, then handle_userfault() does little work and is straightforward to handle with SPF. This patch lets MISSING userfaults on private anonymous mappings be handled with SPF when UFFD_FEATURE_SIGBUS is in use.

With this patch we get a >99% success rate for userfaults triggered during the userfaultfd GC's compaction phase. This translates into eliminating uninterruptible sleep time in do_page_fault() due to userfaults.

Bug: 320478828
Signed-off-by: Lokesh Gidra <lokeshgidra@google.com>
Change-Id: Ic7fde0fde03602b35179bc0cf891ddbbc434190f
(cherry picked from commit 582c6d188ec138d8ed9c6ef235bf5698d80d7d6b)
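For context, the userspace setup this change targets looks roughly like the sketch below: a private anonymous mapping registered for MISSING faults on a userfaultfd opened with UFFD_FEATURE_SIGBUS, so an unresolved fault raises SIGBUS in the faulting thread instead of blocking it in the kernel. This is an illustrative, minimal example and not part of the patch; the 2 MiB size, names, and trimmed error handling are placeholders.

/*
 * Minimal sketch (not from this patch): register a private anonymous
 * region for MISSING faults on a userfaultfd created with
 * UFFD_FEATURE_SIGBUS. Size and names are arbitrary.
 */
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	size_t len = 2UL << 20;	/* 2 MiB, arbitrary */

	/* Private anonymous mapping: the case this patch allows under SPF. */
	void *area = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (area == MAP_FAILED)
		return 1;

	/*
	 * Depending on vm.unprivileged_userfaultfd, unprivileged callers may
	 * additionally need the UFFD_USER_MODE_ONLY flag here.
	 */
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0)
		return 1;

	/* Ask for SIGBUS delivery instead of blocking unresolved faults. */
	struct uffdio_api api = {
		.api = UFFD_API,
		.features = UFFD_FEATURE_SIGBUS,
	};
	if (ioctl(uffd, UFFDIO_API, &api) < 0)
		return 1;

	/* Register the region for MISSING (not-present page) faults. */
	struct uffdio_register reg = {
		.range = { .start = (unsigned long)area, .len = len },
		.mode  = UFFDIO_REGISTER_MODE_MISSING,
	};
	if (ioctl(uffd, UFFDIO_REGISTER, &reg) < 0)
		return 1;

	/*
	 * From here, touching an unpopulated page in 'area' raises SIGBUS in
	 * the faulting thread; with this patch that outcome can be reached
	 * from the speculative fault path without taking mmap_lock.
	 */
	printf("registered %zu bytes at %p with SIGBUS+MISSING\n", len, area);
	return 0;
}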
@@ -70,6 +70,7 @@ struct userfaultfd_ctx {
 	bool mmap_changing;
 	/* mm with one ore more vmas attached to this userfaultfd_ctx */
 	struct mm_struct *mm;
+	struct rcu_head rcu_head;
 };
 
 struct userfaultfd_fork_ctx {
@@ -155,6 +156,13 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx)
 	refcount_inc(&ctx->refcount);
 }
 
+static void __free_userfaultfd_ctx(struct rcu_head *head)
+{
+	struct userfaultfd_ctx *ctx = container_of(head, struct userfaultfd_ctx,
+						   rcu_head);
+	kmem_cache_free(userfaultfd_ctx_cachep, ctx);
+}
+
 /**
  * userfaultfd_ctx_put - Releases a reference to the internal userfaultfd
  * context.
@@ -175,7 +183,7 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
 		VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
 		VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
 		mmdrop(ctx->mm);
-		kmem_cache_free(userfaultfd_ctx_cachep, ctx);
+		call_rcu(&ctx->rcu_head, __free_userfaultfd_ctx);
 	}
 }
 
@@ -349,6 +357,24 @@ static inline long userfaultfd_get_blocking_state(unsigned int flags)
 	return TASK_UNINTERRUPTIBLE;
 }
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+bool userfaultfd_using_sigbus(struct vm_area_struct *vma)
+{
+	struct userfaultfd_ctx *ctx;
+	bool ret;
+
+	/*
+	 * Do it inside RCU section to ensure that the ctx doesn't
+	 * disappear under us.
+	 */
+	rcu_read_lock();
+	ctx = rcu_dereference(vma->vm_userfaultfd_ctx.ctx);
+	ret = ctx && (ctx->features & UFFD_FEATURE_SIGBUS);
+	rcu_read_unlock();
+	return ret;
+}
+#endif
+
 /*
  * The locking rules involved in returning VM_FAULT_RETRY depending on
  * FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and
@@ -393,7 +419,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 */
 	mmap_assert_locked(mm);
 
-	ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
+	ctx = rcu_dereference_protected(vmf->vma->vm_userfaultfd_ctx.ctx,
+					lockdep_is_held(&mm->mmap_lock));
 	if (!ctx)
 		goto out;
 
@@ -610,8 +637,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 		/* the various vma->vm_userfaultfd_ctx still points to it */
 		mmap_write_lock(mm);
 		for (vma = mm->mmap; vma; vma = vma->vm_next)
-			if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
-				vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+			if (rcu_access_pointer(vma->vm_userfaultfd_ctx.ctx) ==
+			    release_new_ctx) {
+				rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx,
+						   NULL);
 				vma->vm_flags &= ~__VM_UFFD_FLAGS;
 			}
 		mmap_write_unlock(mm);
@@ -641,10 +670,13 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
 	struct userfaultfd_ctx *ctx = NULL, *octx;
 	struct userfaultfd_fork_ctx *fctx;
 
-	octx = vma->vm_userfaultfd_ctx.ctx;
+	octx = rcu_dereference_protected(
+			vma->vm_userfaultfd_ctx.ctx,
+			lockdep_is_held(&vma->vm_mm->mmap_lock));
+
 	if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
 		vm_write_begin(vma);
-		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+		rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL);
 		WRITE_ONCE(vma->vm_flags,
 			   vma->vm_flags & ~__VM_UFFD_FLAGS);
 		vm_write_end(vma);
@@ -683,7 +715,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
 		list_add_tail(&fctx->list, fcs);
 	}
 
-	vma->vm_userfaultfd_ctx.ctx = ctx;
+	rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, ctx);
 	return 0;
 }
 
@@ -716,7 +748,8 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
 {
 	struct userfaultfd_ctx *ctx;
 
-	ctx = vma->vm_userfaultfd_ctx.ctx;
+	ctx = rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
+					lockdep_is_held(&vma->vm_mm->mmap_lock));
 
 	if (!ctx)
 		return;
@@ -727,7 +760,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
 		WRITE_ONCE(ctx->mmap_changing, true);
 	} else {
 		/* Drop uffd context if remap feature not enabled */
-		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+		rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL);
 		vma->vm_flags &= ~__VM_UFFD_FLAGS;
 	}
 }
@@ -764,7 +797,8 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue ewq;
 
-	ctx = vma->vm_userfaultfd_ctx.ctx;
+	ctx = rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
+					lockdep_is_held(&mm->mmap_lock));
 	if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
 		return true;
 
@@ -802,7 +836,9 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma,
 {
 	for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
 		struct userfaultfd_unmap_ctx *unmap_ctx;
-		struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+		struct userfaultfd_ctx *ctx =
+			rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
+					lockdep_is_held(&vma->vm_mm->mmap_lock));
 
 		if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
 		    has_unmap_ctx(ctx, unmaps, start, end))
@@ -867,10 +903,13 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	mmap_write_lock(mm);
 	prev = NULL;
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		struct userfaultfd_ctx *cur_uffd_ctx =
+			rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
+					lockdep_is_held(&mm->mmap_lock));
 		cond_resched();
-		BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
+		BUG_ON(!!cur_uffd_ctx ^
 		       !!(vma->vm_flags & __VM_UFFD_FLAGS));
-		if (vma->vm_userfaultfd_ctx.ctx != ctx) {
+		if (cur_uffd_ctx != ctx) {
 			prev = vma;
 			continue;
 		}
@@ -887,7 +926,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 			prev = vma;
 		vm_write_begin(vma);
 		WRITE_ONCE(vma->vm_flags, new_flags);
-		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+		rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL);
 		vm_write_end(vma);
 	}
 	mmap_write_unlock(mm);
@@ -1350,9 +1389,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	found = false;
 	basic_ioctls = false;
 	for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
+		struct userfaultfd_ctx *cur_uffd_ctx =
+			rcu_dereference_protected(cur->vm_userfaultfd_ctx.ctx,
+					lockdep_is_held(&mm->mmap_lock));
 		cond_resched();
 
-		BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
+		BUG_ON(!!cur_uffd_ctx ^
 		       !!(cur->vm_flags & __VM_UFFD_FLAGS));
 
 		/* check not compatible vmas */
@@ -1395,8 +1437,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		 * wouldn't know which one to deliver the userfaults to.
 		 */
 		ret = -EBUSY;
-		if (cur->vm_userfaultfd_ctx.ctx &&
-		    cur->vm_userfaultfd_ctx.ctx != ctx)
+		if (cur_uffd_ctx && cur_uffd_ctx != ctx)
 			goto out_unlock;
 
 		/*
@@ -1414,18 +1455,20 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 
 	ret = 0;
 	do {
+		struct userfaultfd_ctx *cur_uffd_ctx =
+			rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
+					lockdep_is_held(&mm->mmap_lock));
 		cond_resched();
 
 		BUG_ON(!vma_can_userfault(vma, vm_flags));
-		BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
-		       vma->vm_userfaultfd_ctx.ctx != ctx);
+		BUG_ON(cur_uffd_ctx && cur_uffd_ctx != ctx);
 		WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
 
 		/*
 		 * Nothing to do: this vma is already registered into this
 		 * userfaultfd and with the right tracking mode too.
 		 */
-		if (vma->vm_userfaultfd_ctx.ctx == ctx &&
+		if (cur_uffd_ctx == ctx &&
 		    (vma->vm_flags & vm_flags) == vm_flags)
 			goto skip;
 
@@ -1461,7 +1504,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		 */
 		vm_write_begin(vma);
 		WRITE_ONCE(vma->vm_flags, new_flags);
-		vma->vm_userfaultfd_ctx.ctx = ctx;
+		rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, ctx);
 		vm_write_end(vma);
 
 		if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
@@ -1561,7 +1604,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
 		cond_resched();
 
-		BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
+		BUG_ON(!!rcu_access_pointer(cur->vm_userfaultfd_ctx.ctx) ^
 		       !!(cur->vm_flags & __VM_UFFD_FLAGS));
 
 		/*
@@ -1583,6 +1626,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 
 	ret = 0;
 	do {
+		struct userfaultfd_ctx *cur_uffd_ctx =
+			rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx,
+					lockdep_is_held(&mm->mmap_lock));
 		cond_resched();
 
 		BUG_ON(!vma_can_userfault(vma, vma->vm_flags));
@@ -1591,7 +1637,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		 * Nothing to do: this vma is already registered into this
 		 * userfaultfd and with the right tracking mode too.
 		 */
-		if (!vma->vm_userfaultfd_ctx.ctx)
+		if (!cur_uffd_ctx)
 			goto skip;
 
 		WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
@@ -1610,7 +1656,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 			struct userfaultfd_wake_range range;
 			range.start = start;
 			range.len = vma_end - start;
-			wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
+			wake_userfault(cur_uffd_ctx, &range);
 		}
 
 		new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
@@ -1641,7 +1687,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		 */
 		vm_write_begin(vma);
 		WRITE_ONCE(vma->vm_flags, new_flags);
-		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+		rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL);
 		vm_write_end(vma);
 
 	skip:
@@ -292,7 +292,7 @@ struct vm_region {
 #ifdef CONFIG_USERFAULTFD
 #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, })
 struct vm_userfaultfd_ctx {
-	struct userfaultfd_ctx *ctx;
+	struct userfaultfd_ctx __rcu *ctx;
 };
 #else /* CONFIG_USERFAULTFD */
 #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {})
@@ -36,6 +36,9 @@
 extern int sysctl_unprivileged_userfaultfd;
 
 extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+extern bool userfaultfd_using_sigbus(struct vm_area_struct *vma);
+#endif
 
 /*
  * The mode of operation for __mcopy_atomic and its helpers.
@@ -75,7 +78,7 @@ extern int mwriteprotect_range(struct mm_struct *dst_mm,
 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
 					struct vm_userfaultfd_ctx vm_ctx)
 {
-	return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
+	return rcu_access_pointer(vma->vm_userfaultfd_ctx.ctx) == vm_ctx.ctx;
 }
 
 /*
@@ -154,6 +157,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
 	return VM_FAULT_SIGBUS;
 }
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+static inline bool userfaultfd_using_sigbus(struct vm_area_struct *vma)
+{
+	return false;
+}
+#endif
+
 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
 					struct vm_userfaultfd_ctx vm_ctx)
 {
mm/memory.c
@@ -5040,6 +5040,7 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 	pud_t pudval;
 	int seq;
 	vm_fault_t ret;
+	bool uffd_missing_sigbus = false;
 
 	/* Clear flags that may lead to release the mmap_sem to retry */
 	flags &= ~(FAULT_FLAG_ALLOW_RETRY|FAULT_FLAG_KILLABLE);
@@ -5052,20 +5053,31 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 		return VM_FAULT_RETRY;
 	}
 
-	if (!vmf_allows_speculation(&vmf))
-		return VM_FAULT_RETRY;
-
 	vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags);
 	vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot);
 
 #ifdef CONFIG_USERFAULTFD
-	/* Can't call userland page fault handler in the speculative path */
+	/*
+	 * Only support SPF for SIGBUS+MISSING userfaults in private anonymous
+	 * VMAs. Rest all should be retried with mmap_lock.
+	 */
 	if (unlikely(vmf.vma_flags & __VM_UFFD_FLAGS)) {
-		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
-		return VM_FAULT_RETRY;
+		uffd_missing_sigbus = vma_is_anonymous(vmf.vma) &&
+					(vmf.vma_flags & VM_UFFD_MISSING) &&
+					userfaultfd_using_sigbus(vmf.vma);
+		if (!uffd_missing_sigbus) {
+			trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
+			return VM_FAULT_RETRY;
+		}
+		/* Not having anon_vma implies that the PTE is missing */
+		if (!vmf.vma->anon_vma)
+			return VM_FAULT_SIGBUS;
 	}
 #endif
 
+	if (!vmf_allows_speculation(&vmf))
+		return VM_FAULT_RETRY;
+
 	if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP) {
 		/*
 		 * This could be detected by the check address against VMA's
@@ -5183,6 +5195,9 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 
 	local_irq_enable();
 
+	if (!vmf.pte && uffd_missing_sigbus)
+		return VM_FAULT_SIGBUS;
+
 	/*
 	 * We need to re-validate the VMA after checking the bounds, otherwise
 	 * we might have a false positive on the bounds.
@@ -5216,7 +5231,12 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 out_walk:
 	trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
 	local_irq_enable();
-	return VM_FAULT_RETRY;
+	/*
+	 * Failing page-table walk is similar to page-missing so give an
+	 * opportunity to SIGBUS+MISSING userfault to handle it before retrying
+	 * with mmap_lock
+	 */
+	return uffd_missing_sigbus ? VM_FAULT_SIGBUS : VM_FAULT_RETRY;
 
 out_segv:
 	trace_spf_vma_access(_RET_IP_, vmf.vma, address);
@@ -42,7 +42,7 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
 	 * enforce the VM_MAYWRITE check done at uffd registration
 	 * time.
 	 */
-	if (!dst_vma->vm_userfaultfd_ctx.ctx)
+	if (!rcu_access_pointer(dst_vma->vm_userfaultfd_ctx.ctx))
 		return NULL;
 
 	return dst_vma;