BACKPORT: FROMGIT: userfaultfd: add UFFDIO_CONTINUE ioctl
This ioctl is how userspace ought to resolve "minor" userfaults. The idea is, userspace is notified that a minor fault has occurred. It might change the contents of the page using its second non-UFFD mapping, or not. Then, it calls UFFDIO_CONTINUE to tell the kernel "I have ensured the page contents are correct, carry on setting up the mapping". Note that it doesn't make much sense to use UFFDIO_{COPY,ZEROPAGE} for MINOR registered VMAs. ZEROPAGE maps the VMA to the zero page; but in the minor fault case, we already have some pre-existing underlying page. Likewise, UFFDIO_COPY isn't useful if we have a second non-UFFD mapping. We'd just use memcpy() or similar instead. It turns out hugetlb_mcopy_atomic_pte() already does very close to what we want, if an existing page is provided via `struct page **pagep`. We already special-case the behavior a bit for the UFFDIO_ZEROPAGE case, so just extend that design: add an enum for the three modes of operation, and make the small adjustments needed for the MCOPY_ATOMIC_CONTINUE case. (Basically, look up the existing page, and avoid adding the existing page to the page cache or calling set_page_huge_active() on it.) Link: https://lkml.kernel.org/r/20210301222728.176417-5-axelrasmussen@google.com Signed-off-by: Axel Rasmussen <axelrasmussen@google.com> Reviewed-by: Peter Xu <peterx@redhat.com> Cc: Adam Ruprecht <ruprecht@google.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Cannon Matthews <cannonmatthews@google.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chinwen Chang <chinwen.chang@mediatek.com> Cc: David Rientjes <rientjes@google.com> Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com> Cc: Huang Ying <ying.huang@intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jann Horn <jannh@google.com> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Kirill A. 
Shutemov <kirill@shutemov.name> Cc: Lokesh Gidra <lokeshgidra@google.com> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: "Michal Koutný" <mkoutny@suse.com> Cc: Michel Lespinasse <walken@google.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Mina Almasry <almasrymina@google.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Oliver Upton <oupton@google.com> Cc: Shaohua Li <shli@fb.com> Cc: Shawn Anastasio <shawn@anastas.io> Cc: Steven Price <steven.price@arm.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> (cherry picked from commit 14ea86439abaf3423cd9b6712ed5ce8451d2d181 https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git akpm) Link: https://lore.kernel.org/patchwork/patch/1388136/ Conflicts: mm/hugetlb.c (8f251a3d5ce3bdea73bd045ed35db64f32e0d0d9 is not cherry-picked yet so switched SetHPageMigratable() to set_page_huge_active()) Signed-off-by: Lokesh Gidra <lokeshgidra@google.com> Bug: 160737021 Bug: 169683130 Change-Id: I45b62959dcb1d343154cb831113a26e47e77c8af
This commit is contained in:

committed by
Todd Kjos

parent
e6bf076c2a
commit
4a5cf92412
40
mm/hugetlb.c
40
mm/hugetlb.c
@@ -39,7 +39,6 @@
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/hugetlb_cgroup.h>
|
||||
#include <linux/node.h>
|
||||
#include <linux/userfaultfd_k.h>
|
||||
#include <linux/page_owner.h>
|
||||
#include "internal.h"
|
||||
|
||||
@@ -4716,8 +4715,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
|
||||
struct vm_area_struct *dst_vma,
|
||||
unsigned long dst_addr,
|
||||
unsigned long src_addr,
|
||||
enum mcopy_atomic_mode mode,
|
||||
struct page **pagep)
|
||||
{
|
||||
bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
|
||||
struct address_space *mapping;
|
||||
pgoff_t idx;
|
||||
unsigned long size;
|
||||
@@ -4727,8 +4728,17 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
|
||||
spinlock_t *ptl;
|
||||
int ret;
|
||||
struct page *page;
|
||||
int writable;
|
||||
|
||||
if (!*pagep) {
|
||||
mapping = dst_vma->vm_file->f_mapping;
|
||||
idx = vma_hugecache_offset(h, dst_vma, dst_addr);
|
||||
|
||||
if (is_continue) {
|
||||
ret = -EFAULT;
|
||||
page = find_lock_page(mapping, idx);
|
||||
if (!page)
|
||||
goto out;
|
||||
} else if (!*pagep) {
|
||||
ret = -ENOMEM;
|
||||
page = alloc_huge_page(dst_vma, dst_addr, 0);
|
||||
if (IS_ERR(page))
|
||||
@@ -4757,13 +4767,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
|
||||
*/
|
||||
__SetPageUptodate(page);
|
||||
|
||||
mapping = dst_vma->vm_file->f_mapping;
|
||||
idx = vma_hugecache_offset(h, dst_vma, dst_addr);
|
||||
|
||||
/*
|
||||
* If shared, add to page cache
|
||||
*/
|
||||
if (vm_shared) {
|
||||
/* Add shared, newly allocated pages to the page cache. */
|
||||
if (vm_shared && !is_continue) {
|
||||
size = i_size_read(mapping->host) >> huge_page_shift(h);
|
||||
ret = -EFAULT;
|
||||
if (idx >= size)
|
||||
@@ -4808,8 +4813,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
|
||||
hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
|
||||
}
|
||||
|
||||
_dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE);
|
||||
if (dst_vma->vm_flags & VM_WRITE)
|
||||
/* For CONTINUE on a non-shared VMA, don't set VM_WRITE for CoW. */
|
||||
if (is_continue && !vm_shared)
|
||||
writable = 0;
|
||||
else
|
||||
writable = dst_vma->vm_flags & VM_WRITE;
|
||||
|
||||
_dst_pte = make_huge_pte(dst_vma, page, writable);
|
||||
if (writable)
|
||||
_dst_pte = huge_pte_mkdirty(_dst_pte);
|
||||
_dst_pte = pte_mkyoung(_dst_pte);
|
||||
|
||||
@@ -4823,15 +4834,16 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
|
||||
update_mmu_cache(dst_vma, dst_addr, dst_pte);
|
||||
|
||||
spin_unlock(ptl);
|
||||
set_page_huge_active(page);
|
||||
if (vm_shared)
|
||||
if (!is_continue)
|
||||
set_page_huge_active(page);
|
||||
if (vm_shared || is_continue)
|
||||
unlock_page(page);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
out_release_unlock:
|
||||
spin_unlock(ptl);
|
||||
if (vm_shared)
|
||||
if (vm_shared || is_continue)
|
||||
unlock_page(page);
|
||||
out_release_nounlock:
|
||||
put_page(page);
|
||||
|
Reference in New Issue
Block a user