Merge tag 'dax-locking-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull DAX locking updates from Ross Zwisler:
 "Filesystem DAX locking for 4.7:

   - We use a bit in an exceptional radix tree entry as a lock bit and
     use it similarly to how page lock is used for normal faults.  This
     fixes races between hole instantiation and read faults of the same
     index.

   - Filesystem DAX PMD faults are disabled, and will be re-enabled when
     PMD locking is implemented"

* tag 'dax-locking-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  dax: Remove i_mmap_lock protection
  dax: Use radix tree entry lock to protect cow faults
  dax: New fault locking
  dax: Allow DAX code to replace exceptional entries
  dax: Define DAX lock bit for radix tree exceptional entry
  dax: Make huge page handling depend of CONFIG_BROKEN
  dax: Fix condition for filling of PMD holes
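The central idea of the series is that an exceptional (non-page) radix tree entry can carry its own lock bit in its low tag bits, so two faults racing on the same file index serialize on the entry itself, much as the page lock serializes faults on ordinary pages. The following is a minimal, self-contained userspace sketch of that idea, not the kernel implementation: ENTRY_LOCK_BIT, entry_trylock() and entry_unlock() are invented names for illustration, and the real DAX code parks waiters on a wait queue instead of asking callers to retry.

/*
 * Sketch only: one low bit of an "exceptional" entry word doubles as a
 * lock bit.  Whoever flips the bit first owns the entry; everyone else
 * must wait (here: retry) until it is cleared again.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ENTRY_LOCK_BIT ((uintptr_t)1 << 1)   /* illustrative low bit, not the kernel's */

/* Try to take the per-entry lock; returns true on success. */
static bool entry_trylock(_Atomic uintptr_t *slot)
{
        uintptr_t old = atomic_load(slot);

        if (old & ENTRY_LOCK_BIT)
                return false;
        return atomic_compare_exchange_strong(slot, &old, old | ENTRY_LOCK_BIT);
}

/* Drop the per-entry lock so a racing fault can make progress. */
static void entry_unlock(_Atomic uintptr_t *slot)
{
        atomic_fetch_and(slot, ~ENTRY_LOCK_BIT);
}

int main(void)
{
        _Atomic uintptr_t slot = 0;   /* stands in for one radix tree slot */

        if (entry_trylock(&slot))
                printf("first faulting thread owns the entry\n");
        if (!entry_trylock(&slot))
                printf("second faulting thread must wait and retry\n");
        entry_unlock(&slot);
        return 0;
}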
 mm/memory.c | 40
 1 file changed
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -63,6 +63,7 @@
 #include <linux/dma-debug.h>
 #include <linux/debugfs.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/dax.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -2492,8 +2493,6 @@ void unmap_mapping_range(struct address_space *mapping,
 	if (details.last_index < details.first_index)
 		details.last_index = ULONG_MAX;
 
-
-	/* DAX uses i_mmap_lock to serialise file truncate vs page fault */
 	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
@@ -2825,7 +2824,8 @@ oom:
  */
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 			pgoff_t pgoff, unsigned int flags,
-			struct page *cow_page, struct page **page)
+			struct page *cow_page, struct page **page,
+			void **entry)
 {
 	struct vm_fault vmf;
 	int ret;
@@ -2840,8 +2840,10 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 	ret = vma->vm_ops->fault(vma, &vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
-	if (!vmf.page)
-		goto out;
+	if (ret & VM_FAULT_DAX_LOCKED) {
+		*entry = vmf.entry;
+		return ret;
+	}
 
 	if (unlikely(PageHWPoison(vmf.page))) {
 		if (ret & VM_FAULT_LOCKED)
@@ -2855,7 +2857,6 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 	else
 		VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
 
- out:
 	*page = vmf.page;
 	return ret;
 }
@@ -3048,7 +3049,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		pte_unmap_unlock(pte, ptl);
 	}
 
-	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
@@ -3071,6 +3072,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
 {
 	struct page *fault_page, *new_page;
+	void *fault_entry;
 	struct mem_cgroup *memcg;
 	spinlock_t *ptl;
 	pte_t *pte;
@@ -3088,26 +3090,24 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_OOM;
 	}
 
-	ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page,
+			 &fault_entry);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		goto uncharge_out;
 
-	if (fault_page)
+	if (!(ret & VM_FAULT_DAX_LOCKED))
 		copy_user_highpage(new_page, fault_page, address, vma);
 	__SetPageUptodate(new_page);
 
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (unlikely(!pte_same(*pte, orig_pte))) {
 		pte_unmap_unlock(pte, ptl);
-		if (fault_page) {
+		if (!(ret & VM_FAULT_DAX_LOCKED)) {
 			unlock_page(fault_page);
 			put_page(fault_page);
 		} else {
-			/*
-			 * The fault handler has no page to lock, so it holds
-			 * i_mmap_lock for read to protect against truncate.
-			 */
-			i_mmap_unlock_read(vma->vm_file->f_mapping);
+			dax_unlock_mapping_entry(vma->vm_file->f_mapping,
+						 pgoff);
 		}
 		goto uncharge_out;
 	}
@@ -3115,15 +3115,11 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	mem_cgroup_commit_charge(new_page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(new_page, vma);
 	pte_unmap_unlock(pte, ptl);
-	if (fault_page) {
+	if (!(ret & VM_FAULT_DAX_LOCKED)) {
 		unlock_page(fault_page);
 		put_page(fault_page);
 	} else {
-		/*
-		 * The fault handler has no page to lock, so it holds
-		 * i_mmap_lock for read to protect against truncate.
-		 */
-		i_mmap_unlock_read(vma->vm_file->f_mapping);
+		dax_unlock_mapping_entry(vma->vm_file->f_mapping, pgoff);
	}
 	return ret;
 uncharge_out:
@@ -3143,7 +3139,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	int dirtied = 0;
 	int ret, tmp;
 
-	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
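The cleanup rule this diff introduces in do_cow_fault() reduces to one branch: if __do_fault() reported VM_FAULT_DAX_LOCKED there is no page to unlock, only a locked radix tree entry to release. The stand-alone sketch below models that control flow only; the helpers are print stubs standing in for the kernel's unlock_page(), put_page() and dax_unlock_mapping_entry(), and the flag value is illustrative rather than the kernel's definition.

/*
 * Control-flow sketch of the new release path (assumed shapes only;
 * the stubs below merely print what the real kernel helpers would do).
 */
#include <stdio.h>

#define VM_FAULT_DAX_LOCKED 0x1000   /* illustrative value */

struct page;   /* opaque in this sketch */

static void unlock_page(struct page *p)  { (void)p; puts("unlock_page()"); }
static void put_page(struct page *p)     { (void)p; puts("put_page()"); }
static void dax_unlock_mapping_entry(void *mapping, unsigned long pgoff)
{
        (void)mapping; (void)pgoff;
        puts("dax_unlock_mapping_entry()");
}

/*
 * After __do_fault(): a DAX fault hands back a locked radix tree entry
 * instead of a locked page, so cleanup branches on the return flag
 * rather than on fault_page being non-NULL (the pre-merge test).
 */
static void release_fault_result(int ret, struct page *fault_page,
                                 void *mapping, unsigned long pgoff)
{
        if (!(ret & VM_FAULT_DAX_LOCKED)) {
                unlock_page(fault_page);
                put_page(fault_page);
        } else {
                dax_unlock_mapping_entry(mapping, pgoff);
        }
}

int main(void)
{
        release_fault_result(0, NULL, NULL, 0);                    /* page-backed case */
        release_fault_result(VM_FAULT_DAX_LOCKED, NULL, NULL, 0);  /* DAX case */
        return 0;
}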