ext4: Support for synchronous DAX faults
We return the IOMAP_F_DIRTY flag from ext4_iomap_begin() when asked to prepare blocks for writing and the inode has some uncommitted metadata changes. In the fault handler ext4_dax_fault() we then detect this case (through the VM_FAULT_NEEDDSYNC return value) and call the helper dax_finish_sync_fault() to flush the metadata changes and insert the page table entry. Note that this will also dirty the corresponding radix tree entry, which is what we want: fsync(2) will still provide data integrity guarantees for applications not using userspace flushing, while applications using userspace flushing can avoid calling fsync(2) and thus avoid the performance overhead.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cette révision appartient à :
@@ -26,6 +26,7 @@
|
||||
#include <linux/quotaops.h>
|
||||
#include <linux/pagevec.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/mman.h>
|
||||
#include "ext4.h"
|
||||
#include "ext4_jbd2.h"
|
||||
#include "xattr.h"
|
||||
@@ -295,6 +296,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
|
||||
*/
|
||||
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
|
||||
(vmf->vma->vm_flags & VM_SHARED);
|
||||
pfn_t pfn;
|
||||
|
||||
if (write) {
|
||||
sb_start_pagefault(sb);
|
||||
@@ -310,9 +312,12 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
|
||||
} else {
|
||||
down_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
}
|
||||
result = dax_iomap_fault(vmf, pe_size, NULL, &ext4_iomap_ops);
|
||||
result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops);
|
||||
if (write) {
|
||||
ext4_journal_stop(handle);
|
||||
/* Handling synchronous page fault? */
|
||||
if (result & VM_FAULT_NEEDDSYNC)
|
||||
result = dax_finish_sync_fault(vmf, pe_size, pfn);
|
||||
up_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
sb_end_pagefault(sb);
|
||||
} else {
|
||||
@@ -350,6 +355,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* We don't support synchronous mappings for non-DAX files. At least
|
||||
* until someone comes with a sensible use case.
|
||||
*/
|
||||
if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
file_accessed(file);
|
||||
if (IS_DAX(file_inode(file))) {
|
||||
vma->vm_ops = &ext4_dax_vm_ops;
|
||||
@@ -719,6 +731,7 @@ const struct file_operations ext4_file_operations = {
|
||||
.compat_ioctl = ext4_compat_ioctl,
|
||||
#endif
|
||||
.mmap = ext4_file_mmap,
|
||||
.mmap_supported_flags = MAP_SYNC,
|
||||
.open = ext4_file_open,
|
||||
.release = ext4_release_file,
|
||||
.fsync = ext4_sync_file,
|
||||
|
@@ -3394,6 +3394,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
static bool ext4_inode_datasync_dirty(struct inode *inode)
|
||||
{
|
||||
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
|
||||
|
||||
if (journal)
|
||||
return !jbd2_transaction_committed(journal,
|
||||
EXT4_I(inode)->i_datasync_tid);
|
||||
/* Any metadata buffers to write? */
|
||||
if (!list_empty(&inode->i_mapping->private_list))
|
||||
return true;
|
||||
return inode->i_state & I_DIRTY_DATASYNC;
|
||||
}
|
||||
|
||||
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
unsigned flags, struct iomap *iomap)
|
||||
{
|
||||
@@ -3466,6 +3479,8 @@ retry:
|
||||
}
|
||||
|
||||
iomap->flags = 0;
|
||||
if ((flags & IOMAP_WRITE) && ext4_inode_datasync_dirty(inode))
|
||||
iomap->flags |= IOMAP_F_DIRTY;
|
||||
iomap->bdev = inode->i_sb->s_bdev;
|
||||
iomap->dax_dev = sbi->s_daxdev;
|
||||
iomap->offset = first_block << blkbits;
|
||||
|
Référencer dans un nouveau ticket
Bloquer un utilisateur