Files
android_kernel_xiaomi_sm8450/include/linux/dax.h
Jan Kara ac401cc782 dax: New fault locking
Currently DAX page fault locking is racy.

CPU0 (write fault)		CPU1 (read fault)

__dax_fault()			__dax_fault()
  get_block(inode, block, &bh, 0) -> not mapped
				  get_block(inode, block, &bh, 0)
				    -> not mapped
  if (!buffer_mapped(&bh))
    if (vmf->flags & FAULT_FLAG_WRITE)
      get_block(inode, block, &bh, 1) -> allocates blocks
  if (page) -> no
				  if (!buffer_mapped(&bh))
				    if (vmf->flags & FAULT_FLAG_WRITE) {
				    } else {
				      dax_load_hole();
				    }
  dax_insert_mapping()

And we are in a situation where we fail in dax_radix_entry() with -EIO.

Another problem with the current DAX page fault locking is that there is
no race-free way to clear dirty tag in the radix tree. We can always
end up with clean radix tree and dirty data in CPU cache.

We fix the first problem by introducing locking of exceptional radix
tree entries in DAX mappings acting very similarly to page lock and thus
synchronizing properly faults against the same mapping index. The same
lock can later be used to avoid races when clearing radix tree dirty
tag.

Reviewed-by: NeilBrown <neilb@suse.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
2016-05-19 15:20:54 -06:00

70 lines
2.4 KiB
C

#ifndef _LINUX_DAX_H
#define _LINUX_DAX_H
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/radix-tree.h>
#include <asm/pgtable.h>
/* We use lowest available exceptional entry bit for locking */
#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
get_block_t, dio_iodone_t, int flags);
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
void dax_wake_mapping_entry_waiter(struct address_space *mapping,
pgoff_t index, bool wake_all);
#ifdef CONFIG_FS_DAX
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
unsigned int offset, unsigned int length);
#else
static inline struct page *read_dax_sector(struct block_device *bdev,
sector_t n)
{
return ERR_PTR(-ENXIO);
}
static inline int __dax_zero_page_range(struct block_device *bdev,
sector_t sector, unsigned int offset, unsigned int length)
{
return -ENXIO;
}
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
unsigned int flags, get_block_t);
int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
unsigned int flags, get_block_t);
#else
static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, unsigned int flags, get_block_t gb)
{
return VM_FAULT_FALLBACK;
}
#define __dax_pmd_fault dax_pmd_fault
#endif
int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb)
#define __dax_mkwrite(vma, vmf, gb) __dax_fault(vma, vmf, gb)
static inline bool vma_is_dax(struct vm_area_struct *vma)
{
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
}
static inline bool dax_mapping(struct address_space *mapping)
{
return mapping->host && IS_DAX(mapping->host);
}
struct writeback_control;
int dax_writeback_mapping_range(struct address_space *mapping,
struct block_device *bdev, struct writeback_control *wbc);
#endif