Merge tag 'xfs-4.20-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull vfs dedup fixes from Dave Chinner:
 "This reworks the vfs data cloning infrastructure.

  We discovered many issues with these interfaces late in the 4.19 cycle
  - the worst of them (data corruption, setuid stripping) were fixed for
  XFS in 4.19-rc8, but a larger rework of the infrastructure fixing all
  the problems was needed. That rework is the contents of this pull
  request.

  Rework the vfs_clone_file_range and vfs_dedupe_file_range
  infrastructure to use a common .remap_file_range method and supply
  generic bounds and sanity checking functions that are shared with the
  data write path. The current VFS infrastructure has problems with
  rlimit, LFS file sizes, file time stamps, maximum filesystem file
  sizes, stripping setuid bits, etc and so they are addressed in these
  commits.

  We also introduce the ability for the ->remap_file_range methods to
  return short clones so that clones for vfs_copy_file_range() don't get
  rejected if the entire range can't be cloned. It also allows
  filesystems to sliently skip deduplication of partial EOF blocks if
  they are not capable of doing so without requiring errors to be thrown
  to userspace.

  Existing filesystems are converted to user the new remap_file_range
  method, and both XFS and ocfs2 are modified to make use of the new
  generic checking infrastructure"

* tag 'xfs-4.20-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (28 commits)
  xfs: remove [cm]time update from reflink calls
  xfs: remove xfs_reflink_remap_range
  xfs: remove redundant remap partial EOF block checks
  xfs: support returning partial reflink results
  xfs: clean up xfs_reflink_remap_blocks call site
  xfs: fix pagecache truncation prior to reflink
  ocfs2: remove ocfs2_reflink_remap_range
  ocfs2: support partial clone range and dedupe range
  ocfs2: fix pagecache truncation prior to reflink
  ocfs2: truncate page cache for clone destination file before remapping
  vfs: clean up generic_remap_file_range_prep return value
  vfs: hide file range comparison function
  vfs: enable remap callers that can handle short operations
  vfs: plumb remap flags through the vfs dedupe functions
  vfs: plumb remap flags through the vfs clone functions
  vfs: make remap_file_range functions take and return bytes completed
  vfs: remap helper should update destination inode metadata
  vfs: pass remap flags to generic_remap_checks
  vfs: pass remap flags to generic_remap_file_range_prep
  vfs: combine the clone and dedupe into a single remap_file_range
  ...
This commit is contained in:
Linus Torvalds
2018-11-02 09:33:08 -07:00
20 changed files with 749 additions and 611 deletions

View File

@@ -4466,9 +4466,9 @@ out:
}
/* Update destination inode size, if necessary. */
static int ocfs2_reflink_update_dest(struct inode *dest,
struct buffer_head *d_bh,
loff_t newlen)
int ocfs2_reflink_update_dest(struct inode *dest,
struct buffer_head *d_bh,
loff_t newlen)
{
handle_t *handle;
int ret;
@@ -4505,14 +4505,14 @@ out_commit:
}
/* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */
static int ocfs2_reflink_remap_extent(struct inode *s_inode,
struct buffer_head *s_bh,
loff_t pos_in,
struct inode *t_inode,
struct buffer_head *t_bh,
loff_t pos_out,
loff_t len,
struct ocfs2_cached_dealloc_ctxt *dealloc)
static loff_t ocfs2_reflink_remap_extent(struct inode *s_inode,
struct buffer_head *s_bh,
loff_t pos_in,
struct inode *t_inode,
struct buffer_head *t_bh,
loff_t pos_out,
loff_t len,
struct ocfs2_cached_dealloc_ctxt *dealloc)
{
struct ocfs2_extent_tree s_et;
struct ocfs2_extent_tree t_et;
@@ -4520,8 +4520,9 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *ref_tree;
struct ocfs2_super *osb;
loff_t remapped_bytes = 0;
loff_t pstart, plen;
u32 p_cluster, num_clusters, slast, spos, tpos;
u32 p_cluster, num_clusters, slast, spos, tpos, remapped_clus = 0;
unsigned int ext_flags;
int ret = 0;
@@ -4603,30 +4604,34 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
next_loop:
spos += num_clusters;
tpos += num_clusters;
remapped_clus += num_clusters;
}
out:
return ret;
goto out;
out_unlock_refcount:
ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
brelse(ref_root_bh);
return ret;
out:
remapped_bytes = ocfs2_clusters_to_bytes(t_inode->i_sb, remapped_clus);
remapped_bytes = min_t(loff_t, len, remapped_bytes);
return remapped_bytes > 0 ? remapped_bytes : ret;
}
/* Set up refcount tree and remap s_inode to t_inode. */
static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
struct buffer_head *s_bh,
loff_t pos_in,
struct inode *t_inode,
struct buffer_head *t_bh,
loff_t pos_out,
loff_t len)
loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
struct buffer_head *s_bh,
loff_t pos_in,
struct inode *t_inode,
struct buffer_head *t_bh,
loff_t pos_out,
loff_t len)
{
struct ocfs2_cached_dealloc_ctxt dealloc;
struct ocfs2_super *osb;
struct ocfs2_dinode *dis;
struct ocfs2_dinode *dit;
int ret;
loff_t ret;
osb = OCFS2_SB(s_inode->i_sb);
dis = (struct ocfs2_dinode *)s_bh->b_data;
@@ -4698,7 +4703,7 @@ static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
/* Actually remap extents now. */
ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh,
pos_out, len, &dealloc);
if (ret) {
if (ret < 0) {
mlog_errno(ret);
goto out;
}
@@ -4713,10 +4718,10 @@ out:
}
/* Lock an inode and grab a bh pointing to the inode. */
static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
struct buffer_head **bh1,
struct inode *t_inode,
struct buffer_head **bh2)
int ocfs2_reflink_inodes_lock(struct inode *s_inode,
struct buffer_head **bh1,
struct inode *t_inode,
struct buffer_head **bh2)
{
struct inode *inode1;
struct inode *inode2;
@@ -4801,10 +4806,10 @@ out_i1:
}
/* Unlock both inodes and release buffers. */
static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
struct buffer_head *s_bh,
struct inode *t_inode,
struct buffer_head *t_bh)
void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
struct buffer_head *s_bh,
struct inode *t_inode,
struct buffer_head *t_bh)
{
ocfs2_inode_unlock(s_inode, 1);
ocfs2_rw_unlock(s_inode, 1);
@@ -4816,82 +4821,3 @@ static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
}
unlock_two_nondirectories(s_inode, t_inode);
}
/* Link a range of blocks from one file to another. */
int ocfs2_reflink_remap_range(struct file *file_in,
loff_t pos_in,
struct file *file_out,
loff_t pos_out,
u64 len,
bool is_dedupe)
{
struct inode *inode_in = file_inode(file_in);
struct inode *inode_out = file_inode(file_out);
struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
struct buffer_head *in_bh = NULL, *out_bh = NULL;
bool same_inode = (inode_in == inode_out);
ssize_t ret;
if (!ocfs2_refcount_tree(osb))
return -EOPNOTSUPP;
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
/* Lock both files against IO */
ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
if (ret)
return ret;
/* Check file eligibility and prepare for block sharing. */
ret = -EINVAL;
if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
(OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
goto out_unlock;
ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
&len, is_dedupe);
if (ret <= 0)
goto out_unlock;
/* Lock out changes to the allocation maps and remap. */
down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
if (!same_inode)
down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
SINGLE_DEPTH_NESTING);
ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
out_bh, pos_out, len);
/* Zap any page cache for the destination file's range. */
if (!ret)
truncate_inode_pages_range(&inode_out->i_data, pos_out,
PAGE_ALIGN(pos_out + len) - 1);
up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
if (!same_inode)
up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
if (ret) {
mlog_errno(ret);
goto out_unlock;
}
/*
* Empty the extent map so that we may get the right extent
* record from the disk.
*/
ocfs2_extent_map_trunc(inode_in, 0);
ocfs2_extent_map_trunc(inode_out, 0);
ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
if (ret) {
mlog_errno(ret);
goto out_unlock;
}
ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
return 0;
out_unlock:
ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
return ret;
}