xfs: fix COW writeback race
Due to the way how xfs_iomap_write_allocate tries to convert the whole found extents from delalloc to real space we can run into a race condition with multiple threads doing writes to this same extent. For the non-COW case that is harmless as the only thing that can happen is that we call xfs_bmapi_write on an extent that has already been converted to a real allocation. For COW writes where we move the extent from the COW to the data fork after I/O completion the race is, however, not quite as harmless. In the worst case we are now calling xfs_bmapi_write on a region that contains hole in the COW work, which will trip up an assert in debug builds or lead to file system corruption in non-debug builds. This seems to be reproducible with workloads of small O_DSYNC write, although so far I've not managed to come up with a with an isolated reproducer. The fix for the issue is relatively simple: tell xfs_bmapi_write that we are only asked to convert delayed allocations and skip holes in that case. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
This commit is contained in:

committed by
Darrick J. Wong

parent
7a308bb301
commit
d2b3964a07
@@ -4514,8 +4514,6 @@ xfs_bmapi_write(
|
|||||||
int n; /* current extent index */
|
int n; /* current extent index */
|
||||||
xfs_fileoff_t obno; /* old block number (offset) */
|
xfs_fileoff_t obno; /* old block number (offset) */
|
||||||
int whichfork; /* data or attr fork */
|
int whichfork; /* data or attr fork */
|
||||||
char inhole; /* current location is hole in file */
|
|
||||||
char wasdelay; /* old extent was delayed */
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
xfs_fileoff_t orig_bno; /* original block number value */
|
xfs_fileoff_t orig_bno; /* original block number value */
|
||||||
@@ -4603,22 +4601,44 @@ xfs_bmapi_write(
|
|||||||
bma.firstblock = firstblock;
|
bma.firstblock = firstblock;
|
||||||
|
|
||||||
while (bno < end && n < *nmap) {
|
while (bno < end && n < *nmap) {
|
||||||
inhole = eof || bma.got.br_startoff > bno;
|
bool need_alloc = false, wasdelay = false;
|
||||||
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
|
|
||||||
|
|
||||||
|
/* in hole or beyoned EOF? */
|
||||||
|
if (eof || bma.got.br_startoff > bno) {
|
||||||
|
if (flags & XFS_BMAPI_DELALLOC) {
|
||||||
|
/*
|
||||||
|
* For the COW fork we can reasonably get a
|
||||||
|
* request for converting an extent that races
|
||||||
|
* with other threads already having converted
|
||||||
|
* part of it, as there converting COW to
|
||||||
|
* regular blocks is not protected using the
|
||||||
|
* IOLOCK.
|
||||||
|
*/
|
||||||
|
ASSERT(flags & XFS_BMAPI_COWFORK);
|
||||||
|
if (!(flags & XFS_BMAPI_COWFORK)) {
|
||||||
|
error = -EIO;
|
||||||
|
goto error0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (eof || bno >= end)
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
need_alloc = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
/*
|
/*
|
||||||
* Make sure we only reflink into a hole.
|
* Make sure we only reflink into a hole.
|
||||||
*/
|
*/
|
||||||
if (flags & XFS_BMAPI_REMAP)
|
ASSERT(!(flags & XFS_BMAPI_REMAP));
|
||||||
ASSERT(inhole);
|
if (isnullstartblock(bma.got.br_startblock))
|
||||||
if (flags & XFS_BMAPI_COWFORK)
|
wasdelay = true;
|
||||||
ASSERT(!inhole);
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First, deal with the hole before the allocated space
|
* First, deal with the hole before the allocated space
|
||||||
* that we found, if any.
|
* that we found, if any.
|
||||||
*/
|
*/
|
||||||
if (inhole || wasdelay) {
|
if (need_alloc || wasdelay) {
|
||||||
bma.eof = eof;
|
bma.eof = eof;
|
||||||
bma.conv = !!(flags & XFS_BMAPI_CONVERT);
|
bma.conv = !!(flags & XFS_BMAPI_CONVERT);
|
||||||
bma.wasdel = wasdelay;
|
bma.wasdel = wasdelay;
|
||||||
|
@@ -110,6 +110,9 @@ struct xfs_extent_free_item
|
|||||||
/* Map something in the CoW fork. */
|
/* Map something in the CoW fork. */
|
||||||
#define XFS_BMAPI_COWFORK 0x200
|
#define XFS_BMAPI_COWFORK 0x200
|
||||||
|
|
||||||
|
/* Only convert delalloc space, don't allocate entirely new extents */
|
||||||
|
#define XFS_BMAPI_DELALLOC 0x400
|
||||||
|
|
||||||
#define XFS_BMAPI_FLAGS \
|
#define XFS_BMAPI_FLAGS \
|
||||||
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
|
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
|
||||||
{ XFS_BMAPI_METADATA, "METADATA" }, \
|
{ XFS_BMAPI_METADATA, "METADATA" }, \
|
||||||
@@ -120,7 +123,8 @@ struct xfs_extent_free_item
|
|||||||
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
|
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
|
||||||
{ XFS_BMAPI_ZERO, "ZERO" }, \
|
{ XFS_BMAPI_ZERO, "ZERO" }, \
|
||||||
{ XFS_BMAPI_REMAP, "REMAP" }, \
|
{ XFS_BMAPI_REMAP, "REMAP" }, \
|
||||||
{ XFS_BMAPI_COWFORK, "COWFORK" }
|
{ XFS_BMAPI_COWFORK, "COWFORK" }, \
|
||||||
|
{ XFS_BMAPI_DELALLOC, "DELALLOC" }
|
||||||
|
|
||||||
|
|
||||||
static inline int xfs_bmapi_aflag(int w)
|
static inline int xfs_bmapi_aflag(int w)
|
||||||
|
@@ -681,7 +681,7 @@ xfs_iomap_write_allocate(
|
|||||||
xfs_trans_t *tp;
|
xfs_trans_t *tp;
|
||||||
int nimaps;
|
int nimaps;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
int flags = 0;
|
int flags = XFS_BMAPI_DELALLOC;
|
||||||
int nres;
|
int nres;
|
||||||
|
|
||||||
if (whichfork == XFS_COW_FORK)
|
if (whichfork == XFS_COW_FORK)
|
||||||
|
Reference in New Issue
Block a user