ext4: simplify io_end handling for AIO DIO

When mapping blocks for direct IO, we allocate io_end structure before
mapping blocks and store pointer to it in the inode. This creates a
requirement that any AIO DIO using io_end must be protected by i_mutex.
This created problems in the past with dioread_nolock mode which was
corrupting io_end pointers. Also io_end is allocated unnecessarily in
case where we don't need to convert any extents (which is a common case
for example when overwriting file).

We fix the problem by allocating io_end only once we return unwritten
extent from block mapping function for AIO DIO (so we can save some
pointless io_end allocations) and we pass pointer to it in bh->b_private
which generic DIO code later passes to our end IO callback. That way we
remove any need for global pointer to io_end structure and thus fix the
races.

The downside of this change is that the checking for unwritten IO in
flight in ext4_extents_can_be_merged() is more racy since we now
increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after dropping
i_data_sem. However the check has been racy already before because
ext4_writepages() already increment i_unwritten after dropping
i_data_sem and reserved blocks save us from hitting ENOSPC in the worst
case.

Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
Jan Kara
2016-03-08 23:36:46 -05:00
committed by Theodore Ts'o
parent efe70c2951
commit 109811c20f
3 changed files with 74 additions and 104 deletions

View File

@@ -1736,6 +1736,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
*/
if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
return 0;
/*
* The check for IO to unwritten extent is somewhat racy as we
* increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
* dropping i_data_sem. But reserved blocks should save us in that
* case.
*/
if (ext4_ext_is_unwritten(ex1) &&
(ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
atomic_read(&EXT4_I(inode)->i_unwritten) ||
@@ -4007,7 +4013,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path = *ppath;
int ret = 0;
int err = 0;
ext4_io_end_t *io = ext4_inode_aio(inode);
ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "
"block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -4030,15 +4035,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
flags | EXT4_GET_BLOCKS_CONVERT);
if (ret <= 0)
goto out;
/*
* Flag the inode(non aio case) or end_io struct (aio case)
* that this IO needs to conversion to written when IO is
* completed
*/
if (io)
ext4_set_io_unwritten_flag(inode, io);
else
ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
map->m_flags |= EXT4_MAP_UNWRITTEN;
goto out;
}
@@ -4283,9 +4279,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = ext4_inode_aio(inode);
ext4_lblk_t cluster_offset;
int set_unwritten = 0;
bool map_from_cluster = false;
ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -4482,15 +4476,6 @@ got_allocated_blocks:
if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){
ext4_ext_mark_unwritten(&newex);
map->m_flags |= EXT4_MAP_UNWRITTEN;
/*
* io_end structure was created for every IO write to an
* unwritten extent. To avoid unnecessary conversion,
* here we flag the IO that really needs the conversion.
* For non asycn direct IO case, flag the inode state
* that we need to perform conversion when IO is done.
*/
if (flags & EXT4_GET_BLOCKS_PRE_IO)
set_unwritten = 1;
}
err = 0;
@@ -4501,14 +4486,6 @@ got_allocated_blocks:
err = ext4_ext_insert_extent(handle, inode, &path,
&newex, flags);
if (!err && set_unwritten) {
if (io)
ext4_set_io_unwritten_flag(inode, io);
else
ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN);
}
if (err && free_on_err) {
int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;