ext4: fix races between page faults and hole punching
Currently, page faults and hole punching are completely unsynchronized. This can result in a page fault faulting a page into a range that we are punching after truncate_pagecache_range() has been called, and thus we can end up with a page mapped to disk blocks that will shortly be freed. Filesystem corruption will shortly follow. Note that the same race is avoided for truncate by checking the page fault offset against i_size, but there isn't a similar mechanism available for punching holes.

Fix the problem by creating a new rw semaphore, i_mmap_sem, in the inode and grabbing it for writing over truncate, hole punching, and other functions removing blocks from the extent tree, and for reading over page faults. We cannot easily use i_data_sem for this since that ranks below transaction start and we need something ranking above it so that it can be held over the whole truncate / hole punching operation.

Also remove various workarounds we had in the code to reduce the race window when a page fault could have created pages with stale mapping information.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
@@ -910,6 +910,15 @@ struct ext4_inode_info {
|
||||
* by other means, so we have i_data_sem.
|
||||
*/
|
||||
struct rw_semaphore i_data_sem;
|
||||
/*
|
||||
* i_mmap_sem is for serializing page faults with truncate / punch hole
|
||||
* operations. We have to make sure that a new page cannot be faulted in
|
||||
* a section of the inode that is being punched. We cannot easily use
|
||||
* i_data_sem for this since we need protection for the whole punch
|
||||
* operation and i_data_sem ranks below transaction start so we have
|
||||
* to occasionally drop it.
|
||||
*/
|
||||
struct rw_semaphore i_mmap_sem;
|
||||
struct inode vfs_inode;
|
||||
struct jbd2_inode *jinode;
|
||||
|
||||
@@ -2484,6 +2493,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
|
||||
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
|
||||
loff_t lstart, loff_t lend);
|
||||
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
|
||||
extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
|
||||
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
|
||||
extern void ext4_da_update_reserve_space(struct inode *inode,
|
||||
int used, int quota_claim);
|
||||
|
Reference in New Issue
Block a user