Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:

 - further restructure ext4 documentation

 - fix up ext4's delayed allocation for bigalloc file systems

 - fix up some syzbot-detected races in EXT4_IOC_MOVE_EXT,
   EXT4_IOC_SWAP_BOOT, and ext4_remount

 - ... and a few other miscellaneous bugs and optimizations.

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (21 commits)
  ext4: fix use-after-free race in ext4_remount()'s error path
  ext4: cache NULL when both default_acl and acl are NULL
  docs: promote the ext4 data structures book to top level
  docs: move ext4 administrative docs to admin-guide/
  jbd2: fix use after free in jbd2_log_do_checkpoint()
  ext4: propagate error from dquot_initialize() in EXT4_IOC_FSSETXATTR
  ext4: fix setattr project check in fssetxattr ioctl
  docs: make ext4 readme tables readable
  docs: fix ext4 documentation table formatting problems
  docs: generate a separate ext4 pdf file from the documentation
  ext4: convert fault handler to use vm_fault_t type
  ext4: initialize retries variable in ext4_da_write_inline_data_begin()
  ext4: fix EXT4_IOC_SWAP_BOOT
  ext4: fix build error when DX_DEBUG is defined
  ext4: fix argument checking in EXT4_IOC_MOVE_EXT
  ext4: fix reserved cluster accounting at page invalidation time
  ext4: adjust reserved cluster count when removing extents
  ext4: reduce reserved cluster count by number of allocated clusters
  ext4: fix reserved cluster accounting at delayed write time
  ext4: add new pending reservation mechanism
  ...
@@ -284,12 +284,16 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
		error = __ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT,
				       default_acl, XATTR_CREATE);
		posix_acl_release(default_acl);
	} else {
		inode->i_default_acl = NULL;
	}
	if (acl) {
		if (!error)
			error = __ext4_set_acl(handle, inode, ACL_TYPE_ACCESS,
					       acl, XATTR_CREATE);
		posix_acl_release(acl);
	} else {
		inode->i_acl = NULL;
	}
	return error;
}

@@ -628,6 +628,7 @@ enum {
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE		0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER	0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER	0x0020
#define EXT4_FREE_BLOCKS_RERESERVE_CLUSTER	0x0040

/*
 * ioctl commands
@@ -1030,6 +1031,9 @@ struct ext4_inode_info {
	ext4_lblk_t i_da_metadata_calc_last_lblock;
	int i_da_metadata_calc_len;

	/* pending cluster reservations for bigalloc file systems */
	struct ext4_pending_tree i_pending_tree;

	/* on-disk additional length */
	__u16 i_extra_isize;

@@ -1401,7 +1405,8 @@ struct ext4_sb_info {
	u32 s_min_batch_time;
	struct block_device *journal_bdev;
#ifdef CONFIG_QUOTA
	char *s_qf_names[EXT4_MAXQUOTAS];	/* Names of quota files with journalled quota */
	/* Names of quota files with journalled quota */
	char __rcu *s_qf_names[EXT4_MAXQUOTAS];
	int s_jquota_fmt;			/* Format of quota to use */
#endif
	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
@@ -2483,10 +2488,11 @@ extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
				    loff_t lstart, loff_t lend);
extern int ext4_page_mkwrite(struct vm_fault *vmf);
extern int ext4_filemap_fault(struct vm_fault *vmf);
extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
extern vm_fault_t ext4_filemap_fault(struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_release_space(struct inode *inode, int to_free);
extern void ext4_da_update_reserve_space(struct inode *inode,
					 int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
@@ -3142,10 +3148,6 @@ extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
					    int flags);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
extern int ext4_find_delalloc_range(struct inode *inode,
				    ext4_lblk_t lblk_start,
				    ext4_lblk_t lblk_end);
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			__u64 start, __u64 len);
@@ -3156,6 +3158,7 @@ extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
			     struct inode *inode2, ext4_lblk_t lblk1,
			     ext4_lblk_t lblk2,  ext4_lblk_t count,
			     int mark_unwritten,int *err);
extern int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu);

/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,

@@ -119,6 +119,19 @@ struct ext4_ext_path {
	struct buffer_head		*p_bh;
};

/*
 * Used to record a portion of a cluster found at the beginning or end
 * of an extent while traversing the extent tree during space removal.
 * A partial cluster may be removed if it does not contain blocks shared
 * with extents that aren't being deleted (tofree state).  Otherwise,
 * it cannot be removed (nofree state).
 */
struct partial_cluster {
	ext4_fsblk_t pclu;  /* physical cluster number */
	ext4_lblk_t lblk;   /* logical block number within logical cluster */
	enum {initial, tofree, nofree} state;
};
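
For orientation, a minimal sketch of how the removal code drives this state
machine (mirroring the initialization in ext4_ext_remove_space() and the
transitions in ext4_remove_blocks()/ext4_ext_rm_leaf() later in this patch;
the surrounding variables are illustrative):

	/* removal begins with no partial cluster under consideration */
	struct partial_cluster partial = {
		.pclu = 0, .lblk = 0, .state = initial
	};

	/* an unaligned extent head that survived ext4_free_blocks() is
	 * recorded as a removal candidate ...                          */
	partial.pclu = EXT4_B2C(sbi, pblk);
	partial.lblk = from;
	partial.state = tofree;

	/* ... and protected once it is seen to be shared with an extent
	 * that is not being deleted                                     */
	partial.state = nofree;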

/*
 * structure for external API
 */

@@ -2351,8 +2351,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
{
	struct extent_status es;

	ext4_es_find_delayed_extent_range(inode, hole_start,
					  hole_start + hole_len - 1, &es);
	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
				  hole_start + hole_len - 1, &es);
	if (es.es_len) {
		/* There's delayed extent containing lblock? */
		if (es.es_lblk <= hole_start)
@@ -2490,106 +2490,157 @@ static inline int get_default_free_blocks_flags(struct inode *inode)
	return 0;
}

/*
 * ext4_rereserve_cluster - increment the reserved cluster count when
 *                          freeing a cluster with a pending reservation
 *
 * @inode - file containing the cluster
 * @lblk - logical block in cluster to be reserved
 *
 * Increments the reserved cluster count and adjusts quota in a bigalloc
 * file system when freeing a partial cluster containing at least one
 * delayed and unwritten block.  A partial cluster meeting that
 * requirement will have a pending reservation.  If so, the
 * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
 * defer reserved and allocated space accounting to a subsequent call
 * to this function.
 */
static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);

	dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));

	spin_lock(&ei->i_block_reservation_lock);
	ei->i_reserved_data_blocks++;
	percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
	spin_unlock(&ei->i_block_reservation_lock);

	percpu_counter_add(&sbi->s_freeclusters_counter, 1);
	ext4_remove_pending(inode, lblk);
}
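
The caller-side pattern this function pairs with, repeated at each
partial-cluster free site in this patch, looks like:

	int flags = get_default_free_blocks_flags(inode);

	if (ext4_is_pending(inode, partial->lblk))
		flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
	ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, partial->pclu),
			 sbi->s_cluster_ratio, flags);
	if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
		ext4_rereserve_cluster(inode, partial->lblk);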

static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
			      struct ext4_extent *ex,
			      long long *partial_cluster,
			      struct partial_cluster *partial,
			      ext4_lblk_t from, ext4_lblk_t to)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	unsigned short ee_len = ext4_ext_get_actual_len(ex);
	ext4_fsblk_t pblk;
	int flags = get_default_free_blocks_flags(inode);
	ext4_fsblk_t last_pblk, pblk;
	ext4_lblk_t num;
	int flags;

	/* only extent tail removal is allowed */
	if (from < le32_to_cpu(ex->ee_block) ||
	    to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
		ext4_error(sbi->s_sb,
			   "strange request: removal(2) %u-%u from %u:%u",
			   from, to, le32_to_cpu(ex->ee_block), ee_len);
		return 0;
	}

#ifdef EXTENTS_STATS
	spin_lock(&sbi->s_ext_stats_lock);
	sbi->s_ext_blocks += ee_len;
	sbi->s_ext_extents++;
	if (ee_len < sbi->s_ext_min)
		sbi->s_ext_min = ee_len;
	if (ee_len > sbi->s_ext_max)
		sbi->s_ext_max = ee_len;
	if (ext_depth(inode) > sbi->s_depth_max)
		sbi->s_depth_max = ext_depth(inode);
	spin_unlock(&sbi->s_ext_stats_lock);
#endif

	trace_ext4_remove_blocks(inode, ex, from, to, partial);

	/*
	 * if we have a partial cluster, and it's different from the
	 * cluster of the last block in the extent, we free it
	 */
	last_pblk = ext4_ext_pblock(ex) + ee_len - 1;

	if (partial->state != initial &&
	    partial->pclu != EXT4_B2C(sbi, last_pblk)) {
		if (partial->state == tofree) {
			flags = get_default_free_blocks_flags(inode);
			if (ext4_is_pending(inode, partial->lblk))
				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
			ext4_free_blocks(handle, inode, NULL,
					 EXT4_C2B(sbi, partial->pclu),
					 sbi->s_cluster_ratio, flags);
			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
				ext4_rereserve_cluster(inode, partial->lblk);
		}
		partial->state = initial;
	}

	num = le32_to_cpu(ex->ee_block) + ee_len - from;
	pblk = ext4_ext_pblock(ex) + ee_len - num;

	/*
	 * We free the partial cluster at the end of the extent (if any),
	 * unless the cluster is used by another extent (partial_cluster
	 * state is nofree).  If a partial cluster exists here, it must be
	 * shared with the last block in the extent.
	 */
	flags = get_default_free_blocks_flags(inode);

	/* partial, left end cluster aligned, right end unaligned */
	if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
	    (EXT4_LBLK_CMASK(sbi, to) >= from) &&
	    (partial->state != nofree)) {
		if (ext4_is_pending(inode, to))
			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
		ext4_free_blocks(handle, inode, NULL,
				 EXT4_PBLK_CMASK(sbi, last_pblk),
				 sbi->s_cluster_ratio, flags);
		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
			ext4_rereserve_cluster(inode, to);
		partial->state = initial;
		flags = get_default_free_blocks_flags(inode);
	}

	flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;

	/*
	 * For bigalloc file systems, we never free a partial cluster
	 * at the beginning of the extent.  Instead, we make a note
	 * that we tried freeing the cluster, and check to see if we
	 * at the beginning of the extent.  Instead, we check to see if we
	 * need to free it on a subsequent call to ext4_remove_blocks,
	 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
	 */
	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
	ext4_free_blocks(handle, inode, NULL, pblk, num, flags);

	/* reset the partial cluster if we've freed past it */
	if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
		partial->state = initial;

	trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
	/*
	 * If we have a partial cluster, and it's different from the
	 * cluster of the last block, we need to explicitly free the
	 * partial cluster here.
	 * If we've freed the entire extent but the beginning is not left
	 * cluster aligned and is not marked as ineligible for freeing we
	 * record the partial cluster at the beginning of the extent.  It
	 * wasn't freed by the preceding ext4_free_blocks() call, and we
	 * need to look farther to the left to determine if it's to be freed
	 * (not shared with another extent).  Else, reset the partial
	 * cluster - we're either done freeing or the beginning of the
	 * extent is left cluster aligned.
	 */
	pblk = ext4_ext_pblock(ex) + ee_len - 1;
	if (*partial_cluster > 0 &&
	    *partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
		ext4_free_blocks(handle, inode, NULL,
				 EXT4_C2B(sbi, *partial_cluster),
				 sbi->s_cluster_ratio, flags);
		*partial_cluster = 0;
	}

#ifdef EXTENTS_STATS
	{
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
		spin_lock(&sbi->s_ext_stats_lock);
		sbi->s_ext_blocks += ee_len;
		sbi->s_ext_extents++;
		if (ee_len < sbi->s_ext_min)
			sbi->s_ext_min = ee_len;
		if (ee_len > sbi->s_ext_max)
			sbi->s_ext_max = ee_len;
		if (ext_depth(inode) > sbi->s_depth_max)
			sbi->s_depth_max = ext_depth(inode);
		spin_unlock(&sbi->s_ext_stats_lock);
	}
#endif
	if (from >= le32_to_cpu(ex->ee_block)
	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
		/* tail removal */
		ext4_lblk_t num;
		long long first_cluster;

		num = le32_to_cpu(ex->ee_block) + ee_len - from;
		pblk = ext4_ext_pblock(ex) + ee_len - num;
		/*
		 * Usually we want to free partial cluster at the end of the
		 * extent, except for the situation when the cluster is still
		 * used by any other extent (partial_cluster is negative).
		 */
		if (*partial_cluster < 0 &&
		    *partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1))
			flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;

		ext_debug("free last %u blocks starting %llu partial %lld\n",
			  num, pblk, *partial_cluster);
		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
		/*
		 * If the block range to be freed didn't start at the
		 * beginning of a cluster, and we removed the entire
		 * extent and the cluster is not used by any other extent,
		 * save the partial cluster here, since we might need to
		 * delete if we determine that the truncate or punch hole
		 * operation has removed all of the blocks in the cluster.
		 * If that cluster is used by another extent, preserve its
		 * negative value so it isn't freed later on.
		 *
		 * If the whole extent wasn't freed, we've reached the
		 * start of the truncated/punched region and have finished
		 * removing blocks.  If there's a partial cluster here it's
		 * shared with the remainder of the extent and is no longer
		 * a candidate for removal.
		 */
		if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) {
			first_cluster = (long long) EXT4_B2C(sbi, pblk);
			if (first_cluster != -*partial_cluster)
				*partial_cluster = first_cluster;
		} else {
			*partial_cluster = 0;
	if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
		if (partial->state == initial) {
			partial->pclu = EXT4_B2C(sbi, pblk);
			partial->lblk = from;
			partial->state = tofree;
		}
	} else
		ext4_error(sbi->s_sb, "strange request: removal(2) "
			   "%u-%u from %u:%u",
			   from, to, le32_to_cpu(ex->ee_block), ee_len);
	} else {
		partial->state = initial;
	}

	return 0;
}


/*
 * ext4_ext_rm_leaf() Removes the extents associated with the
 * blocks appearing between "start" and "end".  Both "start"
@@ -2608,7 +2659,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
static int
ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
		 struct ext4_ext_path *path,
		 long long *partial_cluster,
		 struct partial_cluster *partial,
		 ext4_lblk_t start, ext4_lblk_t end)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2640,7 +2691,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
	ex_ee_block = le32_to_cpu(ex->ee_block);
	ex_ee_len = ext4_ext_get_actual_len(ex);

	trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
	trace_ext4_ext_rm_leaf(inode, start, ex, partial);

	while (ex >= EXT_FIRST_EXTENT(eh) &&
	       ex_ee_block + ex_ee_len > start) {
@@ -2671,8 +2722,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
			 */
			if (sbi->s_cluster_ratio > 1) {
				pblk = ext4_ext_pblock(ex);
				*partial_cluster =
					-(long long) EXT4_B2C(sbi, pblk);
				partial->pclu = EXT4_B2C(sbi, pblk);
				partial->state = nofree;
			}
			ex--;
			ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2714,8 +2765,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
		if (err)
			goto out;

		err = ext4_remove_blocks(handle, inode, ex, partial_cluster,
					 a, b);
		err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
		if (err)
			goto out;

@@ -2769,18 +2819,23 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
	 * If there's a partial cluster and at least one extent remains in
	 * the leaf, free the partial cluster if it isn't shared with the
	 * current extent.  If it is shared with the current extent
	 * we zero partial_cluster because we've reached the start of the
	 * we reset the partial cluster because we've reached the start of the
	 * truncated/punched region and we're done removing blocks.
	 */
	if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) {
	if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
		pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
		if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
		if (partial->pclu != EXT4_B2C(sbi, pblk)) {
			int flags = get_default_free_blocks_flags(inode);

			if (ext4_is_pending(inode, partial->lblk))
				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
			ext4_free_blocks(handle, inode, NULL,
					 EXT4_C2B(sbi, *partial_cluster),
					 sbi->s_cluster_ratio,
					 get_default_free_blocks_flags(inode));
					 EXT4_C2B(sbi, partial->pclu),
					 sbi->s_cluster_ratio, flags);
			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
				ext4_rereserve_cluster(inode, partial->lblk);
		}
		*partial_cluster = 0;
		partial->state = initial;
	}

	/* if this leaf is free, then we should
@@ -2819,10 +2874,14 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	int depth = ext_depth(inode);
	struct ext4_ext_path *path = NULL;
	long long partial_cluster = 0;
	struct partial_cluster partial;
	handle_t *handle;
	int i = 0, err = 0;

	partial.pclu = 0;
	partial.lblk = 0;
	partial.state = initial;

	ext_debug("truncate since %u to %u\n", start, end);

	/* probably first extent we're gonna free will be last in block */
@@ -2882,8 +2941,8 @@ again:
			 */
			if (sbi->s_cluster_ratio > 1) {
				pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
				partial_cluster =
					-(long long) EXT4_B2C(sbi, pblk);
				partial.pclu = EXT4_B2C(sbi, pblk);
				partial.state = nofree;
			}

		/*
@@ -2911,9 +2970,10 @@ again:
						    &ex);
			if (err)
				goto out;
			if (pblk)
				partial_cluster =
					-(long long) EXT4_B2C(sbi, pblk);
			if (pblk) {
				partial.pclu = EXT4_B2C(sbi, pblk);
				partial.state = nofree;
			}
		}
	}
	/*
@@ -2948,8 +3008,7 @@ again:
		if (i == depth) {
			/* this is leaf block */
			err = ext4_ext_rm_leaf(handle, inode, path,
					       &partial_cluster, start,
					       end);
					       &partial, start, end);
			/* root level has p_bh == NULL, brelse() eats this */
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
@@ -3021,21 +3080,24 @@ again:
		}
	}

	trace_ext4_ext_remove_space_done(inode, start, end, depth,
					 partial_cluster, path->p_hdr->eh_entries);
	trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
					 path->p_hdr->eh_entries);

	/*
	 * If we still have something in the partial cluster and we have removed
	 * even the first extent, then we should free the blocks in the partial
	 * cluster as well.  (This code will only run when there are no leaves
	 * to the immediate left of the truncated/punched region.)
	 * if there's a partial cluster and we have removed the first extent
	 * in the file, then we also free the partial cluster, if any
	 */
	if (partial_cluster > 0 && err == 0) {
		/* don't zero partial_cluster since it's not used afterwards */
	if (partial.state == tofree && err == 0) {
		int flags = get_default_free_blocks_flags(inode);

		if (ext4_is_pending(inode, partial.lblk))
			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
		ext4_free_blocks(handle, inode, NULL,
				 EXT4_C2B(sbi, partial_cluster),
				 sbi->s_cluster_ratio,
				 get_default_free_blocks_flags(inode));
				 EXT4_C2B(sbi, partial.pclu),
				 sbi->s_cluster_ratio, flags);
		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
			ext4_rereserve_cluster(inode, partial.lblk);
		partial.state = initial;
	}

	/* TODO: flexible tree reduction should be here */
@@ -3819,114 +3881,6 @@ out:
	return ext4_mark_inode_dirty(handle, inode);
}

/**
 * ext4_find_delalloc_range: find delayed allocated block in the given range.
 *
 * Return 1 if there is a delalloc block in the range, otherwise 0.
 */
int ext4_find_delalloc_range(struct inode *inode,
			     ext4_lblk_t lblk_start,
			     ext4_lblk_t lblk_end)
{
	struct extent_status es;

	ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
	if (es.es_len == 0)
		return 0; /* there is no delay extent in this tree */
	else if (es.es_lblk <= lblk_start &&
		 lblk_start < es.es_lblk + es.es_len)
		return 1;
	else if (lblk_start <= es.es_lblk && es.es_lblk <= lblk_end)
		return 1;
	else
		return 0;
}

int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t lblk_start, lblk_end;
	lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;

	return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
}

/**
 * Determines how many complete clusters (out of those specified by the 'map')
 * are under delalloc and were reserved quota for.
 * This function is called when we are writing out the blocks that were
 * originally written with their allocation delayed, but then the space was
 * allocated using fallocate() before the delayed allocation could be resolved.
 * The cases to look for are:
 * ('=' indicated delayed allocated blocks
 *  '-' indicates non-delayed allocated blocks)
 * (a) partial clusters towards beginning and/or end outside of allocated range
 *     are not delalloc'ed.
 *	Ex:
 *	|----c---=|====c====|====c====|===-c----|
 *	         |++++++ allocated ++++++|
 *	==> 4 complete clusters in above example
 *
 * (b) partial cluster (outside of allocated range) towards either end is
 *     marked for delayed allocation. In this case, we will exclude that
 *     cluster.
 *	Ex:
 *	|----====c========|========c========|
 *	     |++++++ allocated ++++++|
 *	==> 1 complete clusters in above example
 *
 *	Ex:
 *	|================c================|
 *            |++++++ allocated ++++++|
 *	==> 0 complete clusters in above example
 *
 * The ext4_da_update_reserve_space will be called only if we
 * determine here that there were some "entire" clusters that span
 * this 'allocated' range.
 * In the non-bigalloc case, this function will just end up returning num_blks
 * without ever calling ext4_find_delalloc_range.
 */
static unsigned int
get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
			   unsigned int num_blks)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
	ext4_lblk_t lblk_from, lblk_to, c_offset;
	unsigned int allocated_clusters = 0;

	alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
	alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);

	/* max possible clusters for this allocation */
	allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;

	trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);

	/* Check towards left side */
	c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
	if (c_offset) {
		lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
		lblk_to = lblk_from + c_offset - 1;

		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
			allocated_clusters--;
	}

	/* Now check towards right. */
	c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
	if (allocated_clusters && c_offset) {
		lblk_from = lblk_start + num_blks;
		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;

		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
			allocated_clusters--;
	}

	return allocated_clusters;
}
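
As a worked example of the counting this (removed) function performed,
with hypothetical numbers and s_cluster_ratio = 4, so clusters are
[0-3], [4-7], [8-11]: for lblk_start = 3 and num_blks = 6 (blocks 3..8),

	alloc_cluster_start = EXT4_B2C(sbi, 3) = 0
	alloc_cluster_end   = EXT4_B2C(sbi, 8) = 2
	allocated_clusters  = 2 - 0 + 1        = 3
	/* left:  c_offset = 3, so blocks 0..2 are tested for delalloc */
	/* right: c_offset = EXT4_LBLK_COFF(sbi, 9) = 1, blocks 9..11  */

and each partial cluster found to be delalloc'ed on either end decrements
allocated_clusters, matching case (b) in the comment above.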

static int
convert_initialized_extent(handle_t *handle, struct inode *inode,
			   struct ext4_map_blocks *map,
@@ -4108,23 +4062,6 @@ out:
	}
	map->m_len = allocated;

	/*
	 * If we have done fallocate with the offset that is already
	 * delayed allocated, we would have block reservation
	 * and quota reservation done in the delayed write path.
	 * But fallocate would have already updated quota and block
	 * count for this offset. So cancel these reservation
	 */
	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
		unsigned int reserved_clusters;
		reserved_clusters = get_reserved_cluster_alloc(inode,
				map->m_lblk, map->m_len);
		if (reserved_clusters)
			ext4_da_update_reserve_space(inode,
					reserved_clusters,
					0);
	}

map_out:
	map->m_flags |= EXT4_MAP_MAPPED;
	if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) {
@@ -4513,77 +4450,39 @@ got_allocated_blocks:
	map->m_flags |= EXT4_MAP_NEW;

	/*
	 * Update reserved blocks/metadata blocks after successful
	 * block allocation which had been deferred till now.
	 * Reduce the reserved cluster count to reflect successful deferred
	 * allocation of delayed allocated clusters or direct allocation of
	 * clusters discovered to be delayed allocated.  Once allocated, a
	 * cluster is not included in the reserved count.
	 */
	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
		unsigned int reserved_clusters;
		/*
		 * Check how many clusters we had reserved this allocated range
		 */
		reserved_clusters = get_reserved_cluster_alloc(inode,
						map->m_lblk, allocated);
		if (!map_from_cluster) {
			BUG_ON(allocated_clusters < reserved_clusters);
			if (reserved_clusters < allocated_clusters) {
				struct ext4_inode_info *ei = EXT4_I(inode);
				int reservation = allocated_clusters -
						  reserved_clusters;
				/*
				 * It seems we claimed few clusters outside of
				 * the range of this allocation. We should give
				 * it back to the reservation pool. This can
				 * happen in the following case:
				 *
				 * * Suppose s_cluster_ratio is 4 (i.e., each
				 *   cluster has 4 blocks. Thus, the clusters
				 *   are [0-3],[4-7],[8-11]...
				 * * First comes delayed allocation write for
				 *   logical blocks 10 & 11. Since there were no
				 *   previous delayed allocated blocks in the
				 *   range [8-11], we would reserve 1 cluster
				 *   for this write.
				 * * Next comes write for logical blocks 3 to 8.
				 *   In this case, we will reserve 2 clusters
				 *   (for [0-3] and [4-7]; and not for [8-11] as
				 *   that range has a delayed allocated blocks.
				 *   Thus total reserved clusters now becomes 3.
				 * * Now, during the delayed allocation writeout
				 *   time, we will first write blocks [3-8] and
				 *   allocate 3 clusters for writing these
				 *   blocks. Also, we would claim all these
				 *   three clusters above.
				 * * Now when we come here to writeout the
				 *   blocks [10-11], we would expect to claim
				 *   the reservation of 1 cluster we had made
				 *   (and we would claim it since there are no
				 *   more delayed allocated blocks in the range
				 *   [8-11]. But our reserved cluster count had
				 *   already gone to 0.
				 *
				 * Thus, at the step 4 above when we determine
				 * that there are still some unwritten delayed
				 * allocated blocks outside of our current
				 * block range, we should increment the
				 * reserved clusters count so that when the
				 * remaining blocks finally gets written, we
				 * could claim them.
				 */
				dquot_reserve_block(inode,
						EXT4_C2B(sbi, reservation));
				spin_lock(&ei->i_block_reservation_lock);
				ei->i_reserved_data_blocks += reservation;
				spin_unlock(&ei->i_block_reservation_lock);
			}
	if (test_opt(inode->i_sb, DELALLOC) && !map_from_cluster) {
		if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
			/*
			 * We will claim quota for all newly allocated blocks.
			 * We're updating the reserved space *after* the
			 * correction above so we do not accidentally free
			 * all the metadata reservation because we might
			 * actually need it later on.
			 * When allocating delayed allocated clusters, simply
			 * reduce the reserved cluster count and claim quota
			 */
			ext4_da_update_reserve_space(inode, allocated_clusters,
							1);
		} else {
			ext4_lblk_t lblk, len;
			unsigned int n;

			/*
			 * When allocating non-delayed allocated clusters
			 * (from fallocate, filemap, DIO, or clusters
			 * allocated when delalloc has been disabled by
			 * ext4_nonda_switch), reduce the reserved cluster
			 * count by the number of allocated clusters that
			 * have previously been delayed allocated.  Quota
			 * has been claimed by ext4_mb_new_blocks() above,
			 * so release the quota reservations made for any
			 * previously delayed allocated clusters.
			 */
			lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
			len = allocated_clusters << sbi->s_cluster_bits;
			n = ext4_es_delayed_clu(inode, lblk, len);
			if (n > 0)
				ext4_da_update_reserve_space(inode, (int) n, 0);
		}
	}

@@ -5075,8 +4974,10 @@ static int ext4_find_delayed_extent(struct inode *inode,
	ext4_lblk_t block, next_del;

	if (newes->es_pblk == 0) {
		ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
				newes->es_lblk + newes->es_len - 1, &es);
		ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
					  newes->es_lblk,
					  newes->es_lblk + newes->es_len - 1,
					  &es);

		/*
		 * No extent in extent-tree contains block @newes->es_pblk,
@@ -5097,7 +4998,8 @@ static int ext4_find_delayed_extent(struct inode *inode,
	}

	block = newes->es_lblk + newes->es_len;
	ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, block,
				  EXT_MAX_BLOCKS, &es);
	if (es.es_len == 0)
		next_del = EXT_MAX_BLOCKS;
	else
@@ -5958,3 +5860,82 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
	}
	return replaced_count;
}

/*
 * ext4_clu_mapped - determine whether any block in a logical cluster has
 *                   been mapped to a physical cluster
 *
 * @inode - file containing the logical cluster
 * @lclu - logical cluster of interest
 *
 * Returns 1 if any block in the logical cluster is mapped, signifying
 * that a physical cluster has been allocated for it.  Otherwise,
 * returns 0.  Can also return negative error codes.  Derived from
 * ext4_ext_map_blocks().
 */
int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_ext_path *path;
	int depth, mapped = 0, err = 0;
	struct ext4_extent *extent;
	ext4_lblk_t first_lblk, first_lclu, last_lclu;

	/* search for the extent closest to the first block in the cluster */
	path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
	if (IS_ERR(path)) {
		err = PTR_ERR(path);
		path = NULL;
		goto out;
	}

	depth = ext_depth(inode);

	/*
	 * A consistent leaf must not be empty.  This situation is possible,
	 * though, _during_ tree modification, and it's why an assert can't
	 * be put in ext4_find_extent().
	 */
	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
		EXT4_ERROR_INODE(inode,
		    "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
				 (unsigned long) EXT4_C2B(sbi, lclu),
				 depth, path[depth].p_block);
		err = -EFSCORRUPTED;
		goto out;
	}

	extent = path[depth].p_ext;

	/* can't be mapped if the extent tree is empty */
	if (extent == NULL)
		goto out;

	first_lblk = le32_to_cpu(extent->ee_block);
	first_lclu = EXT4_B2C(sbi, first_lblk);

	/*
	 * Three possible outcomes at this point - found extent spanning
	 * the target cluster, to the left of the target cluster, or to the
	 * right of the target cluster.  The first two cases are handled here.
	 * The last case indicates the target cluster is not mapped.
	 */
	if (lclu >= first_lclu) {
		last_lclu = EXT4_B2C(sbi, first_lblk +
				     ext4_ext_get_actual_len(extent) - 1);
		if (lclu <= last_lclu) {
			mapped = 1;
		} else {
			first_lblk = ext4_ext_next_allocated_block(path);
			first_lclu = EXT4_B2C(sbi, first_lblk);
			if (lclu == first_lclu)
				mapped = 1;
		}
	}

out:
	ext4_ext_drop_refs(path);
	kfree(path);

	return err ? err : mapped;
}
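
A sketch of the intended call pattern (in this series the caller lives in
the delayed write path; the 'lblk' and 'allocated' names here are
illustrative, not quoted from the patch):

	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	bool allocated;
	int ret;

	ret = ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk));
	if (ret < 0)
		return ret;		/* e.g. -EFSCORRUPTED */
	allocated = (ret == 1);	/* cluster already backed by storage */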
@@ -142,6 +142,7 @@
 */

static struct kmem_cache *ext4_es_cachep;
static struct kmem_cache *ext4_pending_cachep;

static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
@@ -149,6 +150,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
		       struct ext4_inode_info *locked_ei);
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			     ext4_lblk_t len);

int __init ext4_init_es(void)
{
@@ -233,30 +236,38 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
}

/*
 * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering
 * @es->lblk if it exists, otherwise, the next extent after @es->lblk.
 * ext4_es_find_extent_range - find extent with specified status within block
 *                             range or next extent following block range in
 *                             extents status tree
 *
 * @inode: the inode which owns delayed extents
 * @lblk: the offset where we start to search
 * @end: the offset where we stop to search
 * @es: delayed extent that we found
 * @inode - file containing the range
 * @matching_fn - pointer to function that matches extents with desired status
 * @lblk - logical block defining start of range
 * @end - logical block defining end of range
 * @es - extent found, if any
 *
 * Find the first extent within the block range specified by @lblk and @end
 * in the extents status tree that satisfies @matching_fn.  If a match
 * is found, it's returned in @es.  If not, and a matching extent is found
 * beyond the block range, it's returned in @es.  If no match is found, an
 * extent is returned in @es whose es_lblk, es_len, and es_pblk components
 * are 0.
 */
void ext4_es_find_delayed_extent_range(struct inode *inode,
				 ext4_lblk_t lblk, ext4_lblk_t end,
				 struct extent_status *es)
static void __es_find_extent_range(struct inode *inode,
				   int (*matching_fn)(struct extent_status *es),
				   ext4_lblk_t lblk, ext4_lblk_t end,
				   struct extent_status *es)
{
	struct ext4_es_tree *tree = NULL;
	struct extent_status *es1 = NULL;
	struct rb_node *node;

	BUG_ON(es == NULL);
	BUG_ON(end < lblk);
	trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);
	WARN_ON(es == NULL);
	WARN_ON(end < lblk);

	read_lock(&EXT4_I(inode)->i_es_lock);
	tree = &EXT4_I(inode)->i_es_tree;

	/* find extent in cache firstly */
	/* see if the extent has been cached */
	es->es_lblk = es->es_len = es->es_pblk = 0;
	if (tree->cache_es) {
		es1 = tree->cache_es;
@@ -271,28 +282,133 @@ void ext4_es_find_delayed_extent_range(struct inode *inode,
		es1 = __es_tree_search(&tree->root, lblk);

out:
	if (es1 && !ext4_es_is_delayed(es1)) {
	if (es1 && !matching_fn(es1)) {
		while ((node = rb_next(&es1->rb_node)) != NULL) {
			es1 = rb_entry(node, struct extent_status, rb_node);
			if (es1->es_lblk > end) {
				es1 = NULL;
				break;
			}
			if (ext4_es_is_delayed(es1))
			if (matching_fn(es1))
				break;
		}
	}

	if (es1 && ext4_es_is_delayed(es1)) {
	if (es1 && matching_fn(es1)) {
		tree->cache_es = es1;
		es->es_lblk = es1->es_lblk;
		es->es_len = es1->es_len;
		es->es_pblk = es1->es_pblk;
	}

}

/*
 * Locking for __es_find_extent_range() for external use
 */
void ext4_es_find_extent_range(struct inode *inode,
			       int (*matching_fn)(struct extent_status *es),
			       ext4_lblk_t lblk, ext4_lblk_t end,
			       struct extent_status *es)
{
	trace_ext4_es_find_extent_range_enter(inode, lblk);

	read_lock(&EXT4_I(inode)->i_es_lock);
	__es_find_extent_range(inode, matching_fn, lblk, end, es);
	read_unlock(&EXT4_I(inode)->i_es_lock);

	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
	trace_ext4_es_find_extent_range_exit(inode, es);
}

/*
 * __es_scan_range - search block range for block with specified status
 *                   in extents status tree
 *
 * @inode - file containing the range
 * @matching_fn - pointer to function that matches extents with desired status
 * @lblk - logical block defining start of range
 * @end - logical block defining end of range
 *
 * Returns true if at least one block in the specified block range satisfies
 * the criterion specified by @matching_fn, and false if not.  If at least
 * one extent has the specified status, then there is at least one block
 * in the cluster with that status.  Should only be called by code that has
 * taken i_es_lock.
 */
static bool __es_scan_range(struct inode *inode,
			    int (*matching_fn)(struct extent_status *es),
			    ext4_lblk_t start, ext4_lblk_t end)
{
	struct extent_status es;

	__es_find_extent_range(inode, matching_fn, start, end, &es);
	if (es.es_len == 0)
		return false; /* no matching extent in the tree */
	else if (es.es_lblk <= start &&
		 start < es.es_lblk + es.es_len)
		return true;
	else if (start <= es.es_lblk && es.es_lblk <= end)
		return true;
	else
		return false;
}
/*
 * Locking for __es_scan_range() for external use
 */
bool ext4_es_scan_range(struct inode *inode,
			int (*matching_fn)(struct extent_status *es),
			ext4_lblk_t lblk, ext4_lblk_t end)
{
	bool ret;

	read_lock(&EXT4_I(inode)->i_es_lock);
	ret = __es_scan_range(inode, matching_fn, lblk, end);
	read_unlock(&EXT4_I(inode)->i_es_lock);

	return ret;
}

/*
 * __es_scan_clu - search cluster for block with specified status in
 *                 extents status tree
 *
 * @inode - file containing the cluster
 * @matching_fn - pointer to function that matches extents with desired status
 * @lblk - logical block in cluster to be searched
 *
 * Returns true if at least one extent in the cluster containing @lblk
 * satisfies the criterion specified by @matching_fn, and false if not.  If at
 * least one extent has the specified status, then there is at least one block
 * in the cluster with that status.  Should only be called by code that has
 * taken i_es_lock.
 */
static bool __es_scan_clu(struct inode *inode,
			  int (*matching_fn)(struct extent_status *es),
			  ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t lblk_start, lblk_end;

	lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;

	return __es_scan_range(inode, matching_fn, lblk_start, lblk_end);
}

/*
 * Locking for __es_scan_clu() for external use
 */
bool ext4_es_scan_clu(struct inode *inode,
		      int (*matching_fn)(struct extent_status *es),
		      ext4_lblk_t lblk)
{
	bool ret;

	read_lock(&EXT4_I(inode)->i_es_lock);
	ret = __es_scan_clu(inode, matching_fn, lblk);
	read_unlock(&EXT4_I(inode)->i_es_lock);

	return ret;
}
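
With the status test factored out into @matching_fn, callers pick the
predicate they need; both predicates below appear elsewhere in this patch:

	bool has_delayed, has_delonly;

	/* any delayed block in lblk..end? */
	has_delayed = ext4_es_scan_range(inode, &ext4_es_is_delayed,
					 lblk, end);

	/* any delayed-but-not-unwritten block in lblk's cluster? */
	has_delonly = ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk);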

static void ext4_es_list_add(struct inode *inode)
@@ -694,6 +810,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
	struct extent_status newes;
	ext4_lblk_t end = lblk + len - 1;
	int err = 0;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
		 lblk, len, pblk, status, inode->i_ino);
@@ -730,6 +847,11 @@ retry:
	if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
		err = 0;

	if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
	    (status & EXTENT_STATUS_WRITTEN ||
	     status & EXTENT_STATUS_UNWRITTEN))
		__revise_pending(inode, lblk, len);

error:
	write_unlock(&EXT4_I(inode)->i_es_lock);

@@ -1252,3 +1374,499 @@ static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
	ei->i_es_tree.cache_es = NULL;
	return nr_shrunk;
}

#ifdef ES_DEBUG__
static void ext4_print_pending_tree(struct inode *inode)
{
	struct ext4_pending_tree *tree;
	struct rb_node *node;
	struct pending_reservation *pr;

	printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
	tree = &EXT4_I(inode)->i_pending_tree;
	node = rb_first(&tree->root);
	while (node) {
		pr = rb_entry(node, struct pending_reservation, rb_node);
		printk(KERN_DEBUG " %u", pr->lclu);
		node = rb_next(node);
	}
	printk(KERN_DEBUG "\n");
}
#else
#define ext4_print_pending_tree(inode)
#endif

int __init ext4_init_pending(void)
{
	ext4_pending_cachep = kmem_cache_create("ext4_pending_reservation",
					   sizeof(struct pending_reservation),
					   0, (SLAB_RECLAIM_ACCOUNT), NULL);
	if (ext4_pending_cachep == NULL)
		return -ENOMEM;
	return 0;
}

void ext4_exit_pending(void)
{
	kmem_cache_destroy(ext4_pending_cachep);
}

void ext4_init_pending_tree(struct ext4_pending_tree *tree)
{
	tree->root = RB_ROOT;
}

/*
 * __get_pending - retrieve a pointer to a pending reservation
 *
 * @inode - file containing the pending cluster reservation
 * @lclu - logical cluster of interest
 *
 * Returns a pointer to a pending reservation if it's a member of
 * the set, and NULL if not.  Must be called holding i_es_lock.
 */
static struct pending_reservation *__get_pending(struct inode *inode,
						 ext4_lblk_t lclu)
{
	struct ext4_pending_tree *tree;
	struct rb_node *node;
	struct pending_reservation *pr = NULL;

	tree = &EXT4_I(inode)->i_pending_tree;
	node = (&tree->root)->rb_node;

	while (node) {
		pr = rb_entry(node, struct pending_reservation, rb_node);
		if (lclu < pr->lclu)
			node = node->rb_left;
		else if (lclu > pr->lclu)
			node = node->rb_right;
		else if (lclu == pr->lclu)
			return pr;
	}
	return NULL;
}

/*
 * __insert_pending - adds a pending cluster reservation to the set of
 *                    pending reservations
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the cluster to be added
 *
 * Returns 0 on successful insertion and -ENOMEM on failure.  If the
 * pending reservation is already in the set, returns successfully.
 */
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
	struct rb_node **p = &tree->root.rb_node;
	struct rb_node *parent = NULL;
	struct pending_reservation *pr;
	ext4_lblk_t lclu;
	int ret = 0;

	lclu = EXT4_B2C(sbi, lblk);
	/* search to find parent for insertion */
	while (*p) {
		parent = *p;
		pr = rb_entry(parent, struct pending_reservation, rb_node);

		if (lclu < pr->lclu) {
			p = &(*p)->rb_left;
		} else if (lclu > pr->lclu) {
			p = &(*p)->rb_right;
		} else {
			/* pending reservation already inserted */
			goto out;
		}
	}

	pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
	if (pr == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	pr->lclu = lclu;

	rb_link_node(&pr->rb_node, parent, p);
	rb_insert_color(&pr->rb_node, &tree->root);

out:
	return ret;
}

/*
 * __remove_pending - removes a pending cluster reservation from the set
 *                    of pending reservations
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the pending cluster reservation to be removed
 *
 * Returns successfully if pending reservation is not a member of the set.
 */
static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct pending_reservation *pr;
	struct ext4_pending_tree *tree;

	pr = __get_pending(inode, EXT4_B2C(sbi, lblk));
	if (pr != NULL) {
		tree = &EXT4_I(inode)->i_pending_tree;
		rb_erase(&pr->rb_node, &tree->root);
		kmem_cache_free(ext4_pending_cachep, pr);
	}
}

/*
 * ext4_remove_pending - removes a pending cluster reservation from the set
 *                       of pending reservations
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the pending cluster reservation to be removed
 *
 * Locking for external use of __remove_pending.
 */
void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	write_lock(&ei->i_es_lock);
	__remove_pending(inode, lblk);
	write_unlock(&ei->i_es_lock);
}

/*
 * ext4_is_pending - determine whether a cluster has a pending reservation
 *                   on it
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the cluster
 *
 * Returns true if there's a pending reservation for the cluster in the
 * set of pending reservations, and false if not.
 */
bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	bool ret;

	read_lock(&ei->i_es_lock);
	ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL);
	read_unlock(&ei->i_es_lock);

	return ret;
}

/*
 * ext4_es_insert_delayed_block - adds a delayed block to the extents status
 *                                tree, adding a pending reservation where
 *                                needed
 *
 * @inode - file containing the newly added block
 * @lblk - logical block to be added
 * @allocated - indicates whether a physical cluster has been allocated for
 *              the logical cluster that contains the block
 *
 * Returns 0 on success, negative error code on failure.
 */
int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
				 bool allocated)
{
	struct extent_status newes;
	int err = 0;

	es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
		 lblk, inode->i_ino);

	newes.es_lblk = lblk;
	newes.es_len = 1;
	ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
	trace_ext4_es_insert_delayed_block(inode, &newes, allocated);

	ext4_es_insert_extent_check(inode, &newes);

	write_lock(&EXT4_I(inode)->i_es_lock);

	err = __es_remove_extent(inode, lblk, lblk);
	if (err != 0)
		goto error;
retry:
	err = __es_insert_extent(inode, &newes);
	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
					  128, EXT4_I(inode)))
		goto retry;
	if (err != 0)
		goto error;

	if (allocated)
		__insert_pending(inode, lblk);

error:
	write_unlock(&EXT4_I(inode)->i_es_lock);

	ext4_es_print_tree(inode);
	ext4_print_pending_tree(inode);

	return err;
}
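
A sketch of how a caller derives @allocated before inserting (the delayed
write path added elsewhere in this series does roughly this; treat the
shape as illustrative rather than exact):

	bool allocated = false;
	int ret;

	if (sbi->s_cluster_ratio > 1 &&
	    !ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
		/* no delayed-only neighbor; cluster may still be mapped */
		ret = ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk));
		if (ret < 0)
			return ret;
		allocated = (ret == 1);
	}
	ret = ext4_es_insert_delayed_block(inode, lblk, allocated);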

/*
 * __es_delayed_clu - count number of clusters containing blocks that
 *                    are delayed only
 *
 * @inode - file containing block range
 * @start - logical block defining start of range
 * @end - logical block defining end of range
 *
 * Returns the number of clusters containing only delayed (not delayed
 * and unwritten) blocks in the range specified by @start and @end.  Any
 * cluster or part of a cluster within the range and containing a delayed
 * and not unwritten block within the range is counted as a whole cluster.
 */
static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
				     ext4_lblk_t end)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct extent_status *es;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct rb_node *node;
	ext4_lblk_t first_lclu, last_lclu;
	unsigned long long last_counted_lclu;
	unsigned int n = 0;

	/* guaranteed to be unequal to any ext4_lblk_t value */
	last_counted_lclu = ~0ULL;

	es = __es_tree_search(&tree->root, start);

	while (es && (es->es_lblk <= end)) {
		if (ext4_es_is_delonly(es)) {
			if (es->es_lblk <= start)
				first_lclu = EXT4_B2C(sbi, start);
			else
				first_lclu = EXT4_B2C(sbi, es->es_lblk);

			if (ext4_es_end(es) >= end)
				last_lclu = EXT4_B2C(sbi, end);
			else
				last_lclu = EXT4_B2C(sbi, ext4_es_end(es));

			if (first_lclu == last_counted_lclu)
				n += last_lclu - first_lclu;
			else
				n += last_lclu - first_lclu + 1;
			last_counted_lclu = last_lclu;
		}
		node = rb_next(&es->rb_node);
		if (!node)
			break;
		es = rb_entry(node, struct extent_status, rb_node);
	}

	return n;
}
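
A worked example of the loop above (hypothetical extents, s_cluster_ratio
= 4): counting over blocks 2..9 with delayed-only extents [2..5] and
[8..9]:

	/* extent [2..5]: first_lclu = EXT4_B2C(2) = 0,
	 *                last_lclu  = EXT4_B2C(5) = 1
	 *   n += 1 - 0 + 1 = 2; last_counted_lclu = 1
	 * extent [8..9]: first_lclu = EXT4_B2C(8) = 2,
	 *                last_lclu  = EXT4_B2C(9) = 2
	 *   2 != last_counted_lclu, so n += 2 - 2 + 1; total n = 3
	 */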

/*
 * ext4_es_delayed_clu - count number of clusters containing blocks that
 *                       are both delayed and unwritten
 *
 * @inode - file containing block range
 * @lblk - logical block defining start of range
 * @len - number of blocks in range
 *
 * Locking for external use of __es_delayed_clu().
 */
unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
				 ext4_lblk_t len)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_lblk_t end;
	unsigned int n;

	if (len == 0)
		return 0;

	end = lblk + len - 1;
	WARN_ON(end < lblk);

	read_lock(&ei->i_es_lock);

	n = __es_delayed_clu(inode, lblk, end);

	read_unlock(&ei->i_es_lock);

	return n;
}

/*
 * __revise_pending - makes, cancels, or leaves unchanged pending cluster
 *                    reservations for a specified block range depending
 *                    upon the presence or absence of delayed blocks
 *                    outside the range within clusters at the ends of the
 *                    range
 *
 * @inode - file containing the range
 * @lblk - logical block defining the start of range
 * @len  - length of range in blocks
 *
 * Used after a newly allocated extent is added to the extents status tree.
 * Requires that the extents in the range have either written or unwritten
 * status.  Must be called while holding i_es_lock.
 */
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			     ext4_lblk_t len)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t end = lblk + len - 1;
	ext4_lblk_t first, last;
	bool f_del = false, l_del = false;

	if (len == 0)
		return;

	/*
	 * Two cases - block range within single cluster and block range
	 * spanning two or more clusters.  Note that a cluster belonging
	 * to a range starting and/or ending on a cluster boundary is treated
	 * as if it does not contain a delayed extent.  The new range may
	 * have allocated space for previously delayed blocks out to the
	 * cluster boundary, requiring that any pre-existing pending
	 * reservation be canceled.  Because this code only looks at blocks
	 * outside the range, it should revise pending reservations
	 * correctly even if the extent represented by the range can't be
	 * inserted in the extents status tree due to ENOSPC.
	 */

	if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
		first = EXT4_LBLK_CMASK(sbi, lblk);
		if (first != lblk)
			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
						first, lblk - 1);
		if (f_del) {
			__insert_pending(inode, first);
		} else {
			last = EXT4_LBLK_CMASK(sbi, end) +
			       sbi->s_cluster_ratio - 1;
			if (last != end)
				l_del = __es_scan_range(inode,
							&ext4_es_is_delonly,
							end + 1, last);
			if (l_del)
				__insert_pending(inode, last);
			else
				__remove_pending(inode, last);
		}
	} else {
		first = EXT4_LBLK_CMASK(sbi, lblk);
		if (first != lblk)
			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
						first, lblk - 1);
		if (f_del)
			__insert_pending(inode, first);
		else
			__remove_pending(inode, first);

		last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
		if (last != end)
			l_del = __es_scan_range(inode, &ext4_es_is_delonly,
						end + 1, last);
		if (l_del)
			__insert_pending(inode, last);
		else
			__remove_pending(inode, last);
	}
}
|
||||
|
||||
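/*
 * Editorial worked example for __revise_pending() (numbers assumed for
 * illustration): with s_cluster_ratio == 4, blocks 4-7 form logical
 * cluster 1.  For lblk = 5 and len = 2, end = 6 and the whole range lies
 * in cluster 1, so the single-cluster case applies:
 *
 *   first = EXT4_LBLK_CMASK(sbi, 5) = 4, and since first != lblk, block 4
 *   is scanned for delayed-only status; if found, __insert_pending()
 *   keeps a pending reservation on the cluster.  Otherwise last = 7 is
 *   checked the same way, and if neither neighboring block is
 *   delayed-only, any pre-existing pending reservation is canceled via
 *   __remove_pending().
 */
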
/*
 * ext4_es_remove_blks - remove block range from extents status tree and
 *                       reduce reservation count or cancel pending
 *                       reservation as needed
 *
 * @inode - file containing range
 * @lblk - first block in range
 * @len - number of blocks to remove
 *
 */
void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk,
			 ext4_lblk_t len)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	unsigned int clu_size, reserved = 0;
	ext4_lblk_t last_lclu, first, length, remainder, last;
	bool delonly;
	int err = 0;
	struct pending_reservation *pr;
	struct ext4_pending_tree *tree;

	/*
	 * Process cluster by cluster for bigalloc - there may be up to
	 * two clusters in a 4k page with a 1k block size and two blocks
	 * per cluster.  Also necessary for systems with larger page sizes
	 * and potentially larger block sizes.
	 */
	clu_size = sbi->s_cluster_ratio;
	last_lclu = EXT4_B2C(sbi, lblk + len - 1);

	write_lock(&EXT4_I(inode)->i_es_lock);

	for (first = lblk, remainder = len;
	     remainder > 0;
	     first += length, remainder -= length) {

		if (EXT4_B2C(sbi, first) == last_lclu)
			length = remainder;
		else
			length = clu_size - EXT4_LBLK_COFF(sbi, first);

		/*
		 * The BH_Delay flag, which triggers calls to this function,
		 * and the contents of the extents status tree can be
		 * inconsistent due to writepages activity.  So, note whether
		 * the blocks to be removed actually belong to an extent with
		 * delayed only status.
		 */
		delonly = __es_scan_clu(inode, &ext4_es_is_delonly, first);

		/*
		 * because of the writepages effect, written and unwritten
		 * blocks could be removed here
		 */
		last = first + length - 1;
		err = __es_remove_extent(inode, first, last);
		if (err)
			ext4_warning(inode->i_sb,
				     "%s: couldn't remove page (err = %d)",
				     __func__, err);

		/* non-bigalloc case: simply count the cluster for release */
		if (sbi->s_cluster_ratio == 1 && delonly) {
			reserved++;
			continue;
		}

		/*
		 * bigalloc case: if all delayed allocated only blocks have
		 * just been removed from a cluster, either cancel a pending
		 * reservation if it exists or count a cluster for release
		 */
		if (delonly &&
		    !__es_scan_clu(inode, &ext4_es_is_delonly, first)) {
			pr = __get_pending(inode, EXT4_B2C(sbi, first));
			if (pr != NULL) {
				tree = &EXT4_I(inode)->i_pending_tree;
				rb_erase(&pr->rb_node, &tree->root);
				kmem_cache_free(ext4_pending_cachep, pr);
			} else {
				reserved++;
			}
		}
	}

	write_unlock(&EXT4_I(inode)->i_es_lock);

	ext4_da_release_space(inode, reserved);
}

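/*
 * Editorial worked example for the loop above (numbers assumed for
 * illustration): with clu_size == 2, lblk == 1, and len == 5, the loop
 * splits the range at cluster boundaries and processes three chunks:
 *
 *   first = 1, length = 2 - EXT4_LBLK_COFF(sbi, 1) = 1  ->  block  [1]
 *   first = 2, length = 2                               ->  blocks [2..3]
 *   first = 4, EXT4_B2C(sbi, 4) == last_lclu, so
 *              length = remainder = 2                   ->  blocks [4..5]
 */
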
@@ -78,6 +78,51 @@ struct ext4_es_stats {
	struct percpu_counter es_stats_shk_cnt;
};

/*
 * Pending cluster reservations for bigalloc file systems
 *
 * A cluster with a pending reservation is a logical cluster shared by at
 * least one extent in the extents status tree with delayed and unwritten
 * status and at least one other written or unwritten extent.  The
 * reservation is said to be pending because a cluster reservation would
 * have to be taken in the event all blocks in the cluster shared with
 * written or unwritten extents were deleted while the delayed and
 * unwritten blocks remained.
 *
 * The set of pending cluster reservations is an auxiliary data structure
 * used with the extents status tree to implement reserved cluster/block
 * accounting for bigalloc file systems.  The set is kept in memory and
 * records all pending cluster reservations.
 *
 * Its primary function is to avoid the need to read extents from the
 * disk when invalidating pages as a result of a truncate, punch hole, or
 * collapse range operation.  Page invalidation requires a decrease in the
 * reserved cluster count if it results in the removal of all delayed
 * and unwritten extents (blocks) from a cluster that is not shared with a
 * written or unwritten extent, and no decrease otherwise.  Determining
 * whether the cluster is shared can be done by searching for a pending
 * reservation on it.
 *
 * Secondarily, it provides a potentially faster method for determining
 * whether the reserved cluster count should be increased when a physical
 * cluster is deallocated as a result of a truncate, punch hole, or
 * collapse range operation.  The necessary information is also present
 * in the extents status tree, but might be more rapidly accessed in
 * the pending reservation set in many cases due to smaller size.
 *
 * The pending cluster reservation set is implemented as a red-black tree
 * with the goal of minimizing per page search time overhead.
 */

struct pending_reservation {
	struct rb_node rb_node;
	ext4_lblk_t lclu;
};

struct ext4_pending_tree {
	struct rb_root root;
};

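/*
 * Editorial sketch (not part of the patch): a pending tree lookup keyed
 * by logical cluster, in the kernel's usual rb-tree style.  The name
 * pending_lookup() is hypothetical -- the patch supplies its own helpers
 * such as __get_pending() -- and i_es_lock is assumed held, as in the
 * callers shown earlier.
 */
static struct pending_reservation *
pending_lookup(struct ext4_pending_tree *tree, ext4_lblk_t lclu)
{
	struct rb_node *node = tree->root.rb_node;
	struct pending_reservation *pr;

	while (node) {
		pr = rb_entry(node, struct pending_reservation, rb_node);
		if (lclu < pr->lclu)
			node = node->rb_left;	/* key lies in left subtree */
		else if (lclu > pr->lclu)
			node = node->rb_right;	/* key lies in right subtree */
		else
			return pr;		/* reservation found */
	}
	return NULL;				/* no pending reservation */
}
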
extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
@@ -90,11 +135,18 @@ extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
				 unsigned int status);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
				 ext4_lblk_t len);
extern void ext4_es_find_delayed_extent_range(struct inode *inode,
					ext4_lblk_t lblk, ext4_lblk_t end,
					struct extent_status *es);
extern void ext4_es_find_extent_range(struct inode *inode,
				      int (*match_fn)(struct extent_status *es),
				      ext4_lblk_t lblk, ext4_lblk_t end,
				      struct extent_status *es);
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
				 struct extent_status *es);
extern bool ext4_es_scan_range(struct inode *inode,
			       int (*matching_fn)(struct extent_status *es),
			       ext4_lblk_t lblk, ext4_lblk_t end);
extern bool ext4_es_scan_clu(struct inode *inode,
			     int (*matching_fn)(struct extent_status *es),
			     ext4_lblk_t lblk);

static inline unsigned int ext4_es_status(struct extent_status *es)
{
@@ -126,6 +178,16 @@ static inline int ext4_es_is_hole(struct extent_status *es)
	return (ext4_es_type(es) & EXTENT_STATUS_HOLE) != 0;
}

static inline int ext4_es_is_mapped(struct extent_status *es)
{
	return (ext4_es_is_written(es) || ext4_es_is_unwritten(es));
}

static inline int ext4_es_is_delonly(struct extent_status *es)
{
	return (ext4_es_is_delayed(es) && !ext4_es_is_unwritten(es));
}

static inline void ext4_es_set_referenced(struct extent_status *es)
{
	es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT;
@@ -175,4 +237,16 @@ extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);

extern int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v);

extern int __init ext4_init_pending(void);
extern void ext4_exit_pending(void);
extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
extern int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
					bool allocated);
extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
					ext4_lblk_t len);
extern void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk,
				ext4_lblk_t len);

#endif /* _EXT4_EXTENTS_STATUS_H */

@@ -863,7 +863,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
	handle_t *handle;
	struct page *page;
	struct ext4_iloc iloc;
	int retries;
	int retries = 0;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)

@@ -577,8 +577,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
			EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
		    !(status & EXTENT_STATUS_WRITTEN) &&
		    ext4_find_delalloc_range(inode, map->m_lblk,
					     map->m_lblk + map->m_len - 1))
		    ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
				       map->m_lblk + map->m_len - 1))
			status |= EXTENT_STATUS_DELAYED;
		ret = ext4_es_insert_extent(inode, map->m_lblk,
					    map->m_len, map->m_pblk, status);
@@ -701,8 +701,8 @@ found:
			EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
		    !(status & EXTENT_STATUS_WRITTEN) &&
		    ext4_find_delalloc_range(inode, map->m_lblk,
					     map->m_lblk + map->m_len - 1))
		    ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
				       map->m_lblk + map->m_len - 1))
			status |= EXTENT_STATUS_DELAYED;
		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
					    map->m_pblk, status);
@@ -1595,7 +1595,7 @@ static int ext4_da_reserve_space(struct inode *inode)
	return 0;       /* success */
}

static void ext4_da_release_space(struct inode *inode, int to_free)
void ext4_da_release_space(struct inode *inode, int to_free)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
@@ -1634,13 +1634,11 @@ static void ext4_da_page_release_reservation(struct page *page,
					     unsigned int offset,
					     unsigned int length)
{
	int to_release = 0, contiguous_blks = 0;
	int contiguous_blks = 0;
	struct buffer_head *head, *bh;
	unsigned int curr_off = 0;
	struct inode *inode = page->mapping->host;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	unsigned int stop = offset + length;
	int num_clusters;
	ext4_fsblk_t lblk;

	BUG_ON(stop > PAGE_SIZE || stop < length);
@@ -1654,7 +1652,6 @@ static void ext4_da_page_release_reservation(struct page *page,
			break;

		if ((offset <= curr_off) && (buffer_delay(bh))) {
			to_release++;
			contiguous_blks++;
			clear_buffer_delay(bh);
		} else if (contiguous_blks) {
@@ -1662,7 +1659,7 @@ static void ext4_da_page_release_reservation(struct page *page,
				   (PAGE_SHIFT - inode->i_blkbits);
			lblk += (curr_off >> inode->i_blkbits) -
				contiguous_blks;
			ext4_es_remove_extent(inode, lblk, contiguous_blks);
			ext4_es_remove_blks(inode, lblk, contiguous_blks);
			contiguous_blks = 0;
		}
		curr_off = next_off;
@@ -1671,21 +1668,9 @@ static void ext4_da_page_release_reservation(struct page *page,
	if (contiguous_blks) {
		lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
		lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
		ext4_es_remove_extent(inode, lblk, contiguous_blks);
		ext4_es_remove_blks(inode, lblk, contiguous_blks);
	}

	/* If we have released all the blocks belonging to a cluster, then we
	 * need to release the reserved space for that cluster. */
	num_clusters = EXT4_NUM_B2C(sbi, to_release);
	while (num_clusters > 0) {
		lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) +
			((num_clusters - 1) << sbi->s_cluster_bits);
		if (sbi->s_cluster_ratio == 1 ||
		    !ext4_find_delalloc_cluster(inode, lblk))
			ext4_da_release_space(inode, 1);

		num_clusters--;
	}
}

/*
@@ -1780,6 +1765,65 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
	return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
}

/*
 * ext4_insert_delayed_block - adds a delayed block to the extents status
 *                             tree, incrementing the reserved cluster/block
 *                             count or making a pending reservation
 *                             where needed
 *
 * @inode - file containing the newly added block
 * @lblk - logical block to be added
 *
 * Returns 0 on success, negative error code on failure.
 */
static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	int ret;
	bool allocated = false;

	/*
	 * If the cluster containing lblk is shared with a delayed,
	 * written, or unwritten extent in a bigalloc file system, it's
	 * already been accounted for and does not need to be reserved.
	 * A pending reservation must be made for the cluster if it's
	 * shared with a written or unwritten extent and doesn't already
	 * have one.  Written and unwritten extents can be purged from the
	 * extents status tree if the system is under memory pressure, so
	 * it's necessary to examine the extent tree if a search of the
	 * extents status tree doesn't get a match.
	 */
	if (sbi->s_cluster_ratio == 1) {
		ret = ext4_da_reserve_space(inode);
		if (ret != 0)   /* ENOSPC */
			goto errout;
	} else {   /* bigalloc */
		if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
			if (!ext4_es_scan_clu(inode,
					      &ext4_es_is_mapped, lblk)) {
				ret = ext4_clu_mapped(inode,
						      EXT4_B2C(sbi, lblk));
				if (ret < 0)
					goto errout;
				if (ret == 0) {
					ret = ext4_da_reserve_space(inode);
					if (ret != 0)   /* ENOSPC */
						goto errout;
				} else {
					allocated = true;
				}
			} else {
				allocated = true;
			}
		}
	}

	ret = ext4_es_insert_delayed_block(inode, lblk, allocated);

errout:
	return ret;
}

/*
 * This function grabs code from the very beginning of
 * ext4_map_blocks, but assumes that the caller is from delayed write
@@ -1859,28 +1903,14 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
add_delayed:
	if (retval == 0) {
		int ret;

		/*
		 * XXX: __block_prepare_write() unmaps passed block,
		 * is it OK?
		 */
		/*
		 * If the block was allocated from previously allocated cluster,
		 * then we don't need to reserve it again. However we still need
		 * to reserve metadata for every block we're going to write.
		 */
		if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 ||
		    !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
			ret = ext4_da_reserve_space(inode);
			if (ret) {
				/* not enough space to reserve */
				retval = ret;
				goto out_unlock;
			}
		}

		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
					    ~0, EXTENT_STATUS_DELAYED);
		if (ret) {
		ret = ext4_insert_delayed_block(inode, map->m_lblk);
		if (ret != 0) {
			retval = ret;
			goto out_unlock;
		}
@@ -3450,7 +3480,8 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		ext4_lblk_t end = map.m_lblk + map.m_len - 1;
		struct extent_status es;

		ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);
		ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
					  map.m_lblk, end, &es);

		if (!es.es_len || es.es_lblk > end) {
			/* entire range is a hole */
@@ -6153,13 +6184,14 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
	return !buffer_mapped(bh);
}

int ext4_page_mkwrite(struct vm_fault *vmf)
vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct page *page = vmf->page;
	loff_t size;
	unsigned long len;
	int ret;
	int err;
	vm_fault_t ret;
	struct file *file = vma->vm_file;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
@@ -6172,8 +6204,8 @@ int ext4_page_mkwrite(struct vm_fault *vmf)

	down_read(&EXT4_I(inode)->i_mmap_sem);

	ret = ext4_convert_inline_data(inode);
	if (ret)
	err = ext4_convert_inline_data(inode);
	if (err)
		goto out_ret;

	/* Delalloc case is easy... */
@@ -6181,9 +6213,9 @@ int ext4_page_mkwrite(struct vm_fault *vmf)
	    !ext4_should_journal_data(inode) &&
	    !ext4_nonda_switch(inode->i_sb)) {
		do {
			ret = block_page_mkwrite(vma, vmf,
			err = block_page_mkwrite(vma, vmf,
						 ext4_da_get_block_prep);
		} while (ret == -ENOSPC &&
		} while (err == -ENOSPC &&
			 ext4_should_retry_alloc(inode->i_sb, &retries));
		goto out_ret;
	}
@@ -6228,8 +6260,8 @@ retry_alloc:
		ret = VM_FAULT_SIGBUS;
		goto out;
	}
	ret = block_page_mkwrite(vma, vmf, get_block);
	if (!ret && ext4_should_journal_data(inode)) {
	err = block_page_mkwrite(vma, vmf, get_block);
	if (!err && ext4_should_journal_data(inode)) {
		if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
			  PAGE_SIZE, NULL, do_journal_get_write_access)) {
			unlock_page(page);
@@ -6240,24 +6272,24 @@ retry_alloc:
		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
	}
	ext4_journal_stop(handle);
	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
	if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry_alloc;
out_ret:
	ret = block_page_mkwrite_return(ret);
	ret = block_page_mkwrite_return(err);
out:
	up_read(&EXT4_I(inode)->i_mmap_sem);
	sb_end_pagefault(inode->i_sb);
	return ret;
}

int ext4_filemap_fault(struct vm_fault *vmf)
vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	int err;
	vm_fault_t ret;

	down_read(&EXT4_I(inode)->i_mmap_sem);
	err = filemap_fault(vmf);
	ret = filemap_fault(vmf);
	up_read(&EXT4_I(inode)->i_mmap_sem);

	return err;
	return ret;
}

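/*
 * Editorial note on the conversions above: errno values (int) and fault
 * status bits (vm_fault_t) are distinct types, so the fault handlers now
 * keep the errno returned by helpers such as block_page_mkwrite() in
 * 'err' and derive the vm_fault_t 'ret' via block_page_mkwrite_return(),
 * rather than carrying both meanings in a single int.
 */
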
@@ -67,7 +67,6 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
	ei1 = EXT4_I(inode1);
	ei2 = EXT4_I(inode2);

	swap(inode1->i_flags, inode2->i_flags);
	swap(inode1->i_version, inode2->i_version);
	swap(inode1->i_blocks, inode2->i_blocks);
	swap(inode1->i_bytes, inode2->i_bytes);
@@ -85,6 +84,21 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
	i_size_write(inode2, isize);
}

static void reset_inode_seed(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	__le32 inum = cpu_to_le32(inode->i_ino);
	__le32 gen = cpu_to_le32(inode->i_generation);
	__u32 csum;

	if (!ext4_has_metadata_csum(inode->i_sb))
		return;

	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, sizeof(inum));
	ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen));
}

/**
 * Swap the information from the given @inode and the inode
 * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other
@@ -102,10 +116,13 @@ static long swap_inode_boot_loader(struct super_block *sb,
	struct inode *inode_bl;
	struct ext4_inode_info *ei_bl;

	if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode))
	if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) ||
	    IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) ||
	    ext4_has_inline_data(inode))
		return -EINVAL;

	if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
	if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
	    !inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
		return -EPERM;

	inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
@@ -120,13 +137,13 @@ static long swap_inode_boot_loader(struct super_block *sb,
	 * that only 1 swap_inode_boot_loader is running. */
	lock_two_nondirectories(inode, inode_bl);

	truncate_inode_pages(&inode->i_data, 0);
	truncate_inode_pages(&inode_bl->i_data, 0);

	/* Wait for all existing dio workers */
	inode_dio_wait(inode);
	inode_dio_wait(inode_bl);

	truncate_inode_pages(&inode->i_data, 0);
	truncate_inode_pages(&inode_bl->i_data, 0);

	handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
	if (IS_ERR(handle)) {
		err = -EINVAL;
@@ -159,6 +176,8 @@ static long swap_inode_boot_loader(struct super_block *sb,

	inode->i_generation = prandom_u32();
	inode_bl->i_generation = prandom_u32();
	reset_inode_seed(inode);
	reset_inode_seed(inode_bl);

	ext4_discard_preallocations(inode);

@@ -169,6 +188,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
			 inode->i_ino, err);
		/* Revert all changes: */
		swap_inode_data(inode, inode_bl);
		ext4_mark_inode_dirty(handle, inode);
	} else {
		err = ext4_mark_inode_dirty(handle, inode_bl);
		if (err < 0) {
@@ -178,6 +198,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
			/* Revert all changes: */
			swap_inode_data(inode, inode_bl);
			ext4_mark_inode_dirty(handle, inode);
			ext4_mark_inode_dirty(handle, inode_bl);
		}
	}
	ext4_journal_stop(handle);
@@ -339,19 +360,14 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
	if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
		return 0;

	err = mnt_want_write_file(filp);
	if (err)
		return err;

	err = -EPERM;
	inode_lock(inode);
	/* Is it quota file? Do not allow user to mess with it */
	if (ext4_is_quota_file(inode))
		goto out_unlock;
		return err;

	err = ext4_get_inode_loc(inode, &iloc);
	if (err)
		goto out_unlock;
		return err;

	raw_inode = ext4_raw_inode(&iloc);
	if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
@@ -359,20 +375,20 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
					  EXT4_SB(sb)->s_want_extra_isize,
					  &iloc);
		if (err)
			goto out_unlock;
			return err;
	} else {
		brelse(iloc.bh);
	}

	dquot_initialize(inode);
	err = dquot_initialize(inode);
	if (err)
		return err;

	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
		EXT4_QUOTA_INIT_BLOCKS(sb) +
		EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto out_unlock;
	}
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
@@ -400,9 +416,6 @@ out_dirty:
		err = rc;
out_stop:
	ext4_journal_stop(handle);
out_unlock:
	inode_unlock(inode);
	mnt_drop_write_file(filp);
	return err;
}
#else
@@ -626,6 +639,30 @@ group_add_out:
	return err;
}

static int ext4_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
{
	/*
	 * Project Quota ID state is only allowed to change from within the init
	 * namespace. Enforce that restriction only if we are trying to change
	 * the quota ID state. Everything else is allowed in user namespaces.
	 */
	if (current_user_ns() == &init_user_ns)
		return 0;

	if (__kprojid_val(EXT4_I(inode)->i_projid) != fa->fsx_projid)
		return -EINVAL;

	if (ext4_test_inode_flag(inode, EXT4_INODE_PROJINHERIT)) {
		if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
			return -EINVAL;
	} else {
		if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
			return -EINVAL;
	}

	return 0;
}

long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
@@ -1025,19 +1062,19 @@ resizefs_out:
			return err;

		inode_lock(inode);
		err = ext4_ioctl_check_project(inode, &fa);
		if (err)
			goto out;
		flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
			(flags & EXT4_FL_XFLAG_VISIBLE);
		err = ext4_ioctl_setflags(inode, flags);
		if (err)
			goto out;
		err = ext4_ioctl_setproject(filp, fa.fsx_projid);
out:
		inode_unlock(inode);
		mnt_drop_write_file(filp);
		if (err)
			return err;

		err = ext4_ioctl_setproject(filp, fa.fsx_projid);
		if (err)
			return err;

		return 0;
		return err;
	}
	case EXT4_IOC_SHUTDOWN:
		return ext4_shutdown(sb, arg);

@@ -4915,9 +4915,17 @@ do_more:
			     &sbi->s_flex_groups[flex_group].free_clusters);
	}

	if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
		dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
	percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
	/*
	 * on a bigalloc file system, defer the s_freeclusters_counter
	 * update to the caller (ext4_remove_space and friends) so they
	 * can determine if a cluster freed here should be rereserved
	 */
	if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
		if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
			dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
		percpu_counter_add(&sbi->s_freeclusters_counter,
				   count_clusters);
	}

	ext4_mb_unload_buddy(&e4b);

@@ -516,9 +516,13 @@ mext_check_arguments(struct inode *orig_inode,
			orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}
	if (orig_eof < orig_start + *len - 1)
	if (orig_eof <= orig_start)
		*len = 0;
	else if (orig_eof < orig_start + *len - 1)
		*len = orig_eof - orig_start;
	if (donor_eof < donor_start + *len - 1)
	if (donor_eof <= donor_start)
		*len = 0;
	else if (donor_eof < donor_start + *len - 1)
		*len = donor_eof - donor_start;
	if (!*len) {
		ext4_debug("ext4 move extent: len should not be 0 "
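/*
 * Editorial worked example for the check above (values assumed): with
 * orig_eof = 10 and orig_start = 12, the old code took the
 * "orig_eof < orig_start + *len - 1" branch and computed
 * *len = orig_eof - orig_start, underflowing the unsigned length; the
 * fixed code sets *len = 0 instead, which the !*len test below rejects
 * cleanly with -EINVAL.
 */
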
@@ -2261,7 +2261,7 @@ again:
			dxroot->info.indirect_levels += 1;
			dxtrace(printk(KERN_DEBUG
				       "Creating %d level index...\n",
				       info->indirect_levels));
				       dxroot->info.indirect_levels));
			err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
			if (err)
				goto journal_error;

@@ -914,6 +914,18 @@ static inline void ext4_quota_off_umount(struct super_block *sb)
	for (type = 0; type < EXT4_MAXQUOTAS; type++)
		ext4_quota_off(sb, type);
}

/*
 * This is a helper function which is used in the mount/remount
 * code paths (which hold s_umount) to fetch the quota file name.
 */
static inline char *get_qf_name(struct super_block *sb,
				struct ext4_sb_info *sbi,
				int type)
{
	return rcu_dereference_protected(sbi->s_qf_names[type],
					 lockdep_is_held(&sb->s_umount));
}
#else
static inline void ext4_quota_off_umount(struct super_block *sb)
{
@@ -965,7 +977,7 @@ static void ext4_put_super(struct super_block *sb)
	percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
#ifdef CONFIG_QUOTA
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
		kfree(get_qf_name(sb, sbi, i));
#endif

	/* Debugging code just in case the in-memory inode orphan list
@@ -1040,6 +1052,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
	ei->i_da_metadata_calc_len = 0;
	ei->i_da_metadata_calc_last_lblock = 0;
	spin_lock_init(&(ei->i_block_reservation_lock));
	ext4_init_pending_tree(&ei->i_pending_tree);
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
@@ -1530,11 +1543,10 @@ static const char deprecated_msg[] =
static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *qname;
	char *qname, *old_qname = get_qf_name(sb, sbi, qtype);
	int ret = -1;

	if (sb_any_quota_loaded(sb) &&
	    !sbi->s_qf_names[qtype]) {
	if (sb_any_quota_loaded(sb) && !old_qname) {
		ext4_msg(sb, KERN_ERR,
			 "Cannot change journaled "
			 "quota options when quota turned on");
@@ -1551,8 +1563,8 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
			 "Not enough memory for storing quotafile name");
		return -1;
	}
	if (sbi->s_qf_names[qtype]) {
		if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
	if (old_qname) {
		if (strcmp(old_qname, qname) == 0)
			ret = 1;
		else
			ext4_msg(sb, KERN_ERR,
@@ -1565,7 +1577,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
			 "quotafile must be on filesystem root");
		goto errout;
	}
	sbi->s_qf_names[qtype] = qname;
	rcu_assign_pointer(sbi->s_qf_names[qtype], qname);
	set_opt(sb, QUOTA);
	return 1;
errout:
@@ -1577,15 +1589,16 @@ static int clear_qf_name(struct super_block *sb, int qtype)
{

	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *old_qname = get_qf_name(sb, sbi, qtype);

	if (sb_any_quota_loaded(sb) &&
	    sbi->s_qf_names[qtype]) {
	if (sb_any_quota_loaded(sb) && old_qname) {
		ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
			" when quota turned on");
		return -1;
	}
	kfree(sbi->s_qf_names[qtype]);
	sbi->s_qf_names[qtype] = NULL;
	rcu_assign_pointer(sbi->s_qf_names[qtype], NULL);
	synchronize_rcu();
	kfree(old_qname);
	return 1;
}
#endif

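/*
 * Editorial sketch of the RCU publish pattern the quota-name changes
 * above and below rely on (publish_name() is a hypothetical helper, not
 * from the patch; the writer is assumed to hold s_umount, matching
 * get_qf_name()):
 */
static void publish_name(char __rcu **slot, char *new_name)
{
	char *old = rcu_dereference_protected(*slot, 1);

	rcu_assign_pointer(*slot, new_name);	/* publish the new pointer */
	synchronize_rcu();			/* wait out current readers */
	kfree(old);				/* old name now unreachable */
}
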
@@ -1960,7 +1973,7 @@ static int parse_options(char *options, struct super_block *sb,
			 int is_remount)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *p;
	char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name;
	substring_t args[MAX_OPT_ARGS];
	int token;

@@ -1991,11 +2004,13 @@ static int parse_options(char *options, struct super_block *sb,
			"Cannot enable project quota enforcement.");
		return 0;
	}
	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
	usr_qf_name = get_qf_name(sb, sbi, USRQUOTA);
	grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA);
	if (usr_qf_name || grp_qf_name) {
		if (test_opt(sb, USRQUOTA) && usr_qf_name)
			clear_opt(sb, USRQUOTA);

		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
		if (test_opt(sb, GRPQUOTA) && grp_qf_name)
			clear_opt(sb, GRPQUOTA);

		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
@@ -2029,6 +2044,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *usr_qf_name, *grp_qf_name;

	if (sbi->s_jquota_fmt) {
		char *fmtname = "";
@@ -2047,11 +2063,14 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	if (sbi->s_qf_names[USRQUOTA])
		seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA])
		seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
	rcu_read_lock();
	usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
	grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
	if (usr_qf_name)
		seq_show_option(seq, "usrjquota", usr_qf_name);
	if (grp_qf_name)
		seq_show_option(seq, "grpjquota", grp_qf_name);
	rcu_read_unlock();
#endif
}

@@ -5103,6 +5122,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
	int err = 0;
#ifdef CONFIG_QUOTA
	int i, j;
	char *to_free[EXT4_MAXQUOTAS];
#endif
	char *orig_data = kstrdup(data, GFP_KERNEL);

@@ -5122,8 +5142,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		if (sbi->s_qf_names[i]) {
			old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
							 GFP_KERNEL);
			char *qf_name = get_qf_name(sb, sbi, i);

			old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
			if (!old_opts.s_qf_names[i]) {
				for (j = 0; j < i; j++)
					kfree(old_opts.s_qf_names[j]);
@@ -5352,9 +5373,12 @@ restore_opts:
#ifdef CONFIG_QUOTA
	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
		kfree(sbi->s_qf_names[i]);
		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
		to_free[i] = get_qf_name(sb, sbi, i);
		rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
	}
	synchronize_rcu();
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(to_free[i]);
#endif
	kfree(orig_data);
	return err;
@@ -5545,7 +5569,7 @@ static int ext4_write_info(struct super_block *sb, int type)
 */
static int ext4_quota_on_mount(struct super_block *sb, int type)
{
	return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
	return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type),
				    EXT4_SB(sb)->s_jquota_fmt, type);
}

@@ -5954,6 +5978,10 @@ static int __init ext4_init_fs(void)
	if (err)
		return err;

	err = ext4_init_pending();
	if (err)
		goto out6;

	err = ext4_init_pageio();
	if (err)
		goto out5;
@@ -5992,6 +6020,8 @@ out3:
out4:
	ext4_exit_pageio();
out5:
	ext4_exit_pending();
out6:
	ext4_exit_es();

	return err;
@@ -6009,6 +6039,7 @@ static void __exit ext4_exit_fs(void)
	ext4_exit_system_zone();
	ext4_exit_pageio();
	ext4_exit_es();
	ext4_exit_pending();
}

MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");