Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "This merge window, we've added some performance improvements in how we
  handle inode locking in the read/write paths, and improved the
  performance of Direct I/O overwrites.

  We also now record the error code which caused the first and most
  recent ext4_error() report in the superblock, to make it easier to
  root cause problems in production systems.

  There are also many of the usual cleanups and miscellaneous bug fixes"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (49 commits)
  jbd2: clean __jbd2_journal_abort_hard() and __journal_abort_soft()
  jbd2: make sure ESHUTDOWN to be recorded in the journal superblock
  ext4, jbd2: ensure panic when aborting with zero errno
  jbd2: switch to use jbd2_journal_abort() when failed to submit the commit record
  jbd2_seq_info_next should increase position index
  jbd2: remove pointless assertion in __journal_remove_journal_head
  ext4,jbd2: fix comment and code style
  jbd2: delete the duplicated words in the comments
  ext4: fix extent_status trace points
  ext4: fix symbolic enum printing in trace output
  ext4: choose hardlimit when softlimit is larger than hardlimit in ext4_statfs_project()
  ext4: fix race conditions in ->d_compare() and ->d_hash()
  ext4: make dioread_nolock the default
  ext4: fix extent_status fragmentation for plain files
  jbd2: clear JBD2_ABORT flag before journal_reset to update log tail info when load journal
  ext4: drop ext4_kvmalloc()
  ext4: Add EXT4_IOC_FSGETXATTR/EXT4_IOC_FSSETXATTR to compat_ioctl
  ext4: remove unused macro MPAGE_DA_EXTENT_TAIL
  ext4: add missing braces in ext4_ext_drop_refs()
  ext4: fix some nonstandard indentation in extents.c
  ...
This commit is contained in:
Linus Torvalds
2020-01-30 15:17:05 -08:00
32 changed files with 709 additions and 432 deletions

View File

@@ -92,6 +92,8 @@ Currently Available
* efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force * efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force
the ordering) the ordering)
* Case-insensitive file name lookups * Case-insensitive file name lookups
* file-based encryption support (fscrypt)
* file-based verity support (fsverity)
[1] Filesystems with a block size of 1k may see a limit imposed by the [1] Filesystems with a block size of 1k may see a limit imposed by the
directory hash tree having a maximum depth of two. directory hash tree having a maximum depth of two.

View File

@@ -1016,9 +1016,9 @@ astute users may notice some differences in behavior:
- Direct I/O is not supported on encrypted files. Attempts to use - Direct I/O is not supported on encrypted files. Attempts to use
direct I/O on such files will fall back to buffered I/O. direct I/O on such files will fall back to buffered I/O.
- The fallocate operations FALLOC_FL_COLLAPSE_RANGE, - The fallocate operations FALLOC_FL_COLLAPSE_RANGE and
FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported FALLOC_FL_INSERT_RANGE are not supported on encrypted files and will
on encrypted files and will fail with EOPNOTSUPP. fail with EOPNOTSUPP.
- Online defragmentation of encrypted files is not supported. The - Online defragmentation of encrypted files is not supported. The
EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with

View File

@@ -4,12 +4,7 @@
# kernels after the removal of ext3 driver. # kernels after the removal of ext3 driver.
config EXT3_FS config EXT3_FS
tristate "The Extended 3 (ext3) filesystem" tristate "The Extended 3 (ext3) filesystem"
# These must match EXT4_FS selects...
select EXT4_FS select EXT4_FS
select JBD2
select CRC16
select CRYPTO
select CRYPTO_CRC32C
help help
This config option is here only for backward compatibility. ext3 This config option is here only for backward compatibility. ext3
filesystem is now handled by the ext4 driver. filesystem is now handled by the ext4 driver.
@@ -33,7 +28,6 @@ config EXT3_FS_SECURITY
config EXT4_FS config EXT4_FS
tristate "The Extended 4 (ext4) filesystem" tristate "The Extended 4 (ext4) filesystem"
# Please update EXT3_FS selects when changing these
select JBD2 select JBD2
select CRC16 select CRC16
select CRYPTO select CRYPTO

View File

@@ -371,7 +371,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
if (buffer_verified(bh)) if (buffer_verified(bh))
goto verified; goto verified;
if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
desc, bh))) { desc, bh) ||
ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) {
ext4_unlock_group(sb, block_group); ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
ext4_mark_group_bitmap_corrupted(sb, block_group, ext4_mark_group_bitmap_corrupted(sb, block_group,
@@ -505,7 +506,9 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
if (!desc) if (!desc)
return -EFSCORRUPTED; return -EFSCORRUPTED;
wait_on_buffer(bh); wait_on_buffer(bh);
ext4_simulate_fail_bh(sb, bh, EXT4_SIM_BBITMAP_EIO);
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
ext4_set_errno(sb, EIO);
ext4_error(sb, "Cannot read block bitmap - " ext4_error(sb, "Cannot read block bitmap - "
"block_group = %u, block_bitmap = %llu", "block_group = %u, block_bitmap = %llu",
block_group, (unsigned long long) bh->b_blocknr); block_group, (unsigned long long) bh->b_blocknr);

View File

@@ -462,7 +462,6 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
new_fn->name_len = ent_name->len; new_fn->name_len = ent_name->len;
new_fn->file_type = dirent->file_type; new_fn->file_type = dirent->file_type;
memcpy(new_fn->name, ent_name->name, ent_name->len); memcpy(new_fn->name, ent_name->name, ent_name->len);
new_fn->name[ent_name->len] = 0;
while (*p) { while (*p) {
parent = *p; parent = *p;
@@ -672,9 +671,11 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name) const char *str, const struct qstr *name)
{ {
struct qstr qstr = {.name = str, .len = len }; struct qstr qstr = {.name = str, .len = len };
struct inode *inode = dentry->d_parent->d_inode; const struct dentry *parent = READ_ONCE(dentry->d_parent);
const struct inode *inode = READ_ONCE(parent->d_inode);
if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { if (!inode || !IS_CASEFOLDED(inode) ||
!EXT4_SB(inode->i_sb)->s_encoding) {
if (len != name->len) if (len != name->len)
return -1; return -1;
return memcmp(str, name->name, len); return memcmp(str, name->name, len);
@@ -687,10 +688,11 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
{ {
const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb); const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
const struct unicode_map *um = sbi->s_encoding; const struct unicode_map *um = sbi->s_encoding;
const struct inode *inode = READ_ONCE(dentry->d_inode);
unsigned char *norm; unsigned char *norm;
int len, ret = 0; int len, ret = 0;
if (!IS_CASEFOLDED(dentry->d_inode) || !um) if (!inode || !IS_CASEFOLDED(inode) || !um)
return 0; return 0;
norm = kmalloc(PATH_MAX, GFP_ATOMIC); norm = kmalloc(PATH_MAX, GFP_ATOMIC);

View File

@@ -1052,8 +1052,6 @@ struct ext4_inode_info {
/* allocation reservation info for delalloc */ /* allocation reservation info for delalloc */
/* In case of bigalloc, this refer to clusters rather than blocks */ /* In case of bigalloc, this refer to clusters rather than blocks */
unsigned int i_reserved_data_blocks; unsigned int i_reserved_data_blocks;
ext4_lblk_t i_da_metadata_calc_last_lblock;
int i_da_metadata_calc_len;
/* pending cluster reservations for bigalloc file systems */ /* pending cluster reservations for bigalloc file systems */
struct ext4_pending_tree i_pending_tree; struct ext4_pending_tree i_pending_tree;
@@ -1343,7 +1341,8 @@ struct ext4_super_block {
__u8 s_lastcheck_hi; __u8 s_lastcheck_hi;
__u8 s_first_error_time_hi; __u8 s_first_error_time_hi;
__u8 s_last_error_time_hi; __u8 s_last_error_time_hi;
__u8 s_pad[2]; __u8 s_first_error_errcode;
__u8 s_last_error_errcode;
__le16 s_encoding; /* Filename charset encoding */ __le16 s_encoding; /* Filename charset encoding */
__le16 s_encoding_flags; /* Filename charset encoding flags */ __le16 s_encoding_flags; /* Filename charset encoding flags */
__le32 s_reserved[95]; /* Padding to the end of the block */ __le32 s_reserved[95]; /* Padding to the end of the block */
@@ -1556,6 +1555,9 @@ struct ext4_sb_info {
/* Barrier between changing inodes' journal flags and writepages ops. */ /* Barrier between changing inodes' journal flags and writepages ops. */
struct percpu_rw_semaphore s_journal_flag_rwsem; struct percpu_rw_semaphore s_journal_flag_rwsem;
struct dax_device *s_daxdev; struct dax_device *s_daxdev;
#ifdef CONFIG_EXT4_DEBUG
unsigned long s_simulate_fail;
#endif
}; };
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1574,6 +1576,66 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
} }
/*
* Simulate_fail codes
*/
#define EXT4_SIM_BBITMAP_EIO 1
#define EXT4_SIM_BBITMAP_CRC 2
#define EXT4_SIM_IBITMAP_EIO 3
#define EXT4_SIM_IBITMAP_CRC 4
#define EXT4_SIM_INODE_EIO 5
#define EXT4_SIM_INODE_CRC 6
#define EXT4_SIM_DIRBLOCK_EIO 7
#define EXT4_SIM_DIRBLOCK_CRC 8
static inline bool ext4_simulate_fail(struct super_block *sb,
unsigned long code)
{
#ifdef CONFIG_EXT4_DEBUG
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (unlikely(sbi->s_simulate_fail == code)) {
sbi->s_simulate_fail = 0;
return true;
}
#endif
return false;
}
static inline void ext4_simulate_fail_bh(struct super_block *sb,
struct buffer_head *bh,
unsigned long code)
{
if (!IS_ERR(bh) && ext4_simulate_fail(sb, code))
clear_buffer_uptodate(bh);
}
/*
* Error number codes for s_{first,last}_error_errno
*
* Linux errno numbers are architecture specific, so we need to translate
* them into something which is architecture independent. We don't define
* codes for all errno's; just the ones which are most likely to be the cause
* of an ext4_error() call.
*/
#define EXT4_ERR_UNKNOWN 1
#define EXT4_ERR_EIO 2
#define EXT4_ERR_ENOMEM 3
#define EXT4_ERR_EFSBADCRC 4
#define EXT4_ERR_EFSCORRUPTED 5
#define EXT4_ERR_ENOSPC 6
#define EXT4_ERR_ENOKEY 7
#define EXT4_ERR_EROFS 8
#define EXT4_ERR_EFBIG 9
#define EXT4_ERR_EEXIST 10
#define EXT4_ERR_ERANGE 11
#define EXT4_ERR_EOVERFLOW 12
#define EXT4_ERR_EBUSY 13
#define EXT4_ERR_ENOTDIR 14
#define EXT4_ERR_ENOTEMPTY 15
#define EXT4_ERR_ESHUTDOWN 16
#define EXT4_ERR_EFAULT 17
/* /*
* Inode dynamic state flags * Inode dynamic state flags
*/ */
@@ -2628,7 +2690,6 @@ extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
/* indirect.c */ /* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags); struct ext4_map_blocks *map, int flags);
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
extern void ext4_ind_truncate(handle_t *, struct inode *inode); extern void ext4_ind_truncate(handle_t *, struct inode *inode);
extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode, extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
@@ -2679,8 +2740,6 @@ extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
extern int ext4_seq_options_show(struct seq_file *seq, void *offset); extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
extern int ext4_calculate_overhead(struct super_block *sb); extern int ext4_calculate_overhead(struct super_block *sb);
extern void ext4_superblock_csum_set(struct super_block *sb); extern void ext4_superblock_csum_set(struct super_block *sb);
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern int ext4_alloc_flex_bg_array(struct super_block *sb, extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup); ext4_group_t ngroup);
extern const char *ext4_decode_error(struct super_block *sb, int errno, extern const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -2688,6 +2747,7 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno,
extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
ext4_group_t block_group, ext4_group_t block_group,
unsigned int flags); unsigned int flags);
extern void ext4_set_errno(struct super_block *sb, int err);
extern __printf(4, 5) extern __printf(4, 5)
void __ext4_error(struct super_block *, const char *, unsigned int, void __ext4_error(struct super_block *, const char *, unsigned int,
@@ -3254,7 +3314,6 @@ struct ext4_extent;
#define EXT_MAX_BLOCKS 0xffffffff #define EXT_MAX_BLOCKS 0xffffffff
extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags); struct ext4_map_blocks *map, int flags);
@@ -3271,14 +3330,9 @@ extern int ext4_convert_unwritten_io_end_vec(handle_t *handle,
ext4_io_end_t *io_end); ext4_io_end_t *io_end);
extern int ext4_map_blocks(handle_t *handle, struct inode *inode, extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags); struct ext4_map_blocks *map, int flags);
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
ext4_lblk_t lblocks);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num, int num,
struct ext4_ext_path *path); struct ext4_ext_path *path);
extern int ext4_can_extents_be_merged(struct inode *inode,
struct ext4_extent *ex1,
struct ext4_extent *ex2);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, extern int ext4_ext_insert_extent(handle_t *, struct inode *,
struct ext4_ext_path **, struct ext4_ext_path **,
struct ext4_extent *, int); struct ext4_extent *, int);
@@ -3294,8 +3348,6 @@ extern int ext4_get_es_cache(struct inode *inode,
struct fiemap_extent_info *fieinfo, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len); __u64 start, __u64 len);
extern int ext4_ext_precache(struct inode *inode); extern int ext4_ext_precache(struct inode *inode);
extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
struct inode *inode2, ext4_lblk_t lblk1, struct inode *inode2, ext4_lblk_t lblk1,
ext4_lblk_t lblk2, ext4_lblk_t count, ext4_lblk_t lblk2, ext4_lblk_t count,
@@ -3390,6 +3442,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
} }
extern const struct iomap_ops ext4_iomap_ops; extern const struct iomap_ops ext4_iomap_ops;
extern const struct iomap_ops ext4_iomap_overwrite_ops;
extern const struct iomap_ops ext4_iomap_report_ops; extern const struct iomap_ops ext4_iomap_report_ops;
static inline int ext4_buffer_uptodate(struct buffer_head *bh) static inline int ext4_buffer_uptodate(struct buffer_head *bh)

View File

@@ -267,10 +267,5 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
0xffff); 0xffff);
} }
#define ext4_ext_dirty(handle, inode, path) \
__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
struct inode *inode, struct ext4_ext_path *path);
#endif /* _EXT4_EXTENTS */ #endif /* _EXT4_EXTENTS */

View File

@@ -7,6 +7,28 @@
#include <trace/events/ext4.h> #include <trace/events/ext4.h>
int ext4_inode_journal_mode(struct inode *inode)
{
if (EXT4_JOURNAL(inode) == NULL)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
/* We do not support data journalling with delayed allocation */
if (!S_ISREG(inode->i_mode) ||
ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) ||
test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
(ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
!test_opt(inode->i_sb, DELALLOC))) {
/* We do not support data journalling for encrypted data */
if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode))
return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
}
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
BUG();
}
/* Just increment the non-pointer handle value */ /* Just increment the non-pointer handle value */
static handle_t *ext4_get_nojournal(void) static handle_t *ext4_get_nojournal(void)
{ {
@@ -58,6 +80,7 @@ static int ext4_journal_check_start(struct super_block *sb)
* take the FS itself readonly cleanly. * take the FS itself readonly cleanly.
*/ */
if (journal && is_journal_aborted(journal)) { if (journal && is_journal_aborted(journal)) {
ext4_set_errno(sb, -journal->j_errno);
ext4_abort(sb, "Detected aborted journal"); ext4_abort(sb, "Detected aborted journal");
return -EROFS; return -EROFS;
} }
@@ -249,6 +272,7 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
if (err) { if (err) {
ext4_journal_abort_handle(where, line, __func__, ext4_journal_abort_handle(where, line, __func__,
bh, handle, err); bh, handle, err);
ext4_set_errno(inode->i_sb, -err);
__ext4_abort(inode->i_sb, where, line, __ext4_abort(inode->i_sb, where, line,
"error %d when attempting revoke", err); "error %d when attempting revoke", err);
} }
@@ -320,6 +344,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
es = EXT4_SB(inode->i_sb)->s_es; es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_block = es->s_last_error_block =
cpu_to_le64(bh->b_blocknr); cpu_to_le64(bh->b_blocknr);
ext4_set_errno(inode->i_sb, EIO);
ext4_error_inode(inode, where, line, ext4_error_inode(inode, where, line,
bh->b_blocknr, bh->b_blocknr,
"IO error syncing itable block"); "IO error syncing itable block");

View File

@@ -463,27 +463,7 @@ int ext4_force_commit(struct super_block *sb);
#define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ #define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */
#define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ #define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */
static inline int ext4_inode_journal_mode(struct inode *inode) int ext4_inode_journal_mode(struct inode *inode);
{
if (EXT4_JOURNAL(inode) == NULL)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
/* We do not support data journalling with delayed allocation */
if (!S_ISREG(inode->i_mode) ||
ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) ||
test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
(ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
!test_opt(inode->i_sb, DELALLOC))) {
/* We do not support data journalling for encrypted data */
if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode))
return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
}
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
BUG();
}
static inline int ext4_should_journal_data(struct inode *inode) static inline int ext4_should_journal_data(struct inode *inode)
{ {

View File

@@ -161,8 +161,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
* - ENOMEM * - ENOMEM
* - EIO * - EIO
*/ */
int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, static int __ext4_ext_dirty(const char *where, unsigned int line,
struct inode *inode, struct ext4_ext_path *path) handle_t *handle, struct inode *inode,
struct ext4_ext_path *path)
{ {
int err; int err;
@@ -179,6 +180,9 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
return err; return err;
} }
#define ext4_ext_dirty(handle, inode, path) \
__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
struct ext4_ext_path *path, struct ext4_ext_path *path,
ext4_lblk_t block) ext4_lblk_t block)
@@ -309,53 +313,6 @@ ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
(nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
} }
/*
* Calculate the number of metadata blocks needed
* to allocate @blocks
* Worse case is one block per extent
*/
int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
{
struct ext4_inode_info *ei = EXT4_I(inode);
int idxs;
idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
/ sizeof(struct ext4_extent_idx));
/*
* If the new delayed allocation block is contiguous with the
* previous da block, it can share index blocks with the
* previous block, so we only need to allocate a new index
* block every idxs leaf blocks. At ldxs**2 blocks, we need
* an additional index block, and at ldxs**3 blocks, yet
* another index blocks.
*/
if (ei->i_da_metadata_calc_len &&
ei->i_da_metadata_calc_last_lblock+1 == lblock) {
int num = 0;
if ((ei->i_da_metadata_calc_len % idxs) == 0)
num++;
if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
num++;
if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
num++;
ei->i_da_metadata_calc_len = 0;
} else
ei->i_da_metadata_calc_len++;
ei->i_da_metadata_calc_last_lblock++;
return num;
}
/*
* In the worst case we need a new set of index blocks at
* every level of the inode's extent tree.
*/
ei->i_da_metadata_calc_len = 1;
ei->i_da_metadata_calc_last_lblock = lblock;
return ext_depth(inode) + 1;
}
static int static int
ext4_ext_max_entries(struct inode *inode, int depth) ext4_ext_max_entries(struct inode *inode, int depth)
{ {
@@ -492,6 +449,7 @@ static int __ext4_ext_check(const char *function, unsigned int line,
return 0; return 0;
corrupted: corrupted:
ext4_set_errno(inode->i_sb, -err);
ext4_error_inode(inode, function, line, 0, ext4_error_inode(inode, function, line, 0,
"pblk %llu bad header/extent: %s - magic %x, " "pblk %llu bad header/extent: %s - magic %x, "
"entries %u, max %u(%u), depth %u(%u)", "entries %u, max %u(%u), depth %u(%u)",
@@ -510,6 +468,30 @@ int ext4_ext_check_inode(struct inode *inode)
return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
} }
static void ext4_cache_extents(struct inode *inode,
struct ext4_extent_header *eh)
{
struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
ext4_lblk_t prev = 0;
int i;
for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
unsigned int status = EXTENT_STATUS_WRITTEN;
ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
int len = ext4_ext_get_actual_len(ex);
if (prev && (prev != lblk))
ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
EXTENT_STATUS_HOLE);
if (ext4_ext_is_unwritten(ex))
status = EXTENT_STATUS_UNWRITTEN;
ext4_es_cache_extent(inode, lblk, len,
ext4_ext_pblock(ex), status);
prev = lblk + len;
}
}
static struct buffer_head * static struct buffer_head *
__read_extent_tree_block(const char *function, unsigned int line, __read_extent_tree_block(const char *function, unsigned int line,
struct inode *inode, ext4_fsblk_t pblk, int depth, struct inode *inode, ext4_fsblk_t pblk, int depth,
@@ -544,26 +526,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
*/ */
if (!(flags & EXT4_EX_NOCACHE) && depth == 0) { if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
struct ext4_extent_header *eh = ext_block_hdr(bh); struct ext4_extent_header *eh = ext_block_hdr(bh);
struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); ext4_cache_extents(inode, eh);
ext4_lblk_t prev = 0;
int i;
for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
unsigned int status = EXTENT_STATUS_WRITTEN;
ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
int len = ext4_ext_get_actual_len(ex);
if (prev && (prev != lblk))
ext4_es_cache_extent(inode, prev,
lblk - prev, ~0,
EXTENT_STATUS_HOLE);
if (ext4_ext_is_unwritten(ex))
status = EXTENT_STATUS_UNWRITTEN;
ext4_es_cache_extent(inode, lblk, len,
ext4_ext_pblock(ex), status);
prev = lblk + len;
}
} }
return bh; return bh;
errout: errout:
@@ -649,8 +612,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
ext_debug("path:"); ext_debug("path:");
for (k = 0; k <= l; k++, path++) { for (k = 0; k <= l; k++, path++) {
if (path->p_idx) { if (path->p_idx) {
ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), ext_debug(" %d->%llu",
ext4_idx_pblock(path->p_idx)); le32_to_cpu(path->p_idx->ei_block),
ext4_idx_pblock(path->p_idx));
} else if (path->p_ext) { } else if (path->p_ext) {
ext_debug(" %d:[%d]%d:%llu ", ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block), le32_to_cpu(path->p_ext->ee_block),
@@ -731,11 +695,12 @@ void ext4_ext_drop_refs(struct ext4_ext_path *path)
if (!path) if (!path)
return; return;
depth = path->p_depth; depth = path->p_depth;
for (i = 0; i <= depth; i++, path++) for (i = 0; i <= depth; i++, path++) {
if (path->p_bh) { if (path->p_bh) {
brelse(path->p_bh); brelse(path->p_bh);
path->p_bh = NULL; path->p_bh = NULL;
} }
}
} }
/* /*
@@ -777,8 +742,8 @@ ext4_ext_binsearch_idx(struct inode *inode,
chix = ix = EXT_FIRST_INDEX(eh); chix = ix = EXT_FIRST_INDEX(eh);
for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
if (k != 0 && if (k != 0 && le32_to_cpu(ix->ei_block) <=
le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { le32_to_cpu(ix[-1].ei_block)) {
printk(KERN_DEBUG "k=%d, ix=0x%p, " printk(KERN_DEBUG "k=%d, ix=0x%p, "
"first=0x%p\n", k, "first=0x%p\n", k,
ix, EXT_FIRST_INDEX(eh)); ix, EXT_FIRST_INDEX(eh));
@@ -911,6 +876,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
path[0].p_bh = NULL; path[0].p_bh = NULL;
i = depth; i = depth;
if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
ext4_cache_extents(inode, eh);
/* walk through the tree */ /* walk through the tree */
while (i) { while (i) {
ext_debug("depth %d: num %d, max %d\n", ext_debug("depth %d: num %d, max %d\n",
@@ -1632,17 +1599,16 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
return EXT_MAX_BLOCKS; return EXT_MAX_BLOCKS;
while (depth >= 0) { while (depth >= 0) {
struct ext4_ext_path *p = &path[depth];
if (depth == path->p_depth) { if (depth == path->p_depth) {
/* leaf */ /* leaf */
if (path[depth].p_ext && if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
path[depth].p_ext != return le32_to_cpu(p->p_ext[1].ee_block);
EXT_LAST_EXTENT(path[depth].p_hdr))
return le32_to_cpu(path[depth].p_ext[1].ee_block);
} else { } else {
/* index */ /* index */
if (path[depth].p_idx != if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
EXT_LAST_INDEX(path[depth].p_hdr)) return le32_to_cpu(p->p_idx[1].ei_block);
return le32_to_cpu(path[depth].p_idx[1].ei_block);
} }
depth--; depth--;
} }
@@ -1742,9 +1708,9 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
return err; return err;
} }
int static int ext4_can_extents_be_merged(struct inode *inode,
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex1,
struct ext4_extent *ex2) struct ext4_extent *ex2)
{ {
unsigned short ext1_ee_len, ext2_ee_len; unsigned short ext1_ee_len, ext2_ee_len;
@@ -1758,11 +1724,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
le32_to_cpu(ex2->ee_block)) le32_to_cpu(ex2->ee_block))
return 0; return 0;
/*
* To allow future support for preallocated extents to be added
* as an RO_COMPAT feature, refuse to merge to extents if
* this can result in the top bit of ee_len being set.
*/
if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
return 0; return 0;
@@ -1870,13 +1831,14 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
} }
/* /*
* This function tries to merge the @ex extent to neighbours in the tree. * This function tries to merge the @ex extent to neighbours in the tree, then
* return 1 if merge left else 0. * tries to collapse the extent tree into the inode.
*/ */
static void ext4_ext_try_to_merge(handle_t *handle, static void ext4_ext_try_to_merge(handle_t *handle,
struct inode *inode, struct inode *inode,
struct ext4_ext_path *path, struct ext4_ext_path *path,
struct ext4_extent *ex) { struct ext4_extent *ex)
{
struct ext4_extent_header *eh; struct ext4_extent_header *eh;
unsigned int depth; unsigned int depth;
int merge_done = 0; int merge_done = 0;
@@ -3718,9 +3680,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
max_zeroout = sbi->s_extent_max_zeroout_kb >> max_zeroout = sbi->s_extent_max_zeroout_kb >>
(inode->i_sb->s_blocksize_bits - 10); (inode->i_sb->s_blocksize_bits - 10);
if (IS_ENCRYPTED(inode))
max_zeroout = 0;
/* /*
* five cases: * five cases:
* 1. split the extent into three extents. * 1. split the extent into three extents.
@@ -4706,6 +4665,10 @@ retry:
return ret > 0 ? ret2 : ret; return ret > 0 ? ret2 : ret;
} }
static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
static long ext4_zero_range(struct file *file, loff_t offset, static long ext4_zero_range(struct file *file, loff_t offset,
loff_t len, int mode) loff_t len, int mode)
{ {
@@ -4723,9 +4686,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
trace_ext4_zero_range(inode, offset, len, mode); trace_ext4_zero_range(inode, offset, len, mode);
if (!S_ISREG(inode->i_mode))
return -EINVAL;
/* Call ext4_force_commit to flush all data in case of data=journal. */ /* Call ext4_force_commit to flush all data in case of data=journal. */
if (ext4_should_journal_data(inode)) { if (ext4_should_journal_data(inode)) {
ret = ext4_force_commit(inode->i_sb); ret = ext4_force_commit(inode->i_sb);
@@ -4765,7 +4725,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
} }
if (!(mode & FALLOC_FL_KEEP_SIZE) && if (!(mode & FALLOC_FL_KEEP_SIZE) &&
(offset + len > i_size_read(inode) || (offset + len > inode->i_size ||
offset + len > EXT4_I(inode)->i_disksize)) { offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len; new_size = offset + len;
ret = inode_newsize_ok(inode, new_size); ret = inode_newsize_ok(inode, new_size);
@@ -4849,7 +4809,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* Mark that we allocate beyond EOF so the subsequent truncate * Mark that we allocate beyond EOF so the subsequent truncate
* can proceed even if the new size is the same as i_size. * can proceed even if the new size is the same as i_size.
*/ */
if ((offset + len) > i_size_read(inode)) if (offset + len > inode->i_size)
ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
} }
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
@@ -4890,14 +4850,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
* range since we would need to re-encrypt blocks with a * range since we would need to re-encrypt blocks with a
* different IV or XTS tweak (which are based on the logical * different IV or XTS tweak (which are based on the logical
* block number). * block number).
*
* XXX It's not clear why zero range isn't working, but we'll
* leave it disabled for encrypted inodes for now. This is a
* bug we should fix....
*/ */
if (IS_ENCRYPTED(inode) && if (IS_ENCRYPTED(inode) &&
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE | (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
FALLOC_FL_ZERO_RANGE)))
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* Return error if mode is not supported */ /* Return error if mode is not supported */
@@ -4941,7 +4896,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
} }
if (!(mode & FALLOC_FL_KEEP_SIZE) && if (!(mode & FALLOC_FL_KEEP_SIZE) &&
(offset + len > i_size_read(inode) || (offset + len > inode->i_size ||
offset + len > EXT4_I(inode)->i_disksize)) { offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len; new_size = offset + len;
ret = inode_newsize_ok(inode, new_size); ret = inode_newsize_ok(inode, new_size);
@@ -5268,7 +5223,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
{ {
int depth, err = 0; int depth, err = 0;
struct ext4_extent *ex_start, *ex_last; struct ext4_extent *ex_start, *ex_last;
bool update = 0; bool update = false;
depth = path->p_depth; depth = path->p_depth;
while (depth >= 0) { while (depth >= 0) {
@@ -5284,7 +5239,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
goto out; goto out;
if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
update = 1; update = true;
while (ex_start <= ex_last) { while (ex_start <= ex_last) {
if (SHIFT == SHIFT_LEFT) { if (SHIFT == SHIFT_LEFT) {
@@ -5472,7 +5427,7 @@ out:
* This implements the fallocate's collapse range functionality for ext4 * This implements the fallocate's collapse range functionality for ext4
* Returns: 0 and non-zero on error. * Returns: 0 and non-zero on error.
*/ */
int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
ext4_lblk_t punch_start, punch_stop; ext4_lblk_t punch_start, punch_stop;
@@ -5489,12 +5444,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* Collapse range works only on fs block size aligned offsets. */ /* Collapse range works only on fs cluster size aligned regions. */
if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) || if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
len & (EXT4_CLUSTER_SIZE(sb) - 1))
return -EINVAL;
if (!S_ISREG(inode->i_mode))
return -EINVAL; return -EINVAL;
trace_ext4_collapse_range(inode, offset, len); trace_ext4_collapse_range(inode, offset, len);
@@ -5514,7 +5465,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
* There is no need to overlap collapse range with EOF, in which case * There is no need to overlap collapse range with EOF, in which case
* it is effectively a truncate operation * it is effectively a truncate operation
*/ */
if (offset + len >= i_size_read(inode)) { if (offset + len >= inode->i_size) {
ret = -EINVAL; ret = -EINVAL;
goto out_mutex; goto out_mutex;
} }
@@ -5592,7 +5543,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
goto out_stop; goto out_stop;
} }
new_size = i_size_read(inode) - len; new_size = inode->i_size - len;
i_size_write(inode, new_size); i_size_write(inode, new_size);
EXT4_I(inode)->i_disksize = new_size; EXT4_I(inode)->i_disksize = new_size;
@@ -5620,7 +5571,7 @@ out_mutex:
* by len bytes. * by len bytes.
* Returns 0 on success, error otherwise. * Returns 0 on success, error otherwise.
*/ */
int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
handle_t *handle; handle_t *handle;
@@ -5639,14 +5590,10 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* Insert range works only on fs block size aligned offsets. */ /* Insert range works only on fs cluster size aligned regions. */
if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) || if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
len & (EXT4_CLUSTER_SIZE(sb) - 1))
return -EINVAL; return -EINVAL;
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
trace_ext4_insert_range(inode, offset, len); trace_ext4_insert_range(inode, offset, len);
offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb); offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
@@ -5666,14 +5613,14 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex; goto out_mutex;
} }
/* Check for wrap through zero */ /* Check whether the maximum file size would be exceeded */
if (inode->i_size + len > inode->i_sb->s_maxbytes) { if (len > inode->i_sb->s_maxbytes - inode->i_size) {
ret = -EFBIG; ret = -EFBIG;
goto out_mutex; goto out_mutex;
} }
/* Offset should be less than i_size */ /* Offset must be less than i_size */
if (offset >= i_size_read(inode)) { if (offset >= inode->i_size) {
ret = -EINVAL; ret = -EINVAL;
goto out_mutex; goto out_mutex;
} }

View File

@@ -209,6 +209,12 @@ static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
return es->es_pblk & ~ES_MASK; return es->es_pblk & ~ES_MASK;
} }
static inline ext4_fsblk_t ext4_es_show_pblock(struct extent_status *es)
{
ext4_fsblk_t pblock = ext4_es_pblock(es);
return pblock == ~ES_MASK ? 0 : pblock;
}
static inline void ext4_es_store_pblock(struct extent_status *es, static inline void ext4_es_store_pblock(struct extent_status *es,
ext4_fsblk_t pb) ext4_fsblk_t pb)
{ {

View File

@@ -88,9 +88,10 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp); struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret; ssize_t ret;
if (!inode_trylock_shared(inode)) { if (iocb->ki_flags & IOCB_NOWAIT) {
if (iocb->ki_flags & IOCB_NOWAIT) if (!inode_trylock_shared(inode))
return -EAGAIN; return -EAGAIN;
} else {
inode_lock_shared(inode); inode_lock_shared(inode);
} }
/* /*
@@ -165,19 +166,25 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
* threads are at work on the same unwritten block, they must be synchronized * threads are at work on the same unwritten block, they must be synchronized
* or one thread will zero the other's data, causing corruption. * or one thread will zero the other's data, causing corruption.
*/ */
static int static bool
ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) ext4_unaligned_io(struct inode *inode, struct iov_iter *from, loff_t pos)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
int blockmask = sb->s_blocksize - 1; unsigned long blockmask = sb->s_blocksize - 1;
if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize))
return 0;
if ((pos | iov_iter_alignment(from)) & blockmask) if ((pos | iov_iter_alignment(from)) & blockmask)
return 1; return true;
return 0; return false;
}
static bool
ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
{
if (offset + len > i_size_read(inode) ||
offset + len > EXT4_I(inode)->i_disksize)
return true;
return false;
} }
/* Is IO overwriting allocated and initialized blocks? */ /* Is IO overwriting allocated and initialized blocks? */
@@ -203,7 +210,8 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
return err == blklen && (map.m_flags & EXT4_MAP_MAPPED); return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
} }
static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
struct iov_iter *from)
{ {
struct inode *inode = file_inode(iocb->ki_filp); struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret; ssize_t ret;
@@ -227,11 +235,21 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos); iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
} }
return iov_iter_count(from);
}
static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
ssize_t ret, count;
count = ext4_generic_write_checks(iocb, from);
if (count <= 0)
return count;
ret = file_modified(iocb->ki_filp); ret = file_modified(iocb->ki_filp);
if (ret) if (ret)
return ret; return ret;
return count;
return iov_iter_count(from);
} }
static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
@@ -363,62 +381,137 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
.end_io = ext4_dio_write_end_io, .end_io = ext4_dio_write_end_io,
}; };
/*
* The intention here is to start with shared lock acquired then see if any
* condition requires an exclusive inode lock. If yes, then we restart the
* whole operation by releasing the shared lock and acquiring exclusive lock.
*
* - For unaligned_io we never take shared lock as it may cause data corruption
* when two unaligned IO tries to modify the same block e.g. while zeroing.
*
* - For extending writes case we don't take the shared lock, since it requires
* updating inode i_disksize and/or orphan handling with exclusive lock.
*
* - shared locking will only be true mostly with overwrites. Otherwise we will
* switch to exclusive i_rwsem lock.
*/
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
bool *ilock_shared, bool *extend)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
loff_t offset;
size_t count;
ssize_t ret;
restart:
ret = ext4_generic_write_checks(iocb, from);
if (ret <= 0)
goto out;
offset = iocb->ki_pos;
count = ret;
if (ext4_extending_io(inode, offset, count))
*extend = true;
/*
* Determine whether the IO operation will overwrite allocated
* and initialized blocks.
* We need exclusive i_rwsem for changing security info
* in file_modified().
*/
if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
!ext4_overwrite_io(inode, offset, count))) {
inode_unlock_shared(inode);
*ilock_shared = false;
inode_lock(inode);
goto restart;
}
ret = file_modified(file);
if (ret < 0)
goto out;
return count;
out:
if (*ilock_shared)
inode_unlock_shared(inode);
else
inode_unlock(inode);
return ret;
}
static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
{ {
ssize_t ret; ssize_t ret;
size_t count;
loff_t offset;
handle_t *handle; handle_t *handle;
struct inode *inode = file_inode(iocb->ki_filp); struct inode *inode = file_inode(iocb->ki_filp);
bool extend = false, overwrite = false, unaligned_aio = false; loff_t offset = iocb->ki_pos;
size_t count = iov_iter_count(from);
const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
bool extend = false, unaligned_io = false;
bool ilock_shared = true;
/*
* We initially start with shared inode lock unless it is
* unaligned IO which needs exclusive lock anyways.
*/
if (ext4_unaligned_io(inode, from, offset)) {
unaligned_io = true;
ilock_shared = false;
}
/*
* Quick check here without any i_rwsem lock to see if it is extending
* IO. A more reliable check is done in ext4_dio_write_checks() with
* proper locking in place.
*/
if (offset + count > i_size_read(inode))
ilock_shared = false;
if (iocb->ki_flags & IOCB_NOWAIT) { if (iocb->ki_flags & IOCB_NOWAIT) {
if (!inode_trylock(inode)) if (ilock_shared) {
return -EAGAIN; if (!inode_trylock_shared(inode))
return -EAGAIN;
} else {
if (!inode_trylock(inode))
return -EAGAIN;
}
} else { } else {
inode_lock(inode); if (ilock_shared)
inode_lock_shared(inode);
else
inode_lock(inode);
} }
/* Fallback to buffered I/O if the inode does not support direct I/O. */
if (!ext4_dio_supported(inode)) { if (!ext4_dio_supported(inode)) {
inode_unlock(inode); if (ilock_shared)
/* inode_unlock_shared(inode);
* Fallback to buffered I/O if the inode does not support else
* direct I/O. inode_unlock(inode);
*/
return ext4_buffered_write_iter(iocb, from); return ext4_buffered_write_iter(iocb, from);
} }
ret = ext4_write_checks(iocb, from); ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
if (ret <= 0) { if (ret <= 0)
inode_unlock(inode);
return ret; return ret;
}
/*
* Unaligned asynchronous direct I/O must be serialized among each
* other as the zeroing of partial blocks of two competing unaligned
* asynchronous direct I/O writes can result in data corruption.
*/
offset = iocb->ki_pos; offset = iocb->ki_pos;
count = iov_iter_count(from); count = ret;
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
!is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) {
unaligned_aio = true;
inode_dio_wait(inode);
}
/* /*
* Determine whether the I/O will overwrite allocated and initialized * Unaligned direct IO must be serialized among each other as zeroing
* blocks. If so, check to see whether it is possible to take the * of partial blocks of two competing unaligned IOs can result in data
* dioread_nolock path. * corruption.
*
* So we make sure we don't allow any unaligned IO in flight.
* For IOs where we need not wait (like unaligned non-AIO DIO),
* below inode_dio_wait() may anyway become a no-op, since we start
* with exclusive lock.
*/ */
if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) && if (unaligned_io)
ext4_should_dioread_nolock(inode)) { inode_dio_wait(inode);
overwrite = true;
downgrade_write(&inode->i_rwsem);
}
if (offset + count > EXT4_I(inode)->i_disksize) { if (extend) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
@@ -431,18 +524,19 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out; goto out;
} }
extend = true;
ext4_journal_stop(handle); ext4_journal_stop(handle);
} }
ret = iomap_dio_rw(iocb, from, &ext4_iomap_ops, &ext4_dio_write_ops, if (ilock_shared)
is_sync_kiocb(iocb) || unaligned_aio || extend); iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
is_sync_kiocb(iocb) || unaligned_io || extend);
if (extend) if (extend)
ret = ext4_handle_inode_extension(inode, offset, ret, count); ret = ext4_handle_inode_extension(inode, offset, ret, count);
out: out:
if (overwrite) if (ilock_shared)
inode_unlock_shared(inode); inode_unlock_shared(inode);
else else
inode_unlock(inode); inode_unlock(inode);
@@ -487,9 +581,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
bool extend = false; bool extend = false;
struct inode *inode = file_inode(iocb->ki_filp); struct inode *inode = file_inode(iocb->ki_filp);
if (!inode_trylock(inode)) { if (iocb->ki_flags & IOCB_NOWAIT) {
if (iocb->ki_flags & IOCB_NOWAIT) if (!inode_trylock(inode))
return -EAGAIN; return -EAGAIN;
} else {
inode_lock(inode); inode_lock(inode);
} }

View File

@@ -94,7 +94,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
goto verified; goto verified;
blk = ext4_inode_bitmap(sb, desc); blk = ext4_inode_bitmap(sb, desc);
if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
EXT4_INODES_PER_GROUP(sb) / 8)) { EXT4_INODES_PER_GROUP(sb) / 8) ||
ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_CRC)) {
ext4_unlock_group(sb, block_group); ext4_unlock_group(sb, block_group);
ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
"inode_bitmap = %llu", block_group, blk); "inode_bitmap = %llu", block_group, blk);
@@ -192,8 +193,10 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
get_bh(bh); get_bh(bh);
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh); wait_on_buffer(bh);
ext4_simulate_fail_bh(sb, bh, EXT4_SIM_IBITMAP_EIO);
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
put_bh(bh); put_bh(bh);
ext4_set_errno(sb, EIO);
ext4_error(sb, "Cannot read inode bitmap - " ext4_error(sb, "Cannot read inode bitmap - "
"block_group = %u, inode_bitmap = %llu", "block_group = %u, inode_bitmap = %llu",
block_group, bitmap_blk); block_group, bitmap_blk);
@@ -1223,6 +1226,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
err = PTR_ERR(inode); err = PTR_ERR(inode);
ext4_set_errno(sb, -err);
ext4_error(sb, "couldn't read orphan inode %lu (err %d)", ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
ino, err); ino, err);
return inode; return inode;

View File

@@ -659,32 +659,6 @@ out:
return err; return err;
} }
/*
* Calculate the number of metadata blocks need to reserve
* to allocate a new block at @lblocks for non extent file based file
*/
int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
struct ext4_inode_info *ei = EXT4_I(inode);
sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
int blk_bits;
if (lblock < EXT4_NDIR_BLOCKS)
return 0;
lblock -= EXT4_NDIR_BLOCKS;
if (ei->i_da_metadata_calc_len &&
(lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
ei->i_da_metadata_calc_len++;
return 0;
}
ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
ei->i_da_metadata_calc_len = 1;
blk_bits = order_base_2(lblock);
return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
}
/* /*
* Calculate number of indirect blocks touched by mapping @nrblocks logically * Calculate number of indirect blocks touched by mapping @nrblocks logically
* contiguous blocks * contiguous blocks

View File

@@ -98,6 +98,7 @@ int ext4_get_max_inline_size(struct inode *inode)
error = ext4_get_inode_loc(inode, &iloc); error = ext4_get_inode_loc(inode, &iloc);
if (error) { if (error) {
ext4_set_errno(inode->i_sb, -error);
ext4_error_inode(inode, __func__, __LINE__, 0, ext4_error_inode(inode, __func__, __LINE__, 0,
"can't get inode location %lu", "can't get inode location %lu",
inode->i_ino); inode->i_ino);
@@ -849,7 +850,7 @@ out:
/* /*
* Prepare the write for the inline data. * Prepare the write for the inline data.
* If the the data can be written into the inode, we just read * If the data can be written into the inode, we just read
* the page and make it uptodate, and start the journal. * the page and make it uptodate, and start the journal.
* Otherwise read the page, makes it dirty so that it can be * Otherwise read the page, makes it dirty so that it can be
* handle in writepages(the i_disksize update is left to the * handle in writepages(the i_disksize update is left to the
@@ -1761,6 +1762,7 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
err = ext4_get_inode_loc(dir, &iloc); err = ext4_get_inode_loc(dir, &iloc);
if (err) { if (err) {
ext4_set_errno(dir->i_sb, -err);
EXT4_ERROR_INODE(dir, "error %d getting inode %lu block", EXT4_ERROR_INODE(dir, "error %d getting inode %lu block",
err, dir->i_ino); err, dir->i_ino);
return true; return true;

View File

@@ -48,8 +48,6 @@
#include <trace/events/ext4.h> #include <trace/events/ext4.h>
#define MPAGE_DA_EXTENT_TAIL 0x01
static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
struct ext4_inode_info *ei) struct ext4_inode_info *ei)
{ {
@@ -271,6 +269,7 @@ void ext4_evict_inode(struct inode *inode)
if (inode->i_blocks) { if (inode->i_blocks) {
err = ext4_truncate(inode); err = ext4_truncate(inode);
if (err) { if (err) {
ext4_set_errno(inode->i_sb, -err);
ext4_error(inode->i_sb, ext4_error(inode->i_sb,
"couldn't truncate inode %lu (err %d)", "couldn't truncate inode %lu (err %d)",
inode->i_ino, err); inode->i_ino, err);
@@ -402,7 +401,7 @@ int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
{ {
int ret; int ret;
if (IS_ENCRYPTED(inode)) if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
return fscrypt_zeroout_range(inode, lblk, pblk, len); return fscrypt_zeroout_range(inode, lblk, pblk, len);
ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS); ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
@@ -2478,10 +2477,12 @@ update_disksize:
EXT4_I(inode)->i_disksize = disksize; EXT4_I(inode)->i_disksize = disksize;
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
err2 = ext4_mark_inode_dirty(handle, inode); err2 = ext4_mark_inode_dirty(handle, inode);
if (err2) if (err2) {
ext4_set_errno(inode->i_sb, -err2);
ext4_error(inode->i_sb, ext4_error(inode->i_sb,
"Failed to mark inode %lu dirty", "Failed to mark inode %lu dirty",
inode->i_ino); inode->i_ino);
}
if (!err) if (!err)
err = err2; err = err2;
} }
@@ -3448,6 +3449,22 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return 0; return 0;
} }
static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
loff_t length, unsigned flags, struct iomap *iomap,
struct iomap *srcmap)
{
int ret;
/*
* Even for writes we don't need to allocate blocks, so just pretend
* we are reading to save overhead of starting a transaction.
*/
flags &= ~IOMAP_WRITE;
ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap);
WARN_ON_ONCE(iomap->type != IOMAP_MAPPED);
return ret;
}
static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
ssize_t written, unsigned flags, struct iomap *iomap) ssize_t written, unsigned flags, struct iomap *iomap)
{ {
@@ -3469,6 +3486,11 @@ const struct iomap_ops ext4_iomap_ops = {
.iomap_end = ext4_iomap_end, .iomap_end = ext4_iomap_end,
}; };
const struct iomap_ops ext4_iomap_overwrite_ops = {
.iomap_begin = ext4_iomap_overwrite_begin,
.iomap_end = ext4_iomap_end,
};
static bool ext4_iomap_is_delalloc(struct inode *inode, static bool ext4_iomap_is_delalloc(struct inode *inode,
struct ext4_map_blocks *map) struct ext4_map_blocks *map)
{ {
@@ -3701,8 +3723,12 @@ static int __ext4_block_zero_page_range(handle_t *handle,
if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) { if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) {
/* We expect the key to be set. */ /* We expect the key to be set. */
BUG_ON(!fscrypt_has_encryption_key(inode)); BUG_ON(!fscrypt_has_encryption_key(inode));
WARN_ON_ONCE(fscrypt_decrypt_pagecache_blocks( err = fscrypt_decrypt_pagecache_blocks(page, blocksize,
page, blocksize, bh_offset(bh))); bh_offset(bh));
if (err) {
clear_buffer_uptodate(bh);
goto unlock;
}
} }
} }
if (ext4_should_journal_data(inode)) { if (ext4_should_journal_data(inode)) {
@@ -3912,9 +3938,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
unsigned int credits; unsigned int credits;
int ret = 0; int ret = 0;
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
trace_ext4_punch_hole(inode, offset, length, 0); trace_ext4_punch_hole(inode, offset, length, 0);
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
@@ -4240,6 +4263,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
bh = sb_getblk(sb, block); bh = sb_getblk(sb, block);
if (unlikely(!bh)) if (unlikely(!bh))
return -ENOMEM; return -ENOMEM;
if (ext4_simulate_fail(sb, EXT4_SIM_INODE_EIO))
goto simulate_eio;
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
lock_buffer(bh); lock_buffer(bh);
@@ -4338,6 +4363,8 @@ make_io:
blk_finish_plug(&plug); blk_finish_plug(&plug);
wait_on_buffer(bh); wait_on_buffer(bh);
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
simulate_eio:
ext4_set_errno(inode->i_sb, EIO);
EXT4_ERROR_INODE_BLOCK(inode, block, EXT4_ERROR_INODE_BLOCK(inode, block,
"unable to read itable block"); "unable to read itable block");
brelse(bh); brelse(bh);
@@ -4551,7 +4578,9 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
sizeof(gen)); sizeof(gen));
} }
if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { if (!ext4_inode_csum_verify(inode, raw_inode, ei) ||
ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) {
ext4_set_errno(inode->i_sb, EFSBADCRC);
ext4_error_inode(inode, function, line, 0, ext4_error_inode(inode, function, line, 0,
"iget: checksum invalid"); "iget: checksum invalid");
ret = -EFSBADCRC; ret = -EFSBADCRC;
@@ -5090,6 +5119,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
sync_dirty_buffer(iloc.bh); sync_dirty_buffer(iloc.bh);
if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
ext4_set_errno(inode->i_sb, EIO);
EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
"IO error syncing inode"); "IO error syncing inode");
err = -EIO; err = -EIO;
@@ -5368,7 +5398,8 @@ int ext4_getattr(const struct path *path, struct kstat *stat,
struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int flags; unsigned int flags;
if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) { if ((request_mask & STATX_BTIME) &&
EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {
stat->result_mask |= STATX_BTIME; stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = ei->i_crtime.tv_sec; stat->btime.tv_sec = ei->i_crtime.tv_sec;
stat->btime.tv_nsec = ei->i_crtime.tv_nsec; stat->btime.tv_nsec = ei->i_crtime.tv_nsec;

View File

@@ -1377,6 +1377,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_CLEAR_ES_CACHE: case EXT4_IOC_CLEAR_ES_CACHE:
case EXT4_IOC_GETSTATE: case EXT4_IOC_GETSTATE:
case EXT4_IOC_GET_ES_CACHE: case EXT4_IOC_GET_ES_CACHE:
case EXT4_IOC_FSGETXATTR:
case EXT4_IOC_FSSETXATTR:
break; break;
default: default:
return -ENOIOCTLCMD; return -ENOIOCTLCMD;

View File

@@ -3895,6 +3895,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
bitmap_bh = ext4_read_block_bitmap(sb, group); bitmap_bh = ext4_read_block_bitmap(sb, group);
if (IS_ERR(bitmap_bh)) { if (IS_ERR(bitmap_bh)) {
err = PTR_ERR(bitmap_bh); err = PTR_ERR(bitmap_bh);
ext4_set_errno(sb, -err);
ext4_error(sb, "Error %d reading block bitmap for %u", ext4_error(sb, "Error %d reading block bitmap for %u",
err, group); err, group);
return 0; return 0;
@@ -4063,6 +4064,7 @@ repeat:
err = ext4_mb_load_buddy_gfp(sb, group, &e4b, err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
GFP_NOFS|__GFP_NOFAIL); GFP_NOFS|__GFP_NOFAIL);
if (err) { if (err) {
ext4_set_errno(sb, -err);
ext4_error(sb, "Error %d loading buddy information for %u", ext4_error(sb, "Error %d loading buddy information for %u",
err, group); err, group);
continue; continue;
@@ -4071,6 +4073,7 @@ repeat:
bitmap_bh = ext4_read_block_bitmap(sb, group); bitmap_bh = ext4_read_block_bitmap(sb, group);
if (IS_ERR(bitmap_bh)) { if (IS_ERR(bitmap_bh)) {
err = PTR_ERR(bitmap_bh); err = PTR_ERR(bitmap_bh);
ext4_set_errno(sb, -err);
ext4_error(sb, "Error %d reading block bitmap for %u", ext4_error(sb, "Error %d reading block bitmap for %u",
err, group); err, group);
ext4_mb_unload_buddy(&e4b); ext4_mb_unload_buddy(&e4b);
@@ -4325,6 +4328,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
err = ext4_mb_load_buddy_gfp(sb, group, &e4b, err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
GFP_NOFS|__GFP_NOFAIL); GFP_NOFS|__GFP_NOFAIL);
if (err) { if (err) {
ext4_set_errno(sb, -err);
ext4_error(sb, "Error %d loading buddy information for %u", ext4_error(sb, "Error %d loading buddy information for %u",
err, group); err, group);
continue; continue;

View File

@@ -173,8 +173,10 @@ static int kmmpd(void *data)
* (s_mmp_update_interval * 60) seconds. * (s_mmp_update_interval * 60) seconds.
*/ */
if (retval) { if (retval) {
if ((failed_writes % 60) == 0) if ((failed_writes % 60) == 0) {
ext4_set_errno(sb, -retval);
ext4_error(sb, "Error writing to MMP block"); ext4_error(sb, "Error writing to MMP block");
}
failed_writes++; failed_writes++;
} }
@@ -205,6 +207,7 @@ static int kmmpd(void *data)
retval = read_mmp_block(sb, &bh_check, mmp_block); retval = read_mmp_block(sb, &bh_check, mmp_block);
if (retval) { if (retval) {
ext4_set_errno(sb, -retval);
ext4_error(sb, "error reading MMP data: %d", ext4_error(sb, "error reading MMP data: %d",
retval); retval);
goto exit_thread; goto exit_thread;
@@ -218,6 +221,7 @@ static int kmmpd(void *data)
"Error while updating MMP info. " "Error while updating MMP info. "
"The filesystem seems to have been" "The filesystem seems to have been"
" multiply mounted."); " multiply mounted.");
ext4_set_errno(sb, EBUSY);
ext4_error(sb, "abort"); ext4_error(sb, "abort");
put_bh(bh_check); put_bh(bh_check);
retval = -EBUSY; retval = -EBUSY;

View File

@@ -109,7 +109,10 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
struct ext4_dir_entry *dirent; struct ext4_dir_entry *dirent;
int is_dx_block = 0; int is_dx_block = 0;
bh = ext4_bread(NULL, inode, block, 0); if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
bh = ERR_PTR(-EIO);
else
bh = ext4_bread(NULL, inode, block, 0);
if (IS_ERR(bh)) { if (IS_ERR(bh)) {
__ext4_warning(inode->i_sb, func, line, __ext4_warning(inode->i_sb, func, line,
"inode #%lu: lblock %lu: comm %s: " "inode #%lu: lblock %lu: comm %s: "
@@ -153,9 +156,11 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
* caller is sure it should be an index block. * caller is sure it should be an index block.
*/ */
if (is_dx_block && type == INDEX) { if (is_dx_block && type == INDEX) {
if (ext4_dx_csum_verify(inode, dirent)) if (ext4_dx_csum_verify(inode, dirent) &&
!ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
set_buffer_verified(bh); set_buffer_verified(bh);
else { else {
ext4_set_errno(inode->i_sb, EFSBADCRC);
ext4_error_inode(inode, func, line, block, ext4_error_inode(inode, func, line, block,
"Directory index failed checksum"); "Directory index failed checksum");
brelse(bh); brelse(bh);
@@ -163,9 +168,11 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
} }
} }
if (!is_dx_block) { if (!is_dx_block) {
if (ext4_dirblock_csum_verify(inode, bh)) if (ext4_dirblock_csum_verify(inode, bh) &&
!ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
set_buffer_verified(bh); set_buffer_verified(bh);
else { else {
ext4_set_errno(inode->i_sb, EFSBADCRC);
ext4_error_inode(inode, func, line, block, ext4_error_inode(inode, func, line, block,
"Directory block failed checksum"); "Directory block failed checksum");
brelse(bh); brelse(bh);
@@ -1002,7 +1009,6 @@ static int htree_dirblock_to_tree(struct file *dir_file,
top = (struct ext4_dir_entry_2 *) ((char *) de + top = (struct ext4_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize - dir->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(0)); EXT4_DIR_REC_LEN(0));
#ifdef CONFIG_FS_ENCRYPTION
/* Check if the directory is encrypted */ /* Check if the directory is encrypted */
if (IS_ENCRYPTED(dir)) { if (IS_ENCRYPTED(dir)) {
err = fscrypt_get_encryption_info(dir); err = fscrypt_get_encryption_info(dir);
@@ -1017,7 +1023,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
return err; return err;
} }
} }
#endif
for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
if (ext4_check_dir_entry(dir, NULL, de, bh, if (ext4_check_dir_entry(dir, NULL, de, bh,
bh->b_data, bh->b_size, bh->b_data, bh->b_size,
@@ -1065,9 +1071,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
} }
errout: errout:
brelse(bh); brelse(bh);
#ifdef CONFIG_FS_ENCRYPTION
fscrypt_fname_free_buffer(&fname_crypto_str); fscrypt_fname_free_buffer(&fname_crypto_str);
#endif
return count; return count;
} }
@@ -1527,6 +1531,7 @@ restart:
goto next; goto next;
wait_on_buffer(bh); wait_on_buffer(bh);
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
ext4_set_errno(sb, EIO);
EXT4_ERROR_INODE(dir, "reading directory lblock %lu", EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
(unsigned long) block); (unsigned long) block);
brelse(bh); brelse(bh);
@@ -1537,6 +1542,7 @@ restart:
!is_dx_internal_node(dir, block, !is_dx_internal_node(dir, block,
(struct ext4_dir_entry *)bh->b_data) && (struct ext4_dir_entry *)bh->b_data) &&
!ext4_dirblock_csum_verify(dir, bh)) { !ext4_dirblock_csum_verify(dir, bh)) {
ext4_set_errno(sb, EFSBADCRC);
EXT4_ERROR_INODE(dir, "checksumming directory " EXT4_ERROR_INODE(dir, "checksumming directory "
"block %lu", (unsigned long)block); "block %lu", (unsigned long)block);
brelse(bh); brelse(bh);

View File

@@ -512,17 +512,26 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
gfp_t gfp_flags = GFP_NOFS; gfp_t gfp_flags = GFP_NOFS;
unsigned int enc_bytes = round_up(len, i_blocksize(inode)); unsigned int enc_bytes = round_up(len, i_blocksize(inode));
/*
* Since bounce page allocation uses a mempool, we can only use
* a waiting mask (i.e. request guaranteed allocation) on the
* first page of the bio. Otherwise it can deadlock.
*/
if (io->io_bio)
gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
retry_encrypt: retry_encrypt:
bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes, bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
0, gfp_flags); 0, gfp_flags);
if (IS_ERR(bounce_page)) { if (IS_ERR(bounce_page)) {
ret = PTR_ERR(bounce_page); ret = PTR_ERR(bounce_page);
if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { if (ret == -ENOMEM &&
if (io->io_bio) { (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
gfp_flags = GFP_NOFS;
if (io->io_bio)
ext4_io_submit(io); ext4_io_submit(io);
congestion_wait(BLK_RW_ASYNC, HZ/50); else
} gfp_flags |= __GFP_NOFAIL;
gfp_flags |= __GFP_NOFAIL; congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry_encrypt; goto retry_encrypt;
} }

View File

@@ -57,6 +57,7 @@ enum bio_post_read_step {
STEP_INITIAL = 0, STEP_INITIAL = 0,
STEP_DECRYPT, STEP_DECRYPT,
STEP_VERITY, STEP_VERITY,
STEP_MAX,
}; };
struct bio_post_read_ctx { struct bio_post_read_ctx {
@@ -106,10 +107,22 @@ static void verity_work(struct work_struct *work)
{ {
struct bio_post_read_ctx *ctx = struct bio_post_read_ctx *ctx =
container_of(work, struct bio_post_read_ctx, work); container_of(work, struct bio_post_read_ctx, work);
struct bio *bio = ctx->bio;
fsverity_verify_bio(ctx->bio); /*
* fsverity_verify_bio() may call readpages() again, and although verity
* will be disabled for that, decryption may still be needed, causing
* another bio_post_read_ctx to be allocated. So to guarantee that
* mempool_alloc() never deadlocks we must free the current ctx first.
* This is safe because verity is the last post-read step.
*/
BUILD_BUG_ON(STEP_VERITY + 1 != STEP_MAX);
mempool_free(ctx, bio_post_read_ctx_pool);
bio->bi_private = NULL;
bio_post_read_processing(ctx); fsverity_verify_bio(bio);
__read_end_io(bio);
} }
static void bio_post_read_processing(struct bio_post_read_ctx *ctx) static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
@@ -176,12 +189,11 @@ static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx)
idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
} }
static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode, static void ext4_set_bio_post_read_ctx(struct bio *bio,
struct bio *bio, const struct inode *inode,
pgoff_t first_idx) pgoff_t first_idx)
{ {
unsigned int post_read_steps = 0; unsigned int post_read_steps = 0;
struct bio_post_read_ctx *ctx = NULL;
if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
post_read_steps |= 1 << STEP_DECRYPT; post_read_steps |= 1 << STEP_DECRYPT;
@@ -190,14 +202,14 @@ static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode,
post_read_steps |= 1 << STEP_VERITY; post_read_steps |= 1 << STEP_VERITY;
if (post_read_steps) { if (post_read_steps) {
ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); /* Due to the mempool, this never fails. */
if (!ctx) struct bio_post_read_ctx *ctx =
return ERR_PTR(-ENOMEM); mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
ctx->bio = bio; ctx->bio = bio;
ctx->enabled_steps = post_read_steps; ctx->enabled_steps = post_read_steps;
bio->bi_private = ctx; bio->bi_private = ctx;
} }
return ctx;
} }
static inline loff_t ext4_readpage_limit(struct inode *inode) static inline loff_t ext4_readpage_limit(struct inode *inode)
@@ -358,24 +370,16 @@ int ext4_mpage_readpages(struct address_space *mapping,
bio = NULL; bio = NULL;
} }
if (bio == NULL) { if (bio == NULL) {
struct bio_post_read_ctx *ctx;
/* /*
* bio_alloc will _always_ be able to allocate a bio if * bio_alloc will _always_ be able to allocate a bio if
* __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset(). * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
*/ */
bio = bio_alloc(GFP_KERNEL, bio = bio_alloc(GFP_KERNEL,
min_t(int, nr_pages, BIO_MAX_PAGES)); min_t(int, nr_pages, BIO_MAX_PAGES));
ctx = get_bio_post_read_ctx(inode, bio, page->index); ext4_set_bio_post_read_ctx(bio, inode, page->index);
if (IS_ERR(ctx)) {
bio_put(bio);
bio = NULL;
goto set_error_page;
}
bio_set_dev(bio, bdev); bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_end_io = mpage_end_io; bio->bi_end_io = mpage_end_io;
bio->bi_private = ctx;
bio_set_op_attrs(bio, REQ_OP_READ, bio_set_op_attrs(bio, REQ_OP_READ,
is_readahead ? REQ_RAHEAD : 0); is_readahead ? REQ_RAHEAD : 0);
} }

View File

@@ -824,9 +824,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
if (unlikely(err)) if (unlikely(err))
goto errout; goto errout;
n_group_desc = ext4_kvmalloc((gdb_num + 1) * n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
sizeof(struct buffer_head *), GFP_KERNEL);
GFP_NOFS);
if (!n_group_desc) { if (!n_group_desc) {
err = -ENOMEM; err = -ENOMEM;
ext4_warning(sb, "not enough memory for %lu groups", ext4_warning(sb, "not enough memory for %lu groups",
@@ -900,9 +899,8 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
gdb_bh = ext4_sb_bread(sb, gdblock, 0); gdb_bh = ext4_sb_bread(sb, gdblock, 0);
if (IS_ERR(gdb_bh)) if (IS_ERR(gdb_bh))
return PTR_ERR(gdb_bh); return PTR_ERR(gdb_bh);
n_group_desc = ext4_kvmalloc((gdb_num + 1) * n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
sizeof(struct buffer_head *), GFP_KERNEL);
GFP_NOFS);
if (!n_group_desc) { if (!n_group_desc) {
brelse(gdb_bh); brelse(gdb_bh);
err = -ENOMEM; err = -ENOMEM;

View File

@@ -154,7 +154,7 @@ ext4_sb_bread(struct super_block *sb, sector_t block, int op_flags)
if (bh == NULL) if (bh == NULL)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
if (buffer_uptodate(bh)) if (ext4_buffer_uptodate(bh))
return bh; return bh;
ll_rw_block(REQ_OP_READ, REQ_META | op_flags, 1, &bh); ll_rw_block(REQ_OP_READ, REQ_META | op_flags, 1, &bh);
wait_on_buffer(bh); wait_on_buffer(bh);
@@ -204,26 +204,6 @@ void ext4_superblock_csum_set(struct super_block *sb)
es->s_checksum = ext4_superblock_csum(sb, es); es->s_checksum = ext4_superblock_csum(sb, es);
} }
void *ext4_kvmalloc(size_t size, gfp_t flags)
{
void *ret;
ret = kmalloc(size, flags | __GFP_NOWARN);
if (!ret)
ret = __vmalloc(size, flags, PAGE_KERNEL);
return ret;
}
void *ext4_kvzalloc(size_t size, gfp_t flags)
{
void *ret;
ret = kzalloc(size, flags | __GFP_NOWARN);
if (!ret)
ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
return ret;
}
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg) struct ext4_group_desc *bg)
{ {
@@ -367,6 +347,8 @@ static void __save_error_info(struct super_block *sb, const char *func,
ext4_update_tstamp(es, s_last_error_time); ext4_update_tstamp(es, s_last_error_time);
strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
es->s_last_error_line = cpu_to_le32(line); es->s_last_error_line = cpu_to_le32(line);
if (es->s_last_error_errcode == 0)
es->s_last_error_errcode = EXT4_ERR_EFSCORRUPTED;
if (!es->s_first_error_time) { if (!es->s_first_error_time) {
es->s_first_error_time = es->s_last_error_time; es->s_first_error_time = es->s_last_error_time;
es->s_first_error_time_hi = es->s_last_error_time_hi; es->s_first_error_time_hi = es->s_last_error_time_hi;
@@ -375,6 +357,7 @@ static void __save_error_info(struct super_block *sb, const char *func,
es->s_first_error_line = cpu_to_le32(line); es->s_first_error_line = cpu_to_le32(line);
es->s_first_error_ino = es->s_last_error_ino; es->s_first_error_ino = es->s_last_error_ino;
es->s_first_error_block = es->s_last_error_block; es->s_first_error_block = es->s_last_error_block;
es->s_first_error_errcode = es->s_last_error_errcode;
} }
/* /*
* Start the daily error reporting function if it hasn't been * Start the daily error reporting function if it hasn't been
@@ -631,6 +614,66 @@ const char *ext4_decode_error(struct super_block *sb, int errno,
return errstr; return errstr;
} }
void ext4_set_errno(struct super_block *sb, int err)
{
if (err < 0)
err = -err;
switch (err) {
case EIO:
err = EXT4_ERR_EIO;
break;
case ENOMEM:
err = EXT4_ERR_ENOMEM;
break;
case EFSBADCRC:
err = EXT4_ERR_EFSBADCRC;
break;
case EFSCORRUPTED:
err = EXT4_ERR_EFSCORRUPTED;
break;
case ENOSPC:
err = EXT4_ERR_ENOSPC;
break;
case ENOKEY:
err = EXT4_ERR_ENOKEY;
break;
case EROFS:
err = EXT4_ERR_EROFS;
break;
case EFBIG:
err = EXT4_ERR_EFBIG;
break;
case EEXIST:
err = EXT4_ERR_EEXIST;
break;
case ERANGE:
err = EXT4_ERR_ERANGE;
break;
case EOVERFLOW:
err = EXT4_ERR_EOVERFLOW;
break;
case EBUSY:
err = EXT4_ERR_EBUSY;
break;
case ENOTDIR:
err = EXT4_ERR_ENOTDIR;
break;
case ENOTEMPTY:
err = EXT4_ERR_ENOTEMPTY;
break;
case ESHUTDOWN:
err = EXT4_ERR_ESHUTDOWN;
break;
case EFAULT:
err = EXT4_ERR_EFAULT;
break;
default:
err = EXT4_ERR_UNKNOWN;
}
EXT4_SB(sb)->s_es->s_last_error_errcode = err;
}
/* __ext4_std_error decodes expected errors from journaling functions /* __ext4_std_error decodes expected errors from journaling functions
* automatically and invokes the appropriate error response. */ * automatically and invokes the appropriate error response. */
@@ -655,6 +698,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
sb->s_id, function, line, errstr); sb->s_id, function, line, errstr);
} }
ext4_set_errno(sb, -errno);
save_error_info(sb, function, line); save_error_info(sb, function, line);
ext4_handle_error(sb); ext4_handle_error(sb);
} }
@@ -982,8 +1026,10 @@ static void ext4_put_super(struct super_block *sb)
aborted = is_journal_aborted(sbi->s_journal); aborted = is_journal_aborted(sbi->s_journal);
err = jbd2_journal_destroy(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL; sbi->s_journal = NULL;
if ((err < 0) && !aborted) if ((err < 0) && !aborted) {
ext4_set_errno(sb, -err);
ext4_abort(sb, "Couldn't clean up the journal"); ext4_abort(sb, "Couldn't clean up the journal");
}
} }
ext4_unregister_sysfs(sb); ext4_unregister_sysfs(sb);
@@ -1085,8 +1131,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_es_shk_nr = 0; ei->i_es_shk_nr = 0;
ei->i_es_shrink_lblk = 0; ei->i_es_shrink_lblk = 0;
ei->i_reserved_data_blocks = 0; ei->i_reserved_data_blocks = 0;
ei->i_da_metadata_calc_len = 0;
ei->i_da_metadata_calc_last_lblock = 0;
spin_lock_init(&(ei->i_block_reservation_lock)); spin_lock_init(&(ei->i_block_reservation_lock));
ext4_init_pending_tree(&ei->i_pending_tree); ext4_init_pending_tree(&ei->i_pending_tree);
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
@@ -1548,6 +1592,7 @@ static const match_table_t tokens = {
{Opt_auto_da_alloc, "auto_da_alloc"}, {Opt_auto_da_alloc, "auto_da_alloc"},
{Opt_noauto_da_alloc, "noauto_da_alloc"}, {Opt_noauto_da_alloc, "noauto_da_alloc"},
{Opt_dioread_nolock, "dioread_nolock"}, {Opt_dioread_nolock, "dioread_nolock"},
{Opt_dioread_lock, "nodioread_nolock"},
{Opt_dioread_lock, "dioread_lock"}, {Opt_dioread_lock, "dioread_lock"},
{Opt_discard, "discard"}, {Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"}, {Opt_nodiscard, "nodiscard"},
@@ -3720,6 +3765,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sb, NO_UID32); set_opt(sb, NO_UID32);
/* xattr user namespace & acls are now defaulted on */ /* xattr user namespace & acls are now defaulted on */
set_opt(sb, XATTR_USER); set_opt(sb, XATTR_USER);
set_opt(sb, DIOREAD_NOLOCK);
#ifdef CONFIG_EXT4_FS_POSIX_ACL #ifdef CONFIG_EXT4_FS_POSIX_ACL
set_opt(sb, POSIX_ACL); set_opt(sb, POSIX_ACL);
#endif #endif
@@ -3887,9 +3933,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#endif #endif
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
"with data=journal disables delayed " clear_opt(sb, DIOREAD_NOLOCK);
"allocation and O_DIRECT support!\n");
if (test_opt2(sb, EXPLICIT_DELALLOC)) { if (test_opt2(sb, EXPLICIT_DELALLOC)) {
ext4_msg(sb, KERN_ERR, "can't mount with " ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and delalloc"); "both data=journal and delalloc");
@@ -5540,9 +5585,15 @@ static int ext4_statfs_project(struct super_block *sb,
return PTR_ERR(dquot); return PTR_ERR(dquot);
spin_lock(&dquot->dq_dqb_lock); spin_lock(&dquot->dq_dqb_lock);
limit = (dquot->dq_dqb.dqb_bsoftlimit ? limit = 0;
dquot->dq_dqb.dqb_bsoftlimit : if (dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; (!limit || dquot->dq_dqb.dqb_bsoftlimit < limit))
limit = dquot->dq_dqb.dqb_bsoftlimit;
if (dquot->dq_dqb.dqb_bhardlimit &&
(!limit || dquot->dq_dqb.dqb_bhardlimit < limit))
limit = dquot->dq_dqb.dqb_bhardlimit;
limit >>= sb->s_blocksize_bits;
if (limit && buf->f_blocks > limit) { if (limit && buf->f_blocks > limit) {
curblock = (dquot->dq_dqb.dqb_curspace + curblock = (dquot->dq_dqb.dqb_curspace +
dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits; dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
@@ -5552,9 +5603,14 @@ static int ext4_statfs_project(struct super_block *sb,
(buf->f_blocks - curblock) : 0; (buf->f_blocks - curblock) : 0;
} }
limit = dquot->dq_dqb.dqb_isoftlimit ? limit = 0;
dquot->dq_dqb.dqb_isoftlimit : if (dquot->dq_dqb.dqb_isoftlimit &&
dquot->dq_dqb.dqb_ihardlimit; (!limit || dquot->dq_dqb.dqb_isoftlimit < limit))
limit = dquot->dq_dqb.dqb_isoftlimit;
if (dquot->dq_dqb.dqb_ihardlimit &&
(!limit || dquot->dq_dqb.dqb_ihardlimit < limit))
limit = dquot->dq_dqb.dqb_ihardlimit;
if (limit && buf->f_files > limit) { if (limit && buf->f_files > limit) {
buf->f_files = limit; buf->f_files = limit;
buf->f_ffree = buf->f_ffree =

View File

@@ -29,6 +29,10 @@ typedef enum {
attr_last_error_time, attr_last_error_time,
attr_feature, attr_feature,
attr_pointer_ui, attr_pointer_ui,
attr_pointer_ul,
attr_pointer_u64,
attr_pointer_u8,
attr_pointer_string,
attr_pointer_atomic, attr_pointer_atomic,
attr_journal_task, attr_journal_task,
} attr_id_t; } attr_id_t;
@@ -46,6 +50,7 @@ struct ext4_attr {
struct attribute attr; struct attribute attr;
short attr_id; short attr_id;
short attr_ptr; short attr_ptr;
unsigned short attr_size;
union { union {
int offset; int offset;
void *explicit_ptr; void *explicit_ptr;
@@ -154,12 +159,35 @@ static struct ext4_attr ext4_attr_##_name = { \
}, \ }, \
} }
#define EXT4_ATTR_STRING(_name,_mode,_size,_struct,_elname) \
static struct ext4_attr ext4_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.attr_id = attr_pointer_string, \
.attr_size = _size, \
.attr_ptr = ptr_##_struct##_offset, \
.u = { \
.offset = offsetof(struct _struct, _elname),\
}, \
}
#define EXT4_RO_ATTR_ES_UI(_name,_elname) \ #define EXT4_RO_ATTR_ES_UI(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0444, pointer_ui, ext4_super_block, _elname) EXT4_ATTR_OFFSET(_name, 0444, pointer_ui, ext4_super_block, _elname)
#define EXT4_RO_ATTR_ES_U8(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0444, pointer_u8, ext4_super_block, _elname)
#define EXT4_RO_ATTR_ES_U64(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0444, pointer_u64, ext4_super_block, _elname)
#define EXT4_RO_ATTR_ES_STRING(_name,_elname,_size) \
EXT4_ATTR_STRING(_name, 0444, _size, ext4_super_block, _elname)
#define EXT4_RW_ATTR_SBI_UI(_name,_elname) \ #define EXT4_RW_ATTR_SBI_UI(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0644, pointer_ui, ext4_sb_info, _elname) EXT4_ATTR_OFFSET(_name, 0644, pointer_ui, ext4_sb_info, _elname)
#define EXT4_RW_ATTR_SBI_UL(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0644, pointer_ul, ext4_sb_info, _elname)
#define EXT4_ATTR_PTR(_name,_mode,_id,_ptr) \ #define EXT4_ATTR_PTR(_name,_mode,_id,_ptr) \
static struct ext4_attr ext4_attr_##_name = { \ static struct ext4_attr ext4_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \ .attr = {.name = __stringify(_name), .mode = _mode }, \
@@ -194,7 +222,20 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
#ifdef CONFIG_EXT4_DEBUG
EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail);
#endif
EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
EXT4_RO_ATTR_ES_U8(first_error_errcode, s_first_error_errcode);
EXT4_RO_ATTR_ES_U8(last_error_errcode, s_last_error_errcode);
EXT4_RO_ATTR_ES_UI(first_error_ino, s_first_error_ino);
EXT4_RO_ATTR_ES_UI(last_error_ino, s_last_error_ino);
EXT4_RO_ATTR_ES_U64(first_error_block, s_first_error_block);
EXT4_RO_ATTR_ES_U64(last_error_block, s_last_error_block);
EXT4_RO_ATTR_ES_UI(first_error_line, s_first_error_line);
EXT4_RO_ATTR_ES_UI(last_error_line, s_last_error_line);
EXT4_RO_ATTR_ES_STRING(first_error_func, s_first_error_func, 32);
EXT4_RO_ATTR_ES_STRING(last_error_func, s_last_error_func, 32);
EXT4_ATTR(first_error_time, 0444, first_error_time); EXT4_ATTR(first_error_time, 0444, first_error_time);
EXT4_ATTR(last_error_time, 0444, last_error_time); EXT4_ATTR(last_error_time, 0444, last_error_time);
EXT4_ATTR(journal_task, 0444, journal_task); EXT4_ATTR(journal_task, 0444, journal_task);
@@ -225,9 +266,22 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(msg_ratelimit_interval_ms), ATTR_LIST(msg_ratelimit_interval_ms),
ATTR_LIST(msg_ratelimit_burst), ATTR_LIST(msg_ratelimit_burst),
ATTR_LIST(errors_count), ATTR_LIST(errors_count),
ATTR_LIST(first_error_ino),
ATTR_LIST(last_error_ino),
ATTR_LIST(first_error_block),
ATTR_LIST(last_error_block),
ATTR_LIST(first_error_line),
ATTR_LIST(last_error_line),
ATTR_LIST(first_error_func),
ATTR_LIST(last_error_func),
ATTR_LIST(first_error_errcode),
ATTR_LIST(last_error_errcode),
ATTR_LIST(first_error_time), ATTR_LIST(first_error_time),
ATTR_LIST(last_error_time), ATTR_LIST(last_error_time),
ATTR_LIST(journal_task), ATTR_LIST(journal_task),
#ifdef CONFIG_EXT4_DEBUG
ATTR_LIST(simulate_fail),
#endif
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(ext4); ATTRIBUTE_GROUPS(ext4);
@@ -280,7 +334,7 @@ static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi)
static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi) static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi)
{ {
return snprintf(buf, PAGE_SIZE, "%lld", return snprintf(buf, PAGE_SIZE, "%lld\n",
((time64_t)hi << 32) + le32_to_cpu(lo)); ((time64_t)hi << 32) + le32_to_cpu(lo));
} }
@@ -318,6 +372,30 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
else else
return snprintf(buf, PAGE_SIZE, "%u\n", return snprintf(buf, PAGE_SIZE, "%u\n",
*((unsigned int *) ptr)); *((unsigned int *) ptr));
case attr_pointer_ul:
if (!ptr)
return 0;
return snprintf(buf, PAGE_SIZE, "%lu\n",
*((unsigned long *) ptr));
case attr_pointer_u8:
if (!ptr)
return 0;
return snprintf(buf, PAGE_SIZE, "%u\n",
*((unsigned char *) ptr));
case attr_pointer_u64:
if (!ptr)
return 0;
if (a->attr_ptr == ptr_ext4_super_block_offset)
return snprintf(buf, PAGE_SIZE, "%llu\n",
le64_to_cpup(ptr));
else
return snprintf(buf, PAGE_SIZE, "%llu\n",
*((unsigned long long *) ptr));
case attr_pointer_string:
if (!ptr)
return 0;
return snprintf(buf, PAGE_SIZE, "%.*s\n", a->attr_size,
(char *) ptr);
case attr_pointer_atomic: case attr_pointer_atomic:
if (!ptr) if (!ptr)
return 0; return 0;
@@ -361,6 +439,14 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
else else
*((unsigned int *) ptr) = t; *((unsigned int *) ptr) = t;
return len; return len;
case attr_pointer_ul:
if (!ptr)
return 0;
ret = kstrtoul(skip_spaces(buf), 0, &t);
if (ret)
return ret;
*((unsigned long *) ptr) = t;
return len;
case attr_inode_readahead: case attr_inode_readahead:
return inode_readahead_blks_store(sbi, buf, len); return inode_readahead_blks_store(sbi, buf, len);
case attr_trigger_test_error: case attr_trigger_test_error:

View File

@@ -1456,7 +1456,7 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
if (!ce) if (!ce)
return NULL; return NULL;
ea_data = ext4_kvmalloc(value_len, GFP_NOFS); ea_data = kvmalloc(value_len, GFP_KERNEL);
if (!ea_data) { if (!ea_data) {
mb_cache_entry_put(ea_inode_cache, ce); mb_cache_entry_put(ea_inode_cache, ce);
return NULL; return NULL;
@@ -2879,9 +2879,11 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh)) { if (IS_ERR(bh)) {
error = PTR_ERR(bh); error = PTR_ERR(bh);
if (error == -EIO) if (error == -EIO) {
ext4_set_errno(inode->i_sb, EIO);
EXT4_ERROR_INODE(inode, "block %llu read error", EXT4_ERROR_INODE(inode, "block %llu read error",
EXT4_I(inode)->i_file_acl); EXT4_I(inode)->i_file_acl);
}
bh = NULL; bh = NULL;
goto cleanup; goto cleanup;
} }

View File

@@ -164,7 +164,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
"journal space in %s\n", __func__, "journal space in %s\n", __func__,
journal->j_devname); journal->j_devname);
WARN_ON(1); WARN_ON(1);
jbd2_journal_abort(journal, 0); jbd2_journal_abort(journal, -EIO);
} }
write_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} else { } else {

View File

@@ -782,7 +782,7 @@ start_journal_io:
err = journal_submit_commit_record(journal, commit_transaction, err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum); &cbh, crc32_sum);
if (err) if (err)
__jbd2_journal_abort_hard(journal); jbd2_journal_abort(journal, err);
} }
blk_finish_plug(&plug); blk_finish_plug(&plug);
@@ -875,7 +875,7 @@ start_journal_io:
err = journal_submit_commit_record(journal, commit_transaction, err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum); &cbh, crc32_sum);
if (err) if (err)
__jbd2_journal_abort_hard(journal); jbd2_journal_abort(journal, err);
} }
if (cbh) if (cbh)
err = journal_wait_on_commit_record(journal, cbh); err = journal_wait_on_commit_record(journal, cbh);

View File

@@ -96,7 +96,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
EXPORT_SYMBOL(jbd2_inode_cache); EXPORT_SYMBOL(jbd2_inode_cache);
static void __journal_abort_soft (journal_t *journal, int errno);
static int jbd2_journal_create_slab(size_t slab_size); static int jbd2_journal_create_slab(size_t slab_size);
#ifdef CONFIG_JBD2_DEBUG #ifdef CONFIG_JBD2_DEBUG
@@ -805,7 +804,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
"at offset %lu on %s\n", "at offset %lu on %s\n",
__func__, blocknr, journal->j_devname); __func__, blocknr, journal->j_devname);
err = -EIO; err = -EIO;
__journal_abort_soft(journal, err); jbd2_journal_abort(journal, err);
} }
} else { } else {
*retp = blocknr; /* +journal->j_blk_offset */ *retp = blocknr; /* +journal->j_blk_offset */
@@ -982,6 +981,7 @@ static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
{ {
(*pos)++;
return NULL; return NULL;
} }
@@ -1710,6 +1710,11 @@ int jbd2_journal_load(journal_t *journal)
journal->j_devname); journal->j_devname);
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
/*
* clear JBD2_ABORT flag initialized in journal_init_common
* here to update log tail information with the newest seq.
*/
journal->j_flags &= ~JBD2_ABORT;
/* OK, we've finished with the dynamic journal bits: /* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory * reinitialise the dynamic contents of the superblock in memory
@@ -1717,7 +1722,6 @@ int jbd2_journal_load(journal_t *journal)
if (journal_reset(journal)) if (journal_reset(journal))
goto recovery_error; goto recovery_error;
journal->j_flags &= ~JBD2_ABORT;
journal->j_flags |= JBD2_LOADED; journal->j_flags |= JBD2_LOADED;
return 0; return 0;
@@ -2098,67 +2102,6 @@ int jbd2_journal_wipe(journal_t *journal, int write)
return err; return err;
} }
/*
* Journal abort has very specific semantics, which we describe
* for journal abort.
*
* Two internal functions, which provide abort to the jbd layer
* itself are here.
*/
/*
* Quick version for internal journal use (doesn't lock the journal).
* Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
* and don't attempt to make any other journal updates.
*/
void __jbd2_journal_abort_hard(journal_t *journal)
{
transaction_t *transaction;
if (journal->j_flags & JBD2_ABORT)
return;
printk(KERN_ERR "Aborting journal on device %s.\n",
journal->j_devname);
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_ABORT;
transaction = journal->j_running_transaction;
if (transaction)
__jbd2_log_start_commit(journal, transaction->t_tid);
write_unlock(&journal->j_state_lock);
}
/* Soft abort: record the abort error status in the journal superblock,
* but don't do any other IO. */
static void __journal_abort_soft (journal_t *journal, int errno)
{
int old_errno;
write_lock(&journal->j_state_lock);
old_errno = journal->j_errno;
if (!journal->j_errno || errno == -ESHUTDOWN)
journal->j_errno = errno;
if (journal->j_flags & JBD2_ABORT) {
write_unlock(&journal->j_state_lock);
if (!old_errno && old_errno != -ESHUTDOWN &&
errno == -ESHUTDOWN)
jbd2_journal_update_sb_errno(journal);
return;
}
write_unlock(&journal->j_state_lock);
__jbd2_journal_abort_hard(journal);
if (errno) {
jbd2_journal_update_sb_errno(journal);
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_REC_ERR;
write_unlock(&journal->j_state_lock);
}
}
/** /**
* void jbd2_journal_abort () - Shutdown the journal immediately. * void jbd2_journal_abort () - Shutdown the journal immediately.
* @journal: the journal to shutdown. * @journal: the journal to shutdown.
@@ -2198,16 +2141,51 @@ static void __journal_abort_soft (journal_t *journal, int errno)
* failure to disk. ext3_error, for example, now uses this * failure to disk. ext3_error, for example, now uses this
* functionality. * functionality.
* *
* Errors which originate from within the journaling layer will NOT
* supply an errno; a null errno implies that absolutely no further
* writes are done to the journal (unless there are any already in
* progress).
*
*/ */
void jbd2_journal_abort(journal_t *journal, int errno) void jbd2_journal_abort(journal_t *journal, int errno)
{ {
__journal_abort_soft(journal, errno); transaction_t *transaction;
/*
* ESHUTDOWN always takes precedence because a file system check
* caused by any other journal abort error is not required after
* a shutdown triggered.
*/
write_lock(&journal->j_state_lock);
if (journal->j_flags & JBD2_ABORT) {
int old_errno = journal->j_errno;
write_unlock(&journal->j_state_lock);
if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) {
journal->j_errno = errno;
jbd2_journal_update_sb_errno(journal);
}
return;
}
/*
* Mark the abort as occurred and start current running transaction
* to release all journaled buffer.
*/
pr_err("Aborting journal on device %s.\n", journal->j_devname);
journal->j_flags |= JBD2_ABORT;
journal->j_errno = errno;
transaction = journal->j_running_transaction;
if (transaction)
__jbd2_log_start_commit(journal, transaction->t_tid);
write_unlock(&journal->j_state_lock);
/*
* Record errno to the journal super block, so that fsck and jbd2
* layer could realise that a filesystem check is needed.
*/
jbd2_journal_update_sb_errno(journal);
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_REC_ERR;
write_unlock(&journal->j_state_lock);
} }
/** /**
@@ -2556,7 +2534,6 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
{ {
struct journal_head *jh = bh2jh(bh); struct journal_head *jh = bh2jh(bh);
J_ASSERT_JH(jh, jh->b_jcount >= 0);
J_ASSERT_JH(jh, jh->b_transaction == NULL); J_ASSERT_JH(jh, jh->b_transaction == NULL);
J_ASSERT_JH(jh, jh->b_next_transaction == NULL); J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);

View File

@@ -525,7 +525,7 @@ EXPORT_SYMBOL(jbd2__journal_start);
* modified buffers in the log. We block until the log can guarantee * modified buffers in the log. We block until the log can guarantee
* that much space. Additionally, if rsv_blocks > 0, we also create another * that much space. Additionally, if rsv_blocks > 0, we also create another
* handle with rsv_blocks reserved blocks in the journal. This handle is * handle with rsv_blocks reserved blocks in the journal. This handle is
* is stored in h_rsv_handle. It is not attached to any particular transaction * stored in h_rsv_handle. It is not attached to any particular transaction
* and thus doesn't block transaction commit. If the caller uses this reserved * and thus doesn't block transaction commit. If the caller uses this reserved
* handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop() * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
* on the parent handle will dispose the reserved one. Reserved handle has to * on the parent handle will dispose the reserved one. Reserved handle has to
@@ -1595,7 +1595,7 @@ out:
* Allow this call even if the handle has aborted --- it may be part of * Allow this call even if the handle has aborted --- it may be part of
* the caller's cleanup after an abort. * the caller's cleanup after an abort.
*/ */
int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal; journal_t *journal;

View File

@@ -1403,7 +1403,6 @@ extern int jbd2_journal_skip_recovery (journal_t *);
extern void jbd2_journal_update_sb_errno(journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *);
extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
unsigned long, int); unsigned long, int);
extern void __jbd2_journal_abort_hard (journal_t *);
extern void jbd2_journal_abort (journal_t *, int); extern void jbd2_journal_abort (journal_t *, int);
extern int jbd2_journal_errno (journal_t *); extern int jbd2_journal_errno (journal_t *);
extern void jbd2_journal_ack_err (journal_t *); extern void jbd2_journal_ack_err (journal_t *);

View File

@@ -48,6 +48,16 @@ struct partial_cluster;
{ EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \
{ EXT4_GET_BLOCKS_ZERO, "ZERO" }) { EXT4_GET_BLOCKS_ZERO, "ZERO" })
/*
* __print_flags() requires that all enum values be wrapped in the
* TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace
* ring buffer.
*/
TRACE_DEFINE_ENUM(BH_New);
TRACE_DEFINE_ENUM(BH_Mapped);
TRACE_DEFINE_ENUM(BH_Unwritten);
TRACE_DEFINE_ENUM(BH_Boundary);
#define show_mflags(flags) __print_flags(flags, "", \ #define show_mflags(flags) __print_flags(flags, "", \
{ EXT4_MAP_NEW, "N" }, \ { EXT4_MAP_NEW, "N" }, \
{ EXT4_MAP_MAPPED, "M" }, \ { EXT4_MAP_MAPPED, "M" }, \
@@ -62,11 +72,18 @@ struct partial_cluster;
{ EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER,"1ST_CLUSTER" },\ { EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER,"1ST_CLUSTER" },\
{ EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" }) { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" })
TRACE_DEFINE_ENUM(ES_WRITTEN_B);
TRACE_DEFINE_ENUM(ES_UNWRITTEN_B);
TRACE_DEFINE_ENUM(ES_DELAYED_B);
TRACE_DEFINE_ENUM(ES_HOLE_B);
TRACE_DEFINE_ENUM(ES_REFERENCED_B);
#define show_extent_status(status) __print_flags(status, "", \ #define show_extent_status(status) __print_flags(status, "", \
{ EXTENT_STATUS_WRITTEN, "W" }, \ { EXTENT_STATUS_WRITTEN, "W" }, \
{ EXTENT_STATUS_UNWRITTEN, "U" }, \ { EXTENT_STATUS_UNWRITTEN, "U" }, \
{ EXTENT_STATUS_DELAYED, "D" }, \ { EXTENT_STATUS_DELAYED, "D" }, \
{ EXTENT_STATUS_HOLE, "H" }) { EXTENT_STATUS_HOLE, "H" }, \
{ EXTENT_STATUS_REFERENCED, "R" })
#define show_falloc_mode(mode) __print_flags(mode, "|", \ #define show_falloc_mode(mode) __print_flags(mode, "|", \
{ FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
@@ -2265,7 +2282,7 @@ DECLARE_EVENT_CLASS(ext4__es_extent,
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->lblk = es->es_lblk; __entry->lblk = es->es_lblk;
__entry->len = es->es_len; __entry->len = es->es_len;
__entry->pblk = ext4_es_pblock(es); __entry->pblk = ext4_es_show_pblock(es);
__entry->status = ext4_es_status(es); __entry->status = ext4_es_status(es);
), ),
@@ -2354,7 +2371,7 @@ TRACE_EVENT(ext4_es_find_extent_range_exit,
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->lblk = es->es_lblk; __entry->lblk = es->es_lblk;
__entry->len = es->es_len; __entry->len = es->es_len;
__entry->pblk = ext4_es_pblock(es); __entry->pblk = ext4_es_show_pblock(es);
__entry->status = ext4_es_status(es); __entry->status = ext4_es_status(es);
), ),
@@ -2408,7 +2425,7 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->lblk = es->es_lblk; __entry->lblk = es->es_lblk;
__entry->len = es->es_len; __entry->len = es->es_len;
__entry->pblk = ext4_es_pblock(es); __entry->pblk = ext4_es_show_pblock(es);
__entry->status = ext4_es_status(es); __entry->status = ext4_es_status(es);
__entry->found = found; __entry->found = found;
), ),
@@ -2576,7 +2593,7 @@ TRACE_EVENT(ext4_es_insert_delayed_block,
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->lblk = es->es_lblk; __entry->lblk = es->es_lblk;
__entry->len = es->es_len; __entry->len = es->es_len;
__entry->pblk = ext4_es_pblock(es); __entry->pblk = ext4_es_show_pblock(es);
__entry->status = ext4_es_status(es); __entry->status = ext4_es_status(es);
__entry->allocated = allocated; __entry->allocated = allocated;
), ),