Merge tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "This patch series contains several performance tuning patches
  regarding to the IO submission flow, in addition to supporting new
  features such as a ZBC-base drive and multiple devices.

  It also includes some major bug fixes such as:
   - checkpoint version control
   - fdatasync-related roll-forward recovery routine
   - memory boundary or null-pointer access in corner cases
   - missing error cases

  It has various minor clean-up patches as well"

* tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (66 commits)
  f2fs: fix a missing size change in f2fs_setattr
  f2fs: fix to access nullified flush_cmd_control pointer
  f2fs: free meta pages if sanity check for ckpt is failed
  f2fs: detect wrong layout
  f2fs: call sync_fs when f2fs is idle
  Revert "f2fs: use percpu_counter for # of dirty pages in inode"
  f2fs: return AOP_WRITEPAGE_ACTIVATE for writepage
  f2fs: do not activate auto_recovery for fallocated i_size
  f2fs: fix to determine start_cp_addr by sbi->cur_cp_pack
  f2fs: fix 32-bit build
  f2fs: set ->owner for debugfs status file's file_operations
  f2fs: fix incorrect free inode count in ->statfs
  f2fs: drop duplicate header timer.h
  f2fs: fix wrong AUTO_RECOVER condition
  f2fs: do not recover i_size if it's valid
  f2fs: fix fdatasync
  f2fs: fix to account total free nid correctly
  f2fs: fix an infinite loop when flush nodes in cp
  f2fs: don't wait writeback for datas during checkpoint
  f2fs: fix wrong written_valid_blocks counting
  ...
This commit is contained in:
Linus Torvalds
2016-12-14 09:07:36 -08:00
22 changed files with 1049 additions and 477 deletions

View File

@@ -274,8 +274,10 @@ static int __commit_inmem_pages(struct inode *inode,
set_page_dirty(page);
f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page))
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
remove_dirty_inode(inode);
}
fio.page = page;
err = do_write_data_page(&fio);
@@ -287,7 +289,6 @@ static int __commit_inmem_pages(struct inode *inode,
/* record old blkaddr for revoking */
cur->old_addr = fio.old_blkaddr;
clear_cold_data(page);
submit_bio = true;
}
unlock_page(page);
@@ -363,7 +364,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
*/
if (has_not_enough_free_secs(sbi, 0, 0)) {
mutex_lock(&sbi->gc_mutex);
f2fs_gc(sbi, false);
f2fs_gc(sbi, false, false);
}
}
@@ -380,14 +381,17 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (!available_free_memory(sbi, FREE_NIDS))
try_to_free_nids(sbi, MAX_FREE_NIDS);
else
build_free_nids(sbi);
build_free_nids(sbi, false);
if (!is_idle(sbi))
return;
/* checkpoint is the only way to shrink partial cached entries */
if (!available_free_memory(sbi, NAT_ENTRIES) ||
!available_free_memory(sbi, INO_ENTRIES) ||
excess_prefree_segs(sbi) ||
excess_dirty_nats(sbi) ||
(is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
f2fs_time_over(sbi, CP_TIME)) {
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
@@ -400,6 +404,33 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
}
}
static int __submit_flush_wait(struct block_device *bdev)
{
struct bio *bio = f2fs_bio_alloc(0);
int ret;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
bio->bi_bdev = bdev;
ret = submit_bio_wait(bio);
bio_put(bio);
return ret;
}
static int submit_flush_wait(struct f2fs_sb_info *sbi)
{
int ret = __submit_flush_wait(sbi->sb->s_bdev);
int i;
if (sbi->s_ndevs && !ret) {
for (i = 1; i < sbi->s_ndevs; i++) {
ret = __submit_flush_wait(FDEV(i).bdev);
if (ret)
break;
}
}
return ret;
}
static int issue_flush_thread(void *data)
{
struct f2fs_sb_info *sbi = data;
@@ -410,25 +441,18 @@ repeat:
return 0;
if (!llist_empty(&fcc->issue_list)) {
struct bio *bio;
struct flush_cmd *cmd, *next;
int ret;
bio = f2fs_bio_alloc(0);
fcc->dispatch_list = llist_del_all(&fcc->issue_list);
fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
ret = submit_bio_wait(bio);
ret = submit_flush_wait(sbi);
llist_for_each_entry_safe(cmd, next,
fcc->dispatch_list, llnode) {
cmd->ret = ret;
complete(&cmd->wait);
}
bio_put(bio);
fcc->dispatch_list = NULL;
}
@@ -449,15 +473,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
return 0;
if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) {
struct bio *bio = f2fs_bio_alloc(0);
int ret;
atomic_inc(&fcc->submit_flush);
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
ret = submit_bio_wait(bio);
ret = submit_flush_wait(sbi);
atomic_dec(&fcc->submit_flush);
bio_put(bio);
return ret;
}
@@ -469,8 +489,13 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
if (!fcc->dispatch_list)
wake_up(&fcc->flush_wait_queue);
wait_for_completion(&cmd.wait);
atomic_dec(&fcc->submit_flush);
if (fcc->f2fs_issue_flush) {
wait_for_completion(&cmd.wait);
atomic_dec(&fcc->submit_flush);
} else {
llist_del_all(&fcc->issue_list);
atomic_set(&fcc->submit_flush, 0);
}
return cmd.ret;
}
@@ -481,6 +506,11 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
struct flush_cmd_control *fcc;
int err = 0;
if (SM_I(sbi)->cmd_control_info) {
fcc = SM_I(sbi)->cmd_control_info;
goto init_thread;
}
fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
if (!fcc)
return -ENOMEM;
@@ -488,6 +518,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->cmd_control_info = fcc;
init_thread:
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
@@ -500,14 +531,20 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
return err;
}
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
if (fcc && fcc->f2fs_issue_flush)
kthread_stop(fcc->f2fs_issue_flush);
kfree(fcc);
SM_I(sbi)->cmd_control_info = NULL;
if (fcc && fcc->f2fs_issue_flush) {
struct task_struct *flush_thread = fcc->f2fs_issue_flush;
fcc->f2fs_issue_flush = NULL;
kthread_stop(flush_thread);
}
if (free) {
kfree(fcc);
SM_I(sbi)->cmd_control_info = NULL;
}
}
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -633,15 +670,23 @@ static void f2fs_submit_bio_wait_endio(struct bio *bio)
}
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
struct block_device *bdev = sbi->sb->s_bdev;
struct bio *bio = NULL;
int err;
err = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
&bio);
trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
if (sbi->s_ndevs) {
int devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
}
err = __blkdev_issue_discard(bdev,
SECTOR_FROM_BLOCK(blkstart),
SECTOR_FROM_BLOCK(blklen),
GFP_NOFS, 0, &bio);
if (!err && bio) {
struct bio_entry *be = __add_bio_entry(sbi, bio);
@@ -654,24 +699,101 @@ int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector,
return err;
}
#ifdef CONFIG_BLK_DEV_ZONED
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
sector_t nr_sects = SECTOR_FROM_BLOCK(blklen);
sector_t sector;
int devi = 0;
if (sbi->s_ndevs) {
devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
}
sector = SECTOR_FROM_BLOCK(blkstart);
if (sector & (bdev_zone_size(bdev) - 1) ||
nr_sects != bdev_zone_size(bdev)) {
f2fs_msg(sbi->sb, KERN_INFO,
"(%d) %s: Unaligned discard attempted (block %x + %x)",
devi, sbi->s_ndevs ? FDEV(devi).path: "",
blkstart, blklen);
return -EIO;
}
/*
* We need to know the type of the zone: for conventional zones,
* use regular discard if the drive supports it. For sequential
* zones, reset the zone write pointer.
*/
switch (get_blkz_type(sbi, bdev, blkstart)) {
case BLK_ZONE_TYPE_CONVENTIONAL:
if (!blk_queue_discard(bdev_get_queue(bdev)))
return 0;
return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
case BLK_ZONE_TYPE_SEQWRITE_REQ:
case BLK_ZONE_TYPE_SEQWRITE_PREF:
trace_f2fs_issue_reset_zone(sbi->sb, blkstart);
return blkdev_reset_zones(bdev, sector,
nr_sects, GFP_NOFS);
default:
/* Unknown zone type: broken device ? */
return -EIO;
}
}
#endif
static int __issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
bdev_zoned_model(bdev) != BLK_ZONED_NONE)
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
}
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
block_t blkstart, block_t blklen)
{
sector_t start = SECTOR_FROM_BLOCK(blkstart);
sector_t len = SECTOR_FROM_BLOCK(blklen);
sector_t start = blkstart, len = 0;
struct block_device *bdev;
struct seg_entry *se;
unsigned int offset;
block_t i;
int err = 0;
bdev = f2fs_target_device(sbi, blkstart, NULL);
for (i = blkstart; i < blkstart + blklen; i++, len++) {
if (i != start) {
struct block_device *bdev2 =
f2fs_target_device(sbi, i, NULL);
if (bdev2 != bdev) {
err = __issue_discard_async(sbi, bdev,
start, len);
if (err)
return err;
bdev = bdev2;
start = i;
len = 0;
}
}
for (i = blkstart; i < blkstart + blklen; i++) {
se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
offset = GET_BLKOFF_FROM_SEG0(sbi, i);
if (!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
}
trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0);
if (len)
err = __issue_discard_async(sbi, bdev, start, len);
return err;
}
static void __add_discard_entry(struct f2fs_sb_info *sbi,
@@ -1296,25 +1418,21 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}
static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
locate_dirty_segment(sbi, old_segno);
}
void allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
unsigned int old_segno;
int i;
if (test_opt(sbi, LFS))
return;
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
__allocate_new_segments(sbi, i);
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
curseg = CURSEG_I(sbi, i);
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
locate_dirty_segment(sbi, old_segno);
}
}
static const struct segment_allocation default_salloc_ops = {
@@ -1448,21 +1566,11 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
struct f2fs_summary *sum, int type)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
bool direct_io = (type == CURSEG_DIRECT_IO);
type = direct_io ? CURSEG_WARM_DATA : type;
curseg = CURSEG_I(sbi, type);
struct curseg_info *curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
mutex_lock(&sit_i->sentry_lock);
/* direct_io'ed data is aligned to the segment for better performance */
if (direct_io && curseg->next_blkoff &&
!has_not_enough_free_secs(sbi, 0, 0))
__allocate_new_segments(sbi, type);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/*
@@ -2166,7 +2274,6 @@ out:
static int build_sit_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
char *src_bitmap, *dst_bitmap;
@@ -2233,7 +2340,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
sit_i->written_valid_blocks = 0;
sit_i->sit_bitmap = dst_bitmap;
sit_i->bitmap_size = bitmap_size;
sit_i->dirty_sentries = 0;
@@ -2315,10 +2422,10 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
int sit_blk_cnt = SIT_BLK_CNT(sbi);
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
int nrpages = MAX_BIO_BLOCKS(sbi) * 8;
do {
readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
META_SIT, true);
start = start_blk * sit_i->sents_per_block;
end = (start_blk + readed) * sit_i->sents_per_block;
@@ -2387,6 +2494,9 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
struct seg_entry *sentry = get_seg_entry(sbi, start);
if (!sentry->valid_blocks)
__set_free(sbi, start);
else
SIT_I(sbi)->written_valid_blocks +=
sentry->valid_blocks;
}
/* set use the current segments */
@@ -2645,7 +2755,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
if (!sm_info)
return;
destroy_flush_cmd_control(sbi);
destroy_flush_cmd_control(sbi, true);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);