f2fs: support zone capacity less than zone size
NVMe Zoned Namespace devices can have zone-capacity less than zone-size. Zone-capacity indicates the maximum number of sectors that are usable in a zone beginning from the first sector of the zone. This makes the sectors after the zone-capacity till zone-size unusable. This patch set tracks zone-size and zone-capacity in zoned devices and calculates the usable blocks per segment and usable segments per section. If zone-capacity is less than zone-size, mark only those segments which start before zone-capacity as free segments. All segments at and beyond zone-capacity are treated as permanently used segments. In cases where zone-capacity does not align with segment size, the last segment will start before zone-capacity and end beyond the zone-capacity of the zone. For such spanning segments, only sectors within the zone-capacity are used. During writes and GC, manage the usable segments in a section and usable blocks per segment. Segments which are beyond zone-capacity are never allocated and do not need to be garbage collected; only the segments which are before zone-capacity need to be garbage collected. For spanning segments, based on the number of usable blocks in that segment, write to blocks only up to zone-capacity. Zone-capacity is device specific and cannot be configured by the user. Since NVMe ZNS device zones are sequentially write only, a block device with conventional zones or any normal block device is needed along with the ZNS device for the metadata operations of F2FS. A typical nvme-cli output of a zoned device shows zone start and capacity and write pointer as below: SLBA: 0x0 WP: 0x0 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ SLBA: 0x20000 WP: 0x20000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ SLBA: 0x40000 WP: 0x40000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ Here zone size is 64MB, capacity is 49MB, WP is at zone start as the zones are in EMPTY state. 
For each zone, only zone start + 49MB is usable area, any lba/sector after 49MB cannot be read or written to, the drive will fail any attempts to read/write. So, the second zone starts at 64MB and is usable till 113MB (64 + 49) and the range between 113 and 128MB is again unusable. The next zone starts at 128MB, and so on. Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
This commit is contained in:

committed by
Jaegeuk Kim

parent
581cb3a26b
commit
de881df977
@@ -859,20 +859,22 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
|
||||
{
|
||||
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
|
||||
unsigned short valid_blocks, ckpt_valid_blocks;
|
||||
unsigned int usable_blocks;
|
||||
|
||||
if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
|
||||
return;
|
||||
|
||||
usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
|
||||
mutex_lock(&dirty_i->seglist_lock);
|
||||
|
||||
valid_blocks = get_valid_blocks(sbi, segno, false);
|
||||
ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
|
||||
|
||||
if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
|
||||
ckpt_valid_blocks == sbi->blocks_per_seg)) {
|
||||
ckpt_valid_blocks == usable_blocks)) {
|
||||
__locate_dirty_segment(sbi, segno, PRE);
|
||||
__remove_dirty_segment(sbi, segno, DIRTY);
|
||||
} else if (valid_blocks < sbi->blocks_per_seg) {
|
||||
} else if (valid_blocks < usable_blocks) {
|
||||
__locate_dirty_segment(sbi, segno, DIRTY);
|
||||
} else {
|
||||
/* Recovery routine with SSR needs this */
|
||||
@@ -915,9 +917,11 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
|
||||
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
|
||||
se = get_seg_entry(sbi, segno);
|
||||
if (IS_NODESEG(se->type))
|
||||
holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
|
||||
holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
|
||||
se->valid_blocks;
|
||||
else
|
||||
holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
|
||||
holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
|
||||
se->valid_blocks;
|
||||
}
|
||||
mutex_unlock(&dirty_i->seglist_lock);
|
||||
|
||||
@@ -2167,7 +2171,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
|
||||
offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
|
||||
|
||||
f2fs_bug_on(sbi, (new_vblocks < 0 ||
|
||||
(new_vblocks > sbi->blocks_per_seg)));
|
||||
(new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
|
||||
|
||||
se->valid_blocks = new_vblocks;
|
||||
se->mtime = get_mtime(sbi, false);
|
||||
@@ -2933,9 +2937,9 @@ out:
|
||||
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
|
||||
{
|
||||
struct curseg_info *curseg = CURSEG_I(sbi, type);
|
||||
if (curseg->next_blkoff < sbi->blocks_per_seg)
|
||||
return true;
|
||||
return false;
|
||||
|
||||
return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
|
||||
curseg->segno);
|
||||
}
|
||||
|
||||
int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
|
||||
@@ -4294,9 +4298,12 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
|
||||
{
|
||||
unsigned int start;
|
||||
int type;
|
||||
struct seg_entry *sentry;
|
||||
|
||||
for (start = 0; start < MAIN_SEGS(sbi); start++) {
|
||||
struct seg_entry *sentry = get_seg_entry(sbi, start);
|
||||
if (f2fs_usable_blks_in_seg(sbi, start) == 0)
|
||||
continue;
|
||||
sentry = get_seg_entry(sbi, start);
|
||||
if (!sentry->valid_blocks)
|
||||
__set_free(sbi, start);
|
||||
else
|
||||
@@ -4316,7 +4323,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
|
||||
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
|
||||
struct free_segmap_info *free_i = FREE_I(sbi);
|
||||
unsigned int segno = 0, offset = 0, secno;
|
||||
block_t valid_blocks;
|
||||
block_t valid_blocks, usable_blks_in_seg;
|
||||
block_t blks_per_sec = BLKS_PER_SEC(sbi);
|
||||
|
||||
while (1) {
|
||||
@@ -4326,9 +4333,10 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
|
||||
break;
|
||||
offset = segno + 1;
|
||||
valid_blocks = get_valid_blocks(sbi, segno, false);
|
||||
if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
|
||||
usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
|
||||
if (valid_blocks == usable_blks_in_seg || !valid_blocks)
|
||||
continue;
|
||||
if (valid_blocks > sbi->blocks_per_seg) {
|
||||
if (valid_blocks > usable_blks_in_seg) {
|
||||
f2fs_bug_on(sbi, 1);
|
||||
continue;
|
||||
}
|
||||
@@ -4678,6 +4686,101 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
|
||||
unsigned int dev_idx)
|
||||
{
|
||||
if (!bdev_is_zoned(FDEV(dev_idx).bdev))
|
||||
return true;
|
||||
return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
|
||||
}
|
||||
|
||||
/* Return the zone index in the given device */
|
||||
static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
|
||||
int dev_idx)
|
||||
{
|
||||
block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
|
||||
|
||||
return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
|
||||
sbi->log_blocks_per_blkz;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the usable segments in a section based on the zone's
|
||||
* corresponding zone capacity. Zone is equal to a section.
|
||||
*/
|
||||
static inline unsigned int f2fs_usable_zone_segs_in_sec(
|
||||
struct f2fs_sb_info *sbi, unsigned int segno)
|
||||
{
|
||||
unsigned int dev_idx, zone_idx, unusable_segs_in_sec;
|
||||
|
||||
dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
|
||||
zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
|
||||
|
||||
/* Conventional zone's capacity is always equal to zone size */
|
||||
if (is_conv_zone(sbi, zone_idx, dev_idx))
|
||||
return sbi->segs_per_sec;
|
||||
|
||||
/*
|
||||
* If the zone_capacity_blocks array is NULL, then zone capacity
|
||||
* is equal to the zone size for all zones
|
||||
*/
|
||||
if (!FDEV(dev_idx).zone_capacity_blocks)
|
||||
return sbi->segs_per_sec;
|
||||
|
||||
/* Get the segment count beyond zone capacity block */
|
||||
unusable_segs_in_sec = (sbi->blocks_per_blkz -
|
||||
FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >>
|
||||
sbi->log_blocks_per_seg;
|
||||
return sbi->segs_per_sec - unusable_segs_in_sec;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of usable blocks in a segment. The number of blocks
|
||||
* returned is always equal to the number of blocks in a segment for
|
||||
* segments fully contained within a sequential zone capacity or a
|
||||
* conventional zone. For segments partially contained in a sequential
|
||||
* zone capacity, the number of usable blocks up to the zone capacity
|
||||
* is returned. 0 is returned in all other cases.
|
||||
*/
|
||||
static inline unsigned int f2fs_usable_zone_blks_in_seg(
|
||||
struct f2fs_sb_info *sbi, unsigned int segno)
|
||||
{
|
||||
block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
|
||||
unsigned int zone_idx, dev_idx, secno;
|
||||
|
||||
secno = GET_SEC_FROM_SEG(sbi, segno);
|
||||
seg_start = START_BLOCK(sbi, segno);
|
||||
dev_idx = f2fs_target_device_index(sbi, seg_start);
|
||||
zone_idx = get_zone_idx(sbi, secno, dev_idx);
|
||||
|
||||
/*
|
||||
* Conventional zone's capacity is always equal to zone size,
|
||||
* so, blocks per segment is unchanged.
|
||||
*/
|
||||
if (is_conv_zone(sbi, zone_idx, dev_idx))
|
||||
return sbi->blocks_per_seg;
|
||||
|
||||
if (!FDEV(dev_idx).zone_capacity_blocks)
|
||||
return sbi->blocks_per_seg;
|
||||
|
||||
sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
|
||||
sec_cap_blkaddr = sec_start_blkaddr +
|
||||
FDEV(dev_idx).zone_capacity_blocks[zone_idx];
|
||||
|
||||
/*
|
||||
* If segment starts before zone capacity and spans beyond
|
||||
* zone capacity, then usable blocks are from seg start to
|
||||
* zone capacity. If the segment starts after the zone capacity,
|
||||
* then there are no usable blocks.
|
||||
*/
|
||||
if (seg_start >= sec_cap_blkaddr)
|
||||
return 0;
|
||||
if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
|
||||
return sec_cap_blkaddr - seg_start;
|
||||
|
||||
return sbi->blocks_per_seg;
|
||||
}
|
||||
#else
|
||||
int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
|
||||
{
|
||||
@@ -4688,7 +4791,36 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Placeholder for the #else branch of the surrounding conditional block,
 * where the zone-aware implementation is compiled out. Both arguments
 * are ignored and 0 is always returned.
 */
static inline unsigned int
f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return 0;
}
|
||||
|
||||
/*
 * Placeholder for the #else branch of the surrounding conditional block,
 * where the zone-aware implementation is compiled out. Both arguments
 * are ignored and 0 is always returned.
 */
static inline unsigned int
f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return 0;
}
|
||||
#endif
|
||||
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
|
||||
unsigned int segno)
|
||||
{
|
||||
if (f2fs_sb_has_blkzoned(sbi))
|
||||
return f2fs_usable_zone_blks_in_seg(sbi, segno);
|
||||
|
||||
return sbi->blocks_per_seg;
|
||||
}
|
||||
|
||||
unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
|
||||
unsigned int segno)
|
||||
{
|
||||
if (f2fs_sb_has_blkzoned(sbi))
|
||||
return f2fs_usable_zone_segs_in_sec(sbi, segno);
|
||||
|
||||
return sbi->segs_per_sec;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update min, max modified time for cost-benefit GC algorithm
|
||||
|
Reference in New Issue
Block a user