Re: [f2fs-dev] [PATCH] f2fs: support dio write for zoned storage
Brought to you by:
kjgkr
From: Daeho J. <da...@gm...> - 2024-10-10 20:43:00
|
On Fri, Sep 27, 2024 at 12:25 AM Daejun Park <dae...@sa...> wrote: > > With zoned storage, F2FS avoids direct IO writes and uses buffered writes > with page cache flushes to prevent unaligned writes. However, the > unaligned write can be avoided by allowing only a single thread per zone > to perform direct writes. > > To achieve direct writes in zoned storage, it uses semephores to serialize > block allocation and writes per zone. > > Signed-off-by: Daejun Park <dae...@sa...> > --- > fs/f2fs/data.c | 28 ++++++++++++++++++++++++++- > fs/f2fs/f2fs.h | 7 +++++-- > fs/f2fs/file.c | 48 ++++++++++++++++++++++++++++++++++++++++------- > fs/f2fs/gc.c | 4 ++-- > fs/f2fs/segment.c | 6 +++--- > fs/f2fs/super.c | 5 ++++- > 6 files changed, 82 insertions(+), 16 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index b94cf6eea2f9..fa2bd88a2ed2 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -922,7 +922,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) > } > > #ifdef CONFIG_BLK_DEV_ZONED > -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) > +bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) > { > struct block_device *bdev = sbi->sb->s_bdev; > int devi = 0; > @@ -4207,6 +4207,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, > struct iomap *srcmap) > { > struct f2fs_map_blocks map = {}; > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > pgoff_t next_pgofs = 0; > int err; > > @@ -4218,6 +4219,18 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, > if (flags & IOMAP_WRITE) > map.m_may_create = true; > > + if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) { I think it's better that we can skip this for conventional LU by examining the block address. > + struct f2fs_rwsem *io_order_lock = > + &sbi->io_order_lock[map.m_seg_type]; > + > + f2fs_down_write(io_order_lock); > + > + /* set io order lock */ > + iomap->private = io_order_lock; > + } else { > + iomap->private = NULL; > + } > + > err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO); > if (err) > return err; > @@ -4273,6 +4286,19 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, > return 0; > } > > +static int f2fs_iomap_end(struct inode *inode, loff_t pos, loff_t length, > + ssize_t written, unsigned int flags, struct iomap *iomap) > +{ > + struct f2fs_rwsem *io_order_lock = iomap->private; > + > + /* ordered write */ > + if (io_order_lock) > + f2fs_up_write(io_order_lock); > + > + return 0; > +} > + > const struct iomap_ops f2fs_iomap_ops = { > .iomap_begin = f2fs_iomap_begin, > + .iomap_end = f2fs_iomap_end, > }; > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 33f5449dc22d..06ed132f22ad 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -1582,8 +1582,8 @@ struct f2fs_sb_info { > > /* for bio operations */ > struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ > - /* keep migration IO order for LFS mode */ > - struct f2fs_rwsem io_order_lock; > + /* keep IO order for LFS mode */ > + struct f2fs_rwsem io_order_lock[NR_CURSEG_DATA_TYPE]; > pgoff_t page_eio_ofs[NR_PAGE_TYPE]; /* EIO page offset */ > int page_eio_cnt[NR_PAGE_TYPE]; /* EIO count */ > > @@ -3863,6 +3863,9 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, > void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); > int f2fs_submit_page_bio(struct f2fs_io_info *fio); > int f2fs_merge_page_bio(struct f2fs_io_info *fio); > +#ifdef CONFIG_BLK_DEV_ZONED > +bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr); > +#endif > void f2fs_submit_page_write(struct f2fs_io_info *fio); > struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, > block_t blk_addr, sector_t *sector); > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 99903eafa7fe..fde49f3e54cf 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -869,13 +869,7 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) > /* disallow direct IO if any of devices has unaligned blksize */ > if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) > return true; > - /* > - * for blkzoned device, fallback direct IO to buffered IO, so > - * all IOs can be serialized by log-structured write. > - */ > - if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && > - !f2fs_is_pinned_file(inode)) > - return true; > + > if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) > return true; > > @@ -4815,6 +4809,17 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, > return 0; > } > > +#ifdef CONFIG_BLK_DEV_ZONED > +static void f2fs_dio_zone_write_end_io(struct bio *bio) > +{ > + struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private; > + > + bio->bi_private = io->bi_private; > + complete(&io->zone_wait); > + iomap_dio_bio_end_io(bio); > +} > +#endif > + > static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, > struct bio *bio, loff_t file_offset) > { > @@ -4824,6 +4829,31 @@ static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, > enum temp_type temp = f2fs_get_segment_temp(seg_type); > > bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); > + > +#ifdef CONFIG_BLK_DEV_ZONED > + if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) { > + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; > + block_t last_blkaddr = SECTOR_TO_BLOCK(bio_end_sector(bio) - 1); > + > + f2fs_down_write(&io->io_rwsem); > + if (io->zone_pending_bio) { > + wait_for_completion_io(&io->zone_wait); > + bio_put(io->zone_pending_bio); > + io->zone_pending_bio = NULL; > + io->bi_private = NULL; > + } > + > + if (is_end_zone_blkaddr(sbi, last_blkaddr)) { > + bio_get(bio); > + reinit_completion(&io->zone_wait); > + io->bi_private = bio->bi_private; > + bio->bi_private = io; > + bio->bi_end_io = f2fs_dio_zone_write_end_io; > + io->zone_pending_bio = bio; > + } > + f2fs_up_write(&io->io_rwsem); > + } > +#endif > submit_bio(bio); > } > > @@ -4897,6 +4927,10 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > dio_flags = 0; > if (pos + count > inode->i_size) > dio_flags |= IOMAP_DIO_FORCE_WAIT; > + > + if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) > + dio_flags |= IOMAP_DIO_FORCE_WAIT; > + > dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, > &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); > if (IS_ERR_OR_NULL(dio)) { > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > index 9322a7200e31..49270713f739 100644 > --- a/fs/f2fs/gc.c > +++ b/fs/f2fs/gc.c > @@ -1361,7 +1361,7 @@ static int move_data_block(struct inode *inode, block_t bidx, > fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; > > if (lfs_mode) > - f2fs_down_write(&fio.sbi->io_order_lock); > + f2fs_down_write(&fio.sbi->io_order_lock[CURSEG_COLD_DATA]); > > mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), > fio.old_blkaddr, false); > @@ -1444,7 +1444,7 @@ static int move_data_block(struct inode *inode, block_t bidx, > true, true, true); > up_out: > if (lfs_mode) > - f2fs_up_write(&fio.sbi->io_order_lock); > + f2fs_up_write(&fio.sbi->io_order_lock[CURSEG_COLD_DATA]); > put_out: > f2fs_put_dnode(&dn); > out: > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 1766254279d2..d602ae4d79e3 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -3796,10 +3796,10 @@ void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, > static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) > { > int type = __get_segment_type(fio); > - bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); > + bool keep_order = (f2fs_lfs_mode(fio->sbi) && type <= CURSEG_COLD_DATA); ditto > > if (keep_order) > - f2fs_down_read(&fio->sbi->io_order_lock); > + f2fs_down_read(&fio->sbi->io_order_lock[type]); > > if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, > &fio->new_blkaddr, sum, type, fio)) { > @@ -3819,7 +3819,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) > f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); > out: > if (keep_order) > - f2fs_up_read(&fio->sbi->io_order_lock); > + f2fs_up_read(&fio->sbi->io_order_lock[type]); > } > > void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > index fc2c586c7619..5289b6f5f6f3 100644 > --- a/fs/f2fs/super.c > +++ b/fs/f2fs/super.c > @@ -3833,7 +3833,10 @@ static void init_sb_info(struct f2fs_sb_info *sbi) > > INIT_LIST_HEAD(&sbi->s_list); > mutex_init(&sbi->umount_mutex); > - init_f2fs_rwsem(&sbi->io_order_lock); > + > + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) > + init_f2fs_rwsem(&sbi->io_order_lock[i]); > + > spin_lock_init(&sbi->cp_lock); > > sbi->dirty_device = 0; > -- > 2.25.1 > > > > _______________________________________________ > Linux-f2fs-devel mailing list > Lin...@li... > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel |