linux-f2fs-devel Mailing List for linux-f2fs
Brought to you by:
kjgkr
You can subscribe to this list here.
2012 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(10) |
Dec
(98) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2013 |
Jan
(100) |
Feb
(72) |
Mar
(79) |
Apr
(122) |
May
(93) |
Jun
(97) |
Jul
(72) |
Aug
(72) |
Sep
(73) |
Oct
(121) |
Nov
(161) |
Dec
(206) |
2014 |
Jan
(75) |
Feb
(54) |
Mar
(82) |
Apr
(98) |
May
(67) |
Jun
(89) |
Jul
(136) |
Aug
(122) |
Sep
(136) |
Oct
(58) |
Nov
(87) |
Dec
(114) |
2015 |
Jan
(140) |
Feb
(129) |
Mar
(141) |
Apr
(71) |
May
(192) |
Jun
(52) |
Jul
(120) |
Aug
(125) |
Sep
(157) |
Oct
(100) |
Nov
(54) |
Dec
(248) |
2016 |
Jan
(301) |
Feb
(180) |
Mar
(138) |
Apr
(137) |
May
(145) |
Jun
(123) |
Jul
(98) |
Aug
(143) |
Sep
(196) |
Oct
(166) |
Nov
(205) |
Dec
(141) |
2017 |
Jan
(167) |
Feb
(275) |
Mar
(273) |
Apr
(239) |
May
(193) |
Jun
(171) |
Jul
(226) |
Aug
(153) |
Sep
(212) |
Oct
(311) |
Nov
(257) |
Dec
(418) |
2018 |
Jan
(474) |
Feb
(188) |
Mar
(252) |
Apr
(500) |
May
(176) |
Jun
(291) |
Jul
(361) |
Aug
(331) |
Sep
(355) |
Oct
(154) |
Nov
(209) |
Dec
(185) |
2019 |
Jan
(172) |
Feb
(214) |
Mar
(247) |
Apr
(425) |
May
(273) |
Jun
(360) |
Jul
(400) |
Aug
(409) |
Sep
(149) |
Oct
(218) |
Nov
(319) |
Dec
(225) |
2020 |
Jan
(231) |
Feb
(487) |
Mar
(411) |
Apr
(258) |
May
(292) |
Jun
(369) |
Jul
(407) |
Aug
(173) |
Sep
(266) |
Oct
(317) |
Nov
(273) |
Dec
(391) |
2021 |
Jan
(285) |
Feb
(130) |
Mar
(232) |
Apr
(156) |
May
(311) |
Jun
(252) |
Jul
(336) |
Aug
(326) |
Sep
(151) |
Oct
(86) |
Nov
(114) |
Dec
(125) |
2022 |
Jan
(132) |
Feb
(167) |
Mar
(230) |
Apr
(460) |
May
(334) |
Jun
(324) |
Jul
(147) |
Aug
(188) |
Sep
(262) |
Oct
(346) |
Nov
(314) |
Dec
(245) |
2023 |
Jan
(306) |
Feb
(190) |
Mar
(199) |
Apr
(444) |
May
(378) |
Jun
(441) |
Jul
(403) |
Aug
(464) |
Sep
(144) |
Oct
(98) |
Nov
(152) |
Dec
(212) |
2024 |
Jan
(288) |
Feb
(365) |
Mar
(218) |
Apr
(275) |
May
(200) |
Jun
(228) |
Jul
(255) |
Aug
(228) |
Sep
(280) |
Oct
(40) |
Nov
|
Dec
|
From: Daeho J. <da...@gm...> - 2024-10-10 20:43:00
|
On Fri, Sep 27, 2024 at 12:25 AM Daejun Park <dae...@sa...> wrote: > > With zoned storage, F2FS avoids direct IO writes and uses buffered writes > with page cache flushes to prevent unaligned writes. However, the > unaligned write can be avoided by allowing only a single thread per zone > to perform direct writes. > > To achieve direct writes in zoned storage, it uses semaphores to serialize > block allocation and writes per zone. > > Signed-off-by: Daejun Park <dae...@sa...> > --- > fs/f2fs/data.c | 28 ++++++++++++++++++++++++++- > fs/f2fs/f2fs.h | 7 +++++-- > fs/f2fs/file.c | 48 ++++++++++++++++++++++++++++++++++++++++------- > fs/f2fs/gc.c | 4 ++-- > fs/f2fs/segment.c | 6 +++--- > fs/f2fs/super.c | 5 ++++- > 6 files changed, 82 insertions(+), 16 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index b94cf6eea2f9..fa2bd88a2ed2 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -922,7 +922,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) > } > > #ifdef CONFIG_BLK_DEV_ZONED > -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) > +bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) > { > struct block_device *bdev = sbi->sb->s_bdev; > int devi = 0; > @@ -4207,6 +4207,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, > struct iomap *srcmap) > { > struct f2fs_map_blocks map = {}; > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > pgoff_t next_pgofs = 0; > int err; > > @@ -4218,6 +4219,18 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, > if (flags & IOMAP_WRITE) > map.m_may_create = true; > > + if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) { I think it's better that we can skip this for conventional LU by examining the block address. 
> + struct f2fs_rwsem *io_order_lock = > + &sbi->io_order_lock[map.m_seg_type]; > + > + f2fs_down_write(io_order_lock); > + > + /* set io order lock */ > + iomap->private = io_order_lock; > + } else { > + iomap->private = NULL; > + } > + > err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO); > if (err) > return err; > @@ -4273,6 +4286,19 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, > return 0; > } > > +static int f2fs_iomap_end(struct inode *inode, loff_t pos, loff_t length, > + ssize_t written, unsigned int flags, struct iomap *iomap) > +{ > + struct f2fs_rwsem *io_order_lock = iomap->private; > + > + /* ordered write */ > + if (io_order_lock) > + f2fs_up_write(io_order_lock); > + > + return 0; > +} > + > const struct iomap_ops f2fs_iomap_ops = { > .iomap_begin = f2fs_iomap_begin, > + .iomap_end = f2fs_iomap_end, > }; > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 33f5449dc22d..06ed132f22ad 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -1582,8 +1582,8 @@ struct f2fs_sb_info { > > /* for bio operations */ > struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ > - /* keep migration IO order for LFS mode */ > - struct f2fs_rwsem io_order_lock; > + /* keep IO order for LFS mode */ > + struct f2fs_rwsem io_order_lock[NR_CURSEG_DATA_TYPE]; > pgoff_t page_eio_ofs[NR_PAGE_TYPE]; /* EIO page offset */ > int page_eio_cnt[NR_PAGE_TYPE]; /* EIO count */ > > @@ -3863,6 +3863,9 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, > void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); > int f2fs_submit_page_bio(struct f2fs_io_info *fio); > int f2fs_merge_page_bio(struct f2fs_io_info *fio); > +#ifdef CONFIG_BLK_DEV_ZONED > +bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr); > +#endif > void f2fs_submit_page_write(struct f2fs_io_info *fio); > struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, > block_t blk_addr, sector_t *sector); > diff --git a/fs/f2fs/file.c 
b/fs/f2fs/file.c > index 99903eafa7fe..fde49f3e54cf 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -869,13 +869,7 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) > /* disallow direct IO if any of devices has unaligned blksize */ > if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) > return true; > - /* > - * for blkzoned device, fallback direct IO to buffered IO, so > - * all IOs can be serialized by log-structured write. > - */ > - if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && > - !f2fs_is_pinned_file(inode)) > - return true; > + > if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) > return true; > > @@ -4815,6 +4809,17 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, > return 0; > } > > +#ifdef CONFIG_BLK_DEV_ZONED > +static void f2fs_dio_zone_write_end_io(struct bio *bio) > +{ > + struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private; > + > + bio->bi_private = io->bi_private; > + complete(&io->zone_wait); > + iomap_dio_bio_end_io(bio); > +} > +#endif > + > static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, > struct bio *bio, loff_t file_offset) > { > @@ -4824,6 +4829,31 @@ static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, > enum temp_type temp = f2fs_get_segment_temp(seg_type); > > bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); > + > +#ifdef CONFIG_BLK_DEV_ZONED > + if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) { > + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; > + block_t last_blkaddr = SECTOR_TO_BLOCK(bio_end_sector(bio) - 1); > + > + f2fs_down_write(&io->io_rwsem); > + if (io->zone_pending_bio) { > + wait_for_completion_io(&io->zone_wait); > + bio_put(io->zone_pending_bio); > + io->zone_pending_bio = NULL; > + io->bi_private = NULL; > + } > + > + if (is_end_zone_blkaddr(sbi, last_blkaddr)) { > + bio_get(bio); > + reinit_completion(&io->zone_wait); > + io->bi_private = bio->bi_private; > + bio->bi_private = 
io; > + bio->bi_end_io = f2fs_dio_zone_write_end_io; > + io->zone_pending_bio = bio; > + } > + f2fs_up_write(&io->io_rwsem); > + } > +#endif > submit_bio(bio); > } > > @@ -4897,6 +4927,10 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > dio_flags = 0; > if (pos + count > inode->i_size) > dio_flags |= IOMAP_DIO_FORCE_WAIT; > + > + if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) > + dio_flags |= IOMAP_DIO_FORCE_WAIT; > + > dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, > &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); > if (IS_ERR_OR_NULL(dio)) { > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > index 9322a7200e31..49270713f739 100644 > --- a/fs/f2fs/gc.c > +++ b/fs/f2fs/gc.c > @@ -1361,7 +1361,7 @@ static int move_data_block(struct inode *inode, block_t bidx, > fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; > > if (lfs_mode) > - f2fs_down_write(&fio.sbi->io_order_lock); > + f2fs_down_write(&fio.sbi->io_order_lock[CURSEG_COLD_DATA]); > > mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), > fio.old_blkaddr, false); > @@ -1444,7 +1444,7 @@ static int move_data_block(struct inode *inode, block_t bidx, > true, true, true); > up_out: > if (lfs_mode) > - f2fs_up_write(&fio.sbi->io_order_lock); > + f2fs_up_write(&fio.sbi->io_order_lock[CURSEG_COLD_DATA]); > put_out: > f2fs_put_dnode(&dn); > out: > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 1766254279d2..d602ae4d79e3 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -3796,10 +3796,10 @@ void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, > static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) > { > int type = __get_segment_type(fio); > - bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); > + bool keep_order = (f2fs_lfs_mode(fio->sbi) && type <= CURSEG_COLD_DATA); ditto > > if (keep_order) > - f2fs_down_read(&fio->sbi->io_order_lock); > + 
f2fs_down_read(&fio->sbi->io_order_lock[type]); > > if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, > &fio->new_blkaddr, sum, type, fio)) { > @@ -3819,7 +3819,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) > f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); > out: > if (keep_order) > - f2fs_up_read(&fio->sbi->io_order_lock); > + f2fs_up_read(&fio->sbi->io_order_lock[type]); > } > > void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > index fc2c586c7619..5289b6f5f6f3 100644 > --- a/fs/f2fs/super.c > +++ b/fs/f2fs/super.c > @@ -3833,7 +3833,10 @@ static void init_sb_info(struct f2fs_sb_info *sbi) > > INIT_LIST_HEAD(&sbi->s_list); > mutex_init(&sbi->umount_mutex); > - init_f2fs_rwsem(&sbi->io_order_lock); > + > + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) > + init_f2fs_rwsem(&sbi->io_order_lock[i]); > + > spin_lock_init(&sbi->cp_lock); > > sbi->dirty_device = 0; > -- > 2.25.1 > > > > _______________________________________________ > Linux-f2fs-devel mailing list > Lin...@li... > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel |
From: <pat...@ke...> - 2024-10-10 20:10:38
|
Hello: This patch was applied to jaegeuk/f2fs.git (dev) by Jaegeuk Kim <ja...@ke...>: On Thu, 10 Oct 2024 00:09:23 +0000 you wrote: > This fixes a regression which prevents parallel DIO reads. > > Fixes: 0cac51185e65 ("f2fs: fix to avoid racing in between read and OPU dio write") > Signed-off-by: Jaegeuk Kim <ja...@ke...> > --- > fs/f2fs/file.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) Here is the summary with links: - [f2fs-dev] f2fs: allow parallel DIO reads https://git.kernel.org/jaegeuk/f2fs/c/bdab38b79199 You are awesome, thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html |
From: <pat...@ke...> - 2024-10-10 20:10:37
|
Hello: The following patches were marked "accepted", because they were applied to jaegeuk/f2fs.git (dev): Patch: [f2fs-dev] f2fs: allow parallel DIO reads Submitter: Jaegeuk Kim <ja...@ke...> Committer: Jaegeuk Kim <ja...@ke...> Patchwork: https://patchwork.kernel.org/project/f2fs/list/?series=897407 Lore link: https://lore.kernel.org/r/202...@ke... Total patches: 1 -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html |
From: Daeho J. <da...@gm...> - 2024-10-10 19:56:41
|
Reviewed-by: Daeho Jeong <dae...@go...> Thanks, On Wed, Oct 9, 2024 at 5:10 PM Jaegeuk Kim via Linux-f2fs-devel <lin...@li...> wrote: > > This fixes a regression which prevents parallel DIO reads. > > Fixes: 0cac51185e65 ("f2fs: fix to avoid racing in between read and OPU dio write") > Signed-off-by: Jaegeuk Kim <ja...@ke...> > --- > fs/f2fs/file.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 71d5ded9eeda..adc7d64a6f47 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -4647,7 +4647,8 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) > iov_iter_count(to), READ); > > /* In LFS mode, if there is inflight dio, wait for its completion */ > - if (f2fs_lfs_mode(F2FS_I_SB(inode))) > + if (f2fs_lfs_mode(F2FS_I_SB(inode)) && > + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) > inode_dio_wait(inode); > > if (f2fs_should_use_dio(inode, iocb, to)) { > -- > 2.47.0.rc1.288.g06298d1525-goog > > > > _______________________________________________ > Linux-f2fs-devel mailing list > Lin...@li... > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel |
From: Daeho J. <da...@gm...> - 2024-10-10 19:26:43
|
From: Daeho Jeong <dae...@go...> F2FS should understand how the device aliasing file works and support deleting the file after use. A device aliasing file can be created by mkfs.f2fs tool and it can map the whole device with an extent, not using node blocks. The file space should be pinned and normally used for read-only usages. Signed-off-by: Daeho Jeong <dae...@go...> Signed-off-by: Chao Yu <ch...@ke...> --- v5: added an ioctl to know whether a file is for device aliasing v4: added file pinning check in sanity check v3: merged Chao's extent cache sanity check. prevented device aliasing support with noextent mount option v2: changed the position of f2fs_destroy_extent_tree() only for device aliasing files --- fs/f2fs/data.c | 5 +++++ fs/f2fs/extent_cache.c | 45 ++++++++++++++++++++++++++++++++++++++- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/file.c | 44 ++++++++++++++++++++++++++++++++++---- fs/f2fs/inode.c | 19 ++++++++++++++++- fs/f2fs/super.c | 4 ++++ fs/f2fs/sysfs.c | 2 ++ include/uapi/linux/f2fs.h | 1 + 8 files changed, 119 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b94cf6eea2f9..385b46e62ede 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3441,6 +3441,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { + if (IS_DEVICE_ALIASING(inode)) { + err = -ENODATA; + goto out; + } + if (locked) { err = f2fs_reserve_block(&dn, index); goto out; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 62ac440d9416..019c1f7b7fa5 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -24,6 +24,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; struct extent_info ei; + int devi; get_read_extent_info(&ei, i_ext); @@ -38,7 +39,36 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) ei.blk, 
ei.fofs, ei.len); return false; } - return true; + + if (!IS_DEVICE_ALIASING(inode)) + return true; + + for (devi = 0; devi < sbi->s_ndevs; devi++) { + if (FDEV(devi).start_blk != ei.blk || + FDEV(devi).end_blk != ei.blk + ei.len - 1) + continue; + + if (devi == 0) { + f2fs_warn(sbi, + "%s: inode (ino=%lx) is an alias of meta device", + __func__, inode->i_ino); + return false; + } + + if (bdev_is_zoned(FDEV(devi).bdev)) { + f2fs_warn(sbi, + "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] maps to zoned block device", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; + } + return true; + } + + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] is inconsistent w/ any devices", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; } static void __set_extent_info(struct extent_info *ei, @@ -76,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) static bool __may_extent_tree(struct inode *inode, enum extent_type type) { + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) + return true; + /* * for recovered files during mount do not create extents * if shrinker is not registered. 
@@ -401,6 +434,11 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) if (atomic_read(&et->node_cnt) || !ei.len) goto skip; + if (IS_DEVICE_ALIASING(inode)) { + et->largest = ei; + goto skip; + } + en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); if (en) { @@ -463,6 +501,11 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; } + if (IS_DEVICE_ALIASING(inode)) { + ret = false; + goto out; + } + en = __lookup_extent_node(&et->root, et->cached_en, pgofs); if (!en) goto out; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 33f5449dc22d..b6ba22a1da47 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -213,6 +213,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_CASEFOLD 0x00001000 #define F2FS_FEATURE_COMPRESSION 0x00002000 #define F2FS_FEATURE_RO 0x00004000 +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -3046,6 +3047,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) @@ -3061,6 +3063,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* Flags that are appropriate for non-directories/regular files. 
*/ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) + static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) { if (S_ISDIR(mode)) @@ -4510,6 +4514,7 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 99903eafa7fe..6363d23221b2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -725,6 +725,11 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); + if (IS_DEVICE_ALIASING(inode) && from) { + err = -EINVAL; + goto out_err; + } + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= max_file_blocks(inode)) @@ -739,6 +744,21 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) goto out; } + if (IS_DEVICE_ALIASING(inode)) { + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; + struct extent_info ei = et->largest; + unsigned int i; + + for (i = 0; i < ei.len; i++) + f2fs_invalidate_blocks(sbi, ei.blk + i); + + dec_valid_block_count(sbi, inode, ei.len); + f2fs_update_time(sbi, REQ_TIME); + + f2fs_put_page(ipage, 1); + goto out; + } + if (f2fs_has_inline_data(inode)) { f2fs_truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); @@ -774,7 +794,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) /* lastly zero out the first data page */ if (!err) err = truncate_partial_data_page(inode, from, truncate_page); - +out_err: trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -992,7 +1012,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return -EPERM; if ((attr->ia_valid & ATTR_SIZE)) { - if 
(!f2fs_is_compress_backend_ready(inode)) + if (!f2fs_is_compress_backend_ready(inode) || + IS_DEVICE_ALIASING(inode)) return -EOPNOTSUPP; if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && !IS_ALIGNED(attr->ia_size, @@ -1860,7 +1881,7 @@ static long f2fs_fallocate(struct file *file, int mode, return -EIO; if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) return -ENOSPC; - if (!f2fs_is_compress_backend_ready(inode)) + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) return -EOPNOTSUPP; /* f2fs only support ->fallocate for regular file */ @@ -3296,6 +3317,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + if (IS_DEVICE_ALIASING(inode)) + return -EINVAL; + if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", __func__, inode->i_ino, fi->i_gc_failures); @@ -3326,6 +3350,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (!pin && IS_DEVICE_ALIASING(inode)) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -3391,6 +3418,12 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) return put_user(pin, (u32 __user *)arg); } +static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) +{ + return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 
1 : 0, + (u32 __user *)arg); +} + int f2fs_precache_extents(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); @@ -4490,6 +4523,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_decompress_file(filp); case F2FS_IOC_COMPRESS_FILE: return f2fs_ioc_compress_file(filp); + case F2FS_IOC_GET_DEV_ALIAS_FILE: + return f2fs_ioc_get_dev_alias_file(filp, arg); default: return -ENOTTY; } @@ -4764,7 +4799,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, else return 0; - map.m_may_create = true; + if (!IS_DEVICE_ALIASING(inode)) + map.m_may_create = true; if (dio) { map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 1ed86df343a5..194dc0f53ad8 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -372,6 +372,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (IS_DEVICE_ALIASING(inode)) { + if (!f2fs_sb_has_device_alias(sbi)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", + __func__, inode->i_ino); + return false; + } + if (!f2fs_is_pinned_file(inode)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned", + __func__, inode->i_ino); + return false; + } + } + return true; } @@ -823,7 +836,8 @@ void f2fs_evict_inode(struct inode *inode) f2fs_bug_on(sbi, get_dirty_pages(inode)); f2fs_remove_dirty_inode(inode); - f2fs_destroy_extent_tree(inode); + if (!IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; @@ -879,6 +893,9 @@ void f2fs_evict_inode(struct inode *inode) goto retry; } + if (IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); + if (err) { f2fs_update_inode_page(inode); if (dquot_initialize_needed(inode)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fc2c586c7619..95097498b544 100644 --- 
a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -834,6 +834,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) set_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noextent_cache: + if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) { + f2fs_err(sbi, "device aliasing requires extent cache"); + return -EINVAL; + } clear_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noinline_data: diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c56e8c873935..e51304bc65ea 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1313,6 +1313,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); F2FS_SB_FEATURE_RO_ATTR(readonly, RO); +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_encryption), @@ -1329,6 +1330,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_casefold), ATTR_LIST(sb_compression), ATTR_LIST(sb_readonly), + ATTR_LIST(sb_device_alias), NULL, }; ATTRIBUTE_GROUPS(f2fs_sb_feat); diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index 955d440be104..f7aaf8d23e20 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -43,6 +43,7 @@ #define F2FS_IOC_DECOMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 23) #define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24) #define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25) +#define F2FS_IOC_GET_DEV_ALIAS_FILE _IOR(F2FS_IOCTL_MAGIC, 26, __u32) /* * should be same as XFS_IOC_GOINGDOWN. -- 2.47.0.rc1.288.g06298d1525-goog |
From: Daeho J. <da...@gm...> - 2024-10-10 17:56:32
|
On Tue, Oct 8, 2024 at 7:32 PM Chao Yu <ch...@ke...> wrote: > > On 2024/10/9 2:05, Daeho Jeong wrote: > > From: Daeho Jeong <dae...@go...> > > > > F2FS should understand how the device aliasing file works and support > > deleting the file after use. A device aliasing file can be created by > > mkfs.f2fs tool and it can map the whole device with an extrent, not > > using node blocks. The file space should be pinned and normally used for > > read-only usages. > > > > Signed-off-by: Daeho Jeong <dae...@go...> > > Signed-off-by: Chao Yu <ch...@ke...> > > --- > > v4: added file pinning check in sanity check > > v3: merged Chao's extent cache sanity check. > > prevented device aliasing support with noextent mount option > > v2: changed the position of f2fs_destroy_extent_tree() only for device > > aliasing files > > --- > > fs/f2fs/data.c | 5 +++++ > > fs/f2fs/extent_cache.c | 45 +++++++++++++++++++++++++++++++++++++++++- > > fs/f2fs/f2fs.h | 5 +++++ > > fs/f2fs/file.c | 36 +++++++++++++++++++++++++++++---- > > fs/f2fs/inode.c | 19 +++++++++++++++++- > > fs/f2fs/super.c | 4 ++++ > > fs/f2fs/sysfs.c | 2 ++ > > 7 files changed, 110 insertions(+), 6 deletions(-) > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > > index b94cf6eea2f9..385b46e62ede 100644 > > --- a/fs/f2fs/data.c > > +++ b/fs/f2fs/data.c > > @@ -3441,6 +3441,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > > > > if (!f2fs_lookup_read_extent_cache_block(inode, index, > > &dn.data_blkaddr)) { > > + if (IS_DEVICE_ALIASING(inode)) { > > + err = -ENODATA; > > + goto out; > > + } > > + > > if (locked) { > > err = f2fs_reserve_block(&dn, index); > > goto out; > > diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c > > index 62ac440d9416..019c1f7b7fa5 100644 > > --- a/fs/f2fs/extent_cache.c > > +++ b/fs/f2fs/extent_cache.c > > @@ -24,6 +24,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) > > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > > struct f2fs_extent 
*i_ext = &F2FS_INODE(ipage)->i_ext; > > struct extent_info ei; > > + int devi; > > > > get_read_extent_info(&ei, i_ext); > > > > @@ -38,7 +39,36 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) > > ei.blk, ei.fofs, ei.len); > > return false; > > } > > - return true; > > + > > + if (!IS_DEVICE_ALIASING(inode)) > > + return true; > > + > > + for (devi = 0; devi < sbi->s_ndevs; devi++) { > > + if (FDEV(devi).start_blk != ei.blk || > > + FDEV(devi).end_blk != ei.blk + ei.len - 1) > > + continue; > > + > > + if (devi == 0) { > > + f2fs_warn(sbi, > > + "%s: inode (ino=%lx) is an alias of meta device", > > + __func__, inode->i_ino); > > + return false; > > + } > > + > > + if (bdev_is_zoned(FDEV(devi).bdev)) { > > + f2fs_warn(sbi, > > + "%s: device alias inode (ino=%lx)'s extent info " > > + "[%u, %u, %u] maps to zoned block device", > > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > > + return false; > > + } > > + return true; > > + } > > + > > + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " > > + "[%u, %u, %u] is inconsistent w/ any devices", > > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > > + return false; > > } > > > > static void __set_extent_info(struct extent_info *ei, > > @@ -76,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) > > > > static bool __may_extent_tree(struct inode *inode, enum extent_type type) > > { > > + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) > > + return true; > > + > > /* > > * for recovered files during mount do not create extents > > * if shrinker is not registered. 
> > @@ -401,6 +434,11 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) > > if (atomic_read(&et->node_cnt) || !ei.len) > > goto skip; > > > > + if (IS_DEVICE_ALIASING(inode)) { > > + et->largest = ei; > > + goto skip; > > + } > > + > > en = __attach_extent_node(sbi, et, &ei, NULL, > > &et->root.rb_root.rb_node, true); > > if (en) { > > @@ -463,6 +501,11 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, > > goto out; > > } > > > > + if (IS_DEVICE_ALIASING(inode)) { > > + ret = false; > > + goto out; > > + } > > + > > en = __lookup_extent_node(&et->root, et->cached_en, pgofs); > > if (!en) > > goto out; > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > > index 33f5449dc22d..b6ba22a1da47 100644 > > --- a/fs/f2fs/f2fs.h > > +++ b/fs/f2fs/f2fs.h > > @@ -213,6 +213,7 @@ struct f2fs_mount_info { > > #define F2FS_FEATURE_CASEFOLD 0x00001000 > > #define F2FS_FEATURE_COMPRESSION 0x00002000 > > #define F2FS_FEATURE_RO 0x00004000 > > +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 > > > > #define __F2FS_HAS_FEATURE(raw_super, mask) \ > > ((raw_super->feature & cpu_to_le32(mask)) != 0) > > @@ -3046,6 +3047,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > > #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ > > #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ > > #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ > > +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ > > Is there any way to know which inode is device-alias one? maybe > we can export this flag to userspace via .fileattr_get? or via > newly introduced ioctl interface? Right, we need to add a new ioctl interface to get the flag. Let me take care of this. 
Thanks, > > > > > #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) > > > > @@ -3061,6 +3063,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > > /* Flags that are appropriate for non-directories/regular files. */ > > #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) > > > > +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) > > + > > static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) > > { > > if (S_ISDIR(mode)) > > @@ -4510,6 +4514,7 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); > > F2FS_FEATURE_FUNCS(casefold, CASEFOLD); > > F2FS_FEATURE_FUNCS(compression, COMPRESSION); > > F2FS_FEATURE_FUNCS(readonly, RO); > > +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); > > > > #ifdef CONFIG_BLK_DEV_ZONED > > static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > > index 99903eafa7fe..f2d2d84d025b 100644 > > --- a/fs/f2fs/file.c > > +++ b/fs/f2fs/file.c > > @@ -725,6 +725,11 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > > > > trace_f2fs_truncate_blocks_enter(inode, from); > > > > + if (IS_DEVICE_ALIASING(inode) && from) { > > + err = -EINVAL; > > + goto out_err; > > + } > > + > > free_from = (pgoff_t)F2FS_BLK_ALIGN(from); > > > > if (free_from >= max_file_blocks(inode)) > > @@ -739,6 +744,21 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > > goto out; > > } > > > > + if (IS_DEVICE_ALIASING(inode)) { > > + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; > > + struct extent_info ei = et->largest; > > + unsigned int i; > > + > > + for (i = 0; i < ei.len; i++) > > + f2fs_invalidate_blocks(sbi, ei.blk + i); > > + > > + dec_valid_block_count(sbi, inode, ei.len); > > + f2fs_update_time(sbi, REQ_TIME); > > + > > + f2fs_put_page(ipage, 1); > > + goto out; > > + } > > + > > if (f2fs_has_inline_data(inode)) { > > f2fs_truncate_inline_inode(inode, ipage, 
from); > > f2fs_put_page(ipage, 1); > > @@ -774,7 +794,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > > /* lastly zero out the first data page */ > > if (!err) > > err = truncate_partial_data_page(inode, from, truncate_page); > > - > > +out_err: > > trace_f2fs_truncate_blocks_exit(inode, err); > > return err; > > } > > @@ -992,7 +1012,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, > > return -EPERM; > > > > if ((attr->ia_valid & ATTR_SIZE)) { > > - if (!f2fs_is_compress_backend_ready(inode)) > > + if (!f2fs_is_compress_backend_ready(inode) || > > + IS_DEVICE_ALIASING(inode)) > > return -EOPNOTSUPP; > > if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && > > !IS_ALIGNED(attr->ia_size, > > @@ -1860,7 +1881,7 @@ static long f2fs_fallocate(struct file *file, int mode, > > return -EIO; > > if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) > > return -ENOSPC; > > - if (!f2fs_is_compress_backend_ready(inode)) > > + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) > > return -EOPNOTSUPP; > > > > /* f2fs only support ->fallocate for regular file */ > > @@ -3296,6 +3317,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) > > struct f2fs_inode_info *fi = F2FS_I(inode); > > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > > > > + if (IS_DEVICE_ALIASING(inode)) > > + return -EINVAL; > > + > > if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { > > f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", > > __func__, inode->i_ino, fi->i_gc_failures); > > @@ -3326,6 +3350,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > > if (f2fs_readonly(sbi->sb)) > > return -EROFS; > > > > + if (!pin && IS_DEVICE_ALIASING(inode)) > > + return -EOPNOTSUPP; > > + > > ret = mnt_want_write_file(filp); > > if (ret) > > return ret; > > @@ -4764,7 +4791,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, > > else > > return 0; > > > > - 
map.m_may_create = true; > > + if (!IS_DEVICE_ALIASING(inode)) > > + map.m_may_create = true; > > if (dio) { > > map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, > > inode->i_write_hint); > > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c > > index 1ed86df343a5..e2d30fc79644 100644 > > --- a/fs/f2fs/inode.c > > +++ b/fs/f2fs/inode.c > > @@ -372,6 +372,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) > > return false; > > } > > > > + if (fi->i_flags & F2FS_DEVICE_ALIAS_FL) { > > Trivial cleanup. > > IS_DEVICE_ALIASING(inode) > > Thanks, > > > + if (!f2fs_sb_has_device_alias(sbi)) { > > + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", > > + __func__, inode->i_ino); > > + return false; > > + } > > + if (!f2fs_is_pinned_file(inode)) { > > + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned", > > + __func__, inode->i_ino); > > + return false; > > + } > > + } > > + > > return true; > > } > > > > @@ -823,7 +836,8 @@ void f2fs_evict_inode(struct inode *inode) > > f2fs_bug_on(sbi, get_dirty_pages(inode)); > > f2fs_remove_dirty_inode(inode); > > > > - f2fs_destroy_extent_tree(inode); > > + if (!IS_DEVICE_ALIASING(inode)) > > + f2fs_destroy_extent_tree(inode); > > > > if (inode->i_nlink || is_bad_inode(inode)) > > goto no_delete; > > @@ -879,6 +893,9 @@ void f2fs_evict_inode(struct inode *inode) > > goto retry; > > } > > > > + if (IS_DEVICE_ALIASING(inode)) > > + f2fs_destroy_extent_tree(inode); > > + > > if (err) { > > f2fs_update_inode_page(inode); > > if (dquot_initialize_needed(inode)) > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > > index fc2c586c7619..95097498b544 100644 > > --- a/fs/f2fs/super.c > > +++ b/fs/f2fs/super.c > > @@ -834,6 +834,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) > > set_opt(sbi, READ_EXTENT_CACHE); > > break; > > case Opt_noextent_cache: > > + if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) { > > + 
f2fs_err(sbi, "device aliasing requires extent cache"); > > + return -EINVAL; > > + } > > clear_opt(sbi, READ_EXTENT_CACHE); > > break; > > case Opt_noinline_data: > > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c > > index c56e8c873935..e51304bc65ea 100644 > > --- a/fs/f2fs/sysfs.c > > +++ b/fs/f2fs/sysfs.c > > @@ -1313,6 +1313,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); > > F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); > > F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); > > F2FS_SB_FEATURE_RO_ATTR(readonly, RO); > > +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); > > > > static struct attribute *f2fs_sb_feat_attrs[] = { > > ATTR_LIST(sb_encryption), > > @@ -1329,6 +1330,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = { > > ATTR_LIST(sb_casefold), > > ATTR_LIST(sb_compression), > > ATTR_LIST(sb_readonly), > > + ATTR_LIST(sb_device_alias), > > NULL, > > }; > > ATTRIBUTE_GROUPS(f2fs_sb_feat); > |
From: Yonggil S. <yon...@sa...> - 2024-10-10 05:08:17
|
There was a problem where the super block area was not subtracted when calculating the usable segments for a single zoned device with a conventional zone. This resulted in an incorrect overprovision and reserved area. <256MiB legacy block + zoned block w/ 32MiB zone size> Info: Overprovision ratio = 3.570% Info: Overprovision segments = 656 (GC reserved = 560) <8 conventional zone + 1016 sequential zone w/ 32MiB zone size> Info: Overprovision ratio = 3.700% Info: Overprovision segments = 676 (GC reserved = 578) This patch addresses the problem by subtracting the super block area when there is only one zoned device. Signed-off-by: Yonggil Song <yon...@sa...> --- lib/libf2fs_zoned.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c index 89ba5ad73a76..1a0985378789 100644 --- a/lib/libf2fs_zoned.c +++ b/lib/libf2fs_zoned.c @@ -555,6 +555,11 @@ uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb) } usable_segs -= (get_sb(main_blkaddr) - get_sb(segment0_blkaddr)) >> get_sb(log_blocks_per_seg); + + /* single zoned device needs to remove a super block area */ + if (c.ndevs == 1 && c.devices[0].zoned_model == F2FS_ZONED_HM) + usable_segs -= (get_sb(segment0_blkaddr) >> get_sb(log_blocks_per_seg)); + return usable_segs; #endif return get_sb(segment_count_main); -- 2.43.0 |
From: Yonggil S. <yon...@sa...> - 2024-10-10 04:59:50
|
There was a problem where the super block area was not subtracted when calculating the usable segments for a single zoned device with a conventional zone. This resulted in an incorrect overprovision and reserved area. <256MiB legacy block + zoned block w/ 32MiB zone size> Info: Overprovision ratio = 3.570% Info: Overprovision segments = 656 (GC reserved = 560) <8 conventional zone + 1016 sequential zone w/ 32MiB zone size> Info: Overprovision ratio = 3.700% Info: Overprovision segments = 676 (GC reserved = 578) This patch addresses the problem by subtracting the super block area when there is only one zoned device. Signed-off-by: Yonggil Song <yon...@sa...> --- lib/libf2fs_zoned.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c index 89ba5ad73a76..1a0985378789 100644 --- a/lib/libf2fs_zoned.c +++ b/lib/libf2fs_zoned.c @@ -555,6 +555,11 @@ uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb) } usable_segs -= (get_sb(main_blkaddr) - get_sb(segment0_blkaddr)) >> get_sb(log_blocks_per_seg); + + /* single zoned device needs to remove a super block area */ + if (c.ndevs == 1 && c.devices[0].zoned_model == F2FS_ZONED_HM) + usable_segs -= (get_sb(segment0_blkaddr) >> get_sb(log_blocks_per_seg)); + return usable_segs; #endif return get_sb(segment_count_main); -- 2.43.0 |
From: Yonggil S. <yon...@sa...> - 2024-10-10 04:31:38
|
> > On 2024/10/7 13:21, Yonggil Song wrote: > > > There was a bug that did not subtract the super block area when calculating > > > the usable segments for a single zoned device with a conventional zone. > > > This bug resulted in incorrect the overprovision and reserved area. > > > > > > <256MiB legacy block + zoned block w/ 32MiB zone size> > > > Info: Overprovision ratio = 3.570% > > > Info: Overprovision segments = 656 (GC reserved = 560) > > > > > > <8 conventional zone + 1016 sequential zone w/ 32MiB zone size> > > > Info: Overprovision ratio = 3.700% > > > Info: Overprovision segments = 676 (GC reserved = 578) > > > > > > This patch fixes the bug by subtracting the super block area when there is > > > only one zoned device. > > > > > > Signed-off-by: Yonggil Song <yon...@sa...> > > > --- > > > lib/libf2fs_zoned.c | 5 +++++ > > > 1 file changed, 5 insertions(+) > > > > > > diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c > > > index 89ba5ad73a76..cc5c064b7e3e 100644 > > > --- a/lib/libf2fs_zoned.c > > > +++ b/lib/libf2fs_zoned.c > > > @@ -555,6 +555,11 @@ uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb) > > > } > > > usable_segs -= (get_sb(main_blkaddr) - get_sb(segment0_blkaddr)) >> > > > get_sb(log_blocks_per_seg); > > > + > > > + /* single zoned device needs to remove a super block area */ > > > > If we don't format zoned device, it doesn't need to do this? > > > > Thanks, > > > Yes, single zoned block only needs this. > > legacy block just return a segment_count_main. > > thanks. Sorry, I misunderstood. I'll resend v2 with zoned block checker. thanks. > > > + if (c.ndevs == 1) > > > + usable_segs -= (get_sb(segment0_blkaddr) >> get_sb(log_blocks_per_seg));> + > > > return usable_segs; > > > #endif > > > return get_sb(segment_count_main); |
From: Yonggil S. <yon...@sa...> - 2024-10-10 02:15:22
|
> On 2024/10/7 13:21, Yonggil Song wrote: > > There was a bug that did not subtract the super block area when calculating > > the usable segments for a single zoned device with a conventional zone. > > This bug resulted in incorrect the overprovision and reserved area. > > > > <256MiB legacy block + zoned block w/ 32MiB zone size> > > Info: Overprovision ratio = 3.570% > > Info: Overprovision segments = 656 (GC reserved = 560) > > > > <8 conventional zone + 1016 sequential zone w/ 32MiB zone size> > > Info: Overprovision ratio = 3.700% > > Info: Overprovision segments = 676 (GC reserved = 578) > > > > This patch fixes the bug by subtracting the super block area when there is > > only one zoned device. > > > > Signed-off-by: Yonggil Song <yon...@sa...> > > --- > > lib/libf2fs_zoned.c | 5 +++++ > > 1 file changed, 5 insertions(+) > > > > diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c > > index 89ba5ad73a76..cc5c064b7e3e 100644 > > --- a/lib/libf2fs_zoned.c > > +++ b/lib/libf2fs_zoned.c > > @@ -555,6 +555,11 @@ uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb) > > } > > usable_segs -= (get_sb(main_blkaddr) - get_sb(segment0_blkaddr)) >> > > get_sb(log_blocks_per_seg); > > + > > + /* single zoned device needs to remove a super block area */ > > If we don't format zoned device, it doesn't need to do this? > > Thanks, > Yes, single zoned block only needs this. legacy block just return a segment_count_main. thanks. > > + if (c.ndevs == 1) > > + usable_segs -= (get_sb(segment0_blkaddr) >> get_sb(log_blocks_per_seg));> + > > return usable_segs; > > #endif > > return get_sb(segment_count_main); |
From: Jaegeuk K. <ja...@ke...> - 2024-10-10 00:09:32
|
This fixes a regression which prevents parallel DIO reads. Fixes: 0cac51185e65 ("f2fs: fix to avoid racing in between read and OPU dio write") Signed-off-by: Jaegeuk Kim <ja...@ke...> --- fs/f2fs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 71d5ded9eeda..adc7d64a6f47 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4647,7 +4647,8 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) iov_iter_count(to), READ); /* In LFS mode, if there is inflight dio, wait for its completion */ - if (f2fs_lfs_mode(F2FS_I_SB(inode))) + if (f2fs_lfs_mode(F2FS_I_SB(inode)) && + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) inode_dio_wait(inode); if (f2fs_should_use_dio(inode, iocb, to)) { -- 2.47.0.rc1.288.g06298d1525-goog |
From: Ryan R. <rya...@ar...> - 2024-10-09 11:23:23
|
On 07/10/2024 17:38, Jaegeuk Kim wrote: > Hi Ryan, > > On 10/02, Ryan Roberts wrote: >> Hi Jaegeuk Kim, Chao Yu, >> >> I heard (via Matthew Wilcox) that you may be in the process of forming plans to >> add large folio support to f2fs? If so, I wonder if you could let me know how >> those plans are progressing? I saw your v6.12-rc1 pull request did a lot of >> conversion from struct page to struct folio (of the small variety for now) but >> wondered if this is intended as pre-work for enabling large folios? >> >> I've been doing a lot of work on the anonymous memory side to support large >> folios ("mTHP") and hook that up to arm64's contpte pgtable support. This >> improves performance nicely. >> >> Additionally I have experimented (on XFS) with ensuring that text mappings are >> contpte mapped where possible and this reduces pressure in the iTLB to improve >> performance further. But it all relies on the file system supporting large >> folios. I'd very much like to realize these performance gains on Android, but >> that requires the file systems that Android uses to support large folios. >> >> It would be great to understand your plans, if any, and figure out if/where Arm >> may be able to help accelerate activities in this area. I'm not (yet!) an fs >> expert, but I see that f2fs is already using iomap, so perhaps now that you are >> supporting small folios, switching up to large folios is not too big of a step? > > Thanks for sharing the background. By any chance, have you talked about large folios > with Android kernel team? Android kernel team are actively working (along with partners) to enable mTHP - that's large folios for anonymous memory. They haven't yet got to looking specifically at large-folios for file-backed memory. But I've had some informal conversations with a couple of Android kernel team folks which have been positive about the prospect. If indeed there's a fair benefit from Android side, > I'd like to circle back supporting it in f2fs seriously. 
My own experiments have demonstrated performance uplift in Chromium web browsing when using contpte-mapped code [1] (about 1.5% improvement on its own). We also have plans to selectively use 2M THP (that's PMD-sized for 4K pages, or contpte-sized for 16K pages) for some hot code, which previous experiments have shown to be beneficial (I see about 2.5% for the same speedometer benchmark). This all relies upon the filesystem supporting large folios though. I'd really like to get large-folio support into f2fs to enable gathering more performance data in a real Android environment. How best can we work together to add this feature to f2fs? [1] https://lore.kernel.org/linux-mm/202...@ar.../ Thanks, Ryan > >> >> Thanks, >> Ryan |
From: Qi H. <ha...@vi...> - 2024-10-09 10:12:00
|
When the free segment is used up during CP disable, many write or ioctl operations will get ENOSPC error codes, even if there are still many blocks available. We can reproduce it in the following steps: dd if=/dev/zero of=f2fs.img bs=1M count=55 mkfs.f2fs -f f2fs.img mount f2fs.img f2fs_dir -o checkpoint=disable:10% cd f2fs_dir dd if=/dev/zero of=bigfile bs=1M count=50 sync rm bigfile i=1; while [[ $i -lt 10000000 ]]; do (file_name=./file$i; dd \ if=/dev/random of=$file_name bs=1M count=0); i=$((i+1)); done stat -f ./ In f2fs_need_SSR() function, it is allowed to use SSR to allocate blocks when CP is disabled, so in f2fs_is_checkpoint_ready function, can we judge the number of invalid blocks when free segment is not enough, and return ENOSPC only if the number of invalid blocks is also not enough? Signed-off-by: Qi Han <ha...@vi...> --- fs/f2fs/segment.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 71adb4a43bec..9bf0cf3a6a31 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -637,12 +637,33 @@ static inline bool has_enough_free_secs(struct f2fs_sb_info *sbi, return !has_not_enough_free_secs(sbi, freed, needed); } +static inline bool has_enough_available_blocks(struct f2fs_sb_info *sbi) +{ + unsigned int total_free_blocks = sbi->user_block_count - + valid_user_blocks(sbi) - + sbi->current_reserved_blocks; + + if (total_free_blocks <= sbi->unusable_block_count) + total_free_blocks = 0; + else + total_free_blocks -= sbi->unusable_block_count; + + if (total_free_blocks > F2FS_OPTION(sbi).root_reserved_blocks) + total_free_blocks -= F2FS_OPTION(sbi).root_reserved_blocks; + else + total_free_blocks = 0; + + return (total_free_blocks > 0) ? 
true : false; +} + static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) { if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; if (likely(has_enough_free_secs(sbi, 0, 0))) return true; + if (likely(has_enough_available_blocks(sbi))) + return true; return false; } -- 2.39.0 |
From: <in...@so...> - 2024-10-09 09:01:17
|
拝啓 米国中央銀行(連邦準備制度)は、当社のソフトウェアがインターネット詐欺の被害者をすべて検出するシステムを構築したことをここに通知し ます。 その結果、お客様が被害者の一人であることが分かりましたので、補償プランを通じて損失を返金できるように情報をお伝えするためにご連絡さ せていただきました。 あなたが本当にインターネット詐欺師に騙された被害者の一人である場合は、当社に連絡して何を失ったかを確認してください。その後、当社は 支店に指示して損失を返金します。 Yours sincerely Mrs.Larnberti Juan USA Federal Reserve Address: 1850 K Street, NW, 3rd floor, in Washington USA USA FEDERAL RESERVE |
From: Chao Yu <ch...@ke...> - 2024-10-09 07:40:07
|
On 2024/9/24 17:56, Liao Yuanhong wrote: > The f2fs-tools support manual configuration of rsvd and ovp rate. In cases > where only a small rsvd is set, the automatically calculated ovp rate can > be very large, resulting in the reserved space of the entire file system > being almost the same as before, failing to achieve the goal of reducing > space usage. Therefore, for cases where only rsvd is set and ovp rate is > not, we will provide the same ovp rate as in normal situations, which > exceeds overprovision_segment_buffer, and does not occupy additional space. > > Signed-off-by: Liao Yuanhong <lia...@vi...> > --- > fsck/resize.c | 2 +- > include/f2fs_fs.h | 8 ++++---- > mkfs/f2fs_format.c | 15 ++++++++++++--- > 3 files changed, 17 insertions(+), 8 deletions(-) > > diff --git a/fsck/resize.c b/fsck/resize.c > index 049ddd3..eca6555 100644 > --- a/fsck/resize.c > +++ b/fsck/resize.c > @@ -147,7 +147,7 @@ safe_resize: > > /* Let's determine the best reserved and overprovisioned space */ > if (c.new_overprovision == 0) > - c.new_overprovision = get_best_overprovision(sb); > + c.new_overprovision = get_best_overprovision(sb, true); > > c.new_reserved_segments = > (100 / c.new_overprovision + 1 + NR_CURSEG_TYPE) * > diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h > index 870a6e4..038002a 100644 > --- a/include/f2fs_fs.h > +++ b/include/f2fs_fs.h > @@ -1760,13 +1760,13 @@ extern uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb); > #define ZONE_ALIGN(blks) SIZE_ALIGN(blks, c.blks_per_seg * \ > c.segs_per_zone) > > -static inline uint32_t get_reserved(struct f2fs_super_block *sb, double ovp) > +static inline uint32_t get_reserved(struct f2fs_super_block *sb, double ovp, bool conf_reserved) > { > uint32_t usable_main_segs = f2fs_get_usable_segments(sb); > uint32_t segs_per_sec = round_up(usable_main_segs, get_sb(section_count)); > uint32_t reserved; > > - if (c.conf_reserved_sections) > + if (c.conf_reserved_sections && conf_reserved) > reserved = 
c.conf_reserved_sections * segs_per_sec; > else > reserved = (100 / ovp + 1 + NR_CURSEG_TYPE) * segs_per_sec; > @@ -1781,7 +1781,7 @@ static inline uint32_t overprovision_segment_buffer(struct f2fs_super_block *sb) > return 6 * get_sb(segs_per_sec); > } > > -static inline double get_best_overprovision(struct f2fs_super_block *sb) > +static inline double get_best_overprovision(struct f2fs_super_block *sb, bool conf_reserved) > { > double ovp, candidate, end, diff, space; > double max_ovp = 0, max_space = 0; > @@ -1799,7 +1799,7 @@ static inline double get_best_overprovision(struct f2fs_super_block *sb) > } > > for (; candidate <= end; candidate += diff) { > - reserved = get_reserved(sb, candidate); > + reserved = get_reserved(sb, candidate, conf_reserved); > ovp = (usable_main_segs - reserved) * candidate / 100; > if (ovp < 0) > continue; > diff --git a/mkfs/f2fs_format.c b/mkfs/f2fs_format.c > index e26a513..9c917c9 100644 > --- a/mkfs/f2fs_format.c > +++ b/mkfs/f2fs_format.c > @@ -480,10 +480,19 @@ static int f2fs_prepare_super_block(void) > * overprovision ratio and reserved seg count based on avg usable > * segs_per_sec. > */ > - if (c.overprovision == 0) > - c.overprovision = get_best_overprovision(sb); > + if (c.overprovision == 0) { > > - c.reserved_segments = get_reserved(sb, c.overprovision); > + /* > + * If rsvd is manually set but ovp rate is not, > + * provide the same ovp rate as in normal allocation. > + */ > + if (c.conf_reserved_sections) > + c.overprovision = get_best_overprovision(sb, false); > + else > + c.overprovision = get_best_overprovision(sb, true); get_best_overprovision() can provide a pair value [ovp, reserved], which can maximize available space for user. If we just pick one of them, and use configured one instead of another of them, it's weird. For your case, maybe we can introduce a default ovp value, and use it w/ configured reserved value? or you can assign ovp/reserved secs directly by using -o and -Z? 
Thanks, > + } > + > + c.reserved_segments = get_reserved(sb, c.overprovision, true); > > if (c.feature & F2FS_FEATURE_RO) { > c.overprovision = 0; |
From: Chao Yu <ch...@ke...> - 2024-10-09 02:32:42
|
On 2024/10/9 2:05, Daeho Jeong wrote: > From: Daeho Jeong <dae...@go...> > > F2FS should understand how the device aliasing file works and support > deleting the file after use. A device aliasing file can be created by > mkfs.f2fs tool and it can map the whole device with an extrent, not > using node blocks. The file space should be pinned and normally used for > read-only usages. > > Signed-off-by: Daeho Jeong <dae...@go...> > Signed-off-by: Chao Yu <ch...@ke...> > --- > v4: added file pinning check in sanity check > v3: merged Chao's extent cache sanity check. > prevented device aliasing support with noextent mount option > v2: changed the position of f2fs_destroy_extent_tree() only for device > aliasing files > --- > fs/f2fs/data.c | 5 +++++ > fs/f2fs/extent_cache.c | 45 +++++++++++++++++++++++++++++++++++++++++- > fs/f2fs/f2fs.h | 5 +++++ > fs/f2fs/file.c | 36 +++++++++++++++++++++++++++++---- > fs/f2fs/inode.c | 19 +++++++++++++++++- > fs/f2fs/super.c | 4 ++++ > fs/f2fs/sysfs.c | 2 ++ > 7 files changed, 110 insertions(+), 6 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index b94cf6eea2f9..385b46e62ede 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -3441,6 +3441,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > > if (!f2fs_lookup_read_extent_cache_block(inode, index, > &dn.data_blkaddr)) { > + if (IS_DEVICE_ALIASING(inode)) { > + err = -ENODATA; > + goto out; > + } > + > if (locked) { > err = f2fs_reserve_block(&dn, index); > goto out; > diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c > index 62ac440d9416..019c1f7b7fa5 100644 > --- a/fs/f2fs/extent_cache.c > +++ b/fs/f2fs/extent_cache.c > @@ -24,6 +24,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; > struct extent_info ei; > + int devi; > > get_read_extent_info(&ei, i_ext); > > @@ -38,7 +39,36 @@ bool 
sanity_check_extent_cache(struct inode *inode, struct page *ipage) > ei.blk, ei.fofs, ei.len); > return false; > } > - return true; > + > + if (!IS_DEVICE_ALIASING(inode)) > + return true; > + > + for (devi = 0; devi < sbi->s_ndevs; devi++) { > + if (FDEV(devi).start_blk != ei.blk || > + FDEV(devi).end_blk != ei.blk + ei.len - 1) > + continue; > + > + if (devi == 0) { > + f2fs_warn(sbi, > + "%s: inode (ino=%lx) is an alias of meta device", > + __func__, inode->i_ino); > + return false; > + } > + > + if (bdev_is_zoned(FDEV(devi).bdev)) { > + f2fs_warn(sbi, > + "%s: device alias inode (ino=%lx)'s extent info " > + "[%u, %u, %u] maps to zoned block device", > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > + return false; > + } > + return true; > + } > + > + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " > + "[%u, %u, %u] is inconsistent w/ any devices", > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > + return false; > } > > static void __set_extent_info(struct extent_info *ei, > @@ -76,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) > > static bool __may_extent_tree(struct inode *inode, enum extent_type type) > { > + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) > + return true; > + > /* > * for recovered files during mount do not create extents > * if shrinker is not registered. 
> @@ -401,6 +434,11 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) > if (atomic_read(&et->node_cnt) || !ei.len) > goto skip; > > + if (IS_DEVICE_ALIASING(inode)) { > + et->largest = ei; > + goto skip; > + } > + > en = __attach_extent_node(sbi, et, &ei, NULL, > &et->root.rb_root.rb_node, true); > if (en) { > @@ -463,6 +501,11 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, > goto out; > } > > + if (IS_DEVICE_ALIASING(inode)) { > + ret = false; > + goto out; > + } > + > en = __lookup_extent_node(&et->root, et->cached_en, pgofs); > if (!en) > goto out; > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 33f5449dc22d..b6ba22a1da47 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -213,6 +213,7 @@ struct f2fs_mount_info { > #define F2FS_FEATURE_CASEFOLD 0x00001000 > #define F2FS_FEATURE_COMPRESSION 0x00002000 > #define F2FS_FEATURE_RO 0x00004000 > +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 > > #define __F2FS_HAS_FEATURE(raw_super, mask) \ > ((raw_super->feature & cpu_to_le32(mask)) != 0) > @@ -3046,6 +3047,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ > #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ > #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ > +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ Is there any way to know which inode is device-alias one? maybe we can export this flag to userspace via .fileattr_get? or via newly introduced ioctl interface? > > #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) > > @@ -3061,6 +3063,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > /* Flags that are appropriate for non-directories/regular files. 
*/ > #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) > > +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) > + > static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) > { > if (S_ISDIR(mode)) > @@ -4510,6 +4514,7 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); > F2FS_FEATURE_FUNCS(casefold, CASEFOLD); > F2FS_FEATURE_FUNCS(compression, COMPRESSION); > F2FS_FEATURE_FUNCS(readonly, RO); > +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); > > #ifdef CONFIG_BLK_DEV_ZONED > static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 99903eafa7fe..f2d2d84d025b 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -725,6 +725,11 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > > trace_f2fs_truncate_blocks_enter(inode, from); > > + if (IS_DEVICE_ALIASING(inode) && from) { > + err = -EINVAL; > + goto out_err; > + } > + > free_from = (pgoff_t)F2FS_BLK_ALIGN(from); > > if (free_from >= max_file_blocks(inode)) > @@ -739,6 +744,21 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > goto out; > } > > + if (IS_DEVICE_ALIASING(inode)) { > + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; > + struct extent_info ei = et->largest; > + unsigned int i; > + > + for (i = 0; i < ei.len; i++) > + f2fs_invalidate_blocks(sbi, ei.blk + i); > + > + dec_valid_block_count(sbi, inode, ei.len); > + f2fs_update_time(sbi, REQ_TIME); > + > + f2fs_put_page(ipage, 1); > + goto out; > + } > + > if (f2fs_has_inline_data(inode)) { > f2fs_truncate_inline_inode(inode, ipage, from); > f2fs_put_page(ipage, 1); > @@ -774,7 +794,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > /* lastly zero out the first data page */ > if (!err) > err = truncate_partial_data_page(inode, from, truncate_page); > - > +out_err: > trace_f2fs_truncate_blocks_exit(inode, err); > return err; > } > @@ -992,7 +1012,8 @@ int 
f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, > return -EPERM; > > if ((attr->ia_valid & ATTR_SIZE)) { > - if (!f2fs_is_compress_backend_ready(inode)) > + if (!f2fs_is_compress_backend_ready(inode) || > + IS_DEVICE_ALIASING(inode)) > return -EOPNOTSUPP; > if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && > !IS_ALIGNED(attr->ia_size, > @@ -1860,7 +1881,7 @@ static long f2fs_fallocate(struct file *file, int mode, > return -EIO; > if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) > return -ENOSPC; > - if (!f2fs_is_compress_backend_ready(inode)) > + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) > return -EOPNOTSUPP; > > /* f2fs only support ->fallocate for regular file */ > @@ -3296,6 +3317,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) > struct f2fs_inode_info *fi = F2FS_I(inode); > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > > + if (IS_DEVICE_ALIASING(inode)) > + return -EINVAL; > + > if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { > f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", > __func__, inode->i_ino, fi->i_gc_failures); > @@ -3326,6 +3350,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > if (f2fs_readonly(sbi->sb)) > return -EROFS; > > + if (!pin && IS_DEVICE_ALIASING(inode)) > + return -EOPNOTSUPP; > + > ret = mnt_want_write_file(filp); > if (ret) > return ret; > @@ -4764,7 +4791,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, > else > return 0; > > - map.m_may_create = true; > + if (!IS_DEVICE_ALIASING(inode)) > + map.m_may_create = true; > if (dio) { > map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, > inode->i_write_hint); > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c > index 1ed86df343a5..e2d30fc79644 100644 > --- a/fs/f2fs/inode.c > +++ b/fs/f2fs/inode.c > @@ -372,6 +372,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) > return false; > } > > + if (fi->i_flags & 
F2FS_DEVICE_ALIAS_FL) { Trivial cleanup. IS_DEVICE_ALIASING(inode) Thanks, > + if (!f2fs_sb_has_device_alias(sbi)) { > + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", > + __func__, inode->i_ino); > + return false; > + } > + if (!f2fs_is_pinned_file(inode)) { > + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned", > + __func__, inode->i_ino); > + return false; > + } > + } > + > return true; > } > > @@ -823,7 +836,8 @@ void f2fs_evict_inode(struct inode *inode) > f2fs_bug_on(sbi, get_dirty_pages(inode)); > f2fs_remove_dirty_inode(inode); > > - f2fs_destroy_extent_tree(inode); > + if (!IS_DEVICE_ALIASING(inode)) > + f2fs_destroy_extent_tree(inode); > > if (inode->i_nlink || is_bad_inode(inode)) > goto no_delete; > @@ -879,6 +893,9 @@ void f2fs_evict_inode(struct inode *inode) > goto retry; > } > > + if (IS_DEVICE_ALIASING(inode)) > + f2fs_destroy_extent_tree(inode); > + > if (err) { > f2fs_update_inode_page(inode); > if (dquot_initialize_needed(inode)) > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > index fc2c586c7619..95097498b544 100644 > --- a/fs/f2fs/super.c > +++ b/fs/f2fs/super.c > @@ -834,6 +834,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) > set_opt(sbi, READ_EXTENT_CACHE); > break; > case Opt_noextent_cache: > + if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) { > + f2fs_err(sbi, "device aliasing requires extent cache"); > + return -EINVAL; > + } > clear_opt(sbi, READ_EXTENT_CACHE); > break; > case Opt_noinline_data: > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c > index c56e8c873935..e51304bc65ea 100644 > --- a/fs/f2fs/sysfs.c > +++ b/fs/f2fs/sysfs.c > @@ -1313,6 +1313,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); > F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); > F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); > F2FS_SB_FEATURE_RO_ATTR(readonly, RO); > +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); > > static struct attribute 
*f2fs_sb_feat_attrs[] = { > ATTR_LIST(sb_encryption), > @@ -1329,6 +1330,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = { > ATTR_LIST(sb_casefold), > ATTR_LIST(sb_compression), > ATTR_LIST(sb_readonly), > + ATTR_LIST(sb_device_alias), > NULL, > }; > ATTRIBUTE_GROUPS(f2fs_sb_feat); |
From: Daeho J. <da...@gm...> - 2024-10-08 18:05:27
|
From: Daeho Jeong <dae...@go...> F2FS should understand how the device aliasing file works and support deleting the file after use. A device aliasing file can be created by mkfs.f2fs tool and it can map the whole device with an extent, not using node blocks. The file space should be pinned and normally used for read-only usages. Signed-off-by: Daeho Jeong <dae...@go...> Signed-off-by: Chao Yu <ch...@ke...> --- v4: added file pinning check in sanity check v3: merged Chao's extent cache sanity check. prevented device aliasing support with noextent mount option v2: changed the position of f2fs_destroy_extent_tree() only for device aliasing files --- fs/f2fs/data.c | 5 +++++ fs/f2fs/extent_cache.c | 45 +++++++++++++++++++++++++++++++++++++++++- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/file.c | 36 +++++++++++++++++++++++++++++---- fs/f2fs/inode.c | 19 +++++++++++++++++- fs/f2fs/super.c | 4 ++++ fs/f2fs/sysfs.c | 2 ++ 7 files changed, 110 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b94cf6eea2f9..385b46e62ede 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3441,6 +3441,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { + if (IS_DEVICE_ALIASING(inode)) { + err = -ENODATA; + goto out; + } + if (locked) { err = f2fs_reserve_block(&dn, index); goto out; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 62ac440d9416..019c1f7b7fa5 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -24,6 +24,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; struct extent_info ei; + int devi; get_read_extent_info(&ei, i_ext); @@ -38,7 +39,36 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) ei.blk, ei.fofs, ei.len); return false; } - return true; + + if (!IS_DEVICE_ALIASING(inode)) + return 
true; + + for (devi = 0; devi < sbi->s_ndevs; devi++) { + if (FDEV(devi).start_blk != ei.blk || + FDEV(devi).end_blk != ei.blk + ei.len - 1) + continue; + + if (devi == 0) { + f2fs_warn(sbi, + "%s: inode (ino=%lx) is an alias of meta device", + __func__, inode->i_ino); + return false; + } + + if (bdev_is_zoned(FDEV(devi).bdev)) { + f2fs_warn(sbi, + "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] maps to zoned block device", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; + } + return true; + } + + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] is inconsistent w/ any devices", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; } static void __set_extent_info(struct extent_info *ei, @@ -76,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) static bool __may_extent_tree(struct inode *inode, enum extent_type type) { + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) + return true; + /* * for recovered files during mount do not create extents * if shrinker is not registered. 
@@ -401,6 +434,11 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) if (atomic_read(&et->node_cnt) || !ei.len) goto skip; + if (IS_DEVICE_ALIASING(inode)) { + et->largest = ei; + goto skip; + } + en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); if (en) { @@ -463,6 +501,11 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; } + if (IS_DEVICE_ALIASING(inode)) { + ret = false; + goto out; + } + en = __lookup_extent_node(&et->root, et->cached_en, pgofs); if (!en) goto out; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 33f5449dc22d..b6ba22a1da47 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -213,6 +213,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_CASEFOLD 0x00001000 #define F2FS_FEATURE_COMPRESSION 0x00002000 #define F2FS_FEATURE_RO 0x00004000 +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -3046,6 +3047,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) @@ -3061,6 +3063,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* Flags that are appropriate for non-directories/regular files. 
*/ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) + static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) { if (S_ISDIR(mode)) @@ -4510,6 +4514,7 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 99903eafa7fe..f2d2d84d025b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -725,6 +725,11 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); + if (IS_DEVICE_ALIASING(inode) && from) { + err = -EINVAL; + goto out_err; + } + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= max_file_blocks(inode)) @@ -739,6 +744,21 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) goto out; } + if (IS_DEVICE_ALIASING(inode)) { + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; + struct extent_info ei = et->largest; + unsigned int i; + + for (i = 0; i < ei.len; i++) + f2fs_invalidate_blocks(sbi, ei.blk + i); + + dec_valid_block_count(sbi, inode, ei.len); + f2fs_update_time(sbi, REQ_TIME); + + f2fs_put_page(ipage, 1); + goto out; + } + if (f2fs_has_inline_data(inode)) { f2fs_truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); @@ -774,7 +794,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) /* lastly zero out the first data page */ if (!err) err = truncate_partial_data_page(inode, from, truncate_page); - +out_err: trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -992,7 +1012,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return -EPERM; if ((attr->ia_valid & ATTR_SIZE)) { - if 
(!f2fs_is_compress_backend_ready(inode)) + if (!f2fs_is_compress_backend_ready(inode) || + IS_DEVICE_ALIASING(inode)) return -EOPNOTSUPP; if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && !IS_ALIGNED(attr->ia_size, @@ -1860,7 +1881,7 @@ static long f2fs_fallocate(struct file *file, int mode, return -EIO; if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) return -ENOSPC; - if (!f2fs_is_compress_backend_ready(inode)) + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) return -EOPNOTSUPP; /* f2fs only support ->fallocate for regular file */ @@ -3296,6 +3317,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + if (IS_DEVICE_ALIASING(inode)) + return -EINVAL; + if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", __func__, inode->i_ino, fi->i_gc_failures); @@ -3326,6 +3350,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (!pin && IS_DEVICE_ALIASING(inode)) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -4764,7 +4791,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, else return 0; - map.m_may_create = true; + if (!IS_DEVICE_ALIASING(inode)) + map.m_may_create = true; if (dio) { map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 1ed86df343a5..e2d30fc79644 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -372,6 +372,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (fi->i_flags & F2FS_DEVICE_ALIAS_FL) { + if (!f2fs_sb_has_device_alias(sbi)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", + __func__, inode->i_ino); + return false; + } + if (!f2fs_is_pinned_file(inode)) { + 
f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned", + __func__, inode->i_ino); + return false; + } + } + return true; } @@ -823,7 +836,8 @@ void f2fs_evict_inode(struct inode *inode) f2fs_bug_on(sbi, get_dirty_pages(inode)); f2fs_remove_dirty_inode(inode); - f2fs_destroy_extent_tree(inode); + if (!IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; @@ -879,6 +893,9 @@ void f2fs_evict_inode(struct inode *inode) goto retry; } + if (IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); + if (err) { f2fs_update_inode_page(inode); if (dquot_initialize_needed(inode)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fc2c586c7619..95097498b544 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -834,6 +834,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) set_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noextent_cache: + if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) { + f2fs_err(sbi, "device aliasing requires extent cache"); + return -EINVAL; + } clear_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noinline_data: diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c56e8c873935..e51304bc65ea 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1313,6 +1313,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); F2FS_SB_FEATURE_RO_ATTR(readonly, RO); +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_encryption), @@ -1329,6 +1330,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_casefold), ATTR_LIST(sb_compression), ATTR_LIST(sb_readonly), + ATTR_LIST(sb_device_alias), NULL, }; ATTRIBUTE_GROUPS(f2fs_sb_feat); -- 2.47.0.rc0.187.ge670bccf7e-goog |
From: Markus E. <Mar...@we...> - 2024-10-08 13:16:58
|
… > This patch fixes the bug by … * What do you think about choosing a more imperative wording for an improved change description? https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?h=v6.12-rc2#n94 * Would you like to add any tags (like “Fixes” and “Cc”) accordingly? Regards, Markus |
From: Chao Yu <ch...@ke...> - 2024-10-08 09:02:22
|
On 2024/10/7 13:21, Yonggil Song wrote: > There was a bug that did not subtract the super block area when calculating > the usable segments for a single zoned device with a conventional zone. > This bug resulted in incorrect overprovision and reserved areas. > > <256MiB legacy block + zoned block w/ 32MiB zone size> > Info: Overprovision ratio = 3.570% > Info: Overprovision segments = 656 (GC reserved = 560) > > <8 conventional zone + 1016 sequential zone w/ 32MiB zone size> > Info: Overprovision ratio = 3.700% > Info: Overprovision segments = 676 (GC reserved = 578) > > This patch fixes the bug by subtracting the super block area when there is > only one zoned device. > > Signed-off-by: Yonggil Song <yon...@sa...> > --- > lib/libf2fs_zoned.c | 5 +++++ > 1 file changed, 5 insertions(+) > > diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c > index 89ba5ad73a76..cc5c064b7e3e 100644 > --- a/lib/libf2fs_zoned.c > +++ b/lib/libf2fs_zoned.c > @@ -555,6 +555,11 @@ uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb) > } > usable_segs -= (get_sb(main_blkaddr) - get_sb(segment0_blkaddr)) >> > get_sb(log_blocks_per_seg); > + > + /* single zoned device needs to remove a super block area */ If we don't format a zoned device, is this not needed? Thanks, > + if (c.ndevs == 1) > + usable_segs -= (get_sb(segment0_blkaddr) >> get_sb(log_blocks_per_seg)); > + > return usable_segs; > #endif > return get_sb(segment_count_main); |
From: Chao Yu <ch...@ke...> - 2024-10-08 08:51:21
|
On 2024/10/7 19:46, Thorsten Blum wrote: > Use struct_size() to calculate the number of bytes to allocate for a > cloned acl. > > Signed-off-by: Thorsten Blum <tho...@li...v> Reviewed-by: Chao Yu <ch...@ke...> Thanks, |
From: syzbot <syz...@sy...> - 2024-10-08 06:51:31
|
Hello, syzbot found the following issue on: HEAD commit: 3840cbe24cf0 sched: psi: fix bogus pressure spikes from ag.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=10bbab9f980000 kernel config: https://syzkaller.appspot.com/x/.config?x=f95955e3f7b5790c dashboard link: https://syzkaller.appspot.com/bug?extid=7988d9999219aea9f2db compiler: Debian clang version 15.0.6, GNU ld (GNU Binutils for Debian) 2.40 syz repro: https://syzkaller.appspot.com/x/repro.syz?x=17a4b3d0580000 Downloadable assets: disk image (non-bootable): https://storage.googleapis.com/syzbot-assets/7feb34a89c2a/non_bootable_disk-3840cbe2.raw.xz vmlinux: https://storage.googleapis.com/syzbot-assets/55888d19e055/vmlinux-3840cbe2.xz kernel image: https://storage.googleapis.com/syzbot-assets/f6b8ca10a019/bzImage-3840cbe2.xz mounted in repro #1: https://storage.googleapis.com/syzbot-assets/1ebfe9150f9d/mount_0.gz mounted in repro #2: https://storage.googleapis.com/syzbot-assets/91e39079514c/mount_7.gz IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syz...@sy... ====================================================== WARNING: possible circular locking dependency detected 6.12.0-rc1-syzkaller-00114-g3840cbe24cf0 #0 Not tainted ------------------------------------------------------ kswapd0/79 is trying to acquire lock: ffff888057a62610 (sb_internal#2){.+.+}-{0:0}, at: f2fs_evict_inode+0x662/0x15c0 fs/f2fs/inode.c:842 but task is already holding lock: ffffffff8ea37160 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat mm/vmscan.c:6844 [inline] ffffffff8ea37160 (fs_reclaim){+.+.}-{0:0}, at: kswapd+0xbf1/0x3700 mm/vmscan.c:7226 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #2 (fs_reclaim){+.+.}-{0:0}: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5825 __fs_reclaim_acquire mm/page_alloc.c:3834 [inline] fs_reclaim_acquire+0x88/0x140 mm/page_alloc.c:3848 might_alloc include/linux/sched/mm.h:327 [inline] prepare_alloc_pages+0x147/0x5d0 mm/page_alloc.c:4493 __alloc_pages_noprof+0x166/0x6c0 mm/page_alloc.c:4722 alloc_pages_mpol_noprof+0x3e8/0x680 mm/mempolicy.c:2265 alloc_pages_noprof mm/mempolicy.c:2345 [inline] folio_alloc_noprof+0x128/0x180 mm/mempolicy.c:2352 filemap_alloc_folio_noprof+0xdf/0x500 mm/filemap.c:1010 do_read_cache_folio+0x2eb/0x850 mm/filemap.c:3787 read_mapping_folio include/linux/pagemap.h:1011 [inline] f2fs_commit_super+0x52b/0x7d0 fs/f2fs/super.c:4044 f2fs_record_stop_reason+0x13b/0x1d0 fs/f2fs/super.c:4079 f2fs_handle_critical_error+0x2ac/0x5c0 fs/f2fs/super.c:4174 f2fs_write_inode+0x35f/0x4d0 fs/f2fs/inode.c:785 f2fs_do_sync_file+0x1395/0x19f0 fs/f2fs/file.c:356 f2fs_ioc_commit_atomic_write fs/f2fs/file.c:2259 [inline] __f2fs_ioctl+0x49db/0xb8f0 fs/f2fs/file.c:4417 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xf9/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #1 (&sbi->sb_lock){++++}-{3:3}: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5825 down_write+0x99/0x220 kernel/locking/rwsem.c:1577 f2fs_down_write fs/f2fs/f2fs.h:2199 [inline] f2fs_record_errors fs/f2fs/super.c:4121 [inline] f2fs_handle_error+0xf6/0x250 fs/f2fs/super.c:4138 __get_node_page+0x8d0/0xfc0 fs/f2fs/node.c:1489 f2fs_update_inode_page+0x87/0x170 fs/f2fs/inode.c:746 f2fs_evict_inode+0xa61/0x15c0 fs/f2fs/inode.c:883 evict+0x4e8/0x9b0 fs/inode.c:723 do_unlinkat+0x512/0x830 fs/namei.c:4540 __do_sys_unlink fs/namei.c:4581 [inline] __se_sys_unlink fs/namei.c:4579 [inline] __x64_sys_unlink+0x47/0x50 fs/namei.c:4579 do_syscall_x64 
arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (sb_internal#2){.+.+}-{0:0}: check_prev_add kernel/locking/lockdep.c:3161 [inline] check_prevs_add kernel/locking/lockdep.c:3280 [inline] validate_chain+0x18ef/0x5920 kernel/locking/lockdep.c:3904 __lock_acquire+0x1384/0x2050 kernel/locking/lockdep.c:5202 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5825 percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write include/linux/fs.h:1716 [inline] sb_start_intwrite+0x4d/0x1c0 include/linux/fs.h:1899 f2fs_evict_inode+0x662/0x15c0 fs/f2fs/inode.c:842 evict+0x4e8/0x9b0 fs/inode.c:723 f2fs_evict_inode+0x1a4/0x15c0 fs/f2fs/inode.c:807 evict+0x4e8/0x9b0 fs/inode.c:723 dispose_list fs/inode.c:772 [inline] prune_icache_sb+0x239/0x2f0 fs/inode.c:961 super_cache_scan+0x38c/0x4b0 fs/super.c:223 do_shrink_slab+0x701/0x1160 mm/shrinker.c:435 shrink_slab_memcg mm/shrinker.c:548 [inline] shrink_slab+0x878/0x14d0 mm/shrinker.c:626 shrink_one+0x43b/0x850 mm/vmscan.c:4818 shrink_many mm/vmscan.c:4879 [inline] lru_gen_shrink_node mm/vmscan.c:4957 [inline] shrink_node+0x3799/0x3de0 mm/vmscan.c:5937 kswapd_shrink_node mm/vmscan.c:6765 [inline] balance_pgdat mm/vmscan.c:6957 [inline] kswapd+0x1ca3/0x3700 mm/vmscan.c:7226 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 other info that might help us debug this: Chain exists of: sb_internal#2 --> &sbi->sb_lock --> fs_reclaim Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(&sbi->sb_lock); lock(fs_reclaim); rlock(sb_internal#2); *** DEADLOCK *** 2 locks held by kswapd0/79: #0: ffffffff8ea37160 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat mm/vmscan.c:6844 [inline] #0: ffffffff8ea37160 (fs_reclaim){+.+.}-{0:0}, at: kswapd+0xbf1/0x3700 mm/vmscan.c:7226 #1: ffff888057a620e0 (&type->s_umount_key#54){++++}-{3:3}, at: 
super_trylock_shared fs/super.c:562 [inline] #1: ffff888057a620e0 (&type->s_umount_key#54){++++}-{3:3}, at: super_cache_scan+0x94/0x4b0 fs/super.c:196 stack backtrace: CPU: 0 UID: 0 PID: 79 Comm: kswapd0 Not tainted 6.12.0-rc1-syzkaller-00114-g3840cbe24cf0 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: <TASK> __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_circular_bug+0x13a/0x1b0 kernel/locking/lockdep.c:2074 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2206 check_prev_add kernel/locking/lockdep.c:3161 [inline] check_prevs_add kernel/locking/lockdep.c:3280 [inline] validate_chain+0x18ef/0x5920 kernel/locking/lockdep.c:3904 __lock_acquire+0x1384/0x2050 kernel/locking/lockdep.c:5202 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5825 percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write include/linux/fs.h:1716 [inline] sb_start_intwrite+0x4d/0x1c0 include/linux/fs.h:1899 f2fs_evict_inode+0x662/0x15c0 fs/f2fs/inode.c:842 evict+0x4e8/0x9b0 fs/inode.c:723 f2fs_evict_inode+0x1a4/0x15c0 fs/f2fs/inode.c:807 evict+0x4e8/0x9b0 fs/inode.c:723 dispose_list fs/inode.c:772 [inline] prune_icache_sb+0x239/0x2f0 fs/inode.c:961 super_cache_scan+0x38c/0x4b0 fs/super.c:223 do_shrink_slab+0x701/0x1160 mm/shrinker.c:435 shrink_slab_memcg mm/shrinker.c:548 [inline] shrink_slab+0x878/0x14d0 mm/shrinker.c:626 shrink_one+0x43b/0x850 mm/vmscan.c:4818 shrink_many mm/vmscan.c:4879 [inline] lru_gen_shrink_node mm/vmscan.c:4957 [inline] shrink_node+0x3799/0x3de0 mm/vmscan.c:5937 kswapd_shrink_node mm/vmscan.c:6765 [inline] balance_pgdat mm/vmscan.c:6957 [inline] kswapd+0x1ca3/0x3700 mm/vmscan.c:7226 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 </TASK> --- This report is generated by a bot. It may contain errors. 
See https://goo.gl/tpsmEJ for more information about syzbot. syzbot engineers can be reached at syz...@go.... syzbot will keep track of this issue. See: https://goo.gl/tpsmEJ#status for how to communicate with syzbot. If the report is already addressed, let syzbot know by replying with: #syz fix: exact-commit-title If you want syzbot to run the reproducer, reply with: #syz test: git://repo/address.git branch-or-commit-hash If you attach or paste a git patch, syzbot will apply it before testing. If you want to overwrite report's subsystems, reply with: #syz set subsystems: new-subsystem (See the list of subsystem names on the web dashboard) If the report is a duplicate of another one, reply with: #syz dup: exact-subject-of-another-report If you want to undo deduplication, reply with: #syz undup |
From: Chao Yu <ch...@ke...> - 2024-10-08 03:26:01
|
On 2024/9/18 16:44, Qi Han wrote: > Creating a large file while checkpoint is disabled until it runs out of > space, then deleting it, then remounting to enable checkpoint again, and > then unmounting the filesystem triggers the f2fs_bug_on as below: > > ------------[ cut here ]------------ > kernel BUG at fs/f2fs/inode.c:896! > CPU: 2 UID: 0 PID: 1286 Comm: umount Not tainted 6.11.0-rc7-dirty #360 > Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI > RIP: 0010:f2fs_evict_inode+0x58c/0x610 > Call Trace: > __die_body+0x15/0x60 > die+0x33/0x50 > do_trap+0x10a/0x120 > f2fs_evict_inode+0x58c/0x610 > do_error_trap+0x60/0x80 > f2fs_evict_inode+0x58c/0x610 > exc_invalid_op+0x53/0x60 > f2fs_evict_inode+0x58c/0x610 > asm_exc_invalid_op+0x16/0x20 > f2fs_evict_inode+0x58c/0x610 > evict+0x101/0x260 > dispose_list+0x30/0x50 > evict_inodes+0x140/0x190 > generic_shutdown_super+0x2f/0x150 > kill_block_super+0x11/0x40 > kill_f2fs_super+0x7d/0x140 > deactivate_locked_super+0x2a/0x70 > cleanup_mnt+0xb3/0x140 > task_work_run+0x61/0x90 > > The root cause is: creating large files during the disabled-checkpoint > period results in not enough free segments, so writing back the root > inode will fail in f2fs_enable_checkpoint. When unmounting the file > system after enabling checkpoint, the root inode is dirty in the > f2fs_evict_inode function, which triggers BUG_ON. The steps to > reproduce are as follows: > > dd if=/dev/zero of=f2fs.img bs=1M count=55 > mount f2fs.img f2fs_dir -o checkpoint=disable:10% > dd if=/dev/zero of=big bs=1M count=50 > sync > rm big > mount -o remount,checkpoint=enable f2fs_dir > umount f2fs_dir > > Let's redirty the inode when there are no free segments while checkpoint > is disabled. > > Signed-off-by: Qi Han <ha...@vi...> Reviewed-by: Chao Yu <ch...@ke...> Thanks, |
From: Chao Yu <ch...@ke...> - 2024-10-08 03:04:28
|
On 2024/9/26 5:52, Daeho Jeong wrote: > From: Daeho Jeong <dae...@go...> > > F2FS should understand how the device aliasing file works and support > deleting the file after use. A device aliasing file can be created by > mkfs.f2fs tool and it can map the whole device with an extrent, not > using node blocks. The file space should be pinned and normally used for > read-only usages. > > Signed-off-by: Daeho Jeong <dae...@go...> > Signed-off-by: Chao Yu <ch...@ke...> > --- > v3: merged Chao's extent cache sanity check. > prevented device aliasing support with noextent mount option > v2: changed the position of f2fs_destroy_extent_tree() only for device > aliasing files > --- > fs/f2fs/data.c | 5 +++++ > fs/f2fs/extent_cache.c | 45 +++++++++++++++++++++++++++++++++++++++++- > fs/f2fs/f2fs.h | 5 +++++ > fs/f2fs/file.c | 36 +++++++++++++++++++++++++++++---- > fs/f2fs/inode.c | 12 ++++++++++- > fs/f2fs/super.c | 4 ++++ > fs/f2fs/sysfs.c | 2 ++ > 7 files changed, 103 insertions(+), 6 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index b94cf6eea2f9..385b46e62ede 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -3441,6 +3441,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > > if (!f2fs_lookup_read_extent_cache_block(inode, index, > &dn.data_blkaddr)) { > + if (IS_DEVICE_ALIASING(inode)) { > + err = -ENODATA; > + goto out; > + } > + > if (locked) { > err = f2fs_reserve_block(&dn, index); > goto out; > diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c > index 62ac440d9416..019c1f7b7fa5 100644 > --- a/fs/f2fs/extent_cache.c > +++ b/fs/f2fs/extent_cache.c > @@ -24,6 +24,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; > struct extent_info ei; > + int devi; > > get_read_extent_info(&ei, i_ext); > > @@ -38,7 +39,36 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) > ei.blk, 
ei.fofs, ei.len); > return false; > } > - return true; > + > + if (!IS_DEVICE_ALIASING(inode)) > + return true; > + > + for (devi = 0; devi < sbi->s_ndevs; devi++) { > + if (FDEV(devi).start_blk != ei.blk || > + FDEV(devi).end_blk != ei.blk + ei.len - 1) > + continue; > + > + if (devi == 0) { > + f2fs_warn(sbi, > + "%s: inode (ino=%lx) is an alias of meta device", > + __func__, inode->i_ino); > + return false; > + } > + > + if (bdev_is_zoned(FDEV(devi).bdev)) { > + f2fs_warn(sbi, > + "%s: device alias inode (ino=%lx)'s extent info " > + "[%u, %u, %u] maps to zoned block device", > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > + return false; > + } > + return true; > + } > + > + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " > + "[%u, %u, %u] is inconsistent w/ any devices", > + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); > + return false; > } > > static void __set_extent_info(struct extent_info *ei, > @@ -76,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) > > static bool __may_extent_tree(struct inode *inode, enum extent_type type) > { > + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) > + return true; > + > /* > * for recovered files during mount do not create extents > * if shrinker is not registered. 
> @@ -401,6 +434,11 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) > if (atomic_read(&et->node_cnt) || !ei.len) > goto skip; > > + if (IS_DEVICE_ALIASING(inode)) { > + et->largest = ei; > + goto skip; > + } > + > en = __attach_extent_node(sbi, et, &ei, NULL, > &et->root.rb_root.rb_node, true); > if (en) { > @@ -463,6 +501,11 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, > goto out; > } > > + if (IS_DEVICE_ALIASING(inode)) { > + ret = false; > + goto out; > + } > + > en = __lookup_extent_node(&et->root, et->cached_en, pgofs); > if (!en) > goto out; > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 33f5449dc22d..b6ba22a1da47 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -213,6 +213,7 @@ struct f2fs_mount_info { > #define F2FS_FEATURE_CASEFOLD 0x00001000 > #define F2FS_FEATURE_COMPRESSION 0x00002000 > #define F2FS_FEATURE_RO 0x00004000 > +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 > > #define __F2FS_HAS_FEATURE(raw_super, mask) \ > ((raw_super->feature & cpu_to_le32(mask)) != 0) > @@ -3046,6 +3047,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ > #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ > #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ > +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ > > #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) > > @@ -3061,6 +3063,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) > /* Flags that are appropriate for non-directories/regular files. 
*/ > #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) > > +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) > + > static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) > { > if (S_ISDIR(mode)) > @@ -4510,6 +4514,7 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); > F2FS_FEATURE_FUNCS(casefold, CASEFOLD); > F2FS_FEATURE_FUNCS(compression, COMPRESSION); > F2FS_FEATURE_FUNCS(readonly, RO); > +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); > > #ifdef CONFIG_BLK_DEV_ZONED > static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 99903eafa7fe..f2d2d84d025b 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -725,6 +725,11 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > > trace_f2fs_truncate_blocks_enter(inode, from); > > + if (IS_DEVICE_ALIASING(inode) && from) { > + err = -EINVAL; > + goto out_err; > + } > + > free_from = (pgoff_t)F2FS_BLK_ALIGN(from); > > if (free_from >= max_file_blocks(inode)) > @@ -739,6 +744,21 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > goto out; > } > > + if (IS_DEVICE_ALIASING(inode)) { > + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; > + struct extent_info ei = et->largest; > + unsigned int i; > + > + for (i = 0; i < ei.len; i++) > + f2fs_invalidate_blocks(sbi, ei.blk + i); > + > + dec_valid_block_count(sbi, inode, ei.len); > + f2fs_update_time(sbi, REQ_TIME); > + > + f2fs_put_page(ipage, 1); > + goto out; > + } > + > if (f2fs_has_inline_data(inode)) { > f2fs_truncate_inline_inode(inode, ipage, from); > f2fs_put_page(ipage, 1); > @@ -774,7 +794,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) > /* lastly zero out the first data page */ > if (!err) > err = truncate_partial_data_page(inode, from, truncate_page); > - > +out_err: > trace_f2fs_truncate_blocks_exit(inode, err); > return err; > } > @@ -992,7 +1012,8 @@ int 
f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, > return -EPERM; > > if ((attr->ia_valid & ATTR_SIZE)) { > - if (!f2fs_is_compress_backend_ready(inode)) > + if (!f2fs_is_compress_backend_ready(inode) || > + IS_DEVICE_ALIASING(inode)) > return -EOPNOTSUPP; > if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && > !IS_ALIGNED(attr->ia_size, > @@ -1860,7 +1881,7 @@ static long f2fs_fallocate(struct file *file, int mode, > return -EIO; > if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) > return -ENOSPC; > - if (!f2fs_is_compress_backend_ready(inode)) > + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) > return -EOPNOTSUPP; > > /* f2fs only support ->fallocate for regular file */ > @@ -3296,6 +3317,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) > struct f2fs_inode_info *fi = F2FS_I(inode); > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > > + if (IS_DEVICE_ALIASING(inode)) > + return -EINVAL; > + > if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { > f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", > __func__, inode->i_ino, fi->i_gc_failures); > @@ -3326,6 +3350,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > if (f2fs_readonly(sbi->sb)) > return -EROFS; > > + if (!pin && IS_DEVICE_ALIASING(inode)) > + return -EOPNOTSUPP; > + > ret = mnt_want_write_file(filp); > if (ret) > return ret; > @@ -4764,7 +4791,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, > else > return 0; > > - map.m_may_create = true; > + if (!IS_DEVICE_ALIASING(inode)) > + map.m_may_create = true; > if (dio) { > map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, > inode->i_write_hint); > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c > index 1ed86df343a5..25f66a0ed831 100644 > --- a/fs/f2fs/inode.c > +++ b/fs/f2fs/inode.c > @@ -372,6 +372,12 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) > return false; > } > > + if ((fi->i_flags & 
F2FS_DEVICE_ALIAS_FL) && !f2fs_sb_has_device_alias(sbi)) { > + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", > + __func__, inode->i_ino); > + return false; > + } Device alias inode should be pinned, so we need to add below sanity check? if ((fi->i_flags & F2FS_DEVICE_ALIAS_FL) && !f2fs_is_pinned_file(inode)) { f2fs_warn(...); return false; } Thanks, > + > return true; > } > > @@ -823,7 +829,8 @@ void f2fs_evict_inode(struct inode *inode) > f2fs_bug_on(sbi, get_dirty_pages(inode)); > f2fs_remove_dirty_inode(inode); > > - f2fs_destroy_extent_tree(inode); > + if (!IS_DEVICE_ALIASING(inode)) > + f2fs_destroy_extent_tree(inode); > > if (inode->i_nlink || is_bad_inode(inode)) > goto no_delete; > @@ -879,6 +886,9 @@ void f2fs_evict_inode(struct inode *inode) > goto retry; > } > > + if (IS_DEVICE_ALIASING(inode)) > + f2fs_destroy_extent_tree(inode); > + > if (err) { > f2fs_update_inode_page(inode); > if (dquot_initialize_needed(inode)) > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > index fc2c586c7619..95097498b544 100644 > --- a/fs/f2fs/super.c > +++ b/fs/f2fs/super.c > @@ -834,6 +834,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) > set_opt(sbi, READ_EXTENT_CACHE); > break; > case Opt_noextent_cache: > + if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) { > + f2fs_err(sbi, "device aliasing requires extent cache"); > + return -EINVAL; > + } > clear_opt(sbi, READ_EXTENT_CACHE); > break; > case Opt_noinline_data: > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c > index c56e8c873935..e51304bc65ea 100644 > --- a/fs/f2fs/sysfs.c > +++ b/fs/f2fs/sysfs.c > @@ -1313,6 +1313,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); > F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); > F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); > F2FS_SB_FEATURE_RO_ATTR(readonly, RO); > +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); > > static struct attribute *f2fs_sb_feat_attrs[] = { > 
ATTR_LIST(sb_encryption), > @@ -1329,6 +1330,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = { > ATTR_LIST(sb_casefold), > ATTR_LIST(sb_compression), > ATTR_LIST(sb_readonly), > + ATTR_LIST(sb_device_alias), > NULL, > }; > ATTRIBUTE_GROUPS(f2fs_sb_feat); |
From: Chao Yu <ch...@ke...> - 2024-10-08 02:57:30
|
On 2024/9/29 16:00, Qi Han wrote: > After releasing a file and subsequently reserving it, the FSCK flag is set > when the file is deleted, as shown in the following backtrace: > > F2FS-fs (dm-48): Inconsistent i_blocks, ino:401231, iblocks:1448, sectors:1472 > fs_rec_info_write_type+0x58/0x274 > f2fs_rec_info_write+0x1c/0x2c > set_sbi_flag+0x74/0x98 > dec_valid_block_count+0x150/0x190 > f2fs_truncate_data_blocks_range+0x2d4/0x3cc > f2fs_do_truncate_blocks+0x2fc/0x5f0 > f2fs_truncate_blocks+0x68/0x100 > f2fs_truncate+0x80/0x128 > f2fs_evict_inode+0x1a4/0x794 > evict+0xd4/0x280 > iput+0x238/0x284 > do_unlinkat+0x1ac/0x298 > __arm64_sys_unlinkat+0x48/0x68 > invoke_syscall+0x58/0x11c > > For clusters of the following type, i_blocks is decremented by 1 and > i_compr_blocks is incremented by 7 in release_compress_blocks, while > updates to i_blocks and i_compr_blocks are skipped in reserve_compress_blocks. > > raw node: > D D D D D D D D > after compress: > C D D D D D D D > after reserve: > C D D D D D D D > > Let's update i_blocks and i_compr_blocks properly in reserve_compress_blocks. > > Fixes: eb8fbaa53374 ("f2fs: compress: fix to check unreleased compressed cluster") > Signed-off-by: Qi Han <ha...@vi...> Reviewed-by: Chao Yu <ch...@ke...> Thanks, |
From: Chao Yu <ch...@ke...> - 2024-10-08 02:54:28
|
On 2024/9/29 14:17, LongPing Wei wrote: > Signed-off-by: LongPing Wei <wei...@op...> Reviewed-by: Chao Yu <ch...@ke...> Thanks, |