linux-f2fs-devel Mailing List for linux-f2fs (Page 5)
Brought to you by:
kjgkr
You can subscribe to this list here.
2012 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(10) |
Dec
(98) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2013 |
Jan
(100) |
Feb
(72) |
Mar
(79) |
Apr
(122) |
May
(93) |
Jun
(97) |
Jul
(72) |
Aug
(72) |
Sep
(73) |
Oct
(121) |
Nov
(161) |
Dec
(206) |
2014 |
Jan
(75) |
Feb
(54) |
Mar
(82) |
Apr
(98) |
May
(67) |
Jun
(89) |
Jul
(136) |
Aug
(122) |
Sep
(136) |
Oct
(58) |
Nov
(87) |
Dec
(114) |
2015 |
Jan
(140) |
Feb
(129) |
Mar
(141) |
Apr
(71) |
May
(192) |
Jun
(52) |
Jul
(120) |
Aug
(125) |
Sep
(157) |
Oct
(100) |
Nov
(54) |
Dec
(248) |
2016 |
Jan
(301) |
Feb
(180) |
Mar
(138) |
Apr
(137) |
May
(145) |
Jun
(123) |
Jul
(98) |
Aug
(143) |
Sep
(196) |
Oct
(166) |
Nov
(205) |
Dec
(141) |
2017 |
Jan
(167) |
Feb
(275) |
Mar
(273) |
Apr
(239) |
May
(193) |
Jun
(171) |
Jul
(226) |
Aug
(153) |
Sep
(212) |
Oct
(311) |
Nov
(257) |
Dec
(418) |
2018 |
Jan
(474) |
Feb
(188) |
Mar
(252) |
Apr
(500) |
May
(176) |
Jun
(291) |
Jul
(361) |
Aug
(331) |
Sep
(355) |
Oct
(154) |
Nov
(209) |
Dec
(185) |
2019 |
Jan
(172) |
Feb
(214) |
Mar
(247) |
Apr
(425) |
May
(273) |
Jun
(360) |
Jul
(400) |
Aug
(409) |
Sep
(149) |
Oct
(218) |
Nov
(319) |
Dec
(225) |
2020 |
Jan
(231) |
Feb
(487) |
Mar
(411) |
Apr
(258) |
May
(292) |
Jun
(369) |
Jul
(407) |
Aug
(173) |
Sep
(266) |
Oct
(317) |
Nov
(273) |
Dec
(391) |
2021 |
Jan
(285) |
Feb
(130) |
Mar
(232) |
Apr
(156) |
May
(311) |
Jun
(252) |
Jul
(336) |
Aug
(326) |
Sep
(151) |
Oct
(86) |
Nov
(114) |
Dec
(125) |
2022 |
Jan
(132) |
Feb
(167) |
Mar
(230) |
Apr
(460) |
May
(334) |
Jun
(324) |
Jul
(147) |
Aug
(188) |
Sep
(262) |
Oct
(346) |
Nov
(314) |
Dec
(245) |
2023 |
Jan
(306) |
Feb
(190) |
Mar
(199) |
Apr
(444) |
May
(378) |
Jun
(441) |
Jul
(403) |
Aug
(464) |
Sep
(144) |
Oct
(98) |
Nov
(152) |
Dec
(212) |
2024 |
Jan
(288) |
Feb
(365) |
Mar
(218) |
Apr
(275) |
May
(200) |
Jun
(228) |
Jul
(255) |
Aug
(228) |
Sep
(280) |
Oct
(319) |
Nov
(241) |
Dec
(174) |
2025 |
Jan
(166) |
Feb
(171) |
Mar
(469) |
Apr
(235) |
May
(257) |
Jun
(342) |
Jul
(379) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: yohan.joung <yoh...@sk...> - 2025-07-22 06:02:51
|
Reads for the pinfile using Direct I/O do not wait for dio write. Signed-off-by: yohan.joung <yoh...@sk...> --- fs/f2fs/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4039ccb5022c..58a4d25eb08f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4834,6 +4834,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); const loff_t pos = iocb->ki_pos; ssize_t ret; + bool dio; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; @@ -4842,12 +4843,15 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + dio = f2fs_should_use_dio(inode, iocb, to); + /* In LFS mode, if there is inflight dio, wait for its completion */ if (f2fs_lfs_mode(F2FS_I_SB(inode)) && - get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && + (!f2fs_is_pinned_file(inode) || !dio)) inode_dio_wait(inode); - if (f2fs_should_use_dio(inode, iocb, to)) { + if (dio) { ret = f2fs_dio_read_iter(iocb, to); } else { ret = filemap_read(iocb, to, 0); -- 2.33.0 |
From: Chao Yu <ch...@ke...> - 2025-07-22 05:52:03
|
On 7/22/25 13:20, yohan.joung wrote: > read for the pinfile using Direct I/O do not wait for dio write. > > Signed-off-by: yohan.joung <yoh...@sk...> > --- > fs/f2fs/file.c | 8 ++++++-- > 1 file changed, 6 insertions(+), 2 deletions(-) > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 4039ccb5022c..2b7e5c46c1ae 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -4834,6 +4834,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) > struct inode *inode = file_inode(iocb->ki_filp); > const loff_t pos = iocb->ki_pos; > ssize_t ret; > + bool dio; > > if (!f2fs_is_compress_backend_ready(inode)) > return -EOPNOTSUPP; > @@ -4842,12 +4843,15 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) > f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, > iov_iter_count(to), READ); > > + dio = f2fs_should_use_dio(inode, iocb, to); > + > /* In LFS mode, if there is inflight dio, wait for its completion */ > if (f2fs_lfs_mode(F2FS_I_SB(inode)) && > - get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) > + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && > + !(f2fs_is_pinned_file(inode) && dio)) How about? (!f2fs_is_pinned_file(inode) || !dio) Thanks, > inode_dio_wait(inode); > > - if (f2fs_should_use_dio(inode, iocb, to)) { > + if (dio) { > ret = f2fs_dio_read_iter(iocb, to); > } else { > ret = filemap_read(iocb, to, 0); |
From: yohan.joung <yoh...@sk...> - 2025-07-22 05:21:02
|
Reads for the pinfile using Direct I/O do not wait for dio write. Signed-off-by: yohan.joung <yoh...@sk...> --- fs/f2fs/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4039ccb5022c..2b7e5c46c1ae 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4834,6 +4834,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); const loff_t pos = iocb->ki_pos; ssize_t ret; + bool dio; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; @@ -4842,12 +4843,15 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + dio = f2fs_should_use_dio(inode, iocb, to); + /* In LFS mode, if there is inflight dio, wait for its completion */ if (f2fs_lfs_mode(F2FS_I_SB(inode)) && - get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && + !(f2fs_is_pinned_file(inode) && dio)) inode_dio_wait(inode); - if (f2fs_should_use_dio(inode, iocb, to)) { + if (dio) { ret = f2fs_dio_read_iter(iocb, to); } else { ret = filemap_read(iocb, to, 0); -- 2.33.0 |
From: yohan.joung <yoh...@sk...> - 2025-07-22 04:25:34
|
>On 7/21/25 13:41, yohan.joung wrote: >> pinfile is excluded as it operates with direct I/O > >pinfile can use buffer IO as well? I only considered direct I/O. I'll re-upload the pinfile patch considering buffered I/O. Thanks > >Thanks, > >> >> Signed-off-by: yohan.joung <yoh...@sk...> >> --- >> fs/f2fs/file.c | 3 ++- >> 1 file changed, 2 insertions(+), 1 deletion(-) >> >> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c >> index 4039ccb5022c..cac8c9650a7a 100644 >> --- a/fs/f2fs/file.c >> +++ b/fs/f2fs/file.c >> @@ -4844,7 +4844,8 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) >> >> /* In LFS mode, if there is inflight dio, wait for its completion */ >> if (f2fs_lfs_mode(F2FS_I_SB(inode)) && >> - get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) >> + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && >> + !f2fs_is_pinned_file(inode)) >> inode_dio_wait(inode); >> |
From: Chao Yu <ch...@ke...> - 2025-07-22 03:57:12
|
On 7/21/25 13:41, yohan.joung wrote: > pinfile is excluded as it operates with direct I/O pinfile can use buffer IO as well? Thanks, > > Signed-off-by: yohan.joung <yoh...@sk...> > --- > fs/f2fs/file.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 4039ccb5022c..cac8c9650a7a 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -4844,7 +4844,8 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) > > /* In LFS mode, if there is inflight dio, wait for its completion */ > if (f2fs_lfs_mode(F2FS_I_SB(inode)) && > - get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) > + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && > + !f2fs_is_pinned_file(inode)) > inode_dio_wait(inode); > > if (f2fs_should_use_dio(inode, iocb, to)) { |
From: Barry S. <21...@gm...> - 2025-07-22 03:54:37
|
On Mon, Jul 21, 2025 at 7:37 PM Qu Wenruo <quw...@gm...> wrote: > > > > 在 2025/7/21 19:55, Jan Kara 写道: > > On Mon 21-07-25 11:14:02, Gao Xiang wrote: > >> Hi Barry, > >> > >> On 2025/7/21 09:02, Barry Song wrote: > >>> On Wed, Jul 16, 2025 at 8:28 AM Gao Xiang <hsi...@li...> wrote: > [...] > >>> Given the difficulty of allocating large folios, it's always a good > >>> idea to have order-0 as a fallback. While I agree with your point, > >>> I have a slightly different perspective — enabling large folios for > >>> those devices might be beneficial, but the maximum order should > >>> remain small. I'm referring to "small" large folios. > >> > >> Yeah, agreed. Having a way to limit the maximum order for those small > >> devices (rather than disabling it completely) would be helpful. At > >> least "small" large folios could still provide benefits when memory > >> pressure is light. > > > > Well, in the page cache you can tune not only the minimum but also the > > maximum order of a folio being allocated for each inode. Btrfs and ext4 > > already use this functionality. So in principle the functionality is there, > > it is "just" a question of proper user interfaces or automatic logic to > > tune this limit. > > > > Honza > > And enabling large folios doesn't mean all fs operations will grab an > unnecessarily large folio. > > For buffered write, all those filesystem will only try to get folios as > large as necessary, not overly large. > > This means if the user space program is always doing buffered IO in a > power-of-two unit (and aligned offset of course), the folio size will > match the buffer size perfectly (if we have enough memory). > > So for properly aligned buffered writes, large folios won't really cause > unnecessarily large folios, meanwhile brings all the benefits. I don't think this captures the full picture. For example, in memory reclamation, if any single subpage is hot, the entire large folio is treated as hot and cannot be reclaimed. 
So I’m not convinced that "filesystems will only try to get folios as large as necessary" is the right policy. Large folios are a good idea, but the lack of control over their maximum size limits their practical applicability. When an embedded device enables large folios and only observes performance regressions, the immediate reaction is often to disable the feature entirely. This, in turn, harms the adoption and development of large folios. > > Although I'm not familiar enough with filemap to comment on folio read > and readahead... > > Thanks, > Qu Best Regards Barry |
From: Chao Yu <ch...@ke...> - 2025-07-22 03:45:01
|
On 7/19/25 06:04, Daeho Jeong wrote: > From: Daeho Jeong <dae...@go...> > > Otherwise F2FS will not do GC in background in low free section. > > Signed-off-by: Daeho Jeong <dae...@go...> Reviewed-by: Chao Yu <ch...@ke...> Thanks, |
From: Chao Yu <ch...@ke...> - 2025-07-22 03:24:56
|
On 7/19/25 05:50, Daeho Jeong wrote: > From: Daeho Jeong <dae...@go...> > > Add this to control GC algorithm for boost GC. > > Signed-off-by: Daeho Jeong <dae...@go...> > --- > v2: use GC_GREEDY instead of 1 > --- > Documentation/ABI/testing/sysfs-fs-f2fs | 8 +++++++- > fs/f2fs/gc.c | 3 ++- > fs/f2fs/gc.h | 1 + > fs/f2fs/sysfs.c | 16 ++++++++++++++++ > 4 files changed, 26 insertions(+), 2 deletions(-) > > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs > index 931c1f63aa2e..2158055cd9d1 100644 > --- a/Documentation/ABI/testing/sysfs-fs-f2fs > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs > @@ -866,6 +866,12 @@ What: /sys/fs/f2fs/<disk>/gc_boost_gc_multiple > Date: June 2025 > Contact: "Daeho Jeong" <dae...@go...> > Description: Set a multiplier for the background GC migration window when F2FS GC is > - boosted. > + boosted. the range should be from 1 to the segment count in a section. > Default: 5 > > +What: /sys/fs/f2fs/<disk>/gc_boost_gc_greedy > +Date: June 2025 > +Contact: "Daeho Jeong" <dae...@go...> > +Description: Control GC algorithm for boost GC. 
0: cost benefit, 1: greedy > + Default: 1 > + > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > index de7e59bc0906..0d7703e7f9e0 100644 > --- a/fs/f2fs/gc.c > +++ b/fs/f2fs/gc.c > @@ -141,7 +141,7 @@ static int gc_thread_func(void *data) > FOREGROUND : BACKGROUND); > > sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || > - gc_control.one_time; > + (gc_control.one_time && gc_th->boost_gc_greedy); > > /* foreground GC was been triggered via f2fs_balance_fs() */ > if (foreground) > @@ -198,6 +198,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) > gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; > gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; > gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE; > + gc_th->boost_gc_greedy = GC_GREEDY; > > if (f2fs_sb_has_blkzoned(sbi)) { > gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; > diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h > index efa1968810a0..1a2e7a84b59f 100644 > --- a/fs/f2fs/gc.h > +++ b/fs/f2fs/gc.h > @@ -69,6 +69,7 @@ struct f2fs_gc_kthread { > unsigned int boost_zoned_gc_percent; > unsigned int valid_thresh_ratio; > unsigned int boost_gc_multiple; > + unsigned int boost_gc_greedy; > }; > > struct gc_inode_list { > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c > index b0270b1c939c..3a52f51ee3c6 100644 > --- a/fs/f2fs/sysfs.c > +++ b/fs/f2fs/sysfs.c > @@ -824,6 +824,20 @@ static ssize_t __sbi_store(struct f2fs_attr *a, > return count; > } > > + if (!strcmp(a->attr.name, "gc_boost_gc_multiple")) { > + if (t < 1 || t > SEGS_PER_SEC(sbi)) > + return -EINVAL; > + sbi->gc_thread->boost_gc_multiple = (unsigned int)t; > + return count; > + } This check should be in ("f2fs: add gc_boost_gc_multiple sysfs node"), right? 
Thanks, > + > + if (!strcmp(a->attr.name, "gc_boost_gc_greedy")) { > + if (t > GC_GREEDY) > + return -EINVAL; > + sbi->gc_thread->boost_gc_greedy = (unsigned int)t; > + return count; > + } > + > *ui = (unsigned int)t; > > return count; > @@ -1051,6 +1065,7 @@ GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); > GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); > GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); > GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple); > +GC_THREAD_RW_ATTR(gc_boost_gc_greedy, boost_gc_greedy); > > /* SM_INFO ATTR */ > SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); > @@ -1222,6 +1237,7 @@ static struct attribute *f2fs_attrs[] = { > ATTR_LIST(gc_boost_zoned_gc_percent), > ATTR_LIST(gc_valid_thresh_ratio), > ATTR_LIST(gc_boost_gc_multiple), > + ATTR_LIST(gc_boost_gc_greedy), > ATTR_LIST(gc_idle), > ATTR_LIST(gc_urgent), > ATTR_LIST(reclaim_segments), |
From: Chao Yu <ch...@ke...> - 2025-07-22 03:23:24
|
On 7/19/25 05:40, Daeho Jeong wrote: > From: Daeho Jeong <dae...@go...> > > Add a sysfs knob to set a multiplier for the background GC migration > window when F2FS Garbage Collection is boosted. > > Signed-off-by: Daeho Jeong <dae...@go...> Reviewed-by: Chao Yu <ch...@ke...> Thanks, |
From: Chao Yu <ch...@ke...> - 2025-07-22 03:20:31
|
As we know, Android is the only user of casefold feature, in casefolded directory, creating a filename w/ character has ignorable code points in buggy kernel v6.12 is a very rare case, we don't get any report that user can not access file w/ character has ignorable code points till now. Let's disable linear lookup in fsck for Android by default, once there is any related bug report of unicode code points, we can enable it again. mkfs.f2fs -f -O casefold -C utf8 /dev/vdb dump.f2fs -d3 /dev/vdb |grep s_encoding_flags s_encoding_flags [0x 0 : 0] fsck.f2fs /dev/vdb -g android dump.f2fs -d3 /dev/vdb |grep s_encoding_flags s_encoding_flags [0x 2 : 2] Signed-off-by: Chao Yu <ch...@ke...> --- fsck/main.c | 5 ++++- man/fsck.f2fs.8 | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fsck/main.c b/fsck/main.c index c5d4159..e05c23a 100644 --- a/fsck/main.c +++ b/fsck/main.c @@ -91,7 +91,7 @@ void fsck_usage() MSG(0, " --no-kernel-check skips detecting kernel change\n"); MSG(0, " --kernel-check checks kernel change\n"); MSG(0, " --debug-cache to debug cache when -c is used\n"); - MSG(0, " --nolinear-lookup=X X=1: disable linear lookup, X=0: enable linear lookup\n"); + MSG(0, " --nolinear-lookup=X X=1: disable linear lookup, X=0: enable linear lookup. For android case, it will disable linear lookup by default\n"); MSG(0, " --fault_injection=%%d to enable fault injection with specified injection rate\n"); MSG(0, " --fault_type=%%d to configure enabled fault injection type\n"); exit(1); @@ -224,6 +224,9 @@ static void add_default_options(void) if (c.func == FSCK) { /* -a */ c.auto_fix = 1; + + /* disable linear lookup by default */ + c.nolinear_lookup = LINEAR_LOOKUP_DISABLE; } else if (c.func == RESIZE) { c.force = 1; } diff --git a/man/fsck.f2fs.8 b/man/fsck.f2fs.8 index 89cc455..c20c431 100644 --- a/man/fsck.f2fs.8 +++ b/man/fsck.f2fs.8 @@ -68,7 +68,7 @@ Specify the level of debugging options. The default number is 0, which shows basic debugging messages. 
.TP .BI \--nolinear-lookup -Tune linear lookup fallback, must specify an argument, 0: enable linear lookup, 1: disable linear lookup. +Tune linear lookup fallback, must specify an argument, 0: enable linear lookup, 1: disable linear lookup. For android case, it will disable linear lookup by default. .TP .BI \-\-fault_injection=%d " enable fault injection" Enable fault injection in all supported types with specified injection rate. -- 2.49.0 |
From: Jaegeuk K. <ja...@ke...> - 2025-07-21 17:21:43
|
I think it'd be better to add a parameter like __lookup_nat_cache(for_dirty). On 07/18, wangzijie wrote: > __lookup_nat_cache follows LRU manner to move clean nat entry, when nat > entries are going to be dirty, no need to move them to tail of lru list. > > Signed-off-by: wangzijie <wan...@ho...> > --- > fs/f2fs/node.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c > index 4b3d9070e..b9fbc6bf7 100644 > --- a/fs/f2fs/node.c > +++ b/fs/f2fs/node.c > @@ -460,7 +460,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, > struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); > > f2fs_down_write(&nm_i->nat_tree_lock); > - e = __lookup_nat_cache(nm_i, ni->nid); > + e = radix_tree_lookup(&nm_i->nat_root, ni->nid); > if (!e) { > e = __init_nat_entry(nm_i, new, NULL, true); > copy_node_info(&e->ni, ni); > @@ -2926,7 +2926,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) > > raw_ne = nat_in_journal(journal, i); > > - ne = __lookup_nat_cache(nm_i, nid); > + ne = radix_tree_lookup(&nm_i->nat_root, nid); > if (!ne) { > ne = __alloc_nat_entry(sbi, nid, true); > __init_nat_entry(nm_i, ne, &raw_ne, true); > -- > 2.25.1 |
From: Gao X. <hsi...@li...> - 2025-07-21 11:52:48
|
On 2025/7/21 19:36, Qu Wenruo wrote: > > > 在 2025/7/21 19:55, Jan Kara 写道: >> On Mon 21-07-25 11:14:02, Gao Xiang wrote: >>> Hi Barry, >>> >>> On 2025/7/21 09:02, Barry Song wrote: >>>> On Wed, Jul 16, 2025 at 8:28 AM Gao Xiang <hsi...@li...> wrote: > [...] >>>> Given the difficulty of allocating large folios, it's always a good >>>> idea to have order-0 as a fallback. While I agree with your point, >>>> I have a slightly different perspective — enabling large folios for >>>> those devices might be beneficial, but the maximum order should >>>> remain small. I'm referring to "small" large folios. >>> >>> Yeah, agreed. Having a way to limit the maximum order for those small >>> devices (rather than disabling it completely) would be helpful. At >>> least "small" large folios could still provide benefits when memory >>> pressure is light. >> >> Well, in the page cache you can tune not only the minimum but also the >> maximum order of a folio being allocated for each inode. Btrfs and ext4 >> already use this functionality. So in principle the functionality is there, >> it is "just" a question of proper user interfaces or automatic logic to >> tune this limit. >> >> Honza > > And enabling large folios doesn't mean all fs operations will grab an unnecessarily large folio. > > For buffered write, all those filesystem will only try to get folios as large as necessary, not overly large. > > This means if the user space program is always doing buffered IO in a power-of-two unit (and aligned offset of course), the folio size will match the buffer size perfectly (if we have enough memory). > > So for properly aligned buffered writes, large folios won't really cause unnecessarily large folios, meanwhile brings all the benefits. That really depends on the user behavior & I/O pattern and could cause unexpected spike. Anyway, IMHO, how to limit the maximum order may be useful for small devices if large folios is enabled. When direct reclaim is the common case, it might be too late. 
Thanks, Gao Xiang > > > Although I'm not familiar enough with filemap to comment on folio read and readahead... > > Thanks, > Qu |
From: Gao X. <hsi...@li...> - 2025-07-21 11:40:30
|
Hi Jan, On 2025/7/21 18:25, Jan Kara wrote: > On Mon 21-07-25 11:14:02, Gao Xiang wrote: >> Hi Barry, >> >> On 2025/7/21 09:02, Barry Song wrote: >>> On Wed, Jul 16, 2025 at 8:28 AM Gao Xiang <hsi...@li...> wrote: >>>> >> >> ... >> >>>> >>>> ... high-order folios can cause side effects on embedded devices >>>> like routers and IoT devices, which still have MiBs of memory (and I >>>> believe this won't change due to their use cases) but they also use >>>> Linux kernel for quite long time. In short, I don't think enabling >>>> large folios for those devices is very useful, let alone limiting >>>> the minimum folio order for them (It would make the filesystem not >>>> suitable any more for those users. At least that is what I never >>>> want to do). And I believe this is different from the current LBS >>>> support to match hardware characteristics or LBS atomic write >>>> requirement. >>> >>> Given the difficulty of allocating large folios, it's always a good >>> idea to have order-0 as a fallback. While I agree with your point, >>> I have a slightly different perspective — enabling large folios for >>> those devices might be beneficial, but the maximum order should >>> remain small. I'm referring to "small" large folios. >> >> Yeah, agreed. Having a way to limit the maximum order for those small >> devices (rather than disabling it completely) would be helpful. At >> least "small" large folios could still provide benefits when memory >> pressure is light. > > Well, in the page cache you can tune not only the minimum but also the > maximum order of a folio being allocated for each inode. Btrfs and ext4 > already use this functionality. So in principle the functionality is there, > it is "just" a question of proper user interfaces or automatic logic to > tune this limit. Yes, I took a quick glance of the current ext4 and btrfs cases weeks ago which use this to fulfill the journal reservation for example. 
but considering that specific memory overhead use cases (to limit maximum large folio order for small devices), it sounds more like a generic page cache user interface for all filesystems instead, and in the effective maximum order should combine these two maximum numbers. Thanks, Gao Xiang > > Honza |
From: Qu W. <quw...@gm...> - 2025-07-21 11:37:28
|
在 2025/7/21 19:55, Jan Kara 写道: > On Mon 21-07-25 11:14:02, Gao Xiang wrote: >> Hi Barry, >> >> On 2025/7/21 09:02, Barry Song wrote: >>> On Wed, Jul 16, 2025 at 8:28 AM Gao Xiang <hsi...@li...> wrote: [...] >>> Given the difficulty of allocating large folios, it's always a good >>> idea to have order-0 as a fallback. While I agree with your point, >>> I have a slightly different perspective — enabling large folios for >>> those devices might be beneficial, but the maximum order should >>> remain small. I'm referring to "small" large folios. >> >> Yeah, agreed. Having a way to limit the maximum order for those small >> devices (rather than disabling it completely) would be helpful. At >> least "small" large folios could still provide benefits when memory >> pressure is light. > > Well, in the page cache you can tune not only the minimum but also the > maximum order of a folio being allocated for each inode. Btrfs and ext4 > already use this functionality. So in principle the functionality is there, > it is "just" a question of proper user interfaces or automatic logic to > tune this limit. > > Honza And enabling large folios doesn't mean all fs operations will grab an unnecessarily large folio. For buffered write, all those filesystem will only try to get folios as large as necessary, not overly large. This means if the user space program is always doing buffered IO in a power-of-two unit (and aligned offset of course), the folio size will match the buffer size perfectly (if we have enough memory). So for properly aligned buffered writes, large folios won't really cause unnecessarily large folios, meanwhile brings all the benefits. Although I'm not familiar enough with filemap to comment on folio read and readahead... Thanks, Qu |
From: Jan K. <ja...@su...> - 2025-07-21 10:26:10
|
On Mon 21-07-25 11:14:02, Gao Xiang wrote: > Hi Barry, > > On 2025/7/21 09:02, Barry Song wrote: > > On Wed, Jul 16, 2025 at 8:28 AM Gao Xiang <hsi...@li...> wrote: > > > > > ... > > > > > > > ... high-order folios can cause side effects on embedded devices > > > like routers and IoT devices, which still have MiBs of memory (and I > > > believe this won't change due to their use cases) but they also use > > > Linux kernel for quite long time. In short, I don't think enabling > > > large folios for those devices is very useful, let alone limiting > > > the minimum folio order for them (It would make the filesystem not > > > suitable any more for those users. At least that is what I never > > > want to do). And I believe this is different from the current LBS > > > support to match hardware characteristics or LBS atomic write > > > requirement. > > > > Given the difficulty of allocating large folios, it's always a good > > idea to have order-0 as a fallback. While I agree with your point, > > I have a slightly different perspective — enabling large folios for > > those devices might be beneficial, but the maximum order should > > remain small. I'm referring to "small" large folios. > > Yeah, agreed. Having a way to limit the maximum order for those small > devices (rather than disabling it completely) would be helpful. At > least "small" large folios could still provide benefits when memory > pressure is light. Well, in the page cache you can tune not only the minimum but also the maximum order of a folio being allocated for each inode. Btrfs and ext4 already use this functionality. So in principle the functionality is there, it is "just" a question of proper user interfaces or automatic logic to tune this limit. Honza -- Jan Kara <ja...@su...> SUSE Labs, CR |
From: <bug...@ke...> - 2025-07-21 06:50:49
|
https://bugzilla.kernel.org/show_bug.cgi?id=220321 --- Comment #5 from SEO HOYOUNG (hy5...@sa...) --- Hi, I uploaded to mainline fix patch. But I do not know it is right. https://lore.kernel.org/linux-scsi/202...@sa.../T/#u I thought of another way, how about below it? How about it to change "flush_delayed_work" to "cancel_work_sync" or "cancel_delayed_work_sync". Then it will be wait until writeback workqueue done. And "quota_release_work" function will queueing to events_unbound. Because if "cancel_work_sync" is called, the second argument of "__flsuh work" is called as true, and "check_flush_dependency" will be return normally, so it is unlikely that there will be a problem. -- You may reply to this email to add a comment. You are receiving this mail because: You are watching the assignee of the bug. |
From: yohan.joung <yoh...@sk...> - 2025-07-21 05:41:54
|
pinfile is excluded as it operates with direct I/O Signed-off-by: yohan.joung <yoh...@sk...> --- fs/f2fs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4039ccb5022c..cac8c9650a7a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4844,7 +4844,8 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) /* In LFS mode, if there is inflight dio, wait for its completion */ if (f2fs_lfs_mode(F2FS_I_SB(inode)) && - get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && + !f2fs_is_pinned_file(inode)) inode_dio_wait(inode); if (f2fs_should_use_dio(inode, iocb, to)) { -- 2.33.0 |
From: Gao X. <hsi...@li...> - 2025-07-21 03:14:22
|
Hi Barry, On 2025/7/21 09:02, Barry Song wrote: > On Wed, Jul 16, 2025 at 8:28 AM Gao Xiang <hsi...@li...> wrote: >> ... >> >> ... high-order folios can cause side effects on embedded devices >> like routers and IoT devices, which still have MiBs of memory (and I >> believe this won't change due to their use cases) but they also use >> Linux kernel for quite long time. In short, I don't think enabling >> large folios for those devices is very useful, let alone limiting >> the minimum folio order for them (It would make the filesystem not >> suitable any more for those users. At least that is what I never >> want to do). And I believe this is different from the current LBS >> support to match hardware characteristics or LBS atomic write >> requirement. > > Given the difficulty of allocating large folios, it's always a good > idea to have order-0 as a fallback. While I agree with your point, > I have a slightly different perspective — enabling large folios for > those devices might be beneficial, but the maximum order should > remain small. I'm referring to "small" large folios. Yeah, agreed. Having a way to limit the maximum order for those small devices (rather than disabling it completely) would be helpful. At least "small" large folios could still provide benefits when memory pressure is light. Thanks, Gao Xiang > > Still, even with those, allocation can be difficult — especially > since so many other allocations (which aren't large folios) can cause > fragmentation. So having order-0 as a fallback remains important. > > It seems we're missing a mechanism to enable "small" large folios > for files. For anon large folios, we do have sysfs knobs—though they > don’t seem to be universally appreciated. :-) > > Thanks > Barry |
From: Chao Yu <ch...@ke...> - 2025-07-21 02:02:47
|
Commit 0638a3197c19 ("f2fs: avoid unused block when dio write in LFS mode") has fixed unused block issue for dio write in lfs mode. However, f2fs_map_blocks() may break and return smaller extent when last allocated block locates in the end of section, even allocator can allocate contiguous blocks across sections. Actually, for the case that allocator returns a block address which is not contiguous w/ current extent, we can record the block address in iomap->private, in the next round, skip reallocating for the last allocated block, then we can fix unused block issue, meanwhile, also, we can allocates contiguous physical blocks as much as possible for dio write in lfs mode. Testcase: - mkfs.f2fs -f /dev/vdb - mount -o mode=lfs /dev/vdb /mnt/f2fs - dd if=/dev/zero of=/mnt/f2fs/file bs=1M count=3; sync; - dd if=/dev/zero of=/mnt/f2fs/dio bs=2M count=1 oflag=direct; - umount /mnt/f2fs Before: f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 0, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 256, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 512, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 5, file offset = 0, start blkaddr = 0x4700, len = 0x100, flags = 3, seg_type = 1, may_create = 1, multidevice = 0, flag = 3, err = 0 f2fs_map_blocks: dev = (253,16), ino = 5, file offset = 256, start blkaddr = 0x4800, len = 0x100, flags = 3, seg_type = 1, may_create = 1, multidevice = 0, flag = 3, err = 0 After: f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 0, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 256, 
start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 4, file offset = 512, start blkaddr = 0x0, len = 0x100, flags = 1, seg_type = 8, may_create = 1, multidevice = 0, flag = 5, err = 0 f2fs_map_blocks: dev = (253,16), ino = 5, file offset = 0, start blkaddr = 0x4700, len = 0x200, flags = 3, seg_type = 1, may_create = 1, multidevice = 0, flag = 3, err = 0 Cc: Daejun Park <dae...@sa...> Signed-off-by: Chao Yu <ch...@ke...> --- fs/f2fs/data.c | 28 ++++++++++++++++++---------- fs/f2fs/f2fs.h | 1 + 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d1a2616d41be..4e62f7f00b70 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1550,10 +1550,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) unsigned int start_pgofs; int bidx = 0; bool is_hole; + bool lfs_dio_write; if (!maxblocks) return 0; + lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create); + if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) goto out; @@ -1600,7 +1604,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) /* use out-place-update for direct IO under LFS mode */ if (map->m_may_create && (is_hole || (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && - !f2fs_is_pinned_file(inode)))) { + !f2fs_is_pinned_file(inode) && map->m_last_pblk != blkaddr))) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto sync_out; @@ -1684,10 +1688,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) if (map->m_multidev_dio) map->m_bdev = FDEV(bidx).bdev; + + if (lfs_dio_write) + map->m_last_pblk = NULL_ADDR; } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) { ofs++; map->m_len++; } else { + if (lfs_dio_write && !f2fs_is_pinned_file(inode)) + map->m_last_pblk = blkaddr; goto sync_out; } @@ -1712,14 +1721,6 @@ 
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) dn.ofs_in_node = end_offset; } - if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && - map->m_may_create) { - /* the next block to be allocated may not be contiguous. */ - if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) == - CAP_BLKS_PER_SEC(sbi) - 1) - goto sync_out; - } - if (pgofs >= end) goto sync_out; else if (dn.ofs_in_node < end_offset) @@ -4162,7 +4163,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { - struct f2fs_map_blocks map = {}; + struct f2fs_map_blocks map = { NULL, }; pgoff_t next_pgofs = 0; int err; @@ -4171,6 +4172,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, map.m_next_pgofs = &next_pgofs; map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); + if (flags & IOMAP_WRITE && iomap->private) { + map.m_last_pblk = (unsigned long)iomap->private; + iomap->private = NULL; + } /* * If the blocks being overwritten are already allocated, @@ -4209,6 +4214,9 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->flags |= IOMAP_F_MERGED; iomap->bdev = map.m_bdev; iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk); + + if (flags & IOMAP_WRITE && map.m_last_pblk) + iomap->private = (void *)map.m_last_pblk; } else { if (flags & IOMAP_WRITE) return -ENOTBLK; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dfddb66910b3..97c1a2a3fbd7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -732,6 +732,7 @@ struct f2fs_map_blocks { block_t m_lblk; unsigned int m_len; unsigned int m_flags; + unsigned long m_last_pblk; /* last allocated block, only used for DIO in LFS mode */ pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; -- 2.49.0 |
From: Barry S. <21...@gm...> - 2025-07-21 01:03:09
|
On Wed, Jul 16, 2025 at 8:28 AM Gao Xiang <hsi...@li...> wrote: > > > > On 2025/7/16 07:32, Gao Xiang wrote: > > Hi Matthew, > > > > On 2025/7/16 04:40, Matthew Wilcox wrote: > >> I've started looking at how the page cache can help filesystems handle > >> compressed data better. Feedback would be appreciated! I'll probably > >> say a few things which are obvious to anyone who knows how compressed > >> files work, but I'm trying to be explicit about my assumptions. > >> > >> First, I believe that all filesystems work by compressing fixed-size > >> plaintext into variable-sized compressed blocks. This would be a good > >> point to stop reading and tell me about counterexamples. > > > > At least the typical EROFS compresses variable-sized plaintext (at least > > one block, e.g. 4k, but also 4k+1, 4k+2, ...) into fixed-sized compressed > > blocks for efficient I/Os, which is really useful for small compression > > granularity (e.g. 4KiB, 8KiB) because use cases like Android are usually > > under memory pressure so large compression granularity is almost > > unacceptable in the low memory scenarios, see: > > https://erofs.docs.kernel.org/en/latest/design.html > > > > Currently EROFS works pretty well on these devices and has been > > successfully deployed in billions of real devices. > > > >> > >> From what I've been reading in all your filesystems is that you want to > >> allocate extra pages in the page cache in order to store the excess data > >> retrieved along with the page that you're actually trying to read. That's > >> because compressing in larger chunks leads to better compression. > >> > >> There's some discrepancy between filesystems whether you need scratch > >> space for decompression. Some filesystems read the compressed data into > >> the pagecache and decompress in-place, while other filesystems read the > >> compressed data into scratch pages and decompress into the page cache. 
> >> > >> There also seems to be some discrepancy between filesystems whether the > >> decompression involves vmap() of all the memory allocated or whether the > >> decompression routines can handle doing kmap_local() on individual pages. > >> > >> So, my proposal is that filesystems tell the page cache that their minimum > >> folio size is the compression block size. That seems to be around 64k, > >> so not an unreasonable minimum allocation size. That removes all the > >> extra code in filesystems to allocate extra memory in the page cache. > >> It means we don't attempt to track dirtiness at a sub-folio granularity > >> (there's no point, we have to write back the entire compressed bock > >> at once). We also get a single virtually contiguous block ... if you're > >> willing to ditch HIGHMEM support. Or there's a proposal to introduce a > >> vmap_file() which would give us a virtually contiguous chunk of memory > >> (and could be trivially turned into a noop for the case of trying to > >> vmap a single large folio). > > > > I don't see this will work for EROFS because EROFS always supports > > variable uncompressed extent lengths and that will break typical > > EROFS use cases and on-disk formats. > > > > Other thing is that large order folios (physical consecutive) will > > caused "increase the latency on UX task with filemap_fault()" > > because of high-order direct reclaims, see: > > https://android-review.googlesource.com/c/kernel/common/+/3692333 > > so EROFS will not set min-order and always support order-0 folios. > > > > I think EROFS will not use this new approach, vmap() interface is > > always the case for us. > > ... high-order folios can cause side effects on embedded devices > like routers and IoT devices, which still have MiBs of memory (and I > believe this won't change due to their use cases) but they also use > Linux kernel for quite long time.
In short, I don't think enabling > large folios for those devices is very useful, let alone limiting > the minimum folio order for them (It would make the filesystem not > suitable any more for those users. At least that is what I never > want to do). And I believe this is different from the current LBS > support to match hardware characteristics or LBS atomic write > requirement. Given the difficulty of allocating large folios, it's always a good idea to have order-0 as a fallback. While I agree with your point, I have a slightly different perspective — enabling large folios for those devices might be beneficial, but the maximum order should remain small. I'm referring to "small" large folios. Still, even with those, allocation can be difficult — especially since so many other allocations (which aren't large folios) can cause fragmentation. So having order-0 as a fallback remains important. It seems we're missing a mechanism to enable "small" large folios for files. For anon large folios, we do have sysfs knobs—though they don’t seem to be universally appreciated. :-) Thanks Barry |
From: Barry S. <21...@gm...> - 2025-07-21 00:44:14
|
On Wed, Jul 16, 2025 at 7:32 AM Gao Xiang <hsi...@li...> wrote: [...] > > I don't see this will work for EROFS because EROFS always supports > variable uncompressed extent lengths and that will break typical > EROFS use cases and on-disk formats. > > Other thing is that large order folios (physical consecutive) will > caused "increase the latency on UX task with filemap_fault()" > because of high-order direct reclaims, see: > https://android-review.googlesource.com/c/kernel/common/+/3692333 > so EROFS will not set min-order and always support order-0 folios. Regarding Hailong's Android hook, it's essentially a complaint about the GFP mask used to allocate large folios for files. I'm wondering why the page cache hasn't adopted the same approach that's used for anon large folios: gfp = vma_thp_gfp_mask(vma); Another concern might be that the allocation order is too large, which could lead to memory fragmentation and waste. Ideally, we'd have "small" large folios—say, with order <= 4—to strike a better balance. > > I think EROFS will not use this new approach, vmap() interface is > always the case for us. > > Thanks, > Gao Xiang > > > > Thanks Barry |
From: Daeho J. <da...@gm...> - 2025-07-18 22:04:48
|
From: Daeho Jeong <dae...@go...> Otherwise F2FS will not do GC in background in low free section. Signed-off-by: Daeho Jeong <dae...@go...> --- fs/f2fs/gc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 0d7703e7f9e0..08eead027648 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -391,14 +391,15 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) } static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, - unsigned int segno, struct victim_sel_policy *p) + unsigned int segno, struct victim_sel_policy *p, + unsigned int valid_thresh_ratio) { if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; - if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >= - CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio / - 100)) + if (p->one_time_gc && (valid_thresh_ratio < 100) && + (get_valid_blocks(sbi, segno, true) >= + CAP_BLKS_PER_SEC(sbi) * valid_thresh_ratio / 100)) return UINT_MAX; /* alloc_mode == LFS */ @@ -779,6 +780,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, unsigned int secno, last_victim; unsigned int last_segment; unsigned int nsearched; + unsigned int valid_thresh_ratio = 100; bool is_atgc; int ret = 0; @@ -788,7 +790,11 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, p.alloc_mode = alloc_mode; p.age = age; p.age_threshold = sbi->am.age_threshold; - p.one_time_gc = one_time; + if (one_time) { + p.one_time_gc = one_time; + if (has_enough_free_secs(sbi, 0, NR_PERSISTENT_LOG)) + valid_thresh_ratio = sbi->gc_thread->valid_thresh_ratio; + } retry: select_policy(sbi, gc_type, type, &p); @@ -914,7 +920,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, goto next; } - cost = get_gc_cost(sbi, segno, &p); + cost = get_gc_cost(sbi, segno, &p, valid_thresh_ratio); if (p.min_cost > cost) { p.min_segno = segno; -- 2.50.0.727.gbf7dc18ff4-goog |
From: Daeho J. <da...@gm...> - 2025-07-18 21:50:14
|
From: Daeho Jeong <dae...@go...> Add this to control GC algorithm for boost GC. Signed-off-by: Daeho Jeong <dae...@go...> --- v2: use GC_GREEDY instead of 1 --- Documentation/ABI/testing/sysfs-fs-f2fs | 8 +++++++- fs/f2fs/gc.c | 3 ++- fs/f2fs/gc.h | 1 + fs/f2fs/sysfs.c | 16 ++++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 931c1f63aa2e..2158055cd9d1 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -866,6 +866,12 @@ What: /sys/fs/f2fs/<disk>/gc_boost_gc_multiple Date: June 2025 Contact: "Daeho Jeong" <dae...@go...> Description: Set a multiplier for the background GC migration window when F2FS GC is - boosted. + boosted. the range should be from 1 to the segment count in a section. Default: 5 +What: /sys/fs/f2fs/<disk>/gc_boost_gc_greedy +Date: June 2025 +Contact: "Daeho Jeong" <dae...@go...> +Description: Control GC algorithm for boost GC. 
0: cost benefit, 1: greedy + Default: 1 + diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index de7e59bc0906..0d7703e7f9e0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -141,7 +141,7 @@ static int gc_thread_func(void *data) FOREGROUND : BACKGROUND); sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || - gc_control.one_time; + (gc_control.one_time && gc_th->boost_gc_greedy); /* foreground GC was been triggered via f2fs_balance_fs() */ if (foreground) @@ -198,6 +198,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE; + gc_th->boost_gc_greedy = GC_GREEDY; if (f2fs_sb_has_blkzoned(sbi)) { gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index efa1968810a0..1a2e7a84b59f 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -69,6 +69,7 @@ struct f2fs_gc_kthread { unsigned int boost_zoned_gc_percent; unsigned int valid_thresh_ratio; unsigned int boost_gc_multiple; + unsigned int boost_gc_greedy; }; struct gc_inode_list { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index b0270b1c939c..3a52f51ee3c6 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -824,6 +824,20 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_boost_gc_multiple")) { + if (t < 1 || t > SEGS_PER_SEC(sbi)) + return -EINVAL; + sbi->gc_thread->boost_gc_multiple = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "gc_boost_gc_greedy")) { + if (t > GC_GREEDY) + return -EINVAL; + sbi->gc_thread->boost_gc_greedy = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -1051,6 +1065,7 @@ GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); 
GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple); +GC_THREAD_RW_ATTR(gc_boost_gc_greedy, boost_gc_greedy); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -1222,6 +1237,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_boost_zoned_gc_percent), ATTR_LIST(gc_valid_thresh_ratio), ATTR_LIST(gc_boost_gc_multiple), + ATTR_LIST(gc_boost_gc_greedy), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), -- 2.50.0.727.gbf7dc18ff4-goog |
From: Daeho J. <da...@gm...> - 2025-07-18 21:40:32
|
From: Daeho Jeong <dae...@go...> Add a sysfs knob to set a multiplier for the background GC migration window when F2FS Garbage Collection is boosted. Signed-off-by: Daeho Jeong <dae...@go...> --- v2: limit the available value range --- Documentation/ABI/testing/sysfs-fs-f2fs | 8 ++++++++ fs/f2fs/gc.c | 3 ++- fs/f2fs/gc.h | 1 + fs/f2fs/sysfs.c | 2 ++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index bf03263b9f46..931c1f63aa2e 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -861,3 +861,11 @@ Description: This is a read-only entry to show the value of sb.s_encoding_flags, SB_ENC_STRICT_MODE_FL 0x00000001 SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002 ============================ ========== + +What: /sys/fs/f2fs/<disk>/gc_boost_gc_multiple +Date: June 2025 +Contact: "Daeho Jeong" <dae...@go...> +Description: Set a multiplier for the background GC migration window when F2FS GC is + boosted. 
+ Default: 5 + diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3cb5242f4ddf..de7e59bc0906 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -197,6 +197,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; + gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE; if (f2fs_sb_has_blkzoned(sbi)) { gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; @@ -1749,7 +1750,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, !has_enough_free_blocks(sbi, sbi->gc_thread->boost_zoned_gc_percent)) window_granularity *= - BOOST_GC_MULTIPLE; + sbi->gc_thread->boost_gc_multiple; end_segno = start_segno + window_granularity; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 5c1eaf55e127..efa1968810a0 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -68,6 +68,7 @@ struct f2fs_gc_kthread { unsigned int no_zoned_gc_percent; unsigned int boost_zoned_gc_percent; unsigned int valid_thresh_ratio; + unsigned int boost_gc_multiple; }; struct gc_inode_list { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 75134d69a0bd..b0270b1c939c 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1050,6 +1050,7 @@ GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); +GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -1220,6 +1221,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_no_zoned_gc_percent), ATTR_LIST(gc_boost_zoned_gc_percent), ATTR_LIST(gc_valid_thresh_ratio), + ATTR_LIST(gc_boost_gc_multiple), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), -- 2.50.0.727.gbf7dc18ff4-goog |
From: <pat...@ke...> - 2025-07-18 20:19:51
|
Hello: The following patches were marked "accepted", because they were applied to jaegeuk/f2fs.git (dev): Patch: [f2fs-dev] f2fs: fix to avoid out-of-boundary access in dnode page Submitter: Chao Yu <ch...@ke...> Committer: Jaegeuk Kim <ja...@ke...> Patchwork: https://patchwork.kernel.org/project/f2fs/list/?series=983421 Lore link: https://lore.kernel.org/r/202...@ke... Total patches: 1 -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html |