You can subscribe to this list here.
2004 |
Jan
|
Feb
|
Mar
(2) |
Apr
(2) |
May
(5) |
Jun
|
Jul
|
Aug
(3) |
Sep
(4) |
Oct
(3) |
Nov
(2) |
Dec
(16) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2005 |
Jan
(36) |
Feb
(3) |
Mar
|
Apr
|
May
(2) |
Jun
|
Jul
|
Aug
|
Sep
(5) |
Oct
(6) |
Nov
(9) |
Dec
(5) |
2006 |
Jan
|
Feb
(1) |
Mar
(43) |
Apr
(7) |
May
(4) |
Jun
(11) |
Jul
(4) |
Aug
(13) |
Sep
(3) |
Oct
(14) |
Nov
(8) |
Dec
(7) |
2007 |
Jan
(5) |
Feb
(9) |
Mar
(5) |
Apr
|
May
(9) |
Jun
(7) |
Jul
(12) |
Aug
(13) |
Sep
(10) |
Oct
(11) |
Nov
(3) |
Dec
(3) |
2008 |
Jan
(22) |
Feb
(17) |
Mar
(3) |
Apr
(8) |
May
(8) |
Jun
(25) |
Jul
(9) |
Aug
(46) |
Sep
(15) |
Oct
|
Nov
(3) |
Dec
(5) |
2009 |
Jan
(11) |
Feb
(3) |
Mar
(21) |
Apr
(18) |
May
(1) |
Jun
(6) |
Jul
(10) |
Aug
(1) |
Sep
(4) |
Oct
(8) |
Nov
(2) |
Dec
|
2010 |
Jan
|
Feb
(7) |
Mar
(8) |
Apr
(24) |
May
(24) |
Jun
|
Jul
(1) |
Aug
|
Sep
(8) |
Oct
(1) |
Nov
(9) |
Dec
(1) |
2011 |
Jan
|
Feb
(2) |
Mar
(16) |
Apr
(8) |
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(1) |
Nov
|
Dec
|
2012 |
Jan
(1) |
Feb
(1) |
Mar
(5) |
Apr
(1) |
May
(7) |
Jun
(8) |
Jul
(14) |
Aug
(2) |
Sep
(2) |
Oct
|
Nov
|
Dec
|
2013 |
Jan
|
Feb
(12) |
Mar
(2) |
Apr
(2) |
May
|
Jun
|
Jul
(4) |
Aug
(1) |
Sep
(1) |
Oct
|
Nov
(1) |
Dec
|
2014 |
Jan
|
Feb
(1) |
Mar
|
Apr
(6) |
May
(1) |
Jun
(1) |
Jul
|
Aug
|
Sep
(26) |
Oct
|
Nov
|
Dec
(1) |
2015 |
Jan
(2) |
Feb
(1) |
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(1) |
Nov
(1) |
Dec
(5) |
2016 |
Jan
|
Feb
(1) |
Mar
|
Apr
(1) |
May
|
Jun
|
Jul
(1) |
Aug
|
Sep
|
Oct
|
Nov
(2) |
Dec
|
2017 |
Jan
|
Feb
|
Mar
|
Apr
(3) |
May
(5) |
Jun
(8) |
Jul
(11) |
Aug
(42) |
Sep
(1) |
Oct
(4) |
Nov
(4) |
Dec
(3) |
2018 |
Jan
|
Feb
(1) |
Mar
|
Apr
(3) |
May
|
Jun
(4) |
Jul
|
Aug
|
Sep
|
Oct
(1) |
Nov
(1) |
Dec
(1) |
2019 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
(3) |
Jul
(1) |
Aug
(2) |
Sep
|
Oct
(1) |
Nov
|
Dec
|
2020 |
Jan
|
Feb
|
Mar
(1) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2022 |
Jan
|
Feb
|
Mar
(3) |
Apr
(1) |
May
(65) |
Jun
(3) |
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2023 |
Jan
|
Feb
|
Mar
(1) |
Apr
|
May
(4) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(4) |
Dec
|
2024 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(1) |
Jun
|
Jul
|
Aug
(4) |
Sep
|
Oct
|
Nov
|
Dec
|
From: Li Z. <liz...@hu...> - 2024-08-17 10:04:50
|
convert to use folio, so that we can get rid of 'page->index' to prepare for removal of 'index' field in structure page [1]. [1]: https://lore.kernel.org/all/Zp8...@ca.../ Cc: Matthew Wilcox <wi...@in...> Signed-off-by: Li Zetao <liz...@hu...> --- fs/squashfs/file.c | 31 ++++++++++++++++--------------- fs/squashfs/file_cache.c | 2 +- fs/squashfs/squashfs.h | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index a8c1e7f9a609..893043ecf973 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -378,13 +378,13 @@ void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, } /* Copy data into page cache */ -void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, +void squashfs_copy_cache(struct folio *folio, struct squashfs_cache_entry *buffer, int bytes, int offset) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio_inode(folio); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; - int start_index = page->index & ~mask, end_index = start_index | mask; + int start_index = folio->index & ~mask, end_index = start_index | mask; /* * Loop copying datablock into pages. As the datablock likely covers @@ -394,25 +394,26 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, */ for (i = start_index; i <= end_index && bytes > 0; i++, bytes -= PAGE_SIZE, offset += PAGE_SIZE) { - struct page *push_page; + struct folio *push_folio; int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0; TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); - push_page = (i == page->index) ? page : - grab_cache_page_nowait(page->mapping, i); - - if (!push_page) + push_folio = (i == folio->index) ? 
folio : + __filemap_get_folio(folio->mapping, i, + FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, + mapping_gfp_mask(folio->mapping)); + if (IS_ERR(push_folio)) continue; - if (PageUptodate(push_page)) + if (folio_test_uptodate(push_folio)) goto skip_page; - squashfs_fill_page(push_page, buffer, offset, avail); + squashfs_fill_page(folio_file_page(push_folio, i), buffer, offset, avail); skip_page: - unlock_page(push_page); - if (i != page->index) - put_page(push_page); + folio_unlock(push_folio); + if (i != folio->index) + folio_put(push_folio); } } @@ -430,7 +431,7 @@ static int squashfs_readpage_fragment(struct page *page, int expected) squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); else - squashfs_copy_cache(page, buffer, expected, + squashfs_copy_cache(page_folio(page), buffer, expected, squashfs_i(inode)->fragment_offset); squashfs_cache_put(buffer); @@ -439,7 +440,7 @@ static int squashfs_readpage_fragment(struct page *page, int expected) static int squashfs_readpage_sparse(struct page *page, int expected) { - squashfs_copy_cache(page, NULL, expected, 0); + squashfs_copy_cache(page_folio(page), NULL, expected, 0); return 0; } diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index 54c17b7c85fd..23d585025882 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -29,7 +29,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expecte ERROR("Unable to read page, block %llx, size %x\n", block, bsize); else - squashfs_copy_cache(page, buffer, expected, 0); + squashfs_copy_cache(page_folio(page), buffer, expected, 0); squashfs_cache_put(buffer); return res; diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 5a756e6790b5..a31bd5e9c8bf 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -68,7 +68,7 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, /* file.c */ void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int); 
-void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, +void squashfs_copy_cache(struct folio *, struct squashfs_cache_entry *, int, int); /* file_xxx.c */ -- 2.34.1 |
From: Li Z. <liz...@hu...> - 2024-08-17 10:04:49
|
In order to remove page APIs and index field in structure page [1], this patch converts squashfs_read_folio to use folio APIs, switch from kmap_atomic to kmap_local and use folio_end_read() to set uptodate and unlock folio. [1]: https://lore.kernel.org/all/Zp8...@ca.../ Cc: Matthew Wilcox <wi...@in...> Signed-off-by: Li Zetao <liz...@hu...> --- fs/squashfs/file.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 893043ecf973..8c80d77a6115 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -446,10 +446,9 @@ static int squashfs_readpage_sparse(struct page *page, int expected) static int squashfs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page->mapping->host; + struct inode *inode = folio_inode(folio); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int index = page->index >> (msblk->block_log - PAGE_SHIFT); + int index = folio->index >> (msblk->block_log - PAGE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; int expected = index == file_end ? 
(i_size_read(inode) & (msblk->block_size - 1)) : @@ -457,10 +456,10 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) int res = 0; void *pageaddr; - TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); + TRACE("Entered squashfs_readpage, folio index %lx, start block %llx\n", + folio->index, squashfs_i(inode)->start); - if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> + if (folio->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT)) goto out; @@ -473,23 +472,21 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) goto out; if (res == 0) - res = squashfs_readpage_sparse(page, expected); + res = squashfs_readpage_sparse(&folio->page, expected); else - res = squashfs_readpage_block(page, block, res, expected); + res = squashfs_readpage_block(&folio->page, block, res, expected); } else - res = squashfs_readpage_fragment(page, expected); + res = squashfs_readpage_fragment(&folio->page, expected); if (!res) return 0; out: - pageaddr = kmap_atomic(page); + pageaddr = kmap_local_folio(folio, 0); memset(pageaddr, 0, PAGE_SIZE); - kunmap_atomic(pageaddr); - flush_dcache_page(page); - if (res == 0) - SetPageUptodate(page); - unlock_page(page); + kunmap_local(pageaddr); + flush_dcache_folio(folio); + folio_end_read(folio, res == 0); return res; } -- 2.34.1 |
From: Phillip L. <ph...@sq...> - 2024-08-04 22:51:22
|
On 04/08/2024 22:20, Al Viro wrote: > On Sun, Aug 04, 2024 at 10:16:05PM +0100, Phillip Lougher wrote: > >> NACK. I see no reason to introduce an intermediate variable here. >> >> Please do what Al Viro suggested. > > Alternatively, just check ->i_size after assignment. loff_t is > always a 64bit signed; le32_to_cpu() returns 32bit unsigned. > Conversion from u32 to s64 is always going to yield a non-negative > result; comparison with PAGE_SIZE is all you need there. I'm happy with that as well. Thanks Phillip |
From: Phillip L. <ph...@sq...> - 2024-08-04 21:35:22
|
On 03/08/2024 08:43, Lizhi Xu wrote: > syzbot report KMSAN: uninit-value in pick_link, the root cause is that > squashfs_symlink_read_folio did not check the length, resulting in folio > not being initialized and did not return the corresponding error code. > > The length is calculated from i_size, this case is about symlink, so i_size > value is derived from symlink_size, so it is necessary to add a check to > confirm that symlink_size value is valid, otherwise an error -EINVAL will > be returned. > > If symlink_size is too large, it may result in a negative value when > calculating length in squashfs_symlink_read_folio due to int overflow, > and its value must be greater than PAGE_SIZE at this time. > > Reported-and-tested-by: syz...@sy... > Closes: https://syzkaller.appspot.com/bug?extid=24ac24ff58dc5b0d26b9 > Signed-off-by: Lizhi Xu <liz...@wi...> > --- > fs/squashfs/inode.c | 9 ++++++++- > 1 file changed, 8 insertions(+), 1 deletion(-) > > diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c > index 16bd693d0b3a..bed6764e4461 100644 > --- a/fs/squashfs/inode.c > +++ b/fs/squashfs/inode.c > @@ -273,14 +273,21 @@ int squashfs_read_inode(struct inode *inode, long long ino) > case SQUASHFS_SYMLINK_TYPE: > case SQUASHFS_LSYMLINK_TYPE: { > struct squashfs_symlink_inode *sqsh_ino = &squashfs_ino.symlink; > + loff_t symlink_size; > > err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, > sizeof(*sqsh_ino)); > if (err < 0) > goto failed_read; > > + symlink_size = le32_to_cpu(sqsh_ino->symlink_size); > + if (symlink_size > PAGE_SIZE) { > + ERROR("Corrupted symlink, size [%llu]\n", symlink_size); > + return -EINVAL; > + } > + > set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); > - inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); > + inode->i_size = symlink_size; NACK. I see no reason to introduce an intermediate variable here. Please do what Al Viro suggested. 
Thanks Phillip Lougher -- Squashfs author and maintainer BTW I have been on vacation since last week, and only saw this today. > inode->i_op = &squashfs_symlink_inode_ops; > inode_nohighmem(inode); > inode->i_data.a_ops = &squashfs_symlink_aops; |
From: C. A. M. <an...@ma...> - 2024-05-16 19:21:30
|
The metadata cache size was hard-coded to 8 metadata blocks. A large parallel workload can cause a lot of spinlock thrashing in squashfs_cache_get if the number of metadata blocks is smaller than the number of parallel metadata reads, as the decompression time can keep the metadata cache full, and squashfs_cache_get uses a simple spinlock to synchronize the cache. Allow the cache size to be tuned by adding CONFIG_SQUASHFS_METADATA_CACHE_SIZE which defaults to the old hard-coded value of 8. A good setting for systems with plenty of memory would be something a bit larger than the expected number of parallel readers on a single squashfs. For highly memory constrained systems, a smaller setting may be appropriate. This issue was discovered on an embedded system where a large performance drop in boot times was noticed when the system moved from an 8 core (4 physical) machine to a 16 core (8 physical) machine. It was discovered that much CPU time was being spun away in the spin_lock call in squashfs_cache_get. This was due to the fact that the metadata cache is fixed at 8 entries, and having more cores allowed more parallel file system walks (which happens to be a part of one of our service start scripts for each of our many parallel services). Because when each cache entry is released all waiting cores are awakened to attempt to grab another cache entry, those cores fight over the spinlock just to find out they are not going to get another cache entry. While this commit isn't a general solution, it does provide a simple way for one to configure their kernel to alleviate the performance issue. A better solution would be to use a less CPU intensive and preemptable synchronization method, and to only wake up one waiter when one cache entry comes up. 
Others have pointed out this issue: http://lkml.iu.edu/hypermail/linux/kernel/1805.0/01702.html And this is a similar issue, but on the data cache, but points out many of the same technical issues with squashfs_cache_get: https://chrisdown.name/2018/04/17/kernel-adventures-the-curious-case-of-squashfs-stalls.html A simple way to reproduce and measure the time for various parallel workloads (assuming a fairly large number of directories and files in the squashfs): time ( N=16; for ((i=0;i<$N;++i)); \ do find /path/to/mounted/squash/ -print > /dev/null & done; \ for ((i=0;i<$N;++i)); do wait; done) On one system, with N=8, the loop above takes 1 second of elapsed time, but on the same system with N=16, it takes 13 seconds (when 2 would be a reasonable scale up). --- fs/squashfs/Kconfig | 20 ++++++++++++++++++++ fs/squashfs/squashfs_fs.h | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 60fc98bdf4212..311e3141df9af 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -264,3 +264,23 @@ config SQUASHFS_FRAGMENT_CACHE_SIZE Note there must be at least one cached fragment. Anything much more than three will probably not make much difference. + +config SQUASHFS_METADATA_CACHE_SIZE + int "Number of metadata blocks cached" if SQUASHFS_EMBEDDED + depends on SQUASHFS + default "8" + help + By default SquashFS caches the last 8 metadata blocks read from the + filesystem. A metadata block is 8KiB. Increasing this amount may + mean SquashFS has to re-read metadata less often from disk, at the + expense of extra system memory. Decreasing this amount will mean + SquashFS uses less memory at the expense of extra reads from disk. + + Note there must be at least one cached metadata block. A setting too + low with a large parallel workload can cause a lot of spinlock + thrashing in squashfs_cache_get. 
A good setting for the metadata + cache size is something a bit larger than the number of expected + parallel metadata reads. When booting with multiple services on a + single squashfs on a machine with a lot of cores, a higher setting + than the default will net a large performance improvement by avoiding + spinlock thrashing. diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 95f8e89017689..c4e32358f922c 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -202,7 +202,7 @@ static inline int squashfs_block_size(__le32 raw) #define SQUASHFS_XATTR_OFFSET(A) ((unsigned int) ((A) & 0xffff)) /* cached data constants for filesystem */ -#define SQUASHFS_CACHED_BLKS 8 +#define SQUASHFS_CACHED_BLKS CONFIG_SQUASHFS_METADATA_CACHE_SIZE /* meta index cache */ #define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int)) -- 2.34.1 |
From: Phillip L. <ph...@sq...> - 2023-11-17 02:17:56
|
On 16/11/2023 21:43, Andrew Morton wrote: > On Thu, 16 Nov 2023 11:13:52 +0800 Lizhi Xu <liz...@wi...> wrote: > >> when the length passed in is 0, the subsequent process should be exited. > > Thanks, but when fixing a bug, please always describe the runtime > effects of that bug. Amongst other things, other people need this > information to be able to decide which kernel versions need patching. > >> Reported-by: syz...@sy... > > Which is a reason why we're now adding the "Closes:" tag after > Reported-by:. Which is also one reason why you should always run scripts/checkpatch.pl on your patch. This alerted me to the need for a "Closes:" tag after Reported-by: on the last patch I sent. > > I googled the sysbot email address and so added > > Closes: https://lkml.kernel.org/r/000...@go... > > to the changelog. Thanks. That is indeed the syzbot issue that the patch fixes. > > I'll assume that a -stable kernel backport is needed. > > Yes. Phillip |
From: Phillip L. <ph...@sq...> - 2023-11-16 16:07:35
|
> On 16/11/2023 03:13 GMT Lizhi Xu <liz...@wi...> wrote: > > > when the length passed in is 0, the subsequent process should be exited. > Reproduced and tested. Reviewed-by: Phillip Lougher (ph...@sq...) > Reported-by: syz...@sy... > Signed-off-by: Lizhi Xu <liz...@wi...> > --- > fs/squashfs/block.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c > index 581ce9519339..2dc730800f44 100644 > --- a/fs/squashfs/block.c > +++ b/fs/squashfs/block.c > @@ -321,7 +321,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, > TRACE("Block @ 0x%llx, %scompressed size %d\n", index - 2, > compressed ? "" : "un", length); > } > - if (length < 0 || length > output->length || > + if (length <= 0 || length > output->length || > (index + length) > msblk->bytes_used) { > res = -EIO; > goto out; > -- > 2.25.1 |
From: Phillip L. <ph...@sq...> - 2023-11-16 15:26:16
|
> [Bug] > path_openat() called open_last_lookups() before calling do_open() and > open_last_lookups() will eventually call squashfs_read_inode() to set > inode->i_size, but before setting i_size, it is necessary to obtain file_size > from the disk. > > However, during the value retrieval process, the length of the value retrieved > from the disk was greater than output->length, resulting(-EIO) in the failure of > squashfs_read_data(), further leading to i_size has not been initialized, > i.e. its value is 0. > NACK This analysis is completely *wrong*. First, if there was I/O error reading the inode it would never be created, and squasfs_readahead() would never be called on it, because it will never exist. Second i_size isn't unintialised and it isn't 0 in value. Where you got this bogus information from is because in your test patches, i.e. https://lore.kernel.org/all/000...@go.../ You have + if (!file_end) { + printk("i:%p, is:%d, %s\n", inode, i_size_read(inode), __func__); + res = -EINVAL; + goto out; + } + You have used %d, and the result of i_size_read(inode) overflows, giving the bogus 0 value. The actual value is 1407374883553280, or 0x5000000000000, which is too big to fit into an unsigned int. > This resulted in the failure of squashfs_read_data(), where "SQUASHFS error: > Failed to read block 0x6fc: -5" was output in the syz log. > This also resulted in the failure of squashfs_cache_get(), outputting "SQUASHFS > error: Unable to read metadata cache entry [6fa]" in the syz log. > NO, *that* is caused by the failure to read some other inodes which as a result are correctly not created. Nothing to do with the oops here. > [Fix] > Before performing a read ahead operation in squashfs_read_folio() and > squashfs_readahead(), check if i_size is not 0 before continuing. > A third NO, it is only 0 because the variable overflowed. Additionally, let's look at your "fix" here. 
> @@ -461,6 +461,11 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) > TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", > page->index, squashfs_i(inode)->start); > > + if (!file_end) { > + res = -EINVAL; > + goto out; > + } > + file_end is computed by int file_end = i_size_read(inode) >> msblk->block_log; So your "fix" will reject *any* file less than msblk->block_log in size as invalid, including perfectly valid zero size files (empty files are valid too). I already identified the cause and send a fix patch here: https://lore.kernel.org/all/202...@sq.../ NACK Phillip |
From: Phillip L. <phi...@gm...> - 2023-11-13 15:28:14
|
On Sun, Nov 12, 2023 at 5:32 AM syzbot <syz...@sy...> wrote: > > Hello, > > syzbot found the following issue on: > > HEAD commit: 13d88ac54ddd Merge tag 'vfs-6.7.fsid' of git://git.kernel... > git tree: upstream > console+strace: https://syzkaller.appspot.com/x/log.txt?x=121965ef680000 > kernel config: https://syzkaller.appspot.com/x/.config?x=beb32a598fd79db9 > dashboard link: https://syzkaller.appspot.com/bug?extid=604424eb051c2f696163 > compiler: Debian clang version 15.0.6, GNU ld (GNU Binutils for Debian) 2.40 > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=12b40c7b680000 > C reproducer: https://syzkaller.appspot.com/x/repro.c?x=10f691ef680000 > > Downloadable assets: > disk image: https://storage.googleapis.com/syzbot-assets/9e81dc4903c2/disk-13d88ac5.raw.xz > vmlinux: https://storage.googleapis.com/syzbot-assets/f40fd7326b3f/vmlinux-13d88ac5.xz > kernel image: https://storage.googleapis.com/syzbot-assets/2f399cd6ff7d/bzImage-13d88ac5.xz > mounted in repro: https://storage.googleapis.com/syzbot-assets/8e127c645a04/mount_0.gz > > The issue was bisected to: > > commit f268eedddf3595e85f8883dc50aed29654785696 > Author: Phillip Lougher <ph...@sq...> > Date: Sat Jun 11 03:21:32 2022 +0000 > > squashfs: extend "page actor" to handle missing pages Fixed the issue, and will send a patch shortly. Phillip > > bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=1252b717680000 > final oops: https://syzkaller.appspot.com/x/report.txt?x=1152b717680000 > console output: https://syzkaller.appspot.com/x/log.txt?x=1652b717680000 > > IMPORTANT: if you fix the issue, please add the following tag to the commit: > Reported-by: syz...@sy... 
> Fixes: f268eedddf35 ("squashfs: extend "page actor" to handle missing pages") > > SQUASHFS error: Unable to read metadata cache entry [6fa] > SQUASHFS error: Unable to read metadata cache entry [6fa] > SQUASHFS error: Unable to read metadata cache entry [6fa] > ================================================================== > BUG: KASAN: slab-out-of-bounds in __readahead_batch include/linux/pagemap.h:1364 [inline] > BUG: KASAN: slab-out-of-bounds in squashfs_readahead+0x9a6/0x20d0 fs/squashfs/file.c:569 > Write of size 8 at addr ffff88801e393648 by task syz-executor100/5067 > > CPU: 1 PID: 5067 Comm: syz-executor100 Not tainted 6.6.0-syzkaller-15156-g13d88ac54ddd #0 > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 > Call Trace: > <TASK> > __dump_stack lib/dump_stack.c:88 [inline] > dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 > print_address_description mm/kasan/report.c:364 [inline] > print_report+0x163/0x540 mm/kasan/report.c:475 > kasan_report+0x142/0x170 mm/kasan/report.c:588 > __readahead_batch include/linux/pagemap.h:1364 [inline] > squashfs_readahead+0x9a6/0x20d0 fs/squashfs/file.c:569 > read_pages+0x183/0x830 mm/readahead.c:160 > page_cache_ra_unbounded+0x68e/0x7c0 mm/readahead.c:269 > page_cache_sync_readahead include/linux/pagemap.h:1266 [inline] > filemap_get_pages+0x49c/0x2080 mm/filemap.c:2497 > filemap_read+0x42b/0x10b0 mm/filemap.c:2593 > __kernel_read+0x425/0x8b0 fs/read_write.c:428 > integrity_kernel_read+0xb0/0xf0 security/integrity/iint.c:221 > ima_calc_file_hash_tfm security/integrity/ima/ima_crypto.c:485 [inline] > ima_calc_file_shash security/integrity/ima/ima_crypto.c:516 [inline] > ima_calc_file_hash+0xad1/0x1b30 security/integrity/ima/ima_crypto.c:573 > ima_collect_measurement+0x554/0xb30 security/integrity/ima/ima_api.c:290 > process_measurement+0x1373/0x21c0 security/integrity/ima/ima_main.c:359 > ima_file_check+0xf1/0x170 security/integrity/ima/ima_main.c:557 > do_open fs/namei.c:3624 
[inline] > path_openat+0x2893/0x3280 fs/namei.c:3779 > do_filp_open+0x234/0x490 fs/namei.c:3809 > do_sys_openat2+0x13e/0x1d0 fs/open.c:1440 > do_sys_open fs/open.c:1455 [inline] > __do_sys_open fs/open.c:1463 [inline] > __se_sys_open fs/open.c:1459 [inline] > __x64_sys_open+0x225/0x270 fs/open.c:1459 > do_syscall_x64 arch/x86/entry/common.c:51 [inline] > do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 > entry_SYSCALL_64_after_hwframe+0x63/0x6b > RIP: 0033:0x7f0e73d6c5f9 > Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 61 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 > RSP: 002b:00007ffd516ff158 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 > RAX: ffffffffffffffda RBX: 0031656c69662f2e RCX: 00007f0e73d6c5f9 > RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000200000c0 > RBP: 00007f0e73ddf610 R08: 0000000000000225 R09: 0000000000000000 > R10: 00007ffd516ff020 R11: 0000000000000246 R12: 0000000000000001 > R13: 00007ffd516ff328 R14: 0000000000000001 R15: 0000000000000001 > </TASK> > > Allocated by task 5067: > kasan_save_stack mm/kasan/common.c:45 [inline] > kasan_set_track+0x4f/0x70 mm/kasan/common.c:52 > ____kasan_kmalloc mm/kasan/common.c:374 [inline] > __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:383 > kasan_kmalloc include/linux/kasan.h:198 [inline] > __do_kmalloc_node mm/slab_common.c:1007 [inline] > __kmalloc+0xb9/0x230 mm/slab_common.c:1020 > kmalloc_array include/linux/slab.h:637 [inline] > squashfs_readahead+0x30c/0x20d0 fs/squashfs/file.c:552 > read_pages+0x183/0x830 mm/readahead.c:160 > page_cache_ra_unbounded+0x68e/0x7c0 mm/readahead.c:269 > page_cache_sync_readahead include/linux/pagemap.h:1266 [inline] > filemap_get_pages+0x49c/0x2080 mm/filemap.c:2497 > filemap_read+0x42b/0x10b0 mm/filemap.c:2593 > __kernel_read+0x425/0x8b0 fs/read_write.c:428 > integrity_kernel_read+0xb0/0xf0 security/integrity/iint.c:221 > ima_calc_file_hash_tfm 
security/integrity/ima/ima_crypto.c:485 [inline] > ima_calc_file_shash security/integrity/ima/ima_crypto.c:516 [inline] > ima_calc_file_hash+0xad1/0x1b30 security/integrity/ima/ima_crypto.c:573 > ima_collect_measurement+0x554/0xb30 security/integrity/ima/ima_api.c:290 > process_measurement+0x1373/0x21c0 security/integrity/ima/ima_main.c:359 > ima_file_check+0xf1/0x170 security/integrity/ima/ima_main.c:557 > do_open fs/namei.c:3624 [inline] > path_openat+0x2893/0x3280 fs/namei.c:3779 > do_filp_open+0x234/0x490 fs/namei.c:3809 > do_sys_openat2+0x13e/0x1d0 fs/open.c:1440 > do_sys_open fs/open.c:1455 [inline] > __do_sys_open fs/open.c:1463 [inline] > __se_sys_open fs/open.c:1459 [inline] > __x64_sys_open+0x225/0x270 fs/open.c:1459 > do_syscall_x64 arch/x86/entry/common.c:51 [inline] > do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 > entry_SYSCALL_64_after_hwframe+0x63/0x6b > > The buggy address belongs to the object at ffff88801e393640 > which belongs to the cache kmalloc-8 of size 8 > The buggy address is located 0 bytes to the right of > allocated 8-byte region [ffff88801e393640, ffff88801e393648) > > The buggy address belongs to the physical page: > page:ffffea000078e4c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88801e393230 pfn:0x1e393 > anon flags: 0xfff00000000800(slab|node=0|zone=1|lastcpupid=0x7ff) > page_type: 0xffffffff() > raw: 00fff00000000800 ffff888012c41280 0000000000000000 dead000000000001 > raw: ffff88801e393230 0000000080660063 00000001ffffffff 0000000000000000 > page dumped because: kasan: bad access detected > page_owner tracks the page as allocated > page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 1, tgid 1 (swapper/0), ts 10649202635, free_ts 9358727270 > set_page_owner include/linux/page_owner.h:31 [inline] > post_alloc_hook+0x1e6/0x210 mm/page_alloc.c:1537 > prep_new_page mm/page_alloc.c:1544 [inline] > get_page_from_freelist+0x339a/0x3530 
mm/page_alloc.c:3312 > __alloc_pages+0x255/0x670 mm/page_alloc.c:4568 > alloc_pages_mpol+0x3de/0x640 mm/mempolicy.c:2133 > alloc_slab_page+0x6a/0x160 mm/slub.c:1870 > allocate_slab mm/slub.c:2017 [inline] > new_slab+0x84/0x2f0 mm/slub.c:2070 > ___slab_alloc+0xc85/0x1310 mm/slub.c:3223 > __slab_alloc mm/slub.c:3322 [inline] > __slab_alloc_node mm/slub.c:3375 [inline] > slab_alloc_node mm/slub.c:3468 [inline] > __kmem_cache_alloc_node+0x21d/0x300 mm/slub.c:3517 > __do_kmalloc_node mm/slab_common.c:1006 [inline] > __kmalloc_node_track_caller+0xa5/0x230 mm/slab_common.c:1027 > kstrdup+0x3a/0x70 mm/util.c:62 > __kernfs_new_node+0x9d/0x870 fs/kernfs/dir.c:611 > kernfs_new_node+0x99/0x170 fs/kernfs/dir.c:679 > kernfs_create_link+0xa5/0x1f0 fs/kernfs/symlink.c:39 > sysfs_do_create_link_sd+0x85/0x100 fs/sysfs/symlink.c:44 > device_create_sys_dev_entry+0x10f/0x170 drivers/base/core.c:3451 > device_add+0x8cf/0xd30 drivers/base/core.c:3595 > page last free stack trace: > reset_page_owner include/linux/page_owner.h:24 [inline] > free_pages_prepare mm/page_alloc.c:1137 [inline] > free_unref_page_prepare+0x92a/0xa50 mm/page_alloc.c:2347 > free_unref_page+0x37/0x3f0 mm/page_alloc.c:2487 > vfree+0x186/0x2e0 mm/vmalloc.c:2842 > delayed_vfree_work+0x56/0x80 mm/vmalloc.c:2763 > process_one_work kernel/workqueue.c:2630 [inline] > process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703 > worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784 > kthread+0x2d3/0x370 kernel/kthread.c:388 > ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147 > ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 > > Memory state around the buggy address: > ffff88801e393500: 00 fc fc fc fc fa fc fc fc fc 05 fc fc fc fc 05 > ffff88801e393580: fc fc fc fc 00 fc fc fc fc 05 fc fc fc fc fa fc > >ffff88801e393600: fc fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc > ^ > ffff88801e393680: fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc > ffff88801e393700: fc 00 fc fc fc fc fb fc fc fc fc 00 fc fc fc fc > 
================================================================== > > > --- > This report is generated by a bot. It may contain errors. > See https://goo.gl/tpsmEJ for more information about syzbot. > syzbot engineers can be reached at syz...@go.... > > syzbot will keep track of this issue. See: > https://goo.gl/tpsmEJ#status for how to communicate with syzbot. > For information about bisection process see: https://goo.gl/tpsmEJ#bisection > > If the report is already addressed, let syzbot know by replying with: > #syz fix: exact-commit-title > > If you want syzbot to run the reproducer, reply with: > #syz test: git://repo/address.git branch-or-commit-hash > If you attach or paste a git patch, syzbot will apply it before testing. > > If you want to overwrite report's subsystems, reply with: > #syz set subsystems: new-subsystem > (See the list of subsystem names on the web dashboard) > > If the report is a duplicate of another one, reply with: > #syz dup: exact-subject-of-another-report > > If you want to undo deduplication, reply with: > #syz undup > |
From: Phillip L. <ph...@sq...> - 2023-05-17 18:07:20
|
On 17/05/2023 08:16, Christoph Hellwig wrote: > Squashfs has stopped using buffers heads in 93e72b3c612adcaca1 > ("squashfs: migrate from ll_rw_block usage to BIO"). > > Signed-off-by: Christoph Hellwig <hc...@ls...> Reviewed-by: Phillip Lougher <ph...@sq...> |
From: Phillip L. <ph...@sq...> - 2023-05-17 17:49:07
|
On 17/05/2023 15:18, Vincent Whitchurch wrote: > Before commit 93e72b3c612adcaca1 ("squashfs: migrate from ll_rw_block > usage to BIO"), compressed blocks read by squashfs were cached in the > page cache, but that is not the case after that commit. That has lead > to squashfs having to re-read a lot of sectors from disk/flash. > > For example, the first sectors of every metadata block need to be read > twice from the disk. Once partially to read the length, and a > second time to read the block itself. Also, in linear reads of large > files, the last sectors of one data block are re-read from disk when > reading the next data block, since the compressed blocks are of variable > sizes and not aligned to device blocks. This extra I/O results in a > degrade in read performance of, for example, ~16% in one scenario on my > ARM platform using squashfs with dm-verity and NAND. > > Since the decompressed data is cached in the page cache or squashfs' > internal metadata and fragment caches, caching _all_ compressed pages > would lead to a lot of double caching and is undesirable. But make the > code cache any disk blocks which were only partially requested, since > these are the ones likely to include data which is needed by other file > system blocks. This restores read performance in my test scenario. > > The compressed block caching is only applied when the disk block size is > equal to the page size, to avoid having to deal with caching sub-page > reads. > > Signed-off-by: Vincent Whitchurch <vin...@ax...> Reviewed-by: Phillip Lougher <ph...@sq...> |
From: Phillip L. <ph...@sq...> - 2023-05-16 19:37:25
|
On 16/05/2023 09:22, Vincent Whitchurch wrote: > Before commit 93e72b3c612adcaca1 ("squashfs: migrate from ll_rw_block > usage to BIO"), compressed blocks read by squashfs were cached in the > page cache, but that is not the case after that commit. That has lead > to squashfs having to re-read a lot of sectors from disk/flash. > > For example, the first sectors of every metadata block need to be read > twice from the disk. Once partially to read the length, and a > second time to read the block itself. Also, in linear reads of large > files, the last sectors of one data block are re-read from disk when > reading the next data block, since the compressed blocks are of variable > sizes and not aligned to device blocks. This extra I/O results in a > degrade in read performance of, for example, ~16% in one scenario on my > ARM platform using squashfs with dm-verity and NAND. > > Since the decompressed data is cached in the page cache or squashfs' > internal metadata and fragment caches, caching _all_ compressed pages > would lead to a lot of double caching and is undesirable. But make the > code cache any disk blocks which were only partially requested, since > these are the ones likely to include data which is needed by other file > system blocks. This restores read performance in my test scenario. > > The compressed block caching is only applied when the disk block size is > equal to the page size, to avoid having to deal with caching sub-page > reads. > > Signed-off-by: Vincent Whitchurch <vin...@ax...> Reviewed-by: Phillip Lougher <ph...@sq...> |
From: Phillip L. <ph...@sq...> - 2023-05-13 02:32:10
|
On 12/05/2023 14:18, Vincent Whitchurch wrote: > Before commit 93e72b3c612adcaca1 ("squashfs: migrate from ll_rw_block > usage to BIO"), compressed blocks read by squashfs were cached in the > page cache, but that is not the case after that commit. That has lead > to squashfs having to re-read a lot of sectors from disk/flash. Good catch. I wasn't aware of that regression. > For example, the first sectors of every metadata block need to be read > twice from the disk. Once partially to read the length, and a > second time to read the block itself. Also, in linear reads of large > files, the last sectors of one data block are re-read from disk when > reading the next data block, since the compressed blocks are of variable > sizes and not aligned to device blocks. This extra I/O results in a > degrade in read performance of, for example, ~16% in one scenario on my > ARM platform using squashfs with dm-verity and NAND. > > Since the decompressed data is cached in the page cache or squashfs' > internal metadata and fragment caches, caching _all_ compressed pages > would lead to a lot of double caching and is undesirable. But make the > code cache any disk blocks which were only partially requested, since > these are the ones likely to include data which is needed by other file > system blocks. This restores read performance in my test scenario. > > The compressed block caching is only applied when the disk block size is > equal to the page size, to avoid having to deal with caching sub-page > reads. > > Signed-off-by: Vincent Whitchurch <vin...@ax...> Patch looks good. Reviewed-by: Phillip Lougher <ph...@sq...> > --- > Changes in v2: > - Do not remove static from squashfs_bio_read() > - Link to v1: https://lore.kernel.org/r/202...@ax... 
> --- > fs/squashfs/block.c | 116 +++++++++++++++++++++++++++++++++++++++++-- > fs/squashfs/squashfs_fs_sb.h | 1 + > fs/squashfs/super.c | 12 +++++ > 3 files changed, 126 insertions(+), 3 deletions(-) > > diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c > index bed3bb8b27fa..d750f72711fa 100644 > --- a/fs/squashfs/block.c > +++ b/fs/squashfs/block.c > @@ -76,10 +76,101 @@ static int copy_bio_to_actor(struct bio *bio, > return copied_bytes; > } > > +static int squashfs_bio_read_cached(struct bio *fullbio, struct address_space *cache_mapping, > + u64 index, int length, u64 read_start, u64 read_end, > + int page_count) > +{ > + struct page *head_to_cache = NULL, *tail_to_cache = NULL; > + struct block_device *bdev = fullbio->bi_bdev; > + struct bvec_iter_all iter_all; > + struct bio *bio = NULL; > + int prev_io_idx = -1; > + struct bio_vec *bv; > + int idx = 0; > + int err = 0; > + > + bio_for_each_segment_all(bv, fullbio, iter_all) { > + struct page *page = bv->bv_page; > + int retlen; > + > + if (page->mapping == cache_mapping && PageUptodate(page)) { > + idx++; > + continue; > + } > + > + /* > + * We only use this when the device block size is the same as > + * the page size, so read_start and read_end cover full pages. > + * > + * Compare these to the original required index and length to > + * only cache pages which were requested partially, since these > + * are the ones which are likely to be needed when reading > + * adjacent blocks. 
> + */ > + if (idx == 0 && index != read_start) > + head_to_cache = page; > + else if (idx == page_count - 1 && index + length != read_end) > + tail_to_cache = page; > + > + if (!bio || idx != prev_io_idx + 1) { > + unsigned int remaining_pages; > + unsigned int this_nr_pages; > + > +submit_and_retry: > + remaining_pages = page_count - idx; > + this_nr_pages = min(remaining_pages, BIO_MAX_VECS); > + bio = blk_next_bio(bio, bdev, this_nr_pages, REQ_OP_READ, > + GFP_NOIO); > + bio->bi_iter.bi_sector = fullbio->bi_iter.bi_sector + > + idx * (PAGE_SIZE / SECTOR_SIZE); > + } > + > + retlen = bio_add_page(bio, bv->bv_page, bv->bv_len, bv->bv_offset); > + if (retlen != bv->bv_len) > + goto submit_and_retry; > + > + prev_io_idx = idx; > + idx++; > + } > + > + if (bio) { > + err = submit_bio_wait(bio); > + bio_put(bio); > + } > + > + if (err) > + return err; > + > + if (head_to_cache) { > + int ret = add_to_page_cache_lru(head_to_cache, cache_mapping, > + read_start, GFP_NOIO); > + > + if (!ret) { > + SetPageUptodate(head_to_cache); > + unlock_page(head_to_cache); > + } > + > + } > + > + if (tail_to_cache) { > + int ret = add_to_page_cache_lru(tail_to_cache, cache_mapping, > + read_end - PAGE_SIZE, GFP_NOIO); > + > + if (!ret) { > + SetPageUptodate(tail_to_cache); > + unlock_page(tail_to_cache); > + } > + } > + > + return 0; > +} > + > static int squashfs_bio_read(struct super_block *sb, u64 index, int length, > struct bio **biop, int *block_offset) > { > struct squashfs_sb_info *msblk = sb->s_fs_info; > + struct inode *cache_inode = msblk->cache_inode; > + struct address_space *cache_mapping = cache_inode ? 
cache_inode->i_mapping : NULL; > const u64 read_start = round_down(index, msblk->devblksize); > const sector_t block = read_start >> msblk->devblksize_log2; > const u64 read_end = round_up(index + length, msblk->devblksize); > @@ -99,13 +190,27 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length, > for (i = 0; i < page_count; ++i) { > unsigned int len = > min_t(unsigned int, PAGE_SIZE - offset, total_len); > - struct page *page = alloc_page(GFP_NOIO); > + struct page *page = NULL; > + > + if (cache_mapping) > + page = find_get_page(cache_mapping, > + read_start + i * PAGE_SIZE); > + if (!page) > + page = alloc_page(GFP_NOIO); > > if (!page) { > error = -ENOMEM; > goto out_free_bio; > } > - if (!bio_add_page(bio, page, len, offset)) { > + > + if (cache_mapping) { > + /* > + * Use the __ version to avoid merging since we need > + * each page to be separate when we check for and avoid > + * cached pages. > + */ > + __bio_add_page(bio, page, len, offset); > + } else if (!bio_add_page(bio, page, len, offset)) { > error = -EIO; > goto out_free_bio; > } > @@ -113,7 +218,12 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length, > total_len -= len; > } > > - error = submit_bio_wait(bio); > + if (cache_mapping) > + error = squashfs_bio_read_cached(bio, cache_mapping, index, > + length, read_start, read_end, > + page_count); > + else > + error = submit_bio_wait(bio); > if (error) > goto out_free_bio; > > diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h > index 72f6f4b37863..dfee65845d48 100644 > --- a/fs/squashfs/squashfs_fs_sb.h > +++ b/fs/squashfs/squashfs_fs_sb.h > @@ -47,6 +47,7 @@ struct squashfs_sb_info { > struct squashfs_cache *block_cache; > struct squashfs_cache *fragment_cache; > struct squashfs_cache *read_page; > + struct inode *cache_inode; > int next_meta_index; > __le64 *id_table; > __le64 *fragment_index; > diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c > index 
e090fae48e68..64d6bc95950b 100644 > --- a/fs/squashfs/super.c > +++ b/fs/squashfs/super.c > @@ -329,6 +329,16 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) > goto failed_mount; > } > > + if (msblk->devblksize == PAGE_SIZE) { > + msblk->cache_inode = new_inode(sb); > + if (msblk->cache_inode == NULL) > + goto failed_mount; > + > + set_nlink(msblk->cache_inode, 1); > + msblk->cache_inode->i_size = OFFSET_MAX; > + mapping_set_gfp_mask(msblk->cache_inode->i_mapping, GFP_NOFS); > + } > + > msblk->stream = squashfs_decompressor_setup(sb, flags); > if (IS_ERR(msblk->stream)) { > err = PTR_ERR(msblk->stream); > @@ -454,6 +464,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) > squashfs_cache_delete(msblk->block_cache); > squashfs_cache_delete(msblk->fragment_cache); > squashfs_cache_delete(msblk->read_page); > + iput(msblk->cache_inode); > msblk->thread_ops->destroy(msblk); > kfree(msblk->inode_lookup_table); > kfree(msblk->fragment_index); > @@ -572,6 +583,7 @@ static void squashfs_put_super(struct super_block *sb) > squashfs_cache_delete(sbi->block_cache); > squashfs_cache_delete(sbi->fragment_cache); > squashfs_cache_delete(sbi->read_page); > + iput(sbi->cache_inode); > sbi->thread_ops->destroy(sbi); > kfree(sbi->id_table); > kfree(sbi->fragment_index); > > --- > base-commit: 457391b0380335d5e9a5babdec90ac53928b23b4 > change-id: 20230510-squashfs-cache-7a3b9e7355c1 > > Best regards, |
From: Phillip L. <ph...@sq...> - 2023-03-22 05:39:50
|
Hi, I'm pleased to announce the release of Squashfs tools 4.6. The release can be downloaded either from Sourceforge, or GitHub. https://sourceforge.net/projects/squashfs/files/latest/download https://github.com/plougher/squashfs-tools/archive/refs/tags/4.6.tar.gz A summary of the changes is below. Please see the README-4.6 file in the release tarball for more information and the USAGE files. Phillip 1. Summary of changes --------------------- 1. Extended attribute handling improved in Mksquashfs and Sqfstar 1.1.New -xattrs-exclude option to exclude extended attributes from files using a regular expression. 1.2 New -xattrs-include option to include extended attributes from files using a regular expression. 1.3 New -xattrs-add option to add extended attributes to files. 1.4 New Pseudo file xattr definition to add extended attributes to files. 1.5 New xattrs-add Action to add extended attributes to files (Mksquashfs only). 2. Extended attribute handling improved in Unsquashfs 2.1 New -xattrs-exclude option to exclude extended attributes from files using a regular expression. 2.2 New -xattrs-include option to include extended attributes from files using a regular expression. 2.3 Extended attributes are now supported in Pseudo file output. 3. Other major improvements 3.1 Unsquashfs can now output Pseudo files to standard out. 3.2 Mksquashfs can now input Pseudo files from standard in. 3.3 Squashfs filesystems can now be converted (different block size compression etc) without unpacking to an intermediate filesystem or mounting, by piping the output of Unsquashfs to Mksquashfs. 3.4 Pseudo files are now supported by Sqfstar. 3.5 "Non-anchored" excludes are now supported by Unsquashfs. 4. Mksquashfs minor improvements 4.1 A new -max-depth option has been added, which limits the depth Mksquashfs descends when creating the filesystem. 
4.2 A new -mem-percent option which allows memory for caches to be specified as a percentage of physical RAM, rather than requiring an absolute value. 4.3 A new -percentage option added which rather than generating the full progress-bar instead outputs a percentage. This can be used with dialog --gauge etc. 4.4 -mkfs-time, -all-time and -root-time options now take a human date string, in addition to the seconds since the epoch of 1970 00:00 UTC. For example "now", "last week", "Wed Mar 8 05:55:01 GMT 2023" are supported. 4.5 -root-uid, -root-gid, -force-uid and -force-gid options now take a user/group name in addition to the integer uid/gid. 4.6 A new -mem-default option which displays default memory usage for caches in Mbytes. 4.7 A new -no-compression option which produces no compression, and it is a short-cut for -noI, -noD, -noF and -noX. 4.8 A new -pseudo-override option which makes pseudo file uids and gids override -all-root, -force-uid and -force-gid options. Normally these options take precedence. 5. Unsquashfs minor improvements 5.1 New -all-time option which sets all file timestamps to <time>, rather than the time stored in the filesystem inode. <time> can be an integer indicating seconds since the epoch (1970-01-01) or a human string value such as "now", "last week", or "Wed Feb 15 21:02:39 GMT 2023". 5.2 New -full-precision option which uses full precision when displaying times including seconds. Use with -linfo, -lls, -lln and -llc options. 5.3 New -match option where Unsquashfs will abort if any extract file does not match on anything, and can not be resolved. 5.4 New -percentage option added which rather than generating the full progress-bar instead outputs a percentage. This can be used with dialog --gauge etc. 6. Sqfstar minor improvements 6.1 New -ignore-zeros option added which allows tar files to be concatenated together and fed to Sqfstar. 
Normally a tarfile has two consecutive 512 byte blocks filled with zeros which means EOF and Sqfstar will stop reading after the first tar file on encountering them. This option makes Sqfstar ignore the zero filled blocks. 6.2 A new -mem-percent option which allows memory for caches to be specified as a percentage of physical RAM, rather than requiring an absolute value. 6.3 A new -percentage option added which rather than generating the full progress-bar instead outputs a percentage. This can be used with dialog --gauge etc. 6.4 -mkfs-time, -all-time and -root-time options now take a human date string, in addition to the seconds since the epoch of 1970 00:00 UTC. For example "now", "last week", "Wed Mar 8 05:55:01 GMT 2023" are supported. 6.5 -root-uid, -root-gid, -force-uid and -force-gid options now take a user/group name in addition to the integer uid/gid. 6.6 A new -mem-default option which displays default memory usage for caches in Mbytes. 6.7 A new -no-compression option which produces no compression, and it is a short-cut for -noI, -noD, -noF and -noX. 6.8 A new -pseudo-override option which makes pseudo file uids and gids override -all-root, -force-uid and -force-gid options. Normally these options take precedence. 6.9 Do not abort if ZERO filled blocks indicating end of the TAR archive are missing. 7. Other minor improvements 7.1 If Mksquashfs/Unsquashfs fails to execute generating the manpages because they have been cross-compiled, fall back to using the pre-built manpages. 7.2 Add new Makefile configure option USE_PREBUILT_MANPAGES to always use pre-built manpages rather than generating them when "make install" is run. 8. Major bug fixes 8.1 Following a symlink in Sqfscat or where -follow-symlinks option is given with Unsquashfs, incorrectly triggered the corrupted filesystem loop detection code. 8.2 In Unsquashfs if a file was not writable it could not add extended attributes to it. 
8.3 Sqfstar would incorrectly reject compressor specific options that have an argument. 8.4 Sqfstar would incorrectly strip pathname components in PAX header linkpath if symbolic. 8.5 Sqfstar -root-uid, -root-gid and -root-time options were documented but not implemented. 8.6 Mksquashfs -one-file-system option would not create empty mount point directory when filesystem boundary crossed. 8.7 Mksquashfs did not check the close() return result. |
From: Phillip L. <ph...@sq...> - 2022-06-11 03:23:10
|
This patch extends the "page actor" code to handle missing pages. Previously if the full set of pages needed to decompress a Squashfs datablock was unavailable, this would cause decompression to fail on the missing pages. In this case direct decompression into the page cache could not be achieved and the code would fall back to using the older intermediate buffer method. With this patch, direct decompression into the page cache can be achieved with missing pages. For "multi-shot" decompressors (zlib, xz, zstd), the page actor will allocate a temporary buffer which is passed to the decompressor, and then freed by the page actor. For "single shot" decompressors (lz4, lzo) which decompress into a contiguous "bounce buffer", and which is then copied into the page cache, it would be pointless to allocate a temporary buffer, memcpy into it, and then free it. For these decompressors -ENOMEM is returned, which signifies that the memcpy for that page should be skipped. This also happens if the data block is uncompressed. 
Signed-off-by: Phillip Lougher <ph...@sq...> --- fs/squashfs/block.c | 10 ++++--- fs/squashfs/decompressor.h | 1 + fs/squashfs/file_direct.c | 21 ++++++++------- fs/squashfs/lz4_wrapper.c | 7 +++-- fs/squashfs/lzo_wrapper.c | 7 +++-- fs/squashfs/page_actor.c | 55 ++++++++++++++++++++++++++++++++------ fs/squashfs/page_actor.h | 21 ++++++++++++--- fs/squashfs/xz_wrapper.c | 11 +++++++- fs/squashfs/zlib_wrapper.c | 12 ++++++++- fs/squashfs/zstd_wrapper.c | 12 ++++++++- 10 files changed, 126 insertions(+), 31 deletions(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 622c844f6d11..acbb0dc80f89 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -34,12 +34,15 @@ static int copy_bio_to_actor(struct bio *bio, struct squashfs_page_actor *actor, int offset, int req_length) { - void *actor_addr = squashfs_first_page(actor); + void *actor_addr; struct bvec_iter_all iter_all = {}; struct bio_vec *bvec = bvec_init_iter_all(&iter_all); int copied_bytes = 0; int actor_offset = 0; + squashfs_actor_nobuff(actor); + actor_addr = squashfs_first_page(actor); + if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) return 0; @@ -49,8 +52,9 @@ static int copy_bio_to_actor(struct bio *bio, bytes_to_copy = min_t(int, bytes_to_copy, req_length - copied_bytes); - memcpy(actor_addr + actor_offset, bvec_virt(bvec) + offset, - bytes_to_copy); + if (!IS_ERR(actor_addr)) + memcpy(actor_addr + actor_offset, bvec_virt(bvec) + + offset, bytes_to_copy); actor_offset += bytes_to_copy; copied_bytes += bytes_to_copy; diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 1b9ccfd0aa51..19ab60834389 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -20,6 +20,7 @@ struct squashfs_decompressor { struct bio *, int, int, struct squashfs_page_actor *); int id; char *name; + int alloc_buffer; int supported; }; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index a4894cc59447..5af5802f5626 100644 --- 
a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -47,14 +47,6 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, if (page == NULL) return res; - /* - * Create a "page actor" which will kmap and kunmap the - * page cache pages appropriately within the decompressor - */ - actor = squashfs_page_actor_init_special(page, pages, 0); - if (actor == NULL) - goto out; - /* Try to grab all the pages covered by the Squashfs block */ for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { page[i] = (n == target_page->index) ? target_page : @@ -89,8 +81,19 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, goto out; } + /* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ + actor = squashfs_page_actor_init_special(msblk, page, pages, 0); + if (actor == NULL) + goto out; + /* Decompress directly into the page cache buffers */ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + + kfree(actor); + if (res < 0) goto mark_errored; @@ -116,7 +119,6 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, put_page(page[i]); } - kfree(actor); kfree(page); return 0; @@ -135,7 +137,6 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, } out: - kfree(actor); kfree(page); return res; } diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index b685b6238316..49797729f143 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -119,10 +119,12 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, buff = stream->output; while (data) { if (bytes <= PAGE_SIZE) { - memcpy(data, buff, bytes); + if (!IS_ERR(data)) + memcpy(data, buff, bytes); break; } - memcpy(data, buff, PAGE_SIZE); + if (!IS_ERR(data)) + memcpy(data, buff, PAGE_SIZE); buff += PAGE_SIZE; bytes -= PAGE_SIZE; data = squashfs_next_page(output); @@ -139,5 +141,6 @@ 
const struct squashfs_decompressor squashfs_lz4_comp_ops = { .decompress = lz4_uncompress, .id = LZ4_COMPRESSION, .name = "lz4", + .alloc_buffer = 0, .supported = 1 }; diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index cb510a631968..d216aeefa865 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -93,10 +93,12 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, buff = stream->output; while (data) { if (bytes <= PAGE_SIZE) { - memcpy(data, buff, bytes); + if (!IS_ERR(data)) + memcpy(data, buff, bytes); break; } else { - memcpy(data, buff, PAGE_SIZE); + if (!IS_ERR(data)) + memcpy(data, buff, PAGE_SIZE); buff += PAGE_SIZE; bytes -= PAGE_SIZE; data = squashfs_next_page(output); @@ -116,5 +118,6 @@ const struct squashfs_decompressor squashfs_lzo_comp_ops = { .decompress = lzo_uncompress, .id = LZO_COMPRESSION, .name = "lzo", + .alloc_buffer = 0, .supported = 1 }; diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 520d323a99ce..b23b780d8f42 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -7,6 +7,8 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include "squashfs_fs_sb.h" +#include "decompressor.h" #include "page_actor.h" /* @@ -57,29 +59,62 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, } /* Implementation of page_actor for decompressing directly into page cache. 
*/ +static void *handle_next_page(struct squashfs_page_actor *actor) +{ + int max_pages = (actor->length + PAGE_SIZE - 1) >> PAGE_SHIFT; + + if (actor->returned_pages == max_pages) + return NULL; + + if ((actor->next_page == actor->pages) || + (actor->next_index != actor->page[actor->next_page]->index)) { + if (actor->alloc_buffer) { + void *tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + + if (tmp_buffer) { + actor->tmp_buffer = tmp_buffer; + actor->next_index++; + actor->returned_pages++; + return tmp_buffer; + } + } + + actor->next_index++; + actor->returned_pages++; + return ERR_PTR(-ENOMEM); + } + + actor->next_index++; + actor->returned_pages++; + return actor->pageaddr = kmap_local_page(actor->page[actor->next_page++]); +} + static void *direct_first_page(struct squashfs_page_actor *actor) { - actor->next_page = 1; - return actor->pageaddr = kmap_atomic(actor->page[0]); + return handle_next_page(actor); } static void *direct_next_page(struct squashfs_page_actor *actor) { if (actor->pageaddr) - kunmap_atomic(actor->pageaddr); + kunmap_local(actor->pageaddr); + + kfree(actor->tmp_buffer); + actor->pageaddr = actor->tmp_buffer = NULL; - return actor->pageaddr = actor->next_page == actor->pages ? 
NULL : - kmap_atomic(actor->page[actor->next_page++]); + return handle_next_page(actor); } static void direct_finish_page(struct squashfs_page_actor *actor) { if (actor->pageaddr) - kunmap_atomic(actor->pageaddr); + kunmap_local(actor->pageaddr); + + kfree(actor->tmp_buffer); } -struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, - int pages, int length) +struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_info *msblk, + struct page **page, int pages, int length) { struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); @@ -90,7 +125,11 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, actor->page = page; actor->pages = pages; actor->next_page = 0; + actor->returned_pages = 0; + actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1); actor->pageaddr = NULL; + actor->tmp_buffer = NULL; + actor->alloc_buffer = msblk->decompressor->alloc_buffer; actor->squashfs_first_page = direct_first_page; actor->squashfs_next_page = direct_next_page; actor->squashfs_finish_page = direct_finish_page; diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 2e3073ace009..37523c54256f 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -45,6 +45,11 @@ static inline void squashfs_finish_page(struct squashfs_page_actor *actor) { /* empty */ } + +static inline void squashfs_actor_nobuff(struct squashfs_page_actor *actor) +{ + /* empty */ +} #else struct squashfs_page_actor { union { @@ -52,17 +57,23 @@ struct squashfs_page_actor { struct page **page; }; void *pageaddr; + void *tmp_buffer; void *(*squashfs_first_page)(struct squashfs_page_actor *); void *(*squashfs_next_page)(struct squashfs_page_actor *); void (*squashfs_finish_page)(struct squashfs_page_actor *); int pages; int length; int next_page; + int alloc_buffer; + int returned_pages; + pgoff_t next_index; }; -extern struct squashfs_page_actor 
*squashfs_page_actor_init(void **, int, int); -extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page - **, int, int); +extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, + int pages, int length); +extern struct squashfs_page_actor *squashfs_page_actor_init_special( + struct squashfs_sb_info *msblk, + struct page **page, int pages, int length); static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { return actor->squashfs_first_page(actor); @@ -75,5 +86,9 @@ static inline void squashfs_finish_page(struct squashfs_page_actor *actor) { actor->squashfs_finish_page(actor); } +static inline void squashfs_actor_nobuff(struct squashfs_page_actor *actor) +{ + actor->alloc_buffer = 0; +} #endif #endif diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 68f6d09bb3a2..6c49481a2f8c 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -131,6 +131,10 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, stream->buf.out_pos = 0; stream->buf.out_size = PAGE_SIZE; stream->buf.out = squashfs_first_page(output); + if (IS_ERR(stream->buf.out)) { + error = PTR_ERR(stream->buf.out); + goto finish; + } for (;;) { enum xz_ret xz_err; @@ -156,7 +160,10 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->buf.out_pos == stream->buf.out_size) { stream->buf.out = squashfs_next_page(output); - if (stream->buf.out != NULL) { + if (IS_ERR(stream->buf.out)) { + error = PTR_ERR(stream->buf.out); + break; + } else if (stream->buf.out != NULL) { stream->buf.out_pos = 0; total += PAGE_SIZE; } @@ -171,6 +178,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, } } +finish: squashfs_finish_page(output); return error ? 
error : total + stream->buf.out_pos; @@ -183,5 +191,6 @@ const struct squashfs_decompressor squashfs_xz_comp_ops = { .decompress = squashfs_xz_uncompress, .id = XZ_COMPRESSION, .name = "xz", + .alloc_buffer = 1, .supported = 1 }; diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index a20e9042146b..cbb7afe7bc46 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -62,6 +62,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, stream->next_out = squashfs_first_page(output); stream->avail_in = 0; + if (IS_ERR(stream->next_out)) { + error = PTR_ERR(stream->next_out); + goto finish; + } + for (;;) { int zlib_err; @@ -85,7 +90,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->avail_out == 0) { stream->next_out = squashfs_next_page(output); - if (stream->next_out != NULL) + if (IS_ERR(stream->next_out)) { + error = PTR_ERR(stream->next_out); + break; + } else if (stream->next_out != NULL) stream->avail_out = PAGE_SIZE; } @@ -107,6 +115,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, } } +finish: squashfs_finish_page(output); if (!error) @@ -122,6 +131,7 @@ const struct squashfs_decompressor squashfs_zlib_comp_ops = { .decompress = zlib_uncompress, .id = ZLIB_COMPRESSION, .name = "zlib", + .alloc_buffer = 1, .supported = 1 }; diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c index c40445dbf38c..0e407c4d8b3b 100644 --- a/fs/squashfs/zstd_wrapper.c +++ b/fs/squashfs/zstd_wrapper.c @@ -80,6 +80,10 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, out_buf.size = PAGE_SIZE; out_buf.dst = squashfs_first_page(output); + if (IS_ERR(out_buf.dst)) { + error = PTR_ERR(out_buf.dst); + goto finish; + } for (;;) { size_t zstd_err; @@ -104,7 +108,10 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, if (out_buf.pos == out_buf.size) { out_buf.dst = squashfs_next_page(output); - if (out_buf.dst 
== NULL) { + if (IS_ERR(out_buf.dst)) { + error = PTR_ERR(out_buf.dst); + break; + } else if (out_buf.dst == NULL) { /* Shouldn't run out of pages * before stream is done. */ @@ -129,6 +136,8 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, } } +finish: + squashfs_finish_page(output); return error ? error : total_out; @@ -140,5 +149,6 @@ const struct squashfs_decompressor squashfs_zstd_comp_ops = { .decompress = zstd_uncompress, .id = ZSTD_COMPRESSION, .name = "zstd", + .alloc_buffer = 1, .supported = 1 }; -- 2.34.1 |
From: Phillip L. <ph...@sq...> - 2022-06-11 03:23:09
|
Now that the "page actor" can handle missing pages, we don't have to fall back to using an intermediate buffer in Squashfs_readpage_block() if all the pages necessary can't be obtained. Signed-off-by: Phillip Lougher <ph...@sq...> --- fs/squashfs/file_direct.c | 75 +++++++-------------------------------- 1 file changed, 12 insertions(+), 63 deletions(-) diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 5af5802f5626..be4b12d31e0c 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -18,9 +18,6 @@ #include "squashfs.h" #include "page_actor.h" -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page, int bytes); - /* Read separately compressed datablock directly into page cache */ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, int expected) @@ -33,7 +30,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; int start_index = target_page->index & ~mask; int end_index = start_index | mask; - int i, n, pages, missing_pages, bytes, res = -ENOMEM; + int i, n, pages, bytes, res = -ENOMEM; struct page **page; struct squashfs_page_actor *actor; void *pageaddr; @@ -48,44 +45,29 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, return res; /* Try to grab all the pages covered by the Squashfs block */ - for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { + for (i = 0, n = start_index; n <= end_index; n++) { page[i] = (n == target_page->index) ? 
target_page : grab_cache_page_nowait(target_page->mapping, n); - if (page[i] == NULL) { - missing_pages++; + if (page[i] == NULL) continue; - } if (PageUptodate(page[i])) { unlock_page(page[i]); put_page(page[i]); - page[i] = NULL; - missing_pages++; + continue; } - } - - if (missing_pages) { - /* - * Couldn't get one or more pages, this page has either - * been VM reclaimed, but others are still in the page cache - * and uptodate, or we're racing with another thread in - * squashfs_readpage also trying to grab them. Fall back to - * using an intermediate buffer. - */ - res = squashfs_read_cache(target_page, block, bsize, pages, - page, expected); - if (res < 0) - goto mark_errored; - goto out; + i++; } + pages = i; + /* * Create a "page actor" which will kmap and kunmap the * page cache pages appropriately within the decompressor */ - actor = squashfs_page_actor_init_special(msblk, page, pages, 0); + actor = squashfs_page_actor_init_special(msblk, page, pages, expected); if (actor == NULL) goto out; @@ -102,12 +84,12 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, goto mark_errored; } - /* Last page may have trailing bytes not filled */ + /* Last page (if present) may have trailing bytes not filled */ bytes = res % PAGE_SIZE; - if (bytes) { - pageaddr = kmap_atomic(page[pages - 1]); + if (page[pages - 1]->index == end_index && bytes) { + pageaddr = kmap_local_page(page[pages - 1]); memset(pageaddr + bytes, 0, PAGE_SIZE - bytes); - kunmap_atomic(pageaddr); + kunmap_local(pageaddr); } /* Mark pages as uptodate, unlock and release */ @@ -140,36 +122,3 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, kfree(page); return res; } - - -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page, int bytes) -{ - struct inode *i = target_page->mapping->host; - struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, - block, bsize); - int res = 
buffer->error, n, offset = 0; - - if (res) { - ERROR("Unable to read page, block %llx, size %x\n", block, - bsize); - goto out; - } - - for (n = 0; n < pages && bytes > 0; n++, - bytes -= PAGE_SIZE, offset += PAGE_SIZE) { - int avail = min_t(int, bytes, PAGE_SIZE); - - if (page[n] == NULL) - continue; - - squashfs_fill_page(page[n], buffer, offset, avail); - unlock_page(page[n]); - if (page[n] != target_page) - put_page(page[n]); - } - -out: - squashfs_cache_put(buffer); - return res; -} -- 2.34.1 |
From: Phillip L. <ph...@sq...> - 2022-06-11 03:23:06
|
Hi all, This patch-set enables Squashfs to handle missing pages when directly decompressing datablocks into the page cache. Previously if the full set of pages needed was not available, Squashfs would have to fall back to using an intermediate buffer (the older method), which is slower, involving a memcopy, and it introduces contention on a shared buffer. The first patch extends the "page actor" code to handle missing pages. The second patch updates Squashfs_readpage_block() to use the new functionality, and removes the code that falls back to using an intermediate buffer. This patch-set is independent of the readahead work, and it is standalone. It can be merged on its own. But the readahead patch for efficiency also needs this patch-set. Phillip ---------------------------------------------------------------- Phillip Lougher (2): Squashfs: extend "page actor" to handle missing pages Squashfs: don't use intermediate buffer if pages missing fs/squashfs/block.c | 10 ++++-- fs/squashfs/decompressor.h | 1 + fs/squashfs/file_direct.c | 90 +++++++++++----------------------------------- fs/squashfs/lz4_wrapper.c | 7 ++-- fs/squashfs/lzo_wrapper.c | 7 ++-- fs/squashfs/page_actor.c | 67 ++++++++++++++++++++++++++++------ fs/squashfs/page_actor.h | 17 +++++++-- fs/squashfs/xz_wrapper.c | 11 +++++- fs/squashfs/zlib_wrapper.c | 12 ++++++- fs/squashfs/zstd_wrapper.c | 12 ++++++- 10 files changed, 142 insertions(+), 92 deletions(-) |
From: Hsin-Yi W. <hs...@ch...> - 2022-05-23 07:01:39
|
On Sat, May 21, 2022 at 4:22 AM Phillip Lougher <ph...@sq...> wrote: > > On 20/05/2022 08:38, Hsin-Yi Wang wrote: > > On Fri, May 20, 2022 at 11:02 AM Phillip Lougher > > <ph...@sq...> wrote: > >> > >> On 19/05/2022 09:09, Hsin-Yi Wang wrote: > >>> On Tue, May 17, 2022 at 4:28 PM Hsin-Yi Wang <hs...@ch...> wrote: > >>>> > >>>> Implement readahead callback for squashfs. It will read datablocks > >>>> which cover pages in readahead request. For a few cases it will > >>>> not mark page as uptodate, including: > >>>> - file end is 0. > >>>> - zero filled blocks. > >>>> - current batch of pages isn't in the same datablock or not enough in a > >>>> datablock. > >>>> Otherwise pages will be marked as uptodate. The unhandled pages will be > >>>> updated by readpage later. > >>>> > >>>> Suggested-by: Matthew Wilcox <wi...@in...> > >>>> Signed-off-by: Hsin-Yi Wang <hs...@ch...> > >>>> Reported-by: Matthew Wilcox <wi...@in...> > >>>> Reported-by: Phillip Lougher <ph...@sq...> > >>>> Reported-by: Xiongwei Song <sx...@gm...> > >>>> --- > >>>> v1->v2: remove unused check on readahead_expand(). > >>>> v1: https://lore.kernel.org/lkml/202...@ch.../ > >>>> --- > >>> > >>> Hi Phillip and Matthew, > >>> > >>> Regarding the performance issue of this patch, I saw a possible > >>> performance gain if we only read the first block instead of reading > >>> until nr_pages == 0. > >>> > >>> To be more clear, apply the following diff (Please ignore the skipping > >>> of nr_pages check first. 
This is a demonstration of "only read and > >>> update the first block per readahead call"): > >>> > >>> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c > >>> index aad6823f0615..c52f7c4a7cfe 100644 > >>> --- a/fs/squashfs/file.c > >>> +++ b/fs/squashfs/file.c > >>> @@ -524,10 +524,8 @@ static void squashfs_readahead(struct > >>> readahead_control *ractl) > >>> if (!actor) > >>> goto out; > >>> > >>> - for (;;) { > >>> + { > >>> nr_pages = __readahead_batch(ractl, pages, max_pages); > >>> - if (!nr_pages) > >>> - break; > >>> > >>> if (readahead_pos(ractl) >= i_size_read(inode) || > >>> nr_pages < max_pages) > >>> > >>> > >>> All the performance numbers: > >>> 1. original: 39s > >>> 2. revert "mm: put readahead pages in cache earlier": 2.8s > >>> 3. v2 of this patch: 2.7s > >>> 4. v2 of this patch and apply the diff: 1.8s > >>> > >>> In my testing data, normally it reads and updates 1~2 blocks per > >>> readahead call. The change might not make sense since the performance > >>> improvement may only happen in certain cases. > >>> What do you think? Or is the performance of the current patch > >>> considered reasonable? > >> > >> It entirely depends on where the speed improvement comes from. > >> > >> From experience, the speed improvement is probably worthwhile, > >> and probably isn't gained at the expense of worse performance > >> on other work-loads. > >> > >> But this is a guestimate, based on the fact timings 2 and 3 > >> (2.8s v 2.7s) are almost identical. Which implies the v2 > >> patch isn't now doing any more work than the previous > >> baseline before the "mm: put readahead pages in cache earlier" > >> patch (*). > >> > >> As such the speed improvement must be coming from increased > >> parallelism. Such as moving from serially reading the > >> readahead blocks to parallel reading. > >> > > Thanks for the idea. I checked this by offlining other cores until > > only one core exists. Removing loops still results in less time. 
> > > > But after counting the #traces lines in squashfs_read_data(): > > If we remove the for loop (timings 4), the logs are less: 2.3K lines, > > while v2 (timings 3) has 3.7K (other timings are also around 3.7K), so > > removing loop doesn't look right. > > If a lot less data is being read than the other timings, then this does > look incorrect. > > > > > I think v2 should be fine considering the slightly to none regression > > compared to before. > > > > The fact the timings are almost identical implies all that needs > to be done to remove the performance regression has been done. > > There are two things missing from the patch which need to > be handled. These are not related to performance but error > handling and correctness. So I have waited until now to > raise it. > > If you look at the code for file_direct.c::squashfs_readpage_block() > > https://elixir.bootlin.com/linux/latest/source/fs/squashfs/file_direct.c#L93 > > **** > res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); > if (res < 0) > goto mark_errored; > > if (res != expected) { > res = -EIO; > goto mark_errored; > } > **** > > You will see that it checks for two return conditions from > squashfs_read_data(). > > If the decompressor returns error, or if the decompressed block > is different in size to that expected, then this is an error situation > (e.g. corrupted filesystem), and the read is marked as bad. > > The current V2 patch doesn't check that the block decompressed > to the correct size (res != expected), and this could mean > filesystem corruption is not detected, which will be an > error handling regression. > Thanks for the review. In v3: Added the check to see if the returned size is expected too. Since we didn't mark error pages here (in readahead), if the size wasn't expected, we just don't mark the page as Uptodate. 
> Secondly, if you look at > https://elixir.bootlin.com/linux/latest/source/fs/squashfs/file_direct.c#L102 > > **** > /* Last page may have trailing bytes not filled */ > bytes = res % PAGE_SIZE; > if (bytes) { > pageaddr = kmap_atomic(page[pages - 1]); > memset(pageaddr + bytes, 0, PAGE_SIZE - bytes); > kunmap_atomic(pageaddr); > } > **** > > The V2 patch is always reading max_pages, but at the > end of a file the last page may not be a full page. This > is because the last block may not be complete (i.e. it is > only 126 Kbytes rather than the block_size of 128 Kbytes). > > This will leave part of the last page unfilled by the decompressor, > and it should be zero filled, to avoid leaking data to user-space. If the size was expected, further check if it's not a full page. If so, fill it with zeros at the end. Performance is the same as v2. > > Phillip > > > > Hi Matthew, what do you think? Do you have other comments? If not, > > should I send a v3 to change Xiongwei Song's email address or can you > > help modify it? > > > > Thanks > > > >> But, without looking at any trace output, that is just a > >> guestimate. > >> > >> Phillip > >> > >> (*) multiply decompressing the same blocks, which > >> is the cause of the performance regression. > >>> > >>> Thanks. > >>> > >>> testing env: > >>> - arm64 on kernel 5.10 > >>> - data: ~ 300K pack file contains some android files > >>> > >>>> fs/squashfs/file.c | 77 +++++++++++++++++++++++++++++++++++++++++++++- > >>>> 1 file changed, 76 insertions(+), 1 deletion(-) > >>>> > >>>> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c > >>>> index a8e495d8eb86..e10a55c5b1eb 100644 > >>>> --- a/fs/squashfs/file.c > >>>> +++ b/fs/squashfs/file.c > >>>> @@ -39,6 +39,7 @@ > >>>> #include "squashfs_fs_sb.h" > >>>> #include "squashfs_fs_i.h" > >>>> #include "squashfs.h" > >>>> +#include "page_actor.h" > >>>> > >>>> /* > >>>> * Locate cache slot in range [offset, index] for specified inode. 
If > >>>> @@ -495,7 +496,81 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) > >>>> return 0; > >>>> } > >>>> > >>>> +static void squashfs_readahead(struct readahead_control *ractl) > >>>> +{ > >>>> + struct inode *inode = ractl->mapping->host; > >>>> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; > >>>> + size_t mask = (1UL << msblk->block_log) - 1; > >>>> + size_t shift = msblk->block_log - PAGE_SHIFT; > >>>> + loff_t start = readahead_pos(ractl) &~ mask; > >>>> + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start; > >>>> + struct squashfs_page_actor *actor; > >>>> + unsigned int nr_pages = 0; > >>>> + struct page **pages; > >>>> + u64 block = 0; > >>>> + int bsize, res, i, index; > >>>> + int file_end = i_size_read(inode) >> msblk->block_log; > >>>> + unsigned int max_pages = 1UL << shift; > >>>> + > >>>> + readahead_expand(ractl, start, (len | mask) + 1); > >>>> + > >>>> + if (file_end == 0) > >>>> + return; > >>>> + > >>>> + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL); > >>>> + if (!pages) > >>>> + return; > >>>> + > >>>> + actor = squashfs_page_actor_init_special(pages, max_pages, 0); > >>>> + if (!actor) > >>>> + goto out; > >>>> + > >>>> + for (;;) { > >>>> + nr_pages = __readahead_batch(ractl, pages, max_pages); > >>>> + if (!nr_pages) > >>>> + break; > >>>> + > >>>> + if (readahead_pos(ractl) >= i_size_read(inode) || > >>>> + nr_pages < max_pages) > >>>> + goto skip_pages; > >>>> + > >>>> + index = pages[0]->index >> shift; > >>>> + if ((pages[nr_pages - 1]->index >> shift) != index) > >>>> + goto skip_pages; > >>>> + > >>>> + bsize = read_blocklist(inode, index, &block); > >>>> + if (bsize == 0) > >>>> + goto skip_pages; > >>>> + > >>>> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, > >>>> + actor); > >>>> + > >>>> + if (res >= 0) > >>>> + for (i = 0; i < nr_pages; i++) > >>>> + SetPageUptodate(pages[i]); > >>>> + > >>>> + for (i = 0; i < nr_pages; i++) { > >>>> + 
unlock_page(pages[i]); > >>>> + put_page(pages[i]); > >>>> + } > >>>> + } > >>>> + > >>>> + kfree(actor); > >>>> + kfree(pages); > >>>> + return; > >>>> + > >>>> +skip_pages: > >>>> + for (i = 0; i < nr_pages; i++) { > >>>> + unlock_page(pages[i]); > >>>> + put_page(pages[i]); > >>>> + } > >>>> + > >>>> + kfree(actor); > >>>> +out: > >>>> + kfree(pages); > >>>> +} > >>>> > >>>> const struct address_space_operations squashfs_aops = { > >>>> - .read_folio = squashfs_read_folio > >>>> + .read_folio = squashfs_read_folio, > >>>> + .readahead = squashfs_readahead > >>>> }; > >>>> -- > >>>> 2.36.0.550.gb090851708-goog > >>>> > >> > |
From: Hsin-Yi W. <hs...@ch...> - 2022-05-23 07:00:32
|
From: Phillip Lougher <ph...@sq...> Squashfs_readahead uses the "file direct" version of the page actor, and so build it unconditionally. Reported-by: kernel test robot <lk...@in...> Signed-off-by: Phillip Lougher <ph...@sq...> Signed-off-by: Hsin-Yi Wang <hs...@ch...> --- fs/squashfs/Makefile | 4 ++-- fs/squashfs/page_actor.h | 41 ---------------------------------------- 2 files changed, 2 insertions(+), 43 deletions(-) diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 7bd9b8b856d0..477c89a519ee 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,9 +5,9 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-y += namei.o super.o symlink.o decompressor.o page_actor.o squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o -squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 2e3073ace009..26e07373af8a 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -6,46 +6,6 @@ * Phillip Lougher <ph...@sq...> */ -#ifndef CONFIG_SQUASHFS_FILE_DIRECT -struct squashfs_page_actor { - void **page; - int pages; - int length; - int next_page; -}; - -static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, - int pages, int length) -{ - struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - - if (actor == NULL) - return NULL; - - actor->length = length ? 
: pages * PAGE_SIZE; - actor->page = page; - actor->pages = pages; - actor->next_page = 0; - return actor; -} - -static inline void *squashfs_first_page(struct squashfs_page_actor *actor) -{ - actor->next_page = 1; - return actor->page[0]; -} - -static inline void *squashfs_next_page(struct squashfs_page_actor *actor) -{ - return actor->next_page == actor->pages ? NULL : - actor->page[actor->next_page++]; -} - -static inline void squashfs_finish_page(struct squashfs_page_actor *actor) -{ - /* empty */ -} -#else struct squashfs_page_actor { union { void **buffer; @@ -76,4 +36,3 @@ static inline void squashfs_finish_page(struct squashfs_page_actor *actor) actor->squashfs_finish_page(actor); } #endif -#endif -- 2.36.1.124.g0e6072fb45-goog |
From: Hsin-Yi W. <hs...@ch...> - 2022-05-23 07:00:32
|
Implement readahead callback for squashfs. It will read datablocks which cover pages in readahead request. For a few cases it will not mark page as uptodate, including: - file end is 0. - zero filled blocks. - current batch of pages isn't in the same datablock or not enough in a datablock. - decompressor error. Otherwise pages will be marked as uptodate. The unhandled pages will be updated by readpage later. Suggested-by: Matthew Wilcox <wi...@in...> Signed-off-by: Hsin-Yi Wang <hs...@ch...> Reported-by: Matthew Wilcox <wi...@in...> Reported-by: Phillip Lougher <ph...@sq...> Reported-by: Xiongwei Song <Xio...@wi...> --- v2->v3: Add checks on - decompressed block size. - fill zeros if the last page is not a full page. v2: https://lore.kernel.org/lkml/202...@ch.../ v1: https://lore.kernel.org/lkml/202...@ch.../ --- fs/squashfs/file.c | 91 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index a8e495d8eb86..c311fc685fe4 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -39,6 +39,7 @@ #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" +#include "page_actor.h" /* * Locate cache slot in range [offset, index] for specified inode. 
If @@ -495,7 +496,95 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) return 0; } +static void squashfs_readahead(struct readahead_control *ractl) +{ + struct inode *inode = ractl->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + size_t mask = (1UL << msblk->block_log) - 1; + size_t shift = msblk->block_log - PAGE_SHIFT; + loff_t start = readahead_pos(ractl) &~ mask; + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start; + struct squashfs_page_actor *actor; + unsigned int nr_pages = 0; + struct page **pages; + u64 block = 0; + int bsize, res, i, index, bytes, expected; + int file_end = i_size_read(inode) >> msblk->block_log; + unsigned int max_pages = 1UL << shift; + void *pageaddr; + + readahead_expand(ractl, start, (len | mask) + 1); + + if (file_end == 0) + return; + + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL); + if (!pages) + return; + + actor = squashfs_page_actor_init_special(pages, max_pages, 0); + if (!actor) + goto out; + + for (;;) { + nr_pages = __readahead_batch(ractl, pages, max_pages); + if (!nr_pages) + break; + + if (readahead_pos(ractl) >= i_size_read(inode) || + nr_pages < max_pages) + goto skip_pages; + + index = pages[0]->index >> shift; + if ((pages[nr_pages - 1]->index >> shift) != index) + goto skip_pages; + + expected = index == file_end ? 
+ (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + + bsize = read_blocklist(inode, index, &block); + if (bsize == 0) + goto skip_pages; + + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, + actor); + + if (res == expected) { + /* Last page may have trailing bytes not filled */ + bytes = res % PAGE_SIZE; + if (bytes) { + pageaddr = kmap_atomic(pages[nr_pages - 1]); + memset(pageaddr + bytes, 0, PAGE_SIZE - bytes); + kunmap_atomic(pageaddr); + } + + for (i = 0; i < nr_pages; i++) + SetPageUptodate(pages[i]); + } + + for (i = 0; i < nr_pages; i++) { + unlock_page(pages[i]); + put_page(pages[i]); + } + } + + kfree(actor); + kfree(pages); + return; + +skip_pages: + for (i = 0; i < nr_pages; i++) { + unlock_page(pages[i]); + put_page(pages[i]); + } + + kfree(actor); +out: + kfree(pages); +} const struct address_space_operations squashfs_aops = { - .read_folio = squashfs_read_folio + .read_folio = squashfs_read_folio, + .readahead = squashfs_readahead }; -- 2.36.1.124.g0e6072fb45-goog |
From: Hsin-Yi W. <hs...@ch...> - 2022-05-23 06:59:59
|
This reverts commit 9eec1d897139e5de287af5d559a02b811b844d82. Revert closing the readahead to squashfs since the readahead callback for squashfs is implemented. Suggested-by: Xiongwei Song <Xio...@wi...> Signed-off-by: Hsin-Yi Wang <hs...@ch...> --- fs/squashfs/super.c | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 6d594ba2ed28..32565dafa7f3 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -29,7 +29,6 @@ #include <linux/module.h> #include <linux/magic.h> #include <linux/xattr.h> -#include <linux/backing-dev.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -113,24 +112,6 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem( return decompressor; } -static int squashfs_bdi_init(struct super_block *sb) -{ - int err; - unsigned int major = MAJOR(sb->s_dev); - unsigned int minor = MINOR(sb->s_dev); - - bdi_put(sb->s_bdi); - sb->s_bdi = &noop_backing_dev_info; - - err = super_setup_bdi_name(sb, "squashfs_%u_%u", major, minor); - if (err) - return err; - - sb->s_bdi->ra_pages = 0; - sb->s_bdi->io_pages = 0; - - return 0; -} static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) { @@ -146,20 +127,6 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) TRACE("Entered squashfs_fill_superblock\n"); - /* - * squashfs provides 'backing_dev_info' in order to disable read-ahead. For - * squashfs, I/O is not deferred, it is done immediately in read_folio, - * which means the user would always have to wait their own I/O. So the effect - * of readahead is very weak for squashfs. squashfs_bdi_init will set - * sb->s_bdi->ra_pages and sb->s_bdi->io_pages to 0 and close readahead for - * squashfs. 
- */ - err = squashfs_bdi_init(sb); - if (err) { - errorf(fc, "squashfs init bdi failed"); - return err; - } - sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); if (sb->s_fs_info == NULL) { ERROR("Failed to allocate squashfs_sb_info\n"); -- 2.36.1.124.g0e6072fb45-goog |
From: Hsin-Yi W. <hs...@ch...> - 2022-05-23 06:59:47
|
Commit c1f6925e1091("mm: put readahead pages in cache earlier") requires fs to implement readahead callback. Otherwise there will be a performance regression. Commit 9eec1d897139("squashfs: provide backing_dev_info in order to disable read-ahead") mitigates the performance drop issue for squashfs by closing readahead for it. This series implements readahead callback for squashfs. The previous discussions are in [1] and [2]. [1] https://lore.kernel.org/all/CAJMQK-g9G6KQmH-V=BRG...@ma.../T/ [2] https://lore.kernel.org/linux-mm/Yn5...@ca.../t/#m4af4473b94f98a4996cb11756b633a07e5e059d1 Hsin-Yi Wang (2): Revert "squashfs: provide backing_dev_info in order to disable read-ahead" squashfs: implement readahead Phillip Lougher (1): squashfs: always build "file direct" version of page actor fs/squashfs/Makefile | 4 +- fs/squashfs/file.c | 91 +++++++++++++++++++++++++++++++++++++++- fs/squashfs/page_actor.h | 41 ------------------ fs/squashfs/super.c | 33 --------------- 4 files changed, 92 insertions(+), 77 deletions(-) -- 2.36.1.124.g0e6072fb45-goog |
From: Phillip L. <ph...@sq...> - 2022-05-20 20:22:13
|
On 20/05/2022 08:38, Hsin-Yi Wang wrote: > On Fri, May 20, 2022 at 11:02 AM Phillip Lougher > <ph...@sq...> wrote: >> >> On 19/05/2022 09:09, Hsin-Yi Wang wrote: >>> On Tue, May 17, 2022 at 4:28 PM Hsin-Yi Wang <hs...@ch...> wrote: >>>> >>>> Implement readahead callback for squashfs. It will read datablocks >>>> which cover pages in readahead request. For a few cases it will >>>> not mark page as uptodate, including: >>>> - file end is 0. >>>> - zero filled blocks. >>>> - current batch of pages isn't in the same datablock or not enough in a >>>> datablock. >>>> Otherwise pages will be marked as uptodate. The unhandled pages will be >>>> updated by readpage later. >>>> >>>> Suggested-by: Matthew Wilcox <wi...@in...> >>>> Signed-off-by: Hsin-Yi Wang <hs...@ch...> >>>> Reported-by: Matthew Wilcox <wi...@in...> >>>> Reported-by: Phillip Lougher <ph...@sq...> >>>> Reported-by: Xiongwei Song <sx...@gm...> >>>> --- >>>> v1->v2: remove unused check on readahead_expand(). >>>> v1: https://lore.kernel.org/lkml/202...@ch.../ >>>> --- >>> >>> Hi Phillip and Matthew, >>> >>> Regarding the performance issue of this patch, I saw a possible >>> performance gain if we only read the first block instead of reading >>> until nr_pages == 0. >>> >>> To be more clear, apply the following diff (Please ignore the skipping >>> of nr_pages check first. This is a demonstration of "only read and >>> update the first block per readahead call"): >>> >>> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c >>> index aad6823f0615..c52f7c4a7cfe 100644 >>> --- a/fs/squashfs/file.c >>> +++ b/fs/squashfs/file.c >>> @@ -524,10 +524,8 @@ static void squashfs_readahead(struct >>> readahead_control *ractl) >>> if (!actor) >>> goto out; >>> >>> - for (;;) { >>> + { >>> nr_pages = __readahead_batch(ractl, pages, max_pages); >>> - if (!nr_pages) >>> - break; >>> >>> if (readahead_pos(ractl) >= i_size_read(inode) || >>> nr_pages < max_pages) >>> >>> >>> All the performance numbers: >>> 1. original: 39s >>> 2. 
revert "mm: put readahead pages in cache earlier": 2.8s >>> 3. v2 of this patch: 2.7s >>> 4. v2 of this patch and apply the diff: 1.8s >>> >>> In my testing data, normally it reads and updates 1~2 blocks per >>> readahead call. The change might not make sense since the performance >>> improvement may only happen in certain cases. >>> What do you think? Or is the performance of the current patch >>> considered reasonable? >> >> It entirely depends on where the speed improvement comes from. >> >> From experience, the speed improvement is probably worthwhile, >> and probably isn't gained at the expense of worse performance >> on other work-loads. >> >> But this is a guestimate, based on the fact timings 2 and 3 >> (2.8s v 2.7s) are almost identical. Which implies the v2 >> patch isn't now doing any more work than the previous >> baseline before the "mm: put readahead pages in cache earlier" >> patch (*). >> >> As such the speed improvement must be coming from increased >> parallelism. Such as moving from serially reading the >> readahead blocks to parallel reading. >> > Thanks for the idea. I checked this by offlining other cores until > only one core exists. Removing loops still results in less time. > > But after counting the #traces lines in squashfs_read_data(): > If we remove the for loop (timings 4), the logs are less: 2.3K lines, > while v2 (timings 3) has 3.7K (other timings are also around 3.7K), so > removing loop doesn't look right. If a lot less data is being read than the other timings, then this does look incorrect. > > I think v2 should be fine considering the slightly to none regression > compared to before. > The fact the timings are almost identical implies all that needs to be done to remove the performance regression has been done. There are two things missing from the patch which need to be handled. These are not related to performance but error handling and correctness. So I have waited until now to raise it. 
If you look at the code for file_direct.c::squashfs_readpage_block() https://elixir.bootlin.com/linux/latest/source/fs/squashfs/file_direct.c#L93 **** res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); if (res < 0) goto mark_errored; if (res != expected) { res = -EIO; goto mark_errored; } **** You will see that it checks for two return conditions from squashfs_read_data(). If the decompressor returns error, or if the decompressed block is different in size to that expected, then this is an error situation (e.g. corrupted filesystem), and the read is marked as bad. The current V2 patch doesn't check that the block decompressed to the correct size (res != expected), and this could mean filesystem corruption is not detected, which will be an error handling regression. Secondly, if you look at https://elixir.bootlin.com/linux/latest/source/fs/squashfs/file_direct.c#L102 **** /* Last page may have trailing bytes not filled */ bytes = res % PAGE_SIZE; if (bytes) { pageaddr = kmap_atomic(page[pages - 1]); memset(pageaddr + bytes, 0, PAGE_SIZE - bytes); kunmap_atomic(pageaddr); } **** The V2 patch is always reading max_pages, but at the end of a file the last page may not be a full page. This is because the last block may not be complete (i.e. it is only 126 Kbytes rather than the block_size of 128 Kbytes). This will leave part of the last page unfilled by the decompressor, and it should be zero filled, to avoid leaking data to user-space. Phillip > Hi Matthew, what do you think? Do you have other comments? If not, > should I send a v3 to change Xiongwei Song's email address or can you > help modify it? > > Thanks > >> But, without looking at any trace output, that is just a >> guestimate. >> >> Phillip >> >> (*) multiply decompressing the same blocks, which >> is the cause of the performance regression. >>> >>> Thanks. 
>>> >>> testing env: >>> - arm64 on kernel 5.10 >>> - data: ~ 300K pack file contains some android files >>> >>>> fs/squashfs/file.c | 77 +++++++++++++++++++++++++++++++++++++++++++++- >>>> 1 file changed, 76 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c >>>> index a8e495d8eb86..e10a55c5b1eb 100644 >>>> --- a/fs/squashfs/file.c >>>> +++ b/fs/squashfs/file.c >>>> @@ -39,6 +39,7 @@ >>>> #include "squashfs_fs_sb.h" >>>> #include "squashfs_fs_i.h" >>>> #include "squashfs.h" >>>> +#include "page_actor.h" >>>> >>>> /* >>>> * Locate cache slot in range [offset, index] for specified inode. If >>>> @@ -495,7 +496,81 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) >>>> return 0; >>>> } >>>> >>>> +static void squashfs_readahead(struct readahead_control *ractl) >>>> +{ >>>> + struct inode *inode = ractl->mapping->host; >>>> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; >>>> + size_t mask = (1UL << msblk->block_log) - 1; >>>> + size_t shift = msblk->block_log - PAGE_SHIFT; >>>> + loff_t start = readahead_pos(ractl) &~ mask; >>>> + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start; >>>> + struct squashfs_page_actor *actor; >>>> + unsigned int nr_pages = 0; >>>> + struct page **pages; >>>> + u64 block = 0; >>>> + int bsize, res, i, index; >>>> + int file_end = i_size_read(inode) >> msblk->block_log; >>>> + unsigned int max_pages = 1UL << shift; >>>> + >>>> + readahead_expand(ractl, start, (len | mask) + 1); >>>> + >>>> + if (file_end == 0) >>>> + return; >>>> + >>>> + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL); >>>> + if (!pages) >>>> + return; >>>> + >>>> + actor = squashfs_page_actor_init_special(pages, max_pages, 0); >>>> + if (!actor) >>>> + goto out; >>>> + >>>> + for (;;) { >>>> + nr_pages = __readahead_batch(ractl, pages, max_pages); >>>> + if (!nr_pages) >>>> + break; >>>> + >>>> + if (readahead_pos(ractl) >= i_size_read(inode) || >>>> + nr_pages < 
max_pages) >>>> + goto skip_pages; >>>> + >>>> + index = pages[0]->index >> shift; >>>> + if ((pages[nr_pages - 1]->index >> shift) != index) >>>> + goto skip_pages; >>>> + >>>> + bsize = read_blocklist(inode, index, &block); >>>> + if (bsize == 0) >>>> + goto skip_pages; >>>> + >>>> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, >>>> + actor); >>>> + >>>> + if (res >= 0) >>>> + for (i = 0; i < nr_pages; i++) >>>> + SetPageUptodate(pages[i]); >>>> + >>>> + for (i = 0; i < nr_pages; i++) { >>>> + unlock_page(pages[i]); >>>> + put_page(pages[i]); >>>> + } >>>> + } >>>> + >>>> + kfree(actor); >>>> + kfree(pages); >>>> + return; >>>> + >>>> +skip_pages: >>>> + for (i = 0; i < nr_pages; i++) { >>>> + unlock_page(pages[i]); >>>> + put_page(pages[i]); >>>> + } >>>> + >>>> + kfree(actor); >>>> +out: >>>> + kfree(pages); >>>> +} >>>> >>>> const struct address_space_operations squashfs_aops = { >>>> - .read_folio = squashfs_read_folio >>>> + .read_folio = squashfs_read_folio, >>>> + .readahead = squashfs_readahead >>>> }; >>>> -- >>>> 2.36.0.550.gb090851708-goog >>>> >> |
From: Hsin-Yi W. <hs...@ch...> - 2022-05-20 07:38:40
|
On Fri, May 20, 2022 at 11:02 AM Phillip Lougher <ph...@sq...> wrote: > > On 19/05/2022 09:09, Hsin-Yi Wang wrote: > > On Tue, May 17, 2022 at 4:28 PM Hsin-Yi Wang <hs...@ch...> wrote: > >> > >> Implement readahead callback for squashfs. It will read datablocks > >> which cover pages in readahead request. For a few cases it will > >> not mark page as uptodate, including: > >> - file end is 0. > >> - zero filled blocks. > >> - current batch of pages isn't in the same datablock or not enough in a > >> datablock. > >> Otherwise pages will be marked as uptodate. The unhandled pages will be > >> updated by readpage later. > >> > >> Suggested-by: Matthew Wilcox <wi...@in...> > >> Signed-off-by: Hsin-Yi Wang <hs...@ch...> > >> Reported-by: Matthew Wilcox <wi...@in...> > >> Reported-by: Phillip Lougher <ph...@sq...> > >> Reported-by: Xiongwei Song <sx...@gm...> > >> --- > >> v1->v2: remove unused check on readahead_expand(). > >> v1: https://lore.kernel.org/lkml/202...@ch.../ > >> --- > > > > Hi Phillip and Matthew, > > > > Regarding the performance issue of this patch, I saw a possible > > performance gain if we only read the first block instead of reading > > until nr_pages == 0. > > > > To be more clear, apply the following diff (Please ignore the skipping > > of nr_pages check first. This is a demonstration of "only read and > > update the first block per readahead call"): > > > > diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c > > index aad6823f0615..c52f7c4a7cfe 100644 > > --- a/fs/squashfs/file.c > > +++ b/fs/squashfs/file.c > > @@ -524,10 +524,8 @@ static void squashfs_readahead(struct > > readahead_control *ractl) > > if (!actor) > > goto out; > > > > - for (;;) { > > + { > > nr_pages = __readahead_batch(ractl, pages, max_pages); > > - if (!nr_pages) > > - break; > > > > if (readahead_pos(ractl) >= i_size_read(inode) || > > nr_pages < max_pages) > > > > > > All the performance numbers: > > 1. original: 39s > > 2. 
revert "mm: put readahead pages in cache earlier": 2.8s > > 3. v2 of this patch: 2.7s > > 4. v2 of this patch and apply the diff: 1.8s > > > > In my testing data, normally it reads and updates 1~2 blocks per > > readahead call. The change might not make sense since the performance > > improvement may only happen in certain cases. > > What do you think? Or is the performance of the current patch > > considered reasonable? > > It entirely depends on where the speed improvement comes from. > > From experience, the speed improvement is probably worthwhile, > and probably isn't gained at the expense of worse performance > on other work-loads. > > But this is a guestimate, based on the fact timings 2 and 3 > (2.8s v 2.7s) are almost identical. Which implies the v2 > patch isn't now doing any more work than the previous > baseline before the "mm: put readahead pages in cache earlier" > patch (*). > > As such the speed improvement must be coming from increased > parallelism. Such as moving from serially reading the > readahead blocks to parallel reading. > Thanks for the idea. I checked this by offlining other cores until only one core exists. Removing loops still results in less time. But after counting the #traces lines in squashfs_read_data(): If we remove the for loop (timings 4), the logs are less: 2.3K lines, while v2 (timings 3) has 3.7K (other timings are also around 3.7K), so removing loop doesn't look right. I think v2 should be fine considering the slightly to none regression compared to before. Hi Matthew, what do you think? Do you have other comments? If not, should I send a v3 to change Xiongwei Song's email address or can you help modify it? Thanks > But, without looking at any trace output, that is just a > guestimate. > > Phillip > > (*) multiply decompressing the same blocks, which > is the cause of the performance regression. > > > > Thanks. 
> > > > testing env: > > - arm64 on kernel 5.10 > > - data: ~ 300K pack file contains some android files > > > >> fs/squashfs/file.c | 77 +++++++++++++++++++++++++++++++++++++++++++++- > >> 1 file changed, 76 insertions(+), 1 deletion(-) > >> > >> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c > >> index a8e495d8eb86..e10a55c5b1eb 100644 > >> --- a/fs/squashfs/file.c > >> +++ b/fs/squashfs/file.c > >> @@ -39,6 +39,7 @@ > >> #include "squashfs_fs_sb.h" > >> #include "squashfs_fs_i.h" > >> #include "squashfs.h" > >> +#include "page_actor.h" > >> > >> /* > >> * Locate cache slot in range [offset, index] for specified inode. If > >> @@ -495,7 +496,81 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) > >> return 0; > >> } > >> > >> +static void squashfs_readahead(struct readahead_control *ractl) > >> +{ > >> + struct inode *inode = ractl->mapping->host; > >> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; > >> + size_t mask = (1UL << msblk->block_log) - 1; > >> + size_t shift = msblk->block_log - PAGE_SHIFT; > >> + loff_t start = readahead_pos(ractl) &~ mask; > >> + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start; > >> + struct squashfs_page_actor *actor; > >> + unsigned int nr_pages = 0; > >> + struct page **pages; > >> + u64 block = 0; > >> + int bsize, res, i, index; > >> + int file_end = i_size_read(inode) >> msblk->block_log; > >> + unsigned int max_pages = 1UL << shift; > >> + > >> + readahead_expand(ractl, start, (len | mask) + 1); > >> + > >> + if (file_end == 0) > >> + return; > >> + > >> + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL); > >> + if (!pages) > >> + return; > >> + > >> + actor = squashfs_page_actor_init_special(pages, max_pages, 0); > >> + if (!actor) > >> + goto out; > >> + > >> + for (;;) { > >> + nr_pages = __readahead_batch(ractl, pages, max_pages); > >> + if (!nr_pages) > >> + break; > >> + > >> + if (readahead_pos(ractl) >= i_size_read(inode) || > >> + nr_pages < 
max_pages) > >> + goto skip_pages; > >> + > >> + index = pages[0]->index >> shift; > >> + if ((pages[nr_pages - 1]->index >> shift) != index) > >> + goto skip_pages; > >> + > >> + bsize = read_blocklist(inode, index, &block); > >> + if (bsize == 0) > >> + goto skip_pages; > >> + > >> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, > >> + actor); > >> + > >> + if (res >= 0) > >> + for (i = 0; i < nr_pages; i++) > >> + SetPageUptodate(pages[i]); > >> + > >> + for (i = 0; i < nr_pages; i++) { > >> + unlock_page(pages[i]); > >> + put_page(pages[i]); > >> + } > >> + } > >> + > >> + kfree(actor); > >> + kfree(pages); > >> + return; > >> + > >> +skip_pages: > >> + for (i = 0; i < nr_pages; i++) { > >> + unlock_page(pages[i]); > >> + put_page(pages[i]); > >> + } > >> + > >> + kfree(actor); > >> +out: > >> + kfree(pages); > >> +} > >> > >> const struct address_space_operations squashfs_aops = { > >> - .read_folio = squashfs_read_folio > >> + .read_folio = squashfs_read_folio, > >> + .readahead = squashfs_readahead > >> }; > >> -- > >> 2.36.0.550.gb090851708-goog > >> > |
From: Phillip L. <ph...@sq...> - 2022-05-20 03:02:26
|
On 19/05/2022 09:09, Hsin-Yi Wang wrote: > On Tue, May 17, 2022 at 4:28 PM Hsin-Yi Wang <hs...@ch...> wrote: >> >> Implement readahead callback for squashfs. It will read datablocks >> which cover pages in readahead request. For a few cases it will >> not mark page as uptodate, including: >> - file end is 0. >> - zero filled blocks. >> - current batch of pages isn't in the same datablock or not enough in a >> datablock. >> Otherwise pages will be marked as uptodate. The unhandled pages will be >> updated by readpage later. >> >> Suggested-by: Matthew Wilcox <wi...@in...> >> Signed-off-by: Hsin-Yi Wang <hs...@ch...> >> Reported-by: Matthew Wilcox <wi...@in...> >> Reported-by: Phillip Lougher <ph...@sq...> >> Reported-by: Xiongwei Song <sx...@gm...> >> --- >> v1->v2: remove unused check on readahead_expand(). >> v1: https://lore.kernel.org/lkml/202...@ch.../ >> --- > > Hi Phillip and Matthew, > > Regarding the performance issue of this patch, I saw a possible > performance gain if we only read the first block instead of reading > until nr_pages == 0. > > To be more clear, apply the following diff (Please ignore the skipping > of nr_pages check first. This is a demonstration of "only read and > update the first block per readahead call"): > > diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c > index aad6823f0615..c52f7c4a7cfe 100644 > --- a/fs/squashfs/file.c > +++ b/fs/squashfs/file.c > @@ -524,10 +524,8 @@ static void squashfs_readahead(struct > readahead_control *ractl) > if (!actor) > goto out; > > - for (;;) { > + { > nr_pages = __readahead_batch(ractl, pages, max_pages); > - if (!nr_pages) > - break; > > if (readahead_pos(ractl) >= i_size_read(inode) || > nr_pages < max_pages) > > > All the performance numbers: > 1. original: 39s > 2. revert "mm: put readahead pages in cache earlier": 2.8s > 3. v2 of this patch: 2.7s > 4. v2 of this patch and apply the diff: 1.8s > > In my testing data, normally it reads and updates 1~2 blocks per > readahead call. 
The change might not make sense since the performance > improvement may only happen in certain cases. > What do you think? Or is the performance of the current patch > considered reasonable? It entirely depends on where the speed improvement comes from. From experience, the speed improvement is probably worthwhile, and probably isn't gained at the expense of worse performance on other work-loads. But this is a guestimate, based on the fact timings 2 and 3 (2.8s v 2.7s) are almost identical. Which implies the v2 patch isn't now doing any more work than the previous baseline before the "mm: put readahead pages in cache earlier" patch (*). As such the speed improvement must be coming from increased parallelism. Such as moving from serially reading the readahead blocks to parallel reading. But, without looking at any trace output, that is just a guestimate. Phillip (*) multiply decompressing the same blocks, which is the cause of the performance regression. > > Thanks. > > testing env: > - arm64 on kernel 5.10 > - data: ~ 300K pack file contains some android files > >> fs/squashfs/file.c | 77 +++++++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 76 insertions(+), 1 deletion(-) >> >> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c >> index a8e495d8eb86..e10a55c5b1eb 100644 >> --- a/fs/squashfs/file.c >> +++ b/fs/squashfs/file.c >> @@ -39,6 +39,7 @@ >> #include "squashfs_fs_sb.h" >> #include "squashfs_fs_i.h" >> #include "squashfs.h" >> +#include "page_actor.h" >> >> /* >> * Locate cache slot in range [offset, index] for specified inode. 
If >> @@ -495,7 +496,81 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) >> return 0; >> } >> >> +static void squashfs_readahead(struct readahead_control *ractl) >> +{ >> + struct inode *inode = ractl->mapping->host; >> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; >> + size_t mask = (1UL << msblk->block_log) - 1; >> + size_t shift = msblk->block_log - PAGE_SHIFT; >> + loff_t start = readahead_pos(ractl) &~ mask; >> + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start; >> + struct squashfs_page_actor *actor; >> + unsigned int nr_pages = 0; >> + struct page **pages; >> + u64 block = 0; >> + int bsize, res, i, index; >> + int file_end = i_size_read(inode) >> msblk->block_log; >> + unsigned int max_pages = 1UL << shift; >> + >> + readahead_expand(ractl, start, (len | mask) + 1); >> + >> + if (file_end == 0) >> + return; >> + >> + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL); >> + if (!pages) >> + return; >> + >> + actor = squashfs_page_actor_init_special(pages, max_pages, 0); >> + if (!actor) >> + goto out; >> + >> + for (;;) { >> + nr_pages = __readahead_batch(ractl, pages, max_pages); >> + if (!nr_pages) >> + break; >> + >> + if (readahead_pos(ractl) >= i_size_read(inode) || >> + nr_pages < max_pages) >> + goto skip_pages; >> + >> + index = pages[0]->index >> shift; >> + if ((pages[nr_pages - 1]->index >> shift) != index) >> + goto skip_pages; >> + >> + bsize = read_blocklist(inode, index, &block); >> + if (bsize == 0) >> + goto skip_pages; >> + >> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, >> + actor); >> + >> + if (res >= 0) >> + for (i = 0; i < nr_pages; i++) >> + SetPageUptodate(pages[i]); >> + >> + for (i = 0; i < nr_pages; i++) { >> + unlock_page(pages[i]); >> + put_page(pages[i]); >> + } >> + } >> + >> + kfree(actor); >> + kfree(pages); >> + return; >> + >> +skip_pages: >> + for (i = 0; i < nr_pages; i++) { >> + unlock_page(pages[i]); >> + put_page(pages[i]); >> + } >> + 
>> + kfree(actor); >> +out: >> + kfree(pages); >> +} >> >> const struct address_space_operations squashfs_aops = { >> - .read_folio = squashfs_read_folio >> + .read_folio = squashfs_read_folio, >> + .readahead = squashfs_readahead >> }; >> -- >> 2.36.0.550.gb090851708-goog >> |