Merge tag 'erofs-for-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "Still no new features for this cycle, as some ongoing improvements
  remain premature for now.

  This includes a micro-optimization for the superblock checksum, along
  with minor bugfixes and code cleanups, as usual:

   - Micro-optimize superblock checksum

   - Avoid overly large bvecs[] for file-backed mounts

   - Some leftover folio conversion in z_erofs_bind_cache()

   - Minor bugfixes and cleanups"

* tag 'erofs-for-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: refine z_erofs_get_extent_compressedlen()
  erofs: remove dead code in erofs_fc_parse_param
  erofs: return SHRINK_EMPTY if no objects to free
  erofs: convert z_erofs_bind_cache() to folios
  erofs: tidy up zdata.c
  erofs: get rid of `z_erofs_next_pcluster_t`
  erofs: simplify z_erofs_load_compact_lcluster()
  erofs: fix potential return value overflow of z_erofs_shrink_scan()
  erofs: shorten bvecs[] for file-backed mounts
  erofs: micro-optimize superblock checksum
  fs: erofs: xattr.c change kzalloc to kcalloc
commit c2da8b3f91
Linus Torvalds, 2025-01-25 20:03:04 -08:00
8 changed files with 162 additions and 272 deletions

fs/erofs/compress.h

@ -29,29 +29,8 @@ struct z_erofs_decompressor {
char *name;
};
/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
/*
* For all pages in a pcluster, page->private should be one of
* Type Last 2bits page->private
* short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
* preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE
* cached/managed page 00 pointer to z_erofs_pcluster
* online page (file-backed, 01/10/11 sub-index << 2 | count
* some pages can be used for inplace I/O)
*
* page->mapping should be one of
* Type page->mapping
* short-lived page NULL
* preallocated page NULL
* cached/managed page non-NULL or NULL (invalidated/truncated page)
* online page non-NULL
*
* For all managed pages, PG_private should be set with 1 extra refcount,
* which is used for page reclaim / migration.
*/
#define Z_EROFS_PREALLOCATED_FOLIO ((void *)(-2UL << 2))
/*
* Currently, short-lived pages are pages directly from buddy system

fs/erofs/erofs_fs.h

@ -9,6 +9,7 @@
#ifndef __EROFS_FS_H
#define __EROFS_FS_H
/* to allow for x86 boot sectors and other oddities. */
#define EROFS_SUPER_OFFSET 1024
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
@ -54,7 +55,7 @@ struct erofs_deviceslot {
/* erofs on-disk super block (currently 128 bytes) */
struct erofs_super_block {
__le32 magic; /* file system magic number */
__le32 checksum; /* crc32c(super_block) */
__le32 checksum; /* crc32c to avoid unexpected on-disk overlap */
__le32 feature_compat;
__u8 blkszbits; /* filesystem block size in bit shift */
__u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */

fs/erofs/fileio.c

@ -6,7 +6,7 @@
#include <trace/events/erofs.h>
struct erofs_fileio_rq {
struct bio_vec bvecs[BIO_MAX_VECS];
struct bio_vec bvecs[16];
struct bio bio;
struct kiocb iocb;
struct super_block *sb;
@ -68,7 +68,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
struct erofs_fileio_rq *rq = kzalloc(sizeof(*rq),
GFP_KERNEL | __GFP_NOFAIL);
bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
bio_init(&rq->bio, NULL, rq->bvecs, ARRAY_SIZE(rq->bvecs), REQ_OP_READ);
rq->iocb.ki_filp = mdev->m_dif->file;
rq->sb = mdev->m_sb;
return rq;

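Rough sizing context, not taken from the patch itself and assuming the usual 64-bit layout (16-byte struct bio_vec, BIO_MAX_VECS of 256): the embedded array shrinks from about 4 KiB to 256 bytes per in-flight request, which matters because each request is kzalloc'ed with __GFP_NOFAIL. Passing ARRAY_SIZE() to bio_init() also ties the bio's capacity to the array itself, so a later resize of bvecs[] cannot silently overflow it.

/* hypothetical sizing, assuming 16-byte struct bio_vec on 64-bit:
 *   before: 256 (BIO_MAX_VECS) * 16 bytes = 4096 bytes of bvecs per request
 *   after:   16                * 16 bytes =  256 bytes, still plenty for a
 *            single pcluster-sized read
 */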
fs/erofs/super.c

@ -39,29 +39,21 @@ void _erofs_printk(struct super_block *sb, const char *fmt, ...)
static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
{
size_t len = 1 << EROFS_SB(sb)->blkszbits;
struct erofs_super_block *dsb;
u32 expected_crc, crc;
struct erofs_super_block *dsb = sbdata + EROFS_SUPER_OFFSET;
u32 len = 1 << EROFS_SB(sb)->blkszbits, crc;
if (len > EROFS_SUPER_OFFSET)
len -= EROFS_SUPER_OFFSET;
len -= offsetof(struct erofs_super_block, checksum) +
sizeof(dsb->checksum);
dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, len, GFP_KERNEL);
if (!dsb)
return -ENOMEM;
expected_crc = le32_to_cpu(dsb->checksum);
dsb->checksum = 0;
/* to allow for x86 boot sectors and other oddities. */
crc = crc32c(~0, dsb, len);
kfree(dsb);
if (crc != expected_crc) {
erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
crc, expected_crc);
return -EBADMSG;
}
return 0;
/* skip .magic(pre-verified) and .checksum(0) fields */
crc = crc32c(0x5045B54A, (&dsb->checksum) + 1, len);
if (crc == le32_to_cpu(dsb->checksum))
return 0;
erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
crc, le32_to_cpu(dsb->checksum));
return -EBADMSG;
}
static void erofs_inode_init_once(void *ptr)
@ -516,8 +508,6 @@ static int erofs_fc_parse_param(struct fs_context *fc,
errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
#endif
break;
default:
return -ENOPARAM;
}
return 0;
}

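To see why the new checksum path is cheaper: the old code duplicated the whole block with kmemdup() just to zero the checksum field, while the new one starts from a precomputed CRC state and hashes only the bytes after the checksum. Below is a standalone userspace sketch of the same trick; the bitwise crc32c() is an illustrative stand-in for the kernel helper (same convention: the seed is passed straight in, no final inversion), and the buffer handling is simplified.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* reflected CRC-32C (Castagnoli); matches the kernel crc32c() convention */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

/* layout assumed here: bytes 0..3 = magic, bytes 4..7 = checksum */

/* old style: copy the block, zero the checksum field, hash everything */
static uint32_t csum_slow(const void *sb, size_t len)
{
	uint32_t copy[1024];	/* demo only: assumes len <= 4096 */

	memcpy(copy, sb, len);
	copy[1] = 0;		/* zero the checksum field (bytes 4..7) */
	return crc32c(~0u, copy, len);
}

/* new style: fold "magic + zeroed checksum" into the seed once, then hash
 * only the remaining bytes in place, with no temporary copy at all */
static uint32_t csum_fast(const void *sb, size_t len)
{
	uint32_t head[2] = { 0, 0 };
	uint32_t seed;

	memcpy(head, sb, sizeof(uint32_t));	/* keep the magic, checksum stays 0 */
	seed = crc32c(~0u, head, sizeof(head));
	return crc32c(seed, (const uint8_t *)sb + sizeof(head),
		      len - sizeof(head));
}

Both helpers return the same value for the same on-disk block because a streaming CRC over A followed by B equals the CRC over A||B. Going by the "skip .magic(pre-verified) and .checksum(0) fields" comment in the hunk above, 0x5045B54A is that folded seed, hard-coded so it never has to be derived at mount time.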
fs/erofs/xattr.c

@ -478,7 +478,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb)
if (!sbi->xattr_prefix_count)
return 0;
pfs = kzalloc(sbi->xattr_prefix_count * sizeof(*pfs), GFP_KERNEL);
pfs = kcalloc(sbi->xattr_prefix_count, sizeof(*pfs), GFP_KERNEL);
if (!pfs)
return -ENOMEM;

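The two calls are functionally equivalent when the product cannot overflow; kcalloc() is simply the idiomatic form because it checks the multiplication and returns NULL instead of a short buffer. Illustrative comparison (count stands in for sbi->xattr_prefix_count):

/* open-coded multiply: wraps silently if count * sizeof(*pfs) overflows */
pfs = kzalloc(count * sizeof(*pfs), GFP_KERNEL);

/* checked multiply: fails cleanly on overflow */
pfs = kcalloc(count, sizeof(*pfs), GFP_KERNEL);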
fs/erofs/zdata.c

@ -12,12 +12,6 @@
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
#define Z_EROFS_INLINE_BVECS 2
/*
* let's leave a type here in case of introducing
* another tagged pointer later.
*/
typedef void *z_erofs_next_pcluster_t;
struct z_erofs_bvec {
struct page *page;
int offset;
@ -48,7 +42,7 @@ struct z_erofs_pcluster {
struct lockref lockref;
/* A: point to next chained pcluster or TAILs */
z_erofs_next_pcluster_t next;
struct z_erofs_pcluster *next;
/* I: start block address of this pcluster */
erofs_off_t index;
@ -94,12 +88,11 @@ struct z_erofs_pcluster {
/* the end of a chain of pclusters */
#define Z_EROFS_PCLUSTER_TAIL ((void *) 0x700 + POISON_POINTER_DELTA)
#define Z_EROFS_PCLUSTER_NIL (NULL)
struct z_erofs_decompressqueue {
struct super_block *sb;
struct z_erofs_pcluster *head;
atomic_t pending_bios;
z_erofs_next_pcluster_t head;
union {
struct completion done;
@ -462,39 +455,32 @@ err_decompressor:
}
enum z_erofs_pclustermode {
/* It has previously been linked into another processing chain */
Z_EROFS_PCLUSTER_INFLIGHT,
/*
* a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it
* could be dispatched into bypass queue later due to uptodated managed
* pages. All related online pages cannot be reused for inplace I/O (or
* bvpage) since it can be directly decoded without I/O submission.
* A weaker form of Z_EROFS_PCLUSTER_FOLLOWED; the difference is that it
* may be dispatched to the bypass queue later due to uptodated managed
* folios. All file-backed folios related to this pcluster cannot be
* reused for in-place I/O (or bvpage) since the pcluster may be decoded
* in a separate queue (and thus out of order).
*/
Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
/*
* The pcluster was just linked to a decompression chain by us. It can
* also be linked with the remaining pclusters, which means if the
* processing page is the tail page of a pcluster, this pcluster can
* safely use the whole page (since the previous pcluster is within the
* same chain) for in-place I/O, as illustrated below:
* ___________________________________________________
* | tail (partial) page | head (partial) page |
* | (of the current pcl) | (of the previous pcl) |
* |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____|
*
* [ (*) the page above can be used as inplace I/O. ]
* The pcluster has just been linked to our processing chain.
* File-backed folios (except for the head page) related to it can be
* used for in-place I/O (or bvpage).
*/
Z_EROFS_PCLUSTER_FOLLOWED,
};
struct z_erofs_decompress_frontend {
struct z_erofs_frontend {
struct inode *const inode;
struct erofs_map_blocks map;
struct z_erofs_bvec_iter biter;
struct page *pagepool;
struct page *candidate_bvpage;
struct z_erofs_pcluster *pcl;
z_erofs_next_pcluster_t owned_head;
struct z_erofs_pcluster *pcl, *head;
enum z_erofs_pclustermode mode;
erofs_off_t headoffset;
@ -503,11 +489,11 @@ struct z_erofs_decompress_frontend {
unsigned int icur;
};
#define DECOMPRESS_FRONTEND_INIT(__i) { \
.inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
.mode = Z_EROFS_PCLUSTER_FOLLOWED }
#define Z_EROFS_DEFINE_FRONTEND(fe, i, ho) struct z_erofs_frontend fe = { \
.inode = i, .head = Z_EROFS_PCLUSTER_TAIL, \
.mode = Z_EROFS_PCLUSTER_FOLLOWED, .headoffset = ho }
static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
static bool z_erofs_should_alloc_cache(struct z_erofs_frontend *fe)
{
unsigned int cachestrategy = EROFS_I_SB(fe->inode)->opt.cache_strategy;
@ -524,19 +510,17 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
return false;
}
static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
{
struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode));
struct z_erofs_pcluster *pcl = fe->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
bool shouldalloc = z_erofs_should_alloc_cache(fe);
bool standalone = true;
/*
* optimistic allocation without direct reclaim since inplace I/O
* can be used if low memory otherwise.
*/
bool may_bypass = true;
/* Optimistic allocation, as in-place I/O can be used as a fallback */
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
struct folio *folio, *newfolio;
unsigned int i;
if (i_blocksize(fe->inode) != PAGE_SIZE ||
@ -544,47 +528,42 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
return;
for (i = 0; i < pclusterpages; ++i) {
struct page *page, *newpage;
/* Inaccurate check w/o locking to avoid unneeded lookups */
if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
page = find_get_page(mc, pcl->index + i);
if (!page) {
/* I/O is needed, no possible to decompress directly */
standalone = false;
folio = filemap_get_folio(mc, pcl->index + i);
if (IS_ERR(folio)) {
may_bypass = false;
if (!shouldalloc)
continue;
/*
* Try cached I/O if allocation succeeds or fallback to
* in-place I/O instead to avoid any direct reclaim.
* Allocate a managed folio for cached I/O, or it may be
* then filled with a file-backed folio for in-place I/O
*/
newpage = erofs_allocpage(&fe->pagepool, gfp);
if (!newpage)
newfolio = filemap_alloc_folio(gfp, 0);
if (!newfolio)
continue;
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
newfolio->private = Z_EROFS_PREALLOCATED_FOLIO;
folio = NULL;
}
spin_lock(&pcl->lockref.lock);
if (!pcl->compressed_bvecs[i].page) {
pcl->compressed_bvecs[i].page = page ? page : newpage;
pcl->compressed_bvecs[i].page =
folio_page(folio ?: newfolio, 0);
spin_unlock(&pcl->lockref.lock);
continue;
}
spin_unlock(&pcl->lockref.lock);
if (page)
put_page(page);
else if (newpage)
erofs_pagepool_add(&fe->pagepool, newpage);
folio_put(folio ?: newfolio);
}
/*
* don't do inplace I/O if all compressed pages are available in
* managed cache since it can be moved to the bypass queue instead.
* Don't perform in-place I/O if all compressed pages are available in
* the managed cache, as the pcluster can be moved to the bypass queue.
*/
if (standalone)
if (may_bypass)
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}
@ -681,7 +660,7 @@ int erofs_init_managed_cache(struct super_block *sb)
}
/* callers must be with pcluster lock held */
static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
static int z_erofs_attach_page(struct z_erofs_frontend *fe,
struct z_erofs_bvec *bvec, bool exclusive)
{
struct z_erofs_pcluster *pcl = fe->pcl;
@ -727,7 +706,7 @@ static bool z_erofs_get_pcluster(struct z_erofs_pcluster *pcl)
return true;
}
static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
@ -751,9 +730,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = 0;
pcl->partial = true;
/* new pclusters should be claimed as type 1, primary and followed */
pcl->next = fe->owned_head;
pcl->next = fe->head;
pcl->pageofs_out = map->m_la & ~PAGE_MASK;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
@ -789,8 +766,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
goto err_out;
}
}
fe->owned_head = &pcl->next;
fe->pcl = pcl;
fe->head = fe->pcl = pcl;
return 0;
err_out:
@ -799,7 +775,7 @@ err_out:
return err;
}
static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
@ -809,7 +785,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
DBG_BUGON(fe->pcl);
/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
DBG_BUGON(!fe->head);
if (!(map->m_flags & EROFS_MAP_META)) {
while (1) {
@ -837,10 +813,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
if (ret == -EEXIST) {
mutex_lock(&fe->pcl->lock);
/* check if this pcluster hasn't been linked into any chain. */
if (cmpxchg(&fe->pcl->next, Z_EROFS_PCLUSTER_NIL,
fe->owned_head) == Z_EROFS_PCLUSTER_NIL) {
if (!cmpxchg(&fe->pcl->next, NULL, fe->head)) {
/* .. so it can be attached to our submission chain */
fe->owned_head = &fe->pcl->next;
fe->head = fe->pcl;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
} else { /* otherwise, it belongs to an inflight chain */
fe->mode = Z_EROFS_PCLUSTER_INFLIGHT;
@ -873,14 +848,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
return 0;
}
/*
* keep in mind that no referenced pclusters will be freed
* only after a RCU grace period.
*/
static void z_erofs_rcu_callback(struct rcu_head *head)
{
z_erofs_free_pcluster(container_of(head,
struct z_erofs_pcluster, rcu));
z_erofs_free_pcluster(container_of(head, struct z_erofs_pcluster, rcu));
}
static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
@ -922,12 +892,10 @@ static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
return free;
}
unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
unsigned long nr_shrink)
unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, unsigned long nr)
{
struct z_erofs_pcluster *pcl;
unsigned int freed = 0;
unsigned long index;
unsigned long index, freed = 0;
xa_lock(&sbi->managed_pslots);
xa_for_each(&sbi->managed_pslots, index, pcl) {
@ -937,7 +905,7 @@ unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
xa_unlock(&sbi->managed_pslots);
++freed;
if (!--nr_shrink)
if (!--nr)
return freed;
xa_lock(&sbi->managed_pslots);
}
@ -966,7 +934,7 @@ static void z_erofs_put_pcluster(struct erofs_sb_info *sbi,
call_rcu(&pcl->rcu, z_erofs_rcu_callback);
}
static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
static void z_erofs_pcluster_end(struct z_erofs_frontend *fe)
{
struct z_erofs_pcluster *pcl = fe->pcl;
@ -979,13 +947,9 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
if (fe->candidate_bvpage)
fe->candidate_bvpage = NULL;
/*
* if all pending pages are added, don't hold its reference
* any longer if the pcluster isn't hosted by ourselves.
*/
/* Drop refcount if it doesn't belong to our processing chain */
if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
z_erofs_put_pcluster(EROFS_I_SB(fe->inode), pcl, false);
fe->pcl = NULL;
}
@ -1014,7 +978,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio,
return 0;
}
static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f,
static int z_erofs_scan_folio(struct z_erofs_frontend *f,
struct folio *folio, bool ra)
{
struct inode *const inode = f->inode;
@ -1129,7 +1093,7 @@ static bool z_erofs_page_is_invalidated(struct page *page)
return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
}
struct z_erofs_decompress_backend {
struct z_erofs_backend {
struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES];
struct super_block *sb;
struct z_erofs_pcluster *pcl;
@ -1149,7 +1113,7 @@ struct z_erofs_bvec_item {
struct list_head list;
};
static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be,
struct z_erofs_bvec *bvec)
{
struct z_erofs_bvec_item *item;
@ -1172,8 +1136,7 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
list_add(&item->list, &be->decompressed_secondary_bvecs);
}
static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
int err)
static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err)
{
unsigned int off0 = be->pcl->pageofs_out;
struct list_head *p, *n;
@ -1214,7 +1177,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
}
}
static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
static void z_erofs_parse_out_bvecs(struct z_erofs_backend *be)
{
struct z_erofs_pcluster *pcl = be->pcl;
struct z_erofs_bvec_iter biter;
@ -1239,8 +1202,7 @@ static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
}
static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
bool *overlapped)
static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
{
struct z_erofs_pcluster *pcl = be->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
@ -1275,8 +1237,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
return err;
}
static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
int err)
static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
{
struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
struct z_erofs_pcluster *pcl = be->pcl;
@ -1393,7 +1354,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
pcl->vcnt = 0;
/* pcluster lock MUST be taken before the following line */
WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
WRITE_ONCE(pcl->next, NULL);
mutex_unlock(&pcl->lock);
if (z_erofs_is_inline_pcluster(pcl))
@ -1406,21 +1367,19 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
struct page **pagepool)
{
struct z_erofs_decompress_backend be = {
struct z_erofs_backend be = {
.sb = io->sb,
.pagepool = pagepool,
.decompressed_secondary_bvecs =
LIST_HEAD_INIT(be.decompressed_secondary_bvecs),
.pcl = io->head,
};
z_erofs_next_pcluster_t owned = io->head;
struct z_erofs_pcluster *next;
int err = io->eio ? -EIO : 0;
while (owned != Z_EROFS_PCLUSTER_TAIL) {
DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
be.pcl = container_of(owned, struct z_erofs_pcluster, next);
owned = READ_ONCE(be.pcl->next);
for (; be.pcl != Z_EROFS_PCLUSTER_TAIL; be.pcl = next) {
DBG_BUGON(!be.pcl);
next = READ_ONCE(be.pcl->next);
err = z_erofs_decompress_pcluster(&be, err) ?: err;
}
return err;
@ -1486,7 +1445,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
}
static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
struct z_erofs_decompress_frontend *f,
struct z_erofs_frontend *f,
struct z_erofs_pcluster *pcl,
unsigned int nr,
struct address_space *mc)
@ -1513,12 +1472,8 @@ repeat:
DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
folio = page_folio(zbv.page);
/*
* Handle preallocated cached folios. We tried to allocate such folios
* without triggering direct reclaim. If allocation failed, inplace
* file-backed folios will be used instead.
*/
if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
/* For preallocated managed folios, add them to page cache here */
if (folio->private == Z_EROFS_PREALLOCATED_FOLIO) {
tocache = true;
goto out_tocache;
}
@ -1630,18 +1585,13 @@ enum {
NR_JOBQUEUES,
};
static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
z_erofs_next_pcluster_t qtail[],
z_erofs_next_pcluster_t owned_head)
static void z_erofs_move_to_bypass_queue(struct z_erofs_pcluster *pcl,
struct z_erofs_pcluster *next,
struct z_erofs_pcluster **qtail[])
{
z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT];
z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS];
WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL);
WRITE_ONCE(*submit_qtail, owned_head);
WRITE_ONCE(*bypass_qtail, &pcl->next);
WRITE_ONCE(*qtail[JQ_SUBMIT], next);
WRITE_ONCE(*qtail[JQ_BYPASS], pcl);
qtail[JQ_BYPASS] = &pcl->next;
}
@ -1670,15 +1620,15 @@ static void z_erofs_endio(struct bio *bio)
bio_put(bio);
}
static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
static void z_erofs_submit_queue(struct z_erofs_frontend *f,
struct z_erofs_decompressqueue *fgq,
bool *force_fg, bool readahead)
{
struct super_block *sb = f->inode->i_sb;
struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
struct z_erofs_pcluster **qtail[NR_JOBQUEUES];
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
z_erofs_next_pcluster_t owned_head = f->owned_head;
struct z_erofs_pcluster *pcl, *next;
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
erofs_off_t last_pa;
unsigned int nr_bios = 0;
@ -1694,22 +1644,19 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
/* by default, all need io submission */
q[JQ_SUBMIT]->head = owned_head;
q[JQ_SUBMIT]->head = next = f->head;
do {
struct erofs_map_dev mdev;
struct z_erofs_pcluster *pcl;
erofs_off_t cur, end;
struct bio_vec bvec;
unsigned int i = 0;
bool bypass = true;
DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL);
pcl = container_of(owned_head, struct z_erofs_pcluster, next);
owned_head = READ_ONCE(pcl->next);
pcl = next;
next = READ_ONCE(pcl->next);
if (z_erofs_is_inline_pcluster(pcl)) {
move_to_bypass_jobqueue(pcl, qtail, owned_head);
z_erofs_move_to_bypass_queue(pcl, next, qtail);
continue;
}
@ -1781,8 +1728,8 @@ drain_io:
if (!bypass)
qtail[JQ_SUBMIT] = &pcl->next;
else
move_to_bypass_jobqueue(pcl, qtail, owned_head);
} while (owned_head != Z_EROFS_PCLUSTER_TAIL);
z_erofs_move_to_bypass_queue(pcl, next, qtail);
} while (next != Z_EROFS_PCLUSTER_TAIL);
if (bio) {
if (erofs_is_fileio_mode(EROFS_SB(sb)))
@ -1806,17 +1753,16 @@ drain_io:
z_erofs_decompress_kickoff(q[JQ_SUBMIT], nr_bios);
}
static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
unsigned int ra_folios)
static int z_erofs_runqueue(struct z_erofs_frontend *f, unsigned int rapages)
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
struct erofs_sb_info *sbi = EROFS_I_SB(f->inode);
bool force_fg = z_erofs_is_sync_decompress(sbi, ra_folios);
bool force_fg = z_erofs_is_sync_decompress(sbi, rapages);
int err;
if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
if (f->head == Z_EROFS_PCLUSTER_TAIL)
return 0;
z_erofs_submit_queue(f, io, &force_fg, !!ra_folios);
z_erofs_submit_queue(f, io, &force_fg, !!rapages);
/* handle bypass queue (no i/o pclusters) immediately */
err = z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool);
@ -1834,7 +1780,7 @@ static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
* Since partial uptodate is still unimplemented for now, we have to use
* approximate readmore strategies as a start.
*/
static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
static void z_erofs_pcluster_readmore(struct z_erofs_frontend *f,
struct readahead_control *rac, bool backmost)
{
struct inode *inode = f->inode;
@ -1889,12 +1835,10 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
static int z_erofs_read_folio(struct file *file, struct folio *folio)
{
struct inode *const inode = folio->mapping->host;
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
Z_EROFS_DEFINE_FRONTEND(f, inode, folio_pos(folio));
int err;
trace_erofs_read_folio(folio, false);
f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
z_erofs_pcluster_readmore(&f, NULL, true);
err = z_erofs_scan_folio(&f, folio, false);
z_erofs_pcluster_readmore(&f, NULL, false);
@ -1914,17 +1858,14 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
static void z_erofs_readahead(struct readahead_control *rac)
{
struct inode *const inode = rac->mapping->host;
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
Z_EROFS_DEFINE_FRONTEND(f, inode, readahead_pos(rac));
struct folio *head = NULL, *folio;
unsigned int nr_folios;
unsigned int nrpages = readahead_count(rac);
int err;
f.headoffset = readahead_pos(rac);
z_erofs_pcluster_readmore(&f, rac, true);
nr_folios = readahead_count(rac);
trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false);
nrpages = readahead_count(rac);
trace_erofs_readpages(inode, readahead_index(rac), nrpages, false);
while ((folio = readahead_folio(rac))) {
folio->private = head;
head = folio;
@ -1943,7 +1884,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
z_erofs_pcluster_readmore(&f, rac, false);
z_erofs_pcluster_end(&f);
(void)z_erofs_runqueue(&f, nr_folios);
(void)z_erofs_runqueue(&f, nrpages);
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&f.pagepool);
}

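One API nuance worth spelling out in the z_erofs_bind_cache() conversion above: unlike find_get_page(), which returns NULL on a cache miss, filemap_get_folio() returns an ERR_PTR (-ENOENT), hence the IS_ERR() check and the "folio ?: newfolio" fallbacks. A condensed, hypothetical helper showing the lookup-or-preallocate pattern (not the exact kernel flow; error handling trimmed):

static struct folio *get_or_prealloc(struct address_space *mapping,
				     pgoff_t index, gfp_t gfp)
{
	struct folio *folio = filemap_get_folio(mapping, index);

	if (!IS_ERR(folio))		/* already cached and referenced */
		return folio;
	folio = filemap_alloc_folio(gfp, 0);	/* order-0 folio, may fail */
	if (folio)
		folio->private = Z_EROFS_PREALLOCATED_FOLIO;
	return folio;		/* NULL => fall back to in-place I/O */
}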
fs/erofs/zmap.c

@ -97,17 +97,48 @@ static int get_compacted_la_distance(unsigned int lobits,
return d1;
}
static int unpack_compacted_index(struct z_erofs_maprecorder *m,
unsigned int amortizedshift,
erofs_off_t pos, bool lookahead)
static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
unsigned long lcn, bool lookahead)
{
struct erofs_inode *const vi = EROFS_I(m->inode);
struct inode *const inode = m->inode;
struct erofs_inode *const vi = EROFS_I(inode);
const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
const unsigned int totalidx = erofs_iblks(inode);
unsigned int compacted_4b_initial, compacted_2b, amortizedshift;
unsigned int vcnt, lo, lobits, encodebits, nblk, bytes;
bool big_pcluster;
bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
erofs_off_t pos;
u8 *in, type;
int i;
if (lcn >= totalidx || lclusterbits > 14)
return -EINVAL;
m->lcn = lcn;
/* used to align to 32-byte (compacted_2b) alignment */
compacted_4b_initial = ((32 - ebase % 32) / 4) & 7;
compacted_2b = 0;
if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
compacted_4b_initial < totalidx)
compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
pos = ebase;
amortizedshift = 2; /* compact_4b */
if (lcn >= compacted_4b_initial) {
pos += compacted_4b_initial * 4;
lcn -= compacted_4b_initial;
if (lcn < compacted_2b) {
amortizedshift = 1;
} else {
pos += compacted_2b * 2;
lcn -= compacted_2b;
}
}
pos += lcn * (1 << amortizedshift);
/* figure out the lcluster count in this pack */
if (1 << amortizedshift == 4 && lclusterbits <= 14)
vcnt = 2;
else if (1 << amortizedshift == 2 && lclusterbits <= 12)
@ -122,7 +153,6 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
/* it doesn't equal to round_up(..) */
m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
(vcnt << amortizedshift);
big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U);
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
bytes = pos & ((vcnt << amortizedshift) - 1);
@ -207,53 +237,6 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
return 0;
}
static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
unsigned long lcn, bool lookahead)
{
struct inode *const inode = m->inode;
struct erofs_inode *const vi = EROFS_I(inode);
const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
unsigned int totalidx = erofs_iblks(inode);
unsigned int compacted_4b_initial, compacted_2b;
unsigned int amortizedshift;
erofs_off_t pos;
if (lcn >= totalidx || vi->z_logical_clusterbits > 14)
return -EINVAL;
m->lcn = lcn;
/* used to align to 32-byte (compacted_2b) alignment */
compacted_4b_initial = (32 - ebase % 32) / 4;
if (compacted_4b_initial == 32 / 4)
compacted_4b_initial = 0;
if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
compacted_4b_initial < totalidx)
compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
else
compacted_2b = 0;
pos = ebase;
if (lcn < compacted_4b_initial) {
amortizedshift = 2;
goto out;
}
pos += compacted_4b_initial * 4;
lcn -= compacted_4b_initial;
if (lcn < compacted_2b) {
amortizedshift = 1;
goto out;
}
pos += compacted_2b * 2;
lcn -= compacted_2b;
amortizedshift = 2;
out:
pos += lcn * (1 << amortizedshift);
return unpack_compacted_index(m, amortizedshift, pos, lookahead);
}
static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int lcn, bool lookahead)
{
@ -311,27 +294,23 @@ err_bogus:
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
unsigned int initial_lcn)
{
struct super_block *sb = m->inode->i_sb;
struct erofs_inode *const vi = EROFS_I(m->inode);
struct erofs_map_blocks *const map = m->map;
const unsigned int lclusterbits = vi->z_logical_clusterbits;
unsigned long lcn;
struct inode *inode = m->inode;
struct super_block *sb = inode->i_sb;
struct erofs_inode *vi = EROFS_I(inode);
bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2;
unsigned long lcn = m->lcn + 1;
int err;
DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN &&
m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 &&
m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2);
DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
DBG_BUGON(m->type != m->headtype);
if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1) &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
map->m_plen = 1ULL << lclusterbits;
return 0;
}
lcn = m->lcn + 1;
if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) ||
((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) ||
(lcn << vi->z_logical_clusterbits) >= inode->i_size)
m->compressedblks = 1;
if (m->compressedblks)
goto out;
@ -356,9 +335,9 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
case Z_EROFS_LCLUSTER_TYPE_HEAD2:
/*
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
* rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
* rather than CBLKCNT, it's a 1 block-sized pcluster.
*/
m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits);
m->compressedblks = 1;
break;
case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
if (m->delta[0] != 1)
@ -373,7 +352,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return -EFSCORRUPTED;
}
out:
map->m_plen = erofs_pos(sb, m->compressedblks);
m->map->m_plen = erofs_pos(sb, m->compressedblks);
return 0;
err_bonus_cblkcnt:
erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);

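Two details of the merged helper and the m_plen refinement are easy to miss (neither is stated in the patch, but both follow from the code above). First, (32 - ebase % 32) / 4 can only yield 0..8, so the new "& 7" maps the 8 case back to 0 exactly as the old explicit compare against 32 / 4 did. Second, erofs_pos(sb, blks) is simply blks shifted by sb->s_blocksize_bits, so the refined path reports a non-big-pcluster extent as exactly one filesystem block:

/* illustrative, assuming the common 4 KiB block size (blkszbits == 12):
 *   m->compressedblks = 1
 *   m->map->m_plen    = erofs_pos(sb, 1) == 1 << 12 == 4096 bytes
 */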
View File

@ -243,7 +243,7 @@ void erofs_shrinker_unregister(struct super_block *sb)
static unsigned long erofs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
return atomic_long_read(&erofs_global_shrink_cnt);
return atomic_long_read(&erofs_global_shrink_cnt) ?: SHRINK_EMPTY;
}
static unsigned long erofs_shrink_scan(struct shrinker *shrink,