ext4: fix zombie groups in average fragment size lists

commit 1c320d8e92 upstream.

Groups with no free blocks shouldn't be in any average fragment size list.
However, when all blocks in a group are allocated (i.e., bb_fragments or
bb_free is 0), we currently skip updating the average fragment size, which
means the group isn't removed from its previous s_mb_avg_fragment_size[old]
list.
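
A minimal sketch of the pre-patch check, condensed from the removed lines in
the diff below (identifiers are those of the kernel code shown in the hunk):

    /* Old check: a fully allocated group (bb_fragments == 0) returns here,
     * so it is never unlinked from s_mb_avg_fragment_size[old]. */
    if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
            return;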

This created "zombie" groups that were always skipped during traversal as
they couldn't satisfy any block allocation requests, negatively impacting
traversal efficiency.

Therefore, when a group becomes completely full, bb_avg_fragment_size_order
is now set to -1. If the old order is not -1, the group is removed from its
old s_mb_avg_fragment_size list; if the new order is not -1, it is inserted
into the list for the new order.
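
Condensed from the added lines in the diff below (the comment is an
annotation added here, not part of the patch), the new order computation is:

    old = grp->bb_avg_fragment_size_order;
    new = grp->bb_fragments == 0 ? -1 :   /* -1: full group, belongs on no list */
          mb_avg_fragment_size_order(sb, grp->bb_free / grp->bb_fragments);

The list removal then runs only when old >= 0, and the insertion only when
new >= 0, each under the corresponding s_mb_avg_fragment_size_locks entry.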

Fixes: 196e402adf ("ext4: improve cr 0 / cr 1 group scanning")
CC: stable@vger.kernel.org
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://patch.msgid.link/20250714130327.1830534-11-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

@@ -829,30 +829,30 @@ static void
 mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
 {
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-        int new_order;
+        int new, old;
 
-        if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
+        if (!test_opt2(sb, MB_OPTIMIZE_SCAN))
                 return;
 
-        new_order = mb_avg_fragment_size_order(sb,
-                                        grp->bb_free / grp->bb_fragments);
-        if (new_order == grp->bb_avg_fragment_size_order)
+        old = grp->bb_avg_fragment_size_order;
+        new = grp->bb_fragments == 0 ? -1 :
+              mb_avg_fragment_size_order(sb, grp->bb_free / grp->bb_fragments);
+        if (new == old)
                 return;
 
-        if (grp->bb_avg_fragment_size_order != -1) {
-                write_lock(&sbi->s_mb_avg_fragment_size_locks[
-                                        grp->bb_avg_fragment_size_order]);
+        if (old >= 0) {
+                write_lock(&sbi->s_mb_avg_fragment_size_locks[old]);
                 list_del(&grp->bb_avg_fragment_size_node);
-                write_unlock(&sbi->s_mb_avg_fragment_size_locks[
-                                        grp->bb_avg_fragment_size_order]);
+                write_unlock(&sbi->s_mb_avg_fragment_size_locks[old]);
         }
-        grp->bb_avg_fragment_size_order = new_order;
-        write_lock(&sbi->s_mb_avg_fragment_size_locks[
-                                grp->bb_avg_fragment_size_order]);
-        list_add_tail(&grp->bb_avg_fragment_size_node,
-                &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]);
-        write_unlock(&sbi->s_mb_avg_fragment_size_locks[
-                                grp->bb_avg_fragment_size_order]);
+
+        grp->bb_avg_fragment_size_order = new;
+        if (new >= 0) {
+                write_lock(&sbi->s_mb_avg_fragment_size_locks[new]);
+                list_add_tail(&grp->bb_avg_fragment_size_node,
+                                &sbi->s_mb_avg_fragment_size[new]);
+                write_unlock(&sbi->s_mb_avg_fragment_size_locks[new]);
+        }
 }
 
 /*