bcachefs fixes for 6.16-rc5

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmhnIIEACgkQE6szbY3K
 bnbi5g/9G3EMNL9LtU6tNPoLQdqINuKYtPtR/Wv3ZkPbLDlTdEilJSRelRpzYjaR
 aCxL3VgtIONX2uxOLOl3ODA9T4vnDBYCYdyPPeGzdMXA0YVRKZCmmg5REmFjhToK
 c0s5y6lTA6hXdWX0+DVvUODdnFgtVMeXgErzDqTxMZv3h/f1E5feuMaOZilJtlBl
 JElM5NsKsZSZCyDnq8pIowpvPA7WhH4HQeFLaK7HznFl7BFEUUt6ohhsieAiClMY
 1gfUcV/FwXRL6a7KbqKrdE8dtO6nB3mezx/TTHH5tbzvuoqbq375NNwvq0L4Vr2G
 DaSEU73he5Q0xvVFMj2DCyqUKe6cwccIgs+CFpM9FBrl4SUdVyq4/dN9GYrdyI5L
 ufK7Jd+f8Ekjl8WcAcS3LPp9pI8KwmT6fTsoZqZVvi+bFPeIVBb/YVP9Rm12iS2m
 ia+jj3xsPfYwMzsI0Rj/gxb+KnggnKOnDMKhgw4Yz5H0M9i8Rls6VAc62ZZx9xmz
 oyXdGuJN8wk8uXyr4yjux7i0hacFNSkBHcfnkVNu90rlJ8qh07O2EoLncQdv7vG6
 YpjwGR9XD0YIF8RfKlufkHhzVC6R7DUx0W7UCrEYhduRu+hEGhLrzL9vxlNVceWK
 5SRDB7KwFotoECStDlWAQY3g8nTWRH1d2t8qBJDOSGG7SauCKro=
 =b+3P
 -----END PGP SIGNATURE-----

Merge tag 'bcachefs-2025-07-03' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "The 'opts.casefold_disabled' patch is non critical, but would be a
  6.15 backport; it's to address the casefolding + overlayfs
  incompatibility that was discovered late.

  It's late because I was hoping that this would be addressed on the
  overlayfs side (and will be in 6.17), but user reports keep coming in
  on this one (lots of people are using docker these days)"

* tag 'bcachefs-2025-07-03' of git://evilpiepirate.org/bcachefs:
  bcachefs: opts.casefold_disabled
  bcachefs: Work around deadlock to btree node rewrites in journal replay
  bcachefs: Fix incorrect transaction restart handling
  bcachefs: fix btree_trans_peek_prev_journal()
  bcachefs: mark invalid_btree_id autofix
This commit is contained in:
Linus Torvalds 2025-07-04 09:29:22 -07:00
commit 482deed9df
13 changed files with 92 additions and 53 deletions

View File

@ -863,9 +863,7 @@ struct bch_fs {
DARRAY(enum bcachefs_metadata_version)
incompat_versions_requested;
#ifdef CONFIG_UNICODE
struct unicode_map *cf_encoding;
#endif
struct bch_sb_handle disk_sb;
@ -1285,4 +1283,13 @@ static inline bool bch2_discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca
: ca->mi.discard;
}
/*
 * Report whether casefolding may be used on this filesystem.
 *
 * Kernels built without CONFIG_UNICODE always answer false. Otherwise the
 * result is the inverse of the casefold_disabled option stored in c->opts.
 */
static inline bool bch2_fs_casefold_enabled(struct bch_fs *c)
{
	bool enabled = false;

#ifdef CONFIG_UNICODE
	enabled = !c->opts.casefold_disabled;
#endif
	return enabled;
}
#endif /* _BCACHEFS_H */

View File

@ -1337,15 +1337,42 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_node_reset_sib_u64s(b);
scoped_guard(rcu)
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
/*
* XXX:
*
* We deadlock if too many btree updates require node rewrites while
* we're still in journal replay.
*
* This is because btree node rewrites generate more updates for the
* interior updates (alloc, backpointers), and if those updates touch
* new nodes and generate more rewrites - well, you see the problem.
*
* The biggest cause is that we don't use the btree write buffer (for
* the backpointer updates) - this needs some real thought on locking in
* order to fix.
*
* The problem with this workaround (not doing the rewrite for degraded
* nodes in journal replay) is that those degraded nodes persist, and we
* don't want that (this is a real bug when a btree node write completes
* with fewer replicas than we wanted and leaves a degraded node due to
* device _removal_, i.e. the device went away mid write).
*
* It's less of a bug here, but still a problem because we don't yet
* have a way of tracking degraded data - we need another index (all
* extents/btree nodes, by replicas entry) in order to fix properly
* (re-replicate degraded data at the earliest possible time).
*/
if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) {
scoped_guard(rcu)
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
set_btree_node_need_rewrite(b);
set_btree_node_need_rewrite_degraded(b);
if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
set_btree_node_need_rewrite(b);
set_btree_node_need_rewrite_degraded(b);
}
}
}
}
if (!ptr_written) {
set_btree_node_need_rewrite(b);

View File

@ -2189,7 +2189,7 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans,
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
bch2_btree_journal_peek_prev(trans, iter, search_key,
k->k ? k->k->p : path_l(path)->b->key.k.p);
k->k ? k->k->p : path_l(path)->b->data->min_key);
if (next_journal) {
iter->k = next_journal->k;

View File

@ -18,7 +18,9 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
{
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
#ifdef CONFIG_UNICODE
if (!bch2_fs_casefold_enabled(trans->c))
return -EOPNOTSUPP;
unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
int ret = PTR_ERR_OR_ZERO(buf);
if (ret)
@ -30,9 +32,6 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
*out_cf = (struct qstr) QSTR_INIT(buf, ret);
return 0;
#else
return -EOPNOTSUPP;
#endif
}
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
@ -231,7 +230,8 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
}
int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
int bch2_dirent_init_name(struct bch_fs *c,
struct bkey_i_dirent *dirent,
const struct bch_hash_info *hash_info,
const struct qstr *name,
const struct qstr *cf_name)
@ -251,7 +251,9 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
offsetof(struct bch_dirent, d_name) -
name->len);
} else {
#ifdef CONFIG_UNICODE
if (!bch2_fs_casefold_enabled(c))
return -EOPNOTSUPP;
memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len];
@ -277,9 +279,6 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len);
EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len);
#else
return -EOPNOTSUPP;
#endif
}
unsigned u64s = dirent_val_u64s(name->len, cf_len);
@ -313,7 +312,7 @@ struct bkey_i_dirent *bch2_dirent_create_key(struct btree_trans *trans,
dirent->v.d_type = type;
dirent->v.d_unused = 0;
int ret = bch2_dirent_init_name(dirent, hash_info, name, cf_name);
int ret = bch2_dirent_init_name(trans->c, dirent, hash_info, name, cf_name);
if (ret)
return ERR_PTR(ret);

View File

@ -59,7 +59,8 @@ static inline void dirent_copy_target(struct bkey_i_dirent *dst,
dst->v.d_type = src.v->d_type;
}
int bch2_dirent_init_name(struct bkey_i_dirent *,
int bch2_dirent_init_name(struct bch_fs *,
struct bkey_i_dirent *,
const struct bch_hash_info *,
const struct qstr *,
const struct qstr *);

View File

@ -722,7 +722,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
if (IS_ERR(inode))
inode = NULL;
#ifdef CONFIG_UNICODE
if (!inode && IS_CASEFOLDED(vdir)) {
/*
* Do not cache a negative dentry in casefolded directories
@ -737,7 +736,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
*/
return NULL;
}
#endif
return d_splice_alias(&inode->v, dentry);
}
@ -2566,9 +2564,10 @@ got_sb:
sb->s_shrink->seeks = 0;
#ifdef CONFIG_UNICODE
sb->s_encoding = c->cf_encoding;
#endif
if (bch2_fs_casefold_enabled(c))
sb->s_encoding = c->cf_encoding;
generic_set_sb_d_ops(sb);
#endif
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
ret = PTR_ERR_OR_ZERO(vinode);

View File

@ -2302,9 +2302,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false;
#ifdef CONFIG_UNICODE
hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL;
#endif
ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info,
iter, k, need_second_pass);
@ -2819,7 +2817,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
ret = remove_backpointer(trans, &inode);
bch_err_msg(c, ret, "removing dirent");
if (ret)
break;
goto out;
ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);

View File

@ -1265,7 +1265,14 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
{
struct bch_fs *c = trans->c;
#ifdef CONFIG_UNICODE
#ifndef CONFIG_UNICODE
bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
return -EOPNOTSUPP;
#endif
if (c->opts.casefold_disabled)
return -EOPNOTSUPP;
int ret = 0;
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
@ -1289,10 +1296,6 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
bi->bi_fields_set |= BIT(Inode_opt_casefold);
return bch2_maybe_propagate_has_case_insensitive(trans, inum, bi);
#else
bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
return -EOPNOTSUPP;
#endif
}
static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)

View File

@ -234,6 +234,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_CASEFOLD, false, \
NULL, "Dirent lookups are casefolded") \
x(casefold_disabled, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Disable casefolding filesystem wide") \
x(inodes_32bit, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \

View File

@ -314,7 +314,7 @@ enum bch_fsck_flags {
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
x(accounting_replicas_not_marked, 273, 0) \
x(accounting_to_invalid_device, 289, 0) \
x(invalid_btree_id, 274, 0) \
x(invalid_btree_id, 274, FSCK_AUTOFIX) \
x(alloc_key_io_time_bad, 275, 0) \
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \

View File

@ -38,6 +38,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans,
struct bkey_s_c_dirent old,
bool *updated_before_k_pos)
{
struct bch_fs *c = trans->c;
struct qstr old_name = bch2_dirent_get_name(old);
struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64));
int ret = PTR_ERR_OR_ZERO(new);
@ -60,7 +61,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans,
sprintf(renamed_buf, "%.*s.fsck_renamed-%u",
old_name.len, old_name.name, i));
ret = bch2_dirent_init_name(new, hash_info, &renamed_name, NULL);
ret = bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL);
if (ret)
return ret;
@ -79,7 +80,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans,
}
ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
bch_err_fn(trans->c, ret);
bch_err_fn(c, ret);
return ret;
}

View File

@ -48,9 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
struct bch_hash_info info = {
.inum_snapshot = bi->bi_snapshot,
.type = INODE_STR_HASH(bi),
#ifdef CONFIG_UNICODE
.cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};

View File

@ -1025,15 +1025,17 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
}
#ifdef CONFIG_UNICODE
/* Default encoding until we can potentially have more as an option. */
c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
if (IS_ERR(c->cf_encoding)) {
printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
ret = -EINVAL;
goto err;
if (bch2_fs_casefold_enabled(c)) {
/* Default encoding until we can potentially have more as an option. */
c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
if (IS_ERR(c->cf_encoding)) {
printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
ret = -EINVAL;
goto err;
}
}
#else
if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
@ -1160,12 +1162,11 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
#ifdef CONFIG_UNICODE
bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
#endif
if (c->cf_encoding)
bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
if (!bch2_fs_may_start(c))
return bch_err_throw(c, insufficient_devices_to_start);