mirror of
https://github.com/nxp-imx/linux-imx.git
synced 2025-07-18 07:09:36 +02:00
Btrfs: deal with convert_extent_bit errors to avoid fs corruption
When committing a transaction or a log, we look for btree extents that need to be durably persisted by searching for ranges in a io tree that have some bits set (EXTENT_DIRTY or EXTENT_NEW). We then attempt to clear those bits and set the EXTENT_NEED_WAIT bit, with calls to the function convert_extent_bit, and then start writeback for the extents. That function however can return an error (at the moment only -ENOMEM is possible, specially when it does GFP_ATOMIC allocation requests through alloc_extent_state_atomic) - that means the ranges didn't got the EXTENT_NEED_WAIT bit set (or at least not for the whole range), which in turn means a call to btrfs_wait_marked_extents() won't find those ranges for which we started writeback, causing a transaction commit or a log commit to persist a new superblock without waiting for the writeback of extents in that range to finish first. Therefore if a crash happens after persisting the new superblock and before writeback finishes, we have a superblock pointing to roots that weren't fully persisted or roots that point to nodes or leafs that weren't fully persisted, causing all sorts of unexpected/bad behaviour as we endup reading garbage from disk or the content of some node/leaf from a past generation that got cowed or deleted and is no longer valid (for this later case we end up getting error messages like "parent transid verify failed on X wanted Y found Z" when reading btree nodes/leafs from disk). Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
parent
2fc9f6baa2
commit
663dfbb077
|
@ -76,6 +76,32 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void clear_btree_io_tree(struct extent_io_tree *tree)
|
||||||
|
{
|
||||||
|
spin_lock(&tree->lock);
|
||||||
|
while (!RB_EMPTY_ROOT(&tree->state)) {
|
||||||
|
struct rb_node *node;
|
||||||
|
struct extent_state *state;
|
||||||
|
|
||||||
|
node = rb_first(&tree->state);
|
||||||
|
state = rb_entry(node, struct extent_state, rb_node);
|
||||||
|
rb_erase(&state->rb_node, &tree->state);
|
||||||
|
RB_CLEAR_NODE(&state->rb_node);
|
||||||
|
/*
|
||||||
|
* btree io trees aren't supposed to have tasks waiting for
|
||||||
|
* changes in the flags of extent states ever.
|
||||||
|
*/
|
||||||
|
ASSERT(!waitqueue_active(&state->wq));
|
||||||
|
free_extent_state(state);
|
||||||
|
if (need_resched()) {
|
||||||
|
spin_unlock(&tree->lock);
|
||||||
|
cond_resched();
|
||||||
|
spin_lock(&tree->lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spin_unlock(&tree->lock);
|
||||||
|
}
|
||||||
|
|
||||||
static noinline void switch_commit_roots(struct btrfs_transaction *trans,
|
static noinline void switch_commit_roots(struct btrfs_transaction *trans,
|
||||||
struct btrfs_fs_info *fs_info)
|
struct btrfs_fs_info *fs_info)
|
||||||
{
|
{
|
||||||
|
@ -89,6 +115,7 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans,
|
||||||
root->commit_root = btrfs_root_node(root);
|
root->commit_root = btrfs_root_node(root);
|
||||||
if (is_fstree(root->objectid))
|
if (is_fstree(root->objectid))
|
||||||
btrfs_unpin_free_ino(root);
|
btrfs_unpin_free_ino(root);
|
||||||
|
clear_btree_io_tree(&root->dirty_log_pages);
|
||||||
}
|
}
|
||||||
up_write(&fs_info->commit_root_sem);
|
up_write(&fs_info->commit_root_sem);
|
||||||
}
|
}
|
||||||
|
@ -828,17 +855,38 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
|
||||||
|
|
||||||
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
|
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||||
mark, &cached_state)) {
|
mark, &cached_state)) {
|
||||||
convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
|
bool wait_writeback = false;
|
||||||
|
|
||||||
|
err = convert_extent_bit(dirty_pages, start, end,
|
||||||
|
EXTENT_NEED_WAIT,
|
||||||
mark, &cached_state, GFP_NOFS);
|
mark, &cached_state, GFP_NOFS);
|
||||||
cached_state = NULL;
|
/*
|
||||||
|
* convert_extent_bit can return -ENOMEM, which is most of the
|
||||||
|
* time a temporary error. So when it happens, ignore the error
|
||||||
|
* and wait for writeback of this range to finish - because we
|
||||||
|
* failed to set the bit EXTENT_NEED_WAIT for the range, a call
|
||||||
|
* to btrfs_wait_marked_extents() would not know that writeback
|
||||||
|
* for this range started and therefore wouldn't wait for it to
|
||||||
|
* finish - we don't want to commit a superblock that points to
|
||||||
|
* btree nodes/leafs for which writeback hasn't finished yet
|
||||||
|
* (and without errors).
|
||||||
|
* We cleanup any entries left in the io tree when committing
|
||||||
|
* the transaction (through clear_btree_io_tree()).
|
||||||
|
*/
|
||||||
|
if (err == -ENOMEM) {
|
||||||
|
err = 0;
|
||||||
|
wait_writeback = true;
|
||||||
|
}
|
||||||
|
if (!err)
|
||||||
err = filemap_fdatawrite_range(mapping, start, end);
|
err = filemap_fdatawrite_range(mapping, start, end);
|
||||||
if (err)
|
if (err)
|
||||||
werr = err;
|
werr = err;
|
||||||
|
else if (wait_writeback)
|
||||||
|
werr = filemap_fdatawait_range(mapping, start, end);
|
||||||
|
cached_state = NULL;
|
||||||
cond_resched();
|
cond_resched();
|
||||||
start = end + 1;
|
start = end + 1;
|
||||||
}
|
}
|
||||||
if (err)
|
|
||||||
werr = err;
|
|
||||||
return werr;
|
return werr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -862,8 +910,20 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
|
||||||
|
|
||||||
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
|
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||||
EXTENT_NEED_WAIT, &cached_state)) {
|
EXTENT_NEED_WAIT, &cached_state)) {
|
||||||
clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
|
/*
|
||||||
|
* Ignore -ENOMEM errors returned by clear_extent_bit().
|
||||||
|
* When committing the transaction, we'll remove any entries
|
||||||
|
* left in the io tree. For a log commit, we don't remove them
|
||||||
|
* after committing the log because the tree can be accessed
|
||||||
|
* concurrently - we do it only at transaction commit time when
|
||||||
|
* it's safe to do it (through clear_btree_io_tree()).
|
||||||
|
*/
|
||||||
|
err = clear_extent_bit(dirty_pages, start, end,
|
||||||
|
EXTENT_NEED_WAIT,
|
||||||
0, 0, &cached_state, GFP_NOFS);
|
0, 0, &cached_state, GFP_NOFS);
|
||||||
|
if (err == -ENOMEM)
|
||||||
|
err = 0;
|
||||||
|
if (!err)
|
||||||
err = filemap_fdatawait_range(mapping, start, end);
|
err = filemap_fdatawait_range(mapping, start, end);
|
||||||
if (err)
|
if (err)
|
||||||
werr = err;
|
werr = err;
|
||||||
|
@ -919,17 +979,17 @@ static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
|
static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root)
|
struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
if (!trans || !trans->transaction) {
|
int ret;
|
||||||
struct inode *btree_inode;
|
|
||||||
btree_inode = root->fs_info->btree_inode;
|
ret = btrfs_write_and_wait_marked_extents(root,
|
||||||
return filemap_write_and_wait(btree_inode->i_mapping);
|
|
||||||
}
|
|
||||||
return btrfs_write_and_wait_marked_extents(root,
|
|
||||||
&trans->transaction->dirty_pages,
|
&trans->transaction->dirty_pages,
|
||||||
EXTENT_DIRTY);
|
EXTENT_DIRTY);
|
||||||
|
clear_btree_io_tree(&trans->transaction->dirty_pages);
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -145,8 +145,6 @@ struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
|
||||||
struct btrfs_root *root);
|
struct btrfs_root *root);
|
||||||
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
|
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
|
||||||
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
|
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
|
||||||
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
|
|
||||||
struct btrfs_root *root);
|
|
||||||
|
|
||||||
void btrfs_add_dead_root(struct btrfs_root *root);
|
void btrfs_add_dead_root(struct btrfs_root *root);
|
||||||
int btrfs_defrag_root(struct btrfs_root *root);
|
int btrfs_defrag_root(struct btrfs_root *root);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user