mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-10-22 23:13:01 +02:00
for-6.14-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmeHvVQACgkQxWXV+ddt WDsJ6w//cPqI8tf3kMxurZcG7clJRIIISotPrC6hm3UDNpJLa7HDaVJ50FAoIhMV sB4RQNZky4mfB6ypXxmETzV3ZHvP0+oFgRs72Ommi0ZbdnBgxhaUTrDXLKl52o4r UoeqvRKReEYOesN09rPXYPwytUOkxHU/GjNzv7bC/Tzvq/xKaIN5qMYZwkHtJ8PK JtCFypfbmDPNDJz37l0BhRya2oMtpcUtxM9uP8RWVuQtaELgjcy56W/+osoyJTy9 FSKaoWUPsDVDufnILlGR8Kub2Z5mcISVqyARUdr/q3j5CDfyTdQvahmUy7sHgUAe HGh5QBdRJu1QTvdZw+nK4YCaYpK6Nj4liDtO1cwVitde5RXsJrt6kYBLlY/kU2Qr KODOloM/zVKxULR0ARl11NULZquUsczP6Wxfn+dtyDJ3JGlY9OcuESmorHoUtkMX 75Tj1AtRMNcfZAE2HquL1Oz3bIMcg4btDJsC+9Yp5K11SP12XpOwC42k/9Bx3iBe Iki0BSuppFqX5MMY3OEWzD1pz2vOGYR8ISD6EIsjpjl2vBeRwydaCCZfuszSC7gl Y4goSdwFMPVlqllL1h27XUjKVXvttCqqdB6P28MbvZKnFAPlm189BJQZC5cbHAJU ceBww5PvI9QxnJnFG5iOLcnko6liUWPP9l2c5LLtUsJIi8B5Hu0= =SXLv -----END PGP SIGNATURE----- Merge tag 'for-6.14-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs updates from David Sterba: "User visible changes, features: - rebuilding of the free space tree at mount time is done in more transactions, fix potential hangs when the transaction thread is blocked due to large amount of block groups - more read IO balancing strategies (experimental config), add two new ways how to select a device for read if the profiles allow that (all RAID1*), the current default selects the device by pid which is good on average but less performant for single reader workloads - select preferred device for all reads (namely for testing) - round-robin, balance reads across devices relevant for the requested IO range - add encoded write ioctl support to io_uring (read was added in 6.12), basis for writing send stream using that instead of syscalls, non-blocking mode is not yet implemented - support FS_IOC_READ_VERITY_METADATA, applications can use the metadata to do their own verification - pass inode's i_write_hint to bios, for parity with other filesystems, ioctls F_GET_RW_HINT/F_SET_RW_HINT Core: - in zoned mode: allow to directly 
reclaim a block group by simply resetting it, then it can be reused and another block group does not need to be allocated - super block validation now also does more comprehensive sys array validation, adding it to the points where superblock is validated (post-read, pre-write) - subpage mode fixes: - fix double accounting of blocks due to some races - improved or fixed error handling in a few cases (compression, delalloc) - raid stripe tree: - fix various cases with extent range splitting or deleting - implement hole punching to extent range - reduce number of stripe tree lookups during bio submission - more self-tests - updated self-tests (delayed refs) - error handling improvements - cleanups, refactoring - remove rest of backref caching infrastructure from relocation, not needed anymore - error message updates - remove unnecessary calls when extent buffer was marked dirty - unused parameter removal - code moved to new files Other code changes: add rb_find_add_cached() to the rb-tree API" * tag 'for-6.14-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (127 commits) btrfs: selftests: add a selftest for deleting two out of three extents btrfs: selftests: add test for punching a hole into 3 RAID stripe-extents btrfs: selftests: add selftest for punching holes into the RAID stripe extents btrfs: selftests: test RAID stripe-tree deletion spanning two items btrfs: selftests: don't split RAID extents in half btrfs: selftests: check for correct return value of failed lookup btrfs: don't use btrfs_set_item_key_safe on RAID stripe-extents btrfs: implement hole punching for RAID stripe extents btrfs: fix deletion of a range spanning parts two RAID stripe extents btrfs: fix tail delete of RAID stripe-extents btrfs: fix front delete range calculation for RAID stripe extents btrfs: assert RAID stripe-extent length is always greater than 0 btrfs: don't try to delete RAID stripe-extents if we don't need to btrfs: selftests: correct RAID stripe-tree feature 
flag setting btrfs: add io_uring interface for encoded writes btrfs: remove the unused locked_folio parameter from btrfs_cleanup_ordered_extents() btrfs: add extra error messages for delalloc range related errors btrfs: subpage: dump the involved bitmap when ASSERT() failed btrfs: subpage: fix the bitmap dump of the locked flags btrfs: do proper folio cleanup when run_delalloc_nocow() failed ...
This commit is contained in:
commit
0eb4aaa230
|
@ -44,4 +44,4 @@ btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
|
|||
tests/extent-buffer-tests.o tests/btrfs-tests.o \
|
||||
tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
|
||||
tests/free-space-tree-tests.o tests/extent-map-tests.o \
|
||||
tests/raid-stripe-tree-tests.o
|
||||
tests/raid-stripe-tree-tests.o tests/delayed-refs-tests.o
|
||||
|
|
|
@ -18,7 +18,7 @@ enum {
|
|||
};
|
||||
|
||||
#define NO_THRESHOLD (-1)
|
||||
#define DFT_THRESHOLD (32)
|
||||
#define DEFAULT_THRESHOLD (32)
|
||||
|
||||
struct btrfs_workqueue {
|
||||
struct workqueue_struct *normal_wq;
|
||||
|
@ -94,9 +94,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
|
|||
|
||||
ret->limit_active = limit_active;
|
||||
if (thresh == 0)
|
||||
thresh = DFT_THRESHOLD;
|
||||
thresh = DEFAULT_THRESHOLD;
|
||||
/* For low threshold, disabling threshold is a better choice */
|
||||
if (thresh < DFT_THRESHOLD) {
|
||||
if (thresh < DEFAULT_THRESHOLD) {
|
||||
ret->current_active = limit_active;
|
||||
ret->thresh = NO_THRESHOLD;
|
||||
} else {
|
||||
|
|
|
@ -250,6 +250,21 @@ static int prelim_ref_compare(const struct prelim_ref *ref1,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int prelim_ref_rb_add_cmp(const struct rb_node *new,
|
||||
const struct rb_node *exist)
|
||||
{
|
||||
const struct prelim_ref *ref_new =
|
||||
rb_entry(new, struct prelim_ref, rbnode);
|
||||
const struct prelim_ref *ref_exist =
|
||||
rb_entry(exist, struct prelim_ref, rbnode);
|
||||
|
||||
/*
|
||||
* prelim_ref_compare() expects the first parameter as the existing one,
|
||||
* different from the rb_find_add_cached() order.
|
||||
*/
|
||||
return prelim_ref_compare(ref_exist, ref_new);
|
||||
}
|
||||
|
||||
static void update_share_count(struct share_check *sc, int oldcount,
|
||||
int newcount, const struct prelim_ref *newref)
|
||||
{
|
||||
|
@ -278,55 +293,39 @@ static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
|
|||
struct share_check *sc)
|
||||
{
|
||||
struct rb_root_cached *root;
|
||||
struct rb_node **p;
|
||||
struct rb_node *parent = NULL;
|
||||
struct prelim_ref *ref;
|
||||
int result;
|
||||
bool leftmost = true;
|
||||
struct rb_node *exist;
|
||||
|
||||
root = &preftree->root;
|
||||
p = &root->rb_root.rb_node;
|
||||
exist = rb_find_add_cached(&newref->rbnode, root, prelim_ref_rb_add_cmp);
|
||||
if (exist) {
|
||||
struct prelim_ref *ref = rb_entry(exist, struct prelim_ref, rbnode);
|
||||
/* Identical refs, merge them and free @newref */
|
||||
struct extent_inode_elem *eie = ref->inode_list;
|
||||
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
ref = rb_entry(parent, struct prelim_ref, rbnode);
|
||||
result = prelim_ref_compare(ref, newref);
|
||||
if (result < 0) {
|
||||
p = &(*p)->rb_left;
|
||||
} else if (result > 0) {
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else {
|
||||
/* Identical refs, merge them and free @newref */
|
||||
struct extent_inode_elem *eie = ref->inode_list;
|
||||
while (eie && eie->next)
|
||||
eie = eie->next;
|
||||
|
||||
while (eie && eie->next)
|
||||
eie = eie->next;
|
||||
|
||||
if (!eie)
|
||||
ref->inode_list = newref->inode_list;
|
||||
else
|
||||
eie->next = newref->inode_list;
|
||||
trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
|
||||
preftree->count);
|
||||
/*
|
||||
* A delayed ref can have newref->count < 0.
|
||||
* The ref->count is updated to follow any
|
||||
* BTRFS_[ADD|DROP]_DELAYED_REF actions.
|
||||
*/
|
||||
update_share_count(sc, ref->count,
|
||||
ref->count + newref->count, newref);
|
||||
ref->count += newref->count;
|
||||
free_pref(newref);
|
||||
return;
|
||||
}
|
||||
if (!eie)
|
||||
ref->inode_list = newref->inode_list;
|
||||
else
|
||||
eie->next = newref->inode_list;
|
||||
trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
|
||||
preftree->count);
|
||||
/*
|
||||
* A delayed ref can have newref->count < 0.
|
||||
* The ref->count is updated to follow any
|
||||
* BTRFS_[ADD|DROP]_DELAYED_REF actions.
|
||||
*/
|
||||
update_share_count(sc, ref->count,
|
||||
ref->count + newref->count, newref);
|
||||
ref->count += newref->count;
|
||||
free_pref(newref);
|
||||
return;
|
||||
}
|
||||
|
||||
update_share_count(sc, 0, newref->count, newref);
|
||||
preftree->count++;
|
||||
trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count);
|
||||
rb_link_node(&newref->rbnode, parent, p);
|
||||
rb_insert_color_cached(&newref->rbnode, root, leftmost);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3022,9 +3021,6 @@ void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
|
|||
cache->rb_root = RB_ROOT;
|
||||
for (i = 0; i < BTRFS_MAX_LEVEL; i++)
|
||||
INIT_LIST_HEAD(&cache->pending[i]);
|
||||
INIT_LIST_HEAD(&cache->changed);
|
||||
INIT_LIST_HEAD(&cache->detached);
|
||||
INIT_LIST_HEAD(&cache->leaves);
|
||||
INIT_LIST_HEAD(&cache->pending_edge);
|
||||
INIT_LIST_HEAD(&cache->useless_node);
|
||||
cache->fs_info = fs_info;
|
||||
|
@ -3132,29 +3128,17 @@ void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
|
|||
void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
|
||||
struct btrfs_backref_node *node)
|
||||
{
|
||||
struct btrfs_backref_node *upper;
|
||||
struct btrfs_backref_edge *edge;
|
||||
|
||||
if (!node)
|
||||
return;
|
||||
|
||||
BUG_ON(!node->lowest && !node->detached);
|
||||
while (!list_empty(&node->upper)) {
|
||||
edge = list_entry(node->upper.next, struct btrfs_backref_edge,
|
||||
list[LOWER]);
|
||||
upper = edge->node[UPPER];
|
||||
list_del(&edge->list[LOWER]);
|
||||
list_del(&edge->list[UPPER]);
|
||||
btrfs_backref_free_edge(cache, edge);
|
||||
|
||||
/*
|
||||
* Add the node to leaf node list if no other child block
|
||||
* cached.
|
||||
*/
|
||||
if (list_empty(&upper->lower)) {
|
||||
list_add_tail(&upper->lower, &cache->leaves);
|
||||
upper->lowest = 1;
|
||||
}
|
||||
}
|
||||
|
||||
btrfs_backref_drop_node(cache, node);
|
||||
|
@ -3166,33 +3150,13 @@ void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
|
|||
void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
|
||||
{
|
||||
struct btrfs_backref_node *node;
|
||||
int i;
|
||||
|
||||
while (!list_empty(&cache->detached)) {
|
||||
node = list_entry(cache->detached.next,
|
||||
struct btrfs_backref_node, list);
|
||||
while ((node = rb_entry_safe(rb_first(&cache->rb_root),
|
||||
struct btrfs_backref_node, rb_node)))
|
||||
btrfs_backref_cleanup_node(cache, node);
|
||||
}
|
||||
|
||||
while (!list_empty(&cache->leaves)) {
|
||||
node = list_entry(cache->leaves.next,
|
||||
struct btrfs_backref_node, lower);
|
||||
btrfs_backref_cleanup_node(cache, node);
|
||||
}
|
||||
|
||||
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
|
||||
while (!list_empty(&cache->pending[i])) {
|
||||
node = list_first_entry(&cache->pending[i],
|
||||
struct btrfs_backref_node,
|
||||
list);
|
||||
btrfs_backref_cleanup_node(cache, node);
|
||||
}
|
||||
}
|
||||
ASSERT(list_empty(&cache->pending_edge));
|
||||
ASSERT(list_empty(&cache->useless_node));
|
||||
ASSERT(list_empty(&cache->changed));
|
||||
ASSERT(list_empty(&cache->detached));
|
||||
ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
|
||||
ASSERT(!cache->nr_nodes);
|
||||
ASSERT(!cache->nr_edges);
|
||||
}
|
||||
|
@ -3316,8 +3280,12 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
|
|||
root = btrfs_get_fs_root(fs_info, ref_key->offset, false);
|
||||
if (IS_ERR(root))
|
||||
return PTR_ERR(root);
|
||||
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
|
||||
cur->cowonly = 1;
|
||||
|
||||
/* We shouldn't be using backref cache for non-shareable roots. */
|
||||
if (unlikely(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))) {
|
||||
btrfs_put_root(root);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
if (btrfs_root_level(&root->root_item) == cur->level) {
|
||||
/* Tree root */
|
||||
|
@ -3403,8 +3371,15 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
|
|||
goto out;
|
||||
}
|
||||
upper->owner = btrfs_header_owner(eb);
|
||||
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
|
||||
upper->cowonly = 1;
|
||||
|
||||
/* We shouldn't be using backref cache for non-shareable roots. */
|
||||
if (unlikely(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))) {
|
||||
btrfs_put_root(root);
|
||||
btrfs_backref_free_edge(cache, edge);
|
||||
btrfs_backref_free_node(cache, upper);
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we know the block isn't shared we can avoid
|
||||
|
@ -3595,15 +3570,9 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
|
|||
|
||||
ASSERT(start->checked);
|
||||
|
||||
/* Insert this node to cache if it's not COW-only */
|
||||
if (!start->cowonly) {
|
||||
rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
|
||||
&start->rb_node);
|
||||
if (rb_node)
|
||||
btrfs_backref_panic(cache->fs_info, start->bytenr,
|
||||
-EEXIST);
|
||||
list_add_tail(&start->lower, &cache->leaves);
|
||||
}
|
||||
rb_node = rb_simple_insert(&cache->rb_root, start->bytenr, &start->rb_node);
|
||||
if (rb_node)
|
||||
btrfs_backref_panic(cache->fs_info, start->bytenr, -EEXIST);
|
||||
|
||||
/*
|
||||
* Use breadth first search to iterate all related edges.
|
||||
|
@ -3642,11 +3611,6 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
|
|||
* parents have already been linked.
|
||||
*/
|
||||
if (!RB_EMPTY_NODE(&upper->rb_node)) {
|
||||
if (upper->lowest) {
|
||||
list_del_init(&upper->lower);
|
||||
upper->lowest = 0;
|
||||
}
|
||||
|
||||
list_add_tail(&edge->list[UPPER], &upper->lower);
|
||||
continue;
|
||||
}
|
||||
|
@ -3657,23 +3621,13 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
|
|||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* Sanity check, COW-only node has non-COW-only parent */
|
||||
if (start->cowonly != upper->cowonly) {
|
||||
ASSERT(0);
|
||||
rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
|
||||
&upper->rb_node);
|
||||
if (unlikely(rb_node)) {
|
||||
btrfs_backref_panic(cache->fs_info, upper->bytenr, -EEXIST);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* Only cache non-COW-only (subvolume trees) tree blocks */
|
||||
if (!upper->cowonly) {
|
||||
rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
|
||||
&upper->rb_node);
|
||||
if (rb_node) {
|
||||
btrfs_backref_panic(cache->fs_info,
|
||||
upper->bytenr, -EEXIST);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
}
|
||||
|
||||
list_add_tail(&edge->list[UPPER], &upper->lower);
|
||||
|
||||
/*
|
||||
|
|
|
@ -318,6 +318,12 @@ struct btrfs_backref_node {
|
|||
u64 bytenr;
|
||||
}; /* Use rb_simple_node for search/insert */
|
||||
|
||||
/*
|
||||
* This is a sanity check, whenever we COW a block we will update
|
||||
* new_bytenr with its current location, and we will check this in
|
||||
* various places to validate that the cache makes sense, it shouldn't
|
||||
* be used for anything else.
|
||||
*/
|
||||
u64 new_bytenr;
|
||||
/* Objectid of tree block owner, can be not uptodate */
|
||||
u64 owner;
|
||||
|
@ -335,10 +341,6 @@ struct btrfs_backref_node {
|
|||
struct extent_buffer *eb;
|
||||
/* Level of the tree block */
|
||||
unsigned int level:8;
|
||||
/* Is the block in a non-shareable tree */
|
||||
unsigned int cowonly:1;
|
||||
/* 1 if no child node is in the cache */
|
||||
unsigned int lowest:1;
|
||||
/* Is the extent buffer locked */
|
||||
unsigned int locked:1;
|
||||
/* Has the block been processed */
|
||||
|
@ -391,12 +393,6 @@ struct btrfs_backref_cache {
|
|||
* level blocks may not reflect the new location
|
||||
*/
|
||||
struct list_head pending[BTRFS_MAX_LEVEL];
|
||||
/* List of backref nodes with no child node */
|
||||
struct list_head leaves;
|
||||
/* List of blocks that have been COWed in current transaction */
|
||||
struct list_head changed;
|
||||
/* List of detached backref node. */
|
||||
struct list_head detached;
|
||||
|
||||
u64 last_trans;
|
||||
|
||||
|
|
|
@ -453,6 +453,14 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
|
|||
(unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
|
||||
dev->devid, bio->bi_iter.bi_size);
|
||||
|
||||
/*
|
||||
* Track reads if tracking is enabled; ignore I/O operations before the
|
||||
* filesystem is fully initialized.
|
||||
*/
|
||||
if (dev->fs_devices->collect_fs_stats && bio_op(bio) == REQ_OP_READ && dev->fs_info)
|
||||
percpu_counter_add(&dev->fs_info->stats_read_blocks,
|
||||
bio->bi_iter.bi_size >> dev->fs_info->sectorsize_bits);
|
||||
|
||||
if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
|
||||
blkcg_punt_bio_submit(bio);
|
||||
else
|
||||
|
@ -725,8 +733,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
|
|||
bio->bi_opf |= REQ_OP_ZONE_APPEND;
|
||||
}
|
||||
|
||||
if (is_data_bbio(bbio) && bioc &&
|
||||
btrfs_need_stripe_tree_update(bioc->fs_info, bioc->map_type)) {
|
||||
if (is_data_bbio(bbio) && bioc && bioc->use_rst) {
|
||||
/*
|
||||
* No locking for the list update, as we only add to
|
||||
* the list in the I/O submission path, and list
|
||||
|
|
|
@ -173,43 +173,41 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
|
|||
}
|
||||
}
|
||||
|
||||
static int btrfs_bg_start_cmp(const struct rb_node *new,
|
||||
const struct rb_node *exist)
|
||||
{
|
||||
const struct btrfs_block_group *new_bg =
|
||||
rb_entry(new, struct btrfs_block_group, cache_node);
|
||||
const struct btrfs_block_group *exist_bg =
|
||||
rb_entry(exist, struct btrfs_block_group, cache_node);
|
||||
|
||||
if (new_bg->start < exist_bg->start)
|
||||
return -1;
|
||||
if (new_bg->start > exist_bg->start)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This adds the block group to the fs_info rb tree for the block group cache
|
||||
*/
|
||||
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct rb_node **p;
|
||||
struct rb_node *parent = NULL;
|
||||
struct btrfs_block_group *cache;
|
||||
bool leftmost = true;
|
||||
struct rb_node *exist;
|
||||
int ret = 0;
|
||||
|
||||
ASSERT(block_group->length != 0);
|
||||
|
||||
write_lock(&info->block_group_cache_lock);
|
||||
p = &info->block_group_cache_tree.rb_root.rb_node;
|
||||
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
cache = rb_entry(parent, struct btrfs_block_group, cache_node);
|
||||
if (block_group->start < cache->start) {
|
||||
p = &(*p)->rb_left;
|
||||
} else if (block_group->start > cache->start) {
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else {
|
||||
write_unlock(&info->block_group_cache_lock);
|
||||
return -EEXIST;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(&block_group->cache_node, parent, p);
|
||||
rb_insert_color_cached(&block_group->cache_node,
|
||||
&info->block_group_cache_tree, leftmost);
|
||||
|
||||
exist = rb_find_add_cached(&block_group->cache_node,
|
||||
&info->block_group_cache_tree, btrfs_bg_start_cmp);
|
||||
if (exist)
|
||||
ret = -EEXIST;
|
||||
write_unlock(&info->block_group_cache_lock);
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1223,7 +1221,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
|||
block_group->space_info->total_bytes -= block_group->length;
|
||||
block_group->space_info->bytes_readonly -=
|
||||
(block_group->length - block_group->zone_unusable);
|
||||
btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info,
|
||||
btrfs_space_info_update_bytes_zone_unusable(block_group->space_info,
|
||||
-block_group->zone_unusable);
|
||||
block_group->space_info->disk_total -= block_group->length * factor;
|
||||
|
||||
|
@ -1396,8 +1394,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
|
|||
if (btrfs_is_zoned(cache->fs_info)) {
|
||||
/* Migrate zone_unusable bytes to readonly */
|
||||
sinfo->bytes_readonly += cache->zone_unusable;
|
||||
btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
|
||||
-cache->zone_unusable);
|
||||
btrfs_space_info_update_bytes_zone_unusable(sinfo, -cache->zone_unusable);
|
||||
cache->zone_unusable = 0;
|
||||
}
|
||||
cache->ro++;
|
||||
|
@ -1645,8 +1642,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
|
|||
spin_lock(&space_info->lock);
|
||||
spin_lock(&block_group->lock);
|
||||
|
||||
btrfs_space_info_update_bytes_pinned(fs_info, space_info,
|
||||
-block_group->pinned);
|
||||
btrfs_space_info_update_bytes_pinned(space_info, -block_group->pinned);
|
||||
space_info->bytes_readonly += block_group->pinned;
|
||||
block_group->pinned = 0;
|
||||
|
||||
|
@ -2672,7 +2668,6 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
|
||||
|
||||
btrfs_set_dev_extent_length(leaf, extent, num_bytes);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -3060,8 +3055,7 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
|
|||
(cache->alloc_offset - cache->used - cache->pinned -
|
||||
cache->reserved) +
|
||||
(cache->length - cache->zone_capacity);
|
||||
btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
|
||||
cache->zone_unusable);
|
||||
btrfs_space_info_update_bytes_zone_unusable(sinfo, cache->zone_unusable);
|
||||
sinfo->bytes_readonly -= cache->zone_unusable;
|
||||
}
|
||||
num_bytes = cache->length - cache->reserved -
|
||||
|
@ -3123,7 +3117,6 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
|
|||
cache->global_root_id);
|
||||
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
|
||||
write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
fail:
|
||||
btrfs_release_path(path);
|
||||
/*
|
||||
|
@ -3699,7 +3692,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
|||
old_val -= num_bytes;
|
||||
cache->used = old_val;
|
||||
cache->pinned += num_bytes;
|
||||
btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes);
|
||||
btrfs_space_info_update_bytes_pinned(space_info, num_bytes);
|
||||
space_info->bytes_used -= num_bytes;
|
||||
space_info->disk_used -= num_bytes * factor;
|
||||
if (READ_ONCE(space_info->periodic_reclaim))
|
||||
|
@ -3781,8 +3774,7 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
|
|||
space_info->bytes_reserved += num_bytes;
|
||||
trace_btrfs_space_reservation(cache->fs_info, "space_info",
|
||||
space_info->flags, num_bytes, 1);
|
||||
btrfs_space_info_update_bytes_may_use(cache->fs_info,
|
||||
space_info, -ram_bytes);
|
||||
btrfs_space_info_update_bytes_may_use(space_info, -ram_bytes);
|
||||
if (delalloc)
|
||||
cache->delalloc_bytes += num_bytes;
|
||||
|
||||
|
|
|
@ -150,9 +150,7 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
|||
spin_unlock(&dest->lock);
|
||||
}
|
||||
if (num_bytes)
|
||||
btrfs_space_info_free_bytes_may_use(fs_info,
|
||||
space_info,
|
||||
num_bytes);
|
||||
btrfs_space_info_free_bytes_may_use(space_info, num_bytes);
|
||||
}
|
||||
if (qgroup_to_release_ret)
|
||||
*qgroup_to_release_ret = qgroup_to_release;
|
||||
|
@ -383,13 +381,11 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
|
|||
|
||||
if (block_rsv->reserved < block_rsv->size) {
|
||||
num_bytes = block_rsv->size - block_rsv->reserved;
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
|
||||
num_bytes);
|
||||
btrfs_space_info_update_bytes_may_use(sinfo, num_bytes);
|
||||
block_rsv->reserved = block_rsv->size;
|
||||
} else if (block_rsv->reserved > block_rsv->size) {
|
||||
num_bytes = block_rsv->reserved - block_rsv->size;
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
|
||||
-num_bytes);
|
||||
btrfs_space_info_update_bytes_may_use(sinfo, -num_bytes);
|
||||
block_rsv->reserved = block_rsv->size;
|
||||
btrfs_try_granting_tickets(fs_info, sinfo);
|
||||
}
|
||||
|
|
|
@ -526,7 +526,7 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
|
|||
u32 bio_offset, struct bio_vec *bv);
|
||||
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
|
||||
struct btrfs_file_extent *file_extent,
|
||||
bool nowait, bool strict);
|
||||
bool nowait);
|
||||
|
||||
void btrfs_del_delalloc_inode(struct btrfs_inode *inode);
|
||||
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
|
||||
|
|
|
@ -37,19 +37,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
|
|||
static int balance_node_right(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *dst_buf,
|
||||
struct extent_buffer *src_buf);
|
||||
|
||||
static const struct btrfs_csums {
|
||||
u16 size;
|
||||
const char name[10];
|
||||
const char driver[12];
|
||||
} btrfs_csums[] = {
|
||||
[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
|
||||
[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
|
||||
[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
|
||||
[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
|
||||
.driver = "blake2b-256" },
|
||||
};
|
||||
|
||||
/*
|
||||
* The leaf data grows from end-to-front in the node. This returns the address
|
||||
* of the start of the last item, which is the stop of the leaf data stack.
|
||||
|
@ -148,44 +135,6 @@ static inline void copy_leaf_items(const struct extent_buffer *dst,
|
|||
nr_items * sizeof(struct btrfs_item));
|
||||
}
|
||||
|
||||
/* This exists for btrfs-progs usages. */
|
||||
u16 btrfs_csum_type_size(u16 type)
|
||||
{
|
||||
return btrfs_csums[type].size;
|
||||
}
|
||||
|
||||
int btrfs_super_csum_size(const struct btrfs_super_block *s)
|
||||
{
|
||||
u16 t = btrfs_super_csum_type(s);
|
||||
/*
|
||||
* csum type is validated at mount time
|
||||
*/
|
||||
return btrfs_csum_type_size(t);
|
||||
}
|
||||
|
||||
const char *btrfs_super_csum_name(u16 csum_type)
|
||||
{
|
||||
/* csum type is validated at mount time */
|
||||
return btrfs_csums[csum_type].name;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return driver name if defined, otherwise the name that's also a valid driver
|
||||
* name
|
||||
*/
|
||||
const char *btrfs_super_csum_driver(u16 csum_type)
|
||||
{
|
||||
/* csum type is validated at mount time */
|
||||
return btrfs_csums[csum_type].driver[0] ?
|
||||
btrfs_csums[csum_type].driver :
|
||||
btrfs_csums[csum_type].name;
|
||||
}
|
||||
|
||||
size_t __attribute_const__ btrfs_get_num_csums(void)
|
||||
{
|
||||
return ARRAY_SIZE(btrfs_csums);
|
||||
}
|
||||
|
||||
struct btrfs_path *btrfs_alloc_path(void)
|
||||
{
|
||||
might_sleep();
|
||||
|
@ -225,22 +174,6 @@ noinline void btrfs_release_path(struct btrfs_path *p)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We want the transaction abort to print stack trace only for errors where the
|
||||
* cause could be a bug, e.g. due to ENOSPC, and not for common errors that are
|
||||
* caused by external factors.
|
||||
*/
|
||||
bool __cold abort_should_print_stack(int error)
|
||||
{
|
||||
switch (error) {
|
||||
case -EIO:
|
||||
case -EROFS:
|
||||
case -ENOMEM:
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* safely gets a reference on the root node of a tree. A lock
|
||||
* is not taken, so a concurrent writer may put a different node
|
||||
|
@ -3900,6 +3833,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
|
|||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
|
||||
BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
|
||||
key.type != BTRFS_RAID_STRIPE_KEY &&
|
||||
key.type != BTRFS_EXTENT_CSUM_KEY);
|
||||
|
||||
if (btrfs_leaf_free_space(leaf) >= ins_len)
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
#define BTRFS_CTREE_H
|
||||
|
||||
#include "linux/cleanup.h"
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/mutex.h>
|
||||
|
@ -506,20 +505,6 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
|
|||
return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item);
|
||||
}
|
||||
|
||||
#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
|
||||
((bytes) >> (fs_info)->sectorsize_bits)
|
||||
|
||||
static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
|
||||
{
|
||||
return mapping_gfp_constraint(mapping, ~__GFP_FS);
|
||||
}
|
||||
|
||||
void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end);
|
||||
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 num_bytes, u64 *actual_bytes);
|
||||
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
|
||||
|
||||
/* ctree.c */
|
||||
int __init btrfs_ctree_init(void);
|
||||
void __cold btrfs_ctree_exit(void);
|
||||
|
||||
|
@ -756,18 +741,4 @@ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
|
|||
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
|
||||
}
|
||||
|
||||
u16 btrfs_csum_type_size(u16 type);
|
||||
int btrfs_super_csum_size(const struct btrfs_super_block *s);
|
||||
const char *btrfs_super_csum_name(u16 csum_type);
|
||||
const char *btrfs_super_csum_driver(u16 csum_type);
|
||||
size_t __attribute_const__ btrfs_get_num_csums(void);
|
||||
|
||||
/*
|
||||
* We use folio flag owner_2 to indicate there is an ordered extent with
|
||||
* unfinished IO.
|
||||
*/
|
||||
#define folio_test_ordered(folio) folio_test_owner_2(folio)
|
||||
#define folio_set_ordered(folio) folio_set_owner_2(folio)
|
||||
#define folio_clear_ordered(folio) folio_clear_owner_2(folio)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -176,7 +176,7 @@ void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
|
|||
ASSERT(IS_ALIGNED(len, fs_info->sectorsize));
|
||||
|
||||
data_sinfo = fs_info->data_sinfo;
|
||||
btrfs_space_info_free_bytes_may_use(fs_info, data_sinfo, len);
|
||||
btrfs_space_info_free_bytes_may_use(data_sinfo, len);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -366,40 +366,35 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int btrfs_delayed_item_cmp(const struct rb_node *new,
|
||||
const struct rb_node *exist)
|
||||
{
|
||||
const struct btrfs_delayed_item *new_item =
|
||||
rb_entry(new, struct btrfs_delayed_item, rb_node);
|
||||
const struct btrfs_delayed_item *exist_item =
|
||||
rb_entry(exist, struct btrfs_delayed_item, rb_node);
|
||||
|
||||
if (new_item->index < exist_item->index)
|
||||
return -1;
|
||||
if (new_item->index > exist_item->index)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
|
||||
struct btrfs_delayed_item *ins)
|
||||
{
|
||||
struct rb_node **p, *node;
|
||||
struct rb_node *parent_node = NULL;
|
||||
struct rb_root_cached *root;
|
||||
struct btrfs_delayed_item *item;
|
||||
bool leftmost = true;
|
||||
struct rb_node *exist;
|
||||
|
||||
if (ins->type == BTRFS_DELAYED_INSERTION_ITEM)
|
||||
root = &delayed_node->ins_root;
|
||||
else
|
||||
root = &delayed_node->del_root;
|
||||
|
||||
p = &root->rb_root.rb_node;
|
||||
node = &ins->rb_node;
|
||||
|
||||
while (*p) {
|
||||
parent_node = *p;
|
||||
item = rb_entry(parent_node, struct btrfs_delayed_item,
|
||||
rb_node);
|
||||
|
||||
if (item->index < ins->index) {
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else if (item->index > ins->index) {
|
||||
p = &(*p)->rb_left;
|
||||
} else {
|
||||
return -EEXIST;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(node, parent_node, p);
|
||||
rb_insert_color_cached(node, root, leftmost);
|
||||
exist = rb_find_add_cached(&ins->rb_node, root, btrfs_delayed_item_cmp);
|
||||
if (exist)
|
||||
return -EEXIST;
|
||||
|
||||
if (ins->type == BTRFS_DELAYED_INSERTION_ITEM &&
|
||||
ins->index >= delayed_node->index_cnt)
|
||||
|
@ -1038,7 +1033,6 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_inode_item);
|
||||
write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
|
||||
sizeof(struct btrfs_inode_item));
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
|
||||
goto out;
|
||||
|
@ -1561,8 +1555,7 @@ release_node:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_node *node,
|
||||
static int btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node,
|
||||
u64 index)
|
||||
{
|
||||
struct btrfs_delayed_item *item;
|
||||
|
@ -1620,7 +1613,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
|
|||
if (IS_ERR(node))
|
||||
return PTR_ERR(node);
|
||||
|
||||
ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node, index);
|
||||
ret = btrfs_delete_delayed_insertion_item(node, index);
|
||||
if (!ret)
|
||||
goto end;
|
||||
|
||||
|
|
|
@ -93,6 +93,9 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
|
|||
u64 num_bytes;
|
||||
u64 reserved_bytes;
|
||||
|
||||
if (btrfs_is_testing(fs_info))
|
||||
return;
|
||||
|
||||
num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates);
|
||||
num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info,
|
||||
trans->delayed_ref_csum_deletions);
|
||||
|
@ -254,7 +257,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
|
|||
spin_unlock(&block_rsv->lock);
|
||||
|
||||
if (to_free > 0)
|
||||
btrfs_space_info_free_bytes_may_use(fs_info, space_info, to_free);
|
||||
btrfs_space_info_free_bytes_may_use(space_info, to_free);
|
||||
|
||||
if (refilled_bytes > 0)
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0,
|
||||
|
@ -265,8 +268,8 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
|
|||
/*
|
||||
* compare two delayed data backrefs with same bytenr and type
|
||||
*/
|
||||
static int comp_data_refs(struct btrfs_delayed_ref_node *ref1,
|
||||
struct btrfs_delayed_ref_node *ref2)
|
||||
static int comp_data_refs(const struct btrfs_delayed_ref_node *ref1,
|
||||
const struct btrfs_delayed_ref_node *ref2)
|
||||
{
|
||||
if (ref1->data_ref.objectid < ref2->data_ref.objectid)
|
||||
return -1;
|
||||
|
@ -279,8 +282,8 @@ static int comp_data_refs(struct btrfs_delayed_ref_node *ref1,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
|
||||
struct btrfs_delayed_ref_node *ref2,
|
||||
static int comp_refs(const struct btrfs_delayed_ref_node *ref1,
|
||||
const struct btrfs_delayed_ref_node *ref2,
|
||||
bool check_seq)
|
||||
{
|
||||
int ret = 0;
|
||||
|
@ -314,34 +317,25 @@ static int comp_refs(struct btrfs_delayed_ref_node *ref1,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int cmp_refs_node(const struct rb_node *new, const struct rb_node *exist)
|
||||
{
|
||||
const struct btrfs_delayed_ref_node *new_node =
|
||||
rb_entry(new, struct btrfs_delayed_ref_node, ref_node);
|
||||
const struct btrfs_delayed_ref_node *exist_node =
|
||||
rb_entry(exist, struct btrfs_delayed_ref_node, ref_node);
|
||||
|
||||
return comp_refs(new_node, exist_node, true);
|
||||
}
|
||||
|
||||
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
|
||||
struct btrfs_delayed_ref_node *ins)
|
||||
{
|
||||
struct rb_node **p = &root->rb_root.rb_node;
|
||||
struct rb_node *node = &ins->ref_node;
|
||||
struct rb_node *parent_node = NULL;
|
||||
struct btrfs_delayed_ref_node *entry;
|
||||
bool leftmost = true;
|
||||
struct rb_node *exist;
|
||||
|
||||
while (*p) {
|
||||
int comp;
|
||||
|
||||
parent_node = *p;
|
||||
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
|
||||
ref_node);
|
||||
comp = comp_refs(ins, entry, true);
|
||||
if (comp < 0) {
|
||||
p = &(*p)->rb_left;
|
||||
} else if (comp > 0) {
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(node, parent_node, p);
|
||||
rb_insert_color_cached(node, root, leftmost);
|
||||
exist = rb_find_add_cached(node, root, cmp_refs_node);
|
||||
if (exist)
|
||||
return rb_entry(exist, struct btrfs_delayed_ref_node, ref_node);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -555,6 +549,32 @@ void btrfs_delete_ref_head(const struct btrfs_fs_info *fs_info,
|
|||
delayed_refs->num_heads_ready--;
|
||||
}
|
||||
|
||||
struct btrfs_delayed_ref_node *btrfs_select_delayed_ref(struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
|
||||
lockdep_assert_held(&head->mutex);
|
||||
lockdep_assert_held(&head->lock);
|
||||
|
||||
if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
|
||||
* This is to prevent a ref count from going down to zero, which deletes
|
||||
* the extent item from the extent tree, when there still are references
|
||||
* to add, which would fail because they would not find the extent item.
|
||||
*/
|
||||
if (!list_empty(&head->ref_add_list))
|
||||
return list_first_entry(&head->ref_add_list,
|
||||
struct btrfs_delayed_ref_node, add_list);
|
||||
|
||||
ref = rb_entry(rb_first_cached(&head->ref_tree),
|
||||
struct btrfs_delayed_ref_node, ref_node);
|
||||
ASSERT(list_empty(&ref->add_list));
|
||||
return ref;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper to insert the ref_node to the tail or merge with tail.
|
||||
*
|
||||
|
@ -1234,6 +1254,7 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
|
|||
{
|
||||
struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
bool testing = btrfs_is_testing(fs_info);
|
||||
|
||||
spin_lock(&delayed_refs->lock);
|
||||
while (true) {
|
||||
|
@ -1263,7 +1284,7 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
|
|||
spin_unlock(&delayed_refs->lock);
|
||||
mutex_unlock(&head->mutex);
|
||||
|
||||
if (pin_bytes) {
|
||||
if (!testing && pin_bytes) {
|
||||
struct btrfs_block_group *bg;
|
||||
|
||||
bg = btrfs_lookup_block_group(fs_info, head->bytenr);
|
||||
|
@ -1281,8 +1302,7 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
|
|||
spin_lock(&bg->space_info->lock);
|
||||
spin_lock(&bg->lock);
|
||||
bg->pinned += head->num_bytes;
|
||||
btrfs_space_info_update_bytes_pinned(fs_info,
|
||||
bg->space_info,
|
||||
btrfs_space_info_update_bytes_pinned(bg->space_info,
|
||||
head->num_bytes);
|
||||
bg->reserved -= head->num_bytes;
|
||||
bg->space_info->bytes_reserved -= head->num_bytes;
|
||||
|
@ -1295,12 +1315,15 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
|
|||
btrfs_error_unpin_extent_range(fs_info, head->bytenr,
|
||||
head->bytenr + head->num_bytes - 1);
|
||||
}
|
||||
btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
|
||||
if (!testing)
|
||||
btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
cond_resched();
|
||||
spin_lock(&delayed_refs->lock);
|
||||
}
|
||||
btrfs_qgroup_destroy_extent_records(trans);
|
||||
|
||||
if (!testing)
|
||||
btrfs_qgroup_destroy_extent_records(trans);
|
||||
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
}
|
||||
|
|
|
@ -402,6 +402,7 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
|
|||
struct btrfs_delayed_ref_root *delayed_refs);
|
||||
void btrfs_unselect_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head);
|
||||
struct btrfs_delayed_ref_node *btrfs_select_delayed_ref(struct btrfs_delayed_ref_head *head);
|
||||
|
||||
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
|
||||
|
||||
|
|
|
@ -440,9 +440,6 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans)
|
|||
dev_replace->cursor_right);
|
||||
dev_replace->item_needs_writeback = 0;
|
||||
up_write(&dev_replace->rwsem);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, eb);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
|
||||
|
|
|
@ -92,7 +92,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
|||
|
||||
write_extent_buffer(leaf, name, name_ptr, name_len);
|
||||
write_extent_buffer(leaf, data, data_ptr, data_len);
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -152,7 +151,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
|
|||
name_ptr = (unsigned long)(dir_item + 1);
|
||||
|
||||
write_extent_buffer(leaf, name->name, name_ptr, name->len);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
second_insert:
|
||||
/* FIXME, use some real flag for selecting the extra index */
|
||||
|
|
|
@ -248,8 +248,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
|
|||
len = min(len, em->len - (start - em->start));
|
||||
block_start = extent_map_block_start(em) + (start - em->start);
|
||||
|
||||
if (can_nocow_extent(inode, start, &len,
|
||||
&file_extent, false, false) == 1) {
|
||||
if (can_nocow_extent(inode, start, &len, &file_extent, false) == 1) {
|
||||
bg = btrfs_inc_nocow_writers(fs_info, block_start);
|
||||
if (bg)
|
||||
can_nocow = true;
|
||||
|
|
|
@ -226,7 +226,7 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
|
|||
|
||||
while (1) {
|
||||
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
|
||||
ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check);
|
||||
ret = read_extent_buffer_pages(eb, mirror_num, check);
|
||||
if (!ret)
|
||||
break;
|
||||
|
||||
|
@ -1258,6 +1258,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
|
|||
{
|
||||
struct percpu_counter *em_counter = &fs_info->evictable_extent_maps;
|
||||
|
||||
percpu_counter_destroy(&fs_info->stats_read_blocks);
|
||||
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
|
||||
percpu_counter_destroy(&fs_info->delalloc_bytes);
|
||||
percpu_counter_destroy(&fs_info->ordered_bytes);
|
||||
|
@ -2327,6 +2328,71 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_super_block *sb)
|
||||
{
|
||||
unsigned int cur = 0; /* Offset inside the sys chunk array */
|
||||
/*
|
||||
* At sb read time, fs_info is not fully initialized. Thus we have
|
||||
* to use super block sectorsize, which should have been validated.
|
||||
*/
|
||||
const u32 sectorsize = btrfs_super_sectorsize(sb);
|
||||
u32 sys_array_size = btrfs_super_sys_array_size(sb);
|
||||
|
||||
if (sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
|
||||
btrfs_err(fs_info, "system chunk array too big %u > %u",
|
||||
sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
while (cur < sys_array_size) {
|
||||
struct btrfs_disk_key *disk_key;
|
||||
struct btrfs_chunk *chunk;
|
||||
struct btrfs_key key;
|
||||
u64 type;
|
||||
u16 num_stripes;
|
||||
u32 len;
|
||||
int ret;
|
||||
|
||||
disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur);
|
||||
len = sizeof(*disk_key);
|
||||
|
||||
if (cur + len > sys_array_size)
|
||||
goto short_read;
|
||||
cur += len;
|
||||
|
||||
btrfs_disk_key_to_cpu(&key, disk_key);
|
||||
if (key.type != BTRFS_CHUNK_ITEM_KEY) {
|
||||
btrfs_err(fs_info,
|
||||
"unexpected item type %u in sys_array at offset %u",
|
||||
key.type, cur);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur);
|
||||
num_stripes = btrfs_stack_chunk_num_stripes(chunk);
|
||||
if (cur + btrfs_chunk_item_size(num_stripes) > sys_array_size)
|
||||
goto short_read;
|
||||
type = btrfs_stack_chunk_type(chunk);
|
||||
if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
|
||||
btrfs_err(fs_info,
|
||||
"invalid chunk type %llu in sys_array at offset %u",
|
||||
type, cur);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
ret = btrfs_check_chunk_valid(fs_info, NULL, chunk, key.offset,
|
||||
sectorsize);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
cur += btrfs_chunk_item_size(num_stripes);
|
||||
}
|
||||
return 0;
|
||||
short_read:
|
||||
btrfs_err(fs_info,
|
||||
"super block sys chunk array short read, cur=%u sys_array_size=%u",
|
||||
cur, sys_array_size);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Real super block validation
|
||||
* NOTE: super csum type and incompat features will not be checked here.
|
||||
|
@ -2495,6 +2561,8 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
|
|||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
ret = validate_sys_chunk_array(fs_info, sb);
|
||||
|
||||
/*
|
||||
* Obvious sys_chunk_array corruptions, it must hold at least one key
|
||||
* and one chunk
|
||||
|
@ -2856,6 +2924,10 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = percpu_counter_init(&fs_info->stats_read_blocks, 0, GFP_KERNEL);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
fs_info->dirty_metadata_batch = PAGE_SIZE *
|
||||
(1 + ilog2(nr_cpu_ids));
|
||||
|
||||
|
@ -3321,6 +3393,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
|||
fs_info->sectors_per_page = (PAGE_SIZE >> fs_info->sectorsize_bits);
|
||||
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
|
||||
fs_info->stripesize = stripesize;
|
||||
fs_info->fs_devices->fs_info = fs_info;
|
||||
|
||||
/*
|
||||
* Handle the space caching options appropriately now that we have the
|
||||
|
|
|
@ -96,9 +96,6 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
|
|||
/*
|
||||
* This function is used to grab the root and keep it from being freed while we
|
||||
* access it. But it doesn't ensure that the tree is not dropped.
|
||||
*
|
||||
* If you want to ensure the whole tree is safe, you should use
|
||||
* fs_info->subvol_srcu
|
||||
*/
|
||||
static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
|
||||
{
|
||||
|
|
|
@ -570,7 +570,6 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
|
||||
}
|
||||
}
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
ret = 0;
|
||||
fail:
|
||||
btrfs_release_path(path);
|
||||
|
@ -618,7 +617,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
|
||||
else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
|
||||
btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -1050,7 +1048,6 @@ void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
|
|||
} else {
|
||||
btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
|
||||
}
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
}
|
||||
|
||||
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
|
||||
|
@ -1195,7 +1192,6 @@ static noinline_for_stack int update_inline_extent_backref(
|
|||
item_size -= size;
|
||||
btrfs_truncate_item(trans, path, item_size, 1);
|
||||
}
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1260,12 +1256,12 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
|
|||
{
|
||||
int j, ret = 0;
|
||||
u64 bytes_left, end;
|
||||
u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
|
||||
u64 aligned_start = ALIGN(start, SECTOR_SIZE);
|
||||
|
||||
/* Adjust the range to be aligned to 512B sectors if necessary. */
|
||||
if (start != aligned_start) {
|
||||
len -= aligned_start - start;
|
||||
len = round_down(len, 1 << SECTOR_SHIFT);
|
||||
len = round_down(len, SECTOR_SIZE);
|
||||
start = aligned_start;
|
||||
}
|
||||
|
||||
|
@ -1527,7 +1523,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
|
|||
if (extent_op)
|
||||
__run_delayed_extent_op(extent_op, leaf, item);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
/* now insert the actual backref */
|
||||
|
@ -1711,8 +1706,6 @@ again:
|
|||
|
||||
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
|
||||
__run_delayed_extent_op(extent_op, leaf, ei);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -1803,30 +1796,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static inline struct btrfs_delayed_ref_node *
|
||||
select_delayed_ref(struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
|
||||
if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
|
||||
* This is to prevent a ref count from going down to zero, which deletes
|
||||
* the extent item from the extent tree, when there still are references
|
||||
* to add, which would fail because they would not find the extent item.
|
||||
*/
|
||||
if (!list_empty(&head->ref_add_list))
|
||||
return list_first_entry(&head->ref_add_list,
|
||||
struct btrfs_delayed_ref_node, add_list);
|
||||
|
||||
ref = rb_entry(rb_first_cached(&head->ref_tree),
|
||||
struct btrfs_delayed_ref_node, ref_node);
|
||||
ASSERT(list_empty(&ref->add_list));
|
||||
return ref;
|
||||
}
|
||||
|
||||
static struct btrfs_delayed_extent_op *cleanup_extent_op(
|
||||
struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
|
@ -1959,7 +1928,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
|
|||
lockdep_assert_held(&locked_ref->mutex);
|
||||
lockdep_assert_held(&locked_ref->lock);
|
||||
|
||||
while ((ref = select_delayed_ref(locked_ref))) {
|
||||
while ((ref = btrfs_select_delayed_ref(locked_ref))) {
|
||||
if (ref->seq &&
|
||||
btrfs_check_delayed_seq(fs_info, ref->seq)) {
|
||||
spin_unlock(&locked_ref->lock);
|
||||
|
@ -2230,10 +2199,11 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static noinline int check_delayed_ref(struct btrfs_root *root,
|
||||
static noinline int check_delayed_ref(struct btrfs_inode *inode,
|
||||
struct btrfs_path *path,
|
||||
u64 objectid, u64 offset, u64 bytenr)
|
||||
u64 offset, u64 bytenr)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_delayed_ref_head *head;
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
|
@ -2307,7 +2277,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
|
|||
* then we have a cross reference.
|
||||
*/
|
||||
if (ref->ref_root != btrfs_root_id(root) ||
|
||||
ref_owner != objectid || ref_offset != offset) {
|
||||
ref_owner != btrfs_ino(inode) || ref_offset != offset) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
|
@ -2318,11 +2288,53 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static noinline int check_committed_ref(struct btrfs_root *root,
|
||||
/*
|
||||
* Check if there are references for a data extent other than the one belonging
|
||||
* to the given inode and offset.
|
||||
*
|
||||
* @inode: The only inode we expect to find associated with the data extent.
|
||||
* @path: A path to use for searching the extent tree.
|
||||
* @offset: The only offset we expect to find associated with the data extent.
|
||||
* @bytenr: The logical address of the data extent.
|
||||
*
|
||||
* When the extent does not have any references other than the one we
|
||||
* expect to find, we always return a value of 0 with the path having a locked
|
||||
* leaf that contains the extent's extent item - this is necessary to ensure
|
||||
* we don't race with a task running delayed references, and our caller must
|
||||
* have such a path when calling check_delayed_ref() - it must lock a delayed
|
||||
* ref head while holding the leaf locked. In case the extent item is not found
|
||||
* in the extent tree, we return -ENOENT with the path having the leaf (locked)
|
||||
* where the extent item should be, in order to prevent races with another task
|
||||
* running delayed references, so that we don't miss any reference when calling
|
||||
* check_delayed_ref().
|
||||
*
|
||||
* Note: this may return false positives, and this is because we want to be
|
||||
* quick here as we're called in write paths (when flushing delalloc and
|
||||
* in the direct IO write path). For example we can have an extent with
|
||||
* a single reference but that reference is not inlined, or we may have
|
||||
* many references in the extent tree but we also have delayed references
|
||||
* that cancel all the references except the one for our inode and offset,
|
||||
* but it would be expensive to do such checks and complex due to all
|
||||
* locking to avoid races between the checks and flushing delayed refs,
|
||||
* plus non-inline references may be located on leaves other than the one
|
||||
* that contains the extent item in the extent tree. The important thing
|
||||
* here is to not return false negatives and that the false positives are
|
||||
* not very common.
|
||||
*
|
||||
* Returns: 0 if there are no cross references and with the path having a locked
|
||||
* leaf from the extent tree that contains the extent's extent item.
|
||||
*
|
||||
* 1 if there are cross references (false positives can happen).
|
||||
*
|
||||
* < 0 in case of an error. In case of -ENOENT the leaf in the extent
|
||||
* tree where the extent item should be located at is read locked and
|
||||
* accessible in the given path.
|
||||
*/
|
||||
static noinline int check_committed_ref(struct btrfs_inode *inode,
|
||||
struct btrfs_path *path,
|
||||
u64 objectid, u64 offset, u64 bytenr,
|
||||
bool strict)
|
||||
u64 offset, u64 bytenr)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
|
||||
struct extent_buffer *leaf;
|
||||
|
@ -2341,35 +2353,32 @@ static noinline int check_committed_ref(struct btrfs_root *root,
|
|||
|
||||
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
return ret;
|
||||
if (ret == 0) {
|
||||
/*
|
||||
* Key with offset -1 found, there would have to exist an extent
|
||||
* item with such offset, but this is out of the valid range.
|
||||
*/
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
ret = -ENOENT;
|
||||
if (path->slots[0] == 0)
|
||||
goto out;
|
||||
return -ENOENT;
|
||||
|
||||
path->slots[0]--;
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
|
||||
if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
|
||||
goto out;
|
||||
return -ENOENT;
|
||||
|
||||
ret = 1;
|
||||
item_size = btrfs_item_size(leaf, path->slots[0]);
|
||||
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
|
||||
expected_size = sizeof(*ei) + btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY);
|
||||
|
||||
/* No inline refs; we need to bail before checking for owner ref. */
|
||||
if (item_size == sizeof(*ei))
|
||||
goto out;
|
||||
return 1;
|
||||
|
||||
/* Check for an owner ref; skip over it to the real inline refs. */
|
||||
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
|
||||
|
@ -2377,56 +2386,69 @@ static noinline int check_committed_ref(struct btrfs_root *root,
|
|||
if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA) && type == BTRFS_EXTENT_OWNER_REF_KEY) {
|
||||
expected_size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
|
||||
iref = (struct btrfs_extent_inline_ref *)(iref + 1);
|
||||
type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
|
||||
}
|
||||
|
||||
/* If extent item has more than 1 inline ref then it's shared */
|
||||
if (item_size != expected_size)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* If extent created before last snapshot => it's shared unless the
|
||||
* snapshot has been deleted. Use the heuristic if strict is false.
|
||||
*/
|
||||
if (!strict &&
|
||||
(btrfs_extent_generation(leaf, ei) <=
|
||||
btrfs_root_last_snapshot(&root->root_item)))
|
||||
goto out;
|
||||
return 1;
|
||||
|
||||
/* If this extent has SHARED_DATA_REF then it's shared */
|
||||
type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
|
||||
if (type != BTRFS_EXTENT_DATA_REF_KEY)
|
||||
goto out;
|
||||
return 1;
|
||||
|
||||
ref = (struct btrfs_extent_data_ref *)(&iref->offset);
|
||||
if (btrfs_extent_refs(leaf, ei) !=
|
||||
btrfs_extent_data_ref_count(leaf, ref) ||
|
||||
btrfs_extent_data_ref_root(leaf, ref) != btrfs_root_id(root) ||
|
||||
btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
|
||||
btrfs_extent_data_ref_objectid(leaf, ref) != btrfs_ino(inode) ||
|
||||
btrfs_extent_data_ref_offset(leaf, ref) != offset)
|
||||
goto out;
|
||||
return 1;
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
|
||||
u64 bytenr, bool strict, struct btrfs_path *path)
|
||||
int btrfs_cross_ref_exist(struct btrfs_inode *inode, u64 offset,
|
||||
u64 bytenr, struct btrfs_path *path)
|
||||
{
|
||||
int ret;
|
||||
|
||||
do {
|
||||
ret = check_committed_ref(root, path, objectid,
|
||||
offset, bytenr, strict);
|
||||
ret = check_committed_ref(inode, path, offset, bytenr);
|
||||
if (ret && ret != -ENOENT)
|
||||
goto out;
|
||||
|
||||
ret = check_delayed_ref(root, path, objectid, offset, bytenr);
|
||||
/*
|
||||
* The path must have a locked leaf from the extent tree where
|
||||
* the extent item for our extent is located, in case it exists,
|
||||
* or where it should be located in case it doesn't exist yet
|
||||
* because it's new and its delayed ref was not yet flushed.
|
||||
* We need to lock the delayed ref head at check_delayed_ref(),
|
||||
* if one exists, while holding the leaf locked in order to not
|
||||
* race with delayed ref flushing, missing references and
|
||||
* incorrectly reporting that the extent is not shared.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) {
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
|
||||
ASSERT(leaf != NULL);
|
||||
btrfs_assert_tree_read_locked(leaf);
|
||||
|
||||
if (ret != -ENOENT) {
|
||||
struct btrfs_key key;
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
ASSERT(key.objectid == bytenr);
|
||||
ASSERT(key.type == BTRFS_EXTENT_ITEM_KEY);
|
||||
}
|
||||
}
|
||||
|
||||
ret = check_delayed_ref(inode, path, offset, bytenr);
|
||||
} while (ret == -EAGAIN && !path->nowait);
|
||||
|
||||
out:
|
||||
btrfs_release_path(path);
|
||||
if (btrfs_is_data_reloc_root(root))
|
||||
if (btrfs_is_data_reloc_root(inode->root))
|
||||
WARN_ON(ret > 0);
|
||||
return ret;
|
||||
}
|
||||
|
@ -2571,13 +2593,10 @@ static int pin_down_extent(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_block_group *cache,
|
||||
u64 bytenr, u64 num_bytes, int reserved)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = cache->fs_info;
|
||||
|
||||
spin_lock(&cache->space_info->lock);
|
||||
spin_lock(&cache->lock);
|
||||
cache->pinned += num_bytes;
|
||||
btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
|
||||
num_bytes);
|
||||
btrfs_space_info_update_bytes_pinned(cache->space_info, num_bytes);
|
||||
if (reserved) {
|
||||
cache->reserved -= num_bytes;
|
||||
cache->space_info->bytes_reserved -= num_bytes;
|
||||
|
@ -2724,15 +2743,15 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
|
|||
{
|
||||
struct btrfs_block_group *cache = NULL;
|
||||
struct btrfs_space_info *space_info;
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
struct btrfs_free_cluster *cluster = NULL;
|
||||
u64 len;
|
||||
u64 total_unpinned = 0;
|
||||
u64 empty_cluster = 0;
|
||||
bool readonly;
|
||||
int ret = 0;
|
||||
|
||||
while (start <= end) {
|
||||
u64 len;
|
||||
|
||||
readonly = false;
|
||||
if (!cache ||
|
||||
start >= cache->start + cache->length) {
|
||||
|
@ -2778,37 +2797,19 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
|
|||
spin_lock(&space_info->lock);
|
||||
spin_lock(&cache->lock);
|
||||
cache->pinned -= len;
|
||||
btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
|
||||
btrfs_space_info_update_bytes_pinned(space_info, -len);
|
||||
space_info->max_extent_size = 0;
|
||||
if (cache->ro) {
|
||||
space_info->bytes_readonly += len;
|
||||
readonly = true;
|
||||
} else if (btrfs_is_zoned(fs_info)) {
|
||||
/* Need reset before reusing in a zoned block group */
|
||||
btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info,
|
||||
len);
|
||||
btrfs_space_info_update_bytes_zone_unusable(space_info, len);
|
||||
readonly = true;
|
||||
}
|
||||
spin_unlock(&cache->lock);
|
||||
if (!readonly && return_free_space &&
|
||||
global_rsv->space_info == space_info) {
|
||||
spin_lock(&global_rsv->lock);
|
||||
if (!global_rsv->full) {
|
||||
u64 to_add = min(len, global_rsv->size -
|
||||
global_rsv->reserved);
|
||||
|
||||
global_rsv->reserved += to_add;
|
||||
btrfs_space_info_update_bytes_may_use(fs_info,
|
||||
space_info, to_add);
|
||||
if (global_rsv->reserved >= global_rsv->size)
|
||||
global_rsv->full = 1;
|
||||
len -= to_add;
|
||||
}
|
||||
spin_unlock(&global_rsv->lock);
|
||||
}
|
||||
/* Add to any tickets we may have */
|
||||
if (!readonly && return_free_space && len)
|
||||
btrfs_try_granting_tickets(fs_info, space_info);
|
||||
if (!readonly && return_free_space)
|
||||
btrfs_return_free_space(space_info, len);
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
|
||||
|
@ -3259,7 +3260,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
} else {
|
||||
btrfs_set_extent_refs(leaf, ei, refs);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
}
|
||||
if (found_extent) {
|
||||
ret = remove_extent_backref(trans, extent_root, path,
|
||||
|
@ -4827,7 +4827,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
|
||||
}
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
btrfs_free_path(path);
|
||||
|
||||
return alloc_reserved_extent(trans, ins->objectid, ins->offset);
|
||||
|
@ -4902,7 +4901,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_extent_inline_ref_offset(leaf, iref, node->ref_root);
|
||||
}
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_free_path(path);
|
||||
|
||||
return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
|
||||
|
|
|
@ -116,8 +116,7 @@ int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
|
|||
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
|
||||
const struct extent_buffer *eb);
|
||||
int btrfs_exclude_logged_extents(struct extent_buffer *eb);
|
||||
int btrfs_cross_ref_exist(struct btrfs_root *root,
|
||||
u64 objectid, u64 offset, u64 bytenr, bool strict,
|
||||
int btrfs_cross_ref_exist(struct btrfs_inode *inode, u64 offset, u64 bytenr,
|
||||
struct btrfs_path *path);
|
||||
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
|
@ -163,5 +162,9 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_root *root,
|
||||
struct extent_buffer *node,
|
||||
struct extent_buffer *parent);
|
||||
void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end);
|
||||
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 num_bytes, u64 *actual_bytes);
|
||||
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -198,9 +198,8 @@ static void __process_folios_contig(struct address_space *mapping,
|
|||
u64 end, unsigned long page_ops)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host);
|
||||
pgoff_t start_index = start >> PAGE_SHIFT;
|
||||
pgoff_t index = start >> PAGE_SHIFT;
|
||||
pgoff_t end_index = end >> PAGE_SHIFT;
|
||||
pgoff_t index = start_index;
|
||||
struct folio_batch fbatch;
|
||||
int i;
|
||||
|
||||
|
@ -221,7 +220,7 @@ static void __process_folios_contig(struct address_space *mapping,
|
|||
}
|
||||
}
|
||||
|
||||
static noinline void __unlock_for_delalloc(const struct inode *inode,
|
||||
static noinline void unlock_delalloc_folio(const struct inode *inode,
|
||||
const struct folio *locked_folio,
|
||||
u64 start, u64 end)
|
||||
{
|
||||
|
@ -242,9 +241,8 @@ static noinline int lock_delalloc_folios(struct inode *inode,
|
|||
{
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
pgoff_t start_index = start >> PAGE_SHIFT;
|
||||
pgoff_t index = start >> PAGE_SHIFT;
|
||||
pgoff_t end_index = end >> PAGE_SHIFT;
|
||||
pgoff_t index = start_index;
|
||||
u64 processed_end = start;
|
||||
struct folio_batch fbatch;
|
||||
|
||||
|
@ -288,8 +286,7 @@ static noinline int lock_delalloc_folios(struct inode *inode,
|
|||
out:
|
||||
folio_batch_release(&fbatch);
|
||||
if (processed_end > start)
|
||||
__unlock_for_delalloc(inode, locked_folio, start,
|
||||
processed_end);
|
||||
unlock_delalloc_folio(inode, locked_folio, start, processed_end);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
|
@ -390,7 +387,7 @@ again:
|
|||
|
||||
unlock_extent(tree, delalloc_start, delalloc_end, &cached_state);
|
||||
if (!ret) {
|
||||
__unlock_for_delalloc(inode, locked_folio, delalloc_start,
|
||||
unlock_delalloc_folio(inode, locked_folio, delalloc_start,
|
||||
delalloc_end);
|
||||
cond_resched();
|
||||
goto again;
|
||||
|
@ -710,6 +707,7 @@ static void alloc_new_bio(struct btrfs_inode *inode,
|
|||
bbio = btrfs_bio_alloc(BIO_MAX_VECS, bio_ctrl->opf, fs_info,
|
||||
bio_ctrl->end_io_func, NULL);
|
||||
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
|
||||
bbio->bio.bi_write_hint = inode->vfs_inode.i_write_hint;
|
||||
bbio->inode = inode;
|
||||
bbio->file_offset = file_offset;
|
||||
bio_ctrl->bbio = bbio;
|
||||
|
@ -862,11 +860,6 @@ static int attach_extent_buffer_folio(struct extent_buffer *eb,
|
|||
return ret;
|
||||
}
|
||||
|
||||
int set_page_extent_mapped(struct page *page)
|
||||
{
|
||||
return set_folio_extent_mapped(page_folio(page));
|
||||
}
|
||||
|
||||
int set_folio_extent_mapped(struct folio *folio)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info;
|
||||
|
@ -901,9 +894,9 @@ void clear_folio_extent_mapped(struct folio *folio)
|
|||
folio_detach_private(folio);
|
||||
}
|
||||
|
||||
static struct extent_map *__get_extent_map(struct inode *inode,
|
||||
struct folio *folio, u64 start,
|
||||
u64 len, struct extent_map **em_cached)
|
||||
static struct extent_map *get_extent_map(struct btrfs_inode *inode,
|
||||
struct folio *folio, u64 start,
|
||||
u64 len, struct extent_map **em_cached)
|
||||
{
|
||||
struct extent_map *em;
|
||||
struct extent_state *cached_state = NULL;
|
||||
|
@ -922,14 +915,14 @@ static struct extent_map *__get_extent_map(struct inode *inode,
|
|||
*em_cached = NULL;
|
||||
}
|
||||
|
||||
btrfs_lock_and_flush_ordered_range(BTRFS_I(inode), start, start + len - 1, &cached_state);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), folio, start, len);
|
||||
btrfs_lock_and_flush_ordered_range(inode, start, start + len - 1, &cached_state);
|
||||
em = btrfs_get_extent(inode, folio, start, len);
|
||||
if (!IS_ERR(em)) {
|
||||
BUG_ON(*em_cached);
|
||||
refcount_inc(&em->refs);
|
||||
*em_cached = em;
|
||||
}
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len - 1, &cached_state);
|
||||
unlock_extent(&inode->io_tree, start, start + len - 1, &cached_state);
|
||||
|
||||
return em;
|
||||
}
|
||||
|
@ -985,8 +978,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
|
|||
end_folio_read(folio, true, cur, iosize);
|
||||
break;
|
||||
}
|
||||
em = __get_extent_map(inode, folio, cur, end - cur + 1,
|
||||
em_cached);
|
||||
em = get_extent_map(BTRFS_I(inode), folio, cur, end - cur + 1, em_cached);
|
||||
if (IS_ERR(em)) {
|
||||
end_folio_read(folio, false, cur, end + 1 - cur);
|
||||
return PTR_ERR(em);
|
||||
|
@ -1142,14 +1134,19 @@ static bool find_next_delalloc_bitmap(struct folio *folio,
|
|||
}
|
||||
|
||||
/*
|
||||
* helper for extent_writepage(), doing all of the delayed allocation setup.
|
||||
* Do all of the delayed allocation setup.
|
||||
*
|
||||
* This returns 1 if btrfs_run_delalloc_range function did all the work required
|
||||
* to write the page (copy into inline extent). In this case the IO has
|
||||
* been started and the page is already unlocked.
|
||||
* Return >0 if all the dirty blocks are submitted async (compression) or inlined.
|
||||
* The @folio should no longer be touched (treat it as already unlocked).
|
||||
*
|
||||
* This returns 0 if all went well (page still locked)
|
||||
* This returns < 0 if there were errors (page still locked)
|
||||
* Return 0 if there are still dirty blocks that need to be submitted through
|
||||
* extent_writepage_io().
|
||||
* bio_ctrl->submit_bitmap will indicate which blocks of the folio should be
|
||||
* submitted, and @folio is still kept locked.
|
||||
*
|
||||
* Return <0 if there is any error hit.
|
||||
* Any allocated ordered extent range covering this folio will be marked
|
||||
* finished (IOERR), and @folio is still kept locked.
|
||||
*/
|
||||
static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
||||
struct folio *folio,
|
||||
|
@ -1167,6 +1164,16 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
|||
* last delalloc end.
|
||||
*/
|
||||
u64 last_delalloc_end = 0;
|
||||
/*
|
||||
* The range end (exclusive) of the last successfully finished delalloc
|
||||
* range.
|
||||
* Any range covered by ordered extent must either be manually marked
|
||||
* finished (error handling), or has IO submitted (and finish the
|
||||
* ordered extent normally).
|
||||
*
|
||||
* This records the end of ordered extent cleanup if we hit an error.
|
||||
*/
|
||||
u64 last_finished_delalloc_end = page_start;
|
||||
u64 delalloc_start = page_start;
|
||||
u64 delalloc_end = page_end;
|
||||
u64 delalloc_to_write = 0;
|
||||
|
@ -1235,11 +1242,28 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
|||
found_len = last_delalloc_end + 1 - found_start;
|
||||
|
||||
if (ret >= 0) {
|
||||
/*
|
||||
* Some delalloc range may be created by previous folios.
|
||||
* Thus we still need to clean up this range during error
|
||||
* handling.
|
||||
*/
|
||||
last_finished_delalloc_end = found_start;
|
||||
/* No errors hit so far, run the current delalloc range. */
|
||||
ret = btrfs_run_delalloc_range(inode, folio,
|
||||
found_start,
|
||||
found_start + found_len - 1,
|
||||
wbc);
|
||||
if (ret >= 0)
|
||||
last_finished_delalloc_end = found_start + found_len;
|
||||
if (unlikely(ret < 0))
|
||||
btrfs_err_rl(fs_info,
|
||||
"failed to run delalloc range, root=%lld ino=%llu folio=%llu submit_bitmap=%*pbl start=%llu len=%u: %d",
|
||||
btrfs_root_id(inode->root),
|
||||
btrfs_ino(inode),
|
||||
folio_pos(folio),
|
||||
fs_info->sectors_per_page,
|
||||
&bio_ctrl->submit_bitmap,
|
||||
found_start, found_len, ret);
|
||||
} else {
|
||||
/*
|
||||
* We've hit an error during previous delalloc range,
|
||||
|
@ -1247,7 +1271,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
|||
*/
|
||||
unlock_extent(&inode->io_tree, found_start,
|
||||
found_start + found_len - 1, NULL);
|
||||
__unlock_for_delalloc(&inode->vfs_inode, folio,
|
||||
unlock_delalloc_folio(&inode->vfs_inode, folio,
|
||||
found_start,
|
||||
found_start + found_len - 1);
|
||||
}
|
||||
|
@ -1274,8 +1298,22 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
|||
|
||||
delalloc_start = found_start + found_len;
|
||||
}
|
||||
if (ret < 0)
|
||||
/*
|
||||
* It's possible we had some ordered extents created before we hit
|
||||
* an error, clean up non-async successfully created delalloc ranges.
|
||||
*/
|
||||
if (unlikely(ret < 0)) {
|
||||
unsigned int bitmap_size = min(
|
||||
(last_finished_delalloc_end - page_start) >>
|
||||
fs_info->sectorsize_bits,
|
||||
fs_info->sectors_per_page);
|
||||
|
||||
for_each_set_bit(bit, &bio_ctrl->submit_bitmap, bitmap_size)
|
||||
btrfs_mark_ordered_io_finished(inode, folio,
|
||||
page_start + (bit << fs_info->sectorsize_bits),
|
||||
fs_info->sectorsize, false);
|
||||
return ret;
|
||||
}
|
||||
out:
|
||||
if (last_delalloc_end)
|
||||
delalloc_end = last_delalloc_end;
|
||||
|
@ -1335,7 +1373,7 @@ static int submit_one_sector(struct btrfs_inode *inode,
|
|||
|
||||
em = btrfs_get_extent(inode, NULL, filepos, sectorsize);
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR_OR_ZERO(em);
|
||||
return PTR_ERR(em);
|
||||
|
||||
extent_offset = filepos - em->start;
|
||||
em_end = extent_map_end(em);
|
||||
|
@ -1391,6 +1429,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
|
|||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
unsigned long range_bitmap = 0;
|
||||
bool submitted_io = false;
|
||||
bool error = false;
|
||||
const u64 folio_start = folio_pos(folio);
|
||||
u64 cur;
|
||||
int bit;
|
||||
|
@ -1433,11 +1472,26 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
|
|||
break;
|
||||
}
|
||||
ret = submit_one_sector(inode, folio, cur, bio_ctrl, i_size);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (unlikely(ret < 0)) {
|
||||
/*
|
||||
* bio_ctrl may contain a bio crossing several folios.
|
||||
* Submit it immediately so that the bio has a chance
|
||||
* to finish normally, rather than being marked as an error.
|
||||
*/
|
||||
submit_one_bio(bio_ctrl);
|
||||
/*
|
||||
* Failed to grab the extent map which should be very rare.
|
||||
* Since there is no bio submitted to finish the ordered
|
||||
* extent, we have to manually finish this sector.
|
||||
*/
|
||||
btrfs_mark_ordered_io_finished(inode, folio, cur,
|
||||
fs_info->sectorsize, false);
|
||||
error = true;
|
||||
continue;
|
||||
}
|
||||
submitted_io = true;
|
||||
}
|
||||
out:
|
||||
|
||||
/*
|
||||
* If we didn't submit any sector (>= i_size), folio dirty gets
|
||||
* cleared but PAGECACHE_TAG_DIRTY is not cleared (only cleared
|
||||
|
@ -1445,8 +1499,11 @@ out:
|
|||
*
|
||||
* Here we set writeback and clear for the range. If the full folio
|
||||
* is no longer dirty then we clear the PAGECACHE_TAG_DIRTY tag.
|
||||
*
|
||||
* If we hit any error, the corresponding sector will still be dirty
|
||||
* thus no need to clear PAGECACHE_TAG_DIRTY.
|
||||
*/
|
||||
if (!submitted_io) {
|
||||
if (!submitted_io && !error) {
|
||||
btrfs_folio_set_writeback(fs_info, folio, start, len);
|
||||
btrfs_folio_clear_writeback(fs_info, folio, start, len);
|
||||
}
|
||||
|
@ -1464,15 +1521,14 @@ out:
|
|||
*/
|
||||
static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
|
||||
{
|
||||
struct inode *inode = folio->mapping->host;
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
const u64 page_start = folio_pos(folio);
|
||||
struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
int ret;
|
||||
size_t pg_offset;
|
||||
loff_t i_size = i_size_read(inode);
|
||||
loff_t i_size = i_size_read(&inode->vfs_inode);
|
||||
unsigned long end_index = i_size >> PAGE_SHIFT;
|
||||
|
||||
trace_extent_writepage(folio, inode, bio_ctrl->wbc);
|
||||
trace_extent_writepage(folio, &inode->vfs_inode, bio_ctrl->wbc);
|
||||
|
||||
WARN_ON(!folio_test_locked(folio));
|
||||
|
||||
|
@ -1496,26 +1552,28 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
|
|||
if (ret < 0)
|
||||
goto done;
|
||||
|
||||
ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl);
|
||||
ret = writepage_delalloc(inode, folio, bio_ctrl);
|
||||
if (ret == 1)
|
||||
return 0;
|
||||
if (ret)
|
||||
goto done;
|
||||
|
||||
ret = extent_writepage_io(BTRFS_I(inode), folio, folio_pos(folio),
|
||||
ret = extent_writepage_io(inode, folio, folio_pos(folio),
|
||||
PAGE_SIZE, bio_ctrl, i_size);
|
||||
if (ret == 1)
|
||||
return 0;
|
||||
if (ret < 0)
|
||||
btrfs_err_rl(fs_info,
|
||||
"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
|
||||
btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
folio_pos(folio), fs_info->sectors_per_page,
|
||||
&bio_ctrl->submit_bitmap, ret);
|
||||
|
||||
bio_ctrl->wbc->nr_to_write--;
|
||||
|
||||
done:
|
||||
if (ret) {
|
||||
btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio,
|
||||
page_start, PAGE_SIZE, !ret);
|
||||
if (ret < 0)
|
||||
mapping_set_error(folio->mapping, ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Only unlock ranges that are submitted. As there can be some async
|
||||
* submitted ranges inside the folio.
|
||||
|
@ -1525,12 +1583,6 @@ done:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Block until the EXTENT_BUFFER_WRITEBACK bit in @eb->bflags is cleared.
 *
 * The wait is done with wait_on_bit_io() in TASK_UNINTERRUPTIBLE state.
 * Nothing is returned to the caller.
 */
void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
|
||||
{
|
||||
wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock extent buffer status and pages for writeback.
|
||||
*
|
||||
|
@ -1671,11 +1723,10 @@ static void end_bbio_meta_write(struct btrfs_bio *bbio)
|
|||
{
|
||||
struct extent_buffer *eb = bbio->private;
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
bool uptodate = !bbio->bio.bi_status;
|
||||
struct folio_iter fi;
|
||||
u32 bio_offset = 0;
|
||||
|
||||
if (!uptodate)
|
||||
if (bbio->bio.bi_status != BLK_STS_OK)
|
||||
set_btree_ioerr(eb);
|
||||
|
||||
bio_for_each_folio_all(fi, &bbio->bio) {
|
||||
|
@ -2292,11 +2343,8 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
|
|||
if (ret == 1)
|
||||
goto next_page;
|
||||
|
||||
if (ret) {
|
||||
btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio,
|
||||
cur, cur_len, !ret);
|
||||
if (ret)
|
||||
mapping_set_error(mapping, ret);
|
||||
}
|
||||
btrfs_folio_end_lock(fs_info, folio, cur, cur_len);
|
||||
if (ret < 0)
|
||||
found_error = true;
|
||||
|
@ -2495,11 +2543,6 @@ next:
|
|||
return try_release_extent_state(io_tree, folio);
|
||||
}
|
||||
|
||||
/*
 * Free the extent buffer structure @eb back to extent_buffer_cache.
 *
 * Only the structure itself is handed to kmem_cache_free(); nothing else
 * is released here.
 */
static void __free_extent_buffer(struct extent_buffer *eb)
|
||||
{
|
||||
kmem_cache_free(extent_buffer_cache, eb);
|
||||
}
|
||||
|
||||
static int extent_buffer_under_io(const struct extent_buffer *eb)
|
||||
{
|
||||
return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
|
||||
|
@ -2580,8 +2623,8 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
|
|||
spin_unlock(&folio->mapping->i_private_lock);
|
||||
}
|
||||
|
||||
/* Release all pages attached to the extent buffer */
|
||||
static void btrfs_release_extent_buffer_pages(const struct extent_buffer *eb)
|
||||
/* Release all folios attached to the extent buffer */
|
||||
static void btrfs_release_extent_buffer_folios(const struct extent_buffer *eb)
|
||||
{
|
||||
ASSERT(!extent_buffer_under_io(eb));
|
||||
|
||||
|
@ -2603,9 +2646,9 @@ static void btrfs_release_extent_buffer_pages(const struct extent_buffer *eb)
|
|||
*/
|
||||
static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
|
||||
{
|
||||
btrfs_release_extent_buffer_pages(eb);
|
||||
btrfs_release_extent_buffer_folios(eb);
|
||||
btrfs_leak_debug_del_eb(eb);
|
||||
__free_extent_buffer(eb);
|
||||
kmem_cache_free(extent_buffer_cache, eb);
|
||||
}
|
||||
|
||||
static struct extent_buffer *
|
||||
|
@ -2703,7 +2746,7 @@ err:
|
|||
folio_put(eb->folios[i]);
|
||||
}
|
||||
}
|
||||
__free_extent_buffer(eb);
|
||||
kmem_cache_free(extent_buffer_cache, eb);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -2830,13 +2873,12 @@ free_eb:
|
|||
}
|
||||
#endif
|
||||
|
||||
static struct extent_buffer *grab_extent_buffer(
|
||||
struct btrfs_fs_info *fs_info, struct page *page)
|
||||
static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio)
|
||||
{
|
||||
struct folio *folio = page_folio(page);
|
||||
struct extent_buffer *exists;
|
||||
|
||||
lockdep_assert_held(&page->mapping->i_private_lock);
|
||||
lockdep_assert_held(&folio->mapping->i_private_lock);
|
||||
|
||||
/*
|
||||
* For subpage case, we completely rely on radix tree to ensure we
|
||||
|
@ -2851,7 +2893,7 @@ static struct extent_buffer *grab_extent_buffer(
|
|||
return NULL;
|
||||
|
||||
/*
|
||||
* We could have already allocated an eb for this page and attached one
|
||||
* We could have already allocated an eb for this folio and attached one
|
||||
* so let's see if we can get a ref on the existing eb, and if we can we
|
||||
* know it's good and we can just return that one, else we know we can
|
||||
* just overwrite folio private.
|
||||
|
@ -2860,16 +2902,19 @@ static struct extent_buffer *grab_extent_buffer(
|
|||
if (atomic_inc_not_zero(&exists->refs))
|
||||
return exists;
|
||||
|
||||
WARN_ON(PageDirty(page));
|
||||
WARN_ON(folio_test_dirty(folio));
|
||||
folio_detach_private(folio);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
|
||||
/*
|
||||
* Validate alignment constraints of eb at logical address @start.
|
||||
*/
|
||||
static bool check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
|
||||
{
|
||||
if (!IS_ALIGNED(start, fs_info->sectorsize)) {
|
||||
btrfs_err(fs_info, "bad tree block start %llu", start);
|
||||
return -EINVAL;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (fs_info->nodesize < PAGE_SIZE &&
|
||||
|
@ -2877,14 +2922,14 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
|
|||
btrfs_err(fs_info,
|
||||
"tree block crosses page boundary, start %llu nodesize %u",
|
||||
start, fs_info->nodesize);
|
||||
return -EINVAL;
|
||||
return true;
|
||||
}
|
||||
if (fs_info->nodesize >= PAGE_SIZE &&
|
||||
!PAGE_ALIGNED(start)) {
|
||||
btrfs_err(fs_info,
|
||||
"tree block is not page aligned, start %llu nodesize %u",
|
||||
start, fs_info->nodesize);
|
||||
return -EINVAL;
|
||||
return true;
|
||||
}
|
||||
if (!IS_ALIGNED(start, fs_info->nodesize) &&
|
||||
!test_and_set_bit(BTRFS_FS_UNALIGNED_TREE_BLOCK, &fs_info->flags)) {
|
||||
|
@ -2892,10 +2937,9 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
|
|||
"tree block not nodesize aligned, start %llu nodesize %u, can be resolved by a full metadata balance",
|
||||
start, fs_info->nodesize);
|
||||
}
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Return 0 if eb->folios[i] is attached to btree inode successfully.
|
||||
* Return >0 if there is already another extent buffer for the range,
|
||||
|
@ -2951,8 +2995,7 @@ finish:
|
|||
} else if (existing_folio) {
|
||||
struct extent_buffer *existing_eb;
|
||||
|
||||
existing_eb = grab_extent_buffer(fs_info,
|
||||
folio_page(existing_folio, 0));
|
||||
existing_eb = grab_extent_buffer(fs_info, existing_folio);
|
||||
if (existing_eb) {
|
||||
/* The extent buffer still exists, we can use it directly. */
|
||||
*found_eb_ret = existing_eb;
|
||||
|
@ -3149,7 +3192,7 @@ again:
|
|||
* live buffer and won't free them prematurely.
|
||||
*/
|
||||
for (int i = 0; i < num_folios; i++)
|
||||
unlock_page(folio_page(eb->folios[i], 0));
|
||||
folio_unlock(eb->folios[i]);
|
||||
return eb;
|
||||
|
||||
out:
|
||||
|
@ -3173,7 +3216,7 @@ out:
|
|||
for (int i = 0; i < attached; i++) {
|
||||
ASSERT(eb->folios[i]);
|
||||
detach_extent_buffer_folio(eb, eb->folios[i]);
|
||||
unlock_page(folio_page(eb->folios[i], 0));
|
||||
folio_unlock(eb->folios[i]);
|
||||
folio_put(eb->folios[i]);
|
||||
eb->folios[i] = NULL;
|
||||
}
|
||||
|
@ -3195,7 +3238,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
|
|||
struct extent_buffer *eb =
|
||||
container_of(head, struct extent_buffer, rcu_head);
|
||||
|
||||
__free_extent_buffer(eb);
|
||||
kmem_cache_free(extent_buffer_cache, eb);
|
||||
}
|
||||
|
||||
static int release_extent_buffer(struct extent_buffer *eb)
|
||||
|
@ -3219,11 +3262,11 @@ static int release_extent_buffer(struct extent_buffer *eb)
|
|||
}
|
||||
|
||||
btrfs_leak_debug_del_eb(eb);
|
||||
/* Should be safe to release our pages at this point */
|
||||
btrfs_release_extent_buffer_pages(eb);
|
||||
/* Should be safe to release folios at this point. */
|
||||
btrfs_release_extent_buffer_folios(eb);
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
|
||||
__free_extent_buffer(eb);
|
||||
kmem_cache_free(extent_buffer_cache, eb);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
@ -3382,12 +3425,12 @@ void set_extent_buffer_dirty(struct extent_buffer *eb)
|
|||
* the above race.
|
||||
*/
|
||||
if (subpage)
|
||||
lock_page(folio_page(eb->folios[0], 0));
|
||||
folio_lock(eb->folios[0]);
|
||||
for (int i = 0; i < num_folios; i++)
|
||||
btrfs_folio_set_dirty(eb->fs_info, eb->folios[i],
|
||||
eb->start, eb->len);
|
||||
if (subpage)
|
||||
unlock_page(folio_page(eb->folios[0], 0));
|
||||
folio_unlock(eb->folios[0]);
|
||||
percpu_counter_add_batch(&eb->fs_info->dirty_metadata_bytes,
|
||||
eb->len,
|
||||
eb->fs_info->dirty_metadata_batch);
|
||||
|
@ -3497,8 +3540,8 @@ static void end_bbio_meta_read(struct btrfs_bio *bbio)
|
|||
bio_put(&bbio->bio);
|
||||
}
|
||||
|
||||
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
|
||||
const struct btrfs_tree_parent_check *check)
|
||||
int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
|
||||
const struct btrfs_tree_parent_check *check)
|
||||
{
|
||||
struct btrfs_bio *bbio;
|
||||
bool ret;
|
||||
|
@ -3516,7 +3559,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
|
|||
|
||||
/* Someone else is already reading the buffer, just wait for it. */
|
||||
if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
|
||||
goto done;
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above
|
||||
|
@ -3556,14 +3599,21 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
|
|||
}
|
||||
}
|
||||
btrfs_submit_bbio(bbio, mirror_num);
|
||||
return 0;
|
||||
}
|
||||
|
||||
done:
|
||||
if (wait == WAIT_COMPLETE) {
|
||||
wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING, TASK_UNINTERRUPTIBLE);
|
||||
if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
|
||||
return -EIO;
|
||||
}
|
||||
/*
 * Read extent buffer @eb and wait for the read to finish.
 *
 * This is the waiting variant of read_extent_buffer_pages_nowait(): it calls
 * the nowait helper with the same @mirror_num and @check, then blocks in
 * TASK_UNINTERRUPTIBLE state until EXTENT_BUFFER_READING is cleared in
 * @eb->bflags.
 *
 * Return <0 if read_extent_buffer_pages_nowait() failed (its value is passed
 * through unchanged and no wait is done).
 * Return -EIO if EXTENT_BUFFER_UPTODATE is not set after the wait.
 * Return 0 otherwise.
 */
int read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
|
||||
const struct btrfs_tree_parent_check *check)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = read_extent_buffer_pages_nowait(eb, mirror_num, check);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING, TASK_UNINTERRUPTIBLE);
|
||||
/* The READING bit is gone, the UPTODATE bit tells whether the read succeeded. */
if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
|
||||
return -EIO;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -4294,7 +4344,7 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
|
|||
return;
|
||||
}
|
||||
|
||||
ret = read_extent_buffer_pages(eb, WAIT_NONE, 0, &check);
|
||||
ret = read_extent_buffer_pages_nowait(eb, 0, &check);
|
||||
if (ret < 0)
|
||||
free_extent_buffer_stale(eb);
|
||||
else
|
||||
|
|
|
@ -248,7 +248,6 @@ int btree_write_cache_pages(struct address_space *mapping,
|
|||
struct writeback_control *wbc);
|
||||
void btrfs_readahead(struct readahead_control *rac);
|
||||
int set_folio_extent_mapped(struct folio *folio);
|
||||
int set_page_extent_mapped(struct page *page);
|
||||
void clear_folio_extent_mapped(struct folio *folio);
|
||||
|
||||
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
|
@ -262,12 +261,17 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
|
|||
u64 start);
|
||||
void free_extent_buffer(struct extent_buffer *eb);
|
||||
void free_extent_buffer_stale(struct extent_buffer *eb);
|
||||
#define WAIT_NONE 0
|
||||
#define WAIT_COMPLETE 1
|
||||
#define WAIT_PAGE_LOCK 2
|
||||
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
|
||||
int read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
|
||||
const struct btrfs_tree_parent_check *parent_check);
|
||||
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
|
||||
int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
|
||||
const struct btrfs_tree_parent_check *parent_check);
|
||||
|
||||
/*
 * Block until the EXTENT_BUFFER_WRITEBACK bit in @eb->bflags is cleared.
 *
 * The wait is done with wait_on_bit_io() in TASK_UNINTERRUPTIBLE state.
 */
static inline void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
|
||||
{
|
||||
wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr, u64 owner_root, u64 gen, int level);
|
||||
void btrfs_readahead_node_child(struct extent_buffer *node, int slot);
|
||||
|
|
|
@ -190,8 +190,6 @@ int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_file_extent_compression(leaf, item, 0);
|
||||
btrfs_set_file_extent_encryption(leaf, item, 0);
|
||||
btrfs_set_file_extent_other_encoding(leaf, item, 0);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -1259,7 +1257,6 @@ found:
|
|||
ins_size /= csum_size;
|
||||
total_bytes += ins_size * fs_info->sectorsize;
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
if (total_bytes < sums->len) {
|
||||
btrfs_release_path(path);
|
||||
cond_resched();
|
||||
|
|
106
fs/btrfs/file.c
106
fs/btrfs/file.c
|
@ -36,52 +36,7 @@
|
|||
#include "ioctl.h"
|
||||
#include "file.h"
|
||||
#include "super.h"
|
||||
|
||||
/*
|
||||
* Helper to fault in page and copy. This should go away and be replaced with
|
||||
* calls into generic code.
|
||||
 *
 * @pos:         file position of the write, only its in-page offset
 *               (offset_in_page()) is used as the starting offset in @folio
 * @write_bytes: number of bytes the caller wants copied
 * @folio:       destination folio
 * @i:           source iterator, advanced by the copy and reverted when a
 *               short copy into a non-uptodate folio is discarded
 *
 * Each loop iteration copies at most PAGE_SIZE - offset bytes with
 * copy_folio_from_iter_atomic() and then flushes the dcache for @folio.
 *
 * Return the total number of bytes copied. The running total is kept in a
 * size_t and returned through the int return type.
*/
|
||||
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
|
||||
struct folio *folio, struct iov_iter *i)
|
||||
{
|
||||
size_t copied = 0;
|
||||
size_t total_copied = 0;
|
||||
int offset = offset_in_page(pos);
|
||||
|
||||
while (write_bytes > 0) {
|
||||
size_t count = min_t(size_t, PAGE_SIZE - offset, write_bytes);
|
||||
/*
|
||||
* Copy data from userspace to the current page
|
||||
*/
|
||||
copied = copy_folio_from_iter_atomic(folio, offset, count, i);
|
||||
|
||||
/* Flush processor's dcache for this page */
|
||||
flush_dcache_folio(folio);
|
||||
|
||||
/*
|
||||
* if we get a partial write, we can end up with
|
||||
* partially up to date page. These add
|
||||
* a lot of complexity, so make sure they don't
|
||||
* happen by forcing this copy to be retried.
|
||||
*
|
||||
* The rest of the btrfs_file_write code will fall
|
||||
* back to page at a time copies after we return 0.
|
||||
*/
|
||||
if (unlikely(copied < count)) {
|
||||
/* Short copy into a folio that is not uptodate: undo it and count it as 0. */
if (!folio_test_uptodate(folio)) {
|
||||
iov_iter_revert(i, copied);
|
||||
copied = 0;
|
||||
}
|
||||
/* Nothing usable was copied in this round, stop and report what we have. */
if (!copied)
|
||||
break;
|
||||
}
|
||||
|
||||
write_bytes -= copied;
|
||||
total_copied += copied;
|
||||
offset += copied;
|
||||
}
|
||||
return total_copied;
|
||||
}
|
||||
#include "print-tree.h"
|
||||
|
||||
/*
|
||||
* Unlock folio after btrfs_file_write() is done with it.
|
||||
|
@ -106,7 +61,7 @@ static void btrfs_drop_folio(struct btrfs_fs_info *fs_info, struct folio *folio,
|
|||
}
|
||||
|
||||
/*
|
||||
* After btrfs_copy_from_user(), update the following things for delalloc:
|
||||
* After copy_folio_from_iter_atomic(), update the following things for delalloc:
|
||||
* - Mark newly dirtied folio as DELALLOC in the io tree.
|
||||
* Used to advise which range is to be written back.
|
||||
* - Mark modified folio as Uptodate/Dirty and not needing COW fixup
|
||||
|
@ -224,7 +179,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
|||
if (args->drop_cache)
|
||||
btrfs_drop_extent_map_range(inode, args->start, args->end - 1, false);
|
||||
|
||||
if (args->start >= inode->disk_i_size && !args->replace_extent)
|
||||
if (data_race(args->start >= inode->disk_i_size) && !args->replace_extent)
|
||||
modify_tree = 0;
|
||||
|
||||
update_refs = (btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID);
|
||||
|
@ -245,7 +200,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
|||
next_slot:
|
||||
leaf = path->nodes[0];
|
||||
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
|
||||
BUG_ON(del_nr > 0);
|
||||
if (WARN_ON(del_nr > 0)) {
|
||||
btrfs_print_leaf(leaf);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
@ -321,7 +280,11 @@ next_slot:
|
|||
* | -------- extent -------- |
|
||||
*/
|
||||
if (args->start > key.offset && args->end < extent_end) {
|
||||
BUG_ON(del_nr > 0);
|
||||
if (WARN_ON(del_nr > 0)) {
|
||||
btrfs_print_leaf(leaf);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
ret = -EOPNOTSUPP;
|
||||
break;
|
||||
|
@ -351,7 +314,6 @@ next_slot:
|
|||
btrfs_set_file_extent_offset(leaf, fi, extent_offset);
|
||||
btrfs_set_file_extent_num_bytes(leaf, fi,
|
||||
extent_end - args->start);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
if (update_refs && disk_bytenr > 0) {
|
||||
struct btrfs_ref ref = {
|
||||
|
@ -397,7 +359,6 @@ next_slot:
|
|||
btrfs_set_file_extent_offset(leaf, fi, extent_offset);
|
||||
btrfs_set_file_extent_num_bytes(leaf, fi,
|
||||
extent_end - args->end);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
if (update_refs && disk_bytenr > 0)
|
||||
args->bytes_found += args->end - key.offset;
|
||||
break;
|
||||
|
@ -409,7 +370,11 @@ next_slot:
|
|||
* | -------- extent -------- |
|
||||
*/
|
||||
if (args->start > key.offset && args->end >= extent_end) {
|
||||
BUG_ON(del_nr > 0);
|
||||
if (WARN_ON(del_nr > 0)) {
|
||||
btrfs_print_leaf(leaf);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
ret = -EOPNOTSUPP;
|
||||
break;
|
||||
|
@ -417,7 +382,6 @@ next_slot:
|
|||
|
||||
btrfs_set_file_extent_num_bytes(leaf, fi,
|
||||
args->start - key.offset);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
if (update_refs && disk_bytenr > 0)
|
||||
args->bytes_found += extent_end - args->start;
|
||||
if (args->end == extent_end)
|
||||
|
@ -437,7 +401,11 @@ delete_extent_item:
|
|||
del_slot = path->slots[0];
|
||||
del_nr = 1;
|
||||
} else {
|
||||
BUG_ON(del_slot + del_nr != path->slots[0]);
|
||||
if (WARN_ON(del_slot + del_nr != path->slots[0])) {
|
||||
btrfs_print_leaf(leaf);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
del_nr++;
|
||||
}
|
||||
|
||||
|
@ -668,7 +636,6 @@ again:
|
|||
trans->transid);
|
||||
btrfs_set_file_extent_num_bytes(leaf, fi,
|
||||
end - other_start);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
@ -697,7 +664,6 @@ again:
|
|||
other_end - start);
|
||||
btrfs_set_file_extent_offset(leaf, fi,
|
||||
start - orig_offset);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
@ -731,7 +697,6 @@ again:
|
|||
btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
|
||||
btrfs_set_file_extent_num_bytes(leaf, fi,
|
||||
extent_end - split);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
ref.action = BTRFS_ADD_DELAYED_REF;
|
||||
ref.bytenr = bytenr;
|
||||
|
@ -810,7 +775,6 @@ again:
|
|||
btrfs_set_file_extent_type(leaf, fi,
|
||||
BTRFS_FILE_EXTENT_REG);
|
||||
btrfs_set_file_extent_generation(leaf, fi, trans->transid);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
} else {
|
||||
fi = btrfs_item_ptr(leaf, del_slot - 1,
|
||||
struct btrfs_file_extent_item);
|
||||
|
@ -819,7 +783,6 @@ again:
|
|||
btrfs_set_file_extent_generation(leaf, fi, trans->transid);
|
||||
btrfs_set_file_extent_num_bytes(leaf, fi,
|
||||
extent_end - key.offset);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
|
||||
if (ret < 0) {
|
||||
|
@ -1052,7 +1015,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
|||
&cached_state);
|
||||
}
|
||||
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
|
||||
NULL, nowait, false);
|
||||
NULL, nowait);
|
||||
if (ret <= 0)
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
else
|
||||
|
@ -1252,7 +1215,23 @@ again:
|
|||
break;
|
||||
}
|
||||
|
||||
copied = btrfs_copy_from_user(pos, write_bytes, folio, i);
|
||||
copied = copy_folio_from_iter_atomic(folio,
|
||||
offset_in_folio(folio, pos), write_bytes, i);
|
||||
flush_dcache_folio(folio);
|
||||
|
||||
/*
|
||||
* If we get a partial write, we can end up with partially
|
||||
* uptodate page. Although if sector size < page size we can
|
||||
* handle it, but if it's not sector aligned it can cause
|
||||
* a lot of complexity, so make sure they don't happen by
|
||||
* forcing retry this copy.
|
||||
*/
|
||||
if (unlikely(copied < write_bytes)) {
|
||||
if (!folio_test_uptodate(folio)) {
|
||||
iov_iter_revert(i, copied);
|
||||
copied = 0;
|
||||
}
|
||||
}
|
||||
|
||||
num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
|
||||
dirty_sectors = round_up(copied + sector_offset,
|
||||
|
@ -2029,7 +2008,6 @@ static int fill_holes(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
|
||||
btrfs_set_file_extent_offset(leaf, fi, 0);
|
||||
btrfs_set_file_extent_generation(leaf, fi, trans->transid);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -2046,7 +2024,6 @@ static int fill_holes(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
|
||||
btrfs_set_file_extent_offset(leaf, fi, 0);
|
||||
btrfs_set_file_extent_generation(leaf, fi, trans->transid);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
goto out;
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
|
@ -2194,7 +2171,6 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_file_extent_num_bytes(leaf, extent, replace_len);
|
||||
if (extent_info->is_new_extent)
|
||||
btrfs_set_file_extent_generation(leaf, extent, trans->transid);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
ret = btrfs_inode_set_file_extent_range(inode, extent_info->file_offset,
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include <linux/error-injection.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/string_choices.h>
|
||||
#include "ctree.h"
|
||||
#include "extent-tree.h"
|
||||
#include "fs.h"
|
||||
#include "messages.h"
|
||||
#include "misc.h"
|
||||
|
@ -198,7 +198,6 @@ static int __create_free_space_inode(struct btrfs_root *root,
|
|||
btrfs_set_inode_nlink(leaf, inode_item, 1);
|
||||
btrfs_set_inode_transid(leaf, inode_item, trans->transid);
|
||||
btrfs_set_inode_block_group(leaf, inode_item, offset);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
|
||||
|
@ -216,7 +215,6 @@ static int __create_free_space_inode(struct btrfs_root *root,
|
|||
struct btrfs_free_space_header);
|
||||
memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
|
||||
btrfs_set_free_space_key(leaf, header, &disk_key);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
return 0;
|
||||
|
@ -463,7 +461,7 @@ static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ret = set_page_extent_mapped(page);
|
||||
ret = set_folio_extent_mapped(page_folio(page));
|
||||
if (ret < 0) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
|
@ -1189,7 +1187,6 @@ update_cache_item(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_free_space_entries(leaf, header, entries);
|
||||
btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
|
||||
btrfs_set_free_space_generation(leaf, header, trans->transid);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -89,7 +89,6 @@ static int add_new_free_space_info(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_free_space_info);
|
||||
btrfs_set_free_space_extent_count(leaf, info, 0);
|
||||
btrfs_set_free_space_flags(leaf, info, 0);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
|
@ -287,7 +286,6 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
|
|||
flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
|
||||
btrfs_set_free_space_flags(leaf, info, flags);
|
||||
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
if (extent_count != expected_extent_count) {
|
||||
|
@ -324,7 +322,6 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
|
|||
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
||||
write_extent_buffer(leaf, bitmap_cursor, ptr,
|
||||
data_size);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
i += extent_size;
|
||||
|
@ -430,7 +427,6 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
|
|||
flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
|
||||
btrfs_set_free_space_flags(leaf, info, flags);
|
||||
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
|
||||
|
@ -495,7 +491,6 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
|
|||
|
||||
extent_count += new_extents;
|
||||
btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
btrfs_release_path(path);
|
||||
|
||||
if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
|
||||
|
@ -1350,6 +1345,12 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
|
|||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
}
|
||||
if (btrfs_should_end_transaction(trans)) {
|
||||
btrfs_end_transaction(trans);
|
||||
trans = btrfs_start_transaction(free_space_root, 1);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
}
|
||||
node = rb_next(node);
|
||||
}
|
||||
|
||||
|
|
130
fs/btrfs/fs.c
130
fs/btrfs/fs.c
|
@ -4,6 +4,136 @@
|
|||
#include "ctree.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "volumes.h"
|
||||
|
||||
static const struct btrfs_csums {
|
||||
u16 size;
|
||||
const char name[10];
|
||||
const char driver[12];
|
||||
} btrfs_csums[] = {
|
||||
[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
|
||||
[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
|
||||
[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
|
||||
[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
|
||||
.driver = "blake2b-256" },
|
||||
};
|
||||
|
||||
/* This exists for btrfs-progs usages. */
|
||||
u16 btrfs_csum_type_size(u16 type)
|
||||
{
|
||||
return btrfs_csums[type].size;
|
||||
}
|
||||
|
||||
/*
 * Return the checksum size for the checksum type stored in super block @s.
 */
int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
	/* csum type is validated at mount time, so it is not range checked here. */
	return btrfs_csum_type_size(btrfs_super_csum_type(s));
}
|
||||
|
||||
/*
 * Return the name of checksum type @csum_type from the checksum table.
 */
const char *btrfs_super_csum_name(u16 csum_type)
{
	/* csum type is validated at mount time. */
	const struct btrfs_csums *entry = &btrfs_csums[csum_type];

	return entry->name;
}
|
||||
|
||||
/*
|
||||
* Return driver name if defined, otherwise the name that's also a valid driver
|
||||
* name.
|
||||
*/
|
||||
const char *btrfs_super_csum_driver(u16 csum_type)
|
||||
{
|
||||
/* csum type is validated at mount time */
|
||||
return btrfs_csums[csum_type].driver[0] ?
|
||||
btrfs_csums[csum_type].driver :
|
||||
btrfs_csums[csum_type].name;
|
||||
}
|
||||
|
||||
/*
 * Return the number of entries in the checksum table.
 */
size_t __attribute_const__ btrfs_get_num_csums(void)
{
	const size_t nr_csums = ARRAY_SIZE(btrfs_csums);

	return nr_csums;
}
|
||||
|
||||
/*
|
||||
* Start exclusive operation @type, return true on success.
|
||||
*/
|
||||
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation type)
|
||||
{
|
||||
bool ret = false;
|
||||
|
||||
spin_lock(&fs_info->super_lock);
|
||||
if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) {
|
||||
fs_info->exclusive_operation = type;
|
||||
ret = true;
|
||||
}
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Conditionally allow to enter the exclusive operation in case it's compatible
|
||||
* with the running one. This must be paired with btrfs_exclop_start_unlock()
|
||||
* and btrfs_exclop_finish().
|
||||
*
|
||||
* Compatibility:
|
||||
* - the same type is already running
|
||||
* - when trying to add a device and balance has been paused
|
||||
* - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller
|
||||
* must check the condition first that would allow none -> @type
|
||||
*/
|
||||
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation type)
|
||||
{
|
||||
spin_lock(&fs_info->super_lock);
|
||||
if (fs_info->exclusive_operation == type ||
|
||||
(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED &&
|
||||
type == BTRFS_EXCLOP_DEV_ADD))
|
||||
return true;
|
||||
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
}
|
||||
|
||||
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
spin_lock(&fs_info->super_lock);
|
||||
WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
|
||||
}
|
||||
|
||||
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation op)
|
||||
{
|
||||
switch (op) {
|
||||
case BTRFS_EXCLOP_BALANCE_PAUSED:
|
||||
spin_lock(&fs_info->super_lock);
|
||||
ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
|
||||
fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
|
||||
fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
|
||||
fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
|
||||
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
break;
|
||||
case BTRFS_EXCLOP_BALANCE:
|
||||
spin_lock(&fs_info->super_lock);
|
||||
ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
|
||||
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
break;
|
||||
default:
|
||||
btrfs_warn(fs_info,
|
||||
"invalid exclop balance operation %d requested", op);
|
||||
}
|
||||
}
|
||||
|
||||
void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
|
||||
const char *name)
|
||||
|
|
|
@ -14,10 +14,10 @@
|
|||
#include <linux/lockdep.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/rwlock_types.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/semaphore.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/wait.h>
|
||||
|
@ -627,6 +627,9 @@ struct btrfs_fs_info {
|
|||
struct kobject *qgroups_kobj;
|
||||
struct kobject *discard_kobj;
|
||||
|
||||
/* Track the number of blocks (sectors) read by the filesystem. */
|
||||
struct percpu_counter stats_read_blocks;
|
||||
|
||||
/* Used to keep from writing metadata until there is a nice batch */
|
||||
struct percpu_counter dirty_metadata_bytes;
|
||||
struct percpu_counter delalloc_bytes;
|
||||
|
@ -887,6 +890,11 @@ struct btrfs_fs_info {
|
|||
#define inode_to_fs_info(_inode) (BTRFS_I(_Generic((_inode), \
|
||||
struct inode *: (_inode)))->root->fs_info)
|
||||
|
||||
static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
|
||||
{
|
||||
return mapping_gfp_constraint(mapping, ~__GFP_FS);
|
||||
}
|
||||
|
||||
static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return READ_ONCE(fs_info->generation);
|
||||
|
@ -953,6 +961,8 @@ static inline u64 btrfs_calc_metadata_size(const struct btrfs_fs_info *fs_info,
|
|||
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
|
||||
sizeof(struct btrfs_item))
|
||||
|
||||
#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) ((bytes) >> (fs_info)->sectorsize_bits)
|
||||
|
||||
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && fs_info->zone_size > 0;
|
||||
|
@ -982,6 +992,17 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
|
|||
|
||||
int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args);
|
||||
|
||||
u16 btrfs_csum_type_size(u16 type);
|
||||
int btrfs_super_csum_size(const struct btrfs_super_block *s);
|
||||
const char *btrfs_super_csum_name(u16 csum_type);
|
||||
const char *btrfs_super_csum_driver(u16 csum_type);
|
||||
size_t __attribute_const__ btrfs_get_num_csums(void);
|
||||
|
||||
static inline bool btrfs_is_empty_uuid(const u8 *uuid)
|
||||
{
|
||||
return uuid_is_null((const uuid_t *)uuid);
|
||||
}
|
||||
|
||||
/* Compatibility and incompatibility defines */
|
||||
void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
|
||||
const char *name);
|
||||
|
@ -1058,6 +1079,14 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
|
|||
(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \
|
||||
&(fs_info)->fs_state)))
|
||||
|
||||
/*
|
||||
* We use folio flag owner_2 to indicate there is an ordered extent with
|
||||
* unfinished IO.
|
||||
*/
|
||||
#define folio_test_ordered(folio) folio_test_owner_2(folio)
|
||||
#define folio_set_ordered(folio) folio_set_owner_2(folio)
|
||||
#define folio_clear_ordered(folio) folio_clear_owner_2(folio)
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
|
||||
#define EXPORT_FOR_TESTS
|
||||
|
|
|
@ -298,8 +298,6 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
|
|||
|
||||
ptr = (unsigned long)&extref->name;
|
||||
write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -363,8 +361,6 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
|
|||
ptr = (unsigned long)(ref + 1);
|
||||
}
|
||||
write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
|
||||
|
@ -590,7 +586,6 @@ search_again:
|
|||
num_dec = (orig_num_bytes - extent_num_bytes);
|
||||
if (extent_start != 0)
|
||||
control->sub_bytes += num_dec;
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
} else {
|
||||
extent_num_bytes =
|
||||
btrfs_file_extent_disk_num_bytes(leaf, fi);
|
||||
|
|
325
fs/btrfs/inode.c
325
fs/btrfs/inode.c
|
@ -393,34 +393,13 @@ void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags)
|
|||
* extent (btrfs_finish_ordered_io()).
|
||||
*/
|
||||
static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
|
||||
struct folio *locked_folio,
|
||||
u64 offset, u64 bytes)
|
||||
{
|
||||
unsigned long index = offset >> PAGE_SHIFT;
|
||||
unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
|
||||
u64 page_start = 0, page_end = 0;
|
||||
struct folio *folio;
|
||||
|
||||
if (locked_folio) {
|
||||
page_start = folio_pos(locked_folio);
|
||||
page_end = page_start + folio_size(locked_folio) - 1;
|
||||
}
|
||||
|
||||
while (index <= end_index) {
|
||||
/*
|
||||
* For locked page, we will call btrfs_mark_ordered_io_finished
|
||||
* through btrfs_mark_ordered_io_finished() on it
|
||||
* in run_delalloc_range() for the error handling, which will
|
||||
* clear page Ordered and run the ordered extent accounting.
|
||||
*
|
||||
* Here we can't just clear the Ordered bit, or
|
||||
* btrfs_mark_ordered_io_finished() would skip the accounting
|
||||
* for the page range, and the ordered extent will never finish.
|
||||
*/
|
||||
if (locked_folio && index == (page_start >> PAGE_SHIFT)) {
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
folio = filemap_get_folio(inode->vfs_inode.i_mapping, index);
|
||||
index++;
|
||||
if (IS_ERR(folio))
|
||||
|
@ -436,23 +415,6 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
|
|||
folio_put(folio);
|
||||
}
|
||||
|
||||
if (locked_folio) {
|
||||
/* The locked page covers the full range, nothing needs to be done */
|
||||
if (bytes + offset <= page_start + folio_size(locked_folio))
|
||||
return;
|
||||
/*
|
||||
* In case this page belongs to the delalloc range being
|
||||
* instantiated then skip it, since the first page of a range is
|
||||
* going to be properly cleaned up by the caller of
|
||||
* run_delalloc_range
|
||||
*/
|
||||
if (page_start >= offset && page_end <= (offset + bytes - 1)) {
|
||||
bytes = offset + bytes - folio_pos(locked_folio) -
|
||||
folio_size(locked_folio);
|
||||
offset = folio_pos(locked_folio) + folio_size(locked_folio);
|
||||
}
|
||||
}
|
||||
|
||||
return btrfs_mark_ordered_io_finished(inode, NULL, offset, bytes, false);
|
||||
}
|
||||
|
||||
|
@ -564,7 +526,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
|
|||
kunmap_local(kaddr);
|
||||
folio_put(folio);
|
||||
}
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
/*
|
||||
|
@ -1129,19 +1090,14 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
|
|||
&wbc, false);
|
||||
wbc_detach_inode(&wbc);
|
||||
if (ret < 0) {
|
||||
btrfs_cleanup_ordered_extents(inode, locked_folio,
|
||||
start, end - start + 1);
|
||||
if (locked_folio) {
|
||||
const u64 page_start = folio_pos(locked_folio);
|
||||
|
||||
folio_start_writeback(locked_folio);
|
||||
folio_end_writeback(locked_folio);
|
||||
btrfs_mark_ordered_io_finished(inode, locked_folio,
|
||||
page_start, PAGE_SIZE,
|
||||
!ret);
|
||||
mapping_set_error(locked_folio->mapping, ret);
|
||||
folio_unlock(locked_folio);
|
||||
}
|
||||
btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
|
||||
if (locked_folio)
|
||||
btrfs_folio_end_lock(inode->root->fs_info, locked_folio,
|
||||
start, async_extent->ram_size);
|
||||
btrfs_err_rl(inode->root->fs_info,
|
||||
"%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
|
||||
__func__, btrfs_root_id(inode->root),
|
||||
btrfs_ino(inode), start, async_extent->ram_size, ret);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1372,6 +1328,17 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
|||
|
||||
alloc_hint = btrfs_get_extent_allocation_hint(inode, start, num_bytes);
|
||||
|
||||
/*
|
||||
* We're not doing compressed IO, don't unlock the first page (which
|
||||
* the caller expects to stay locked), don't clear any dirty bits and
|
||||
* don't set any writeback bits.
|
||||
*
|
||||
* Do set the Ordered (Private2) bit so we know this page was properly
|
||||
* setup for writepage.
|
||||
*/
|
||||
page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
|
||||
page_ops |= PAGE_SET_ORDERED;
|
||||
|
||||
/*
|
||||
* Relocation relies on the relocated extents to have exactly the same
|
||||
* size as the original extents. Normally writeback for relocation data
|
||||
|
@ -1431,6 +1398,10 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
|||
file_extent.offset = 0;
|
||||
file_extent.compression = BTRFS_COMPRESS_NONE;
|
||||
|
||||
/*
|
||||
* Locked range will be released either during error clean up or
|
||||
* after the whole range is finished.
|
||||
*/
|
||||
lock_extent(&inode->io_tree, start, start + cur_alloc_size - 1,
|
||||
&cached);
|
||||
|
||||
|
@ -1476,21 +1447,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
|||
|
||||
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
|
||||
|
||||
/*
|
||||
* We're not doing compressed IO, don't unlock the first page
|
||||
* (which the caller expects to stay locked), don't clear any
|
||||
* dirty bits and don't set any writeback bits
|
||||
*
|
||||
* Do set the Ordered flag so we know this page was
|
||||
* properly setup for writepage.
|
||||
*/
|
||||
page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
|
||||
page_ops |= PAGE_SET_ORDERED;
|
||||
|
||||
extent_clear_unlock_delalloc(inode, start, start + cur_alloc_size - 1,
|
||||
locked_folio, &cached,
|
||||
EXTENT_LOCKED | EXTENT_DELALLOC,
|
||||
page_ops);
|
||||
if (num_bytes < cur_alloc_size)
|
||||
num_bytes = 0;
|
||||
else
|
||||
|
@ -1507,6 +1463,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
|||
if (ret)
|
||||
goto out_unlock;
|
||||
}
|
||||
extent_clear_unlock_delalloc(inode, orig_start, end, locked_folio, &cached,
|
||||
EXTENT_LOCKED | EXTENT_DELALLOC, page_ops);
|
||||
done:
|
||||
if (done_offset)
|
||||
*done_offset = end;
|
||||
|
@ -1527,35 +1485,30 @@ out_unlock:
|
|||
* We process each region below.
|
||||
*/
|
||||
|
||||
clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
|
||||
EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
|
||||
page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
|
||||
|
||||
/*
|
||||
* For the range (1). We have already instantiated the ordered extents
|
||||
* for this region. They are cleaned up by
|
||||
* btrfs_cleanup_ordered_extents() in e.g,
|
||||
* btrfs_run_delalloc_range(). EXTENT_LOCKED | EXTENT_DELALLOC are
|
||||
* already cleared in the above loop. And, EXTENT_DELALLOC_NEW |
|
||||
* EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup
|
||||
* function.
|
||||
* btrfs_run_delalloc_range().
|
||||
* EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV
|
||||
* are also handled by the cleanup function.
|
||||
*
|
||||
* However, in case of @keep_locked, we still need to unlock the pages
|
||||
* (except @locked_folio) to ensure all the pages are unlocked.
|
||||
* So here we only clear EXTENT_LOCKED and EXTENT_DELALLOC flag, and
|
||||
* finish the writeback of the involved folios, which will be never submitted.
|
||||
*/
|
||||
if (keep_locked && orig_start < start) {
|
||||
if (orig_start < start) {
|
||||
clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC;
|
||||
page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
|
||||
|
||||
if (!locked_folio)
|
||||
mapping_set_error(inode->vfs_inode.i_mapping, ret);
|
||||
extent_clear_unlock_delalloc(inode, orig_start, start - 1,
|
||||
locked_folio, NULL, 0, page_ops);
|
||||
locked_folio, NULL, clear_bits, page_ops);
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point we're unlocked, we want to make sure we're only
|
||||
* clearing these flags under the extent lock, so lock the rest of the
|
||||
* range and clear everything up.
|
||||
*/
|
||||
lock_extent(&inode->io_tree, start, end, NULL);
|
||||
clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
|
||||
EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
|
||||
page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
|
||||
|
||||
/*
|
||||
* For the range (2). If we reserved an extent for our delalloc range
|
||||
|
@ -1589,6 +1542,10 @@ out_unlock:
|
|||
btrfs_qgroup_free_data(inode, NULL, start + cur_alloc_size,
|
||||
end - start - cur_alloc_size + 1, NULL);
|
||||
}
|
||||
btrfs_err_rl(fs_info,
|
||||
"%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
|
||||
__func__, btrfs_root_id(inode->root),
|
||||
btrfs_ino(inode), orig_start, end + 1 - orig_start, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1809,7 +1766,7 @@ static int fallback_to_cow(struct btrfs_inode *inode,
|
|||
bytes = range_bytes;
|
||||
|
||||
spin_lock(&sinfo->lock);
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
|
||||
btrfs_space_info_update_bytes_may_use(sinfo, bytes);
|
||||
spin_unlock(&sinfo->lock);
|
||||
|
||||
if (count > 0)
|
||||
|
@ -1837,7 +1794,6 @@ struct can_nocow_file_extent_args {
|
|||
/* End file offset (inclusive) of the range we want to NOCOW. */
|
||||
u64 end;
|
||||
bool writeback_path;
|
||||
bool strict;
|
||||
/*
|
||||
* Free the path passed to can_nocow_file_extent() once it's not needed
|
||||
* anymore.
|
||||
|
@ -1892,8 +1848,7 @@ static int can_nocow_file_extent(struct btrfs_path *path,
|
|||
* for its subvolume was created, then this implies the extent is shared,
|
||||
* hence we must COW.
|
||||
*/
|
||||
if (!args->strict &&
|
||||
btrfs_file_extent_generation(leaf, fi) <=
|
||||
if (btrfs_file_extent_generation(leaf, fi) <=
|
||||
btrfs_root_last_snapshot(&root->root_item))
|
||||
goto out;
|
||||
|
||||
|
@ -1922,9 +1877,8 @@ static int can_nocow_file_extent(struct btrfs_path *path,
|
|||
*/
|
||||
btrfs_release_path(path);
|
||||
|
||||
ret = btrfs_cross_ref_exist(root, btrfs_ino(inode),
|
||||
key->offset - args->file_extent.offset,
|
||||
args->file_extent.disk_bytenr, args->strict, path);
|
||||
ret = btrfs_cross_ref_exist(inode, key->offset - args->file_extent.offset,
|
||||
args->file_extent.disk_bytenr, path);
|
||||
WARN_ON_ONCE(ret > 0 && is_freespace_inode);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
|
@ -1970,6 +1924,53 @@ static int can_nocow_file_extent(struct btrfs_path *path,
|
|||
return ret < 0 ? ret : can_nocow;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cleanup the dirty folios which will never be submitted due to error.
|
||||
*
|
||||
* When running a delalloc range, we may need to split the ranges (due to
|
||||
* fragmentation or NOCOW). If we hit an error in the later part, we will error
|
||||
* out and previously successfully executed range will never be submitted, thus
|
||||
* we have to cleanup those folios by clearing their dirty flag, starting and
|
||||
* finishing the writeback.
|
||||
*/
|
||||
static void cleanup_dirty_folios(struct btrfs_inode *inode,
|
||||
struct folio *locked_folio,
|
||||
u64 start, u64 end, int error)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct address_space *mapping = inode->vfs_inode.i_mapping;
|
||||
pgoff_t start_index = start >> PAGE_SHIFT;
|
||||
pgoff_t end_index = end >> PAGE_SHIFT;
|
||||
u32 len;
|
||||
|
||||
ASSERT(end + 1 - start < U32_MAX);
|
||||
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
|
||||
IS_ALIGNED(end + 1, fs_info->sectorsize));
|
||||
len = end + 1 - start;
|
||||
|
||||
/*
|
||||
* Handle the locked folio first.
|
||||
* The btrfs_folio_clamp_*() helpers can handle range out of the folio case.
|
||||
*/
|
||||
btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len);
|
||||
|
||||
for (pgoff_t index = start_index; index <= end_index; index++) {
|
||||
struct folio *folio;
|
||||
|
||||
/* Already handled at the beginning. */
|
||||
if (index == locked_folio->index)
|
||||
continue;
|
||||
folio = __filemap_get_folio(mapping, index, FGP_LOCK, GFP_NOFS);
|
||||
/* Cache already dropped, no need to do any cleanup. */
|
||||
if (IS_ERR(folio))
|
||||
continue;
|
||||
btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
mapping_set_error(mapping, error);
|
||||
}
|
||||
|
||||
/*
|
||||
 * Writeback callback for the nocow case. This checks for snapshots or COW copies
|
||||
* of the extents that exist in the file, and COWs the file as required.
|
||||
|
@ -1985,6 +1986,11 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
|
|||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_path *path;
|
||||
u64 cow_start = (u64)-1;
|
||||
/*
|
||||
* If not 0, represents the inclusive end of the last fallback_to_cow()
|
||||
* range. Only for error handling.
|
||||
*/
|
||||
u64 cow_end = 0;
|
||||
u64 cur_offset = start;
|
||||
int ret;
|
||||
bool check_prev = true;
|
||||
|
@ -2145,6 +2151,7 @@ must_cow:
|
|||
found_key.offset - 1);
|
||||
cow_start = (u64)-1;
|
||||
if (ret) {
|
||||
cow_end = found_key.offset - 1;
|
||||
btrfs_dec_nocow_writers(nocow_bg);
|
||||
goto error;
|
||||
}
|
||||
|
@ -2218,11 +2225,12 @@ must_cow:
|
|||
cow_start = cur_offset;
|
||||
|
||||
if (cow_start != (u64)-1) {
|
||||
cur_offset = end;
|
||||
ret = fallback_to_cow(inode, locked_folio, cow_start, end);
|
||||
cow_start = (u64)-1;
|
||||
if (ret)
|
||||
if (ret) {
|
||||
cow_end = end;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
btrfs_free_path(path);
|
||||
|
@ -2230,12 +2238,41 @@ must_cow:
|
|||
|
||||
error:
|
||||
/*
|
||||
* If an error happened while a COW region is outstanding, cur_offset
|
||||
* needs to be reset to cow_start to ensure the COW region is unlocked
|
||||
* as well.
|
||||
* There are several error cases:
|
||||
*
|
||||
* 1) Failed without falling back to COW
|
||||
* start cur_offset end
|
||||
* |/////////////| |
|
||||
*
|
||||
* For range [start, cur_offset) the folios are already unlocked (except
|
||||
* @locked_folio), EXTENT_DELALLOC already removed.
|
||||
* Only need to clear the dirty flag as they will never be submitted.
|
||||
* Ordered extent and extent maps are handled by
|
||||
* btrfs_mark_ordered_io_finished() inside run_delalloc_range().
|
||||
*
|
||||
* 2) Failed with error from fallback_to_cow()
|
||||
* start cur_offset cow_end end
|
||||
* |/////////////|-----------| |
|
||||
*
|
||||
* For range [start, cur_offset) it's the same as case 1).
|
||||
* But for range [cur_offset, cow_end), the folios have dirty flag
|
||||
 * cleared and unlocked, EXTENT_DELALLOC cleared by cow_file_range().
|
||||
*
|
||||
* Thus we should not call extent_clear_unlock_delalloc() on range
|
||||
* [cur_offset, cow_end), as the folios are already unlocked.
|
||||
*
|
||||
* So clear the folio dirty flags for [start, cur_offset) first.
|
||||
*/
|
||||
if (cow_start != (u64)-1)
|
||||
cur_offset = cow_start;
|
||||
if (cur_offset > start)
|
||||
cleanup_dirty_folios(inode, locked_folio, start, cur_offset - 1, ret);
|
||||
|
||||
/*
|
||||
* If an error happened while a COW region is outstanding, cur_offset
|
||||
* needs to be reset to @cow_end + 1 to skip the COW range, as
|
||||
* cow_file_range() will do the proper cleanup at error.
|
||||
*/
|
||||
if (cow_end)
|
||||
cur_offset = cow_end + 1;
|
||||
|
||||
/*
|
||||
* We need to lock the extent here because we're clearing DELALLOC and
|
||||
|
@ -2255,6 +2292,10 @@ error:
|
|||
btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
|
||||
}
|
||||
btrfs_free_path(path);
|
||||
btrfs_err_rl(fs_info,
|
||||
"%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
|
||||
__func__, btrfs_root_id(inode->root),
|
||||
btrfs_ino(inode), start, end + 1 - start, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2305,8 +2346,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
|
|||
|
||||
out:
|
||||
if (ret < 0)
|
||||
btrfs_cleanup_ordered_extents(inode, locked_folio, start,
|
||||
end - start + 1);
|
||||
btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2921,7 +2961,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
|
|||
btrfs_item_ptr_offset(leaf, path->slots[0]),
|
||||
sizeof(struct btrfs_file_extent_item));
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
/*
|
||||
|
@ -4085,7 +4124,6 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_inode_item);
|
||||
|
||||
fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_set_inode_last_trans(trans, inode);
|
||||
ret = 0;
|
||||
failed:
|
||||
|
@ -6380,7 +6418,6 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
}
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
/*
|
||||
* We don't need the path anymore, plus inheriting properties, adding
|
||||
* ACLs, security xattrs, orphan item or adding the link, will result in
|
||||
|
@ -7011,8 +7048,6 @@ static bool btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
|
|||
* @orig_start: (optional) Return the original file offset of the file extent
|
||||
* @orig_len: (optional) Return the original on-disk length of the file extent
|
||||
* @ram_bytes: (optional) Return the ram_bytes of the file extent
|
||||
* @strict: if true, omit optimizations that might force us into unnecessary
|
||||
* cow. e.g., don't trust generation number.
|
||||
*
|
||||
* Return:
|
||||
* >0 and update @len if we can do nocow write
|
||||
|
@ -7024,7 +7059,7 @@ static bool btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
|
|||
*/
|
||||
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
|
||||
struct btrfs_file_extent *file_extent,
|
||||
bool nowait, bool strict)
|
||||
bool nowait)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
struct can_nocow_file_extent_args nocow_args = { 0 };
|
||||
|
@ -7077,7 +7112,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
|
|||
|
||||
nocow_args.start = offset;
|
||||
nocow_args.end = offset + *len - 1;
|
||||
nocow_args.strict = strict;
|
||||
nocow_args.free_path = true;
|
||||
|
||||
ret = can_nocow_file_extent(path, &key, BTRFS_I(inode), &nocow_args);
|
||||
|
@ -8027,31 +8061,45 @@ static int btrfs_rename_exchange(struct inode *old_dir,
|
|||
/* src is a subvolume */
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
} else { /* src is an inode */
|
||||
ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
|
||||
BTRFS_I(old_dentry->d_inode),
|
||||
old_name, &old_rename_ctx);
|
||||
if (!ret)
|
||||
ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
|
||||
}
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
}
|
||||
|
||||
/* dest is a subvolume */
|
||||
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
} else { /* dest is an inode */
|
||||
ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
|
||||
BTRFS_I(new_dentry->d_inode),
|
||||
new_name, &new_rename_ctx);
|
||||
if (!ret)
|
||||
ret = btrfs_update_inode(trans, BTRFS_I(new_inode));
|
||||
}
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
ret = btrfs_update_inode(trans, BTRFS_I(new_inode));
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
}
|
||||
|
||||
ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
|
||||
|
@ -8287,16 +8335,23 @@ static int btrfs_rename(struct mnt_idmap *idmap,
|
|||
|
||||
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
|
||||
ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
} else {
|
||||
ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
|
||||
BTRFS_I(d_inode(old_dentry)),
|
||||
&old_fname.disk_name, &rename_ctx);
|
||||
if (!ret)
|
||||
ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
|
||||
}
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
}
|
||||
|
||||
if (new_inode) {
|
||||
|
@ -8304,18 +8359,27 @@ static int btrfs_rename(struct mnt_idmap *idmap,
|
|||
if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
|
||||
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
|
||||
ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
BUG_ON(new_inode->i_nlink == 0);
|
||||
} else {
|
||||
ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
|
||||
BTRFS_I(d_inode(new_dentry)),
|
||||
&new_fname.disk_name);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
}
|
||||
if (!ret && new_inode->i_nlink == 0)
|
||||
if (new_inode->i_nlink == 0) {
|
||||
ret = btrfs_orphan_add(trans,
|
||||
BTRFS_I(d_inode(new_dentry)));
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8655,7 +8719,6 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
|||
|
||||
ptr = btrfs_file_extent_inline_start(ei);
|
||||
write_extent_buffer(leaf, symname, ptr, name_len);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_free_path(path);
|
||||
|
||||
d_instantiate_new(dentry, inode);
|
||||
|
|
222
fs/btrfs/ioctl.c
222
fs/btrfs/ioctl.c
|
@ -403,86 +403,6 @@ update_flags:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Start exclusive operation @type, return true on success
|
||||
*/
|
||||
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation type)
|
||||
{
|
||||
bool ret = false;
|
||||
|
||||
spin_lock(&fs_info->super_lock);
|
||||
if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) {
|
||||
fs_info->exclusive_operation = type;
|
||||
ret = true;
|
||||
}
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Conditionally allow to enter the exclusive operation in case it's compatible
|
||||
* with the running one. This must be paired with btrfs_exclop_start_unlock and
|
||||
* btrfs_exclop_finish.
|
||||
*
|
||||
* Compatibility:
|
||||
* - the same type is already running
|
||||
* - when trying to add a device and balance has been paused
|
||||
* - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller
|
||||
* must check the condition first that would allow none -> @type
|
||||
*/
|
||||
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation type)
|
||||
{
|
||||
spin_lock(&fs_info->super_lock);
|
||||
if (fs_info->exclusive_operation == type ||
|
||||
(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED &&
|
||||
type == BTRFS_EXCLOP_DEV_ADD))
|
||||
return true;
|
||||
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
}
|
||||
|
||||
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
spin_lock(&fs_info->super_lock);
|
||||
WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
|
||||
}
|
||||
|
||||
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation op)
|
||||
{
|
||||
switch (op) {
|
||||
case BTRFS_EXCLOP_BALANCE_PAUSED:
|
||||
spin_lock(&fs_info->super_lock);
|
||||
ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
|
||||
fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
|
||||
fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
|
||||
fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
|
||||
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
break;
|
||||
case BTRFS_EXCLOP_BALANCE:
|
||||
spin_lock(&fs_info->super_lock);
|
||||
ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
|
||||
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
break;
|
||||
default:
|
||||
btrfs_warn(fs_info,
|
||||
"invalid exclop balance operation %d requested", op);
|
||||
}
|
||||
}
|
||||
|
||||
static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg)
|
||||
{
|
||||
return put_user(inode->i_generation, arg);
|
||||
|
@ -551,17 +471,6 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
|
|||
return ret;
|
||||
}
|
||||
|
||||
int __pure btrfs_is_empty_uuid(const u8 *uuid)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_UUID_SIZE; i++) {
|
||||
if (uuid[i])
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the number of transaction items to reserve for creating a subvolume
|
||||
* or snapshot, not including the inode, directory entries, or parent directory.
|
||||
|
@ -3007,7 +2916,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
|
|||
|
||||
btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
|
||||
btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
btrfs_release_path(path);
|
||||
|
||||
btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
|
||||
|
@ -5028,6 +4936,128 @@ out_acct:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
loff_t pos;
|
||||
struct kiocb kiocb;
|
||||
struct file *file;
|
||||
ssize_t ret;
|
||||
void __user *sqe_addr;
|
||||
struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
file = cmd->file;
|
||||
sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
|
||||
|
||||
if (!(file->f_mode & FMODE_WRITE)) {
|
||||
ret = -EBADF;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
if (!data) {
|
||||
data = kzalloc(sizeof(*data), GFP_NOFS);
|
||||
if (!data) {
|
||||
ret = -ENOMEM;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
io_uring_cmd_get_async_data(cmd)->op_data = data;
|
||||
|
||||
if (issue_flags & IO_URING_F_COMPAT) {
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
struct btrfs_ioctl_encoded_io_args_32 args32;
|
||||
|
||||
if (copy_from_user(&args32, sqe_addr, sizeof(args32))) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
data->args.iov = compat_ptr(args32.iov);
|
||||
data->args.iovcnt = args32.iovcnt;
|
||||
data->args.offset = args32.offset;
|
||||
data->args.flags = args32.flags;
|
||||
data->args.len = args32.len;
|
||||
data->args.unencoded_len = args32.unencoded_len;
|
||||
data->args.unencoded_offset = args32.unencoded_offset;
|
||||
data->args.compression = args32.compression;
|
||||
data->args.encryption = args32.encryption;
|
||||
memcpy(data->args.reserved, args32.reserved,
|
||||
sizeof(data->args.reserved));
|
||||
#else
|
||||
ret = -ENOTTY;
|
||||
goto out_acct;
|
||||
#endif
|
||||
} else {
|
||||
if (copy_from_user(&data->args, sqe_addr, sizeof(data->args))) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
}
|
||||
|
||||
ret = -EINVAL;
|
||||
if (data->args.flags != 0)
|
||||
goto out_acct;
|
||||
if (memchr_inv(data->args.reserved, 0, sizeof(data->args.reserved)))
|
||||
goto out_acct;
|
||||
if (data->args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
|
||||
data->args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
|
||||
goto out_acct;
|
||||
if (data->args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
|
||||
data->args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
|
||||
goto out_acct;
|
||||
if (data->args.unencoded_offset > data->args.unencoded_len)
|
||||
goto out_acct;
|
||||
if (data->args.len > data->args.unencoded_len - data->args.unencoded_offset)
|
||||
goto out_acct;
|
||||
|
||||
data->iov = data->iovstack;
|
||||
ret = import_iovec(ITER_SOURCE, data->args.iov, data->args.iovcnt,
|
||||
ARRAY_SIZE(data->iovstack), &data->iov,
|
||||
&data->iter);
|
||||
if (ret < 0)
|
||||
goto out_acct;
|
||||
|
||||
if (iov_iter_count(&data->iter) == 0) {
|
||||
ret = 0;
|
||||
goto out_iov;
|
||||
}
|
||||
}
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK) {
|
||||
ret = -EAGAIN;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
pos = data->args.offset;
|
||||
ret = rw_verify_area(WRITE, file, &pos, data->args.len);
|
||||
if (ret < 0)
|
||||
goto out_iov;
|
||||
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
ret = kiocb_set_rw_flags(&kiocb, 0, WRITE);
|
||||
if (ret)
|
||||
goto out_iov;
|
||||
kiocb.ki_pos = pos;
|
||||
|
||||
file_start_write(file);
|
||||
|
||||
ret = btrfs_do_write_iter(&kiocb, &data->iter, &data->args);
|
||||
if (ret > 0)
|
||||
fsnotify_modify(file);
|
||||
|
||||
file_end_write(file);
|
||||
out_iov:
|
||||
kfree(data->iov);
|
||||
out_acct:
|
||||
if (ret > 0)
|
||||
add_wchar(current, ret);
|
||||
inc_syscw(current);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
switch (cmd->cmd_op) {
|
||||
|
@ -5036,6 +5066,12 @@ int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
|||
case BTRFS_IOC_ENCODED_READ_32:
|
||||
#endif
|
||||
return btrfs_uring_encoded_read(cmd, issue_flags);
|
||||
|
||||
case BTRFS_IOC_ENCODED_WRITE:
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
case BTRFS_IOC_ENCODED_WRITE_32:
|
||||
#endif
|
||||
return btrfs_uring_encoded_write(cmd, issue_flags);
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
|
@ -5308,6 +5344,8 @@ long btrfs_ioctl(struct file *file, unsigned int
|
|||
return fsverity_ioctl_enable(file, (const void __user *)argp);
|
||||
case FS_IOC_MEASURE_VERITY:
|
||||
return fsverity_ioctl_measure(file, argp);
|
||||
case FS_IOC_READ_VERITY_METADATA:
|
||||
return fsverity_ioctl_read_metadata(file, argp);
|
||||
case BTRFS_IOC_ENCODED_READ:
|
||||
return btrfs_ioctl_encoded_read(file, argp, false);
|
||||
case BTRFS_IOC_ENCODED_WRITE:
|
||||
|
|
|
@ -19,7 +19,6 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
|
|||
struct dentry *dentry, struct fileattr *fa);
|
||||
int btrfs_ioctl_get_supported_features(void __user *arg);
|
||||
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
|
||||
int __pure btrfs_is_empty_uuid(const u8 *uuid);
|
||||
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_balance_args *bargs);
|
||||
int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
|
|
|
@ -199,8 +199,13 @@ static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb)
|
|||
{
|
||||
lockdep_assert_held_write(&eb->lock);
|
||||
}
|
||||
static inline void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
|
||||
{
|
||||
lockdep_assert_held_read(&eb->lock);
|
||||
}
|
||||
#else
|
||||
static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb) { }
|
||||
static inline void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
|
||||
#endif
|
||||
|
||||
void btrfs_unlock_up_safe(struct btrfs_path *path, int level);
|
||||
|
|
|
@ -673,9 +673,6 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
|
|||
key.offset = dst;
|
||||
|
||||
ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
@ -752,8 +749,6 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
|
||||
btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
btrfs_release_path(path);
|
||||
|
||||
key.type = BTRFS_QGROUP_LIMIT_KEY;
|
||||
|
@ -771,8 +766,6 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
|
||||
btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
|
@ -859,9 +852,6 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
|
||||
btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
|
||||
btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, l);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -905,9 +895,6 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
|
||||
btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
|
||||
btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, l);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -947,9 +934,6 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
|
|||
btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
|
||||
btrfs_set_qgroup_status_rescan(l, ptr,
|
||||
fs_info->qgroup_rescan_progress.objectid);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, l);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -1130,8 +1114,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
|
|||
BTRFS_QGROUP_STATUS_FLAGS_MASK);
|
||||
btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
key.objectid = 0;
|
||||
key.type = BTRFS_ROOT_REF_KEY;
|
||||
key.offset = 0;
|
||||
|
@ -1838,9 +1820,19 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
|
|||
* Thus its reserved space should all be zero, no matter if qgroup
|
||||
* is consistent or the mode.
|
||||
*/
|
||||
WARN_ON(qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] ||
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] ||
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS]);
|
||||
if (qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] ||
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] ||
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS]) {
|
||||
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
|
||||
btrfs_warn_rl(fs_info,
|
||||
"to be deleted qgroup %u/%llu has non-zero numbers, data %llu meta prealloc %llu meta pertrans %llu",
|
||||
btrfs_qgroup_level(qgroup->qgroupid),
|
||||
btrfs_qgroup_subvolid(qgroup->qgroupid),
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA],
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC],
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS]);
|
||||
|
||||
}
|
||||
/*
|
||||
* The same for rfer/excl numbers, but that's only if our qgroup is
|
||||
* consistent and if it's in regular qgroup mode.
|
||||
|
@ -1849,8 +1841,9 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
|
|||
*/
|
||||
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL &&
|
||||
!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)) {
|
||||
if (WARN_ON(qgroup->rfer || qgroup->excl ||
|
||||
qgroup->rfer_cmpr || qgroup->excl_cmpr)) {
|
||||
if (qgroup->rfer || qgroup->excl ||
|
||||
qgroup->rfer_cmpr || qgroup->excl_cmpr) {
|
||||
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
|
||||
btrfs_warn_rl(fs_info,
|
||||
"to be deleted qgroup %u/%llu has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu",
|
||||
btrfs_qgroup_level(qgroup->qgroupid),
|
||||
|
|
|
@ -13,12 +13,13 @@
|
|||
#include "volumes.h"
|
||||
#include "print-tree.h"
|
||||
|
||||
static void btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
|
||||
static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_key *oldkey,
|
||||
u64 newlen, u64 frontpad)
|
||||
{
|
||||
struct btrfs_stripe_extent *extent;
|
||||
struct btrfs_root *stripe_root = trans->fs_info->stripe_root;
|
||||
struct btrfs_stripe_extent *extent, *newitem;
|
||||
struct extent_buffer *leaf;
|
||||
int slot;
|
||||
size_t item_size;
|
||||
|
@ -27,23 +28,39 @@ static void btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
|
|||
.type = BTRFS_RAID_STRIPE_KEY,
|
||||
.offset = newlen,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ASSERT(newlen > 0);
|
||||
ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
|
||||
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
item_size = btrfs_item_size(leaf, slot);
|
||||
|
||||
newitem = kzalloc(item_size, GFP_NOFS);
|
||||
if (!newitem)
|
||||
return -ENOMEM;
|
||||
|
||||
extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
|
||||
|
||||
for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
|
||||
struct btrfs_raid_stride *stride = &extent->strides[i];
|
||||
u64 phys;
|
||||
|
||||
phys = btrfs_raid_stride_physical(leaf, stride);
|
||||
btrfs_set_raid_stride_physical(leaf, stride, phys + frontpad);
|
||||
phys = btrfs_raid_stride_physical(leaf, stride) + frontpad;
|
||||
btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys);
|
||||
}
|
||||
|
||||
btrfs_set_item_key_safe(trans, path, &newkey);
|
||||
ret = btrfs_del_item(trans, stripe_root, path);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
btrfs_release_path(path);
|
||||
ret = btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);
|
||||
|
||||
out:
|
||||
kfree(newitem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
|
||||
|
@ -59,9 +76,22 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
|
|||
int slot;
|
||||
int ret;
|
||||
|
||||
if (!stripe_root)
|
||||
if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root)
|
||||
return 0;
|
||||
|
||||
if (!btrfs_is_testing(fs_info)) {
|
||||
struct btrfs_chunk_map *map;
|
||||
bool use_rst;
|
||||
|
||||
map = btrfs_find_chunk_map(fs_info, start, length);
|
||||
if (!map)
|
||||
return -EINVAL;
|
||||
use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
|
||||
btrfs_free_chunk_map(map);
|
||||
if (!use_rst)
|
||||
return 0;
|
||||
}
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
@ -85,6 +115,37 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
|
|||
found_end = found_start + key.offset;
|
||||
ret = 0;
|
||||
|
||||
/*
|
||||
* The stripe extent starts before the range we want to delete,
|
||||
* but the range spans more than one stripe extent:
|
||||
*
|
||||
* |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---|
|
||||
* |--- keep ---|--- drop ---|
|
||||
*
|
||||
* This means we have to get the previous item, truncate its
|
||||
* length and then restart the search.
|
||||
*/
|
||||
if (found_start > start) {
|
||||
if (slot == 0) {
|
||||
ret = btrfs_previous_item(stripe_root, path, start,
|
||||
BTRFS_RAID_STRIPE_KEY);
|
||||
if (ret) {
|
||||
if (ret > 0)
|
||||
ret = -ENOENT;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
path->slots[0]--;
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
found_start = key.objectid;
|
||||
found_end = found_start + key.offset;
|
||||
ASSERT(found_start <= start);
|
||||
}
|
||||
|
||||
if (key.type != BTRFS_RAID_STRIPE_KEY)
|
||||
break;
|
||||
|
||||
|
@ -95,6 +156,54 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
|
|||
trace_btrfs_raid_extent_delete(fs_info, start, end,
|
||||
found_start, found_end);
|
||||
|
||||
/*
|
||||
* The stripe extent starts before the range we want to delete
|
||||
* and ends after the range we want to delete, i.e. we're
|
||||
* punching a hole in the stripe extent:
|
||||
*
|
||||
* |--- RAID Stripe Extent ---|
|
||||
* | keep |--- drop ---| keep |
|
||||
*
|
||||
* This means we need to a) truncate the existing item and b)
|
||||
* create a second item for the remaining range.
|
||||
*/
|
||||
if (found_start < start && found_end > end) {
|
||||
size_t item_size;
|
||||
u64 diff_start = start - found_start;
|
||||
u64 diff_end = found_end - end;
|
||||
struct btrfs_stripe_extent *extent;
|
||||
struct btrfs_key newkey = {
|
||||
.objectid = end,
|
||||
.type = BTRFS_RAID_STRIPE_KEY,
|
||||
.offset = diff_end,
|
||||
};
|
||||
|
||||
/* The "right" item. */
|
||||
ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
item_size = btrfs_item_size(leaf, path->slots[0]);
|
||||
extent = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_stripe_extent);
|
||||
|
||||
for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
|
||||
struct btrfs_raid_stride *stride = &extent->strides[i];
|
||||
u64 phys;
|
||||
|
||||
phys = btrfs_raid_stride_physical(leaf, stride);
|
||||
phys += diff_start + length;
|
||||
btrfs_set_raid_stride_physical(leaf, stride, phys);
|
||||
}
|
||||
|
||||
/* The "left" item. */
|
||||
path->slots[0]--;
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
btrfs_partially_delete_raid_extent(trans, path, &key,
|
||||
diff_start, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* The stripe extent starts before the range we want to delete:
|
||||
*
|
||||
|
@ -105,11 +214,18 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
|
|||
* length to the new size and then re-insert the item.
|
||||
*/
|
||||
if (found_start < start) {
|
||||
u64 diff = start - found_start;
|
||||
u64 diff_start = start - found_start;
|
||||
|
||||
btrfs_partially_delete_raid_extent(trans, path, &key,
|
||||
diff, 0);
|
||||
break;
|
||||
diff_start, 0);
|
||||
|
||||
start += (key.offset - diff_start);
|
||||
length -= (key.offset - diff_start);
|
||||
if (length == 0)
|
||||
break;
|
||||
|
||||
btrfs_release_path(path);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -122,13 +238,16 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
|
|||
* length to the new size and then re-insert the item.
|
||||
*/
|
||||
if (found_end > end) {
|
||||
u64 diff = found_end - end;
|
||||
u64 diff_end = found_end - end;
|
||||
|
||||
btrfs_partially_delete_raid_extent(trans, path, &key,
|
||||
diff, diff);
|
||||
key.offset - length,
|
||||
length);
|
||||
ASSERT(key.offset - diff_end == length);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Finally we can delete the whole item, no more special cases. */
|
||||
ret = btrfs_del_item(trans, stripe_root, path);
|
||||
if (ret)
|
||||
break;
|
||||
|
@ -169,7 +288,6 @@ static int update_raid_extent_item(struct btrfs_trans_handle *trans,
|
|||
|
||||
write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
|
||||
item_size);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
btrfs_free_path(path);
|
||||
|
||||
return ret;
|
||||
|
@ -199,12 +317,8 @@ int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
|
|||
for (int i = 0; i < num_stripes; i++) {
|
||||
u64 devid = bioc->stripes[i].dev->devid;
|
||||
u64 physical = bioc->stripes[i].physical;
|
||||
u64 length = bioc->stripes[i].length;
|
||||
struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];
|
||||
|
||||
if (length == 0)
|
||||
length = bioc->size;
|
||||
|
||||
btrfs_set_stack_raid_stride_devid(raid_stride, devid);
|
||||
btrfs_set_stack_raid_stride_physical(raid_stride, physical);
|
||||
}
|
||||
|
|
|
@ -342,12 +342,6 @@ static bool handle_useless_nodes(struct reloc_control *rc,
|
|||
if (cur == node)
|
||||
ret = true;
|
||||
|
||||
/* The node is the lowest node */
|
||||
if (cur->lowest) {
|
||||
list_del_init(&cur->lower);
|
||||
cur->lowest = 0;
|
||||
}
|
||||
|
||||
/* Cleanup the lower edges */
|
||||
while (!list_empty(&cur->lower)) {
|
||||
struct btrfs_backref_edge *edge;
|
||||
|
@ -373,7 +367,6 @@ static bool handle_useless_nodes(struct reloc_control *rc,
|
|||
* cache to avoid unnecessary backref lookup.
|
||||
*/
|
||||
if (cur->level > 0) {
|
||||
list_add(&cur->list, &cache->detached);
|
||||
cur->detached = 1;
|
||||
} else {
|
||||
rb_erase(&cur->rb_node, &cache->rb_root);
|
||||
|
@ -426,7 +419,6 @@ static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
|
|||
goto out;
|
||||
}
|
||||
|
||||
node->lowest = 1;
|
||||
cur = node;
|
||||
|
||||
/* Breadth-first search to build backref cache */
|
||||
|
@ -469,92 +461,6 @@ out:
|
|||
return node;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper to add backref node for the newly created snapshot.
|
||||
* the backref node is created by cloning backref node that
|
||||
* corresponds to root of source tree
|
||||
*/
|
||||
static int clone_backref_node(struct btrfs_trans_handle *trans,
|
||||
struct reloc_control *rc,
|
||||
const struct btrfs_root *src,
|
||||
struct btrfs_root *dest)
|
||||
{
|
||||
/* NOTE(review): src->reloc_root is dereferenced below without a NULL check; presumably the caller guarantees it is set. */
struct btrfs_root *reloc_root = src->reloc_root;
|
||||
struct btrfs_backref_cache *cache = &rc->backref_cache;
|
||||
struct btrfs_backref_node *node = NULL;
|
||||
struct btrfs_backref_node *new_node;
|
||||
struct btrfs_backref_edge *edge;
|
||||
struct btrfs_backref_edge *new_edge;
|
||||
struct rb_node *rb_node;
|
||||
|
||||
/* First try: a cached node keyed by the start of the source tree's commit root. */
rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
|
||||
if (rb_node) {
|
||||
node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
|
||||
/* A detached node is ignored; otherwise it must already point at the reloc root's node. */
if (node->detached)
|
||||
node = NULL;
|
||||
else
|
||||
BUG_ON(node->new_bytenr != reloc_root->node->start);
|
||||
}
|
||||
|
||||
/* Second try: a cached node keyed by the start of the reloc tree's commit root. */
if (!node) {
|
||||
rb_node = rb_simple_search(&cache->rb_root,
|
||||
reloc_root->commit_root->start);
|
||||
if (rb_node) {
|
||||
node = rb_entry(rb_node, struct btrfs_backref_node,
|
||||
rb_node);
|
||||
BUG_ON(node->detached);
|
||||
}
|
||||
}
|
||||
|
||||
/* Nothing cached for either root: there is nothing to clone, which is not an error. */
if (!node)
|
||||
return 0;
|
||||
|
||||
/* The clone is keyed by dest's root node start and keeps the level of the source node. */
new_node = btrfs_backref_alloc_node(cache, dest->node->start,
|
||||
node->level);
|
||||
if (!new_node)
|
||||
return -ENOMEM;
|
||||
|
||||
new_node->lowest = node->lowest;
|
||||
new_node->checked = 1;
|
||||
new_node->root = btrfs_grab_root(dest);
|
||||
ASSERT(new_node->root);
|
||||
|
||||
/*
 * For a non-lowest node, create one new edge per lower edge of the source
 * node, each connecting the same lower node to new_node. A lowest node is
 * instead queued on the cache's leaves list.
 */
if (!node->lowest) {
|
||||
list_for_each_entry(edge, &node->lower, list[UPPER]) {
|
||||
new_edge = btrfs_backref_alloc_edge(cache);
|
||||
if (!new_edge)
|
||||
goto fail;
|
||||
|
||||
btrfs_backref_link_edge(new_edge, edge->node[LOWER],
|
||||
new_node, LINK_UPPER);
|
||||
}
|
||||
} else {
|
||||
list_add_tail(&new_node->lower, &cache->leaves);
|
||||
}
|
||||
|
||||
/* A non-NULL return from the insert is reported through btrfs_backref_panic() as -EEXIST. */
rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
|
||||
&new_node->rb_node);
|
||||
if (rb_node)
|
||||
btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);
|
||||
|
||||
/*
 * Only now hook each new edge into its lower node's upper list.
 * NOTE(review): presumably LINK_UPPER above linked only the upper side so
 * the failure path never has to touch the lower nodes - confirm against
 * btrfs_backref_link_edge().
 */
if (!new_node->lowest) {
|
||||
list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
|
||||
list_add_tail(&new_edge->list[LOWER],
|
||||
&new_edge->node[LOWER]->upper);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
fail:
|
||||
/* Edge allocation failed: free the edges linked so far, then the new node itself. */
while (!list_empty(&new_node->lower)) {
|
||||
new_edge = list_entry(new_node->lower.next,
|
||||
struct btrfs_backref_edge, list[UPPER]);
|
||||
list_del(&new_edge->list[UPPER]);
|
||||
btrfs_backref_free_edge(cache, new_edge);
|
||||
}
|
||||
btrfs_backref_free_node(cache, new_node);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper to add 'address of tree root -> reloc tree' mapping
|
||||
*/
|
||||
|
@ -950,7 +856,6 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
|||
u32 i;
|
||||
int ret = 0;
|
||||
int first = 1;
|
||||
int dirty = 0;
|
||||
|
||||
if (rc->stage != UPDATE_DATA_PTRS)
|
||||
return 0;
|
||||
|
@ -1030,7 +935,6 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
|
||||
btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
|
||||
dirty = 1;
|
||||
|
||||
key.offset -= btrfs_file_extent_offset(leaf, fi);
|
||||
ref.action = BTRFS_ADD_DELAYED_REF;
|
||||
|
@ -1061,8 +965,6 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (dirty)
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
if (inode)
|
||||
btrfs_add_delayed_iput(inode);
|
||||
return ret;
|
||||
|
@ -1255,13 +1157,11 @@ again:
|
|||
*/
|
||||
btrfs_set_node_blockptr(parent, slot, new_bytenr);
|
||||
btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
|
||||
btrfs_mark_buffer_dirty(trans, parent);
|
||||
|
||||
btrfs_set_node_blockptr(path->nodes[level],
|
||||
path->slots[level], old_bytenr);
|
||||
btrfs_set_node_ptr_generation(path->nodes[level],
|
||||
path->slots[level], old_ptr_gen);
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[level]);
|
||||
|
||||
ref.action = BTRFS_ADD_DELAYED_REF;
|
||||
ref.bytenr = old_bytenr;
|
||||
|
@ -2058,100 +1958,72 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
|
|||
int index = 0;
|
||||
int ret;
|
||||
|
||||
next = node;
|
||||
while (1) {
|
||||
cond_resched();
|
||||
next = walk_up_backref(next, edges, &index);
|
||||
root = next->root;
|
||||
next = walk_up_backref(node, edges, &index);
|
||||
root = next->root;
|
||||
|
||||
/*
|
||||
* If there is no root, then our references for this block are
|
||||
* incomplete, as we should be able to walk all the way up to a
|
||||
* block that is owned by a root.
|
||||
*
|
||||
* This path is only for SHAREABLE roots, so if we come upon a
|
||||
* non-SHAREABLE root then we have backrefs that resolve
|
||||
* improperly.
|
||||
*
|
||||
* Both of these cases indicate file system corruption, or a bug
|
||||
* in the backref walking code.
|
||||
*/
|
||||
if (!root) {
|
||||
ASSERT(0);
|
||||
btrfs_err(trans->fs_info,
|
||||
"bytenr %llu doesn't have a backref path ending in a root",
|
||||
node->bytenr);
|
||||
return ERR_PTR(-EUCLEAN);
|
||||
}
|
||||
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
|
||||
ASSERT(0);
|
||||
btrfs_err(trans->fs_info,
|
||||
"bytenr %llu has multiple refs with one ending in a non-shareable root",
|
||||
node->bytenr);
|
||||
return ERR_PTR(-EUCLEAN);
|
||||
}
|
||||
/*
|
||||
* If there is no root, then our references for this block are
|
||||
* incomplete, as we should be able to walk all the way up to a block
|
||||
* that is owned by a root.
|
||||
*
|
||||
* This path is only for SHAREABLE roots, so if we come upon a
|
||||
* non-SHAREABLE root then we have backrefs that resolve improperly.
|
||||
*
|
||||
* Both of these cases indicate file system corruption, or a bug in the
|
||||
* backref walking code.
|
||||
*/
|
||||
if (unlikely(!root)) {
|
||||
btrfs_err(trans->fs_info,
|
||||
"bytenr %llu doesn't have a backref path ending in a root",
|
||||
node->bytenr);
|
||||
return ERR_PTR(-EUCLEAN);
|
||||
}
|
||||
if (unlikely(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))) {
|
||||
btrfs_err(trans->fs_info,
|
||||
"bytenr %llu has multiple refs with one ending in a non-shareable root",
|
||||
node->bytenr);
|
||||
return ERR_PTR(-EUCLEAN);
|
||||
}
|
||||
|
||||
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) {
|
||||
ret = record_reloc_root_in_trans(trans, root);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = btrfs_record_root_in_trans(trans, root);
|
||||
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) {
|
||||
ret = record_reloc_root_in_trans(trans, root);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
root = root->reloc_root;
|
||||
|
||||
/*
|
||||
* We could have raced with another thread which failed, so
|
||||
* root->reloc_root may not be set, return ENOENT in this case.
|
||||
*/
|
||||
if (!root)
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
if (next->new_bytenr != root->node->start) {
|
||||
/*
|
||||
* We just created the reloc root, so we shouldn't have
|
||||
* ->new_bytenr set and this shouldn't be in the changed
|
||||
* list. If it is then we have multiple roots pointing
|
||||
* at the same bytenr which indicates corruption, or
|
||||
* we've made a mistake in the backref walking code.
|
||||
*/
|
||||
ASSERT(next->new_bytenr == 0);
|
||||
ASSERT(list_empty(&next->list));
|
||||
if (next->new_bytenr || !list_empty(&next->list)) {
|
||||
btrfs_err(trans->fs_info,
|
||||
"bytenr %llu possibly has multiple roots pointing at the same bytenr %llu",
|
||||
node->bytenr, next->bytenr);
|
||||
return ERR_PTR(-EUCLEAN);
|
||||
}
|
||||
|
||||
next->new_bytenr = root->node->start;
|
||||
btrfs_put_root(next->root);
|
||||
next->root = btrfs_grab_root(root);
|
||||
ASSERT(next->root);
|
||||
list_add_tail(&next->list,
|
||||
&rc->backref_cache.changed);
|
||||
mark_block_processed(rc, next);
|
||||
break;
|
||||
}
|
||||
|
||||
WARN_ON(1);
|
||||
root = NULL;
|
||||
next = walk_down_backref(edges, &index);
|
||||
if (!next || next->level <= node->level)
|
||||
break;
|
||||
goto found;
|
||||
}
|
||||
if (!root) {
|
||||
/*
|
||||
* This can happen if there's fs corruption or if there's a bug
|
||||
* in the backref lookup code.
|
||||
*/
|
||||
ASSERT(0);
|
||||
|
||||
ret = btrfs_record_root_in_trans(trans, root);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
root = root->reloc_root;
|
||||
|
||||
/*
|
||||
* We could have raced with another thread which failed, so
|
||||
* root->reloc_root may not be set, return ENOENT in this case.
|
||||
*/
|
||||
if (!root)
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
if (next->new_bytenr) {
|
||||
/*
|
||||
* We just created the reloc root, so we shouldn't have
|
||||
* ->new_bytenr set yet. If it is then we have multiple roots
|
||||
* pointing at the same bytenr which indicates corruption, or
|
||||
* we've made a mistake in the backref walking code.
|
||||
*/
|
||||
ASSERT(next->new_bytenr == 0);
|
||||
btrfs_err(trans->fs_info,
|
||||
"bytenr %llu possibly has multiple roots pointing at the same bytenr %llu",
|
||||
node->bytenr, next->bytenr);
|
||||
return ERR_PTR(-EUCLEAN);
|
||||
}
|
||||
|
||||
next->new_bytenr = root->node->start;
|
||||
btrfs_put_root(next->root);
|
||||
next->root = btrfs_grab_root(root);
|
||||
ASSERT(next->root);
|
||||
mark_block_processed(rc, next);
|
||||
found:
|
||||
next = node;
|
||||
/* setup backref node path for btrfs_reloc_cow_block */
|
||||
while (1) {
|
||||
|
@ -2247,17 +2119,11 @@ static noinline_for_stack u64 calcu_metadata_size(struct reloc_control *rc,
|
|||
return num_bytes;
|
||||
}
|
||||
|
||||
static int reserve_metadata_space(struct btrfs_trans_handle *trans,
|
||||
struct reloc_control *rc,
|
||||
struct btrfs_backref_node *node)
|
||||
static int refill_metadata_space(struct btrfs_trans_handle *trans,
|
||||
struct reloc_control *rc, u64 num_bytes)
|
||||
{
|
||||
struct btrfs_root *root = rc->extent_root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
u64 num_bytes;
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
int ret;
|
||||
u64 tmp;
|
||||
|
||||
num_bytes = calcu_metadata_size(rc, node) * 2;
|
||||
|
||||
trans->block_rsv = rc->block_rsv;
|
||||
rc->reserved_bytes += num_bytes;
|
||||
|
@ -2270,7 +2136,8 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
|
|||
ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv, num_bytes,
|
||||
BTRFS_RESERVE_FLUSH_LIMIT);
|
||||
if (ret) {
|
||||
tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
|
||||
u64 tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
|
||||
|
||||
while (tmp <= rc->reserved_bytes)
|
||||
tmp <<= 1;
|
||||
/*
|
||||
|
@ -2288,6 +2155,16 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Reserve metadata space for relocating @node.
 *
 * Requests twice the size computed by calcu_metadata_size() for @node and
 * hands the request to refill_metadata_space(), whose return value is
 * passed back unchanged.
 */
static int reserve_metadata_space(struct btrfs_trans_handle *trans,
|
||||
struct reloc_control *rc,
|
||||
struct btrfs_backref_node *node)
|
||||
{
|
||||
u64 num_bytes;
|
||||
|
||||
num_bytes = calcu_metadata_size(rc, node) * 2;
|
||||
return refill_metadata_space(trans, rc, num_bytes);
|
||||
}
|
||||
|
||||
/*
|
||||
* relocate a block tree, and then update pointers in upper level
|
||||
* blocks that reference the block to point to the new location.
|
||||
|
@ -2442,7 +2319,7 @@ next:
|
|||
|
||||
if (!ret && node->pending) {
|
||||
btrfs_backref_drop_node_buffer(node);
|
||||
list_move_tail(&node->list, &rc->backref_cache.changed);
|
||||
list_del_init(&node->list);
|
||||
node->pending = 0;
|
||||
}
|
||||
|
||||
|
@ -2605,8 +2482,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
|
|||
/*
|
||||
* This block was the root block of a root, and this is
|
||||
* the first time we're processing the block and thus it
|
||||
* should not have had the ->new_bytenr modified and
|
||||
* should have not been included on the changed list.
|
||||
* should not have had the ->new_bytenr modified.
|
||||
*
|
||||
* However in the case of corruption we could have
|
||||
* multiple refs pointing to the same block improperly,
|
||||
|
@ -2616,8 +2492,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
|
|||
* normal user in the case of corruption.
|
||||
*/
|
||||
ASSERT(node->new_bytenr == 0);
|
||||
ASSERT(list_empty(&node->list));
|
||||
if (node->new_bytenr || !list_empty(&node->list)) {
|
||||
if (node->new_bytenr) {
|
||||
btrfs_err(root->fs_info,
|
||||
"bytenr %llu has improper references to it",
|
||||
node->bytenr);
|
||||
|
@ -2640,17 +2515,12 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
|
|||
btrfs_put_root(node->root);
|
||||
node->root = btrfs_grab_root(root);
|
||||
ASSERT(node->root);
|
||||
list_add_tail(&node->list, &rc->backref_cache.changed);
|
||||
} else {
|
||||
path->lowest_level = node->level;
|
||||
if (root == root->fs_info->chunk_root)
|
||||
btrfs_reserve_chunk_metadata(trans, false);
|
||||
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
|
||||
btrfs_release_path(path);
|
||||
if (root == root->fs_info->chunk_root)
|
||||
btrfs_trans_release_chunk_metadata(trans);
|
||||
if (ret > 0)
|
||||
ret = 0;
|
||||
btrfs_err(root->fs_info,
|
||||
"bytenr %llu resolved to a non-shareable root",
|
||||
node->bytenr);
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
if (!ret)
|
||||
update_processed_blocks(rc, node);
|
||||
|
@ -2658,11 +2528,50 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
|
|||
ret = do_relocation(trans, rc, node, key, path, 1);
|
||||
}
|
||||
out:
|
||||
if (ret || node->level == 0 || node->cowonly)
|
||||
if (ret || node->level == 0)
|
||||
btrfs_backref_cleanup_node(&rc->backref_cache, node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Relocate a single tree block by searching down to it in its owning root,
 * without building a backref tree.
 *
 * The root is looked up from block->owner, metadata space for the path from
 * the root node down to the block is refilled, and btrfs_search_slot() is
 * run with path->lowest_level set to the block's level.
 *
 * Returns 0 on success (a positive search result is folded into 0), or the
 * error from the root lookup, the space refill or the search.
 */
static int relocate_cowonly_block(struct btrfs_trans_handle *trans,
|
||||
struct reloc_control *rc, struct tree_block *block,
|
||||
struct btrfs_path *path)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *root;
|
||||
u64 num_bytes;
|
||||
int nr_levels;
|
||||
int ret;
|
||||
|
||||
root = btrfs_get_fs_root(fs_info, block->owner, true);
|
||||
if (IS_ERR(root))
|
||||
return PTR_ERR(root);
|
||||
|
||||
/* Levels from the block up to the root node, inclusive; never less than one. */
nr_levels = max(btrfs_header_level(root->node) - block->level, 0) + 1;
|
||||
|
||||
/* Ask for one node's worth of space per level on that path. */
num_bytes = fs_info->nodesize * nr_levels;
|
||||
ret = refill_metadata_space(trans, rc, num_bytes);
|
||||
if (ret) {
|
||||
btrfs_put_root(root);
|
||||
return ret;
|
||||
}
|
||||
/* NOTE(review): presumably lowest_level makes the search stop at the block's level - confirm against btrfs_search_slot(). */
path->lowest_level = block->level;
|
||||
/* The chunk root gets chunk metadata reserved around the search and released afterwards. */
if (root == root->fs_info->chunk_root)
|
||||
btrfs_reserve_chunk_metadata(trans, false);
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &block->key, path, 0, 1);
|
||||
/* Restore the path to its default state before handing it back to the caller. */
path->lowest_level = 0;
|
||||
btrfs_release_path(path);
|
||||
|
||||
if (root == root->fs_info->chunk_root)
|
||||
btrfs_trans_release_chunk_metadata(trans);
|
||||
/* A positive return from the search is not treated as a failure here. */
if (ret > 0)
|
||||
ret = 0;
|
||||
btrfs_put_root(root);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* relocate a list of blocks
|
||||
*/
|
||||
|
@ -2702,6 +2611,20 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
|
|||
|
||||
/* Do tree relocation */
|
||||
rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
|
||||
/*
|
||||
* For COWonly blocks, or the data reloc tree, we only need to
|
||||
* COW down to the block, there's no need to generate a backref
|
||||
* tree.
|
||||
*/
|
||||
if (block->owner &&
|
||||
(!is_fstree(block->owner) ||
|
||||
block->owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
|
||||
ret = relocate_cowonly_block(trans, rc, block, path);
|
||||
if (ret)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
node = build_backref_tree(trans, rc, &block->key,
|
||||
block->level, block->bytenr);
|
||||
if (IS_ERR(node)) {
|
||||
|
@ -2947,7 +2870,7 @@ again:
|
|||
|
||||
/*
|
||||
* We could have lost folio private when we dropped the lock to read the
|
||||
* folio above, make sure we set_page_extent_mapped here so we have any
|
||||
* folio above, make sure we set_folio_extent_mapped() here so we have any
|
||||
* of the subpage blocksize stuff we need in place.
|
||||
*/
|
||||
ret = set_folio_extent_mapped(folio);
|
||||
|
@ -3799,7 +3722,6 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
|
||||
btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
|
||||
BTRFS_INODE_PREALLOC);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -4405,8 +4327,18 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
|
|||
WARN_ON(!first_cow && level == 0);
|
||||
|
||||
node = rc->backref_cache.path[level];
|
||||
BUG_ON(node->bytenr != buf->start &&
|
||||
node->new_bytenr != buf->start);
|
||||
|
||||
/*
|
||||
* If node->bytenr != buf->start and node->new_bytenr !=
|
||||
* buf->start then we've got the wrong backref node for what we
|
||||
* expected to see here and the cache is incorrect.
|
||||
*/
|
||||
if (unlikely(node->bytenr != buf->start && node->new_bytenr != buf->start)) {
|
||||
btrfs_err(fs_info,
|
||||
"bytenr %llu was found but our backref cache was expecting %llu or %llu",
|
||||
buf->start, node->bytenr, node->new_bytenr);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
btrfs_backref_drop_node_buffer(node);
|
||||
atomic_inc(&cow->refs);
|
||||
|
@ -4506,10 +4438,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
|
|||
return ret;
|
||||
}
|
||||
new_root->reloc_root = btrfs_grab_root(reloc_root);
|
||||
|
||||
if (rc->create_reloc_tree)
|
||||
ret = clone_backref_node(trans, rc, root, reloc_root);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -197,7 +197,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
|
|||
btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
|
||||
|
||||
write_extent_buffer(l, item, ptr, sizeof(*item));
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -447,7 +446,6 @@ again:
|
|||
btrfs_set_root_ref_name_len(leaf, ref, name->len);
|
||||
ptr = (unsigned long)(ref + 1);
|
||||
write_extent_buffer(leaf, name->name, ptr, name->len);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
if (key.type == BTRFS_ROOT_BACKREF_KEY) {
|
||||
btrfs_release_path(path);
|
||||
|
|
|
@ -7259,7 +7259,7 @@ static int changed_cb(struct btrfs_path *left_path,
|
|||
enum btrfs_compare_tree_result result,
|
||||
struct send_ctx *sctx)
|
||||
{
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* We can not hold the commit root semaphore here. This is because in
|
||||
|
@ -7319,7 +7319,6 @@ static int changed_cb(struct btrfs_path *left_path,
|
|||
return 0;
|
||||
}
|
||||
result = BTRFS_COMPARE_TREE_CHANGED;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
sctx->left_path = left_path;
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "zoned.h"
|
||||
|
||||
/*
|
||||
* HOW DOES SPACE RESERVATION WORK
|
||||
|
@ -127,6 +128,14 @@
|
|||
* churn a lot and we can avoid making some extent tree modifications if we
|
||||
* are able to delay for as long as possible.
|
||||
*
|
||||
* RESET_ZONES
|
||||
* This state works only for the zoned mode. On the zoned mode, we cannot
|
||||
* reuse a region that was allocated and then freed until we reset the zone, due to
|
||||
* the sequential write zone requirement. The RESET_ZONES state resets the
|
||||
* zones of an unused block group and lets us reuse the space. The reusing
|
||||
* is faster than removing the block group and allocating another block
|
||||
* group on the zones.
|
||||
*
|
||||
* ALLOC_CHUNK
|
||||
* We will skip this the first time through space reservation, because of
|
||||
* overcommit and we don't want to have a lot of useless metadata space when
|
||||
|
@ -316,7 +325,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
|
|||
found->bytes_used += block_group->used;
|
||||
found->disk_used += block_group->used * factor;
|
||||
found->bytes_readonly += block_group->bytes_super;
|
||||
btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable);
|
||||
btrfs_space_info_update_bytes_zone_unusable(found, block_group->zone_unusable);
|
||||
if (block_group->length > 0)
|
||||
found->full = 0;
|
||||
btrfs_try_granting_tickets(info, found);
|
||||
|
@ -489,9 +498,7 @@ again:
|
|||
if ((used + ticket->bytes <= space_info->total_bytes) ||
|
||||
btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
|
||||
flush)) {
|
||||
btrfs_space_info_update_bytes_may_use(fs_info,
|
||||
space_info,
|
||||
ticket->bytes);
|
||||
btrfs_space_info_update_bytes_may_use(space_info, ticket->bytes);
|
||||
remove_ticket(space_info, ticket);
|
||||
ticket->bytes = 0;
|
||||
space_info->tickets_id++;
|
||||
|
@ -834,6 +841,9 @@ static void flush_space(struct btrfs_fs_info *fs_info,
|
|||
*/
|
||||
ret = btrfs_commit_current_transaction(root);
|
||||
break;
|
||||
case RESET_ZONES:
|
||||
ret = btrfs_reset_unused_block_groups(space_info, num_bytes);
|
||||
break;
|
||||
default:
|
||||
ret = -ENOSPC;
|
||||
break;
|
||||
|
@ -1086,9 +1096,14 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
|
|||
enum btrfs_flush_state flush_state;
|
||||
int commit_cycles = 0;
|
||||
u64 last_tickets_id;
|
||||
enum btrfs_flush_state final_state;
|
||||
|
||||
fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
|
||||
space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
|
||||
if (btrfs_is_zoned(fs_info))
|
||||
final_state = RESET_ZONES;
|
||||
else
|
||||
final_state = COMMIT_TRANS;
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
|
||||
|
@ -1141,7 +1156,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
|
|||
if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
|
||||
flush_state++;
|
||||
|
||||
if (flush_state > COMMIT_TRANS) {
|
||||
if (flush_state > final_state) {
|
||||
commit_cycles++;
|
||||
if (commit_cycles > 2) {
|
||||
if (maybe_fail_all_tickets(fs_info, space_info)) {
|
||||
|
@ -1155,7 +1170,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
|
|||
}
|
||||
}
|
||||
spin_unlock(&space_info->lock);
|
||||
} while (flush_state <= COMMIT_TRANS);
|
||||
} while (flush_state <= final_state);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1286,6 +1301,10 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
|
|||
* This is where we reclaim all of the pinned space generated by running the
|
||||
* iputs
|
||||
*
|
||||
* RESET_ZONES
|
||||
* This state works only for the zoned mode. We scan the unused block group
|
||||
* list and reset the zones and reuse the block group.
|
||||
*
|
||||
* ALLOC_CHUNK_FORCE
|
||||
* For data we start with alloc chunk force, however we could have been full
|
||||
* before, and then the transaction commit could have freed new block groups,
|
||||
|
@ -1295,6 +1314,7 @@ static const enum btrfs_flush_state data_flush_states[] = {
|
|||
FLUSH_DELALLOC_FULL,
|
||||
RUN_DELAYED_IPUTS,
|
||||
COMMIT_TRANS,
|
||||
RESET_ZONES,
|
||||
ALLOC_CHUNK_FORCE,
|
||||
};
|
||||
|
||||
|
@ -1386,6 +1406,7 @@ void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)
|
|||
static const enum btrfs_flush_state priority_flush_states[] = {
|
||||
FLUSH_DELAYED_ITEMS_NR,
|
||||
FLUSH_DELAYED_ITEMS,
|
||||
RESET_ZONES,
|
||||
ALLOC_CHUNK,
|
||||
};
|
||||
|
||||
|
@ -1399,6 +1420,7 @@ static const enum btrfs_flush_state evict_flush_states[] = {
|
|||
FLUSH_DELALLOC_FULL,
|
||||
ALLOC_CHUNK,
|
||||
COMMIT_TRANS,
|
||||
RESET_ZONES,
|
||||
};
|
||||
|
||||
static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
|
||||
|
@ -1690,8 +1712,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
|
|||
if (!pending_tickets &&
|
||||
((used + orig_bytes <= space_info->total_bytes) ||
|
||||
btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, space_info,
|
||||
orig_bytes);
|
||||
btrfs_space_info_update_bytes_may_use(space_info, orig_bytes);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
|
@ -1703,8 +1724,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
|
|||
if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) {
|
||||
used = btrfs_space_info_used(space_info, false);
|
||||
if (used + orig_bytes <= space_info->total_bytes) {
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, space_info,
|
||||
orig_bytes);
|
||||
btrfs_space_info_update_bytes_may_use(space_info, orig_bytes);
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
|
@ -2082,3 +2102,32 @@ void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info)
|
|||
do_reclaim_sweep(space_info, raid);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Hand @len freed bytes back to @space_info.
 *
 * If the global block reserve belongs to @space_info and is not full, it is
 * topped up first (by at most its remaining shortfall) and the same amount
 * is accounted as bytes_may_use. If any bytes remain afterwards, pending
 * tickets on @space_info are given a chance to be granted.
 *
 * The caller must hold space_info->lock; this is checked with lockdep.
 */
void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = space_info->fs_info;
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
|
||||
lockdep_assert_held(&space_info->lock);
|
||||
|
||||
/* Prioritize the global reservation to receive the freed space. */
|
||||
if (global_rsv->space_info != space_info)
|
||||
goto grant;
|
||||
|
||||
/* global_rsv->lock is taken while space_info->lock is already held. */
spin_lock(&global_rsv->lock);
|
||||
if (!global_rsv->full) {
|
||||
/* Never add more than the reserve is still missing. */
u64 to_add = min(len, global_rsv->size - global_rsv->reserved);
|
||||
|
||||
global_rsv->reserved += to_add;
|
||||
btrfs_space_info_update_bytes_may_use(space_info, to_add);
|
||||
if (global_rsv->reserved >= global_rsv->size)
|
||||
global_rsv->full = 1;
|
||||
len -= to_add;
|
||||
}
|
||||
spin_unlock(&global_rsv->lock);
|
||||
|
||||
grant:
|
||||
/* Add to any tickets we may have. */
|
||||
/* Skipped when the global reserve absorbed all of @len. */
if (len)
|
||||
btrfs_try_granting_tickets(fs_info, space_info);
|
||||
}
|
||||
|
|
|
@ -79,6 +79,10 @@ enum btrfs_reserve_flush_enum {
|
|||
BTRFS_RESERVE_FLUSH_EMERGENCY,
|
||||
};
|
||||
|
||||
/*
|
||||
* Please be aware that the order of enum values will be the order of the reclaim
|
||||
* process in btrfs_async_reclaim_metadata_space().
|
||||
*/
|
||||
enum btrfs_flush_state {
|
||||
FLUSH_DELAYED_ITEMS_NR = 1,
|
||||
FLUSH_DELAYED_ITEMS = 2,
|
||||
|
@ -91,6 +95,7 @@ enum btrfs_flush_state {
|
|||
ALLOC_CHUNK_FORCE = 9,
|
||||
RUN_DELAYED_IPUTS = 10,
|
||||
COMMIT_TRANS = 11,
|
||||
RESET_ZONES = 12,
|
||||
};
|
||||
|
||||
struct btrfs_space_info {
|
||||
|
@ -229,10 +234,10 @@ static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_i
|
|||
*/
|
||||
#define DECLARE_SPACE_INFO_UPDATE(name, trace_name) \
|
||||
static inline void \
|
||||
btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \
|
||||
struct btrfs_space_info *sinfo, \
|
||||
btrfs_space_info_update_##name(struct btrfs_space_info *sinfo, \
|
||||
s64 bytes) \
|
||||
{ \
|
||||
struct btrfs_fs_info *fs_info = sinfo->fs_info; \
|
||||
const u64 abs_bytes = (bytes < 0) ? -bytes : bytes; \
|
||||
lockdep_assert_held(&sinfo->lock); \
|
||||
trace_update_##name(fs_info, sinfo, sinfo->name, bytes); \
|
||||
|
@ -275,13 +280,12 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
|
|||
enum btrfs_reserve_flush_enum flush);
|
||||
|
||||
static inline void btrfs_space_info_free_bytes_may_use(
|
||||
struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
u64 num_bytes)
|
||||
{
|
||||
spin_lock(&space_info->lock);
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, space_info, -num_bytes);
|
||||
btrfs_try_granting_tickets(fs_info, space_info);
|
||||
btrfs_space_info_update_bytes_may_use(space_info, -num_bytes);
|
||||
btrfs_try_granting_tickets(space_info->fs_info, space_info);
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
|
||||
|
@ -295,5 +299,6 @@ void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool
|
|||
bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info);
|
||||
int btrfs_calc_reclaim_threshold(const struct btrfs_space_info *space_info);
|
||||
void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info);
|
||||
void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len);
|
||||
|
||||
#endif /* BTRFS_SPACE_INFO_H */
|
||||
|
|
|
@ -635,6 +635,28 @@ IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered,
|
|||
IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
|
||||
folio_test_checked);
|
||||
|
||||
/*
 * Copy the @name bitmap of @subpage into the unsigned long pointed to by @dst.
 *
 * Reads fs_info->sectors_per_page bits from subpage->bitmaps, starting at bit
 * sectors_per_page * btrfs_bitmap_nr_<name>. The ASSERT checks that
 * sectors_per_page is below BITS_PER_LONG.
 *
 * NOTE(review): expands to a bare brace block (not do { } while (0)) and uses
 * its arguments unparenthesized, so pass only simple expressions.
 */
#define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst) \
|
||||
{ \
|
||||
const int sectors_per_page = fs_info->sectors_per_page; \
|
||||
\
|
||||
ASSERT(sectors_per_page < BITS_PER_LONG); \
|
||||
*dst = bitmap_read(subpage->bitmaps, \
|
||||
sectors_per_page * btrfs_bitmap_nr_##name, \
|
||||
sectors_per_page); \
|
||||
}
|
||||
|
||||
/*
 * Emit a btrfs_warn() showing the @name bitmap of @folio together with the
 * @start/@len range being checked and the folio position.
 *
 * The subpage structure is taken from folio_get_private(@folio) and the
 * bitmap is fetched with GET_SUBPAGE_BITMAP() into a local unsigned long.
 *
 * NOTE(review): expands to a bare brace block that declares locals named
 * 'subpage' and 'bitmap'; arguments using those names would be shadowed.
 */
#define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len) \
|
||||
{ \
|
||||
const struct btrfs_subpage *subpage = folio_get_private(folio); \
|
||||
unsigned long bitmap; \
|
||||
\
|
||||
GET_SUBPAGE_BITMAP(subpage, fs_info, name, &bitmap); \
|
||||
btrfs_warn(fs_info, \
|
||||
"dumpping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
|
||||
start, len, folio_pos(folio), \
|
||||
fs_info->sectors_per_page, &bitmap); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure not only the page dirty bit is cleared, but also subpage dirty bit
|
||||
* is cleared.
|
||||
|
@ -660,6 +682,10 @@ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
|
|||
subpage = folio_get_private(folio);
|
||||
ASSERT(subpage);
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
|
||||
SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len);
|
||||
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
|
||||
}
|
||||
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
|
@ -689,23 +715,16 @@ void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
|
|||
nbits = len >> fs_info->sectorsize_bits;
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
/* Target range should not yet be locked. */
|
||||
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
|
||||
if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
|
||||
SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len);
|
||||
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
|
||||
}
|
||||
bitmap_set(subpage->bitmaps, start_bit, nbits);
|
||||
ret = atomic_add_return(nbits, &subpage->nr_locked);
|
||||
ASSERT(ret <= fs_info->sectors_per_page);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
|
||||
/*
 * Copy the @name bitmap of @subpage into the unsigned long pointed to by @dst.
 *
 * Reads fs_info->sectors_per_page bits from subpage->bitmaps, starting at bit
 * sectors_per_page * btrfs_bitmap_nr_<name>. The ASSERT checks that
 * sectors_per_page is below BITS_PER_LONG.
 *
 * NOTE(review): expands to a bare brace block (not do { } while (0)) and uses
 * its arguments unparenthesized, so pass only simple expressions.
 */
#define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst) \
|
||||
{ \
|
||||
const int sectors_per_page = fs_info->sectors_per_page; \
|
||||
\
|
||||
ASSERT(sectors_per_page < BITS_PER_LONG); \
|
||||
*dst = bitmap_read(subpage->bitmaps, \
|
||||
sectors_per_page * btrfs_bitmap_nr_##name, \
|
||||
sectors_per_page); \
|
||||
}
|
||||
|
||||
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
|
@ -716,6 +735,7 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
|
|||
unsigned long writeback_bitmap;
|
||||
unsigned long ordered_bitmap;
|
||||
unsigned long checked_bitmap;
|
||||
unsigned long locked_bitmap;
|
||||
unsigned long flags;
|
||||
|
||||
ASSERT(folio_test_private(folio) && folio_get_private(folio));
|
||||
|
@ -728,15 +748,16 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
|
|||
GET_SUBPAGE_BITMAP(subpage, fs_info, writeback, &writeback_bitmap);
|
||||
GET_SUBPAGE_BITMAP(subpage, fs_info, ordered, &ordered_bitmap);
|
||||
GET_SUBPAGE_BITMAP(subpage, fs_info, checked, &checked_bitmap);
|
||||
GET_SUBPAGE_BITMAP(subpage, fs_info, locked, &checked_bitmap);
|
||||
GET_SUBPAGE_BITMAP(subpage, fs_info, locked, &locked_bitmap);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
|
||||
dump_page(folio_page(folio, 0), "btrfs subpage dump");
|
||||
btrfs_warn(fs_info,
|
||||
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
|
||||
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
|
||||
start, len, folio_pos(folio),
|
||||
sectors_per_page, &uptodate_bitmap,
|
||||
sectors_per_page, &dirty_bitmap,
|
||||
sectors_per_page, &locked_bitmap,
|
||||
sectors_per_page, &writeback_bitmap,
|
||||
sectors_per_page, &ordered_bitmap,
|
||||
sectors_per_page, &checked_bitmap);
|
||||
|
|
|
@ -137,6 +137,19 @@ DECLARE_BTRFS_SUBPAGE_OPS(writeback);
|
|||
DECLARE_BTRFS_SUBPAGE_OPS(ordered);
|
||||
DECLARE_BTRFS_SUBPAGE_OPS(checked);
|
||||
|
||||
/*
|
||||
* Helper for error cleanup, where a folio will have its dirty flag cleared,
|
||||
* with writeback started and finished.
|
||||
*/
|
||||
static inline void btrfs_folio_clamp_finish_io(struct btrfs_fs_info *fs_info,
|
||||
struct folio *locked_folio,
|
||||
u64 start, u32 len)
|
||||
{
|
||||
btrfs_folio_clamp_clear_dirty(fs_info, locked_folio, start, len);
|
||||
btrfs_folio_clamp_set_writeback(fs_info, locked_folio, start, len);
|
||||
btrfs_folio_clamp_clear_writeback(fs_info, locked_folio, start, len);
|
||||
}
|
||||
|
||||
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
|
||||
|
|
|
@ -971,7 +971,7 @@ static int btrfs_fill_super(struct super_block *sb,
|
|||
|
||||
err = open_ctree(sb, fs_devices);
|
||||
if (err) {
|
||||
btrfs_err(fs_info, "open_ctree failed");
|
||||
btrfs_err(fs_info, "open_ctree failed: %d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -2446,6 +2446,9 @@ static __cold void btrfs_interface_exit(void)
|
|||
static int __init btrfs_print_mod_info(void)
|
||||
{
|
||||
static const char options[] = ""
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
", experimental=on"
|
||||
#endif
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
", debug=on"
|
||||
#endif
|
||||
|
@ -2466,7 +2469,17 @@ static int __init btrfs_print_mod_info(void)
|
|||
", fsverity=no"
|
||||
#endif
|
||||
;
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
if (btrfs_get_mod_read_policy() == NULL)
|
||||
pr_info("Btrfs loaded%s\n", options);
|
||||
else
|
||||
pr_info("Btrfs loaded%s, read_policy=%s\n",
|
||||
options, btrfs_get_mod_read_policy());
|
||||
#else
|
||||
pr_info("Btrfs loaded%s\n", options);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2524,6 +2537,11 @@ static const struct init_sequence mod_init_seq[] = {
|
|||
}, {
|
||||
.init_func = extent_map_init,
|
||||
.exit_func = extent_map_exit,
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
}, {
|
||||
.init_func = btrfs_read_policy_init,
|
||||
.exit_func = NULL,
|
||||
#endif
|
||||
}, {
|
||||
.init_func = ordered_data_init,
|
||||
.exit_func = ordered_data_exit,
|
||||
|
|
174
fs/btrfs/sysfs.c
174
fs/btrfs/sysfs.c
|
@ -1305,7 +1305,73 @@ static ssize_t btrfs_temp_fsid_show(struct kobject *kobj,
|
|||
}
|
||||
BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);
|
||||
|
||||
static const char * const btrfs_read_policy_name[] = { "pid" };
|
||||
/*
 * Names of the read policies, indexed by read policy value (the table is
 * walked with indexes below BTRFS_NR_READ_POLICY and matched against user
 * input with sysfs_match_string()).
 *
 * Both the strings and the pointer table itself are constant, nothing may
 * rewrite an entry at runtime.
 */
static const char * const btrfs_read_policy_name[] = {
	"pid",
#ifdef CONFIG_BTRFS_EXPERIMENTAL
	"round-robin",
	"devid",
#endif
};
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
|
||||
/*
 * Global module configuration parameters.
 *
 * read_policy holds the read policy string of the module parameter of the
 * same name and is NULL as long as nothing has been assigned to it.
 */
static char *read_policy;

/* Hand out the raw read_policy parameter string, NULL if it is not set. */
char *btrfs_get_mod_read_policy(void)
{
	return read_policy;
}
|
||||
|
||||
/* Set perms to 0, disable /sys/module/btrfs/parameters/read_policy interface. */
|
||||
module_param(read_policy, charp, 0);
|
||||
MODULE_PARM_DESC(read_policy,
|
||||
"Global read policy: pid (default), round-robin[:<min_contig_read>], devid[:<devid>]");
|
||||
#endif
|
||||
|
||||
/*
 * Parse a read policy string in "policy" or "policy:value" format.
 *
 * @str:       string to parse, NULL or empty yields index 0
 * @value_ret: storage for the numeric value; only written when a ":value"
 *             suffix is present, which is parsed only with
 *             CONFIG_BTRFS_EXPERIMENTAL
 *
 * Return: the index of the policy in btrfs_read_policy_name[] on success,
 * -EINVAL if the string does not fit the parse buffer, the value is not a
 * base 10 number, a value is given but @value_ret is NULL, or no policy name
 * matches, and -ERANGE if the value is negative.
 */
int btrfs_read_policy_to_enum(const char *str, s64 *value_ret)
{
	char param[32] = { 0 };
	char __maybe_unused *value_str;
	size_t len;

	if (!str || str[0] == '\0')
		return 0;

	len = strlen(str);
	/*
	 * A string of exactly sizeof(param) bytes that ends in a newline still
	 * fits once the newline is dropped, the parsers below accept the
	 * string with or without it.
	 */
	if (len == sizeof(param) && str[len - 1] == '\n')
		len--;
	/*
	 * Reject anything longer instead of silently cutting it off: a
	 * truncated "policy:value" string could otherwise be accepted with a
	 * value different from the one that was passed in.
	 */
	if (len >= sizeof(param))
		return -EINVAL;
	/* @param is zero filled, so the copy is always NUL terminated. */
	memcpy(param, str, len);

#ifdef CONFIG_BTRFS_EXPERIMENTAL
	/* Separate value from input in policy:value format. */
	value_str = strchr(param, ':');
	if (value_str) {
		int ret;

		/* Terminate the policy name and step over the separator. */
		*value_str = 0;
		value_str++;
		if (!value_ret)
			return -EINVAL;
		ret = kstrtos64(value_str, 10, value_ret);
		if (ret)
			return -EINVAL;
		if (*value_ret < 0)
			return -ERANGE;
	}
#endif

	return sysfs_match_string(btrfs_read_policy_name, param);
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
/*
 * Validate the read_policy module parameter when the module is loaded.
 *
 * Return: 0 if the parameter parses, -EINVAL if the policy name or its value
 * is rejected by btrfs_read_policy_to_enum().
 */
int __init btrfs_read_policy_init(void)
{
	s64 value;

	/*
	 * Every negative return is a parse failure. Checking for -EINVAL only
	 * would let the -ERANGE reported for a negative value pass as a valid
	 * setting.
	 */
	if (btrfs_read_policy_to_enum(read_policy, &value) < 0) {
		btrfs_err(NULL, "invalid read policy or value %s", read_policy);
		return -EINVAL;
	}

	return 0;
}
|
||||
#endif
|
||||
|
||||
static ssize_t btrfs_read_policy_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a, char *buf)
|
||||
|
@ -1316,14 +1382,25 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,
|
|||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
|
||||
if (policy == i)
|
||||
ret += sysfs_emit_at(buf, ret, "%s[%s]",
|
||||
(ret == 0 ? "" : " "),
|
||||
btrfs_read_policy_name[i]);
|
||||
else
|
||||
ret += sysfs_emit_at(buf, ret, "%s%s",
|
||||
(ret == 0 ? "" : " "),
|
||||
btrfs_read_policy_name[i]);
|
||||
if (ret != 0)
|
||||
ret += sysfs_emit_at(buf, ret, " ");
|
||||
|
||||
if (i == policy)
|
||||
ret += sysfs_emit_at(buf, ret, "[");
|
||||
|
||||
ret += sysfs_emit_at(buf, ret, "%s", btrfs_read_policy_name[i]);
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
if (i == BTRFS_READ_POLICY_RR)
|
||||
ret += sysfs_emit_at(buf, ret, ":%u",
|
||||
READ_ONCE(fs_devices->rr_min_contig_read));
|
||||
|
||||
if (i == BTRFS_READ_POLICY_DEVID)
|
||||
ret += sysfs_emit_at(buf, ret, ":%llu",
|
||||
READ_ONCE(fs_devices->read_devid));
|
||||
#endif
|
||||
if (i == policy)
|
||||
ret += sysfs_emit_at(buf, ret, "]");
|
||||
}
|
||||
|
||||
ret += sysfs_emit_at(buf, ret, "\n");
|
||||
|
@ -1336,21 +1413,80 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
|
|||
const char *buf, size_t len)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
|
||||
int i;
|
||||
int index;
|
||||
s64 value = -1;
|
||||
|
||||
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
|
||||
if (sysfs_streq(buf, btrfs_read_policy_name[i])) {
|
||||
if (i != READ_ONCE(fs_devices->read_policy)) {
|
||||
WRITE_ONCE(fs_devices->read_policy, i);
|
||||
btrfs_info(fs_devices->fs_info,
|
||||
"read policy set to '%s'",
|
||||
btrfs_read_policy_name[i]);
|
||||
index = btrfs_read_policy_to_enum(buf, &value);
|
||||
if (index < 0)
|
||||
return -EINVAL;
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
/* If moving from RR then disable collecting fs stats. */
|
||||
if (fs_devices->read_policy == BTRFS_READ_POLICY_RR && index != BTRFS_READ_POLICY_RR)
|
||||
fs_devices->collect_fs_stats = false;
|
||||
|
||||
if (index == BTRFS_READ_POLICY_RR) {
|
||||
if (value != -1) {
|
||||
const u32 sectorsize = fs_devices->fs_info->sectorsize;
|
||||
|
||||
if (!IS_ALIGNED(value, sectorsize)) {
|
||||
u64 temp_value = round_up(value, sectorsize);
|
||||
|
||||
btrfs_debug(fs_devices->fs_info,
|
||||
"read_policy: min contig read %lld should be multiple of sectorsize %u, rounded to %llu",
|
||||
value, sectorsize, temp_value);
|
||||
value = temp_value;
|
||||
}
|
||||
return len;
|
||||
} else {
|
||||
value = BTRFS_DEFAULT_RR_MIN_CONTIG_READ;
|
||||
}
|
||||
|
||||
if (index != READ_ONCE(fs_devices->read_policy) ||
|
||||
value != READ_ONCE(fs_devices->rr_min_contig_read)) {
|
||||
WRITE_ONCE(fs_devices->read_policy, index);
|
||||
WRITE_ONCE(fs_devices->rr_min_contig_read, value);
|
||||
|
||||
btrfs_info(fs_devices->fs_info, "read policy set to '%s:%lld'",
|
||||
btrfs_read_policy_name[index], value);
|
||||
}
|
||||
|
||||
fs_devices->collect_fs_stats = true;
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
if (index == BTRFS_READ_POLICY_DEVID) {
|
||||
if (value != -1) {
|
||||
BTRFS_DEV_LOOKUP_ARGS(args);
|
||||
|
||||
/* Validate input devid. */
|
||||
args.devid = value;
|
||||
if (btrfs_find_device(fs_devices, &args) == NULL)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
/* Set default devid to the devid of the latest device. */
|
||||
value = fs_devices->latest_dev->devid;
|
||||
}
|
||||
|
||||
if (index != READ_ONCE(fs_devices->read_policy) ||
|
||||
value != READ_ONCE(fs_devices->read_devid)) {
|
||||
WRITE_ONCE(fs_devices->read_policy, index);
|
||||
WRITE_ONCE(fs_devices->read_devid, value);
|
||||
|
||||
btrfs_info(fs_devices->fs_info, "read policy set to '%s:%llu'",
|
||||
btrfs_read_policy_name[index], value);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
#endif
|
||||
if (index != READ_ONCE(fs_devices->read_policy)) {
|
||||
WRITE_ONCE(fs_devices->read_policy, index);
|
||||
btrfs_info(fs_devices->fs_info, "read policy set to '%s'",
|
||||
btrfs_read_policy_name[index]);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
|
||||
|
||||
|
|
|
@ -47,5 +47,11 @@ void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info);
|
|||
int btrfs_sysfs_add_qgroups(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_sysfs_del_one_qgroup(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_qgroup *qgroup);
|
||||
int btrfs_read_policy_to_enum(const char *str, s64 *value);
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
int __init btrfs_read_policy_init(void);
|
||||
char *btrfs_get_mod_read_policy(void);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -30,6 +30,7 @@ const char *test_error[] = {
|
|||
[TEST_ALLOC_EXTENT_MAP] = "cannot allocate extent map",
|
||||
[TEST_ALLOC_CHUNK_MAP] = "cannot allocate chunk map",
|
||||
[TEST_ALLOC_IO_CONTEXT] = "cannot allocate io context",
|
||||
[TEST_ALLOC_TRANSACTION] = "cannot allocate transaction",
|
||||
};
|
||||
|
||||
static const struct super_operations btrfs_test_super_ops = {
|
||||
|
@ -142,6 +143,11 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
|
|||
fs_info->nodesize = nodesize;
|
||||
fs_info->sectorsize = sectorsize;
|
||||
fs_info->sectorsize_bits = ilog2(sectorsize);
|
||||
|
||||
/* CRC32C csum size. */
|
||||
fs_info->csum_size = 4;
|
||||
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) /
|
||||
fs_info->csum_size;
|
||||
set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
|
||||
|
||||
test_mnt->mnt_sb->s_fs_info = fs_info;
|
||||
|
@ -247,6 +253,15 @@ void btrfs_free_dummy_block_group(struct btrfs_block_group *cache)
|
|||
kfree(cache);
|
||||
}
|
||||
|
||||
void btrfs_init_dummy_transaction(struct btrfs_transaction *trans, struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
memset(trans, 0, sizeof(*trans));
|
||||
trans->fs_info = fs_info;
|
||||
xa_init(&trans->delayed_refs.head_refs);
|
||||
xa_init(&trans->delayed_refs.dirty_extents);
|
||||
spin_lock_init(&trans->delayed_refs.lock);
|
||||
}
|
||||
|
||||
void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
|
@ -295,6 +310,9 @@ int btrfs_run_sanity_tests(void)
|
|||
ret = btrfs_test_raid_stripe_tree(sectorsize, nodesize);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = btrfs_test_delayed_refs(sectorsize, nodesize);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = btrfs_test_extent_map();
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#ifndef BTRFS_TESTS_H
|
||||
#define BTRFS_TESTS_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
int btrfs_run_sanity_tests(void);
|
||||
|
||||
|
@ -25,12 +27,14 @@ enum {
|
|||
TEST_ALLOC_EXTENT_MAP,
|
||||
TEST_ALLOC_CHUNK_MAP,
|
||||
TEST_ALLOC_IO_CONTEXT,
|
||||
TEST_ALLOC_TRANSACTION,
|
||||
};
|
||||
|
||||
extern const char *test_error[];
|
||||
|
||||
struct btrfs_root;
|
||||
struct btrfs_trans_handle;
|
||||
struct btrfs_transaction;
|
||||
|
||||
int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize);
|
||||
int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize);
|
||||
|
@ -40,6 +44,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize);
|
|||
int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize);
|
||||
int btrfs_test_raid_stripe_tree(u32 sectorsize, u32 nodesize);
|
||||
int btrfs_test_extent_map(void);
|
||||
int btrfs_test_delayed_refs(u32 sectorsize, u32 nodesize);
|
||||
struct inode *btrfs_new_test_inode(void);
|
||||
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize);
|
||||
void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info);
|
||||
|
@ -49,6 +54,7 @@ btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info, unsigned long lengt
|
|||
void btrfs_free_dummy_block_group(struct btrfs_block_group *cache);
|
||||
void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info);
|
||||
void btrfs_init_dummy_transaction(struct btrfs_transaction *trans, struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_device *btrfs_alloc_dummy_device(struct btrfs_fs_info *fs_info);
|
||||
#else
|
||||
static inline int btrfs_run_sanity_tests(void)
|
||||
|
|
1015
fs/btrfs/tests/delayed-refs-tests.c
Normal file
1015
fs/btrfs/tests/delayed-refs-tests.c
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -14,6 +14,8 @@
|
|||
#define RST_TEST_NUM_DEVICES (2)
|
||||
#define RST_TEST_RAID1_TYPE (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_RAID1)
|
||||
|
||||
#define SZ_48K (SZ_32K + SZ_16K)
|
||||
|
||||
typedef int (*test_func_t)(struct btrfs_trans_handle *trans);
|
||||
|
||||
static struct btrfs_device *btrfs_device_by_devid(struct btrfs_fs_devices *fs_devices,
|
||||
|
@ -29,6 +31,613 @@ static struct btrfs_device *btrfs_device_by_devid(struct btrfs_fs_devices *fs_de
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test creating a range of three extents and then punch a hole in the middle,
|
||||
* deleting all of the middle extents and partially deleting the "book ends".
|
||||
*/
|
||||
static int test_punch_hole_3extents(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_io_context *bioc;
|
||||
struct btrfs_io_stripe io_stripe = { 0 };
|
||||
u64 map_type = RST_TEST_RAID1_TYPE;
|
||||
u64 logical1 = SZ_1M;
|
||||
u64 len1 = SZ_1M;
|
||||
u64 logical2 = logical1 + len1;
|
||||
u64 len2 = SZ_1M;
|
||||
u64 logical3 = logical2 + len2;
|
||||
u64 len3 = SZ_1M;
|
||||
u64 hole_start = logical1 + SZ_256K;
|
||||
u64 hole_len = SZ_2M;
|
||||
int ret;
|
||||
|
||||
bioc = alloc_btrfs_io_context(fs_info, logical1, RST_TEST_NUM_DEVICES);
|
||||
if (!bioc) {
|
||||
test_std_err(TEST_ALLOC_IO_CONTEXT);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
|
||||
|
||||
/* Prepare for the test, 1st create 3 x 1M extents. */
|
||||
bioc->map_type = map_type;
|
||||
bioc->size = len1;
|
||||
|
||||
for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
|
||||
struct btrfs_io_stripe *stripe = &bioc->stripes[i];
|
||||
|
||||
stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
|
||||
if (!stripe->dev) {
|
||||
test_err("cannot find device with devid %d", i);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stripe->physical = logical1 + i * SZ_1G;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||
if (ret) {
|
||||
test_err("inserting RAID extent failed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bioc->logical = logical2;
|
||||
bioc->size = len2;
|
||||
for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
|
||||
struct btrfs_io_stripe *stripe = &bioc->stripes[i];
|
||||
|
||||
stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
|
||||
if (!stripe->dev) {
|
||||
test_err("cannot find device with devid %d", i);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stripe->physical = logical2 + i * SZ_1G;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||
if (ret) {
|
||||
test_err("inserting RAID extent failed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bioc->logical = logical3;
|
||||
bioc->size = len3;
|
||||
for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
|
||||
struct btrfs_io_stripe *stripe = &bioc->stripes[i];
|
||||
|
||||
stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
|
||||
if (!stripe->dev) {
|
||||
test_err("cannot find device with devid %d", i);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stripe->physical = logical3 + i * SZ_1G;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||
if (ret) {
|
||||
test_err("inserting RAID extent failed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete a range starting at logical1 + 256K and 2M in length. Extent
|
||||
* 1 is truncated to 256k length, extent 2 is completely dropped and
|
||||
* extent 3 is moved 256K to the right.
|
||||
*/
|
||||
ret = btrfs_delete_raid_extent(trans, hole_start, hole_len);
|
||||
if (ret) {
|
||||
test_err("deleting RAID extent [%llu, %llu] failed",
|
||||
hole_start, hole_start + hole_len);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Get the first extent and check its size. */
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical1, &len1, map_type,
|
||||
0, &io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed",
|
||||
logical1, logical1 + len1);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (io_stripe.physical != logical1) {
|
||||
test_err("invalid physical address, expected %llu, got %llu",
|
||||
logical1, io_stripe.physical);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (len1 != SZ_256K) {
|
||||
test_err("invalid stripe length, expected %llu, got %llu",
|
||||
(u64)SZ_256K, len1);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Get the second extent and check it's absent. */
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical2, &len2, map_type,
|
||||
0, &io_stripe);
|
||||
if (ret != -ENODATA) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] succeeded should fail",
|
||||
logical2, logical2 + len2);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Get the third extent and check its size. */
|
||||
logical3 += SZ_256K;
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical3, &len3, map_type,
|
||||
0, &io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed",
|
||||
logical3, logical3 + len3);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (io_stripe.physical != logical3) {
|
||||
test_err("invalid physical address, expected %llu, got %llu",
|
||||
logical3 + SZ_256K, io_stripe.physical);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (len3 != SZ_1M - SZ_256K) {
|
||||
test_err("invalid stripe length, expected %llu, got %llu",
|
||||
(u64)SZ_1M - SZ_256K, len3);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical1, len1);
|
||||
if (ret) {
|
||||
test_err("deleting RAID extent [%llu, %llu] failed",
|
||||
logical1, logical1 + len1);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical3, len3);
|
||||
if (ret) {
|
||||
test_err("deleting RAID extent [%llu, %llu] failed",
|
||||
logical1, logical1 + len1);
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
btrfs_put_bioc(bioc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_delete_two_extents(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_io_context *bioc;
|
||||
struct btrfs_io_stripe io_stripe = { 0 };
|
||||
u64 map_type = RST_TEST_RAID1_TYPE;
|
||||
u64 logical1 = SZ_1M;
|
||||
u64 len1 = SZ_1M;
|
||||
u64 logical2 = logical1 + len1;
|
||||
u64 len2 = SZ_1M;
|
||||
u64 logical3 = logical2 + len2;
|
||||
u64 len3 = SZ_1M;
|
||||
int ret;
|
||||
|
||||
bioc = alloc_btrfs_io_context(fs_info, logical1, RST_TEST_NUM_DEVICES);
|
||||
if (!bioc) {
|
||||
test_std_err(TEST_ALLOC_IO_CONTEXT);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
|
||||
|
||||
/* Prepare for the test, 1st create 3 x 1M extents. */
|
||||
bioc->map_type = map_type;
|
||||
bioc->size = len1;
|
||||
|
||||
for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
|
||||
struct btrfs_io_stripe *stripe = &bioc->stripes[i];
|
||||
|
||||
stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
|
||||
if (!stripe->dev) {
|
||||
test_err("cannot find device with devid %d", i);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stripe->physical = logical1 + i * SZ_1G;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||
if (ret) {
|
||||
test_err("inserting RAID extent failed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bioc->logical = logical2;
|
||||
bioc->size = len2;
|
||||
for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
|
||||
struct btrfs_io_stripe *stripe = &bioc->stripes[i];
|
||||
|
||||
stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
|
||||
if (!stripe->dev) {
|
||||
test_err("cannot find device with devid %d", i);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stripe->physical = logical2 + i * SZ_1G;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||
if (ret) {
|
||||
test_err("inserting RAID extent failed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bioc->logical = logical3;
|
||||
bioc->size = len3;
|
||||
for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
|
||||
struct btrfs_io_stripe *stripe = &bioc->stripes[i];
|
||||
|
||||
stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
|
||||
if (!stripe->dev) {
|
||||
test_err("cannot find device with devid %d", i);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stripe->physical = logical3 + i * SZ_1G;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||
if (ret) {
|
||||
test_err("inserting RAID extent failed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete a range starting at logical1 and 2M in length. Extents 1
|
||||
* and 2 are dropped and extent 3 is kept as is.
|
||||
*/
|
||||
ret = btrfs_delete_raid_extent(trans, logical1, len1 + len2);
|
||||
if (ret) {
|
||||
test_err("deleting RAID extent [%llu, %llu] failed",
|
||||
logical1, logical1 + len1 + len2);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical1, &len1, map_type,
|
||||
0, &io_stripe);
|
||||
if (ret != -ENODATA) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] succeeded, should fail",
|
||||
logical1, len1);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical2, &len2, map_type,
|
||||
0, &io_stripe);
|
||||
if (ret != -ENODATA) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] succeeded, should fail",
|
||||
logical2, len2);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical3, &len3, map_type,
|
||||
0, &io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed",
|
||||
logical3, len3);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (io_stripe.physical != logical3) {
|
||||
test_err("invalid physical address, expected %llu, got %llu",
|
||||
logical3, io_stripe.physical);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (len3 != SZ_1M) {
|
||||
test_err("invalid stripe length, expected %llu, got %llu",
|
||||
(u64)SZ_1M, len3);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical3, len3);
|
||||
out:
|
||||
btrfs_put_bioc(bioc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Test punching a hole into a single RAID stripe-extent. */
|
||||
static int test_punch_hole(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_io_context *bioc;
|
||||
struct btrfs_io_stripe io_stripe = { 0 };
|
||||
u64 map_type = RST_TEST_RAID1_TYPE;
|
||||
u64 logical1 = SZ_1M;
|
||||
u64 hole_start = logical1 + SZ_32K;
|
||||
u64 hole_len = SZ_64K;
|
||||
u64 logical2 = hole_start + hole_len;
|
||||
u64 len = SZ_1M;
|
||||
u64 len1 = SZ_32K;
|
||||
u64 len2 = len - len1 - hole_len;
|
||||
int ret;
|
||||
|
||||
bioc = alloc_btrfs_io_context(fs_info, logical1, RST_TEST_NUM_DEVICES);
|
||||
if (!bioc) {
|
||||
test_std_err(TEST_ALLOC_IO_CONTEXT);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
|
||||
bioc->map_type = map_type;
|
||||
bioc->size = len;
|
||||
|
||||
for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
|
||||
struct btrfs_io_stripe *stripe = &bioc->stripes[i];
|
||||
|
||||
stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
|
||||
if (!stripe->dev) {
|
||||
test_err("cannot find device with devid %d", i);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stripe->physical = logical1 + i * SZ_1G;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||
if (ret) {
|
||||
test_err("inserting RAID extent failed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical1, &len, map_type, 0,
|
||||
&io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed", logical1,
|
||||
logical1 + len);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (io_stripe.physical != logical1) {
|
||||
test_err("invalid physical address, expected %llu got %llu",
|
||||
logical1, io_stripe.physical);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (len != SZ_1M) {
|
||||
test_err("invalid stripe length, expected %llu got %llu",
|
||||
(u64)SZ_1M, len);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, hole_start, hole_len);
|
||||
if (ret) {
|
||||
test_err("deleting RAID extent [%llu, %llu] failed",
|
||||
hole_start, hole_start + hole_len);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical1, &len1, map_type,
|
||||
0, &io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed",
|
||||
logical1, logical1 + len1);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (io_stripe.physical != logical1) {
|
||||
test_err("invalid physical address, expected %llu, got %llu",
|
||||
logical1, io_stripe.physical);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (len1 != SZ_32K) {
|
||||
test_err("invalid stripe length, expected %llu, got %llu",
|
||||
(u64)SZ_32K, len1);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical2, &len2, map_type,
|
||||
0, &io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed", logical2,
|
||||
logical2 + len2);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (io_stripe.physical != logical2) {
|
||||
test_err("invalid physical address, expected %llu, got %llu",
|
||||
logical2, io_stripe.physical);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (len2 != len - len1 - hole_len) {
|
||||
test_err("invalid length, expected %llu, got %llu",
|
||||
len - len1 - hole_len, len2);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Check for the absence of the hole. */
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, hole_start, &hole_len,
|
||||
map_type, 0, &io_stripe);
|
||||
if (ret != -ENODATA) {
|
||||
ret = -EINVAL;
|
||||
test_err("lookup of RAID extent [%llu, %llu] succeeded, should fail",
|
||||
hole_start, hole_start + SZ_64K);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical1, len1);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical2, len2);
|
||||
out:
|
||||
btrfs_put_bioc(bioc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test a 1M RST write that spans two adjacent RST items on disk and then
|
||||
* delete a portion starting in the first item and spanning into the second
|
||||
* item. This is similar to test_front_delete(), but spanning multiple items.
|
||||
*/
|
||||
static int test_front_delete_prev_item(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_io_context *bioc;
|
||||
struct btrfs_io_stripe io_stripe = { 0 };
|
||||
u64 map_type = RST_TEST_RAID1_TYPE;
|
||||
u64 logical1 = SZ_1M;
|
||||
u64 logical2 = SZ_2M;
|
||||
u64 len = SZ_1M;
|
||||
int ret;
|
||||
|
||||
bioc = alloc_btrfs_io_context(fs_info, logical1, RST_TEST_NUM_DEVICES);
|
||||
if (!bioc) {
|
||||
test_std_err(TEST_ALLOC_IO_CONTEXT);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
|
||||
bioc->map_type = map_type;
|
||||
bioc->size = len;
|
||||
|
||||
/* Insert RAID extent 1. */
|
||||
for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
|
||||
struct btrfs_io_stripe *stripe = &bioc->stripes[i];
|
||||
|
||||
stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
|
||||
if (!stripe->dev) {
|
||||
test_err("cannot find device with devid %d", i);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stripe->physical = logical1 + i * SZ_1G;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||
if (ret) {
|
||||
test_err("inserting RAID extent failed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bioc->logical = logical2;
|
||||
/* Insert RAID extent 2, directly adjacent to it. */
|
||||
for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
|
||||
struct btrfs_io_stripe *stripe = &bioc->stripes[i];
|
||||
|
||||
stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
|
||||
if (!stripe->dev) {
|
||||
test_err("cannot find device with devid %d", i);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stripe->physical = logical2 + i * SZ_1G;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||
if (ret) {
|
||||
test_err("inserting RAID extent failed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical1 + SZ_512K, SZ_1M);
|
||||
if (ret) {
|
||||
test_err("deleting RAID extent [%llu, %llu] failed",
|
||||
logical1 + SZ_512K, (u64)SZ_1M);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Verify item 1 is truncated to 512K. */
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical1, &len, map_type, 0,
|
||||
&io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed", logical1,
|
||||
logical1 + len);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (io_stripe.physical != logical1) {
|
||||
test_err("invalid physical address, expected %llu got %llu",
|
||||
logical1, io_stripe.physical);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (len != SZ_512K) {
|
||||
test_err("invalid stripe length, expected %llu got %llu",
|
||||
(u64)SZ_512K, len);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Verify item 2's start is moved by 512K. */
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical2 + SZ_512K, &len,
|
||||
map_type, 0, &io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed",
|
||||
logical2 + SZ_512K, logical2 + len);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (io_stripe.physical != logical2 + SZ_512K) {
|
||||
test_err("invalid physical address, expected %llu got %llu",
|
||||
logical2 + SZ_512K, io_stripe.physical);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (len != SZ_512K) {
|
||||
test_err("invalid stripe length, expected %llu got %llu",
|
||||
(u64)SZ_512K, len);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Verify there's a hole at [1M+512K, 2M+512K] . */
|
||||
len = SZ_1M;
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical1 + SZ_512K, &len,
|
||||
map_type, 0, &io_stripe);
|
||||
if (ret != -ENODATA) {
|
||||
test_err("lookup of RAID [%llu, %llu] succeeded, should fail",
|
||||
logical1 + SZ_512K, logical1 + SZ_512K + len);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Clean up after us. */
|
||||
ret = btrfs_delete_raid_extent(trans, logical1, SZ_512K);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical2 + SZ_512K, SZ_512K);
|
||||
|
||||
out:
|
||||
btrfs_put_bioc(bioc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test a 64K RST write on a 2 disk RAID1 at a logical address of 1M and then
|
||||
* delete the 1st 32K, making the new start address 1M+32K.
|
||||
|
@ -94,45 +703,45 @@ static int test_front_delete(struct btrfs_trans_handle *trans)
|
|||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical, SZ_32K);
|
||||
ret = btrfs_delete_raid_extent(trans, logical, SZ_16K);
|
||||
if (ret) {
|
||||
test_err("deleting RAID extent [%llu, %llu] failed", logical,
|
||||
logical + SZ_32K);
|
||||
logical + SZ_16K);
|
||||
goto out;
|
||||
}
|
||||
|
||||
len = SZ_32K;
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical + SZ_32K, &len,
|
||||
len -= SZ_16K;
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical + SZ_16K, &len,
|
||||
map_type, 0, &io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed",
|
||||
logical + SZ_32K, logical + SZ_32K + len);
|
||||
logical + SZ_16K, logical + SZ_64K);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (io_stripe.physical != logical + SZ_32K) {
|
||||
if (io_stripe.physical != logical + SZ_16K) {
|
||||
test_err("invalid physical address, expected %llu, got %llu",
|
||||
logical + SZ_32K, io_stripe.physical);
|
||||
logical + SZ_16K, io_stripe.physical);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (len != SZ_32K) {
|
||||
if (len != SZ_48K) {
|
||||
test_err("invalid stripe length, expected %llu, got %llu",
|
||||
(u64)SZ_32K, len);
|
||||
(u64)SZ_48K, len);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical, &len, map_type, 0, &io_stripe);
|
||||
if (!ret) {
|
||||
if (ret != -ENODATA) {
|
||||
ret = -EINVAL;
|
||||
test_err("lookup of RAID extent [%llu, %llu] succeeded, should fail",
|
||||
logical, logical + SZ_32K);
|
||||
logical, logical + SZ_16K);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical + SZ_32K, SZ_32K);
|
||||
ret = btrfs_delete_raid_extent(trans, logical + SZ_16K, SZ_48K);
|
||||
out:
|
||||
btrfs_put_bioc(bioc);
|
||||
return ret;
|
||||
|
@ -209,14 +818,14 @@ static int test_tail_delete(struct btrfs_trans_handle *trans)
|
|||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_delete_raid_extent(trans, logical + SZ_32K, SZ_32K);
|
||||
ret = btrfs_delete_raid_extent(trans, logical + SZ_48K, SZ_16K);
|
||||
if (ret) {
|
||||
test_err("deleting RAID extent [%llu, %llu] failed",
|
||||
logical + SZ_32K, logical + SZ_64K);
|
||||
logical + SZ_48K, logical + SZ_64K);
|
||||
goto out;
|
||||
}
|
||||
|
||||
len = SZ_32K;
|
||||
len = SZ_48K;
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical, &len, map_type, 0, &io_stripe);
|
||||
if (ret) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] failed", logical,
|
||||
|
@ -231,9 +840,19 @@ static int test_tail_delete(struct btrfs_trans_handle *trans)
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (len != SZ_32K) {
|
||||
if (len != SZ_48K) {
|
||||
test_err("invalid stripe length, expected %llu, got %llu",
|
||||
(u64)SZ_32K, len);
|
||||
(u64)SZ_48K, len);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
len = SZ_16K;
|
||||
ret = btrfs_get_raid_extent_offset(fs_info, logical + SZ_48K, &len,
|
||||
map_type, 0, &io_stripe);
|
||||
if (ret != -ENODATA) {
|
||||
test_err("lookup of RAID extent [%llu, %llu] succeeded should fail",
|
||||
logical + SZ_48K, logical + SZ_64K);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
@ -456,6 +1075,10 @@ static const test_func_t tests[] = {
|
|||
test_create_update_delete,
|
||||
test_tail_delete,
|
||||
test_front_delete,
|
||||
test_front_delete_prev_item,
|
||||
test_punch_hole,
|
||||
test_punch_hole_3extents,
|
||||
test_delete_two_extents,
|
||||
};
|
||||
|
||||
static int run_test(test_func_t test, u32 sectorsize, u32 nodesize)
|
||||
|
@ -478,8 +1101,8 @@ static int run_test(test_func_t test, u32 sectorsize, u32 nodesize)
|
|||
ret = PTR_ERR(root);
|
||||
goto out;
|
||||
}
|
||||
btrfs_set_super_compat_ro_flags(root->fs_info->super_copy,
|
||||
BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE);
|
||||
btrfs_set_super_incompat_flags(root->fs_info->super_copy,
|
||||
BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE);
|
||||
root->root_key.objectid = BTRFS_RAID_STRIPE_TREE_OBJECTID;
|
||||
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
root->root_key.offset = 0;
|
||||
|
|
|
@ -795,8 +795,7 @@ alloc_fail:
|
|||
if (num_bytes)
|
||||
btrfs_block_rsv_release(fs_info, trans_rsv, num_bytes, NULL);
|
||||
if (delayed_refs_bytes)
|
||||
btrfs_space_info_free_bytes_may_use(fs_info, trans_rsv->space_info,
|
||||
delayed_refs_bytes);
|
||||
btrfs_space_info_free_bytes_may_use(trans_rsv->space_info, delayed_refs_bytes);
|
||||
reserve_fail:
|
||||
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
|
||||
return ERR_PTR(ret);
|
||||
|
|
|
@ -227,7 +227,21 @@ static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans)
|
|||
delayed_refs->qgroup_to_skip = 0;
|
||||
}
|
||||
|
||||
bool __cold abort_should_print_stack(int error);
|
||||
/*
 * Decide whether aborting a transaction with @error should also print a stack
 * trace.
 *
 * We want the trace only for errors where the cause could be a bug, e.g. due
 * to ENOSPC, and not for common errors that are caused by external factors
 * (-EIO, -EROFS, -ENOMEM).
 *
 * Return true if the stack trace should be printed, false otherwise.
 */
static inline bool btrfs_abort_should_print_stack(int error)
{
	if (error == -EIO || error == -EROFS || error == -ENOMEM)
		return false;

	return true;
}
|
||||
|
||||
/*
|
||||
* Call btrfs_abort_transaction as early as possible when an error condition is
|
||||
|
@ -240,7 +254,7 @@ do { \
|
|||
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
|
||||
&((trans)->fs_info->fs_state))) { \
|
||||
__first = true; \
|
||||
if (WARN(abort_should_print_stack(error), \
|
||||
if (WARN(btrfs_abort_should_print_stack(error), \
|
||||
KERN_ERR \
|
||||
"BTRFS: Transaction aborted (error %d)\n", \
|
||||
(error))) { \
|
||||
|
|
|
@ -764,22 +764,19 @@ static int check_block_group_item(struct extent_buffer *leaf,
|
|||
return 0;
|
||||
}
|
||||
|
||||
__printf(4, 5)
|
||||
__printf(5, 6)
|
||||
__cold
|
||||
static void chunk_err(const struct extent_buffer *leaf,
|
||||
static void chunk_err(const struct btrfs_fs_info *fs_info,
|
||||
const struct extent_buffer *leaf,
|
||||
const struct btrfs_chunk *chunk, u64 logical,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
const struct btrfs_fs_info *fs_info = leaf->fs_info;
|
||||
bool is_sb;
|
||||
bool is_sb = !leaf;
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
int i;
|
||||
int slot = -1;
|
||||
|
||||
/* Only superblock eb is able to have such small offset */
|
||||
is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);
|
||||
|
||||
if (!is_sb) {
|
||||
/*
|
||||
* Get the slot number by iterating through all slots, this
|
||||
|
@ -812,13 +809,17 @@ static void chunk_err(const struct extent_buffer *leaf,
|
|||
/*
|
||||
* The common chunk check which could also work on super block sys chunk array.
|
||||
*
|
||||
* If @leaf is NULL, then @chunk must be an on-stack chunk item.
|
||||
* (For superblock sys_chunk array, and fs_info->sectorsize is unreliable)
|
||||
*
|
||||
* Return -EUCLEAN if anything is corrupted.
|
||||
* Return 0 if everything is OK.
|
||||
*/
|
||||
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk, u64 logical)
|
||||
int btrfs_check_chunk_valid(const struct btrfs_fs_info *fs_info,
|
||||
const struct extent_buffer *leaf,
|
||||
const struct btrfs_chunk *chunk, u64 logical,
|
||||
u32 sectorsize)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = leaf->fs_info;
|
||||
u64 length;
|
||||
u64 chunk_end;
|
||||
u64 stripe_len;
|
||||
|
@ -826,63 +827,73 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
|
|||
u16 sub_stripes;
|
||||
u64 type;
|
||||
u64 features;
|
||||
u32 chunk_sector_size;
|
||||
bool mixed = false;
|
||||
int raid_index;
|
||||
int nparity;
|
||||
int ncopies;
|
||||
|
||||
length = btrfs_chunk_length(leaf, chunk);
|
||||
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
|
||||
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
|
||||
sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
|
||||
type = btrfs_chunk_type(leaf, chunk);
|
||||
if (leaf) {
|
||||
length = btrfs_chunk_length(leaf, chunk);
|
||||
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
|
||||
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
|
||||
sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
|
||||
type = btrfs_chunk_type(leaf, chunk);
|
||||
chunk_sector_size = btrfs_chunk_sector_size(leaf, chunk);
|
||||
} else {
|
||||
length = btrfs_stack_chunk_length(chunk);
|
||||
stripe_len = btrfs_stack_chunk_stripe_len(chunk);
|
||||
num_stripes = btrfs_stack_chunk_num_stripes(chunk);
|
||||
sub_stripes = btrfs_stack_chunk_sub_stripes(chunk);
|
||||
type = btrfs_stack_chunk_type(chunk);
|
||||
chunk_sector_size = btrfs_stack_chunk_sector_size(chunk);
|
||||
}
|
||||
raid_index = btrfs_bg_flags_to_raid_index(type);
|
||||
ncopies = btrfs_raid_array[raid_index].ncopies;
|
||||
nparity = btrfs_raid_array[raid_index].nparity;
|
||||
|
||||
if (unlikely(!num_stripes)) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid chunk num_stripes, have %u", num_stripes);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (unlikely(num_stripes < ncopies)) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid chunk num_stripes < ncopies, have %u < %d",
|
||||
num_stripes, ncopies);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (unlikely(nparity && num_stripes == nparity)) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid chunk num_stripes == nparity, have %u == %d",
|
||||
num_stripes, nparity);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (unlikely(!IS_ALIGNED(logical, fs_info->sectorsize))) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
if (unlikely(!IS_ALIGNED(logical, sectorsize))) {
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid chunk logical, have %llu should aligned to %u",
|
||||
logical, fs_info->sectorsize);
|
||||
logical, sectorsize);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (unlikely(btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize)) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
if (unlikely(chunk_sector_size != sectorsize)) {
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid chunk sectorsize, have %u expect %u",
|
||||
btrfs_chunk_sector_size(leaf, chunk),
|
||||
fs_info->sectorsize);
|
||||
chunk_sector_size, sectorsize);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (unlikely(!length || !IS_ALIGNED(length, fs_info->sectorsize))) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
if (unlikely(!length || !IS_ALIGNED(length, sectorsize))) {
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid chunk length, have %llu", length);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (unlikely(check_add_overflow(logical, length, &chunk_end))) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid chunk logical start and length, have logical start %llu length %llu",
|
||||
logical, length);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid chunk stripe length: %llu",
|
||||
stripe_len);
|
||||
return -EUCLEAN;
|
||||
|
@ -896,30 +907,29 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
|
|||
* Thus it should be a good way to catch obvious bitflips.
|
||||
*/
|
||||
if (unlikely(length >= btrfs_stripe_nr_to_offset(U32_MAX))) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"chunk length too large: have %llu limit %llu",
|
||||
length, btrfs_stripe_nr_to_offset(U32_MAX));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
|
||||
BTRFS_BLOCK_GROUP_PROFILE_MASK))) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"unrecognized chunk type: 0x%llx",
|
||||
~(BTRFS_BLOCK_GROUP_TYPE_MASK |
|
||||
BTRFS_BLOCK_GROUP_PROFILE_MASK) &
|
||||
btrfs_chunk_type(leaf, chunk));
|
||||
BTRFS_BLOCK_GROUP_PROFILE_MASK) & type);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
if (unlikely(!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
|
||||
(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
|
||||
type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (unlikely((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
|
||||
type, BTRFS_BLOCK_GROUP_TYPE_MASK);
|
||||
return -EUCLEAN;
|
||||
|
@ -928,7 +938,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
|
|||
if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
|
||||
(type & (BTRFS_BLOCK_GROUP_METADATA |
|
||||
BTRFS_BLOCK_GROUP_DATA)))) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"system chunk with data or metadata type: 0x%llx",
|
||||
type);
|
||||
return -EUCLEAN;
|
||||
|
@ -941,7 +951,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
|
|||
if (!mixed) {
|
||||
if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA) &&
|
||||
(type & BTRFS_BLOCK_GROUP_DATA))) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"mixed chunk type in non-mixed mode: 0x%llx", type);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
@ -963,7 +973,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
|
|||
num_stripes != btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes) ||
|
||||
((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
|
||||
num_stripes != btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes))) {
|
||||
chunk_err(leaf, chunk, logical,
|
||||
chunk_err(fs_info, leaf, chunk, logical,
|
||||
"invalid num_stripes:sub_stripes %u:%u for profile %llu",
|
||||
num_stripes, sub_stripes,
|
||||
type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
|
||||
|
@ -983,14 +993,15 @@ static int check_leaf_chunk_item(struct extent_buffer *leaf,
|
|||
struct btrfs_chunk *chunk,
|
||||
struct btrfs_key *key, int slot)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = leaf->fs_info;
|
||||
int num_stripes;
|
||||
|
||||
if (unlikely(btrfs_item_size(leaf, slot) < sizeof(struct btrfs_chunk))) {
|
||||
chunk_err(leaf, chunk, key->offset,
|
||||
chunk_err(fs_info, leaf, chunk, key->offset,
|
||||
"invalid chunk item size: have %u expect [%zu, %u)",
|
||||
btrfs_item_size(leaf, slot),
|
||||
sizeof(struct btrfs_chunk),
|
||||
BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
|
@ -1001,14 +1012,15 @@ static int check_leaf_chunk_item(struct extent_buffer *leaf,
|
|||
|
||||
if (unlikely(btrfs_chunk_item_size(num_stripes) !=
|
||||
btrfs_item_size(leaf, slot))) {
|
||||
chunk_err(leaf, chunk, key->offset,
|
||||
chunk_err(fs_info, leaf, chunk, key->offset,
|
||||
"invalid chunk item size: have %u expect %lu",
|
||||
btrfs_item_size(leaf, slot),
|
||||
btrfs_chunk_item_size(num_stripes));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
out:
|
||||
return btrfs_check_chunk_valid(leaf, chunk, key->offset);
|
||||
return btrfs_check_chunk_valid(fs_info, leaf, chunk, key->offset,
|
||||
fs_info->sectorsize);
|
||||
}
|
||||
|
||||
__printf(3, 4)
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <uapi/linux/btrfs_tree.h>
|
||||
|
||||
struct extent_buffer;
|
||||
struct btrfs_fs_info;
|
||||
struct btrfs_chunk;
|
||||
struct btrfs_key;
|
||||
|
||||
|
@ -66,8 +67,10 @@ enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node);
|
|||
int btrfs_check_leaf(struct extent_buffer *leaf);
|
||||
int btrfs_check_node(struct extent_buffer *node);
|
||||
|
||||
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk, u64 logical);
|
||||
int btrfs_check_chunk_valid(const struct btrfs_fs_info *fs_info,
|
||||
const struct extent_buffer *leaf,
|
||||
const struct btrfs_chunk *chunk, u64 logical,
|
||||
u32 sectorsize);
|
||||
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner);
|
||||
int btrfs_verify_level_key(struct extent_buffer *eb,
|
||||
const struct btrfs_tree_parent_check *check);
|
||||
|
|
|
@ -590,7 +590,6 @@ insert:
|
|||
}
|
||||
}
|
||||
no_copy:
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
btrfs_release_path(path);
|
||||
return 0;
|
||||
}
|
||||
|
@ -3588,7 +3587,6 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
|
|||
last_offset = max(last_offset, curr_end);
|
||||
}
|
||||
btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
|
||||
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
|
||||
btrfs_release_path(path);
|
||||
return 0;
|
||||
}
|
||||
|
@ -4566,7 +4564,6 @@ copy_item:
|
|||
dst_index++;
|
||||
}
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, dst_path->nodes[0]);
|
||||
btrfs_release_path(dst_path);
|
||||
out:
|
||||
kfree(ins_data);
|
||||
|
@ -4776,7 +4773,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
|
|||
write_extent_buffer(leaf, &fi,
|
||||
btrfs_item_ptr_offset(leaf, path->slots[0]),
|
||||
sizeof(fi));
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
btrfs_release_path(path);
|
||||
|
||||
|
|
|
@ -140,8 +140,6 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 typ
|
|||
ret = 0;
|
||||
subid_le = cpu_to_le64(subid_cpu);
|
||||
write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le));
|
||||
btrfs_mark_buffer_dirty(trans, eb);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
|
|
@ -13,8 +13,8 @@
|
|||
#include <linux/list_sort.h>
|
||||
#include <linux/namei.h>
|
||||
#include "misc.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "extent-tree.h"
|
||||
#include "transaction.h"
|
||||
#include "volumes.h"
|
||||
#include "raid56.h"
|
||||
|
@ -48,6 +48,7 @@ struct btrfs_io_geometry {
|
|||
u64 raid56_full_stripe_start;
|
||||
int max_errors;
|
||||
enum btrfs_map_op op;
|
||||
bool use_rst;
|
||||
};
|
||||
|
||||
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
|
||||
|
@ -1302,6 +1303,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
|
|||
struct btrfs_device *device;
|
||||
struct btrfs_device *latest_dev = NULL;
|
||||
struct btrfs_device *tmp_device;
|
||||
s64 __maybe_unused value = 0;
|
||||
int ret = 0;
|
||||
|
||||
list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
|
||||
|
@ -1331,7 +1333,23 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
|
|||
fs_devices->latest_dev = latest_dev;
|
||||
fs_devices->total_rw_bytes = 0;
|
||||
fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
fs_devices->rr_min_contig_read = BTRFS_DEFAULT_RR_MIN_CONTIG_READ;
|
||||
fs_devices->read_devid = latest_dev->devid;
|
||||
fs_devices->read_policy = btrfs_read_policy_to_enum(btrfs_get_mod_read_policy(),
|
||||
&value);
|
||||
if (fs_devices->read_policy == BTRFS_READ_POLICY_RR)
|
||||
fs_devices->collect_fs_stats = true;
|
||||
|
||||
if (value) {
|
||||
if (fs_devices->read_policy == BTRFS_READ_POLICY_RR)
|
||||
fs_devices->rr_min_contig_read = value;
|
||||
if (fs_devices->read_policy == BTRFS_READ_POLICY_DEVID)
|
||||
fs_devices->read_devid = value;
|
||||
}
|
||||
#else
|
||||
fs_devices->read_policy = BTRFS_READ_POLICY_PID;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -2049,7 +2067,6 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
|
|||
ptr = btrfs_device_fsid(dev_item);
|
||||
write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
|
||||
ptr, BTRFS_FSID_SIZE);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
|
@ -2745,11 +2762,9 @@ next_slot:
|
|||
device = btrfs_find_device(fs_info->fs_devices, &args);
|
||||
BUG_ON(!device); /* Logic error */
|
||||
|
||||
if (device->fs_devices->seeding) {
|
||||
if (device->fs_devices->seeding)
|
||||
btrfs_set_device_generation(leaf, dev_item,
|
||||
device->generation);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
}
|
||||
|
||||
path->slots[0]++;
|
||||
goto next_slot;
|
||||
|
@ -3042,8 +3057,6 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
|
|||
btrfs_device_get_disk_total_bytes(device));
|
||||
btrfs_set_device_bytes_used(leaf, dev_item,
|
||||
btrfs_device_get_bytes_used(device));
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -3752,10 +3765,7 @@ static int insert_balance_item(struct btrfs_fs_info *fs_info,
|
|||
btrfs_set_balance_meta(leaf, item, &disk_bargs);
|
||||
btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
|
||||
btrfs_set_balance_sys(leaf, item, &disk_bargs);
|
||||
|
||||
btrfs_set_balance_flags(leaf, item, bctl->flags);
|
||||
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
err = btrfs_commit_transaction(trans);
|
||||
|
@ -5517,33 +5527,34 @@ void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_ma
|
|||
btrfs_free_chunk_map(map);
|
||||
}
|
||||
|
||||
static int btrfs_chunk_map_cmp(const struct rb_node *new,
|
||||
const struct rb_node *exist)
|
||||
{
|
||||
const struct btrfs_chunk_map *new_map =
|
||||
rb_entry(new, struct btrfs_chunk_map, rb_node);
|
||||
const struct btrfs_chunk_map *exist_map =
|
||||
rb_entry(exist, struct btrfs_chunk_map, rb_node);
|
||||
|
||||
if (new_map->start == exist_map->start)
|
||||
return 0;
|
||||
if (new_map->start < exist_map->start)
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
EXPORT_FOR_TESTS
|
||||
int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map)
|
||||
{
|
||||
struct rb_node **p;
|
||||
struct rb_node *parent = NULL;
|
||||
bool leftmost = true;
|
||||
struct rb_node *exist;
|
||||
|
||||
write_lock(&fs_info->mapping_tree_lock);
|
||||
p = &fs_info->mapping_tree.rb_root.rb_node;
|
||||
while (*p) {
|
||||
struct btrfs_chunk_map *entry;
|
||||
exist = rb_find_add_cached(&map->rb_node, &fs_info->mapping_tree,
|
||||
btrfs_chunk_map_cmp);
|
||||
|
||||
parent = *p;
|
||||
entry = rb_entry(parent, struct btrfs_chunk_map, rb_node);
|
||||
|
||||
if (map->start < entry->start) {
|
||||
p = &(*p)->rb_left;
|
||||
} else if (map->start > entry->start) {
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else {
|
||||
write_unlock(&fs_info->mapping_tree_lock);
|
||||
return -EEXIST;
|
||||
}
|
||||
if (exist) {
|
||||
write_unlock(&fs_info->mapping_tree_lock);
|
||||
return -EEXIST;
|
||||
}
|
||||
rb_link_node(&map->rb_node, parent, p);
|
||||
rb_insert_color_cached(&map->rb_node, &fs_info->mapping_tree, leftmost);
|
||||
chunk_map_device_set_bits(map, CHUNK_ALLOCATED);
|
||||
chunk_map_device_clear_bits(map, CHUNK_TRIMMED);
|
||||
write_unlock(&fs_info->mapping_tree_lock);
|
||||
|
@ -5963,6 +5974,76 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
|
|||
return len;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
static int btrfs_read_preferred(struct btrfs_chunk_map *map, int first, int num_stripes)
|
||||
{
|
||||
for (int index = first; index < first + num_stripes; index++) {
|
||||
const struct btrfs_device *device = map->stripes[index].dev;
|
||||
|
||||
if (device->devid == READ_ONCE(device->fs_devices->read_devid))
|
||||
return index;
|
||||
}
|
||||
|
||||
/* If no read-preferred device is set use the first stripe. */
|
||||
return first;
|
||||
}
|
||||
|
||||
struct stripe_mirror {
|
||||
u64 devid;
|
||||
int num;
|
||||
};
|
||||
|
||||
static int btrfs_cmp_devid(const void *a, const void *b)
|
||||
{
|
||||
const struct stripe_mirror *s1 = (const struct stripe_mirror *)a;
|
||||
const struct stripe_mirror *s2 = (const struct stripe_mirror *)b;
|
||||
|
||||
if (s1->devid < s2->devid)
|
||||
return -1;
|
||||
if (s1->devid > s2->devid)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Select a stripe for reading using the round-robin algorithm.
|
||||
*
|
||||
* 1. Compute the read cycle as the total sectors read divided by the minimum
|
||||
* sectors per device.
|
||||
* 2. Determine the stripe number for the current read by taking the modulus
|
||||
* of the read cycle with the total number of stripes:
|
||||
*
|
||||
* stripe index = (total sectors / min sectors per dev) % num stripes
|
||||
*
|
||||
* The calculated stripe index is then used to select the corresponding device
|
||||
* from the list of devices, which is ordered by devid.
|
||||
*/
|
||||
static int btrfs_read_rr(const struct btrfs_chunk_map *map, int first, int num_stripes)
|
||||
{
|
||||
struct stripe_mirror stripes[BTRFS_RAID1_MAX_MIRRORS] = { 0 };
|
||||
struct btrfs_device *device = map->stripes[first].dev;
|
||||
struct btrfs_fs_info *fs_info = device->fs_devices->fs_info;
|
||||
unsigned int read_cycle;
|
||||
unsigned int total_reads;
|
||||
unsigned int min_reads_per_dev;
|
||||
|
||||
total_reads = percpu_counter_sum(&fs_info->stats_read_blocks);
|
||||
min_reads_per_dev = READ_ONCE(fs_info->fs_devices->rr_min_contig_read) >>
|
||||
fs_info->sectorsize_bits;
|
||||
|
||||
for (int index = 0, i = first; i < first + num_stripes; i++) {
|
||||
stripes[index].devid = map->stripes[i].dev->devid;
|
||||
stripes[index].num = i;
|
||||
index++;
|
||||
}
|
||||
sort(stripes, num_stripes, sizeof(struct stripe_mirror),
|
||||
btrfs_cmp_devid, NULL);
|
||||
|
||||
read_cycle = total_reads / min_reads_per_dev;
|
||||
return stripes[read_cycle % num_stripes].num;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int find_live_mirror(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_chunk_map *map, int first,
|
||||
int dev_replace_is_ongoing)
|
||||
|
@ -5992,6 +6073,14 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
|
|||
case BTRFS_READ_POLICY_PID:
|
||||
preferred_mirror = first + (current->pid % num_stripes);
|
||||
break;
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
case BTRFS_READ_POLICY_RR:
|
||||
preferred_mirror = btrfs_read_rr(map, first, num_stripes);
|
||||
break;
|
||||
case BTRFS_READ_POLICY_DEVID:
|
||||
preferred_mirror = btrfs_read_preferred(map, first, num_stripes);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (dev_replace_is_ongoing &&
|
||||
|
@ -6350,8 +6439,7 @@ static int set_io_stripe(struct btrfs_fs_info *fs_info, u64 logical,
|
|||
{
|
||||
dst->dev = map->stripes[io_geom->stripe_index].dev;
|
||||
|
||||
if (io_geom->op == BTRFS_MAP_READ &&
|
||||
btrfs_need_stripe_tree_update(fs_info, map->type))
|
||||
if (io_geom->op == BTRFS_MAP_READ && io_geom->use_rst)
|
||||
return btrfs_get_raid_extent_offset(fs_info, logical, length,
|
||||
map->type,
|
||||
io_geom->stripe_index, dst);
|
||||
|
@ -6366,7 +6454,7 @@ static bool is_single_device_io(struct btrfs_fs_info *fs_info,
|
|||
const struct btrfs_io_stripe *smap,
|
||||
const struct btrfs_chunk_map *map,
|
||||
int num_alloc_stripes,
|
||||
enum btrfs_map_op op, int mirror_num)
|
||||
struct btrfs_io_geometry *io_geom)
|
||||
{
|
||||
if (!smap)
|
||||
return false;
|
||||
|
@ -6374,10 +6462,10 @@ static bool is_single_device_io(struct btrfs_fs_info *fs_info,
|
|||
if (num_alloc_stripes != 1)
|
||||
return false;
|
||||
|
||||
if (btrfs_need_stripe_tree_update(fs_info, map->type) && op != BTRFS_MAP_READ)
|
||||
if (io_geom->use_rst && io_geom->op != BTRFS_MAP_READ)
|
||||
return false;
|
||||
|
||||
if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)
|
||||
if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && io_geom->mirror_num > 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -6583,6 +6671,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
|||
io_geom.raid56_full_stripe_start = (u64)-1;
|
||||
max_len = btrfs_max_io_len(map, map_offset, &io_geom);
|
||||
*length = min_t(u64, map->chunk_len - map_offset, max_len);
|
||||
io_geom.use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
|
||||
|
||||
if (dev_replace->replace_task != current)
|
||||
down_read(&dev_replace->rwsem);
|
||||
|
@ -6651,8 +6740,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
|||
* physical block information on the stack instead of allocating an
|
||||
* I/O context structure.
|
||||
*/
|
||||
if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, op,
|
||||
io_geom.mirror_num)) {
|
||||
if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, &io_geom)) {
|
||||
ret = set_io_stripe(fs_info, logical, length, smap, map, &io_geom);
|
||||
if (mirror_num_ret)
|
||||
*mirror_num_ret = io_geom.mirror_num;
|
||||
|
@ -6666,6 +6754,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
|||
goto out;
|
||||
}
|
||||
bioc->map_type = map->type;
|
||||
bioc->use_rst = io_geom.use_rst;
|
||||
|
||||
/*
|
||||
* For RAID56 full map, we need to make sure the stripes[] follows the
|
||||
|
@ -7006,16 +7095,6 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
|
|||
warn_32bit_meta_chunk(fs_info, logical, length, type);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Only need to verify chunk item if we're reading from sys chunk array,
|
||||
* as chunk item in tree block is already verified by tree-checker.
|
||||
*/
|
||||
if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
|
||||
ret = btrfs_check_chunk_valid(leaf, chunk, logical);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
map = btrfs_find_chunk_map(fs_info, logical, 1);
|
||||
|
||||
/* already mapped? */
|
||||
|
@ -7273,16 +7352,11 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
|
|||
{
|
||||
struct btrfs_super_block *super_copy = fs_info->super_copy;
|
||||
struct extent_buffer *sb;
|
||||
struct btrfs_disk_key *disk_key;
|
||||
struct btrfs_chunk *chunk;
|
||||
u8 *array_ptr;
|
||||
unsigned long sb_array_offset;
|
||||
int ret = 0;
|
||||
u32 num_stripes;
|
||||
u32 array_size;
|
||||
u32 len = 0;
|
||||
u32 cur_offset;
|
||||
u64 type;
|
||||
struct btrfs_key key;
|
||||
|
||||
ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
|
||||
|
@ -7305,10 +7379,15 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
|
|||
cur_offset = 0;
|
||||
|
||||
while (cur_offset < array_size) {
|
||||
disk_key = (struct btrfs_disk_key *)array_ptr;
|
||||
len = sizeof(*disk_key);
|
||||
if (cur_offset + len > array_size)
|
||||
goto out_short_read;
|
||||
struct btrfs_chunk *chunk;
|
||||
struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)array_ptr;
|
||||
u32 len = sizeof(*disk_key);
|
||||
|
||||
/*
|
||||
* The sys_chunk_array has been already verified at super block
|
||||
* read time. Only do ASSERT()s for basic checks.
|
||||
*/
|
||||
ASSERT(cur_offset + len <= array_size);
|
||||
|
||||
btrfs_disk_key_to_cpu(&key, disk_key);
|
||||
|
||||
|
@ -7316,44 +7395,14 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
|
|||
sb_array_offset += len;
|
||||
cur_offset += len;
|
||||
|
||||
if (key.type != BTRFS_CHUNK_ITEM_KEY) {
|
||||
btrfs_err(fs_info,
|
||||
"unexpected item type %u in sys_array at offset %u",
|
||||
(u32)key.type, cur_offset);
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
ASSERT(key.type == BTRFS_CHUNK_ITEM_KEY);
|
||||
|
||||
chunk = (struct btrfs_chunk *)sb_array_offset;
|
||||
/*
|
||||
* At least one btrfs_chunk with one stripe must be present,
|
||||
* exact stripe count check comes afterwards
|
||||
*/
|
||||
len = btrfs_chunk_item_size(1);
|
||||
if (cur_offset + len > array_size)
|
||||
goto out_short_read;
|
||||
ASSERT(btrfs_chunk_type(sb, chunk) & BTRFS_BLOCK_GROUP_SYSTEM);
|
||||
|
||||
num_stripes = btrfs_chunk_num_stripes(sb, chunk);
|
||||
if (!num_stripes) {
|
||||
btrfs_err(fs_info,
|
||||
"invalid number of stripes %u in sys_array at offset %u",
|
||||
num_stripes, cur_offset);
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
len = btrfs_chunk_item_size(btrfs_chunk_num_stripes(sb, chunk));
|
||||
|
||||
type = btrfs_chunk_type(sb, chunk);
|
||||
if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
|
||||
btrfs_err(fs_info,
|
||||
"invalid chunk type %llu in sys_array at offset %u",
|
||||
type, cur_offset);
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
len = btrfs_chunk_item_size(num_stripes);
|
||||
if (cur_offset + len > array_size)
|
||||
goto out_short_read;
|
||||
ASSERT(cur_offset + len <= array_size);
|
||||
|
||||
ret = read_one_chunk(&key, sb, chunk);
|
||||
if (ret)
|
||||
|
@ -7366,13 +7415,6 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
|
|||
clear_extent_buffer_uptodate(sb);
|
||||
free_extent_buffer_stale(sb);
|
||||
return ret;
|
||||
|
||||
out_short_read:
|
||||
btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
|
||||
len, cur_offset);
|
||||
clear_extent_buffer_uptodate(sb);
|
||||
free_extent_buffer_stale(sb);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -7572,8 +7614,6 @@ int btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
|
|||
struct btrfs_device *device;
|
||||
int ret = 0;
|
||||
|
||||
fs_devices->fs_info = fs_info;
|
||||
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
list_for_each_entry(device, &fs_devices->devices, dev_list)
|
||||
device->fs_info = fs_info;
|
||||
|
@ -7749,8 +7789,6 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
|
|||
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
|
||||
btrfs_set_dev_stats_value(eb, ptr, i,
|
||||
btrfs_dev_stat_read(device, i));
|
||||
btrfs_mark_buffer_dirty(trans, eb);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
|
|
@ -296,6 +296,9 @@ enum btrfs_chunk_allocation_policy {
|
|||
BTRFS_CHUNK_ALLOC_ZONED,
|
||||
};
|
||||
|
||||
/*
 * NOTE(review): by its name this looks like the default for the round-robin
 * read policy's minimum contiguous read size - confirm at the place it is used.
 */
#define BTRFS_DEFAULT_RR_MIN_CONTIG_READ	(SZ_256K)

/* Keep in sync with raid_attr table, current maximum is RAID1C4. */
#define BTRFS_RAID1_MAX_MIRRORS			(4)
|
||||
/*
|
||||
* Read policies for mirrored block group profiles, read picks the stripe based
|
||||
* on these policies.
|
||||
|
@ -303,6 +306,12 @@ enum btrfs_chunk_allocation_policy {
|
|||
enum btrfs_read_policy {
	/* Pick the stripe based on the PID of the process. */
	BTRFS_READ_POLICY_PID,
#ifdef CONFIG_BTRFS_EXPERIMENTAL
	/* Round-robin: balance RAID1 reads across all striped devices. */
	BTRFS_READ_POLICY_RR,
	/* Direct the reads to one specific device. */
	BTRFS_READ_POLICY_DEVID,
#endif
	/* Count of the policies listed above (holds while this stays last). */
	BTRFS_NR_READ_POLICY,
};
|
||||
|
||||
|
@ -417,6 +426,8 @@ struct btrfs_fs_devices {
|
|||
bool seeding;
|
||||
/* The mount needs to use a randomly generated fsid. */
|
||||
bool temp_fsid;
|
||||
/* Enable/disable the filesystem stats tracking. */
|
||||
bool collect_fs_stats;
|
||||
|
||||
struct btrfs_fs_info *fs_info;
|
||||
/* sysfs kobjects */
|
||||
|
@ -431,6 +442,15 @@ struct btrfs_fs_devices {
|
|||
enum btrfs_read_policy read_policy;
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
/*
|
||||
* Minimum contiguous reads before switching to next device, the unit
|
||||
* is one block/sectorsize.
|
||||
*/
|
||||
u32 rr_min_contig_read;
|
||||
|
||||
/* Device to be used for reading in case of RAID1. */
|
||||
u64 read_devid;
|
||||
|
||||
/* Checksum mode - offload it or do it synchronously. */
|
||||
enum btrfs_offload_csum_mode offload_csum_mode;
|
||||
#endif
|
||||
|
@ -485,6 +505,7 @@ struct btrfs_io_context {
|
|||
struct bio *orig_bio;
|
||||
atomic_t error;
|
||||
u16 max_errors;
|
||||
bool use_rst;
|
||||
|
||||
u64 logical;
|
||||
u64 size;
|
||||
|
|
|
@ -204,7 +204,6 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
|
|||
btrfs_set_dir_data_len(leaf, di, size);
|
||||
data_ptr = ((unsigned long)(di + 1)) + name_len;
|
||||
write_extent_buffer(leaf, value, data_ptr, size);
|
||||
btrfs_mark_buffer_dirty(trans, leaf);
|
||||
} else {
|
||||
/*
|
||||
* Insert, and we had space for the xattr, so path->slots[0] is
|
||||
|
|
124
fs/btrfs/zoned.c
124
fs/btrfs/zoned.c
|
@ -2652,3 +2652,127 @@ void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
|
|||
}
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset the zones of unused block groups from @space_info->bytes_zone_unusable.
|
||||
*
|
||||
* @space_info: the space to work on
|
||||
* @num_bytes: targeting reclaim bytes
|
||||
*
|
||||
* This one resets the zones of a block group, so we can reuse the region
|
||||
* without removing the block group. On the other hand, btrfs_delete_unused_bgs()
|
||||
* just removes a block group and frees up the underlying zones. So, we still
|
||||
* need to allocate a new block group to reuse the zones.
|
||||
*
|
||||
* Resetting is faster than deleting/recreating a block group. It is similar
|
||||
* to freeing the logical space on the regular mode. However, we cannot change
|
||||
* the block group's profile with this operation.
|
||||
*/
|
||||
int btrfs_reset_unused_block_groups(struct btrfs_space_info *space_info, u64 num_bytes)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = space_info->fs_info;
|
||||
const sector_t zone_size_sectors = fs_info->zone_size >> SECTOR_SHIFT;
|
||||
|
||||
if (!btrfs_is_zoned(fs_info))
|
||||
return 0;
|
||||
|
||||
while (num_bytes > 0) {
|
||||
struct btrfs_chunk_map *map;
|
||||
struct btrfs_block_group *bg = NULL;
|
||||
bool found = false;
|
||||
u64 reclaimed = 0;
|
||||
|
||||
/*
|
||||
* Here, we choose a fully zone_unusable block group. It's
|
||||
* technically possible to reset a partly zone_unusable block
|
||||
* group, which still has some free space left. However,
|
||||
* handling that needs to cope with the allocation side, which
|
||||
* makes the logic more complex. So, let's handle the easy case
|
||||
* for now.
|
||||
*/
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
list_for_each_entry(bg, &fs_info->unused_bgs, bg_list) {
|
||||
if ((bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != space_info->flags)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Use trylock to avoid locking order violation. In
|
||||
* btrfs_reclaim_bgs_work(), the lock order is
|
||||
* &bg->lock -> &fs_info->unused_bgs_lock. We skip a
|
||||
* block group if we cannot take its lock.
|
||||
*/
|
||||
if (!spin_trylock(&bg->lock))
|
||||
continue;
|
||||
if (btrfs_is_block_group_used(bg) || bg->zone_unusable < bg->length) {
|
||||
spin_unlock(&bg->lock);
|
||||
continue;
|
||||
}
|
||||
spin_unlock(&bg->lock);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
if (!found) {
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
list_del_init(&bg->bg_list);
|
||||
btrfs_put_block_group(bg);
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
|
||||
/*
|
||||
* Since the block group is fully zone_unusable and we cannot
|
||||
* allocate from this block group anymore, we don't need to set
|
||||
* this block group read-only.
|
||||
*/
|
||||
|
||||
down_read(&fs_info->dev_replace.rwsem);
|
||||
map = bg->physical_map;
|
||||
for (int i = 0; i < map->num_stripes; i++) {
|
||||
struct btrfs_io_stripe *stripe = &map->stripes[i];
|
||||
unsigned int nofs_flags;
|
||||
int ret;
|
||||
|
||||
nofs_flags = memalloc_nofs_save();
|
||||
ret = blkdev_zone_mgmt(stripe->dev->bdev, REQ_OP_ZONE_RESET,
|
||||
stripe->physical >> SECTOR_SHIFT,
|
||||
zone_size_sectors);
|
||||
memalloc_nofs_restore(nofs_flags);
|
||||
|
||||
if (ret) {
|
||||
up_read(&fs_info->dev_replace.rwsem);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
up_read(&fs_info->dev_replace.rwsem);
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
spin_lock(&bg->lock);
|
||||
ASSERT(!btrfs_is_block_group_used(bg));
|
||||
if (bg->ro) {
|
||||
spin_unlock(&bg->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
reclaimed = bg->alloc_offset;
|
||||
bg->zone_unusable = bg->length - bg->zone_capacity;
|
||||
bg->alloc_offset = 0;
|
||||
/*
|
||||
* This holds because we currently reset fully used then freed
|
||||
* block group.
|
||||
*/
|
||||
ASSERT(reclaimed == bg->zone_capacity);
|
||||
bg->free_space_ctl->free_space += reclaimed;
|
||||
space_info->bytes_zone_unusable -= reclaimed;
|
||||
spin_unlock(&bg->lock);
|
||||
btrfs_return_free_space(space_info, reclaimed);
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
if (num_bytes <= reclaimed)
|
||||
break;
|
||||
num_bytes -= reclaimed;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -96,6 +96,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
|
|||
int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info, bool do_finish);
|
||||
void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_reset_unused_block_groups(struct btrfs_space_info *space_info, u64 num_bytes);
|
||||
#else /* CONFIG_BLK_DEV_ZONED */
|
||||
|
||||
static inline int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
|
||||
|
@ -265,6 +266,12 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
|
|||
|
||||
static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
|
||||
|
||||
static inline int btrfs_reset_unused_block_groups(struct btrfs_space_info *space_info,
|
||||
u64 num_bytes)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
|
||||
|
|
|
@ -210,6 +210,43 @@ rb_add(struct rb_node *node, struct rb_root *tree,
|
|||
rb_insert_color(node, tree);
|
||||
}
|
||||
|
||||
/**
|
||||
* rb_find_add_cached() - find equivalent @node in @tree, or add @node
|
||||
* @node: node to look-for / insert
|
||||
* @tree: tree to search / modify
|
||||
* @cmp: operator defining the node order
|
||||
*
|
||||
* Returns the rb_node matching @node, or NULL when no match is found and @node
|
||||
* is inserted.
|
||||
*/
|
||||
static __always_inline struct rb_node *
|
||||
rb_find_add_cached(struct rb_node *node, struct rb_root_cached *tree,
|
||||
int (*cmp)(const struct rb_node *new, const struct rb_node *exist))
|
||||
{
|
||||
bool leftmost = true;
|
||||
struct rb_node **link = &tree->rb_root.rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
int c;
|
||||
|
||||
while (*link) {
|
||||
parent = *link;
|
||||
c = cmp(node, parent);
|
||||
|
||||
if (c < 0) {
|
||||
link = &parent->rb_left;
|
||||
} else if (c > 0) {
|
||||
link = &parent->rb_right;
|
||||
leftmost = false;
|
||||
} else {
|
||||
return parent;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(node, parent, link);
|
||||
rb_insert_color_cached(node, tree, leftmost);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* rb_find_add() - find equivalent @node in @tree, or add @node
|
||||
* @node: node to look-for / insert
|
||||
|
|
|
@ -100,7 +100,8 @@ struct find_free_extent_ctl;
|
|||
EM( ALLOC_CHUNK, "ALLOC_CHUNK") \
|
||||
EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \
|
||||
EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \
|
||||
EMe(COMMIT_TRANS, "COMMIT_TRANS")
|
||||
EM( COMMIT_TRANS, "COMMIT_TRANS") \
|
||||
EMe(RESET_ZONES, "RESET_ZONES")
|
||||
|
||||
/*
|
||||
* First define the enums in the above macros to be exported to userspace via
|
||||
|
|
Loading…
Reference in New Issue
Block a user