mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2026-01-27 12:47:24 +01:00
Currently, when building a free space tree at populate_free_space_tree(),
if we are not using the block group tree feature, we always expect to find
block group items (either extent items or a block group item with key type
BTRFS_BLOCK_GROUP_ITEM_KEY) when we search the extent tree with
btrfs_search_slot_for_read(), so we assert that we found an item. However
this expectation is wrong since we can have a new block group created in
the current transaction which is still empty and for which we have not
yet added the block group's item to the extent tree, in which case we do
not have any items in the extent tree associated with the block group.
The insertion of a new block group's block group item in the extent tree
happens at btrfs_create_pending_block_groups() when it calls the helper
insert_block_group_item(). This typically is done when a transaction
handle is released, committed or when running delayed refs (either as
part of a transaction commit or when serving tickets for space reservation
if we are low on free space).
So remove the assertion at populate_free_space_tree() even when the block
group tree feature is not enabled and update the comment to mention this
case.
Syzbot reported this with the following stack trace:
BTRFS info (device loop3 state M): rebuilding free space tree
assertion failed: ret == 0 :: 0, in fs/btrfs/free-space-tree.c:1115
------------[ cut here ]------------
kernel BUG at fs/btrfs/free-space-tree.c:1115!
Oops: invalid opcode: 0000 [#1] SMP KASAN PTI
CPU: 1 UID: 0 PID: 6352 Comm: syz.3.25 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025
RIP: 0010:populate_free_space_tree+0x700/0x710 fs/btrfs/free-space-tree.c:1115
Code: ff ff e8 d3 (...)
RSP: 0018:ffffc9000430f780 EFLAGS: 00010246
RAX: 0000000000000043 RBX: ffff88805b709630 RCX: fea61d0e2e79d000
RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000
RBP: ffffc9000430f8b0 R08: ffffc9000430f4a7 R09: 1ffff92000861e94
R10: dffffc0000000000 R11: fffff52000861e95 R12: 0000000000000001
R13: 1ffff92000861f00 R14: dffffc0000000000 R15: 0000000000000000
FS: 00007f424d9fe6c0(0000) GS:ffff888125afc000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fd78ad212c0 CR3: 0000000076d68000 CR4: 00000000003526f0
Call Trace:
<TASK>
btrfs_rebuild_free_space_tree+0x1ba/0x6d0 fs/btrfs/free-space-tree.c:1364
btrfs_start_pre_rw_mount+0x128f/0x1bf0 fs/btrfs/disk-io.c:3062
btrfs_remount_rw fs/btrfs/super.c:1334 [inline]
btrfs_reconfigure+0xaed/0x2160 fs/btrfs/super.c:1559
reconfigure_super+0x227/0x890 fs/super.c:1076
do_remount fs/namespace.c:3279 [inline]
path_mount+0xd1a/0xfe0 fs/namespace.c:4027
do_mount fs/namespace.c:4048 [inline]
__do_sys_mount fs/namespace.c:4236 [inline]
__se_sys_mount+0x313/0x410 fs/namespace.c:4213
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f424e39066a
Code: d8 64 89 02 (...)
RSP: 002b:00007f424d9fde68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
RAX: ffffffffffffffda RBX: 00007f424d9fdef0 RCX: 00007f424e39066a
RDX: 0000200000000180 RSI: 0000200000000380 RDI: 0000000000000000
RBP: 0000200000000180 R08: 00007f424d9fdef0 R09: 0000000000000020
R10: 0000000000000020 R11: 0000000000000246 R12: 0000200000000380
R13: 00007f424d9fdeb0 R14: 0000000000000000 R15: 00002000000002c0
</TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
Reported-by: syzbot+884dc4621377ba579a6f@syzkaller.appspotmail.com
Link: https://lore.kernel.org/linux-btrfs/68dc3dab.a00a0220.102ee.004e.GAE@google.com/
Fixes: a5ed918285 ("Btrfs: implement the free space B-tree")
CC: <stable@vger.kernel.org> # 6.1.x: 1961d20f6fa8: btrfs: fix assertion when building free space tree
CC: <stable@vger.kernel.org> # 6.1.x
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1726 lines
46 KiB
C
1726 lines
46 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2015 Facebook. All rights reserved.
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/sched/mm.h>
|
|
#include "messages.h"
|
|
#include "ctree.h"
|
|
#include "disk-io.h"
|
|
#include "locking.h"
|
|
#include "free-space-tree.h"
|
|
#include "transaction.h"
|
|
#include "block-group.h"
|
|
#include "fs.h"
|
|
#include "accessors.h"
|
|
#include "extent-tree.h"
|
|
#include "root-tree.h"
|
|
|
|
/*
 * Forward declaration. The definition is not part of the code shown before
 * this point; it is needed here because
 * __btrfs_remove_from_free_space_tree() calls it before touching the free
 * space tree of a block group.
 */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path);
|
|
|
|
static struct btrfs_root *btrfs_free_space_root(
|
|
struct btrfs_block_group *block_group)
|
|
{
|
|
struct btrfs_key key = {
|
|
.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
|
|
.type = BTRFS_ROOT_ITEM_KEY,
|
|
.offset = 0,
|
|
};
|
|
|
|
if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
|
|
key.offset = block_group->global_root_id;
|
|
return btrfs_global_root(block_group->fs_info, &key);
|
|
}
|
|
|
|
void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
|
|
{
|
|
u32 bitmap_range;
|
|
size_t bitmap_size;
|
|
u64 num_bitmaps, total_bitmap_size;
|
|
|
|
if (WARN_ON(cache->length == 0))
|
|
btrfs_warn(cache->fs_info, "block group %llu length is zero",
|
|
cache->start);
|
|
|
|
/*
|
|
* We convert to bitmaps when the disk space required for using extents
|
|
* exceeds that required for using bitmaps.
|
|
*/
|
|
bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
|
|
num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
|
|
bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
|
|
total_bitmap_size = num_bitmaps * bitmap_size;
|
|
cache->bitmap_high_thresh = div_u64(total_bitmap_size,
|
|
sizeof(struct btrfs_item));
|
|
|
|
/*
|
|
* We allow for a small buffer between the high threshold and low
|
|
* threshold to avoid thrashing back and forth between the two formats.
|
|
*/
|
|
if (cache->bitmap_high_thresh > 100)
|
|
cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
|
|
else
|
|
cache->bitmap_low_thresh = 0;
|
|
}
|
|
|
|
static int add_new_free_space_info(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path)
|
|
{
|
|
struct btrfs_root *root = btrfs_free_space_root(block_group);
|
|
struct btrfs_free_space_info *info;
|
|
struct btrfs_key key;
|
|
struct extent_buffer *leaf;
|
|
int ret;
|
|
|
|
key.objectid = block_group->start;
|
|
key.type = BTRFS_FREE_SPACE_INFO_KEY;
|
|
key.offset = block_group->length;
|
|
|
|
ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
|
|
if (ret)
|
|
return ret;
|
|
|
|
leaf = path->nodes[0];
|
|
info = btrfs_item_ptr(leaf, path->slots[0],
|
|
struct btrfs_free_space_info);
|
|
btrfs_set_free_space_extent_count(leaf, info, 0);
|
|
btrfs_set_free_space_flags(leaf, info, 0);
|
|
btrfs_release_path(path);
|
|
return 0;
|
|
}
|
|
|
|
EXPORT_FOR_TESTS
|
|
struct btrfs_free_space_info *btrfs_search_free_space_info(
|
|
struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path, int cow)
|
|
{
|
|
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
|
struct btrfs_root *root = btrfs_free_space_root(block_group);
|
|
struct btrfs_key key;
|
|
int ret;
|
|
|
|
key.objectid = block_group->start;
|
|
key.type = BTRFS_FREE_SPACE_INFO_KEY;
|
|
key.offset = block_group->length;
|
|
|
|
ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
|
|
if (ret < 0)
|
|
return ERR_PTR(ret);
|
|
if (ret != 0) {
|
|
btrfs_warn(fs_info, "missing free space info for %llu",
|
|
block_group->start);
|
|
DEBUG_WARN();
|
|
return ERR_PTR(-ENOENT);
|
|
}
|
|
|
|
return btrfs_item_ptr(path->nodes[0], path->slots[0],
|
|
struct btrfs_free_space_info);
|
|
}
|
|
|
|
/*
|
|
* btrfs_search_slot() but we're looking for the greatest key less than the
|
|
* passed key.
|
|
*/
|
|
static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct btrfs_key *key, struct btrfs_path *p,
|
|
int ins_len, int cow)
|
|
{
|
|
int ret;
|
|
|
|
ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
if (unlikely(ret == 0)) {
|
|
DEBUG_WARN();
|
|
return -EIO;
|
|
}
|
|
|
|
if (unlikely(p->slots[0] == 0)) {
|
|
DEBUG_WARN("no previous slot found");
|
|
return -EIO;
|
|
}
|
|
p->slots[0]--;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
|
|
u64 size)
|
|
{
|
|
return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
|
|
}
|
|
|
|
/*
 * Allocate a zeroed bitmap of at least @bitmap_size bytes, with the size
 * rounded up to a multiple of sizeof(unsigned long).
 *
 * Returns the buffer (to be freed with kvfree()) or NULL on allocation
 * failure.
 */
static unsigned long *alloc_bitmap(u32 bitmap_size)
{
	u32 alloc_size = round_up(bitmap_size, sizeof(unsigned long));
	unsigned int nofs_flag;
	unsigned long *bitmap;

	/*
	 * kvmalloc() cannot be used with GFP_NOFS, yet recursing into the
	 * filesystem is not acceptable here because the free space bitmap may
	 * be modified inside the critical section of a transaction commit.
	 * Use the scoped NOFS API around a GFP_KERNEL allocation instead.
	 *
	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
	 * know that recursion is unsafe.
	 */
	nofs_flag = memalloc_nofs_save();
	bitmap = kvzalloc(alloc_size, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	return bitmap;
}
|
|
|
|
static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
|
|
{
|
|
u8 *p = ((u8 *)map) + BIT_BYTE(start);
|
|
const unsigned int size = start + len;
|
|
int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
|
|
u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
|
|
|
|
while (len - bits_to_set >= 0) {
|
|
*p |= mask_to_set;
|
|
len -= bits_to_set;
|
|
bits_to_set = BITS_PER_BYTE;
|
|
mask_to_set = ~0;
|
|
p++;
|
|
}
|
|
if (len) {
|
|
mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
|
|
*p |= mask_to_set;
|
|
}
|
|
}
|
|
|
|
EXPORT_FOR_TESTS
|
|
int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path)
|
|
{
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
struct btrfs_root *root = btrfs_free_space_root(block_group);
|
|
struct btrfs_free_space_info *info;
|
|
struct btrfs_key key, found_key;
|
|
struct extent_buffer *leaf;
|
|
unsigned long *bitmap;
|
|
char *bitmap_cursor;
|
|
u64 start, end;
|
|
u64 bitmap_range, i;
|
|
u32 bitmap_size, flags, expected_extent_count;
|
|
u32 extent_count = 0;
|
|
int done = 0, nr;
|
|
int ret;
|
|
|
|
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
|
|
bitmap = alloc_bitmap(bitmap_size);
|
|
if (unlikely(!bitmap)) {
|
|
ret = -ENOMEM;
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
start = block_group->start;
|
|
end = block_group->start + block_group->length;
|
|
|
|
key.objectid = end - 1;
|
|
key.type = (u8)-1;
|
|
key.offset = (u64)-1;
|
|
|
|
while (!done) {
|
|
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
leaf = path->nodes[0];
|
|
nr = 0;
|
|
path->slots[0]++;
|
|
while (path->slots[0] > 0) {
|
|
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
|
|
|
|
if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
|
|
ASSERT(found_key.objectid == block_group->start);
|
|
ASSERT(found_key.offset == block_group->length);
|
|
done = 1;
|
|
break;
|
|
} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
|
|
u64 first, last;
|
|
|
|
ASSERT(found_key.objectid >= start);
|
|
ASSERT(found_key.objectid < end);
|
|
ASSERT(found_key.objectid + found_key.offset <= end);
|
|
|
|
first = div_u64(found_key.objectid - start,
|
|
fs_info->sectorsize);
|
|
last = div_u64(found_key.objectid + found_key.offset - start,
|
|
fs_info->sectorsize);
|
|
le_bitmap_set(bitmap, first, last - first);
|
|
|
|
extent_count++;
|
|
nr++;
|
|
path->slots[0]--;
|
|
} else {
|
|
ASSERT(0);
|
|
}
|
|
}
|
|
|
|
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
btrfs_release_path(path);
|
|
}
|
|
|
|
info = btrfs_search_free_space_info(trans, block_group, path, 1);
|
|
if (IS_ERR(info)) {
|
|
ret = PTR_ERR(info);
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
leaf = path->nodes[0];
|
|
flags = btrfs_free_space_flags(leaf, info);
|
|
flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
|
|
block_group->using_free_space_bitmaps = true;
|
|
block_group->using_free_space_bitmaps_cached = true;
|
|
btrfs_set_free_space_flags(leaf, info, flags);
|
|
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
|
|
btrfs_release_path(path);
|
|
|
|
if (unlikely(extent_count != expected_extent_count)) {
|
|
btrfs_err(fs_info,
|
|
"incorrect extent count for %llu; counted %u, expected %u",
|
|
block_group->start, extent_count,
|
|
expected_extent_count);
|
|
ret = -EIO;
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
bitmap_cursor = (char *)bitmap;
|
|
bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
|
|
i = start;
|
|
while (i < end) {
|
|
unsigned long ptr;
|
|
u64 extent_size;
|
|
u32 data_size;
|
|
|
|
extent_size = min(end - i, bitmap_range);
|
|
data_size = free_space_bitmap_size(fs_info, extent_size);
|
|
|
|
key.objectid = i;
|
|
key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
|
|
key.offset = extent_size;
|
|
|
|
ret = btrfs_insert_empty_item(trans, root, path, &key,
|
|
data_size);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
leaf = path->nodes[0];
|
|
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
|
write_extent_buffer(leaf, bitmap_cursor, ptr,
|
|
data_size);
|
|
btrfs_release_path(path);
|
|
|
|
i += extent_size;
|
|
bitmap_cursor += data_size;
|
|
}
|
|
|
|
ret = 0;
|
|
out:
|
|
kvfree(bitmap);
|
|
return ret;
|
|
}
|
|
|
|
EXPORT_FOR_TESTS
|
|
int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path)
|
|
{
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
struct btrfs_root *root = btrfs_free_space_root(block_group);
|
|
struct btrfs_free_space_info *info;
|
|
struct btrfs_key key, found_key;
|
|
struct extent_buffer *leaf;
|
|
unsigned long *bitmap;
|
|
u64 start, end;
|
|
u32 bitmap_size, flags, expected_extent_count;
|
|
unsigned long nrbits, start_bit, end_bit;
|
|
u32 extent_count = 0;
|
|
int done = 0, nr;
|
|
int ret;
|
|
|
|
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
|
|
bitmap = alloc_bitmap(bitmap_size);
|
|
if (unlikely(!bitmap)) {
|
|
ret = -ENOMEM;
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
start = block_group->start;
|
|
end = block_group->start + block_group->length;
|
|
|
|
key.objectid = end - 1;
|
|
key.type = (u8)-1;
|
|
key.offset = (u64)-1;
|
|
|
|
while (!done) {
|
|
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
leaf = path->nodes[0];
|
|
nr = 0;
|
|
path->slots[0]++;
|
|
while (path->slots[0] > 0) {
|
|
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
|
|
|
|
if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
|
|
ASSERT(found_key.objectid == block_group->start);
|
|
ASSERT(found_key.offset == block_group->length);
|
|
done = 1;
|
|
break;
|
|
} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
|
|
unsigned long ptr;
|
|
char *bitmap_cursor;
|
|
u32 bitmap_pos, data_size;
|
|
|
|
ASSERT(found_key.objectid >= start);
|
|
ASSERT(found_key.objectid < end);
|
|
ASSERT(found_key.objectid + found_key.offset <= end);
|
|
|
|
bitmap_pos = div_u64(found_key.objectid - start,
|
|
fs_info->sectorsize *
|
|
BITS_PER_BYTE);
|
|
bitmap_cursor = ((char *)bitmap) + bitmap_pos;
|
|
data_size = free_space_bitmap_size(fs_info,
|
|
found_key.offset);
|
|
|
|
path->slots[0]--;
|
|
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
|
read_extent_buffer(leaf, bitmap_cursor, ptr,
|
|
data_size);
|
|
|
|
nr++;
|
|
} else {
|
|
ASSERT(0);
|
|
}
|
|
}
|
|
|
|
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
btrfs_release_path(path);
|
|
}
|
|
|
|
info = btrfs_search_free_space_info(trans, block_group, path, 1);
|
|
if (IS_ERR(info)) {
|
|
ret = PTR_ERR(info);
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
leaf = path->nodes[0];
|
|
flags = btrfs_free_space_flags(leaf, info);
|
|
flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
|
|
block_group->using_free_space_bitmaps = false;
|
|
block_group->using_free_space_bitmaps_cached = true;
|
|
btrfs_set_free_space_flags(leaf, info, flags);
|
|
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
|
|
btrfs_release_path(path);
|
|
|
|
nrbits = block_group->length >> fs_info->sectorsize_bits;
|
|
start_bit = find_next_bit_le(bitmap, nrbits, 0);
|
|
|
|
while (start_bit < nrbits) {
|
|
end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
|
|
ASSERT(start_bit < end_bit);
|
|
|
|
key.objectid = start + start_bit * fs_info->sectorsize;
|
|
key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
|
|
key.offset = (end_bit - start_bit) * fs_info->sectorsize;
|
|
|
|
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
btrfs_release_path(path);
|
|
|
|
extent_count++;
|
|
|
|
start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
|
|
}
|
|
|
|
if (unlikely(extent_count != expected_extent_count)) {
|
|
btrfs_err(fs_info,
|
|
"incorrect extent count for %llu; counted %u, expected %u",
|
|
block_group->start, extent_count,
|
|
expected_extent_count);
|
|
ret = -EIO;
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
ret = 0;
|
|
out:
|
|
kvfree(bitmap);
|
|
return ret;
|
|
}
|
|
|
|
static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path,
|
|
int new_extents)
|
|
{
|
|
struct btrfs_free_space_info *info;
|
|
u32 flags;
|
|
u32 extent_count;
|
|
int ret = 0;
|
|
|
|
if (new_extents == 0)
|
|
return 0;
|
|
|
|
info = btrfs_search_free_space_info(trans, block_group, path, 1);
|
|
if (IS_ERR(info))
|
|
return PTR_ERR(info);
|
|
|
|
flags = btrfs_free_space_flags(path->nodes[0], info);
|
|
extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
|
|
|
|
extent_count += new_extents;
|
|
btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
|
|
btrfs_release_path(path);
|
|
|
|
if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
|
|
extent_count > block_group->bitmap_high_thresh) {
|
|
ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path);
|
|
} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
|
|
extent_count < block_group->bitmap_low_thresh) {
|
|
ret = btrfs_convert_free_space_to_extents(trans, block_group, path);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
EXPORT_FOR_TESTS
|
|
bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path, u64 offset)
|
|
{
|
|
struct extent_buffer *leaf;
|
|
struct btrfs_key key;
|
|
u64 found_start, found_end;
|
|
unsigned long ptr, i;
|
|
|
|
leaf = path->nodes[0];
|
|
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
|
ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
|
|
|
|
found_start = key.objectid;
|
|
found_end = key.objectid + key.offset;
|
|
ASSERT(offset >= found_start && offset < found_end);
|
|
|
|
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
|
i = div_u64(offset - found_start,
|
|
block_group->fs_info->sectorsize);
|
|
return extent_buffer_test_bit(leaf, ptr, i);
|
|
}
|
|
|
|
static void free_space_modify_bits(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path, u64 *start, u64 *size,
|
|
bool set_bits)
|
|
{
|
|
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
|
struct extent_buffer *leaf;
|
|
struct btrfs_key key;
|
|
u64 end = *start + *size;
|
|
u64 found_start, found_end;
|
|
unsigned long ptr, first, last;
|
|
|
|
leaf = path->nodes[0];
|
|
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
|
ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
|
|
|
|
found_start = key.objectid;
|
|
found_end = key.objectid + key.offset;
|
|
ASSERT(*start >= found_start && *start < found_end);
|
|
ASSERT(end > found_start);
|
|
|
|
if (end > found_end)
|
|
end = found_end;
|
|
|
|
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
|
first = (*start - found_start) >> fs_info->sectorsize_bits;
|
|
last = (end - found_start) >> fs_info->sectorsize_bits;
|
|
if (set_bits)
|
|
extent_buffer_bitmap_set(leaf, ptr, first, last - first);
|
|
else
|
|
extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
|
|
btrfs_mark_buffer_dirty(trans, leaf);
|
|
|
|
*size -= end - *start;
|
|
*start = end;
|
|
}
|
|
|
|
/*
|
|
* We can't use btrfs_next_item() in modify_free_space_bitmap() because
|
|
* btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
|
|
* tree walking in btrfs_next_leaf() anyways because we know exactly what we're
|
|
* looking for.
|
|
*/
|
|
static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root, struct btrfs_path *p)
|
|
{
|
|
struct btrfs_key key;
|
|
|
|
if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
|
|
p->slots[0]++;
|
|
return 0;
|
|
}
|
|
|
|
btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
|
|
btrfs_release_path(p);
|
|
|
|
key.objectid += key.offset;
|
|
key.type = (u8)-1;
|
|
key.offset = (u64)-1;
|
|
|
|
return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
|
|
}
|
|
|
|
/*
|
|
* If remove is 1, then we are removing free space, thus clearing bits in the
|
|
* bitmap. If remove is 0, then we are adding free space, thus setting bits in
|
|
* the bitmap.
|
|
*/
|
|
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path,
|
|
u64 start, u64 size, bool remove)
|
|
{
|
|
struct btrfs_root *root = btrfs_free_space_root(block_group);
|
|
struct btrfs_key key;
|
|
u64 end = start + size;
|
|
u64 cur_start, cur_size;
|
|
bool prev_bit_set = false;
|
|
bool next_bit_set = false;
|
|
int new_extents;
|
|
int ret;
|
|
|
|
/*
|
|
* Read the bit for the block immediately before the extent of space if
|
|
* that block is within the block group.
|
|
*/
|
|
if (start > block_group->start) {
|
|
u64 prev_block = start - block_group->fs_info->sectorsize;
|
|
|
|
key.objectid = prev_block;
|
|
key.type = (u8)-1;
|
|
key.offset = (u64)-1;
|
|
|
|
ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
|
|
if (ret)
|
|
return ret;
|
|
|
|
prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);
|
|
|
|
/* The previous block may have been in the previous bitmap. */
|
|
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
|
if (start >= key.objectid + key.offset) {
|
|
ret = free_space_next_bitmap(trans, root, path);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
} else {
|
|
key.objectid = start;
|
|
key.type = (u8)-1;
|
|
key.offset = (u64)-1;
|
|
|
|
ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Iterate over all of the bitmaps overlapped by the extent of space,
|
|
* clearing/setting bits as required.
|
|
*/
|
|
cur_start = start;
|
|
cur_size = size;
|
|
while (1) {
|
|
free_space_modify_bits(trans, block_group, path, &cur_start,
|
|
&cur_size, !remove);
|
|
if (cur_size == 0)
|
|
break;
|
|
ret = free_space_next_bitmap(trans, root, path);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Read the bit for the block immediately after the extent of space if
|
|
* that block is within the block group.
|
|
*/
|
|
if (end < block_group->start + block_group->length) {
|
|
/* The next block may be in the next bitmap. */
|
|
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
|
if (end >= key.objectid + key.offset) {
|
|
ret = free_space_next_bitmap(trans, root, path);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
|
|
}
|
|
|
|
if (remove) {
|
|
new_extents = -1;
|
|
if (prev_bit_set) {
|
|
/* Leftover on the left. */
|
|
new_extents++;
|
|
}
|
|
if (next_bit_set) {
|
|
/* Leftover on the right. */
|
|
new_extents++;
|
|
}
|
|
} else {
|
|
new_extents = 1;
|
|
if (prev_bit_set) {
|
|
/* Merging with neighbor on the left. */
|
|
new_extents--;
|
|
}
|
|
if (next_bit_set) {
|
|
/* Merging with neighbor on the right. */
|
|
new_extents--;
|
|
}
|
|
}
|
|
|
|
btrfs_release_path(path);
|
|
return update_free_space_extent_count(trans, block_group, path, new_extents);
|
|
}
|
|
|
|
/*
 * Remove the range [@start, @start + @size) from the free space extent items
 * of @block_group.
 *
 * The free space extent containing the range is looked up and deleted, and
 * whatever is left of it on either side is re-inserted:
 *
 * 1. The whole extent is used: only the delete happens and the free space
 *    extent count drops by one.
 * 2. The range is at the beginning of the extent: a new key for the leftover
 *    at the end is inserted.  No net change in the number of extents.
 * 3. The range is at the end of the extent: a new key for the leftover at
 *    the beginning is inserted.  No net change in the number of extents.
 * 4. The range is in the middle of the extent: two new keys are inserted,
 *    one per side, so the extent count grows by one, which may in turn
 *    trigger a conversion of the block group to bitmaps.
 *
 * Returns 0 on success or a negative errno.
 */
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	const u64 end = start + size;
	struct btrfs_key key = {
		.objectid = start,
		.type = (u8)-1,
		.offset = (u64)-1,
	};
	u64 found_start, found_end;
	int new_extents = -1;
	int ret;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(start >= found_start && end <= found_end);

	/* The existing key always goes away (cases 1-4). */
	ret = btrfs_del_item(trans, root, path);
	if (ret)
		return ret;

	/* Leftover in front of the removed range (cases 3 and 4). */
	if (found_start < start) {
		key.objectid = found_start;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = start - found_start;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	/* Leftover behind the removed range (cases 2 and 4). */
	if (found_end > end) {
		key.objectid = end;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = found_end - end;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}
|
|
|
|
static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
|
|
{
|
|
struct btrfs_free_space_info *info;
|
|
u32 flags;
|
|
|
|
if (bg->using_free_space_bitmaps_cached)
|
|
return bg->using_free_space_bitmaps;
|
|
|
|
info = btrfs_search_free_space_info(NULL, bg, path, 0);
|
|
if (IS_ERR(info))
|
|
return PTR_ERR(info);
|
|
flags = btrfs_free_space_flags(path->nodes[0], info);
|
|
btrfs_release_path(path);
|
|
|
|
bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
|
|
bg->using_free_space_bitmaps_cached = true;
|
|
|
|
return bg->using_free_space_bitmaps;
|
|
}
|
|
|
|
EXPORT_FOR_TESTS
|
|
int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path, u64 start, u64 size)
|
|
{
|
|
int ret;
|
|
|
|
ret = __add_block_group_free_space(trans, block_group, path);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = using_bitmaps(block_group, path);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
if (ret)
|
|
return modify_free_space_bitmap(trans, block_group, path,
|
|
start, size, true);
|
|
|
|
return remove_free_space_extent(trans, block_group, path, start, size);
|
|
}
|
|
|
|
/*
 * Remove the range [@start, @start + @size) from the free space tree.
 *
 * Does nothing and returns 0 if the filesystem does not have the
 * FREE_SPACE_TREE compat_ro feature.  Otherwise the block group containing
 * @start is looked up and the range is removed from its free space tree
 * under the block group's free_space_lock.
 *
 * Returns 0 on success or a negative errno; on any failure the transaction
 * is aborted.
 */
int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
				      u64 start, u64 size)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *bg;
	struct btrfs_path *path;
	int ret;

	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto free_path;
	}

	bg = btrfs_lookup_block_group(fs_info, start);
	if (unlikely(!bg)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		goto free_path;
	}

	mutex_lock(&bg->free_space_lock);
	ret = __btrfs_remove_from_free_space_tree(trans, bg, path, start, size);
	mutex_unlock(&bg->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	/* Drop the reference taken by btrfs_lookup_block_group(). */
	btrfs_put_block_group(bg);
free_path:
	btrfs_free_path(path);
	return ret;
}
|
|
|
|
/*
 * Insert the free space extent [start, start + size) into the free space tree
 * of @block_group, merging it with directly adjacent free space extent items.
 *
 * @extent_delta tracks the resulting change of the extent count:
 *
 *   - No adjacent item on either side: one new item is inserted and the count
 *     grows by one (this may require converting the block group to bitmaps).
 *   - An adjacent item only on the left, or only on the right: that item is
 *     deleted and a single item covering both ranges is inserted; the count
 *     does not change.
 *   - Adjacent items on both sides: both are deleted and a single item
 *     covering all three ranges is inserted; the count shrinks by one.
 *
 * Returns 0 on success, otherwise the non-zero value of the first failing
 * tree operation.
 */
static int add_free_space_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_block_group *block_group,
				 struct btrfs_path *path,
				 u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	/* The item to insert; widened below if neighbors get absorbed. */
	struct btrfs_key new_key = {
		.objectid = start,
		.type = BTRFS_FREE_SPACE_EXTENT_KEY,
		.offset = size,
	};
	struct btrfs_key key;
	u64 found_start, found_end;
	u64 end = start + size;
	int extent_delta = 1;
	int ret;

	/* No left neighbor is possible at the very start of the block group. */
	if (start == block_group->start)
		goto search_right;

	/* Look for the item preceding @start. */
	key.objectid = start - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		/* Only the info item may precede us: no extent on the left. */
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto search_right;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT(found_start < start && found_end <= start);

	/* The left extent touches us: delete it and extend the new key. */
	if (found_end == start) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.objectid = found_start;
		new_key.offset += key.offset;
		extent_delta--;
	}
	btrfs_release_path(path);

search_right:
	/* No right neighbor is possible at the very end of the block group. */
	if (end == block_group->start + block_group->length)
		goto insert_item;

	/* Look for the item at, or the closest one before, @end. */
	key.objectid = end;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto insert_item;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT((found_start < start && found_end <= start) ||
	       (found_start >= end && found_end > end));

	/* The right extent starts where we end: delete it and absorb it. */
	if (found_start == end) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.offset += key.offset;
		extent_delta--;
	}
	btrfs_release_path(path);

insert_item:
	/* Every case ends with inserting the (possibly merged) key. */
	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
	if (ret)
		return ret;

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path,
					      extent_delta);
}
|
|
|
|
EXPORT_FOR_TESTS
|
|
int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path, u64 start, u64 size)
|
|
{
|
|
int ret;
|
|
|
|
ret = __add_block_group_free_space(trans, block_group, path);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = using_bitmaps(block_group, path);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
if (ret)
|
|
return modify_free_space_bitmap(trans, block_group, path,
|
|
start, size, false);
|
|
|
|
return add_free_space_extent(trans, block_group, path, start, size);
|
|
}
|
|
|
|
/*
 * Add the range described by @start and @size to the free space tree. The
 * owning block group is looked up from @start.
 *
 * Returns 0 without doing anything when the FREE_SPACE_TREE compat_ro feature
 * is not set. Every failure (path allocation, no block group found, or an
 * error from __btrfs_add_to_free_space_tree()) aborts the transaction and is
 * returned to the caller.
 */
int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
				 u64 start, u64 size)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *bg;
	struct btrfs_path *path;
	int ret;

	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto free_path;
	}

	bg = btrfs_lookup_block_group(fs_info, start);
	if (unlikely(!bg)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		goto free_path;
	}

	/* The tree update for this block group runs under its free_space_lock. */
	mutex_lock(&bg->free_space_lock);
	ret = __btrfs_add_to_free_space_tree(trans, bg, path, start, size);
	mutex_unlock(&bg->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	/* Release the block group obtained from the lookup above. */
	btrfs_put_block_group(bg);
free_path:
	btrfs_free_path(path);
	return ret;
}
|
|
|
|
/*
|
|
* Populate the free space tree by walking the extent tree. Operations on the
|
|
* extent tree that happen as a result of writes to the free space tree will go
|
|
* through the normal add/remove hooks.
|
|
*/
|
|
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group)
|
|
{
|
|
struct btrfs_root *extent_root;
|
|
BTRFS_PATH_AUTO_FREE(path);
|
|
BTRFS_PATH_AUTO_FREE(path2);
|
|
struct btrfs_key key;
|
|
u64 start, end;
|
|
int ret;
|
|
|
|
path = btrfs_alloc_path();
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
path2 = btrfs_alloc_path();
|
|
if (!path2)
|
|
return -ENOMEM;
|
|
|
|
path->reada = READA_FORWARD;
|
|
|
|
ret = add_new_free_space_info(trans, block_group, path2);
|
|
if (ret)
|
|
return ret;
|
|
|
|
mutex_lock(&block_group->free_space_lock);
|
|
|
|
/*
|
|
* Iterate through all of the extent and metadata items in this block
|
|
* group, adding the free space between them and the free space at the
|
|
* end. Note that EXTENT_ITEM and METADATA_ITEM are less than
|
|
* BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
|
|
* contained in.
|
|
*/
|
|
key.objectid = block_group->start;
|
|
key.type = BTRFS_EXTENT_ITEM_KEY;
|
|
key.offset = 0;
|
|
|
|
extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
|
|
ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
|
|
if (ret < 0)
|
|
goto out_locked;
|
|
/*
|
|
* If ret is 1 (no key found), it means this is an empty block group,
|
|
* without any extents allocated from it and there's no block group
|
|
* item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
|
|
* because we are using the block group tree feature (so block group
|
|
* items are stored in the block group tree) or this is a new block
|
|
* group created in the current transaction and its block group item
|
|
* was not yet inserted in the extent tree (that happens in
|
|
* btrfs_create_pending_block_groups() -> insert_block_group_item()).
|
|
* It also means there are no extents allocated for block groups with a
|
|
* start offset beyond this block group's end offset (this is the last,
|
|
* highest, block group).
|
|
*/
|
|
start = block_group->start;
|
|
end = block_group->start + block_group->length;
|
|
while (ret == 0) {
|
|
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
|
|
|
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
|
|
key.type == BTRFS_METADATA_ITEM_KEY) {
|
|
if (key.objectid >= end)
|
|
break;
|
|
|
|
if (start < key.objectid) {
|
|
ret = __btrfs_add_to_free_space_tree(trans,
|
|
block_group,
|
|
path2, start,
|
|
key.objectid -
|
|
start);
|
|
if (ret)
|
|
goto out_locked;
|
|
}
|
|
start = key.objectid;
|
|
if (key.type == BTRFS_METADATA_ITEM_KEY)
|
|
start += trans->fs_info->nodesize;
|
|
else
|
|
start += key.offset;
|
|
} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
|
|
if (key.objectid != block_group->start)
|
|
break;
|
|
}
|
|
|
|
ret = btrfs_next_item(extent_root, path);
|
|
if (ret < 0)
|
|
goto out_locked;
|
|
}
|
|
if (start < end) {
|
|
ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
|
|
start, end - start);
|
|
if (ret)
|
|
goto out_locked;
|
|
}
|
|
|
|
ret = 0;
|
|
out_locked:
|
|
mutex_unlock(&block_group->free_space_lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
|
|
{
|
|
struct btrfs_trans_handle *trans;
|
|
struct btrfs_root *tree_root = fs_info->tree_root;
|
|
struct btrfs_root *free_space_root;
|
|
struct btrfs_block_group *block_group;
|
|
struct rb_node *node;
|
|
int ret;
|
|
|
|
trans = btrfs_start_transaction(tree_root, 0);
|
|
if (IS_ERR(trans))
|
|
return PTR_ERR(trans);
|
|
|
|
set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
|
|
set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
|
|
free_space_root = btrfs_create_tree(trans,
|
|
BTRFS_FREE_SPACE_TREE_OBJECTID);
|
|
if (IS_ERR(free_space_root)) {
|
|
ret = PTR_ERR(free_space_root);
|
|
btrfs_abort_transaction(trans, ret);
|
|
btrfs_end_transaction(trans);
|
|
goto out_clear;
|
|
}
|
|
ret = btrfs_global_root_insert(free_space_root);
|
|
if (unlikely(ret)) {
|
|
btrfs_put_root(free_space_root);
|
|
btrfs_abort_transaction(trans, ret);
|
|
btrfs_end_transaction(trans);
|
|
goto out_clear;
|
|
}
|
|
|
|
node = rb_first_cached(&fs_info->block_group_cache_tree);
|
|
while (node) {
|
|
block_group = rb_entry(node, struct btrfs_block_group,
|
|
cache_node);
|
|
ret = populate_free_space_tree(trans, block_group);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
btrfs_end_transaction(trans);
|
|
goto out_clear;
|
|
}
|
|
node = rb_next(node);
|
|
}
|
|
|
|
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
|
|
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
|
|
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
|
|
ret = btrfs_commit_transaction(trans);
|
|
|
|
/*
|
|
* Now that we've committed the transaction any reading of our commit
|
|
* root will be safe, so we can cache from the free space tree now.
|
|
*/
|
|
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
|
|
return ret;
|
|
|
|
out_clear:
|
|
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
|
|
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
|
|
return ret;
|
|
}
|
|
|
|
static int clear_free_space_tree(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root)
|
|
{
|
|
BTRFS_PATH_AUTO_FREE(path);
|
|
struct btrfs_key key;
|
|
struct rb_node *node;
|
|
int nr;
|
|
int ret;
|
|
|
|
path = btrfs_alloc_path();
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
key.objectid = 0;
|
|
key.type = 0;
|
|
key.offset = 0;
|
|
|
|
while (1) {
|
|
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
nr = btrfs_header_nritems(path->nodes[0]);
|
|
if (!nr)
|
|
break;
|
|
|
|
path->slots[0] = 0;
|
|
ret = btrfs_del_items(trans, root, path, 0, nr);
|
|
if (ret)
|
|
return ret;
|
|
|
|
btrfs_release_path(path);
|
|
}
|
|
|
|
node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
|
|
while (node) {
|
|
struct btrfs_block_group *bg;
|
|
|
|
bg = rb_entry(node, struct btrfs_block_group, cache_node);
|
|
clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
|
|
node = rb_next(node);
|
|
cond_resched();
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
|
|
{
|
|
struct btrfs_trans_handle *trans;
|
|
struct btrfs_root *tree_root = fs_info->tree_root;
|
|
struct btrfs_key key = {
|
|
.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
|
|
.type = BTRFS_ROOT_ITEM_KEY,
|
|
.offset = 0,
|
|
};
|
|
struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
|
|
int ret;
|
|
|
|
trans = btrfs_start_transaction(tree_root, 0);
|
|
if (IS_ERR(trans))
|
|
return PTR_ERR(trans);
|
|
|
|
btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
|
|
btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
|
|
|
|
ret = clear_free_space_tree(trans, free_space_root);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
btrfs_end_transaction(trans);
|
|
return ret;
|
|
}
|
|
|
|
ret = btrfs_del_root(trans, &free_space_root->root_key);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
btrfs_end_transaction(trans);
|
|
return ret;
|
|
}
|
|
|
|
btrfs_global_root_delete(free_space_root);
|
|
|
|
spin_lock(&fs_info->trans_lock);
|
|
list_del(&free_space_root->dirty_list);
|
|
spin_unlock(&fs_info->trans_lock);
|
|
|
|
btrfs_tree_lock(free_space_root->node);
|
|
btrfs_clear_buffer_dirty(trans, free_space_root->node);
|
|
btrfs_tree_unlock(free_space_root->node);
|
|
ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
|
|
free_space_root->node, 0, 1);
|
|
btrfs_put_root(free_space_root);
|
|
if (unlikely(ret < 0)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
btrfs_end_transaction(trans);
|
|
return ret;
|
|
}
|
|
|
|
return btrfs_commit_transaction(trans);
|
|
}
|
|
|
|
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
|
|
{
|
|
struct btrfs_trans_handle *trans;
|
|
struct btrfs_key key = {
|
|
.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
|
|
.type = BTRFS_ROOT_ITEM_KEY,
|
|
.offset = 0,
|
|
};
|
|
struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
|
|
struct rb_node *node;
|
|
int ret;
|
|
|
|
trans = btrfs_start_transaction(free_space_root, 1);
|
|
if (IS_ERR(trans))
|
|
return PTR_ERR(trans);
|
|
|
|
set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
|
|
set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
|
|
|
|
ret = clear_free_space_tree(trans, free_space_root);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
btrfs_end_transaction(trans);
|
|
return ret;
|
|
}
|
|
|
|
node = rb_first_cached(&fs_info->block_group_cache_tree);
|
|
while (node) {
|
|
struct btrfs_block_group *block_group;
|
|
|
|
block_group = rb_entry(node, struct btrfs_block_group,
|
|
cache_node);
|
|
|
|
if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
|
|
&block_group->runtime_flags))
|
|
goto next;
|
|
|
|
ret = populate_free_space_tree(trans, block_group);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
btrfs_end_transaction(trans);
|
|
return ret;
|
|
}
|
|
next:
|
|
if (btrfs_should_end_transaction(trans)) {
|
|
btrfs_end_transaction(trans);
|
|
trans = btrfs_start_transaction(free_space_root, 1);
|
|
if (IS_ERR(trans))
|
|
return PTR_ERR(trans);
|
|
}
|
|
node = rb_next(node);
|
|
}
|
|
|
|
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
|
|
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
|
|
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
|
|
|
|
ret = btrfs_commit_transaction(trans);
|
|
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
|
|
return ret;
|
|
}
|
|
|
|
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group,
|
|
struct btrfs_path *path)
|
|
{
|
|
bool own_path = false;
|
|
int ret;
|
|
|
|
if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
|
|
&block_group->runtime_flags))
|
|
return 0;
|
|
|
|
/*
|
|
* While rebuilding the free space tree we may allocate new metadata
|
|
* block groups while modifying the free space tree.
|
|
*
|
|
* Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
|
|
* can use multiple transactions, every time btrfs_end_transaction() is
|
|
* called at btrfs_rebuild_free_space_tree() we finish the creation of
|
|
* new block groups by calling btrfs_create_pending_block_groups(), and
|
|
* that in turn calls us, through add_block_group_free_space(), to add
|
|
* a free space info item and a free space extent item for the block
|
|
* group.
|
|
*
|
|
* Then later btrfs_rebuild_free_space_tree() may find such new block
|
|
* groups and processes them with populate_free_space_tree(), which can
|
|
* fail with EEXIST since there are already items for the block group in
|
|
* the free space tree. Notice that we say "may find" because a new
|
|
* block group may be added to the block groups rbtree in a node before
|
|
* or after the block group currently being processed by the rebuild
|
|
* process. So signal the rebuild process to skip such new block groups
|
|
* if it finds them.
|
|
*/
|
|
set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
|
|
|
|
if (!path) {
|
|
path = btrfs_alloc_path();
|
|
if (unlikely(!path)) {
|
|
btrfs_abort_transaction(trans, -ENOMEM);
|
|
return -ENOMEM;
|
|
}
|
|
own_path = true;
|
|
}
|
|
|
|
ret = add_new_free_space_info(trans, block_group, path);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
|
|
block_group->start, block_group->length);
|
|
if (ret)
|
|
btrfs_abort_transaction(trans, ret);
|
|
|
|
out:
|
|
if (own_path)
|
|
btrfs_free_path(path);
|
|
|
|
return ret;
|
|
}
|
|
|
|
int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group)
|
|
{
|
|
int ret;
|
|
|
|
if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
|
|
return 0;
|
|
|
|
mutex_lock(&block_group->free_space_lock);
|
|
ret = __add_block_group_free_space(trans, block_group, NULL);
|
|
mutex_unlock(&block_group->free_space_lock);
|
|
return ret;
|
|
}
|
|
|
|
int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
|
|
struct btrfs_block_group *block_group)
|
|
{
|
|
struct btrfs_root *root = btrfs_free_space_root(block_group);
|
|
struct btrfs_path *path;
|
|
struct btrfs_key key, found_key;
|
|
struct extent_buffer *leaf;
|
|
u64 start, end;
|
|
int done = 0, nr;
|
|
int ret;
|
|
|
|
if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
|
|
return 0;
|
|
|
|
if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
|
|
/* We never added this block group to the free space tree. */
|
|
return 0;
|
|
}
|
|
|
|
path = btrfs_alloc_path();
|
|
if (unlikely(!path)) {
|
|
ret = -ENOMEM;
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
start = block_group->start;
|
|
end = block_group->start + block_group->length;
|
|
|
|
key.objectid = end - 1;
|
|
key.type = (u8)-1;
|
|
key.offset = (u64)-1;
|
|
|
|
while (!done) {
|
|
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
|
|
leaf = path->nodes[0];
|
|
nr = 0;
|
|
path->slots[0]++;
|
|
while (path->slots[0] > 0) {
|
|
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
|
|
|
|
if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
|
|
ASSERT(found_key.objectid == block_group->start);
|
|
ASSERT(found_key.offset == block_group->length);
|
|
done = 1;
|
|
nr++;
|
|
path->slots[0]--;
|
|
break;
|
|
} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
|
|
found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
|
|
ASSERT(found_key.objectid >= start);
|
|
ASSERT(found_key.objectid < end);
|
|
ASSERT(found_key.objectid + found_key.offset <= end);
|
|
nr++;
|
|
path->slots[0]--;
|
|
} else {
|
|
ASSERT(0);
|
|
}
|
|
}
|
|
|
|
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
|
|
if (unlikely(ret)) {
|
|
btrfs_abort_transaction(trans, ret);
|
|
goto out;
|
|
}
|
|
btrfs_release_path(path);
|
|
}
|
|
|
|
ret = 0;
|
|
out:
|
|
btrfs_free_path(path);
|
|
return ret;
|
|
}
|
|
|
|
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
|
|
struct btrfs_path *path,
|
|
u32 expected_extent_count)
|
|
{
|
|
struct btrfs_block_group *block_group;
|
|
struct btrfs_fs_info *fs_info;
|
|
struct btrfs_root *root;
|
|
struct btrfs_key key;
|
|
bool prev_bit_set = false;
|
|
/* Initialize to silence GCC. */
|
|
u64 extent_start = 0;
|
|
u64 end, offset;
|
|
u64 total_found = 0;
|
|
u32 extent_count = 0;
|
|
int ret;
|
|
|
|
block_group = caching_ctl->block_group;
|
|
fs_info = block_group->fs_info;
|
|
root = btrfs_free_space_root(block_group);
|
|
|
|
end = block_group->start + block_group->length;
|
|
|
|
while (1) {
|
|
ret = btrfs_next_item(root, path);
|
|
if (ret < 0)
|
|
return ret;
|
|
if (ret)
|
|
break;
|
|
|
|
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
|
|
|
if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
|
|
break;
|
|
|
|
ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
|
|
ASSERT(key.objectid < end && key.objectid + key.offset <= end);
|
|
|
|
offset = key.objectid;
|
|
while (offset < key.objectid + key.offset) {
|
|
bool bit_set;
|
|
|
|
bit_set = btrfs_free_space_test_bit(block_group, path, offset);
|
|
if (!prev_bit_set && bit_set) {
|
|
extent_start = offset;
|
|
} else if (prev_bit_set && !bit_set) {
|
|
u64 space_added;
|
|
|
|
ret = btrfs_add_new_free_space(block_group,
|
|
extent_start,
|
|
offset,
|
|
&space_added);
|
|
if (ret)
|
|
return ret;
|
|
total_found += space_added;
|
|
if (total_found > CACHING_CTL_WAKE_UP) {
|
|
total_found = 0;
|
|
wake_up(&caching_ctl->wait);
|
|
}
|
|
extent_count++;
|
|
}
|
|
prev_bit_set = bit_set;
|
|
offset += fs_info->sectorsize;
|
|
}
|
|
}
|
|
if (prev_bit_set) {
|
|
ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
|
|
if (ret)
|
|
return ret;
|
|
extent_count++;
|
|
}
|
|
|
|
if (unlikely(extent_count != expected_extent_count)) {
|
|
btrfs_err(fs_info,
|
|
"incorrect extent count for %llu; counted %u, expected %u",
|
|
block_group->start, extent_count,
|
|
expected_extent_count);
|
|
DEBUG_WARN();
|
|
return -EIO;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
|
|
struct btrfs_path *path,
|
|
u32 expected_extent_count)
|
|
{
|
|
struct btrfs_block_group *block_group;
|
|
struct btrfs_fs_info *fs_info;
|
|
struct btrfs_root *root;
|
|
struct btrfs_key key;
|
|
u64 end;
|
|
u64 total_found = 0;
|
|
u32 extent_count = 0;
|
|
int ret;
|
|
|
|
block_group = caching_ctl->block_group;
|
|
fs_info = block_group->fs_info;
|
|
root = btrfs_free_space_root(block_group);
|
|
|
|
end = block_group->start + block_group->length;
|
|
|
|
while (1) {
|
|
u64 space_added;
|
|
|
|
ret = btrfs_next_item(root, path);
|
|
if (ret < 0)
|
|
return ret;
|
|
if (ret)
|
|
break;
|
|
|
|
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
|
|
|
if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
|
|
break;
|
|
|
|
ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
|
|
ASSERT(key.objectid < end && key.objectid + key.offset <= end);
|
|
|
|
ret = btrfs_add_new_free_space(block_group, key.objectid,
|
|
key.objectid + key.offset,
|
|
&space_added);
|
|
if (ret)
|
|
return ret;
|
|
total_found += space_added;
|
|
if (total_found > CACHING_CTL_WAKE_UP) {
|
|
total_found = 0;
|
|
wake_up(&caching_ctl->wait);
|
|
}
|
|
extent_count++;
|
|
}
|
|
|
|
if (unlikely(extent_count != expected_extent_count)) {
|
|
btrfs_err(fs_info,
|
|
"incorrect extent count for %llu; counted %u, expected %u",
|
|
block_group->start, extent_count,
|
|
expected_extent_count);
|
|
DEBUG_WARN();
|
|
return -EIO;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
|
|
{
|
|
struct btrfs_block_group *block_group;
|
|
struct btrfs_free_space_info *info;
|
|
BTRFS_PATH_AUTO_FREE(path);
|
|
u32 extent_count, flags;
|
|
|
|
block_group = caching_ctl->block_group;
|
|
|
|
path = btrfs_alloc_path();
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
/*
|
|
* Just like caching_thread() doesn't want to deadlock on the extent
|
|
* tree, we don't want to deadlock on the free space tree.
|
|
*/
|
|
path->skip_locking = 1;
|
|
path->search_commit_root = 1;
|
|
path->reada = READA_FORWARD;
|
|
|
|
info = btrfs_search_free_space_info(NULL, block_group, path, 0);
|
|
if (IS_ERR(info))
|
|
return PTR_ERR(info);
|
|
|
|
extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
|
|
flags = btrfs_free_space_flags(path->nodes[0], info);
|
|
|
|
/*
|
|
* We left path pointing to the free space info item, so now
|
|
* load_free_space_foo can just iterate through the free space tree from
|
|
* there.
|
|
*/
|
|
if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
|
|
return load_free_space_bitmaps(caching_ctl, path, extent_count);
|
|
else
|
|
return load_free_space_extents(caching_ctl, path, extent_count);
|
|
}
|