Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c2707a2

Browse files
morbidrsak authored and
kdave committed
btrfs: zoned: add a dedicated data relocation block group
Relocation in a zoned filesystem can fail with a transaction abort with error -22 (EINVAL). This happens because the relocation code assumes that the extents we relocated the data to have the same size as the source extents had, and ensures this by preallocating the extents. But in a zoned filesystem we currently can't preallocate the extents, as this would break the sequential-write-required rule. Therefore it can happen that the writeback process kicks in while we're still adding pages to a delalloc range and starts writing out dirty pages. This then creates destination extents that are smaller than the source extents, triggering the following safety check in get_new_location(): 1034 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1035 ret = -EINVAL; 1036 goto out; 1037 } Temporarily create a dedicated block group for the relocation process, so no non-relocation data writes can interfere with the relocation writes. This is needed so that we can switch the relocation process on a zoned filesystem from the REQ_OP_ZONE_APPEND writing we use for data to a scheme like in a non-zoned filesystem using REQ_OP_WRITE and preallocation. Fixes: 32430c6 ("btrfs: zoned: enable relocation on a zoned filesystem") Reviewed-by: Naohiro Aota <[email protected]> Signed-off-by: Johannes Thumshirn <[email protected]> Reviewed-by: David Sterba <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 37f00a6 commit c2707a2

File tree

6 files changed

+71
-2
lines changed

6 files changed

+71
-2
lines changed

fs/btrfs/block-group.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -903,6 +903,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
903903
spin_unlock(&cluster->refill_lock);
904904

905905
btrfs_clear_treelog_bg(block_group);
906+
btrfs_clear_data_reloc_bg(block_group);
906907

907908
path = btrfs_alloc_path();
908909
if (!path) {

fs/btrfs/ctree.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,13 @@ struct btrfs_fs_info {
10181018
spinlock_t treelog_bg_lock;
10191019
u64 treelog_bg;
10201020

1021+
/*
1022+
* Start of the dedicated data relocation block group, protected by
1023+
* relocation_bg_lock.
1024+
*/
1025+
spinlock_t relocation_bg_lock;
1026+
u64 data_reloc_bg;
1027+
10211028
spinlock_t zone_active_bgs_lock;
10221029
struct list_head zone_active_bgs;
10231030

fs/btrfs/disk-io.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2885,6 +2885,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
28852885
spin_lock_init(&fs_info->unused_bgs_lock);
28862886
spin_lock_init(&fs_info->treelog_bg_lock);
28872887
spin_lock_init(&fs_info->zone_active_bgs_lock);
2888+
spin_lock_init(&fs_info->relocation_bg_lock);
28882889
rwlock_init(&fs_info->tree_mod_log_lock);
28892890
mutex_init(&fs_info->unused_bg_unpin_mutex);
28902891
mutex_init(&fs_info->reclaim_bgs_lock);

fs/btrfs/extent-tree.c

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3497,6 +3497,9 @@ struct find_free_extent_ctl {
34973497
/* Allocation is called for tree-log */
34983498
bool for_treelog;
34993499

3500+
/* Allocation is called for data relocation */
3501+
bool for_data_reloc;
3502+
35003503
/* RAID index, converted from flags */
35013504
int index;
35023505

@@ -3758,6 +3761,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
37583761
u64 avail;
37593762
u64 bytenr = block_group->start;
37603763
u64 log_bytenr;
3764+
u64 data_reloc_bytenr;
37613765
int ret = 0;
37623766
bool skip;
37633767

@@ -3775,6 +3779,19 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
37753779
if (skip)
37763780
return 1;
37773781

3782+
/*
3783+
* Do not allow non-relocation blocks in the dedicated relocation block
3784+
* group, and vice versa.
3785+
*/
3786+
spin_lock(&fs_info->relocation_bg_lock);
3787+
data_reloc_bytenr = fs_info->data_reloc_bg;
3788+
if (data_reloc_bytenr &&
3789+
((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
3790+
(!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
3791+
skip = true;
3792+
spin_unlock(&fs_info->relocation_bg_lock);
3793+
if (skip)
3794+
return 1;
37783795
/* Check RO and no space case before trying to activate it */
37793796
spin_lock(&block_group->lock);
37803797
if (block_group->ro ||
@@ -3790,10 +3807,14 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
37903807
spin_lock(&space_info->lock);
37913808
spin_lock(&block_group->lock);
37923809
spin_lock(&fs_info->treelog_bg_lock);
3810+
spin_lock(&fs_info->relocation_bg_lock);
37933811

37943812
ASSERT(!ffe_ctl->for_treelog ||
37953813
block_group->start == fs_info->treelog_bg ||
37963814
fs_info->treelog_bg == 0);
3815+
ASSERT(!ffe_ctl->for_data_reloc ||
3816+
block_group->start == fs_info->data_reloc_bg ||
3817+
fs_info->data_reloc_bg == 0);
37973818

37983819
if (block_group->ro) {
37993820
ret = 1;
@@ -3810,6 +3831,16 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
38103831
goto out;
38113832
}
38123833

3834+
/*
3835+
* Do not allow currently used block group to be the data relocation
3836+
* dedicated block group.
3837+
*/
3838+
if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
3839+
(block_group->used || block_group->reserved)) {
3840+
ret = 1;
3841+
goto out;
3842+
}
3843+
38133844
WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
38143845
avail = block_group->zone_capacity - block_group->alloc_offset;
38153846
if (avail < num_bytes) {
@@ -3828,6 +3859,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
38283859
if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
38293860
fs_info->treelog_bg = block_group->start;
38303861

3862+
if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
3863+
fs_info->data_reloc_bg = block_group->start;
3864+
38313865
ffe_ctl->found_offset = start + block_group->alloc_offset;
38323866
block_group->alloc_offset += num_bytes;
38333867
spin_lock(&ctl->tree_lock);
@@ -3844,6 +3878,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
38443878
out:
38453879
if (ret && ffe_ctl->for_treelog)
38463880
fs_info->treelog_bg = 0;
3881+
if (ret && ffe_ctl->for_data_reloc)
3882+
fs_info->data_reloc_bg = 0;
3883+
spin_unlock(&fs_info->relocation_bg_lock);
38473884
spin_unlock(&fs_info->treelog_bg_lock);
38483885
spin_unlock(&block_group->lock);
38493886
spin_unlock(&space_info->lock);
@@ -4112,6 +4149,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
41124149
ffe_ctl->hint_byte = fs_info->treelog_bg;
41134150
spin_unlock(&fs_info->treelog_bg_lock);
41144151
}
4152+
if (ffe_ctl->for_data_reloc) {
4153+
spin_lock(&fs_info->relocation_bg_lock);
4154+
if (fs_info->data_reloc_bg)
4155+
ffe_ctl->hint_byte = fs_info->data_reloc_bg;
4156+
spin_unlock(&fs_info->relocation_bg_lock);
4157+
}
41154158
return 0;
41164159
default:
41174160
BUG();
@@ -4245,6 +4288,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
42454288
if (unlikely(block_group->ro)) {
42464289
if (ffe_ctl->for_treelog)
42474290
btrfs_clear_treelog_bg(block_group);
4291+
if (ffe_ctl->for_data_reloc)
4292+
btrfs_clear_data_reloc_bg(block_group);
42484293
continue;
42494294
}
42504295

@@ -4438,6 +4483,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
44384483
u64 flags;
44394484
int ret;
44404485
bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4486+
bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
44414487

44424488
flags = get_alloc_profile_by_root(root, is_data);
44434489
again:
@@ -4451,6 +4497,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
44514497
ffe_ctl.delalloc = delalloc;
44524498
ffe_ctl.hint_byte = hint_byte;
44534499
ffe_ctl.for_treelog = for_treelog;
4500+
ffe_ctl.for_data_reloc = for_data_reloc;
44544501

44554502
ret = find_free_extent(root, ins, &ffe_ctl);
44564503
if (!ret && !is_data) {
@@ -4470,8 +4517,8 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
44704517

44714518
sinfo = btrfs_find_space_info(fs_info, flags);
44724519
btrfs_err(fs_info,
4473-
"allocation failed flags %llu, wanted %llu tree-log %d",
4474-
flags, num_bytes, for_treelog);
4520+
"allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
4521+
flags, num_bytes, for_treelog, for_data_reloc);
44754522
if (sinfo)
44764523
btrfs_dump_space_info(fs_info, sinfo,
44774524
num_bytes, 1);

fs/btrfs/zoned.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,3 +1954,13 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
19541954
out:
19551955
btrfs_put_block_group(block_group);
19561956
}
1957+
1958+
/*
 * Stop treating @bg as the dedicated data relocation block group.
 *
 * Under fs_info->relocation_bg_lock, reset fs_info->data_reloc_bg to 0 if it
 * currently holds @bg->start. If it holds any other value, nothing changes.
 */
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
1959+
{
1960+
struct btrfs_fs_info *fs_info = bg->fs_info;
1961+
1962+
spin_lock(&fs_info->relocation_bg_lock);
1963+
/* Only clear the marker when it actually points at this block group. */
if (fs_info->data_reloc_bg == bg->start)
1964+
fs_info->data_reloc_bg = 0;
1965+
spin_unlock(&fs_info->relocation_bg_lock);
1966+
}

fs/btrfs/zoned.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
7575
int raid_index);
7676
void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
7777
u64 length);
78+
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
7879
#else /* CONFIG_BLK_DEV_ZONED */
7980
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
8081
struct blk_zone *zone)
@@ -229,6 +230,8 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
229230
static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
230231
u64 logical, u64 length) { }
231232

233+
/* Empty stub for the #else branch of CONFIG_BLK_DEV_ZONED: does nothing. */
static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
234+
232235
#endif
233236

234237
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)

0 commit comments

Comments
 (0)