Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 74d4260

Browse files
shartsebehlendorf
authored and committed
zpool reopen should detect expanded devices
Update bdev_capacity to have wholedisk vdevs query the size of the underlying block device (correcting for the size of the EFI partition and partition alignment) and therefore detect expanded space. Correct vdev_get_stats_ex so that the expandsize is aligned to metaslab size and new space is only reported if it is large enough for a new metaslab. Reviewed by: Don Brady <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed-by: Matthew Ahrens <[email protected]> Reviewed by: John Wren Kennedy <[email protected]> Signed-off-by: sara hartse <[email protected]> External-issue: LX-165 Closes openzfs#7546 Issue openzfs#7582
1 parent d1f06ec commit 74d4260

File tree

6 files changed

+107
-42
lines changed

6 files changed

+107
-42
lines changed

include/sys/vdev_disk.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,23 @@
2323
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
2424
* Written by Brian Behlendorf <[email protected]>.
2525
* LLNL-CODE-403049.
26+
* Copyright (c) 2018 by Delphix. All rights reserved.
2627
*/
2728

2829
#ifndef _SYS_VDEV_DISK_H
2930
#define _SYS_VDEV_DISK_H
3031

32+
/*
33+
* Don't start the slice at the default block of 34; many storage
34+
* devices will use a stripe width of 128k, other vendors prefer a 1m
35+
* alignment. It is best to play it safe and ensure a 1m alignment
36+
* given 512B blocks. When the block size is larger by a power of 2
37+
* we will still be 1m aligned. Some devices are sensitive to the
38+
* partition ending alignment as well.
39+
*/
40+
#define NEW_START_BLOCK 2048
41+
#define PARTITION_END_ALIGNMENT 2048
42+
3143
#ifdef _KERNEL
3244
#include <sys/vdev.h>
3345

lib/libefi/rdwr_efi.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
/*
2323
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
2424
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
25+
* Copyright (c) 2018 by Delphix. All rights reserved.
2526
*/
2627

2728
#include <stdio.h>
@@ -1154,7 +1155,7 @@ efi_use_whole_disk(int fd)
11541155

11551156
/*
11561157
* Find the last physically non-zero partition.
1157-
* This is the reserved partition.
1158+
* This should be the reserved partition.
11581159
*/
11591160
for (i = 0; i < efi_label->efi_nparts; i ++) {
11601161
if (resv_start < efi_label->efi_parts[i].p_start) {
@@ -1163,6 +1164,23 @@ efi_use_whole_disk(int fd)
11631164
}
11641165
}
11651166

1167+
/*
1168+
* Verify that we've found the reserved partition by checking
1169+
* that it looks the way it did when we created it in zpool_label_disk.
1170+
* If we've found the incorrect partition, then we know that this
1171+
* device was reformatted and is no longer solely used by ZFS.
1172+
*/
1173+
if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) ||
1174+
(efi_label->efi_parts[resv_index].p_tag != V_RESERVED) ||
1175+
(resv_index != 8)) {
1176+
if (efi_debug) {
1177+
(void) fprintf(stderr,
1178+
"efi_use_whole_disk: wholedisk not available\n");
1179+
}
1180+
efi_free(efi_label);
1181+
return (VT_ENOSPC);
1182+
}
1183+
11661184
/*
11671185
* Find the last physically non-zero partition before that.
11681186
* This is the data partition.

lib/libzfs/libzfs_pool.c

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
/*
2323
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
2424
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25-
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
25+
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
2626
* Copyright 2016 Igor Kozhukhov <[email protected]>
2727
* Copyright (c) 2017 Datto Inc.
2828
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
@@ -44,6 +44,7 @@
4444
#include <sys/systeminfo.h>
4545
#include <sys/vtoc.h>
4646
#include <sys/zfs_ioctl.h>
47+
#include <sys/vdev_disk.h>
4748
#include <dlfcn.h>
4849

4950
#include "zfs_namecheck.h"
@@ -925,17 +926,6 @@ zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
925926
return (0);
926927
}
927928

928-
/*
929-
* Don't start the slice at the default block of 34; many storage
930-
* devices will use a stripe width of 128k, other vendors prefer a 1m
931-
* alignment. It is best to play it safe and ensure a 1m alignment
932-
* given 512B blocks. When the block size is larger by a power of 2
933-
* we will still be 1m aligned. Some devices are sensitive to the
934-
* partition ending alignment as well.
935-
*/
936-
#define NEW_START_BLOCK 2048
937-
#define PARTITION_END_ALIGNMENT 2048
938-
939929
/*
940930
* Validate the given pool name, optionally putting an extended error message in
941931
* 'buf'.

module/zfs/vdev.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24-
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
24+
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
2525
* Copyright 2017 Nexenta Systems, Inc.
2626
* Copyright (c) 2014 Integros [integros.com]
2727
* Copyright 2016 Toomas Soome <[email protected]>
@@ -3493,7 +3493,6 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
34933493
vd->vdev_max_asize - vd->vdev_asize,
34943494
1ULL << tvd->vdev_ms_shift);
34953495
}
3496-
vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
34973496
if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
34983497
vdev_is_concrete(vd)) {
34993498
vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;

module/zfs/vdev_disk.c

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
2424
* Rewritten for Linux by Brian Behlendorf <[email protected]>.
2525
* LLNL-CODE-403049.
26-
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
26+
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
2727
*/
2828

2929
#include <sys/zfs_context.h>
@@ -34,10 +34,14 @@
3434
#include <sys/fs/zfs.h>
3535
#include <sys/zio.h>
3636
#include <linux/mod_compat.h>
37+
#include <linux/msdos_fs.h>
3738

3839
char *zfs_vdev_scheduler = VDEV_SCHEDULER;
3940
static void *zfs_vdev_holder = VDEV_HOLDER;
4041

42+
/* size of the "reserved" partition, in blocks */
43+
#define EFI_MIN_RESV_SIZE (16 * 1024)
44+
4145
/*
4246
* Virtual device vector for disks.
4347
*/
@@ -81,17 +85,39 @@ vdev_bdev_mode(int smode)
8185
}
8286
#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
8387

88+
/* The capacity (in bytes) of a bdev that is available to be used by a vdev */
8489
static uint64_t
85-
bdev_capacity(struct block_device *bdev)
90+
bdev_capacity(struct block_device *bdev, boolean_t wholedisk)
8691
{
8792
struct hd_struct *part = bdev->bd_part;
93+
uint64_t sectors = get_capacity(bdev->bd_disk);
94+
/* If there are no partitions, return the entire device capacity */
95+
if (part == NULL)
96+
return (sectors << SECTOR_BITS);
8897

89-
/* The partition capacity referenced by the block device */
90-
if (part)
91-
return (part->nr_sects << 9);
92-
93-
/* Otherwise assume the full device capacity */
94-
return (get_capacity(bdev->bd_disk) << 9);
98+
/*
99+
* If there are partitions, decide if we are using a `wholedisk`
100+
* layout (composed of part1 and part9) or just a single partition.
101+
*/
102+
if (wholedisk) {
103+
/* Verify the expected device layout */
104+
ASSERT3P(bdev, !=, bdev->bd_contains);
105+
/*
106+
* Sectors used by the EFI partition (part9) as well as
107+
* partition alignment.
108+
*/
109+
uint64_t used = EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
110+
PARTITION_END_ALIGNMENT;
111+
112+
/* Space available to the vdev, i.e. the size of part1 */
113+
if (sectors <= used)
114+
return (0);
115+
uint64_t available = sectors - used;
116+
return (available << SECTOR_BITS);
117+
} else {
118+
/* The partition capacity referenced by the block device */
119+
return (part->nr_sects << SECTOR_BITS);
120+
}
95121
}
96122

97123
static void
@@ -330,9 +356,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
330356
v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
331357

332358
/* Physical volume size in bytes */
333-
*psize = bdev_capacity(vd->vd_bdev);
334-
335-
/* TODO: report possible expansion size */
359+
*psize = bdev_capacity(vd->vd_bdev, v->vdev_wholedisk);
336360
*max_psize = *psize;
337361

338362
/* Based on the minimum sector size set the block size */

tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#
2727

2828
#
29-
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
29+
# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
3030
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
3131
#
3232

@@ -43,8 +43,9 @@
4343
# 1) Create 3 files
4444
# 2) Create a pool backed by the files
4545
# 3) Expand the files' size with truncate
46-
# 4) Use zpool online -e to online the vdevs
47-
# 5) Check that the pool size was expanded
46+
# 4) Use zpool reopen to check the expandsize
47+
# 5) Use zpool online -e to online the vdevs
48+
# 6) Check that the pool size was expanded
4849
#
4950

5051
verify_runnable "global"
@@ -64,8 +65,8 @@ log_onexit cleanup
6465

6566
log_assert "zpool can expand after zpool online -e zvol vdevs on LUN expansion"
6667

67-
6868
for type in " " mirror raidz raidz2; do
69+
# Initialize the file devices and the pool
6970
for i in 1 2 3; do
7071
log_must truncate -s $org_size ${TEMPFILE}.$i
7172
done
@@ -80,13 +81,35 @@ for type in " " mirror raidz raidz2; do
8081
"$autoexp"
8182
fi
8283
typeset prev_size=$(get_pool_prop size $TESTPOOL1)
83-
typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
84-
awk '{print $3}')
84+
typeset zfs_prev_size=$(get_prop avail $TESTPOOL1)
8585

86+
# Increase the size of the file devices
8687
for i in 1 2 3; do
8788
log_must truncate -s $exp_size ${TEMPFILE}.$i
8889
done
8990

91+
# Reopen the pool and check that the `expandsize` property is set
92+
log_must zpool reopen $TESTPOOL1
93+
typeset zpool_expandsize=$(get_pool_prop expandsize $TESTPOOL1)
94+
95+
if [[ $type == "mirror" ]]; then
96+
typeset expected_zpool_expandsize=$(($exp_size-$org_size))
97+
else
98+
typeset expected_zpool_expandsize=$((3*($exp_size-$org_size)))
99+
fi
100+
101+
if [[ "$zpool_expandsize" = "-" ]]; then
102+
log_fail "pool $TESTPOOL1 did not detect any " \
103+
"expandsize after reopen"
104+
fi
105+
106+
if [[ $zpool_expandsize -ne $expected_zpool_expandsize ]]; then
107+
log_fail "pool $TESTPOOL1 did not detect correct " \
108+
"expandsize after reopen: found $zpool_expandsize," \
109+
"expected $expected_zpool_expandsize"
110+
fi
111+
112+
# Online the devices to add the new space to the pool
90113
for i in 1 2 3; do
91114
log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
92115
done
@@ -96,8 +119,7 @@ for type in " " mirror raidz raidz2; do
96119
sync
97120

98121
typeset expand_size=$(get_pool_prop size $TESTPOOL1)
99-
typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
100-
awk '{print $3}')
122+
typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
101123
log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
102124
"expanded size: $expand_size"
103125

@@ -112,8 +134,8 @@ for type in " " mirror raidz raidz2; do
112134
grep "(+${expansion_size}" | wc -l)
113135

114136
if [[ $size_addition -ne $i ]]; then
115-
log_fail "pool $TESTPOOL1 is not autoexpand " \
116-
"after LUN expansion"
137+
log_fail "pool $TESTPOOL1 did not expand " \
138+
"after LUN expansion and zpool online -e"
117139
fi
118140
elif [[ $type == "mirror" ]]; then
119141
typeset expansion_size=$(($exp_size-$org_size))
@@ -123,8 +145,8 @@ for type in " " mirror raidz raidz2; do
123145
grep "(+${expansion_size})" >/dev/null 2>&1
124146

125147
if [[ $? -ne 0 ]]; then
126-
log_fail "pool $TESTPOOL1 is not autoexpand " \
127-
"after LUN expansion"
148+
log_fail "pool $TESTPOOL1 did not expand " \
149+
"after LUN expansion and zpool online -e"
128150
fi
129151
else
130152
typeset expansion_size=$((3*($exp_size-$org_size)))
@@ -134,13 +156,13 @@ for type in " " mirror raidz raidz2; do
134156
grep "(+${expansion_size})" >/dev/null 2>&1
135157

136158
if [[ $? -ne 0 ]] ; then
137-
log_fail "pool $TESTPOOL1 is not autoexpand " \
138-
"after LUN expansion"
159+
log_fail "pool $TESTPOOL1 did not expand " \
160+
"after LUN expansion and zpool online -e"
139161
fi
140162
fi
141163
else
142-
log_fail "pool $TESTPOOL1 is not autoexpanded after LUN " \
143-
"expansion"
164+
log_fail "pool $TESTPOOL1 did not expand after LUN expansion " \
165+
"and zpool online -e"
144166
fi
145167
log_must zpool destroy $TESTPOOL1
146168
done

0 commit comments

Comments
 (0)