Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F106047193
D18062.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
D18062.diff
View Options
Index: head/sys/geom/mirror/g_mirror.h
===================================================================
--- head/sys/geom/mirror/g_mirror.h
+++ head/sys/geom/mirror/g_mirror.h
@@ -148,6 +148,10 @@
u_int d_genid; /* Disk's generation ID. */
struct g_mirror_disk_sync d_sync;/* Sync information. */
LIST_ENTRY(g_mirror_disk) d_next;
+ u_int d_init_ndisks; /* Initial number of mirror components */
+ uint32_t d_init_slice; /* Initial slice size */
+ uint8_t d_init_balance;/* Initial balance */
+ uint64_t d_init_mediasize;/* Initial mediasize */
};
#define d_name d_consumer->provider->name
Index: head/sys/geom/mirror/g_mirror.c
===================================================================
--- head/sys/geom/mirror/g_mirror.c
+++ head/sys/geom/mirror/g_mirror.c
@@ -59,6 +59,11 @@
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
"Debug level");
+bool g_launch_mirror_before_timeout = true;
+SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout,
+ CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0,
+ "If false, force gmirror to wait out the full kern.geom.mirror.timeout "
+ "before launching mirrors");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
0, "Time to wait on all mirror components");
@@ -110,6 +115,8 @@
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
+static int g_mirror_refresh_device(struct g_mirror_softc *sc,
+ const struct g_provider *pp, const struct g_mirror_metadata *md);
static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
struct bio *bp, off_t offset);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
@@ -472,6 +479,10 @@
disk->d_sync.ds_update_ts = time_uptime;
disk->d_genid = md->md_genid;
disk->d_sync.ds_syncid = md->md_syncid;
+ disk->d_init_ndisks = md->md_all;
+ disk->d_init_slice = md->md_slice;
+ disk->d_init_balance = md->md_balance;
+ disk->d_init_mediasize = md->md_mediasize;
if (errorp != NULL)
*errorp = 0;
return (disk);
@@ -2362,17 +2373,74 @@
case G_MIRROR_DEVICE_STATE_STARTING:
{
struct g_mirror_disk *pdisk, *tdisk;
- u_int dirty, ndisks, genid, syncid;
- bool broken;
+ const char *mismatch;
+ uintmax_t found, newest;
+ u_int dirty, ndisks;
+ /* Pre-flight checks */
+ LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
+ /*
+ * Confirm we already detected the newest genid.
+ */
+ KASSERT(sc->sc_genid >= disk->d_genid,
+ ("%s: found newer genid %u (sc:%p had %u).", __func__,
+ disk->d_genid, sc, sc->sc_genid));
+
+ /* Kick out any previously tasted stale components. */
+ if (disk->d_genid < sc->sc_genid) {
+ G_MIRROR_DEBUG(0, "Stale 'genid' field on %s "
+ "(device %s) (component=%u latest=%u), skipping.",
+ g_mirror_get_diskname(disk), sc->sc_name,
+ disk->d_genid, sc->sc_genid);
+ g_mirror_destroy_disk(disk);
+ sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
+ continue;
+ }
+
+ /*
+ * Confirm we already detected the newest syncid.
+ */
+ KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid,
+ ("%s: found newer syncid %u (sc:%p had %u).",
+ __func__, disk->d_sync.ds_syncid, sc,
+ sc->sc_syncid));
+
+#define DETECT_MISMATCH(field, name) \
+ if (mismatch == NULL && \
+ disk->d_init_ ## field != sc->sc_ ## field) { \
+ mismatch = name; \
+ found = (intmax_t)disk->d_init_ ## field; \
+ newest = (intmax_t)sc->sc_ ## field; \
+ }
+ mismatch = NULL;
+ DETECT_MISMATCH(ndisks, "md_all");
+ DETECT_MISMATCH(balance, "md_balance");
+ DETECT_MISMATCH(slice, "md_slice");
+ DETECT_MISMATCH(mediasize, "md_mediasize");
+#undef DETECT_MISMATCH
+ if (mismatch != NULL) {
+ G_MIRROR_DEBUG(0, "Found a mismatching '%s' "
+ "field on %s (device %s) (found=%ju "
+ "newest=%ju).", mismatch,
+ g_mirror_get_diskname(disk), sc->sc_name,
+ found, newest);
+ g_mirror_destroy_disk(disk);
+ sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
+ continue;
+ }
+ }
+
KASSERT(sc->sc_provider == NULL,
("Non-NULL provider in STARTING state (%s).", sc->sc_name));
/*
- * Are we ready? We are, if all disks are connected or
- * if we have any disks and 'force' is true.
+ * Are we ready? If the timeout (force is true) has expired, and
+ * any disks are present, then yes. If we're permitted to launch
+ * before the timeout has expired and the expected number of
+ * current-generation mirror disks have been tasted, then yes.
*/
ndisks = g_mirror_ndisks(sc, -1);
- if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
+ if ((force && ndisks > 0) ||
+ (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) {
;
} else if (ndisks == 0) {
/*
@@ -2420,42 +2488,6 @@
}
/*
- * Find the biggest genid.
- */
- genid = 0;
- LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_genid > genid)
- genid = disk->d_genid;
- }
- sc->sc_genid = genid;
- /*
- * Remove all disks without the biggest genid.
- */
- broken = false;
- LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
- if (disk->d_genid < genid) {
- G_MIRROR_DEBUG(0,
- "Component %s (device %s) broken, skipping.",
- g_mirror_get_diskname(disk), sc->sc_name);
- g_mirror_destroy_disk(disk);
- /*
- * Bump the syncid in case we discover a healthy
- * replacement disk after starting the mirror.
- */
- broken = true;
- }
- }
-
- /*
- * Find the biggest syncid.
- */
- syncid = 0;
- LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_sync.ds_syncid > syncid)
- syncid = disk->d_sync.ds_syncid;
- }
-
- /*
* Here we need to look for dirty disks and if all disks
* with the biggest syncid are dirty, we have to choose
* one with the biggest priority and rebuild the rest.
@@ -2468,7 +2500,7 @@
dirty = ndisks = 0;
pdisk = NULL;
LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_sync.ds_syncid != syncid)
+ if (disk->d_sync.ds_syncid != sc->sc_syncid)
continue;
if ((disk->d_flags &
G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
@@ -2495,7 +2527,7 @@
"master disk for synchronization.",
g_mirror_get_diskname(pdisk), sc->sc_name);
LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_sync.ds_syncid != syncid)
+ if (disk->d_sync.ds_syncid != sc->sc_syncid)
continue;
if ((disk->d_flags &
G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
@@ -2516,7 +2548,7 @@
* We have some non-dirty disks.
*/
LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_sync.ds_syncid != syncid)
+ if (disk->d_sync.ds_syncid != sc->sc_syncid)
continue;
if ((disk->d_flags &
G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
@@ -2532,8 +2564,7 @@
/* Reset hint. */
sc->sc_hint = NULL;
- sc->sc_syncid = syncid;
- if (force || broken) {
+ if (force) {
/* Remember to bump syncid on first write. */
sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
}
@@ -2870,37 +2901,23 @@
struct g_mirror_metadata *md)
{
+ G_MIRROR_DEBUG(2, "%s: md_did 0x%u disk %s device %s md_all 0x%x "
+ "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x "
+ "sc_balance 0x%x sc_mediasize 0x%lx pp_mediasize 0x%lx "
+ "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%lx "
+ "md_dflags 0x%lx md_syncid 0x%x md_genid 0x%x md_priority 0x%x "
+ "sc_state 0x%x.",
+ __func__, md->md_did, pp->name, sc->sc_name, md->md_all,
+ sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance,
+ sc->sc_balance, sc->sc_mediasize, pp->mediasize, md->md_sectorsize,
+ sc->sc_sectorsize, md->md_mflags, md->md_dflags, md->md_syncid,
+ md->md_genid, md->md_priority, sc->sc_state);
+
if (g_mirror_id2disk(sc, md->md_did) != NULL) {
G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
pp->name, md->md_did);
return (EEXIST);
}
- if (md->md_all != sc->sc_ndisks) {
- G_MIRROR_DEBUG(1,
- "Invalid '%s' field on disk %s (device %s), skipping.",
- "md_all", pp->name, sc->sc_name);
- return (EINVAL);
- }
- if (md->md_slice != sc->sc_slice) {
- G_MIRROR_DEBUG(1,
- "Invalid '%s' field on disk %s (device %s), skipping.",
- "md_slice", pp->name, sc->sc_name);
- return (EINVAL);
- }
- if (md->md_balance != sc->sc_balance) {
- G_MIRROR_DEBUG(1,
- "Invalid '%s' field on disk %s (device %s), skipping.",
- "md_balance", pp->name, sc->sc_name);
- return (EINVAL);
- }
-#if 0
- if (md->md_mediasize != sc->sc_mediasize) {
- G_MIRROR_DEBUG(1,
- "Invalid '%s' field on disk %s (device %s), skipping.",
- "md_mediasize", pp->name, sc->sc_name);
- return (EINVAL);
- }
-#endif
if (sc->sc_mediasize > pp->mediasize) {
G_MIRROR_DEBUG(1,
"Invalid size of disk %s (device %s), skipping.", pp->name,
@@ -2947,12 +2964,21 @@
error = g_mirror_check_metadata(sc, pp, md);
if (error != 0)
return (error);
- if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
- md->md_genid < sc->sc_genid) {
+
+ if (md->md_genid < sc->sc_genid) {
G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
pp->name, sc->sc_name);
return (EINVAL);
}
+
+ /*
+ * If the component disk we're tasting has newer metadata than the
+ * STARTING gmirror device, refresh the device from the component.
+ */
+ error = g_mirror_refresh_device(sc, pp, md);
+ if (error != 0)
+ return (error);
+
disk = g_mirror_init_disk(sc, pp, md, &error);
if (disk == NULL)
return (error);
@@ -3029,6 +3055,21 @@
return (error);
}
+static void
+g_mirror_reinit_from_metadata(struct g_mirror_softc *sc,
+ const struct g_mirror_metadata *md)
+{
+
+ sc->sc_genid = md->md_genid;
+ sc->sc_syncid = md->md_syncid;
+
+ sc->sc_slice = md->md_slice;
+ sc->sc_balance = md->md_balance;
+ sc->sc_mediasize = md->md_mediasize;
+ sc->sc_ndisks = md->md_all;
+ sc->sc_flags = md->md_mflags;
+}
+
struct g_geom *
g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
u_int type)
@@ -3056,12 +3097,8 @@
sc->sc_type = type;
sc->sc_id = md->md_mid;
- sc->sc_slice = md->md_slice;
- sc->sc_balance = md->md_balance;
- sc->sc_mediasize = md->md_mediasize;
+ g_mirror_reinit_from_metadata(sc, md);
sc->sc_sectorsize = md->md_sectorsize;
- sc->sc_ndisks = md->md_all;
- sc->sc_flags = md->md_mflags;
sc->sc_bump_id = 0;
sc->sc_idle = 1;
sc->sc_last_write = time_uptime;
@@ -3482,6 +3519,52 @@
if (g_mirror_post_sync != NULL)
EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
+}
+
+/*
+ * Refresh the mirror device's metadata when gmirror encounters a newer
+ * generation as the individual components are being added to the mirror set.
+ */
+static int
+g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp,
+ const struct g_mirror_metadata *md)
+{
+
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
+
+ KASSERT(sc->sc_genid <= md->md_genid,
+ ("%s: attempted to refresh from stale component %s (device %s) "
+ "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid,
+ sc->sc_genid));
+
+ if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid &&
+ sc->sc_syncid >= md->md_syncid))
+ return (0);
+
+ G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u "
+ "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; "
+ "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid,
+ sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name);
+
+ if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) {
+ /* Probable data corruption detected */
+ G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state "
+ "(device=%s genid=%u). A stale mirror device was launched.",
+ g_mirror_device_state2str(sc->sc_state), sc->sc_name,
+ sc->sc_genid);
+ return (EINVAL);
+ }
+
+ /* Update softc */
+ g_mirror_reinit_from_metadata(sc, md);
+
+ G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s "
+ "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid,
+ g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid,
+ md->md_syncid, (unsigned)md->md_all);
+
+ return (0);
}
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
Index: head/tests/sys/geom/class/mirror/Makefile
===================================================================
--- head/tests/sys/geom/class/mirror/Makefile
+++ head/tests/sys/geom/class/mirror/Makefile
@@ -18,6 +18,7 @@
TAP_TESTS_SH+= 12_test
TAP_TESTS_SH+= 13_test
+ATF_TESTS_SH+= component_selection
ATF_TESTS_SH+= sync_error
${PACKAGE}FILES+= conf.sh
Index: head/tests/sys/geom/class/mirror/component_selection.sh
===================================================================
--- head/tests/sys/geom/class/mirror/component_selection.sh
+++ head/tests/sys/geom/class/mirror/component_selection.sh
@@ -0,0 +1,133 @@
+# $FreeBSD$
+
+atf_test_case run_latest_genid cleanup
+run_latest_genid_head()
+{
+ atf_set "descr" \
+ "Ensure that we properly select components (latest genid) during STARTING."
+ atf_set "require.user" "root"
+}
+run_latest_genid_body()
+{
+ . $(atf_get_srcdir)/conf.sh
+
+ f1=$(mktemp ${base}.XXXXXX)
+ f2=$(mktemp ${base}.XXXXXX)
+ f3=$(mktemp ${base}.XXXXXX)
+ rnd1=$(mktemp ${base}.XXXXXX)
+ rnd2=$(mktemp ${base}.XXXXXX)
+
+ atf_check truncate -s 2M $f1
+ atf_check truncate -s 2M $f2
+ atf_check truncate -s 2M $f3
+ dd if=/dev/urandom bs=512 count=1 of="$rnd1"
+ dd if=/dev/urandom bs=512 count=1 of="$rnd2"
+
+ md1=$(attach_md -t vnode -f ${f1})
+ md2=$(attach_md -t vnode -f ${f2})
+ md3=$(attach_md -t vnode -f ${f3})
+
+ # Use a gnop for md1 just for consistency; it's not used for anything.
+ atf_check gnop create $md1
+ atf_check gnop create $md2
+ atf_check gnop create $md3
+ # Hardcode component names so that the non-.nop device isn't tasted
+ # instead.
+ atf_check gmirror label -h $name ${md1}.nop
+ devwait
+
+ atf_check gmirror insert -h $name ${md2}.nop
+ atf_check gmirror insert -h $name ${md3}.nop
+ syncwait
+
+ # Fail mirror 3, writing known contents to mirror 1+2 block 1
+ atf_check -s exit:0 -e empty -o empty \
+ gnop configure -w 100 ${md3}.nop
+ atf_check -s exit:0 dd if="$rnd1" bs=512 count=1 oseek=1 conv=notrunc \
+ of=/dev/mirror/$name status=none
+
+ disconnectwait nop "${md3}.nop"
+
+ # Should have two mirrors remaining after md3 was evicted
+ atf_check [ $(gmirror status -s $name | wc -l) -eq 2 ]
+ atf_check -s exit:0 -o match:"DEGRADED ${md1}.nop \(ACTIVE\)" \
+ gmirror status -s $name
+ atf_check -s exit:0 -o match:"DEGRADED ${md2}.nop \(ACTIVE\)" \
+ gmirror status -s $name
+
+ # Repeat:
+ # Fail mirror 2, writing known contents to mirror 1 block 2
+ atf_check -s exit:0 -e empty -o empty \
+ gnop configure -w 100 ${md2}.nop
+ atf_check -s exit:0 dd if="$rnd2" bs=512 count=2 oseek=1 conv=notrunc \
+ of=/dev/mirror/$name status=none
+
+ disconnectwait nop "${md2}.nop"
+
+ # Should have one mirror remaining after md2 was evicted
+ atf_check [ $(gmirror status -s $name | wc -l) -eq 1 ]
+ atf_check -s exit:0 -o match:"DEGRADED ${md1}.nop \(ACTIVE\)" \
+ gmirror status -s $name
+
+ # Stop the mirror and remove the pieces so gmirror can't see them.
+ atf_check gmirror stop $name
+ atf_check gnop destroy ${md1}.nop
+ atf_check gnop destroy ${md2}.nop
+ atf_check gnop destroy ${md3}.nop
+
+ # Rebuild; spin up "disk" with lowest genid
+ atf_check gnop create $md3
+ md3gen=$(gmirror dump /dev/${md3}.nop | grep genid | cut -d: -f2)
+ # Assert gmirror is referencing this component for now:
+ atf_check [ $(consumerrefs nop ${md3}.nop) = "r1w1e1" ]
+
+ # Adding newer genid should kick out old component
+ atf_check gnop create $md2
+ md2gen=$(gmirror dump /dev/${md2}.nop | grep genid | cut -d: -f2)
+ atf_check [ $md2gen -gt $md3gen ]
+
+ disconnectwait nop "${md3}.nop"
+
+ # Can't test this because 'status' doesn't exist until RUNNING:
+ #atf_check [ $(gmirror status -s $name | wc -l) -eq 1 ]
+ # But as a substitute, assert gmirror has dropped reference to staler
+ # component in favor of newer component:
+ atf_check [ $(consumerrefs nop ${md2}.nop) = "r1w1e1" ]
+
+ # ditto
+ atf_check gnop create $md1
+ md1gen=$(gmirror dump /dev/${md1}.nop | grep genid | cut -d: -f2)
+ atf_check [ $md1gen -gt $md2gen ]
+
+ disconnectwait nop "${md2}.nop"
+
+ # Assert gmirror has dropped reference to stale component in favor of
+ # newer component:
+ atf_check [ $(consumerrefs nop ${md1}.nop) = "r1w1e1" ]
+
+ # gmirror won't start the mirror automatically with only one component
+ # ($md0) of configured three, so this waits out the
+ # kern.geom.mirror.timeout:
+ devwait
+
+ atf_check [ $(gmirror status -s $name | wc -l) -eq 1 ]
+ atf_check -s exit:0 -o match:"DEGRADED ${md1}.nop \(ACTIVE\)" \
+ gmirror status -s $name
+}
+run_latest_genid_cleanup()
+{
+ . $(atf_get_srcdir)/conf.sh
+
+ if [ -f "$TEST_MDS_FILE" ]; then
+ while read test_md; do
+ echo "# Removing test gnop: ${test_md}.nop"
+ gnop destroy -f "${test_md}.nop" 2>/dev/null || :
+ done < "$TEST_MDS_FILE"
+ fi
+ gmirror_test_cleanup
+}
+
+atf_init_test_cases()
+{
+ atf_add_test_case run_latest_genid
+}
Index: head/tests/sys/geom/class/mirror/conf.sh
===================================================================
--- head/tests/sys/geom/class/mirror/conf.sh
+++ head/tests/sys/geom/class/mirror/conf.sh
@@ -19,4 +19,35 @@
done
}
+consumerrefs()
+{
+ gclass=$1
+ geom=$2
+
+ if [ $# -ne 2 ]; then
+ echo "Bad usage consumerrefs" >&2
+ exit 1
+ fi
+
+ geom "${gclass}" list "${geom}" | \
+ grep -A5 ^Consumers | \
+ grep Mode | \
+ cut -d: -f2
+}
+
+disconnectwait()
+{
+ gclass=$1
+ geom=$2
+
+ if [ $# -ne 2 ]; then
+ echo "Bad usage disconnectwait" >&2
+ exit 1
+ fi
+
+ while [ $(consumerrefs "$gclass" "$geom") != r0w0e0 ]; do
+ sleep 0.05
+ done
+}
+
. `dirname $0`/../geom_subr.sh
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 25, 11:19 AM (10 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15596469
Default Alt Text
D18062.diff (17 KB)
Attached To
Mode
D18062: gmirror: Evaluate mirror components against newest metadata copy
Attached
Detach File
Event Timeline
Log In to Comment