Page MenuHomeFreeBSD

D18062.id51681.diff
No OneTemporary

D18062.id51681.diff

Index: sys/geom/mirror/g_mirror.h
===================================================================
--- sys/geom/mirror/g_mirror.h
+++ sys/geom/mirror/g_mirror.h
@@ -148,6 +148,10 @@
u_int d_genid; /* Disk's generation ID. */
struct g_mirror_disk_sync d_sync;/* Sync information. */
LIST_ENTRY(g_mirror_disk) d_next;
+ u_int d_init_ndisks; /* Initial number of mirror components */
+ uint32_t d_init_slice; /* Initial slice size */
+ uint8_t d_init_balance;/* Initial balance */
+ uint64_t d_init_mediasize;/* Initial mediasize */
};
#define d_name d_consumer->provider->name
Index: sys/geom/mirror/g_mirror.c
===================================================================
--- sys/geom/mirror/g_mirror.c
+++ sys/geom/mirror/g_mirror.c
@@ -59,6 +59,11 @@
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
"Debug level");
+bool g_launch_mirror_before_timeout = true;
+SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout,
+ CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0,
+ "If false, force gmirror to wait out the full kern.geom.mirror.timeout "
+ "before launching mirrors");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
0, "Time to wait on all mirror components");
@@ -110,6 +115,8 @@
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
+static int g_mirror_refresh_device(struct g_mirror_softc *sc,
+ const struct g_provider *pp, const struct g_mirror_metadata *md);
static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
struct bio *bp, off_t offset);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
@@ -472,6 +479,10 @@
disk->d_sync.ds_update_ts = time_uptime;
disk->d_genid = md->md_genid;
disk->d_sync.ds_syncid = md->md_syncid;
+ disk->d_init_ndisks = md->md_all;
+ disk->d_init_slice = md->md_slice;
+ disk->d_init_balance = md->md_balance;
+ disk->d_init_mediasize = md->md_mediasize;
if (errorp != NULL)
*errorp = 0;
return (disk);
@@ -2362,17 +2373,74 @@
case G_MIRROR_DEVICE_STATE_STARTING:
{
struct g_mirror_disk *pdisk, *tdisk;
- u_int dirty, ndisks, genid, syncid;
- bool broken;
+ const char *mismatch;
+ uintmax_t found, newest;
+ u_int dirty, ndisks;
+
+ /* Pre-flight checks */
+ LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
+ /*
+ * Confirm we already detected the newest genid.
+ */
+ KASSERT(sc->sc_genid >= disk->d_genid,
+ ("%s: found newer genid %u (sc:%p had %u).", __func__,
+ disk->d_genid, sc, sc->sc_genid));
+
+ /* Kick out any previously tasted stale components. */
+ if (disk->d_genid < sc->sc_genid) {
+ G_MIRROR_DEBUG(0, "Stale 'genid' field on %s "
+ "(device %s) (component=%u latest=%u), skipping.",
+ g_mirror_get_diskname(disk), sc->sc_name,
+ disk->d_genid, sc->sc_genid);
+ g_mirror_destroy_disk(disk);
+ sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
+ continue;
+ }
+
+ /*
+ * Confirm we already detected the newest syncid.
+ */
+ KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid,
+ ("%s: found newer syncid %u (sc:%p had %u).",
+ __func__, disk->d_sync.ds_syncid, sc,
+ sc->sc_syncid));
+
+#define DETECT_MISMATCH(field, name) \
+ if (mismatch == NULL && \
+ disk->d_init_ ## field != sc->sc_ ## field) { \
+ mismatch = name; \
+ found = (intmax_t)disk->d_init_ ## field; \
+ newest = (intmax_t)sc->sc_ ## field; \
+ }
+ mismatch = NULL;
+ DETECT_MISMATCH(ndisks, "md_all");
+ DETECT_MISMATCH(balance, "md_balance");
+ DETECT_MISMATCH(slice, "md_slice");
+ DETECT_MISMATCH(mediasize, "md_mediasize");
+#undef DETECT_MISMATCH
+ if (mismatch != NULL) {
+ G_MIRROR_DEBUG(0, "Found a mismatching '%s' "
+ "field on %s (device %s) (found=%ju "
+ "newest=%ju).", mismatch,
+ g_mirror_get_diskname(disk), sc->sc_name,
+ found, newest);
+ g_mirror_destroy_disk(disk);
+ sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
+ continue;
+ }
+ }
KASSERT(sc->sc_provider == NULL,
("Non-NULL provider in STARTING state (%s).", sc->sc_name));
/*
- * Are we ready? We are, if all disks are connected or
- * if we have any disks and 'force' is true.
+ * Are we ready? If the timeout (force is true) has expired, and
+ * any disks are present, then yes. If we're permitted to launch
+ * before the timeout has expired and the expected number of
+ * current-generation mirror disks have been tasted, then yes.
*/
ndisks = g_mirror_ndisks(sc, -1);
- if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
+ if ((force && ndisks > 0) ||
+ (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) {
;
} else if (ndisks == 0) {
/*
@@ -2419,42 +2487,6 @@
callout_drain(&sc->sc_callout);
}
- /*
- * Find the biggest genid.
- */
- genid = 0;
- LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_genid > genid)
- genid = disk->d_genid;
- }
- sc->sc_genid = genid;
- /*
- * Remove all disks without the biggest genid.
- */
- broken = false;
- LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
- if (disk->d_genid < genid) {
- G_MIRROR_DEBUG(0,
- "Component %s (device %s) broken, skipping.",
- g_mirror_get_diskname(disk), sc->sc_name);
- g_mirror_destroy_disk(disk);
- /*
- * Bump the syncid in case we discover a healthy
- * replacement disk after starting the mirror.
- */
- broken = true;
- }
- }
-
- /*
- * Find the biggest syncid.
- */
- syncid = 0;
- LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_sync.ds_syncid > syncid)
- syncid = disk->d_sync.ds_syncid;
- }
-
/*
* Here we need to look for dirty disks and if all disks
* with the biggest syncid are dirty, we have to choose
@@ -2468,7 +2500,7 @@
dirty = ndisks = 0;
pdisk = NULL;
LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_sync.ds_syncid != syncid)
+ if (disk->d_sync.ds_syncid != sc->sc_syncid)
continue;
if ((disk->d_flags &
G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
@@ -2495,7 +2527,7 @@
"master disk for synchronization.",
g_mirror_get_diskname(pdisk), sc->sc_name);
LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_sync.ds_syncid != syncid)
+ if (disk->d_sync.ds_syncid != sc->sc_syncid)
continue;
if ((disk->d_flags &
G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
@@ -2516,7 +2548,7 @@
* We have some non-dirty disks.
*/
LIST_FOREACH(disk, &sc->sc_disks, d_next) {
- if (disk->d_sync.ds_syncid != syncid)
+ if (disk->d_sync.ds_syncid != sc->sc_syncid)
continue;
if ((disk->d_flags &
G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
@@ -2532,8 +2564,7 @@
/* Reset hint. */
sc->sc_hint = NULL;
- sc->sc_syncid = syncid;
- if (force || broken) {
+ if (force) {
/* Remember to bump syncid on first write. */
sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
}
@@ -2870,37 +2901,23 @@
struct g_mirror_metadata *md)
{
+ G_MIRROR_DEBUG(2, "%s: md_did 0x%u disk %s device %s md_all 0x%x "
+ "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x "
+ "sc_balance 0x%x sc_mediasize 0x%lx pp_mediasize 0x%lx "
+ "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%lx "
+ "md_dflags 0x%lx md_syncid 0x%x md_genid 0x%x md_priority 0x%x "
+ "sc_state 0x%x.",
+ __func__, md->md_did, pp->name, sc->sc_name, md->md_all,
+ sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance,
+ sc->sc_balance, sc->sc_mediasize, pp->mediasize, md->md_sectorsize,
+ sc->sc_sectorsize, md->md_mflags, md->md_dflags, md->md_syncid,
+ md->md_genid, md->md_priority, sc->sc_state);
+
if (g_mirror_id2disk(sc, md->md_did) != NULL) {
G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
pp->name, md->md_did);
return (EEXIST);
}
- if (md->md_all != sc->sc_ndisks) {
- G_MIRROR_DEBUG(1,
- "Invalid '%s' field on disk %s (device %s), skipping.",
- "md_all", pp->name, sc->sc_name);
- return (EINVAL);
- }
- if (md->md_slice != sc->sc_slice) {
- G_MIRROR_DEBUG(1,
- "Invalid '%s' field on disk %s (device %s), skipping.",
- "md_slice", pp->name, sc->sc_name);
- return (EINVAL);
- }
- if (md->md_balance != sc->sc_balance) {
- G_MIRROR_DEBUG(1,
- "Invalid '%s' field on disk %s (device %s), skipping.",
- "md_balance", pp->name, sc->sc_name);
- return (EINVAL);
- }
-#if 0
- if (md->md_mediasize != sc->sc_mediasize) {
- G_MIRROR_DEBUG(1,
- "Invalid '%s' field on disk %s (device %s), skipping.",
- "md_mediasize", pp->name, sc->sc_name);
- return (EINVAL);
- }
-#endif
if (sc->sc_mediasize > pp->mediasize) {
G_MIRROR_DEBUG(1,
"Invalid size of disk %s (device %s), skipping.", pp->name,
@@ -2947,12 +2964,21 @@
error = g_mirror_check_metadata(sc, pp, md);
if (error != 0)
return (error);
- if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
- md->md_genid < sc->sc_genid) {
+
+ if (md->md_genid < sc->sc_genid) {
G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
pp->name, sc->sc_name);
return (EINVAL);
}
+
+ /*
+ * If the component disk we're tasting has newer metadata than the
+ * STARTING gmirror device, refresh the device from the component.
+ */
+ error = g_mirror_refresh_device(sc, pp, md);
+ if (error != 0)
+ return (error);
+
disk = g_mirror_init_disk(sc, pp, md, &error);
if (disk == NULL)
return (error);
@@ -3029,6 +3055,21 @@
return (error);
}
+static void
+g_mirror_reinit_from_metadata(struct g_mirror_softc *sc,
+ const struct g_mirror_metadata *md)
+{
+
+ sc->sc_genid = md->md_genid;
+ sc->sc_syncid = md->md_syncid;
+
+ sc->sc_slice = md->md_slice;
+ sc->sc_balance = md->md_balance;
+ sc->sc_mediasize = md->md_mediasize;
+ sc->sc_ndisks = md->md_all;
+ sc->sc_flags = md->md_mflags;
+}
+
struct g_geom *
g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
u_int type)
@@ -3056,12 +3097,8 @@
sc->sc_type = type;
sc->sc_id = md->md_mid;
- sc->sc_slice = md->md_slice;
- sc->sc_balance = md->md_balance;
- sc->sc_mediasize = md->md_mediasize;
+ g_mirror_reinit_from_metadata(sc, md);
sc->sc_sectorsize = md->md_sectorsize;
- sc->sc_ndisks = md->md_all;
- sc->sc_flags = md->md_mflags;
sc->sc_bump_id = 0;
sc->sc_idle = 1;
sc->sc_last_write = time_uptime;
@@ -3484,5 +3521,51 @@
EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
}
+/*
+ * Refresh the mirror device's metadata when gmirror encounters a newer
+ * generation as the individual components are being added to the mirror set.
+ */
+static int
+g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp,
+ const struct g_mirror_metadata *md)
+{
+
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
+
+ KASSERT(sc->sc_genid <= md->md_genid,
+ ("%s: attempted to refresh from stale component %s (device %s) "
+ "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid,
+ sc->sc_genid));
+
+ if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid &&
+ sc->sc_syncid >= md->md_syncid))
+ return (0);
+
+ G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u "
+ "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; "
+ "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid,
+ sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name);
+
+ if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) {
+ /* Probable data corruption detected */
+ G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state "
+ "(device=%s genid=%u). A stale mirror device was launched.",
+ g_mirror_device_state2str(sc->sc_state), sc->sc_name,
+ sc->sc_genid);
+ return (EINVAL);
+ }
+
+ /* Update softc */
+ g_mirror_reinit_from_metadata(sc, md);
+
+ G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s "
+ "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid,
+ g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid,
+ md->md_syncid, (unsigned)md->md_all);
+
+ return (0);
+}
+
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
MODULE_VERSION(geom_mirror, 0);
Index: tests/sys/geom/class/mirror/Makefile
===================================================================
--- tests/sys/geom/class/mirror/Makefile
+++ tests/sys/geom/class/mirror/Makefile
@@ -18,6 +18,7 @@
TAP_TESTS_SH+= 12_test
TAP_TESTS_SH+= 13_test
+ATF_TESTS_SH+= component_selection
ATF_TESTS_SH+= sync_error
${PACKAGE}FILES+= conf.sh
Index: tests/sys/geom/class/mirror/component_selection.sh
===================================================================
--- /dev/null
+++ tests/sys/geom/class/mirror/component_selection.sh
@@ -0,0 +1,130 @@
+# $FreeBSD$
+
+atf_test_case run_latest_genid cleanup
+run_latest_genid_head()
+{
+ atf_set "descr" \
+ "Ensure that we properly select components (latest genid) during STARTING."
+ atf_set "require.user" "root"
+}
+run_latest_genid_body()
+{
+ . $(atf_get_srcdir)/conf.sh
+
+ f1=$(mktemp ${base}.XXXXXX)
+ f2=$(mktemp ${base}.XXXXXX)
+ f3=$(mktemp ${base}.XXXXXX)
+ rnd1=$(mktemp ${base}.XXXXXX)
+ rnd2=$(mktemp ${base}.XXXXXX)
+
+ atf_check truncate -s 2M $f1
+ atf_check truncate -s 2M $f2
+ atf_check truncate -s 2M $f3
+ dd if=/dev/urandom bs=512 count=1 of="$rnd1"
+ dd if=/dev/urandom bs=512 count=1 of="$rnd2"
+
+ md1=$(attach_md -t vnode -f ${f1})
+ md2=$(attach_md -t vnode -f ${f2})
+ md3=$(attach_md -t vnode -f ${f3})
+
+ # Use a gnop for md1 just for consistency; it's not used for anything.
+ atf_check gnop create $md1
+ atf_check gnop create $md2
+ atf_check gnop create $md3
+ # Hardcode component names so that the non-.nop device isn't tasted
+ # instead.
+ atf_check gmirror label -h $name ${md1}.nop
+ devwait
+
+ atf_check gmirror insert -h $name ${md2}.nop
+ atf_check gmirror insert -h $name ${md3}.nop
+ syncwait
+
+ # Fail mirror 3, writing known contents to mirror 1+2 block 1
+ atf_check -s exit:0 -e empty -o empty \
+ gnop configure -w 100 ${md3}.nop
+ atf_check -s exit:0 dd if="$rnd1" bs=512 count=1 oseek=1 conv=notrunc \
+ of=/dev/mirror/$name status=none
+
+ sleep 0.1
+
+ # Should have two mirrors remaining after md3 was evicted
+ atf_check [ $(gmirror status -s $name | wc -l) -eq 2 ]
+ atf_check -s exit:0 -o match:"DEGRADED ${md1}.nop \(ACTIVE\)" \
+ gmirror status -s $name
+ atf_check -s exit:0 -o match:"DEGRADED ${md2}.nop \(ACTIVE\)" \
+ gmirror status -s $name
+
+ # Repeat:
+ # Fail mirror 2, writing known contents to mirror 1 block 2
+ atf_check -s exit:0 -e empty -o empty \
+ gnop configure -w 100 ${md2}.nop
+ atf_check -s exit:0 dd if="$rnd2" bs=512 count=2 oseek=1 conv=notrunc \
+ of=/dev/mirror/$name status=none
+ sleep 0.1
+
+ # Should have one mirror remaining after md2 was evicted
+ atf_check [ $(gmirror status -s $name | wc -l) -eq 1 ]
+ atf_check -s exit:0 -o match:"DEGRADED ${md1}.nop \(ACTIVE\)" \
+ gmirror status -s $name
+
+ # Stop the mirror and remove the pieces so gmirror can't see them.
+ atf_check gmirror stop $name
+ atf_check gnop destroy ${md1}.nop
+ atf_check gnop destroy ${md2}.nop
+ atf_check gnop destroy ${md3}.nop
+
+ # Rebuild; spin up "disk" with lowest genid
+ atf_check gnop create $md3
+ md3gen=$(gmirror dump /dev/${md3}.nop | grep genid | cut -d: -f2)
+ # Assert gmirror is referencing this component for now:
+ atf_check [ $(gnop list ${md3}.nop | grep -A5 ^Consumers | grep Mode | cut -d: -f2) = "r1w1e1" ]
+
+ # Adding newer genid should kick out old component
+ atf_check gnop create $md2
+ md2gen=$(gmirror dump /dev/${md2}.nop | grep genid | cut -d: -f2)
+ atf_check [ $md2gen -gt $md3gen ]
+ sleep 0.1
+
+ # Can't test this because 'status' doesn't exist until RUNNING:
+ #atf_check [ $(gmirror status -s $name | wc -l) -eq 1 ]
+ # But as a substitute, assert gmirror has dropped reference to staler
+ # component in favor of newer component:
+ atf_check [ $(gnop list ${md3}.nop | grep -A5 ^Consumers | grep Mode | cut -d: -f2) = "r0w0e0" ]
+ atf_check [ $(gnop list ${md2}.nop | grep -A5 ^Consumers | grep Mode | cut -d: -f2) = "r1w1e1" ]
+
+ # ditto
+ atf_check gnop create $md1
+ md1gen=$(gmirror dump /dev/${md1}.nop | grep genid | cut -d: -f2)
+ atf_check [ $md1gen -gt $md2gen ]
+ sleep 0.1
+
+ # Assert gmirror has dropped reference to stale component in favor of
+ # newer component:
+ atf_check [ $(gnop list ${md2}.nop | grep -A5 ^Consumers | grep Mode | cut -d: -f2) = "r0w0e0" ]
+ atf_check [ $(gnop list ${md1}.nop | grep -A5 ^Consumers | grep Mode | cut -d: -f2) = "r1w1e1" ]
+
+ # Sleep the configured timeout because md_all=3 and gmirror won't start
+ # the mirror automatically with only one component ($md0).
+ sleep $(sysctl -n kern.geom.mirror.timeout)
+ atf_check [ $(gmirror status -s $name | wc -l) -eq 1 ]
+ atf_check -s exit:0 -o match:"DEGRADED ${md1}.nop \(ACTIVE\)" \
+ gmirror status -s $name
+}
+run_latest_genid_cleanup()
+{
+ . $(atf_get_srcdir)/conf.sh
+
+ if [ -f "$TEST_MDS_FILE" ]; then
+ while read test_md; do
+ echo "# Removing test gnop: ${test_md}.nop"
+ gnop destroy -f "${test_md}.nop" 2>/dev/null || :
+ done < "$TEST_MDS_FILE"
+ fi
+ gmirror_test_cleanup
+}
+
+atf_init_test_cases()
+{
+ atf_add_test_case run_latest_genid
+}

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 26, 12:27 AM (11 h, 55 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15599893
Default Alt Text
D18062.id51681.diff (17 KB)

Event Timeline