Index: sys/geom/mirror/g_mirror.h =================================================================== --- sys/geom/mirror/g_mirror.h +++ sys/geom/mirror/g_mirror.h @@ -148,6 +148,7 @@ u_int d_genid; /* Disk's generation ID. */ struct g_mirror_disk_sync d_sync;/* Sync information. */ LIST_ENTRY(g_mirror_disk) d_next; + u_int d_init_ndisks; /* Initial number of mirror components */ }; #define d_name d_consumer->provider->name Index: sys/geom/mirror/g_mirror.c =================================================================== --- sys/geom/mirror/g_mirror.c +++ sys/geom/mirror/g_mirror.c @@ -59,6 +59,11 @@ int g_mirror_debug = 0; SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0, "Debug level"); +bool g_launch_mirror_before_timeout = true; +SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout, + CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0, + "If false, force gmirror to wait out the full kern.geom.mirror.timeout " + "before launching gmirrors"); static u_int g_mirror_timeout = 4; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout, 0, "Time to wait on all mirror components"); @@ -110,6 +115,8 @@ static void g_mirror_update_device(struct g_mirror_softc *sc, bool force); static void g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); +static int g_mirror_refresh_device(struct g_mirror_softc *sc, + const struct g_provider *pp, const struct g_mirror_metadata *md); static void g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp, off_t offset); static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type); @@ -472,6 +479,7 @@ disk->d_sync.ds_update_ts = time_uptime; disk->d_genid = md->md_genid; disk->d_sync.ds_syncid = md->md_syncid; + disk->d_init_ndisks = md->md_all; if (errorp != NULL) *errorp = 0; return (disk); @@ -2362,17 +2370,22 @@ case G_MIRROR_DEVICE_STATE_STARTING: { struct g_mirror_disk *pdisk, *tdisk; - u_int dirty, ndisks, genid, syncid; - bool broken; + u_int dirty, ndisks; +#ifdef INVARIANTS + u_int genid, syncid; +#endif KASSERT(sc->sc_provider == NULL, ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); /* - * Are we ready? We are, if all disks are connected or - * if we have any disks and 'force' is true. + * Are we ready? If the timeout (force is true) has expired, and + * any disks are present, then yes. If we're permitted to launch + * before the timeout has expired and the expected number of + * mirror disks have been tasted, then yes. */ ndisks = g_mirror_ndisks(sc, -1); - if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) { + if ((force && ndisks > 0) || + (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) { ; } else if (ndisks == 0) { /* @@ -2419,41 +2432,29 @@ callout_drain(&sc->sc_callout); } +#ifdef INVARIANTS /* - * Find the biggest genid. + * Confirm we already detected the newest genid. */ genid = 0; - LIST_FOREACH(disk, &sc->sc_disks, d_next) { + LIST_FOREACH(disk, &sc->sc_disks, d_next) if (disk->d_genid > genid) genid = disk->d_genid; - } - sc->sc_genid = genid; - /* - * Remove all disks without the biggest genid. - */ - broken = false; - LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { - if (disk->d_genid < genid) { - G_MIRROR_DEBUG(0, - "Component %s (device %s) broken, skipping.", - g_mirror_get_diskname(disk), sc->sc_name); - g_mirror_destroy_disk(disk); - /* - * Bump the syncid in case we discover a healthy - * replacement disk after starting the mirror. - */ - broken = true; - } - } + KASSERT(sc->sc_genid == genid, + ("%s: found newer genid %u (sc:%p had %u).", __func__, + genid, sc, sc->sc_genid)); /* - * Find the biggest syncid. + * Confirm we already detected the newest syncid. */ syncid = 0; - LIST_FOREACH(disk, &sc->sc_disks, d_next) { + LIST_FOREACH(disk, &sc->sc_disks, d_next) if (disk->d_sync.ds_syncid > syncid) syncid = disk->d_sync.ds_syncid; - } + KASSERT(sc->sc_syncid == syncid, + ("%s: found newer syncid %u (sc:%p had %u).", __func__, + syncid, sc, sc->sc_syncid)); +#endif /* * Here we need to look for dirty disks and if all disks @@ -2467,8 +2468,29 @@ */ dirty = ndisks = 0; pdisk = NULL; - LIST_FOREACH(disk, &sc->sc_disks, d_next) { - if (disk->d_sync.ds_syncid != syncid) + LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { + if (disk->d_init_ndisks != sc->sc_ndisks) { + G_MIRROR_DEBUG(0, "Found a mismatching 'md_all' " + "field on %s (device %s) (found=%u newest=%u).", + g_mirror_get_diskname(disk), sc->sc_name, + disk->d_init_ndisks, sc->sc_ndisks); + /* + * XXX Do we want to kill same-genid mirrors + * with different md_all? Force them dirty? + * Take sc_ndisks = max(, md_all) over current + * generation? + * + * For now, preserve existing behavior and kill + * them. + */ +#if 1 + g_mirror_destroy_disk(disk); + sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; + continue; +#endif + } + + if (disk->d_sync.ds_syncid != sc->sc_syncid) continue; if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { @@ -2495,7 +2517,7 @@ "master disk for synchronization.", g_mirror_get_diskname(pdisk), sc->sc_name); LIST_FOREACH(disk, &sc->sc_disks, d_next) { - if (disk->d_sync.ds_syncid != syncid) + if (disk->d_sync.ds_syncid != sc->sc_syncid) continue; if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { @@ -2516,7 +2538,7 @@ * We have some non-dirty disks. */ LIST_FOREACH(disk, &sc->sc_disks, d_next) { - if (disk->d_sync.ds_syncid != syncid) + if (disk->d_sync.ds_syncid != sc->sc_syncid) continue; if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { @@ -2532,8 +2554,7 @@ /* Reset hint. */ sc->sc_hint = NULL; - sc->sc_syncid = syncid; - if (force || broken) { + if (force) { /* Remember to bump syncid on first write. */ sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; } @@ -2870,37 +2891,23 @@ struct g_mirror_metadata *md) { + G_MIRROR_DEBUG(2, "%s: md_did 0x%u disk %s device %s md_all 0x%x " + "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x " + "sc_balance 0x%x sc_mediasize 0x%lx pp_mediasize 0x%lx " + "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%lx " + "md_dflags 0x%lx md_syncid 0x%x md_genid 0x%x md_priority 0x%x " + "sc_state 0x%x.", + __func__, md->md_did, pp->name, sc->sc_name, md->md_all, + sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance, + sc->sc_balance, sc->sc_mediasize, pp->mediasize, md->md_sectorsize, + sc->sc_sectorsize, md->md_mflags, md->md_dflags, md->md_syncid, + md->md_genid, md->md_priority, sc->sc_state); + if (g_mirror_id2disk(sc, md->md_did) != NULL) { G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.", pp->name, md->md_did); return (EEXIST); } - if (md->md_all != sc->sc_ndisks) { - G_MIRROR_DEBUG(1, - "Invalid '%s' field on disk %s (device %s), skipping.", - "md_all", pp->name, sc->sc_name); - return (EINVAL); - } - if (md->md_slice != sc->sc_slice) { - G_MIRROR_DEBUG(1, - "Invalid '%s' field on disk %s (device %s), skipping.", - "md_slice", pp->name, sc->sc_name); - return (EINVAL); - } - if (md->md_balance != sc->sc_balance) { - G_MIRROR_DEBUG(1, - "Invalid '%s' field on disk %s (device %s), skipping.", - "md_balance", pp->name, sc->sc_name); - return (EINVAL); - } -#if 0 - if (md->md_mediasize != sc->sc_mediasize) { - G_MIRROR_DEBUG(1, - "Invalid '%s' field on disk %s (device %s), skipping.", - "md_mediasize", pp->name, sc->sc_name); - return (EINVAL); - } -#endif if (sc->sc_mediasize > pp->mediasize) { G_MIRROR_DEBUG(1, "Invalid size of disk %s (device %s), skipping.", pp->name, @@ -2938,7 +2945,7 @@ g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp, struct g_mirror_metadata *md) { - struct g_mirror_disk *disk; + struct g_mirror_disk *disk, *tdisk; int error; g_topology_assert_not(); @@ -2947,6 +2954,15 @@ error = g_mirror_check_metadata(sc, pp, md); if (error != 0) return (error); + + /* + * If the component disk we're tasting has newer metadata than the + * STARTING gmirror device, refresh the device from the component. + */ + error = g_mirror_refresh_device(sc, pp, md); + if (error != 0) + return (error); + if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING && md->md_genid < sc->sc_genid) { G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.", @@ -2965,6 +2981,24 @@ pp->name, md->md_version, G_MIRROR_VERSION); g_mirror_update_metadata(disk); } + + /* + * If we're still STARTING, kick out any previously tasted stale + * components. + */ + if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) + return (0); + + LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { + if (disk->d_genid < sc->sc_genid) { + G_MIRROR_DEBUG(0, "Stale 'genid' field on %s " + "(device %s) (component=%u latest=%u), skipping.", + g_mirror_get_diskname(disk), sc->sc_name, + disk->d_genid, sc->sc_genid); + g_mirror_destroy_disk(disk); + sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; + } + } return (0); } @@ -3029,6 +3063,21 @@ return (error); } +static void +g_mirror_reinit_from_metadata(struct g_mirror_softc *sc, + const struct g_mirror_metadata *md) +{ + + sc->sc_genid = md->md_genid; + sc->sc_syncid = md->md_syncid; + + sc->sc_slice = md->md_slice; + sc->sc_balance = md->md_balance; + sc->sc_mediasize = md->md_mediasize; + sc->sc_ndisks = md->md_all; + sc->sc_flags = md->md_mflags; +} + struct g_geom * g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, u_int type) @@ -3056,12 +3105,8 @@ sc->sc_type = type; sc->sc_id = md->md_mid; - sc->sc_slice = md->md_slice; - sc->sc_balance = md->md_balance; - sc->sc_mediasize = md->md_mediasize; + g_mirror_reinit_from_metadata(sc, md); sc->sc_sectorsize = md->md_sectorsize; - sc->sc_ndisks = md->md_all; - sc->sc_flags = md->md_mflags; sc->sc_bump_id = 0; sc->sc_idle = 1; sc->sc_last_write = time_uptime; @@ -3484,5 +3529,49 @@ EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync); } +/* + * Refresh the mirror device's metadata when gmirror encounters a newer + * generation as the individual components are being added to the mirror set. + */ +static int +g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp, + const struct g_mirror_metadata *md) +{ + + g_topology_assert_not(); + sx_assert(&sc->sc_lock, SX_XLOCKED); + + if ((sc->sc_genid < md->md_genid) || (sc->sc_genid == md->md_genid && + sc->sc_syncid < md->md_syncid)) { + G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: " + "curr=%u new=%u; syncid: curr=%u new=%u; ndisks: curr=%u " + "new=%u; provider=%s).", sc->sc_name, sc->sc_genid, + md->md_genid, sc->sc_syncid, md->md_syncid, sc->sc_ndisks, + md->md_all, pp->name); + + } else { + return (0); + } + + if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) { + /* Probable data corruption detected */ + G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state " + "(device=%s genid=%u). A stale mirror device was launched.", + g_mirror_device_state2str(sc->sc_state), sc->sc_name, + sc->sc_genid); + return (EINVAL); + } + + /* Update softc */ + g_mirror_reinit_from_metadata(sc, md); + + G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s " + "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid, + g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid, + md->md_syncid, (unsigned)md->md_all); + + return (0); +} + DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); MODULE_VERSION(geom_mirror, 0); Index: sys/geom/mirror/g_mirror_ctl.c =================================================================== --- sys/geom/mirror/g_mirror_ctl.c +++ sys/geom/mirror/g_mirror_ctl.c @@ -982,8 +982,16 @@ continue; } sc->sc_ndisks = g_mirror_ndisks(sc, -1); - LIST_FOREACH(disk, &sc->sc_disks, d_next) { - g_mirror_update_metadata(disk); + if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING && + g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0) { + /* Bump genid after forgetting disconnected disks */ + sc->sc_bump_id |= G_MIRROR_BUMP_GENID; + g_mirror_event_send(sc, 0, + G_MIRROR_EVENT_WAIT | G_MIRROR_EVENT_DEVICE); + } else { + LIST_FOREACH(disk, &sc->sc_disks, d_next) { + g_mirror_update_metadata(disk); + } } sx_xunlock(&sc->sc_lock); } Index: sys/sys/param.h =================================================================== --- sys/sys/param.h +++ sys/sys/param.h @@ -60,7 +60,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1300004 /* Master, propagated to newvers */ +#define __FreeBSD_version 1300005 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,