diff --git a/sys/geom/cache/g_cache.c b/sys/geom/cache/g_cache.c
index 2caae5ede0f4..86c2a9bb36a2 100644
--- a/sys/geom/cache/g_cache.c
+++ b/sys/geom/cache/g_cache.c
@@ -1,1014 +1,1015 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2006 Ruslan Ermilov <ru@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/time.h>
 #include <vm/uma.h>
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 #include <geom/cache/g_cache.h>
 
 /* Advertise the presence of this class as a kernel feature. */
 FEATURE(geom_cache, "GEOM cache module");
 
 /* malloc(9) type used for cache descriptors and metadata sector buffers. */
 static MALLOC_DEFINE(M_GCACHE, "gcache_data", "GEOM_CACHE Data");
 
 /* All tunables below are published under the kern.geom.cache sysctl node. */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, cache, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_CACHE stuff");
 /* Debug verbosity level. */
 static u_int g_cache_debug = 0;
 SYSCTL_UINT(_kern_geom_cache, OID_AUTO, debug, CTLFLAG_RW, &g_cache_debug, 0,
     "Debug level");
 /* When zero, g_cache_start() sends reads straight down without caching. */
 static u_int g_cache_enable = 1;
 SYSCTL_UINT(_kern_geom_cache, OID_AUTO, enable, CTLFLAG_RW, &g_cache_enable, 0,
     "");
 /* Period of the g_cache_go() callout; it is multiplied by hz when armed. */
 static u_int g_cache_timeout = 10;
 SYSCTL_UINT(_kern_geom_cache, OID_AUTO, timeout, CTLFLAG_RW, &g_cache_timeout,
     0, "");
 /*
  * How long (in time_uptime units) an entry must go untouched before
  * g_cache_go() moves it onto the used list.
  */
 static u_int g_cache_idletime = 5;
 SYSCTL_UINT(_kern_geom_cache, OID_AUTO, idletime, CTLFLAG_RW, &g_cache_idletime,
     0, "");
 /*
  * Low and high water marks for the used list, as a percentage of sc_maxent.
  * g_cache_go() starts trimming once sc_nused exceeds the high mark and
  * g_cache_free_used() trims down to the low mark.
  */
 static u_int g_cache_used_lo = 5;
 static u_int g_cache_used_hi = 20;
 static int
 sysctl_handle_pct(SYSCTL_HANDLER_ARGS)
 {
 	u_int val = *(u_int *)arg1;
 	int error;
 
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	if (val > 100)
 		return (EINVAL);
 	if ((arg1 == &g_cache_used_lo && val > g_cache_used_hi) ||
 	    (arg1 == &g_cache_used_hi && g_cache_used_lo > val))
 		return (EINVAL);
 	*(u_int *)arg1 = val;
 	return (0);
 }
 /*
  * kern.geom.cache.used_lo / used_hi: both go through sysctl_handle_pct(),
  * which range-checks the value and keeps used_lo <= used_hi.
  */
 SYSCTL_PROC(_kern_geom_cache, OID_AUTO, used_lo,
     CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, &g_cache_used_lo, 0,
     sysctl_handle_pct, "IU",
     "");
 SYSCTL_PROC(_kern_geom_cache, OID_AUTO, used_hi,
     CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, &g_cache_used_hi, 0,
     sysctl_handle_pct, "IU",
     "");
 
 /* Forward declarations for routines referenced before their definition. */
 static int g_cache_destroy(struct g_cache_softc *sc, boolean_t force);
 static g_ctl_destroy_geom_t g_cache_destroy_geom;
 
 static g_taste_t g_cache_taste;
 static g_ctl_req_t g_cache_config;
 static g_dumpconf_t g_cache_dumpconf;
 
 /* Class descriptor; registered with GEOM by DECLARE_GEOM_CLASS() below. */
 struct g_class g_cache_class = {
 	.name = G_CACHE_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_cache_config,
 	.taste = g_cache_taste,
 	.destroy_geom = g_cache_destroy_geom
 };
 
 /*
  * Convert between a byte offset and a cache block number.  The cache block
  * size is a power of two (1 << sc_bshift), so both are plain shifts.
  */
 #define	OFF2BNO(off, sc)	((off) >> (sc)->sc_bshift)
 #define	BNO2OFF(bno, sc)	((bno) << (sc)->sc_bshift)
 
 /*
  * Obtain a cache descriptor with an attached data buffer.
  *
  * The entry at the head of the used list is recycled when there is one
  * (it is unlinked from both the used list and its hash bucket).  Otherwise
  * a fresh descriptor is allocated without sleeping, unless sc_nent already
  * exceeds sc_maxent.  Returns NULL when no descriptor is available.
  * The softc mutex must be held.
  */
 static struct g_cache_desc *
 g_cache_alloc(struct g_cache_softc *sc)
 {
 	struct g_cache_desc *ent;
 
 	mtx_assert(&sc->sc_mtx, MA_OWNED);
 
 	/* Recycle from the used list first. */
 	ent = TAILQ_FIRST(&sc->sc_usedlist);
 	if (ent != NULL) {
 		TAILQ_REMOVE(&sc->sc_usedlist, ent, d_used);
 		sc->sc_nused--;
 		ent->d_flags = 0;
 		LIST_REMOVE(ent, d_next);
 		return (ent);
 	}
 
 	/* Nothing to recycle; account for the refusal if we are over limit. */
 	if (sc->sc_nent > sc->sc_maxent) {
 		sc->sc_cachefull++;
 		return (NULL);
 	}
 
 	ent = malloc(sizeof(*ent), M_GCACHE, M_NOWAIT | M_ZERO);
 	if (ent == NULL)
 		return (NULL);
 	ent->d_data = uma_zalloc(sc->sc_zone, M_NOWAIT);
 	if (ent->d_data != NULL) {
 		sc->sc_nent++;
 		return (ent);
 	}
 	/* No data buffer: give the bare descriptor back. */
 	free(ent, M_GCACHE);
 	return (NULL);
 }
 
 /*
  * Release a descriptor and its data buffer and drop the entry count.
  * The caller must already have unlinked dp from any list it was on and
  * must hold the softc mutex.
  */
 static void
 g_cache_free(struct g_cache_softc *sc, struct g_cache_desc *dp)
 {
 
 	mtx_assert(&sc->sc_mtx, MA_OWNED);
 
 	sc->sc_nent--;
 	uma_zfree(sc->sc_zone, dp->d_data);
 	free(dp, M_GCACHE);
 }
 
 /*
  * Trim the used list from its head until no more than used_lo percent of
  * sc_maxent entries remain on it.  Every trimmed entry is also removed from
  * its hash bucket and freed.  The softc mutex must be held.
  */
 static void
 g_cache_free_used(struct g_cache_softc *sc)
 {
 	struct g_cache_desc *victim;
 	u_int target;
 
 	mtx_assert(&sc->sc_mtx, MA_OWNED);
 
 	target = g_cache_used_lo * sc->sc_maxent / 100;
 	for (;;) {
 		if (sc->sc_nused <= target)
 			break;
 		KASSERT(!TAILQ_EMPTY(&sc->sc_usedlist), ("used list empty"));
 		victim = TAILQ_FIRST(&sc->sc_usedlist);
 		TAILQ_REMOVE(&sc->sc_usedlist, victim, d_used);
 		sc->sc_nused--;
 		LIST_REMOVE(victim, d_next);
 		g_cache_free(sc, victim);
 	}
 }
 
 /*
  * Satisfy (part of) read request bp from cache entry dp.
  *
  * The bytes of bp that overlap the cached block are copied out of dp->d_data
  * unless an error is pending, bio_completed is advanced, and the bio is
  * delivered upward once every byte has been accounted for.  'error' is
  * recorded in bp only if bp does not already carry an error.  Finally the
  * entry's place on the used list and its access time are updated.
  * The softc mutex must be held.
  */
 static void
 g_cache_deliver(struct g_cache_softc *sc, struct bio *bp,
     struct g_cache_desc *dp, int error)
 {
 	off_t off1, off, len;
 
 	mtx_assert(&sc->sc_mtx, MA_OWNED);
 	/* The request must overlap the block held by this entry. */
 	KASSERT(OFF2BNO(bp->bio_offset, sc) <= dp->d_bno, ("wrong entry"));
 	KASSERT(OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc) >=
 	    dp->d_bno, ("wrong entry"));
 
 	/*
 	 * off1: byte offset at which the cached block starts.
 	 * off:  first byte of the overlap between request and block.
 	 * len:  number of overlapping bytes.
 	 */
 	off1 = BNO2OFF(dp->d_bno, sc);
 	off = MAX(bp->bio_offset, off1);
 	len = MIN(bp->bio_offset + bp->bio_length, off1 + sc->sc_bsize) - off;
 
 	/* The first error seen sticks; never overwrite it with a later one. */
 	if (bp->bio_error == 0)
 		bp->bio_error = error;
 	if (bp->bio_error == 0) {
 		bcopy(dp->d_data + (off - off1),
 		    bp->bio_data + (off - bp->bio_offset), len);
 	}
 	/* Count the bytes even on error so that completion is still detected. */
 	bp->bio_completed += len;
 	KASSERT(bp->bio_completed <= bp->bio_length, ("extra data"));
 	if (bp->bio_completed == bp->bio_length) {
 		/* A failed request reports no completed bytes. */
 		if (bp->bio_error != 0)
 			bp->bio_completed = 0;
 		g_io_deliver(bp, bp->bio_error);
 	}
 
 	if (dp->d_flags & D_FLAG_USED) {
 		/* Already on the used list: move it to the tail. */
 		TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used);
 		TAILQ_INSERT_TAIL(&sc->sc_usedlist, dp, d_used);
 	} else if (OFF2BNO(off + len, sc) > dp->d_bno) {
 		/*
 		 * The request read up to the end of this block, so put the
 		 * entry on the used list, from which g_cache_alloc() and
 		 * g_cache_free_used() take entries.
 		 */
 		TAILQ_INSERT_TAIL(&sc->sc_usedlist, dp, d_used);
 		sc->sc_nused++;
 		dp->d_flags |= D_FLAG_USED;
 	}
 	dp->d_atime = time_uptime;
 }
 
 /*
  * Completion handler for the block-fill read issued by g_cache_read().
  *
  * Every request queued on the descriptor is handed to g_cache_deliver()
  * together with the fill's error status.  The descriptor is then discarded
  * if it was invalidated by a write while the fill was in flight, or if the
  * fill itself failed; otherwise it stays in the cache.  The cloned bio is
  * destroyed in all cases.
  */
 static void
 g_cache_done(struct bio *bp)
 {
 	struct g_cache_softc *sc;
 	struct g_cache_desc *dp;
 	struct bio *bp2, *tmpbp;
 
 	sc = bp->bio_from->geom->softc;
 	KASSERT(G_CACHE_DESC1(bp) == sc, ("corrupt bio_caller in g_cache_done()"));
 	dp = G_CACHE_DESC2(bp);
 	mtx_lock(&sc->sc_mtx);
 	bp2 = dp->d_biolist;
 	while (bp2 != NULL) {
 		KASSERT(G_CACHE_NEXT_BIO1(bp2) == sc, ("corrupt bio_driver in g_cache_done()"));
 		/* Fetch the link first: delivery may complete and release bp2. */
 		tmpbp = G_CACHE_NEXT_BIO2(bp2);
 		g_cache_deliver(sc, bp2, dp, bp->bio_error);
 		bp2 = tmpbp;
 	}
 	dp->d_biolist = NULL;
 	if (dp->d_flags & D_FLAG_INVALID) {
 		/*
 		 * g_cache_invalidate() already unhooked this entry from its
 		 * hash bucket, but g_cache_deliver() above may have put it
 		 * on the used list.  Unlink it before freeing so the list
 		 * is not left with a dangling element.
 		 */
 		if (dp->d_flags & D_FLAG_USED) {
 			TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used);
 			sc->sc_nused--;
 		}
 		sc->sc_invalid--;
 		g_cache_free(sc, dp);
 	} else if (bp->bio_error) {
 		/* The fill failed: do not keep a block with no valid data. */
 		LIST_REMOVE(dp, d_next);
 		if (dp->d_flags & D_FLAG_USED) {
 			TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used);
 			sc->sc_nused--;
 		}
 		g_cache_free(sc, dp);
 	}
 	mtx_unlock(&sc->sc_mtx);
 	g_destroy_bio(bp);
 }
 
 /*
  * Find the descriptor caching block number bno by walking its hash bucket.
  * Returns NULL when the block is not cached.  The softc mutex must be held.
  */
 static struct g_cache_desc *
 g_cache_lookup(struct g_cache_softc *sc, off_t bno)
 {
 	struct g_cache_desc *ent;
 
 	mtx_assert(&sc->sc_mtx, MA_OWNED);
 
 	LIST_FOREACH(ent, &sc->sc_desclist[G_CACHE_BUCKET(bno)], d_next) {
 		if (ent->d_bno == bno)
 			break;
 	}
 	/* LIST_FOREACH leaves ent NULL when the bucket was exhausted. */
 	return (ent);
 }
 
 /*
  * Serve the next not-yet-completed cache block of read request bp.
  *
  * The block is identified by bio_offset + bio_completed.  On a hit the
  * request is either delivered immediately or, if the block is still being
  * filled, queued on the descriptor.  On a miss a descriptor is allocated,
  * bp becomes its first waiter and a read of the whole block is sent to the
  * consumer; g_cache_done() completes the waiters.
  *
  * Returns 0 when the request has been taken over, or ENOMEM when no
  * descriptor or bio clone could be allocated.
  */
 static int
 g_cache_read(struct g_cache_softc *sc, struct bio *bp)
 {
 	struct bio *cbp;
 	struct g_cache_desc *dp;
 
 	mtx_lock(&sc->sc_mtx);
 	dp = g_cache_lookup(sc,
 	    OFF2BNO(bp->bio_offset + bp->bio_completed, sc));
 	if (dp != NULL) {
 		/* Add to waiters list or deliver. */
 		sc->sc_cachehits++;
 		if (dp->d_biolist != NULL) {
 			/* A fill is in flight: chain bp in front of the waiters. */
 			G_CACHE_NEXT_BIO1(bp) = sc;
 			G_CACHE_NEXT_BIO2(bp) = dp->d_biolist;
 			dp->d_biolist = bp;
 		} else
 			g_cache_deliver(sc, bp, dp, 0);
 		mtx_unlock(&sc->sc_mtx);
 		return (0);
 	}
 
 	/* Cache miss.  Allocate entry and schedule bio.  */
 	sc->sc_cachemisses++;
 	dp = g_cache_alloc(sc);
 	if (dp == NULL) {
 		mtx_unlock(&sc->sc_mtx);
 		return (ENOMEM);
 	}
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		g_cache_free(sc, dp);
 		mtx_unlock(&sc->sc_mtx);
 		return (ENOMEM);
 	}
 
 	/* Publish the entry with bp as its only waiter before unlocking. */
 	dp->d_bno = OFF2BNO(bp->bio_offset + bp->bio_completed, sc);
 	G_CACHE_NEXT_BIO1(bp) = sc;
 	G_CACHE_NEXT_BIO2(bp) = NULL;
 	dp->d_biolist = bp;
 	LIST_INSERT_HEAD(&sc->sc_desclist[G_CACHE_BUCKET(dp->d_bno)],
 	    dp, d_next);
 	mtx_unlock(&sc->sc_mtx);
 
 	/* Redirect the clone to read the whole block into the cache buffer. */
 	G_CACHE_DESC1(cbp) = sc;
 	G_CACHE_DESC2(cbp) = dp;
 	cbp->bio_done = g_cache_done;
 	cbp->bio_offset = BNO2OFF(dp->d_bno, sc);
 	cbp->bio_data = dp->d_data;
 	cbp->bio_length = sc->sc_bsize;
 	g_io_request(cbp, LIST_FIRST(&bp->bio_to->geom->consumer));
 	return (0);
 }
 
 /*
  * Drop every cached block touched by write request bp.
  *
  * Each affected entry is unlinked from its hash bucket and from the used
  * list.  An idle entry is freed on the spot; one with readers still waiting
  * on a fill is only flagged D_FLAG_INVALID so that g_cache_done() frees it
  * once the fill completes.
  */
 static void
 g_cache_invalidate(struct g_cache_softc *sc, struct bio *bp)
 {
 	struct g_cache_desc *ent;
 	off_t blk, last;
 
 	mtx_lock(&sc->sc_mtx);
 	blk = OFF2BNO(bp->bio_offset, sc);
 	last = OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc);
 	do {
 		ent = g_cache_lookup(sc, blk++);
 		if (ent == NULL)
 			continue;
 		LIST_REMOVE(ent, d_next);
 		if ((ent->d_flags & D_FLAG_USED) != 0) {
 			TAILQ_REMOVE(&sc->sc_usedlist, ent, d_used);
 			sc->sc_nused--;
 		}
 		if (ent->d_biolist != NULL) {
 			/* Readers are waiting; defer the free. */
 			ent->d_flags = D_FLAG_INVALID;
 			sc->sc_invalid++;
 		} else
 			g_cache_free(sc, ent);
 	} while (blk <= last);
 	mtx_unlock(&sc->sc_mtx);
 }
 
 /*
  * GEOM start method: entry point for every bio sent to the cache provider.
  *
  * Reads that lie entirely below sc_tail and span one or two cache blocks
  * are offered to the cache.  Writes invalidate the blocks they touch.
  * Whatever the cache does not take over is cloned and passed unchanged to
  * the underlying consumer.
  */
 static void
 g_cache_start(struct bio *bp)
 {
 	struct g_cache_softc *sc;
 	struct g_geom *gp;
 	struct g_cache_desc *dp;
 	struct bio *cbp;
 
 	gp = bp->bio_to->geom;
 	sc = gp->softc;
 	G_CACHE_LOGREQ(bp, "Request received.");
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		sc->sc_reads++;
 		sc->sc_readbytes += bp->bio_length;
 		/* Caching switched off via sysctl: pass through. */
 		if (!g_cache_enable)
 			break;
 		/* Requests reaching past sc_tail are never cached. */
 		if (bp->bio_offset + bp->bio_length > sc->sc_tail)
 			break;
 		if (OFF2BNO(bp->bio_offset, sc) ==
 		    OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc)) {
 			/* The request fits inside a single cache block. */
 			sc->sc_cachereads++;
 			sc->sc_cachereadbytes += bp->bio_length;
 			if (g_cache_read(sc, bp) == 0)
 				return;
 			/* The cache could not take it: undo the statistics. */
 			sc->sc_cachereads--;
 			sc->sc_cachereadbytes -= bp->bio_length;
 			break;
 		} else if (OFF2BNO(bp->bio_offset, sc) + 1 ==
 		    OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc)) {
 			/*
 			 * The request straddles two adjacent blocks.  It is
 			 * served from the cache only if the first block is
 			 * already cached and not being filled.
 			 */
 			mtx_lock(&sc->sc_mtx);
 			dp = g_cache_lookup(sc, OFF2BNO(bp->bio_offset, sc));
 			if (dp == NULL || dp->d_biolist != NULL) {
 				mtx_unlock(&sc->sc_mtx);
 				break;
 			}
 			sc->sc_cachereads++;
 			sc->sc_cachereadbytes += bp->bio_length;
 			/* Copy the first part now, then go after the second block. */
 			g_cache_deliver(sc, bp, dp, 0);
 			mtx_unlock(&sc->sc_mtx);
 			if (g_cache_read(sc, bp) == 0)
 				return;
 			/*
 			 * NOTE(review): bp->bio_completed was already advanced
 			 * by g_cache_deliver() above, yet the fallback below
 			 * re-issues the whole request -- confirm that the
 			 * completion accounting stays correct on this path.
 			 */
 			sc->sc_cachereads--;
 			sc->sc_cachereadbytes -= bp->bio_length;
 			break;
 		}
 		break;
 	case BIO_WRITE:
 		sc->sc_writes++;
 		sc->sc_wrotebytes += bp->bio_length;
 		/* Cached copies of the written range are dropped first. */
 		g_cache_invalidate(sc, bp);
 		break;
 	}
 	/* Pass-through path: forward a clone to the underlying provider. */
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 	cbp->bio_done = g_std_done;
 	G_CACHE_LOGREQ(cbp, "Sending request.");
 	g_io_request(cbp, LIST_FIRST(&gp->consumer));
 }
 
 /*
  * Periodic housekeeping callout; runs with the softc mutex held.
  *
  * Entries that are neither on the used list nor being filled and that have
  * been idle for at least g_cache_idletime are appended to the used list.
  * If the used list then exceeds used_hi percent of sc_maxent it is trimmed
  * by g_cache_free_used().  The callout re-arms itself.
  */
 static void
 g_cache_go(void *arg)
 {
 	struct g_cache_softc *sc;
 	struct g_cache_desc *ent;
 	int bucket;
 
 	sc = arg;
 	mtx_assert(&sc->sc_mtx, MA_OWNED);
 
 	/* Forcibly mark idle ready entries as used. */
 	for (bucket = 0; bucket < G_CACHE_BUCKETS; bucket++) {
 		LIST_FOREACH(ent, &sc->sc_desclist[bucket], d_next) {
 			if ((ent->d_flags & D_FLAG_USED) == 0 &&
 			    ent->d_biolist == NULL &&
 			    time_uptime - ent->d_atime >= g_cache_idletime) {
 				TAILQ_INSERT_TAIL(&sc->sc_usedlist, ent,
 				    d_used);
 				sc->sc_nused++;
 				ent->d_flags |= D_FLAG_USED;
 			}
 		}
 	}
 
 	/* Keep the number of used entries low. */
 	if (sc->sc_nused > g_cache_used_hi * sc->sc_maxent / 100)
 		g_cache_free_used(sc);
 
 	callout_reset(&sc->sc_callout, g_cache_timeout * hz, g_cache_go, sc);
 }
 
 /*
  * GEOM access method: forward the access count deltas on our provider to
  * the geom's first consumer and return g_access()'s result.
  */
 static int
 g_cache_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *below;
 
 	below = LIST_FIRST(&pp->geom->consumer);
 	return (g_access(below, dr, dw, de));
 }
 
 /*
  * GEOM orphan method: the provider below us is gone, so destroy the cache
  * device forcibly.  The result of g_cache_destroy() is not examined.
  */
 static void
 g_cache_orphan(struct g_consumer *cp)
 {
 	struct g_cache_softc *sc;
 
 	g_topology_assert();
 	sc = cp->geom->softc;
 	g_cache_destroy(sc, 1);
 }
 
 static struct g_cache_softc *
 g_cache_find_device(struct g_class *mp, const char *name)
 {
 	struct g_geom *gp;
 
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		if (strcmp(gp->name, name) == 0)
 			return (gp->softc);
 	}
 	return (NULL);
 }
 
 /*
  * Create a cache device named md->md_name on top of provider pp.
  *
  * md supplies the maximum number of entries (md_size, at least 100) and the
  * cache block size (md_bsize: a power of two, no larger than maxphys and a
  * multiple of the provider's sector size).  'type' says whether the device
  * was found by tasting (G_CACHE_TYPE_AUTOMATIC, in which case the trailing
  * metadata sector is hidden from the new provider) or created by hand.
  *
  * Returns the new geom, or NULL if the parameters are invalid, the name is
  * already taken, or the consumer cannot be attached to pp.  Everything
  * allocated here, including the UMA zone, is released on failure.
  */
 static struct g_geom *
 g_cache_create(struct g_class *mp, struct g_provider *pp,
     const struct g_cache_metadata *md, u_int type)
 {
 	struct g_cache_softc *sc;
 	struct g_geom *gp;
 	struct g_provider *newpp;
 	struct g_consumer *cp;
 	u_int bshift;
 	int i;
 
 	g_topology_assert();
 
 	gp = NULL;
 	newpp = NULL;
 	cp = NULL;
 
 	G_CACHE_DEBUG(1, "Creating device %s.", md->md_name);
 
 	/* Cache size is minimum 100. */
 	if (md->md_size < 100) {
 		G_CACHE_DEBUG(0, "Invalid size for device %s.", md->md_name);
 		return (NULL);
 	}
 
 	/*
 	 * Block size restrictions.  The zero test comes first so that the
 	 * shift by bshift is never evaluated for a zero block size.
 	 */
 	bshift = ffs(md->md_bsize) - 1;
 	if (md->md_bsize == 0 || md->md_bsize > maxphys ||
 	    md->md_bsize != 1 << bshift ||
 	    (md->md_bsize % pp->sectorsize) != 0) {
 		G_CACHE_DEBUG(0, "Invalid blocksize for provider %s.", pp->name);
 		return (NULL);
 	}
 
 	/* Check for duplicate unit. */
 	if (g_cache_find_device(mp, (const char *)&md->md_name) != NULL) {
 		G_CACHE_DEBUG(0, "Provider %s already exists.", md->md_name);
 		return (NULL);
 	}
 
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
 	sc->sc_type = type;
 	sc->sc_bshift = bshift;
 	sc->sc_bsize = 1 << bshift;
 	/* Per-device zone from which the block-sized data buffers come. */
 	sc->sc_zone = uma_zcreate("gcache", sc->sc_bsize, NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	mtx_init(&sc->sc_mtx, "GEOM CACHE mutex", NULL, MTX_DEF);
 	for (i = 0; i < G_CACHE_BUCKETS; i++)
 		LIST_INIT(&sc->sc_desclist[i]);
 	TAILQ_INIT(&sc->sc_usedlist);
 	sc->sc_maxent = md->md_size;
 	/* The housekeeping callout is associated with the softc mutex. */
 	callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
 	gp->softc = sc;
 	sc->sc_geom = gp;
 	gp->start = g_cache_start;
 	gp->orphan = g_cache_orphan;
 	gp->access = g_cache_access;
 	gp->dumpconf = g_cache_dumpconf;
 
 	newpp = g_new_providerf(gp, "cache/%s", gp->name);
 	newpp->sectorsize = pp->sectorsize;
 	newpp->mediasize = pp->mediasize;
 	/* Automatic devices keep their metadata in the last sector. */
 	if (type == G_CACHE_TYPE_AUTOMATIC)
 		newpp->mediasize -= pp->sectorsize;
 	/* Media size rounded down to a whole number of cache blocks. */
 	sc->sc_tail = BNO2OFF(OFF2BNO(newpp->mediasize, sc), sc);
 
 	cp = g_new_consumer(gp);
 	if (g_attach(cp, pp) != 0) {
 		G_CACHE_DEBUG(0, "Cannot attach to provider %s.", pp->name);
 		g_destroy_consumer(cp);
 		g_destroy_provider(newpp);
 		mtx_destroy(&sc->sc_mtx);
 		/* The zone was created above and must not outlive the softc. */
 		uma_zdestroy(sc->sc_zone);
 		g_free(sc);
 		g_destroy_geom(gp);
 		return (NULL);
 	}
 
 	g_error_provider(newpp, 0);
 	G_CACHE_DEBUG(0, "Device %s created.", gp->name);
 	callout_reset(&sc->sc_callout, g_cache_timeout * hz, g_cache_go, sc);
 	return (gp);
 }
 
 /*
  * Tear down a cache device.
  *
  * Returns ENXIO if sc is NULL and EBUSY if the provider is still open and
  * 'force' is not set.  Otherwise the callout is drained, every descriptor
  * still linked in a hash bucket is freed, the mutex, zone and softc are
  * released and the geom is withered; 0 is returned.
  */
 static int
 g_cache_destroy(struct g_cache_softc *sc, boolean_t force)
 {
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct g_cache_desc *ent, *next;
 	int bucket, busy;
 
 	g_topology_assert();
 	if (sc == NULL)
 		return (ENXIO);
 	gp = sc->sc_geom;
 	pp = LIST_FIRST(&gp->provider);
 	busy = (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0));
 	if (busy && !force) {
 		G_CACHE_DEBUG(1, "Device %s is still open (r%dw%de%d).",
 		    pp->name, pp->acr, pp->acw, pp->ace);
 		return (EBUSY);
 	}
 	if (busy) {
 		G_CACHE_DEBUG(0, "Device %s is still open, so it "
 		    "can't be definitely removed.", pp->name);
 	} else {
 		G_CACHE_DEBUG(0, "Device %s removed.", gp->name);
 	}
 
 	/* Stop the housekeeping callout before the mutex goes away. */
 	callout_drain(&sc->sc_callout);
 	mtx_lock(&sc->sc_mtx);
 	for (bucket = 0; bucket < G_CACHE_BUCKETS; bucket++) {
 		LIST_FOREACH_SAFE(ent, &sc->sc_desclist[bucket], d_next, next)
 			g_cache_free(sc, ent);
 	}
 	mtx_unlock(&sc->sc_mtx);
 	mtx_destroy(&sc->sc_mtx);
 	uma_zdestroy(sc->sc_zone);
 	g_free(sc);
 	gp->softc = NULL;
 	g_wither_geom(gp, ENXIO);
 
 	return (0);
 }
 
 /*
  * Class destroy_geom method: non-forced destruction of the given geom.
  * req and mp are not used.
  */
 static int
 g_cache_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
 {
 	struct g_cache_softc *sc;
 
 	sc = gp->softc;
 	return (g_cache_destroy(sc, 0));
 }
 
 static int
 g_cache_read_metadata(struct g_consumer *cp, struct g_cache_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL)
 		return (error);
 
 	/* Decode metadata. */
 	cache_metadata_decode(buf, md);
 	g_free(buf);
 
 	return (0);
 }
 
 static int
 g_cache_write_metadata(struct g_consumer *cp, struct g_cache_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 0, 1, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	buf = malloc((size_t)pp->sectorsize, M_GCACHE, M_WAITOK | M_ZERO);
 	cache_metadata_encode(md, buf);
 	g_topology_unlock();
 	error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, pp->sectorsize);
 	g_topology_lock();
 	g_access(cp, 0, -1, 0);
 	free(buf, M_GCACHE);
 
 	return (error);
 }
 
 /*
  * Class taste method: probe provider pp for cache metadata.
  *
  * A throw-away geom and consumer are used to read the provider's last
  * sector.  If it carries the cache magic, a version this module can handle
  * and a recorded provider size equal to pp's media size, an automatic cache
  * device is created on top of pp.  Returns the new geom or NULL.
  */
 static struct g_geom *
 g_cache_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_cache_metadata md;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 
 	G_CACHE_DEBUG(3, "Tasting %s.", pp->name);
 
 	/* Temporary geom and consumer, used only to read the metadata. */
 	gp = g_new_geomf(mp, "cache:taste");
 	gp->start = g_cache_start;
 	gp->orphan = g_cache_orphan;
 	gp->access = g_cache_access;
 	cp = g_new_consumer(gp);
 	/*
 	 * NOTE(review): presumably these flags let the metadata read be
 	 * dispatched directly -- confirm against geom.h.
 	 */
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		error = g_cache_read_metadata(cp, &md);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	/* md is only valid when the read succeeded. */
 	if (error != 0)
 		return (NULL);
 
 	if (strcmp(md.md_magic, G_CACHE_MAGIC) != 0)
 		return (NULL);
 	if (md.md_version > G_CACHE_VERSION) {
 		printf("geom_cache.ko module is too old to handle %s.\n",
 		    pp->name);
 		return (NULL);
 	}
 	/* The metadata must have been written for a provider of this size. */
 	if (md.md_provsize != pp->mediasize)
 		return (NULL);
 
 	gp = g_cache_create(mp, pp, &md, G_CACHE_TYPE_AUTOMATIC);
 	if (gp == NULL) {
 		G_CACHE_DEBUG(0, "Can't create %s.", md.md_name);
 		return (NULL);
 	}
 	return (gp);
 }
 
 /*
  * Handle the "create" verb: build a manual cache device.
  *
  * Expects exactly two positional arguments (arg0: device name, arg1:
  * underlying provider) plus the "size" and "blocksize" parameters.  Any
  * problem is reported through gctl_error().
  */
 static void
 g_cache_ctl_create(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_cache_metadata md;
 	struct g_provider *backing;
 	intmax_t *bsizep, *sizep;
 	const char *devname;
 	int *nargsp;
 
 	g_topology_assert();
 
 	nargsp = gctl_get_paraml(req, "nargs", sizeof(*nargsp));
 	if (nargsp == NULL) {
 		gctl_error(req, "No '%s' argument", "nargs");
 		return;
 	}
 	if (*nargsp != 2) {
 		gctl_error(req, "Invalid number of arguments.");
 		return;
 	}
 
 	/* Fill in an in-memory metadata record describing the new device. */
 	strlcpy(md.md_magic, G_CACHE_MAGIC, sizeof(md.md_magic));
 	md.md_version = G_CACHE_VERSION;
 
 	devname = gctl_get_asciiparam(req, "arg0");
 	if (devname == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	strlcpy(md.md_name, devname, sizeof(md.md_name));
 
 	sizep = gctl_get_paraml(req, "size", sizeof(*sizep));
 	if (sizep == NULL) {
 		gctl_error(req, "No '%s' argument", "size");
 		return;
 	}
 	md.md_size = (u_int)*sizep;
 	if (md.md_size < 100) {
 		gctl_error(req, "Invalid '%s' argument", "size");
 		return;
 	}
 
 	bsizep = gctl_get_paraml(req, "blocksize", sizeof(*bsizep));
 	if (bsizep == NULL) {
 		gctl_error(req, "No '%s' argument", "blocksize");
 		return;
 	}
 	if (*bsizep < 0) {
 		gctl_error(req, "Invalid '%s' argument", "blocksize");
 		return;
 	}
 	md.md_bsize = (u_int)*bsizep;
 
 	/* This field is not important here. */
 	md.md_provsize = 0;
 
 	backing = gctl_get_provider(req, "arg1");
 	if (backing == NULL)
 		return;
 	if (g_cache_create(mp, backing, &md, G_CACHE_TYPE_MANUAL) == NULL)
 		gctl_error(req, "Can't create %s.", md.md_name);
 }
 
 /*
  * Handle the "configure" verb: change parameters of an existing device.
  *
  * Expects one positional argument (arg0: device name) plus the "size" and
  * "blocksize" parameters, where 0 means "leave unchanged".  A non-zero size
  * takes effect on the running device immediately.  For automatic devices
  * the on-disk metadata is rewritten as well; a new block size is only
  * recorded there.
  *
  * All parameters are validated before anything is modified, so a request
  * that is rejected with an error leaves the device exactly as it was.
  */
 static void
 g_cache_ctl_configure(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_cache_metadata md;
 	struct g_cache_softc *sc;
 	struct g_consumer *cp;
 	intmax_t *bsize, *size;
 	const char *name;
 	int error, *nargs;
 
 	g_topology_assert();
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "No '%s' argument", "nargs");
 		return;
 	}
 	if (*nargs != 1) {
 		gctl_error(req, "Missing device.");
 		return;
 	}
 
 	name = gctl_get_asciiparam(req, "arg0");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	sc = g_cache_find_device(mp, name);
 	if (sc == NULL) {
 		G_CACHE_DEBUG(1, "Device %s is invalid.", name);
 		gctl_error(req, "Device %s is invalid.", name);
 		return;
 	}
 
 	size = gctl_get_paraml(req, "size", sizeof(*size));
 	if (size == NULL) {
 		gctl_error(req, "No '%s' argument", "size");
 		return;
 	}
 	if ((u_int)*size != 0 && (u_int)*size < 100) {
 		gctl_error(req, "Invalid '%s' argument", "size");
 		return;
 	}
 
 	bsize = gctl_get_paraml(req, "blocksize", sizeof(*bsize));
 	if (bsize == NULL) {
 		gctl_error(req, "No '%s' argument", "blocksize");
 		return;
 	}
 	if (*bsize < 0) {
 		gctl_error(req, "Invalid '%s' argument", "blocksize");
 		return;
 	}
 
 	/* Everything checked out; only now touch the running device. */
 	if ((u_int)*size != 0)
 		sc->sc_maxent = (u_int)*size;
 
 	/* Manual devices have no on-disk metadata to update. */
 	if (sc->sc_type != G_CACHE_TYPE_AUTOMATIC)
 		return;
 
 	strlcpy(md.md_name, name, sizeof(md.md_name));
 	strlcpy(md.md_magic, G_CACHE_MAGIC, sizeof(md.md_magic));
 	md.md_version = G_CACHE_VERSION;
 	if ((u_int)*size != 0)
 		md.md_size = (u_int)*size;
 	else
 		md.md_size = sc->sc_maxent;
 	if ((u_int)*bsize != 0)
 		md.md_bsize = (u_int)*bsize;
 	else
 		md.md_bsize = sc->sc_bsize;
 	cp = LIST_FIRST(&sc->sc_geom->consumer);
 	md.md_provsize = cp->provider->mediasize;
 	error = g_cache_write_metadata(cp, &md);
 	if (error == 0)
 		G_CACHE_DEBUG(2, "Metadata on %s updated.", cp->provider->name);
 	else
 		G_CACHE_DEBUG(0, "Cannot update metadata on %s (error=%d).",
 		    cp->provider->name, error);
 }
 
 /*
  * Handle the "destroy" and "stop" verbs: destroy every device named by the
  * positional arguments arg0..argN-1, honouring the "force" parameter.
  * Processing stops at the first argument that is missing, unknown or cannot
  * be destroyed, and the problem is reported through gctl_error().
  */
 static void
 g_cache_ctl_destroy(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_cache_softc *sc;
 	const char *devname;
 	char key[16];
 	int *nargsp, *forcep, rc, idx;
 
 	g_topology_assert();
 
 	nargsp = gctl_get_paraml(req, "nargs", sizeof(*nargsp));
 	if (nargsp == NULL) {
 		gctl_error(req, "No '%s' argument", "nargs");
 		return;
 	}
 	if (*nargsp <= 0) {
 		gctl_error(req, "Missing device(s).");
 		return;
 	}
 	forcep = gctl_get_paraml(req, "force", sizeof(*forcep));
 	if (forcep == NULL) {
 		gctl_error(req, "No 'force' argument");
 		return;
 	}
 
 	for (idx = 0; idx < *nargsp; idx++) {
 		snprintf(key, sizeof(key), "arg%d", idx);
 		devname = gctl_get_asciiparam(req, key);
 		if (devname == NULL) {
 			gctl_error(req, "No 'arg%d' argument", idx);
 			return;
 		}
 		sc = g_cache_find_device(mp, devname);
 		if (sc == NULL) {
 			G_CACHE_DEBUG(1, "Device %s is invalid.", devname);
 			gctl_error(req, "Device %s is invalid.", devname);
 			return;
 		}
 		rc = g_cache_destroy(sc, *forcep);
 		if (rc == 0)
 			continue;
 		gctl_error(req, "Cannot destroy device %s (error=%d).",
 		    sc->sc_name, rc);
 		return;
 	}
 }
 
 /*
  * Handle the "reset" verb: zero the statistics counters of every device
  * named by the positional arguments arg0..argN-1.  Processing stops at the
  * first argument that is missing or unknown.
  */
 static void
 g_cache_ctl_reset(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_cache_softc *sc;
 	const char *devname;
 	char key[16];
 	int idx, *nargsp;
 
 	g_topology_assert();
 
 	nargsp = gctl_get_paraml(req, "nargs", sizeof(*nargsp));
 	if (nargsp == NULL) {
 		gctl_error(req, "No '%s' argument", "nargs");
 		return;
 	}
 	if (*nargsp <= 0) {
 		gctl_error(req, "Missing device(s).");
 		return;
 	}
 
 	idx = 0;
 	while (idx < *nargsp) {
 		snprintf(key, sizeof(key), "arg%d", idx);
 		devname = gctl_get_asciiparam(req, key);
 		if (devname == NULL) {
 			gctl_error(req, "No 'arg%d' argument", idx);
 			return;
 		}
 		sc = g_cache_find_device(mp, devname);
 		if (sc == NULL) {
 			G_CACHE_DEBUG(1, "Device %s is invalid.", devname);
 			gctl_error(req, "Device %s is invalid.", devname);
 			return;
 		}
 		/* Read-side counters. */
 		sc->sc_reads = 0;
 		sc->sc_readbytes = 0;
 		sc->sc_cachereads = 0;
 		sc->sc_cachereadbytes = 0;
 		sc->sc_cachehits = 0;
 		sc->sc_cachemisses = 0;
 		sc->sc_cachefull = 0;
 		/* Write-side counters. */
 		sc->sc_writes = 0;
 		sc->sc_wrotebytes = 0;
 		idx++;
 	}
 }
 
 /*
  * Class ctlreq method: check that the request carries the expected
  * G_CACHE_VERSION and dispatch on the verb.  Unknown verbs are reported
  * through gctl_error().
  */
 static void
 g_cache_config(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	uint32_t *verp;
 
 	g_topology_assert();
 
 	verp = gctl_get_paraml(req, "version", sizeof(*verp));
 	if (verp == NULL) {
 		gctl_error(req, "No '%s' argument.", "version");
 		return;
 	}
 	if (*verp != G_CACHE_VERSION) {
 		gctl_error(req, "Userland and kernel parts are out of sync.");
 		return;
 	}
 
 	if (strcmp(verb, "create") == 0)
 		g_cache_ctl_create(req, mp);
 	else if (strcmp(verb, "configure") == 0)
 		g_cache_ctl_configure(req, mp);
 	else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0)
 		g_cache_ctl_destroy(req, mp);
 	else if (strcmp(verb, "reset") == 0)
 		g_cache_ctl_reset(req, mp);
 	else
 		gctl_error(req, "Unknown verb.");
 }
 
 /*
  * GEOM dumpconf method: append the geom-level configuration and statistics
  * of a cache device to sb, one XML element per line, each prefixed with
  * 'indent'.  Nothing is emitted for per-provider or per-consumer calls, nor
  * for a geom whose softc is already gone.
  */
 static void
 g_cache_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_cache_softc *sc;
 
 	if (pp != NULL || cp != NULL)
 		return;
 	sc = gp->softc;
 	/*
 	 * g_cache_destroy() clears gp->softc before withering the geom, so
 	 * there may be no softc left to describe.
 	 */
 	if (sc == NULL)
 		return;
 	/* Configuration. */
 	sbuf_printf(sb, "%s<Size>%u</Size>\n", indent, sc->sc_maxent);
 	sbuf_printf(sb, "%s<BlockSize>%u</BlockSize>\n", indent, sc->sc_bsize);
 	sbuf_printf(sb, "%s<TailOffset>%ju</TailOffset>\n", indent,
 	    (uintmax_t)sc->sc_tail);
 	/* Current occupancy. */
 	sbuf_printf(sb, "%s<Entries>%u</Entries>\n", indent, sc->sc_nent);
 	sbuf_printf(sb, "%s<UsedEntries>%u</UsedEntries>\n", indent,
 	    sc->sc_nused);
 	sbuf_printf(sb, "%s<InvalidEntries>%u</InvalidEntries>\n", indent,
 	    sc->sc_invalid);
 	/* Counters; these are the ones cleared by the "reset" verb. */
 	sbuf_printf(sb, "%s<Reads>%ju</Reads>\n", indent, sc->sc_reads);
 	sbuf_printf(sb, "%s<ReadBytes>%ju</ReadBytes>\n", indent,
 	    sc->sc_readbytes);
 	sbuf_printf(sb, "%s<CacheReads>%ju</CacheReads>\n", indent,
 	    sc->sc_cachereads);
 	sbuf_printf(sb, "%s<CacheReadBytes>%ju</CacheReadBytes>\n", indent,
 	    sc->sc_cachereadbytes);
 	sbuf_printf(sb, "%s<CacheHits>%ju</CacheHits>\n", indent,
 	    sc->sc_cachehits);
 	sbuf_printf(sb, "%s<CacheMisses>%ju</CacheMisses>\n", indent,
 	    sc->sc_cachemisses);
 	sbuf_printf(sb, "%s<CacheFull>%ju</CacheFull>\n", indent,
 	    sc->sc_cachefull);
 	sbuf_printf(sb, "%s<Writes>%ju</Writes>\n", indent, sc->sc_writes);
 	sbuf_printf(sb, "%s<WroteBytes>%ju</WroteBytes>\n", indent,
 	    sc->sc_wrotebytes);
 }
 
 /* Register g_cache_class with GEOM and set the module version to 0. */
 DECLARE_GEOM_CLASS(g_cache_class, g_cache);
 MODULE_VERSION(geom_cache, 0);
diff --git a/sys/geom/concat/g_concat.c b/sys/geom/concat/g_concat.c
index dfa7b97a1806..6db293b07146 100644
--- a/sys/geom/concat/g_concat.c
+++ b/sys/geom/concat/g_concat.c
@@ -1,1026 +1,1027 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 #include <geom/concat/g_concat.h>
 
 FEATURE(geom_concat, "GEOM concatenation support");
 
 /* Malloc type used for the softc and the per-device disk array. */
 static MALLOC_DEFINE(M_CONCAT, "concat_data", "GEOM_CONCAT Data");
 
 /* kern.geom.concat sysctl node and its "debug" knob. */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, concat, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_CONCAT stuff");
 static u_int g_concat_debug = 0;
 SYSCTL_UINT(_kern_geom_concat, OID_AUTO, debug, CTLFLAG_RWTUN, &g_concat_debug, 0,
     "Debug level");
 
 /*
  * Forward declarations: g_concat_destroy() is called from
  * g_concat_remove_disk(), which is defined before it, and the remaining
  * functions are referenced by the class and geom method tables.
  */
 static int g_concat_destroy(struct g_concat_softc *sc, boolean_t force);
 static int g_concat_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 
 static g_taste_t g_concat_taste;
 static g_ctl_req_t g_concat_config;
 static g_dumpconf_t g_concat_dumpconf;
 
 /* Class descriptor registered through DECLARE_GEOM_CLASS() below. */
 struct g_class g_concat_class = {
 	.name = G_CONCAT_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_concat_config,
 	.taste = g_concat_taste,
 	.destroy_geom = g_concat_destroy_geom
 };
 
 /*
  * Greatest Common Divisor (Euclid's algorithm).
  *
  * gcd(a, 0) == a for any a, hence gcd(0, 0) == 0.
  */
 static u_int
 gcd(u_int a, u_int b)
 {
 	u_int c;
 
 	while (b != 0) {
 		c = a;
 		a = b;
 		b = (c % b);
 	}
 	return (a);
 }
 
 /*
  * Least Common Multiple.
  *
  * The division is done before the multiplication: gcd(a, b) always
  * divides 'a', so the value equals (a * b) / gcd(a, b), but the
  * intermediate result no longer wraps around u_int when the LCM itself is
  * representable (e.g. two large, equal sector sizes).
  *
  * Returns 0 when both arguments are 0 instead of dividing by zero.
  */
 static u_int
 lcm(u_int a, u_int b)
 {
 	u_int g;
 
 	g = gcd(a, b);
 	if (g == 0)
 		return (0);
 	return ((a / g) * b);
 }
 
 /*
  * Count the disk slots of 'sc' that currently have a consumer attached.
  */
 static u_int
 g_concat_nvalid(struct g_concat_softc *sc)
 {
 	u_int idx, nvalid = 0;
 
 	for (idx = 0; idx < sc->sc_ndisks; idx++) {
 		/* Empty slot: no consumer has been attached (or it is gone). */
 		if (sc->sc_disks[idx].d_consumer == NULL)
 			continue;
 		nvalid++;
 	}
 	return (nvalid);
 }
 
 /*
  * Take a component disk out of service.
  *
  * The disk is flagged as removed (logged once) and the concat provider,
  * if it still exists, is withered with ENXIO.  As long as the disk's
  * consumer has non-zero access counts the function stops there; the
  * consumer is detached and destroyed only once it is fully closed.  When
  * that leaves the geom without consumers, the whole device is destroyed.
  *
  * The topology lock must be held and the disk must have a consumer.
  */
 static void
 g_concat_remove_disk(struct g_concat_disk *disk)
 {
 	struct g_consumer *cp;
 	struct g_concat_softc *sc;
 
 	g_topology_assert();
 	KASSERT(disk->d_consumer != NULL, ("Non-valid disk in %s.", __func__));
 	sc = disk->d_softc;
 	cp = disk->d_consumer;
 
 	/* Log and mark only on the first removal attempt for this disk. */
 	if (!disk->d_removed) {
 		G_CONCAT_DEBUG(0, "Disk %s removed from %s.",
 		    cp->provider->name, sc->sc_name);
 		disk->d_removed = 1;
 	}
 
 	/* Losing any component takes the concatenated provider down. */
 	if (sc->sc_provider != NULL) {
 		G_CONCAT_DEBUG(0, "Device %s deactivated.",
 		    sc->sc_provider->name);
 		g_wither_provider(sc->sc_provider, ENXIO);
 		sc->sc_provider = NULL;
 	}
 
 	/*
 	 * Still open: leave the consumer in place.  g_concat_access() calls
 	 * back here once the access counts of a removed disk reach zero.
 	 */
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
 		return;
 	disk->d_consumer = NULL;
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	/* If there are no valid disks anymore, remove device. */
 	if (LIST_EMPTY(&sc->sc_geom->consumer))
 		g_concat_destroy(sc, 1);
 }
 
 /*
  * Orphan (and spoil) handler: the provider below 'cp' is going away, so
  * remove the disk that consumer belongs to.
  */
 static void
 g_concat_orphan(struct g_consumer *cp)
 {
 	struct g_concat_disk *disk;
 
 	g_topology_assert();
 
 	/* No softc: the device has already been torn down. */
 	if (cp->geom->softc == NULL)
 		return;
 
 	disk = cp->private;
 	if (disk != NULL)	/* Possible to be NULL? */
 		g_concat_remove_disk(disk);
 }
 
 /*
  * Access method of the concat provider: apply the requested change in
  * read/write/exclusive counts (dr, dw, de) to every component consumer.
  *
  * Returns 0 on success.  If any consumer refuses the change, the
  * consumers that were already updated are rolled back and the error from
  * g_access() is returned.
  */
 static int
 g_concat_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *cp1, *cp2, *tmp;
 	struct g_concat_disk *disk;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	gp = pp->geom;
 
 	/* On first open, grab an extra "exclusive" bit */
 	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
 		de++;
 	/* ... and let go of it on last close */
 	if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
 		de--;
 
 	/*
 	 * The _SAFE variant is required because g_concat_remove_disk() may
 	 * destroy the consumer currently being visited.
 	 */
 	LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) {
 		error = g_access(cp1, dr, dw, de);
 		if (error != 0)
 			goto fail;
 		disk = cp1->private;
 		/*
 		 * A disk that was removed while open could not be detached
 		 * back then; finish the job now that it is fully closed.
 		 */
 		if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 &&
 		    disk->d_removed) {
 			g_concat_remove_disk(disk); /* May destroy geom. */
 		}
 	}
 	return (0);
 
 fail:
 	/* Undo the change on every consumer visited before the failing one. */
 	LIST_FOREACH(cp2, &gp->consumer, consumer) {
 		if (cp1 == cp2)
 			break;
 		g_access(cp2, -dr, -dw, -de);
 	}
 	return (error);
 }
 
 /*
  * Answer a "GEOM::candelete" attribute request: report 1 if at least one
  * component that has not been removed advertises delete support,
  * otherwise 0.
  */
 static void
 g_concat_candelete(struct bio *bp)
 {
 	struct g_concat_softc *sc;
 	struct g_concat_disk *disk;
 	int i, val;
 
 	sc = bp->bio_to->geom->softc;
 	val = 0;
 	for (i = 0; i < sc->sc_ndisks; i++) {
 		disk = &sc->sc_disks[i];
 		if (disk->d_removed || !disk->d_candelete)
 			continue;
 		/* One capable, present disk is enough. */
 		val = 1;
 		break;
 	}
 	g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
 }
 
 /*
  * Handle a "GEOM::kerneldump" attribute request.
  *
  * The request is routed to the single component whose range contains the
  * requested offset.  The offset is rewritten to be relative to that
  * component and the length is clipped so the dump area does not extend
  * past the component's end.  Fails with EOPNOTSUPP if no component covers
  * the offset and with ENOMEM if the bio cannot be cloned.
  */
 static void
 g_concat_kernel_dump(struct bio *bp)
 {
 	struct g_concat_softc *sc;
 	struct g_concat_disk *disk;
 	struct bio *cbp;
 	struct g_kerneldump *gkd;
 	u_int i;
 
 	sc = bp->bio_to->geom->softc;
 	gkd = (struct g_kerneldump *)bp->bio_data;
 	/* Find the disk whose [d_start, d_end) range holds the offset. */
 	for (i = 0; i < sc->sc_ndisks; i++) {
 		if (sc->sc_disks[i].d_start <= gkd->offset &&
 		    sc->sc_disks[i].d_end > gkd->offset)
 			break;
 	}
 	if (i == sc->sc_ndisks) {
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 	disk = &sc->sc_disks[i];
 	/* Translate to a disk-relative offset and clip to the disk size. */
 	gkd->offset -= disk->d_start;
 	if (gkd->length > disk->d_end - disk->d_start - gkd->offset)
 		gkd->length = disk->d_end - disk->d_start - gkd->offset;
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 	cbp->bio_done = g_std_done;
 	g_io_request(cbp, disk->d_consumer);
 	G_CONCAT_DEBUG(1, "Kernel dump will go to %s.",
 	    disk->d_consumer->provider->name);
 }
 
 /*
  * Completion callback for a cloned bio that is one of several children
  * of a parent request.  Folds the child's status into the parent under
  * the softc lock and delivers the parent once every child is in.
  */
 static void
 g_concat_done(struct bio *bp)
 {
 	struct g_concat_softc *sc;
 	struct bio *parent;
 	int finished;
 
 	parent = bp->bio_parent;
 	sc = parent->bio_to->geom->softc;
 
 	mtx_lock(&sc->sc_lock);
 	/* Keep the first error reported by any child. */
 	if (parent->bio_error == 0)
 		parent->bio_error = bp->bio_error;
 	parent->bio_completed += bp->bio_completed;
 	parent->bio_inbed++;
 	finished = (parent->bio_children == parent->bio_inbed);
 	mtx_unlock(&sc->sc_lock);
 
 	/* Deliver outside the lock, and only from the last child. */
 	if (finished)
 		g_io_deliver(parent, parent->bio_error);
 	g_destroy_bio(bp);
 }
 
 /*
  * Called for both BIO_FLUSH and BIO_SPEEDUP. Just pass the call down
  * to every component disk.
  *
  * All clones are allocated first and queued; they are only issued once
  * every allocation has succeeded.  If any clone cannot be allocated, the
  * ones already queued are destroyed and the request fails with ENOMEM
  * (unless bp already carries an error).
  */
 static void
 g_concat_passdown(struct g_concat_softc *sc, struct bio *bp)
 {
 	struct bio_queue_head queue;
 	struct g_consumer *cp;
 	struct bio *cbp;
 	u_int no;
 
 	bioq_init(&queue);
 	for (no = 0; no < sc->sc_ndisks; no++) {
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			while ((cbp = bioq_takefirst(&queue)) != NULL)
 				g_destroy_bio(cbp);
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
 			return;
 		}
 		bioq_insert_tail(&queue, cbp);
 		cbp->bio_done = g_concat_done;
 		/* Stash the target consumer until the clone is issued. */
 		cbp->bio_caller1 = sc->sc_disks[no].d_consumer;
 		cbp->bio_to = sc->sc_disks[no].d_consumer->provider;
 	}
 	while ((cbp = bioq_takefirst(&queue)) != NULL) {
 		G_CONCAT_LOGREQ(cbp, "Sending request.");
 		cp = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		g_io_request(cbp, cp);
 	}
 }
 
 /*
  * Start method of the concat provider.
  *
  * BIO_READ, BIO_WRITE and BIO_DELETE requests are split along component
  * boundaries: one clone is created for every disk whose range overlaps
  * the request, with the offset translated to be disk-relative.
  * BIO_FLUSH and BIO_SPEEDUP are broadcast to all disks, the
  * "GEOM::kerneldump" and "GEOM::candelete" attributes are handled
  * locally, and everything else is rejected with EOPNOTSUPP.
  *
  * As in g_concat_passdown(), all clones are allocated before any of them
  * is issued, so an allocation failure fails the whole request with ENOMEM
  * without partial I/O having been started.
  */
 static void
 g_concat_start(struct bio *bp)
 {
 	struct bio_queue_head queue;
 	struct g_concat_softc *sc;
 	struct g_concat_disk *disk;
 	struct g_provider *pp;
 	off_t offset, end, length, off, len;
 	struct bio *cbp;
 	char *addr;
 	u_int no;
 
 	pp = bp->bio_to;
 	sc = pp->geom->softc;
 	/*
 	 * If sc == NULL, provider's error should be set and g_concat_start()
 	 * should not be called at all.
 	 */
 	KASSERT(sc != NULL,
 	    ("Provider's error should be set (error=%d)(device=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 
 	G_CONCAT_LOGREQ(bp, "Request received.");
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 		break;
 	case BIO_SPEEDUP:
 	case BIO_FLUSH:
 		g_concat_passdown(sc, bp);
 		return;
 	case BIO_GETATTR:
 		if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
 			g_concat_kernel_dump(bp);
 			return;
 		} else if (strcmp("GEOM::candelete", bp->bio_attribute) == 0) {
 			g_concat_candelete(bp);
 			return;
 		}
 		/* To which provider it should be delivered? */
 		/* FALLTHROUGH */
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 
 	offset = bp->bio_offset;
 	length = bp->bio_length;
 	/*
 	 * For unmapped requests 'addr' starts at NULL and is only used as a
 	 * running byte offset into the parent's page array; for mapped
 	 * requests it walks the parent's data buffer.
 	 */
 	if ((bp->bio_flags & BIO_UNMAPPED) != 0)
 		addr = NULL;
 	else
 		addr = bp->bio_data;
 	end = offset + length;
 
 	bioq_init(&queue);
 	for (no = 0; no < sc->sc_ndisks; no++) {
 		disk = &sc->sc_disks[no];
 		/* Disk lies entirely before the (remaining) request. */
 		if (disk->d_end <= offset)
 			continue;
 		/* Disk lies entirely after the request; so do all later ones. */
 		if (disk->d_start >= end)
 			break;
 
 		/* Portion of the request that falls on this disk. */
 		off = offset - disk->d_start;
 		len = MIN(length, disk->d_end - offset);
 		length -= len;
 		offset += len;
 
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			while ((cbp = bioq_takefirst(&queue)) != NULL)
 				g_destroy_bio(cbp);
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
 			return;
 		}
 		bioq_insert_tail(&queue, cbp);
 		/*
 		 * Fill in the component buf structure.
 		 */
 		/*
 		 * A request served by a single disk has exactly one child, so
 		 * the standard completion handler is sufficient.
 		 */
 		if (len == bp->bio_length)
 			cbp->bio_done = g_std_done;
 		else
 			cbp->bio_done = g_concat_done;
 		cbp->bio_offset = off;
 		cbp->bio_length = len;
 		if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
 			/* Advance the page array/offset by the bytes consumed. */
 			cbp->bio_ma_offset += (uintptr_t)addr;
 			cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
 			cbp->bio_ma_offset %= PAGE_SIZE;
 			cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
 			    cbp->bio_length) / PAGE_SIZE;
 		} else
 			cbp->bio_data = addr;
 		addr += len;
 		cbp->bio_to = disk->d_consumer->provider;
 		/* Remember the target disk until the clone is issued. */
 		cbp->bio_caller1 = disk;
 
 		if (length == 0)
 			break;
 	}
 	KASSERT(length == 0,
 	    ("Length is still greater than 0 (class=%s, name=%s).",
 	    bp->bio_to->geom->class->name, bp->bio_to->geom->name));
 	while ((cbp = bioq_takefirst(&queue)) != NULL) {
 		G_CONCAT_LOGREQ(cbp, "Sending request.");
 		disk = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		g_io_request(cbp, disk->d_consumer);
 	}
 }
 
 /*
  * Bring the device online once every disk slot has a consumer.
  *
  * Creates the "concat/<name>" provider, lays the disks out back to back
  * (recording each disk's start/end offsets), probes each disk for delete
  * support and derives the provider's sector size as the least common
  * multiple of the component sector sizes.  Does nothing while any disk
  * is still missing.
  */
 static void
 g_concat_check_and_run(struct g_concat_softc *sc)
 {
 	struct g_concat_disk *disk;
 	struct g_provider *dp, *pp;
 	u_int no, sectorsize = 0;
 	off_t start;
 	int error;
 
 	g_topology_assert();
 	if (g_concat_nvalid(sc) != sc->sc_ndisks)
 		return;
 
 	pp = g_new_providerf(sc->sc_geom, "concat/%s", sc->sc_name);
 	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE |
 	    G_PF_ACCEPT_UNMAPPED;
 	start = 0;
 	for (no = 0; no < sc->sc_ndisks; no++) {
 		disk = &sc->sc_disks[no];
 		dp = disk->d_consumer->provider;
 		disk->d_start = start;
 		disk->d_end = disk->d_start + dp->mediasize;
 		/*
 		 * Automatic devices keep their metadata in the last sector
 		 * (see g_concat_read_metadata()); exclude it from the data
 		 * area.
 		 */
 		if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC)
 			disk->d_end -= dp->sectorsize;
 		start = disk->d_end;
 		/* Open the disk briefly to query its delete capability. */
 		error = g_access(disk->d_consumer, 1, 0, 0);
 		if (error == 0) {
 			error = g_getattr("GEOM::candelete", disk->d_consumer,
 			    &disk->d_candelete);
 			if (error != 0)
 				disk->d_candelete = 0;
 			(void)g_access(disk->d_consumer, -1, 0, 0);
 		} else
 			G_CONCAT_DEBUG(1, "Failed to access disk %s, error %d.",
 			    dp->name, error);
 		if (no == 0)
 			sectorsize = dp->sectorsize;
 		else
 			sectorsize = lcm(sectorsize, dp->sectorsize);
 
 		/* A provider underneath us doesn't support unmapped */
 		if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
 			G_CONCAT_DEBUG(1, "Cancelling unmapped "
 			    "because of %s.", dp->name);
 			pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
 		}
 	}
 	pp->sectorsize = sectorsize;
 	/* We have sc->sc_disks[sc->sc_ndisks - 1].d_end in 'start'. */
 	pp->mediasize = start;
 	/* Stripe geometry is inherited from the first component only. */
 	pp->stripesize = sc->sc_disks[0].d_consumer->provider->stripesize;
 	pp->stripeoffset = sc->sc_disks[0].d_consumer->provider->stripeoffset;
 	sc->sc_provider = pp;
 	g_error_provider(pp, 0);
 
 	G_CONCAT_DEBUG(0, "Device %s activated.", sc->sc_provider->name);
 }
 
 static int
 g_concat_read_metadata(struct g_consumer *cp, struct g_concat_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL)
 		return (error);
 
 	/* Decode metadata. */
 	concat_metadata_decode(buf, md);
 	g_free(buf);
 
 	return (0);
 }
 
 /*
  * Add disk to given device.
  *
  * Attaches a new consumer of 'sc' to provider 'pp' and installs it in
  * disk slot 'no'.  If the device is already open, the new consumer is
  * opened with the same access counts as the existing ones.  For automatic
  * devices the on-disk metadata is re-read and must still match the
  * device's name and id.  Once all slots are filled the device is started
  * via g_concat_check_and_run().
  *
  * Returns 0 on success; EINVAL if 'no' is out of range or the metadata no
  * longer matches, EEXIST if the slot is already occupied, or the error
  * from g_attach()/g_access()/g_concat_read_metadata().  On failure the
  * consumer is detached and destroyed and the slot is left untouched.
  */
 static int
 g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no)
 {
 	struct g_concat_disk *disk;
 	struct g_consumer *cp, *fcp;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	/* Metadata corrupted? */
 	if (no >= sc->sc_ndisks)
 		return (EINVAL);
 
 	disk = &sc->sc_disks[no];
 	/* Check if disk is not already attached. */
 	if (disk->d_consumer != NULL)
 		return (EEXIST);
 
 	gp = sc->sc_geom;
 	/* Reference consumer whose access counts the new one must mirror. */
 	fcp = LIST_FIRST(&gp->consumer);
 
 	cp = g_new_consumer(gp);
 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		return (error);
 	}
 
 	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
 		error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
 		if (error != 0) {
 			g_detach(cp);
 			g_destroy_consumer(cp);
 			return (error);
 		}
 	}
 	if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC) {
 		struct g_concat_metadata md;
 
 		/* Re-read metadata. */
 		error = g_concat_read_metadata(cp, &md);
 		if (error != 0)
 			goto fail;
 
 		if (strcmp(md.md_magic, G_CONCAT_MAGIC) != 0 ||
 		    strcmp(md.md_name, sc->sc_name) != 0 ||
 		    md.md_id != sc->sc_id) {
 			G_CONCAT_DEBUG(0, "Metadata on %s changed.", pp->name);
 			/*
 			 * 'error' is still 0 from the successful read above;
 			 * without an explicit code the caller would be told
 			 * the disk was added although its consumer is
 			 * destroyed below.
 			 */
 			error = EINVAL;
 			goto fail;
 		}
 	}
 
 	cp->private = disk;
 	disk->d_consumer = cp;
 	disk->d_softc = sc;
 	disk->d_start = 0;	/* not yet */
 	disk->d_end = 0;	/* not yet */
 	disk->d_removed = 0;
 
 	G_CONCAT_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);
 
 	g_concat_check_and_run(sc);
 
 	return (0);
 fail:
 	/* Drop the access counts acquired above before tearing down. */
 	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
 		g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	return (error);
 }
 
 /*
  * Create a new, still empty, concat device described by 'md'.
  *
  * Allocates the geom, the softc and an array of md->md_all disk slots
  * (all without a consumer) and wires up the geom methods.  No provider
  * is created here; that happens in g_concat_check_and_run() once every
  * slot has been filled.
  *
  * Returns the new geom, or NULL if 'md' describes zero disks or a device
  * with the same name is already configured in class 'mp'.
  */
 static struct g_geom *
 g_concat_create(struct g_class *mp, const struct g_concat_metadata *md,
     u_int type)
 {
 	struct g_concat_softc *sc;
 	struct g_geom *gp;
 	u_int no;
 
 	G_CONCAT_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
 	    md->md_id);
 
 	/* At least one disk is required. */
 	if (md->md_all < 1)
 		return (NULL);
 
 	/* Check for duplicate unit */
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) {
 			G_CONCAT_DEBUG(0, "Device %s already configured.",
 			    gp->name);
 			return (NULL);
 		}
 	}
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	sc = malloc(sizeof(*sc), M_CONCAT, M_WAITOK | M_ZERO);
 	gp->start = g_concat_start;
 	/* Spoiling is handled exactly like orphaning. */
 	gp->spoiled = g_concat_orphan;
 	gp->orphan = g_concat_orphan;
 	gp->access = g_concat_access;
 	gp->dumpconf = g_concat_dumpconf;
 
 	sc->sc_id = md->md_id;
 	sc->sc_ndisks = md->md_all;
 	sc->sc_disks = malloc(sizeof(struct g_concat_disk) * sc->sc_ndisks,
 	    M_CONCAT, M_WAITOK | M_ZERO);
 	/* Mark every slot as empty (already zeroed by M_ZERO above). */
 	for (no = 0; no < sc->sc_ndisks; no++)
 		sc->sc_disks[no].d_consumer = NULL;
 	sc->sc_type = type;
 	mtx_init(&sc->sc_lock, "gconcat lock", NULL, MTX_DEF);
 
 	gp->softc = sc;
 	sc->sc_geom = gp;
 	sc->sc_provider = NULL;
 
 	G_CONCAT_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
 
 	return (gp);
 }
 
 /*
  * Destroy a concat device.
  *
  * Returns ENXIO if 'sc' is NULL, EBUSY if the provider is still open and
  * 'force' is not set, EINPROGRESS if some consumers could not be removed
  * yet (they are still open), and 0 once the device has been torn down.
  *
  * Note that this function and g_concat_remove_disk() call each other:
  * removing the last consumer triggers a nested g_concat_destroy() that
  * frees 'sc', so the outer invocation must not touch it afterwards.
  */
 static int
 g_concat_destroy(struct g_concat_softc *sc, boolean_t force)
 {
 	struct g_provider *pp;
 	struct g_consumer *cp, *cp1;
 	struct g_geom *gp;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 
 	pp = sc->sc_provider;
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		if (force) {
 			G_CONCAT_DEBUG(0, "Device %s is still open, so it "
 			    "can't be definitely removed.", pp->name);
 		} else {
 			G_CONCAT_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		}
 	}
 
 	gp = sc->sc_geom;
 	LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) {
 		g_concat_remove_disk(cp->private);
 		/*
 		 * After the last list element has been processed the nested
 		 * call may already have destroyed the device; bail out
 		 * without touching 'sc' or 'gp' again.
 		 */
 		if (cp1 == NULL)
 			return (0);	/* Recursion happened. */
 	}
 	/* Consumers that are still open were left attached. */
 	if (!LIST_EMPTY(&gp->consumer))
 		return (EINPROGRESS);
 
 	gp->softc = NULL;
 	KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
 	    gp->name));
 	free(sc->sc_disks, M_CONCAT);
 	mtx_destroy(&sc->sc_lock);
 	free(sc, M_CONCAT);
 
 	G_CONCAT_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom(gp, ENXIO);
 	return (0);
 }
 
 /*
  * Class destroy_geom method: non-forced destruction of the device that
  * backs 'gp'.
  */
 static int
 g_concat_destroy_geom(struct gctl_req *req __unused,
     struct g_class *mp __unused, struct g_geom *gp)
 {
 
 	return (g_concat_destroy(gp->softc, 0));
 }
 
 /*
  * Class taste method: check whether provider 'pp' carries concat
  * metadata and, if so, attach it to the matching automatic device,
  * creating that device first when it does not exist yet.
  *
  * The metadata is read through a throw-away geom and consumer that are
  * destroyed again before the metadata is examined.
  *
  * Returns the geom the disk was added to, or NULL if the provider is not
  * a (usable) concat component or could not be added.
  */
 static struct g_geom *
 g_concat_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_concat_metadata md;
 	struct g_concat_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 
 	/* Skip providers that are already open for writing. */
 	if (pp->acw > 0)
 		return (NULL);
 
 	G_CONCAT_DEBUG(3, "Tasting %s.", pp->name);
 
 	/* Temporary geom and consumer used only to read the metadata. */
 	gp = g_new_geomf(mp, "concat:taste");
 	gp->start = g_concat_start;
 	gp->access = g_concat_access;
 	gp->orphan = g_concat_orphan;
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		error = g_concat_read_metadata(cp, &md);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	if (strcmp(md.md_magic, G_CONCAT_MAGIC) != 0)
 		return (NULL);
 	if (md.md_version > G_CONCAT_VERSION) {
 		printf("geom_concat.ko module is too old to handle %s.\n",
 		    pp->name);
 		return (NULL);
 	}
 	/*
 	 * Backward compatibility:
 	 */
 	/* There was no md_provider field in earlier versions of metadata. */
 	if (md.md_version < 3)
 		bzero(md.md_provider, sizeof(md.md_provider));
 	/* There was no md_provsize field in earlier versions of metadata. */
 	if (md.md_version < 4)
 		md.md_provsize = pp->mediasize;
 
 	/* Metadata bound to a specific provider name must match this one. */
 	if (md.md_provider[0] != '\0' &&
 	    !g_compare_names(md.md_provider, pp->name))
 		return (NULL);
 	/* The recorded provider size must match this provider's size. */
 	if (md.md_provsize != pp->mediasize)
 		return (NULL);
 
 	/*
 	 * Let's check if device already exists.
 	 */
 	sc = NULL;
 	/* On a match the loop breaks with gp set; otherwise gp ends up NULL. */
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		if (sc->sc_type != G_CONCAT_TYPE_AUTOMATIC)
 			continue;
 		if (strcmp(md.md_name, sc->sc_name) != 0)
 			continue;
 		if (md.md_id != sc->sc_id)
 			continue;
 		break;
 	}
 	if (gp != NULL) {
 		G_CONCAT_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 		error = g_concat_add_disk(sc, pp, md.md_no);
 		if (error != 0) {
 			G_CONCAT_DEBUG(0,
 			    "Cannot add disk %s to %s (error=%d).", pp->name,
 			    gp->name, error);
 			return (NULL);
 		}
 	} else {
 		gp = g_concat_create(mp, &md, G_CONCAT_TYPE_AUTOMATIC);
 		if (gp == NULL) {
 			G_CONCAT_DEBUG(0, "Cannot create device %s.",
 			    md.md_name);
 			return (NULL);
 		}
 		sc = gp->softc;
 		G_CONCAT_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 		error = g_concat_add_disk(sc, pp, md.md_no);
 		if (error != 0) {
 			G_CONCAT_DEBUG(0,
 			    "Cannot add disk %s to %s (error=%d).", pp->name,
 			    gp->name, error);
 			/* Do not leave a freshly created, empty device behind. */
 			g_concat_destroy(sc, 1);
 			return (NULL);
 		}
 	}
 
 	return (gp);
 }
 
 /*
  * Handle the "create" verb: build a manual concat device.
  *
  * Request parameters: "nargs" (at least 2), "arg0" (device name) and
  * "arg1".."argN" (component providers, in order).  All providers are
  * validated before anything is created.  If any disk cannot be attached
  * afterwards, the device is destroyed again and the names of the failing
  * disks are reported through gctl_error().
  */
 static void
 g_concat_ctl_create(struct gctl_req *req, struct g_class *mp)
 {
 	u_int attached, no;
 	struct g_concat_metadata md;
 	struct g_provider *pp;
 	struct g_concat_softc *sc;
 	struct g_geom *gp;
 	struct sbuf *sb;
 	const char *name;
 	char param[16];
 	int *nargs;
 
 	g_topology_assert();
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "No '%s' argument.", "nargs");
 		return;
 	}
 	if (*nargs < 2) {
 		gctl_error(req, "Too few arguments.");
 		return;
 	}
 
 	/* Synthesize in-memory metadata; nothing is written to disk here. */
 	strlcpy(md.md_magic, G_CONCAT_MAGIC, sizeof(md.md_magic));
 	md.md_version = G_CONCAT_VERSION;
 	name = gctl_get_asciiparam(req, "arg0");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg%u' argument.", 0);
 		return;
 	}
 	strlcpy(md.md_name, name, sizeof(md.md_name));
 	md.md_id = arc4random();
 	md.md_no = 0;
 	md.md_all = *nargs - 1;
 	bzero(md.md_provider, sizeof(md.md_provider));
 	/* This field is not important here. */
 	md.md_provsize = 0;
 
 	/* Check all providers are valid */
 	for (no = 1; no < *nargs; no++) {
 		snprintf(param, sizeof(param), "arg%u", no);
 		pp = gctl_get_provider(req, param);
 		/*
 		 * NOTE(review): no gctl_error() here; presumably
 		 * gctl_get_provider() reports the failure itself -- confirm.
 		 */
 		if (pp == NULL)
 			return;
 	}
 
 	gp = g_concat_create(mp, &md, G_CONCAT_TYPE_MANUAL);
 	if (gp == NULL) {
 		gctl_error(req, "Can't configure %s.", md.md_name);
 		return;
 	}
 
 	sc = gp->softc;
 	/* Collect the names of disks that fail to attach for the error text. */
 	sb = sbuf_new_auto();
 	sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name);
 	for (attached = 0, no = 1; no < *nargs; no++) {
 		snprintf(param, sizeof(param), "arg%u", no);
 		pp = gctl_get_provider(req, param);
 		if (pp == NULL) {
 			name = gctl_get_asciiparam(req, param);
 			MPASS(name != NULL);
 			sbuf_printf(sb, " %s", name);
 			continue;
 		}
 		/* argN goes into disk slot N - 1. */
 		if (g_concat_add_disk(sc, pp, no - 1) != 0) {
 			G_CONCAT_DEBUG(1, "Disk %u (%s) not attached to %s.",
 			    no, pp->name, gp->name);
 			sbuf_printf(sb, " %s", pp->name);
 			continue;
 		}
 		attached++;
 	}
 	sbuf_finish(sb);
 	/* All or nothing: a partially assembled device is torn down. */
 	if (md.md_all != attached) {
 		g_concat_destroy(gp->softc, 1);
 		gctl_error(req, "%s", sbuf_data(sb));
 	}
 	sbuf_delete(sb);
 }
 
 /*
  * Look up a concat device of class 'mp' by name.  A leading _PATH_DEV
  * prefix in 'name' is ignored.  Returns the softc or NULL if not found.
  */
 static struct g_concat_softc *
 g_concat_find_device(struct g_class *mp, const char *name)
 {
 	struct g_concat_softc *sc;
 	struct g_geom *gp;
 	size_t devlen;
 
 	devlen = strlen(_PATH_DEV);
 	if (strncmp(name, _PATH_DEV, devlen) == 0)
 		name += devlen;
 
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		/* Geoms without a softc are skipped. */
 		if (sc != NULL && strcmp(sc->sc_name, name) == 0)
 			return (sc);
 	}
 	return (NULL);
 }
 
 /*
  * Handle the "destroy"/"stop" verbs.
  *
  * Request parameters: "nargs" (at least 1), "force", and
  * "arg0".."argN-1" naming the devices to destroy.  Processing stops at
  * the first device that is missing or cannot be destroyed; devices
  * handled before that point stay destroyed.
  */
 static void
 g_concat_ctl_destroy(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_concat_softc *sc;
 	int *force, *nargs, error;
 	const char *name;
 	char param[16];
 	u_int i;
 
 	g_topology_assert();
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "No '%s' argument.", "nargs");
 		return;
 	}
 	if (*nargs <= 0) {
 		gctl_error(req, "Missing device(s).");
 		return;
 	}
 	force = gctl_get_paraml(req, "force", sizeof(*force));
 	if (force == NULL) {
 		gctl_error(req, "No '%s' argument.", "force");
 		return;
 	}
 
 	/* *nargs is known to be positive here, so the cast is safe. */
 	for (i = 0; i < (u_int)*nargs; i++) {
 		snprintf(param, sizeof(param), "arg%u", i);
 		name = gctl_get_asciiparam(req, param);
 		if (name == NULL) {
 			gctl_error(req, "No 'arg%u' argument.", i);
 			return;
 		}
 		sc = g_concat_find_device(mp, name);
 		if (sc == NULL) {
 			gctl_error(req, "No such device: %s.", name);
 			return;
 		}
 		error = g_concat_destroy(sc, *force);
 		/* g_concat_destroy() frees 'sc' only on its success path. */
 		if (error != 0) {
 			gctl_error(req, "Cannot destroy device %s (error=%d).",
 			    sc->sc_name, error);
 			return;
 		}
 	}
 }
 
 /*
  * Class ctlreq method: verify that the request was built for this
  * version of the module, then dispatch on the verb.
  */
 static void
 g_concat_config(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	uint32_t *vers;
 
 	g_topology_assert();
 
 	vers = gctl_get_paraml(req, "version", sizeof(*vers));
 	if (vers == NULL) {
 		gctl_error(req, "No '%s' argument.", "version");
 		return;
 	}
 	if (*vers != G_CONCAT_VERSION) {
 		gctl_error(req, "Userland and kernel parts are out of sync.");
 		return;
 	}
 
 	/* "stop" is accepted as a synonym for "destroy". */
 	if (strcmp(verb, "create") == 0)
 		g_concat_ctl_create(req, mp);
 	else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0)
 		g_concat_ctl_destroy(req, mp);
 	else
 		gctl_error(req, "Unknown verb.");
 }
 
 /*
  * Geom dumpconf method.  Emits XML elements into 'sb':
  *  - for a consumer: the end and start offsets of its disk;
  *  - for the geom itself: id, type, disk counts and up/down state;
  *  - for a provider: nothing.
  */
 static void
 g_concat_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_concat_softc *sc;
 	struct g_concat_disk *disk;
 	const char *text;
 
 	g_topology_assert();
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 
 	/* Nothing to report for providers. */
 	if (pp != NULL)
 		return;
 
 	if (cp != NULL) {
 		disk = cp->private;
 		if (disk == NULL)
 			return;
 		sbuf_printf(sb, "%s<End>%jd</End>\n", indent,
 		    (intmax_t)disk->d_end);
 		sbuf_printf(sb, "%s<Start>%jd</Start>\n", indent,
 		    (intmax_t)disk->d_start);
 		return;
 	}
 
 	/* Geom-level information. */
 	sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
 
 	if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC)
 		text = "AUTOMATIC";
 	else if (sc->sc_type == G_CONCAT_TYPE_MANUAL)
 		text = "MANUAL";
 	else
 		text = "UNKNOWN";
 	sbuf_printf(sb, "%s<Type>", indent);
 	sbuf_cat(sb, text);
 	sbuf_cat(sb, "</Type>\n");
 
 	sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n",
 	    indent, sc->sc_ndisks, g_concat_nvalid(sc));
 
 	/* The device is up only while an error-free provider exists. */
 	if (sc->sc_provider != NULL && sc->sc_provider->error == 0)
 		text = "UP";
 	else
 		text = "DOWN";
 	sbuf_printf(sb, "%s<State>", indent);
 	sbuf_cat(sb, text);
 	sbuf_cat(sb, "</State>\n");
 }
 
 DECLARE_GEOM_CLASS(g_concat_class, g_concat);
 MODULE_VERSION(geom_concat, 0);
diff --git a/sys/geom/journal/g_journal.c b/sys/geom/journal/g_journal.c
index e57d58d6f7ca..eba0f0dcab34 100644
--- a/sys/geom/journal/g_journal.c
+++ b/sys/geom/journal/g_journal.c
@@ -1,3024 +1,3025 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/eventhandler.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
 #include <sys/sched.h>
 #include <sys/taskqueue.h>
 #include <sys/vnode.h>
 #include <sys/sbuf.h>
 #ifdef GJ_MEMDEBUG
 #include <sys/stack.h>
 #include <sys/kdb.h>
 #endif
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 
 #include <geom/journal/g_journal.h>
 
 FEATURE(geom_journal, "GEOM journaling support");
 
 /*
  * On-disk journal format:
  *
  * JH - Journal header
  * RH - Record header
  *
  * %%%%%% ****** +------+ +------+     ****** +------+     %%%%%%
  * % JH % * RH * | Data | | Data | ... * RH * | Data | ... % JH % ...
  * %%%%%% ****** +------+ +------+     ****** +------+     %%%%%%
  *
  */
 
 /* Compile-time check: both on-disk headers must fit in 512 bytes. */
 CTASSERT(sizeof(struct g_journal_header) <= 512);
 CTASSERT(sizeof(struct g_journal_record_header) <= 512);
 
 static MALLOC_DEFINE(M_JOURNAL, "journal_data", "GEOM_JOURNAL Data");
 /*
  * Taken by gj_malloc(), gj_free() and gj_realloc() around updates of
  * g_journal_cache_used, and when the switcher is woken up.
  */
 static struct mtx g_journal_cache_mtx;
 MTX_SYSINIT(g_journal_cache, &g_journal_cache_mtx, "cache usage", MTX_DEF);
 
 /* NULL-terminated table of file system descriptors. */
 const struct g_journal_desc *g_journal_filesystems[] = {
 	&g_journal_ufs,
 	NULL
 };
 
 SYSCTL_DECL(_kern_geom);
 
 /*
  * Tunables; all of them are exported under kern.geom.journal by the
  * SYSCTL declarations that follow.
  */
 int g_journal_debug = 0;
 static u_int g_journal_switch_time = 10;
 static u_int g_journal_force_switch = 70;
 static u_int g_journal_parallel_flushes = 16;
 static u_int g_journal_parallel_copies = 16;
 static u_int g_journal_accept_immediately = 64;
 /* Changed only through g_journal_record_entries_sysctl(). */
 static u_int g_journal_record_entries = GJ_RECORD_HEADER_NENTRIES;
 static u_int g_journal_do_optimize = 1;
 
 static SYSCTL_NODE(_kern_geom, OID_AUTO, journal,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_JOURNAL stuff");
 SYSCTL_INT(_kern_geom_journal, OID_AUTO, debug, CTLFLAG_RWTUN, &g_journal_debug, 0,
     "Debug level");
 SYSCTL_UINT(_kern_geom_journal, OID_AUTO, switch_time, CTLFLAG_RW,
     &g_journal_switch_time, 0, "Switch journals every N seconds");
 SYSCTL_UINT(_kern_geom_journal, OID_AUTO, force_switch, CTLFLAG_RW,
     &g_journal_force_switch, 0, "Force switch when journal is N% full");
 SYSCTL_UINT(_kern_geom_journal, OID_AUTO, parallel_flushes, CTLFLAG_RW,
     &g_journal_parallel_flushes, 0,
     "Number of flush I/O requests to send in parallel");
 SYSCTL_UINT(_kern_geom_journal, OID_AUTO, accept_immediately, CTLFLAG_RW,
     &g_journal_accept_immediately, 0,
     "Number of I/O requests accepted immediately");
 SYSCTL_UINT(_kern_geom_journal, OID_AUTO, parallel_copies, CTLFLAG_RW,
     &g_journal_parallel_copies, 0,
     "Number of copy I/O requests to send in parallel");
 /*
  * Sysctl handler for g_journal_record_entries.  Reads return the current
  * value; a write is accepted only when the new value lies in the range
  * [1, GJ_RECORD_HEADER_NENTRIES], otherwise EINVAL is returned and the
  * setting is left untouched.
  */
 static int
 g_journal_record_entries_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	u_int val;
 	int rv;
 
 	/* Work on a copy so a rejected write cannot alter the live value. */
 	val = g_journal_record_entries;
 	rv = sysctl_handle_int(oidp, &val, 0, req);
 	if (rv != 0)
 		return (rv);
 	/* Read-only request: nothing more to do. */
 	if (req->newptr == NULL)
 		return (0);
 	if (val >= 1 && val <= GJ_RECORD_HEADER_NENTRIES) {
 		g_journal_record_entries = val;
 		return (0);
 	}
 	return (EINVAL);
 }
 /*
  * kern.geom.journal.record_entries is validated by
  * g_journal_record_entries_sysctl(); kern.geom.journal.optimize is a plain
  * read/write knob.
  */
 SYSCTL_PROC(_kern_geom_journal, OID_AUTO, record_entries,
     CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
     g_journal_record_entries_sysctl, "I",
     "Maximum number of entries in one journal record");
 SYSCTL_UINT(_kern_geom_journal, OID_AUTO, optimize, CTLFLAG_RW,
     &g_journal_do_optimize, 0, "Try to combine bios on flush and copy");
 
 /*
  * Cache accounting state.  g_journal_cache_used is the byte count maintained
  * by gj_malloc()/gj_free()/gj_realloc(); g_journal_cache_low is derived from
  * the limit and the switch percentage by the sysctl handlers below.
  */
 static u_long g_journal_cache_used = 0;
 static u_long g_journal_cache_limit = 64 * 1024 * 1024;
 static u_int g_journal_cache_divisor = 2;
 static u_int g_journal_cache_switch = 90;
 static u_int g_journal_cache_misses = 0;
 static u_int g_journal_cache_alloc_failures = 0;
 static u_long g_journal_cache_low = 0;
 
 static SYSCTL_NODE(_kern_geom_journal, OID_AUTO, cache,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_JOURNAL cache");
 SYSCTL_ULONG(_kern_geom_journal_cache, OID_AUTO, used, CTLFLAG_RD,
     &g_journal_cache_used, 0, "Number of allocated bytes");
 /*
  * Sysctl handler for g_journal_cache_limit.  On a successful write the new
  * limit is stored and g_journal_cache_low is recomputed from it and the
  * current g_journal_cache_switch percentage.
  */
 static int
 g_journal_cache_limit_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	u_long newlimit;
 	int rv;
 
 	newlimit = g_journal_cache_limit;
 	rv = sysctl_handle_long(oidp, &newlimit, 0, req);
 	if (rv != 0)
 		return (rv);
 	/* Read-only request: nothing to update. */
 	if (req->newptr == NULL)
 		return (0);
 	g_journal_cache_limit = newlimit;
 	/* Divide first, then scale by the percentage. */
 	g_journal_cache_low = (newlimit / 100) * g_journal_cache_switch;
 	return (0);
 }
 /*
  * The cache limit is settable at run time and as a tunable; writes go
  * through g_journal_cache_limit_sysctl().  The divisor is a read-only
  * tunable.
  */
 SYSCTL_PROC(_kern_geom_journal_cache, OID_AUTO, limit,
     CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 0,
     g_journal_cache_limit_sysctl, "I",
     "Maximum number of allocated bytes");
 SYSCTL_UINT(_kern_geom_journal_cache, OID_AUTO, divisor, CTLFLAG_RDTUN,
     &g_journal_cache_divisor, 0,
     "(kmem_size / kern.geom.journal.cache.divisor) == cache size");
 /*
  * Sysctl handler for g_journal_cache_switch, a percentage.  Values above
  * 100 are rejected with EINVAL.  On a successful write g_journal_cache_low
  * is recomputed from the current limit and the new percentage.
  */
 static int
 g_journal_cache_switch_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	u_int percent;
 	int rv;
 
 	percent = g_journal_cache_switch;
 	rv = sysctl_handle_int(oidp, &percent, 0, req);
 	if (rv != 0)
 		return (rv);
 	/* Read-only request: nothing to update. */
 	if (req->newptr == NULL)
 		return (0);
 	if (percent > 100)
 		return (EINVAL);
 	g_journal_cache_switch = percent;
 	g_journal_cache_low = (g_journal_cache_limit / 100) * percent;
 	return (0);
 }
 /* Writes to the switch percentage go through g_journal_cache_switch_sysctl(). */
 SYSCTL_PROC(_kern_geom_journal_cache, OID_AUTO, switch,
     CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
     g_journal_cache_switch_sysctl, "I",
     "Force switch when we hit this percent of cache use");
 /* Counters; declared read/write. */
 SYSCTL_UINT(_kern_geom_journal_cache, OID_AUTO, misses, CTLFLAG_RW,
     &g_journal_cache_misses, 0, "Number of cache misses");
 SYSCTL_UINT(_kern_geom_journal_cache, OID_AUTO, alloc_failures, CTLFLAG_RW,
     &g_journal_cache_alloc_failures, 0, "Memory allocation failures");
 
 /*
  * Statistics counters, exported read/write under kern.geom.journal.stats.
  */
 static u_long g_journal_stats_bytes_skipped = 0;
 static u_long g_journal_stats_combined_ios = 0;
 static u_long g_journal_stats_switches = 0;
 static u_long g_journal_stats_wait_for_copy = 0;
 static u_long g_journal_stats_journal_full = 0;
 static u_long g_journal_stats_low_mem = 0;
 
 static SYSCTL_NODE(_kern_geom_journal, OID_AUTO, stats,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_JOURNAL statistics");
 SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, skipped_bytes, CTLFLAG_RW,
     &g_journal_stats_bytes_skipped, 0, "Number of skipped bytes");
 SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, combined_ios, CTLFLAG_RW,
     &g_journal_stats_combined_ios, 0, "Number of combined I/O requests");
 SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, switches, CTLFLAG_RW,
     &g_journal_stats_switches, 0, "Number of journal switches");
 SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, wait_for_copy, CTLFLAG_RW,
     &g_journal_stats_wait_for_copy, 0, "Wait for journal copy on switch");
 SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, journal_full, CTLFLAG_RW,
     &g_journal_stats_journal_full, 0,
     "Number of times journal was almost full.");
 SYSCTL_ULONG(_kern_geom_journal_stats, OID_AUTO, low_mem, CTLFLAG_RW,
     &g_journal_stats_low_mem, 0, "Number of times low_mem hook was called.");
 
 /* Forward declarations of the class methods referenced by g_journal_class. */
 static g_taste_t g_journal_taste;
 static g_ctl_req_t g_journal_config;
 static g_dumpconf_t g_journal_dumpconf;
 static g_init_t g_journal_init;
 static g_fini_t g_journal_fini;
 
 /* GEOM class descriptor for the journal class. */
 struct g_class g_journal_class = {
 	.name = G_JOURNAL_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_journal_taste,
 	.ctlreq = g_journal_config,
 	.dumpconf = g_journal_dumpconf,
 	.init = g_journal_init,
 	.fini = g_journal_fini
 };
 
 /* Prototypes of functions used before their definition. */
 static int g_journal_destroy(struct g_journal_softc *sc);
 static void g_journal_metadata_update(struct g_journal_softc *sc);
 static void g_journal_start_switcher(struct g_class *mp);
 static void g_journal_stop_switcher(void);
 static void g_journal_switch_wait(struct g_journal_softc *sc);
 
 /* Values of g_journal_switcher_state. */
 #define	GJ_SWITCHER_WORKING	0
 #define	GJ_SWITCHER_DIE		1
 #define	GJ_SWITCHER_DIED	2
 static struct proc *g_journal_switcher_proc = NULL;
 /* Its address is also the wakeup() channel used to rouse the switcher. */
 static int g_journal_switcher_state = GJ_SWITCHER_WORKING;
 /* Set to 1 when a wake-up has been issued, so it is not issued again. */
 static int g_journal_switcher_wokenup = 0;
 static int g_journal_sync_requested = 0;
 
 #ifdef GJ_MEMDEBUG
 /*
  * Debug header placed in front of every gj_malloc() allocation: the
  * requested size and the stack of the allocating caller.
  */
 struct meminfo {
 	size_t		mi_size;
 	struct stack	mi_stack;
 };
 #endif
 
 /*
  * We use our own malloc/realloc/free functions, so we can collect statistics
  * and force journal switch when we're running out of cache.
  */
 static void *
 gj_malloc(size_t size, int flags)
 {
 #ifdef GJ_MEMDEBUG
 	struct meminfo *mi;
 #endif
 	void *p;
 	int limited;
 
 	mtx_lock(&g_journal_cache_mtx);
 	/* A limit of zero disables both checks below. */
 	limited = (g_journal_cache_limit > 0);
 	/* Crossing the low mark wakes the switcher, but only once. */
 	if (limited && !g_journal_switcher_wokenup &&
 	    g_journal_cache_used + size > g_journal_cache_low) {
 		GJ_DEBUG(1, "No cache, waking up the switcher.");
 		g_journal_switcher_wokenup = 1;
 		wakeup(&g_journal_switcher_state);
 	}
 	/* Only M_NOWAIT callers are refused when the hard limit is hit. */
 	if ((flags & M_NOWAIT) != 0 && limited &&
 	    g_journal_cache_used + size > g_journal_cache_limit) {
 		mtx_unlock(&g_journal_cache_mtx);
 		g_journal_cache_alloc_failures++;
 		return (NULL);
 	}
 	g_journal_cache_used += size;
 	mtx_unlock(&g_journal_cache_mtx);
 
 	/* The allocation itself always sleeps. */
 	flags &= ~M_NOWAIT;
 #ifdef GJ_MEMDEBUG
 	/* Prepend a meminfo header; the caller gets the memory after it. */
 	mi = malloc(sizeof(*mi) + size, M_JOURNAL, flags | M_WAITOK);
 	p = (u_char *)mi + sizeof(*mi);
 	mi->mi_size = size;
 	stack_save(&mi->mi_stack);
 #else
 	p = malloc(size, M_JOURNAL, flags | M_WAITOK);
 #endif
 	return (p);
 }
 
 /*
  * Release a buffer obtained from gj_malloc() and subtract its size from the
  * cache usage counter.  The caller passes the size that was allocated.
  */
 static void
 gj_free(void *p, size_t size)
 {
 #ifdef GJ_MEMDEBUG
 	struct meminfo *mi;
 #endif
 
 	KASSERT(p != NULL, ("p=NULL"));
 	KASSERT(size > 0, ("size=0"));
 
 	mtx_lock(&g_journal_cache_mtx);
 	KASSERT(g_journal_cache_used >= size, ("Freeing too much?"));
 	g_journal_cache_used -= size;
 	mtx_unlock(&g_journal_cache_mtx);
 
 #ifdef GJ_MEMDEBUG
 	/* Step back to the meminfo header and verify the recorded size. */
 	mi = (void *)((u_char *)p - sizeof(*mi));
 	p = mi;
 	if (mi->mi_size != size) {
 		printf("GJOURNAL: Size mismatch! %zu != %zu\n", size,
 		    mi->mi_size);
 		printf("GJOURNAL: Alloc backtrace:\n");
 		stack_print(&mi->mi_stack);
 		printf("GJOURNAL: Free backtrace:\n");
 		kdb_backtrace();
 	}
 #endif
 	free(p, M_JOURNAL);
 }
 
 /*
  * Resize a gj_malloc() buffer from oldsize to size bytes and adjust the
  * cache usage counter accordingly.  Returns the new buffer.
  */
 static void *
 gj_realloc(void *p, size_t size, size_t oldsize)
 {
 	void *newp;
 
 #ifdef GJ_MEMDEBUG
 	/* Debug build: allocate, copy and free via the gj_* wrappers. */
 	newp = gj_malloc(size, M_WAITOK);
 	bcopy(p, newp, MIN(oldsize, size));
 	gj_free(p, oldsize);
 #else
 	mtx_lock(&g_journal_cache_mtx);
 	g_journal_cache_used = g_journal_cache_used - oldsize + size;
 	mtx_unlock(&g_journal_cache_mtx);
 	newp = realloc(p, size, M_JOURNAL, M_WAITOK);
 #endif
 	return (newp);
 }
 
 /*
  * Panic if the journal write offset has run into the inactive journal, then
  * work out how full the active journal is and wake the switcher when usage
  * exceeds g_journal_force_switch percent.
  */
 static void
 g_journal_check_overflow(struct g_journal_softc *sc)
 {
 	off_t length, percent, used;
 
 	if ((sc->sc_active.jj_offset < sc->sc_inactive.jj_offset &&
 	     sc->sc_journal_offset >= sc->sc_inactive.jj_offset) ||
 	    (sc->sc_active.jj_offset > sc->sc_inactive.jj_offset &&
 	     sc->sc_journal_offset >= sc->sc_inactive.jj_offset &&
 	     sc->sc_journal_offset < sc->sc_active.jj_offset)) {
 		panic("Journal overflow "
 		    "(id = %u joffset=%jd active=%jd inactive=%jd)",
 		    (unsigned)sc->sc_id,
 		    (intmax_t)sc->sc_journal_offset,
 		    (intmax_t)sc->sc_active.jj_offset,
 		    (intmax_t)sc->sc_inactive.jj_offset);
 	}
 	if (sc->sc_active.jj_offset < sc->sc_inactive.jj_offset) {
 		/* Active journal lies entirely before the inactive one. */
 		length = sc->sc_inactive.jj_offset - sc->sc_active.jj_offset;
 		used = sc->sc_journal_offset - sc->sc_active.jj_offset;
 	} else {
 		/* Available space wraps from sc_jend around to sc_jstart. */
 		length = sc->sc_jend - sc->sc_active.jj_offset;
 		length += sc->sc_inactive.jj_offset - sc->sc_jstart;
 		if (sc->sc_journal_offset >= sc->sc_active.jj_offset)
 			used = sc->sc_journal_offset - sc->sc_active.jj_offset;
 		else {
 			used = sc->sc_jend - sc->sc_active.jj_offset;
 			used += sc->sc_journal_offset - sc->sc_jstart;
 		}
 	}
 	/* Already woken up? */
 	if (g_journal_switcher_wokenup)
 		return;
 	/*
 	 * If the active journal takes more than g_journal_force_switch percent
 	 * of free journal space, we force journal switch.
 	 */
 	KASSERT(length > 0,
 	    ("length=%jd used=%jd active=%jd inactive=%jd joffset=%jd",
 	    (intmax_t)length, (intmax_t)used,
 	    (intmax_t)sc->sc_active.jj_offset,
 	    (intmax_t)sc->sc_inactive.jj_offset,
 	    (intmax_t)sc->sc_journal_offset));
 	percent = (used * 100) / length;
 	if (percent > g_journal_force_switch) {
 		g_journal_stats_journal_full++;
 		/* %jd requires an intmax_t argument, so cast explicitly. */
 		GJ_DEBUG(1, "Journal %s %jd%% full, forcing journal switch.",
 		    sc->sc_name, (intmax_t)percent);
 		mtx_lock(&g_journal_cache_mtx);
 		g_journal_switcher_wokenup = 1;
 		wakeup(&g_journal_switcher_state);
 		mtx_unlock(&g_journal_cache_mtx);
 	}
 }
 
 /*
  * Orphan method: the provider under consumer cp is gone, so try to destroy
  * the journal device and log the outcome.
  */
 static void
 g_journal_orphan(struct g_consumer *cp)
 {
 	struct g_journal_softc *sc;
 	char name[256];
 	int error;
 
 	g_topology_assert();
 	sc = cp->geom->softc;
 	/* Copy the provider name before g_journal_destroy() is called. */
 	strlcpy(name, cp->provider->name, sizeof(name));
 	GJ_DEBUG(0, "Lost provider %s.", name);
 	if (sc == NULL)
 		return;
 
 	error = g_journal_destroy(sc);
 	if (error != 0) {
 		GJ_DEBUG(0, "Cannot destroy journal %s (error=%d). "
 		    "Destroy it manually after last close.", sc->sc_name,
 		    error);
 		return;
 	}
 	GJ_DEBUG(0, "Journal %s destroyed.", name);
 }
 
 /*
  * Access method of the journal provider.
  *
  * When the softc is gone or the device is being destroyed, requests that
  * only drop access counts succeed and anything else fails with ENXIO.
  * On the first write open the device is marked dirty and its metadata is
  * updated.  Returns 0 otherwise.
  */
 static int
 g_journal_access(struct g_provider *pp, int acr, int acw, int ace)
 {
 	struct g_journal_softc *sc;
 	int dcw;
 
 	g_topology_assert();
 	GJ_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name,
 	    acr, acw, ace);
 
 	/* Write count as it will be once this request is applied. */
 	dcw = pp->acw + acw;
 
 	sc = pp->geom->softc;
 	if (sc == NULL || (sc->sc_flags & GJF_DEVICE_DESTROY)) {
 		if (acr <= 0 && acw <= 0 && ace <= 0)
 			return (0);
 		else
 			return (ENXIO);
 	}
 	if (pp->acw == 0 && dcw > 0) {
 		GJ_DEBUG(1, "Marking %s as dirty.", sc->sc_name);
 		sc->sc_flags &= ~GJF_DEVICE_CLEAN;
 		/* The metadata update runs with the topology lock dropped. */
 		g_topology_unlock();
 		g_journal_metadata_update(sc);
 		g_topology_lock();
 	} /* else if (pp->acw == 0 && dcw > 0 && JEMPTY(sc)) {
 		GJ_DEBUG(1, "Marking %s as clean.", sc->sc_name);
 		sc->sc_flags |= GJF_DEVICE_CLEAN;
 		g_topology_unlock();
 		g_journal_metadata_update(sc);
 		g_topology_lock();
 	} */
 	return (0);
 }
 
 /*
  * Serialize a journal header into data: the magic string, followed by the
  * journal ID and the next journal ID as little-endian 32-bit values.
  */
 static void
 g_journal_header_encode(struct g_journal_header *hdr, u_char *data)
 {
 	u_char *p;
 
 	p = data;
 	bcopy(GJ_HEADER_MAGIC, p, sizeof(GJ_HEADER_MAGIC));
 	p += sizeof(GJ_HEADER_MAGIC);
 	le32enc(p, hdr->jh_journal_id);
 	le32enc(p + 4, hdr->jh_journal_next_id);
 }
 
 /*
  * Parse a journal header from data into hdr.  Returns EINVAL when the magic
  * does not match (the ID fields of hdr are then left unset), 0 otherwise.
  */
 static int
 g_journal_header_decode(const u_char *data, struct g_journal_header *hdr)
 {
 	const u_char *p;
 
 	p = data;
 	bcopy(p, hdr->jh_magic, sizeof(hdr->jh_magic));
 	p += sizeof(hdr->jh_magic);
 	if (bcmp(hdr->jh_magic, GJ_HEADER_MAGIC, sizeof(GJ_HEADER_MAGIC)) != 0)
 		return (EINVAL);
 	hdr->jh_journal_id = le32dec(p);
 	hdr->jh_journal_next_id = le32dec(p + 4);
 	return (0);
 }
 
 static void
 g_journal_flush_cache(struct g_journal_softc *sc)
 {
 	struct bintime bt;
 	int error;
 
 	if (sc->sc_bio_flush == 0)
 		return;
 	GJ_TIMER_START(1, &bt);
 	if (sc->sc_bio_flush & GJ_FLUSH_JOURNAL) {
 		error = g_io_flush(sc->sc_jconsumer);
 		GJ_DEBUG(error == 0 ? 2 : 0, "Flush cache of %s: error=%d.",
 		    sc->sc_jconsumer->provider->name, error);
 	}
 	if (sc->sc_bio_flush & GJ_FLUSH_DATA) {
 		/*
 		 * TODO: This could be called in parallel with the
 		 *       previous call.
 		 */
 		error = g_io_flush(sc->sc_dconsumer);
 		GJ_DEBUG(error == 0 ? 2 : 0, "Flush cache of %s: error=%d.",
 		    sc->sc_dconsumer->provider->name, error);
 	}
 	GJ_TIMER_STOP(1, &bt, "Cache flush time");
 }
 
 static int
 g_journal_write_header(struct g_journal_softc *sc)
 {
 	struct g_journal_header hdr;
 	struct g_consumer *cp;
 	u_char *buf;
 	int error;
 
 	cp = sc->sc_jconsumer;
 	buf = gj_malloc(cp->provider->sectorsize, M_WAITOK);
 
 	strlcpy(hdr.jh_magic, GJ_HEADER_MAGIC, sizeof(hdr.jh_magic));
 	hdr.jh_journal_id = sc->sc_journal_id;
 	hdr.jh_journal_next_id = sc->sc_journal_next_id;
 	g_journal_header_encode(&hdr, buf);
 	error = g_write_data(cp, sc->sc_journal_offset, buf,
 	    cp->provider->sectorsize);
 	/* if (error == 0) */
 	sc->sc_journal_offset += cp->provider->sectorsize;
 
 	gj_free(buf, cp->provider->sectorsize);
 	return (error);
 }
 
 /*
  * Every journal record has a header and data following it.
  * Functions below are used to encode the header to little endian before
  * storing it and to decode it to system endianness after reading.
  */
 static void
 g_journal_record_header_encode(struct g_journal_record_header *hdr,
     u_char *data)
 {
 	struct g_journal_entry *ent;
 	u_char *p;
 	u_int idx;
 
 	p = data;
 	bcopy(GJ_RECORD_HEADER_MAGIC, p, sizeof(GJ_RECORD_HEADER_MAGIC));
 	p += sizeof(GJ_RECORD_HEADER_MAGIC);
 	/* A 32-bit ID is stored, but the field occupies 8 bytes. */
 	le32enc(p, hdr->jrh_journal_id);
 	p += 8;
 	le16enc(p, hdr->jrh_nentries);
 	p += 2;
 	bcopy(hdr->jrh_sum, p, sizeof(hdr->jrh_sum));
 	p += 8;
 	/* Each entry is three little-endian 64-bit values (24 bytes). */
 	for (idx = 0; idx < hdr->jrh_nentries; idx++, p += 24) {
 		ent = &hdr->jrh_entries[idx];
 		le64enc(p, ent->je_joffset);
 		le64enc(p + 8, ent->je_offset);
 		le64enc(p + 16, ent->je_length);
 	}
 }
 
 /*
  * Parse a record header from data into hdr.  Returns EINVAL on a magic
  * mismatch or when the entry count exceeds GJ_RECORD_HEADER_NENTRIES,
  * 0 on success.
  */
 static int
 g_journal_record_header_decode(const u_char *data,
     struct g_journal_record_header *hdr)
 {
 	struct g_journal_entry *ent;
 	const u_char *p;
 	u_int idx;
 
 	p = data;
 	bcopy(p, hdr->jrh_magic, sizeof(hdr->jrh_magic));
 	p += sizeof(hdr->jrh_magic);
 	if (strcmp(hdr->jrh_magic, GJ_RECORD_HEADER_MAGIC) != 0)
 		return (EINVAL);
 	/* A 32-bit ID is read, but the field occupies 8 bytes. */
 	hdr->jrh_journal_id = le32dec(p);
 	p += 8;
 	hdr->jrh_nentries = le16dec(p);
 	p += 2;
 	/* Reject counts that would overrun the jrh_entries array. */
 	if (hdr->jrh_nentries > GJ_RECORD_HEADER_NENTRIES)
 		return (EINVAL);
 	bcopy(p, hdr->jrh_sum, sizeof(hdr->jrh_sum));
 	p += 8;
 	/* Each entry is three little-endian 64-bit values (24 bytes). */
 	for (idx = 0; idx < hdr->jrh_nentries; idx++, p += 24) {
 		ent = &hdr->jrh_entries[idx];
 		ent->je_joffset = le64dec(p);
 		ent->je_offset = le64dec(p + 8);
 		ent->je_length = le64dec(p + 16);
 	}
 	return (0);
 }
 
 /*
  * Function reads metadata from a provider (via the given consumer), decodes
  * it to system endianness and verifies its correctness.
  */
 static int
 g_journal_metadata_read(struct g_consumer *cp, struct g_journal_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	/* Metadata is stored in last sector. */
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL) {
 		GJ_DEBUG(1, "Cannot read metadata from %s (error=%d).",
 		    cp->provider->name, error);
 		return (error);
 	}
 
 	/* Decode metadata. */
 	error = journal_metadata_decode(buf, md);
 	g_free(buf);
 	/* Is this is gjournal provider at all? */
 	if (strcmp(md->md_magic, G_JOURNAL_MAGIC) != 0)
 		return (EINVAL);
 	/*
 	 * Are we able to handle this version of metadata?
 	 * We only maintain backward compatibility.
 	 */
 	if (md->md_version > G_JOURNAL_VERSION) {
 		GJ_DEBUG(0,
 		    "Kernel module is too old to handle metadata from %s.",
 		    cp->provider->name);
 		return (EINVAL);
 	}
 	/* Is checksum correct? */
 	if (error != 0) {
 		GJ_DEBUG(0, "MD5 metadata hash mismatch for provider %s.",
 		    cp->provider->name);
 		return (error);
 	}
 	return (0);
 }
 
 /*
  * Two functions below are responsible for updating metadata.
  * Only metadata on the data provider is updated (we need to update
  * information about active journal in there).
  */
 /*
  * Completion of a metadata write: log the result, then release the sector
  * buffer and the bio.
  */
 static void
 g_journal_metadata_done(struct bio *bp)
 {
 
 	/* An error can only be reported; there is no recovery here. */
 	if (bp->bio_error == 0) {
 		GJ_LOGREQ(2, bp, "Metadata updated.");
 	} else {
 		GJ_LOGREQ(0, bp, "Cannot update metadata (error=%d).",
 		    bp->bio_error);
 	}
 	gj_free(bp->bio_data, bp->bio_length);
 	g_destroy_bio(bp);
 }
 
 static void
 g_journal_metadata_update(struct g_journal_softc *sc)
 {
 	struct g_journal_metadata md;
 	struct g_consumer *cp;
 	struct bio *bp;
 	u_char *sector;
 
 	cp = sc->sc_dconsumer;
 	sector = gj_malloc(cp->provider->sectorsize, M_WAITOK);
 	strlcpy(md.md_magic, G_JOURNAL_MAGIC, sizeof(md.md_magic));
 	md.md_version = G_JOURNAL_VERSION;
 	md.md_id = sc->sc_id;
 	md.md_type = sc->sc_orig_type;
 	md.md_jstart = sc->sc_jstart;
 	md.md_jend = sc->sc_jend;
 	md.md_joffset = sc->sc_inactive.jj_offset;
 	md.md_jid = sc->sc_journal_previous_id;
 	md.md_flags = 0;
 	if (sc->sc_flags & GJF_DEVICE_CLEAN)
 		md.md_flags |= GJ_FLAG_CLEAN;
 
 	if (sc->sc_flags & GJF_DEVICE_HARDCODED)
 		strlcpy(md.md_provider, sc->sc_name, sizeof(md.md_provider));
 	else
 		bzero(md.md_provider, sizeof(md.md_provider));
 	md.md_provsize = cp->provider->mediasize;
 	journal_metadata_encode(&md, sector);
 
 	/*
 	 * Flush the cache, so we know all data are on disk.
 	 * We write here informations like "journal is consistent", so we need
 	 * to be sure it is. Without BIO_FLUSH here, we can end up in situation
 	 * where metadata is stored on disk, but not all data.
 	 */
 	g_journal_flush_cache(sc);
 
 	bp = g_alloc_bio();
 	bp->bio_offset = cp->provider->mediasize - cp->provider->sectorsize;
 	bp->bio_length = cp->provider->sectorsize;
 	bp->bio_data = sector;
 	bp->bio_cmd = BIO_WRITE;
 	if (!(sc->sc_flags & GJF_DEVICE_DESTROY)) {
 		bp->bio_done = g_journal_metadata_done;
 		g_io_request(bp, cp);
 	} else {
 		bp->bio_done = NULL;
 		g_io_request(bp, cp);
 		biowait(bp, "gjmdu");
 		g_journal_metadata_done(bp);
 	}
 
 	/*
 	 * Be sure metadata reached the disk.
 	 */
 	g_journal_flush_cache(sc);
 }
 
 /*
  * This is where the I/O request comes from the GEOM.
  */
 static void
 g_journal_start(struct bio *bp)
 {
 	struct g_journal_softc *sc;
 
 	sc = bp->bio_to->geom->softc;
 	GJ_LOGREQ(3, bp, "Request received.");
 
 	/* Reads and writes are queued and whoever sleeps on sc is woken. */
 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
 		mtx_lock(&sc->sc_mtx);
 		bioq_insert_tail(&sc->sc_regular_queue, bp);
 		wakeup(sc);
 		mtx_unlock(&sc->sc_mtx);
 		return;
 	}
 
 	/* The only attribute answered here is the provider name. */
 	if (bp->bio_cmd == BIO_GETATTR &&
 	    strcmp(bp->bio_attribute, "GJOURNAL::provider") == 0) {
 		strlcpy(bp->bio_data, bp->bio_to->name, bp->bio_length);
 		bp->bio_completed = strlen(bp->bio_to->name) + 1;
 		g_io_deliver(bp, 0);
 		return;
 	}
 
 	/*
 	 * Everything else (other attributes, BIO_SPEEDUP, BIO_DELETE and
 	 * any unknown command) is not supported.
 	 */
 	g_io_deliver(bp, EOPNOTSUPP);
 }
 
 /*
  * Generic completion callback: put the finished bio on the softc's back
  * queue and wake up whoever sleeps on the softc.
  */
 static void
 g_journal_std_done(struct bio *bp)
 {
 	struct g_journal_softc *sc = bp->bio_from->geom->softc;
 
 	mtx_lock(&sc->sc_mtx);
 	bioq_insert_tail(&sc->sc_back_queue, bp);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_mtx);
 }
 
 /*
  * Allocate a BIO_WRITE bio covering [start, end) with journal offset
  * joffset.  When data is not NULL a private copy of it is attached; the
  * copy is allocated with the given flags, and bio_data stays NULL if that
  * allocation fails.
  */
 static struct bio *
 g_journal_new_bio(off_t start, off_t end, off_t joffset, u_char *data,
     int flags)
 {
 	struct bio *nbp;
 
 	nbp = g_alloc_bio();
 	nbp->bio_offset = start;
 	nbp->bio_joffset = joffset;
 	nbp->bio_length = end - start;
 	nbp->bio_cmd = BIO_WRITE;
 	nbp->bio_done = g_journal_std_done;
 	if (data != NULL) {
 		nbp->bio_data = gj_malloc(nbp->bio_length, flags);
 		if (nbp->bio_data != NULL)
 			bcopy(data, nbp->bio_data, nbp->bio_length);
 	} else {
 		nbp->bio_data = NULL;
 	}
 	return (nbp);
 }
 
 /*
  * Convenience wrapper: call g_journal_insert() with the range, journal
  * offset and data taken from an existing bio.
  */
 #define	g_journal_insert_bio(head, bp, flags)				\
 	g_journal_insert((head), (bp)->bio_offset,			\
 		(bp)->bio_offset + (bp)->bio_length, (bp)->bio_joffset,	\
 		(bp)->bio_data, flags)
 /*
  * The function below does a lot more than just inserting bio to the queue.
  * It keeps the queue sorted by offset and ensures that there are no doubled
  * data (it combines bios where ranges overlap).
  *
  * The function returns the number of bios inserted (as a bio can be split).
  */
 static int
 g_journal_insert(struct bio **head, off_t nstart, off_t nend, off_t joffset,
     u_char *data, int flags)
 {
 	struct bio *nbp, *cbp, *pbp;
 	off_t cstart, cend;
 	u_char *tmpdata;
 	int n;
 
 	/* %jd requires intmax_t arguments, so cast the off_t values. */
 	GJ_DEBUG(3, "INSERT(%p): (%jd, %jd, %jd)", *head, (intmax_t)nstart,
 	    (intmax_t)nend, (intmax_t)joffset);
 	n = 0;
 	pbp = NULL;
 	GJQ_FOREACH(*head, cbp) {
 		cstart = cbp->bio_offset;
 		cend = cbp->bio_offset + cbp->bio_length;
 
 		if (nstart >= cend) {
 			/*
 			 *  +-------------+
 			 *  |             |
 			 *  |   current   |  +-------------+
 			 *  |     bio     |  |             |
 			 *  |             |  |     new     |
 			 *  +-------------+  |     bio     |
 			 *                   |             |
 			 *                   +-------------+
 			 */
 			GJ_DEBUG(3, "INSERT(%p): 1", *head);
 		} else if (nend <= cstart) {
 			/*
 			 *                   +-------------+
 			 *                   |             |
 			 *  +-------------+  |   current   |
 			 *  |             |  |     bio     |
 			 *  |     new     |  |             |
 			 *  |     bio     |  +-------------+
 			 *  |             |
 			 *  +-------------+
 			 */
 			nbp = g_journal_new_bio(nstart, nend, joffset, data,
 			    flags);
 			if (pbp == NULL)
 				*head = nbp;
 			else
 				pbp->bio_next = nbp;
 			nbp->bio_next = cbp;
 			n++;
 			GJ_DEBUG(3, "INSERT(%p): 2 (nbp=%p pbp=%p)", *head, nbp,
 			    pbp);
 			goto end;
 		} else if (nstart <= cstart && nend >= cend) {
 			/*
 			 *      +-------------+      +-------------+
 			 *      | current bio |      | current bio |
 			 *  +---+-------------+---+  +-------------+---+
 			 *  |   |             |   |  |             |   |
 			 *  |   |             |   |  |             |   |
 			 *  |   +-------------+   |  +-------------+   |
 			 *  |       new bio       |  |     new bio     |
 			 *  +---------------------+  +-----------------+
 			 *
 			 *      +-------------+  +-------------+
 			 *      | current bio |  | current bio |
 			 *  +---+-------------+  +-------------+
 			 *  |   |             |  |             |
 			 *  |   |             |  |             |
 			 *  |   +-------------+  +-------------+
 			 *  |     new bio     |  |   new bio   |
 			 *  +-----------------+  +-------------+
 			 */
 			/*
 			 * The current bio is reused for the part of the new
 			 * range up to cend; whatever remains of the new range
 			 * is handled by later iterations.
 			 */
 			g_journal_stats_bytes_skipped += cbp->bio_length;
 			cbp->bio_offset = nstart;
 			cbp->bio_joffset = joffset;
 			cbp->bio_length = cend - nstart;
 			if (cbp->bio_data != NULL) {
 				gj_free(cbp->bio_data, cend - cstart);
 				cbp->bio_data = NULL;
 			}
 			if (data != NULL) {
 				cbp->bio_data = gj_malloc(cbp->bio_length,
 				    flags);
 				if (cbp->bio_data != NULL) {
 					bcopy(data, cbp->bio_data,
 					    cbp->bio_length);
 				}
 				data += cend - nstart;
 			}
 			joffset += cend - nstart;
 			nstart = cend;
 			GJ_DEBUG(3, "INSERT(%p): 3 (cbp=%p)", *head, cbp);
 		} else if (nstart > cstart && nend >= cend) {
 			/*
 			 *  +-----------------+  +-------------+
 			 *  |   current bio   |  | current bio |
 			 *  |   +-------------+  |   +---------+---+
 			 *  |   |             |  |   |         |   |
 			 *  |   |             |  |   |         |   |
 			 *  +---+-------------+  +---+---------+   |
 			 *      |   new bio   |      |   new bio   |
 			 *      +-------------+      +-------------+
 			 */
 			/*
 			 * Trim the current bio to [cstart, nstart) and link a
 			 * new bio for [nstart, cend) right after it.
 			 */
 			g_journal_stats_bytes_skipped += cend - nstart;
 			nbp = g_journal_new_bio(nstart, cend, joffset, data,
 			    flags);
 			nbp->bio_next = cbp->bio_next;
 			cbp->bio_next = nbp;
 			cbp->bio_length = nstart - cstart;
 			if (cbp->bio_data != NULL) {
 				cbp->bio_data = gj_realloc(cbp->bio_data,
 				    cbp->bio_length, cend - cstart);
 			}
 			if (data != NULL)
 				data += cend - nstart;
 			joffset += cend - nstart;
 			nstart = cend;
 			n++;
 			GJ_DEBUG(3, "INSERT(%p): 4 (cbp=%p)", *head, cbp);
 		} else if (nstart > cstart && nend < cend) {
 			/*
 			 *  +---------------------+
 			 *  |     current bio     |
 			 *  |   +-------------+   |
 			 *  |   |             |   |
 			 *  |   |             |   |
 			 *  +---+-------------+---+
 			 *      |   new bio   |
 			 *      +-------------+
 			 */
 			/*
 			 * The current bio is split in three: its head is kept
 			 * in place, the new bio follows, and a second new bio
 			 * carries the tail [nend, cend) of the current data.
 			 */
 			g_journal_stats_bytes_skipped += nend - nstart;
 			nbp = g_journal_new_bio(nstart, nend, joffset, data,
 			    flags);
 			nbp->bio_next = cbp->bio_next;
 			cbp->bio_next = nbp;
 			if (cbp->bio_data == NULL)
 				tmpdata = NULL;
 			else
 				tmpdata = cbp->bio_data + nend - cstart;
 			nbp = g_journal_new_bio(nend, cend,
 			    cbp->bio_joffset + nend - cstart, tmpdata, flags);
 			nbp->bio_next = ((struct bio *)cbp->bio_next)->bio_next;
 			((struct bio *)cbp->bio_next)->bio_next = nbp;
 			cbp->bio_length = nstart - cstart;
 			if (cbp->bio_data != NULL) {
 				cbp->bio_data = gj_realloc(cbp->bio_data,
 				    cbp->bio_length, cend - cstart);
 			}
 			n += 2;
 			GJ_DEBUG(3, "INSERT(%p): 5 (cbp=%p)", *head, cbp);
 			goto end;
 		} else if (nstart <= cstart && nend < cend) {
 			/*
 			 *  +-----------------+      +-------------+
 			 *  |   current bio   |      | current bio |
 			 *  +-------------+   |  +---+---------+   |
 			 *  |             |   |  |   |         |   |
 			 *  |             |   |  |   |         |   |
 			 *  +-------------+---+  |   +---------+---+
 			 *  |   new bio   |      |   new bio   |
 			 *  +-------------+      +-------------+
 			 */
 			/*
 			 * The new bio goes in front of the current one, and
 			 * the current bio shrinks to its tail [nend, cend).
 			 */
 			g_journal_stats_bytes_skipped += nend - nstart;
 			nbp = g_journal_new_bio(nstart, nend, joffset, data,
 			    flags);
 			if (pbp == NULL)
 				*head = nbp;
 			else
 				pbp->bio_next = nbp;
 			nbp->bio_next = cbp;
 			cbp->bio_offset = nend;
 			cbp->bio_length = cend - nend;
 			cbp->bio_joffset += nend - cstart;
 			tmpdata = cbp->bio_data;
 			if (tmpdata != NULL) {
 				cbp->bio_data = gj_malloc(cbp->bio_length,
 				    flags);
 				if (cbp->bio_data != NULL) {
 					bcopy(tmpdata + nend - cstart,
 					    cbp->bio_data, cbp->bio_length);
 				}
 				gj_free(tmpdata, cend - cstart);
 			}
 			n++;
 			GJ_DEBUG(3, "INSERT(%p): 6 (cbp=%p)", *head, cbp);
 			goto end;
 		}
 		/* The whole new range has been consumed. */
 		if (nstart == nend)
 			goto end;
 		pbp = cbp;
 	}
 	/* Whatever is left goes at the tail of the queue. */
 	nbp = g_journal_new_bio(nstart, nend, joffset, data, flags);
 	if (pbp == NULL)
 		*head = nbp;
 	else
 		pbp->bio_next = nbp;
 	nbp->bio_next = NULL;
 	n++;
 	GJ_DEBUG(3, "INSERT(%p): 8 (nbp=%p pbp=%p)", *head, nbp, pbp);
 end:
 	if (g_journal_debug >= 3) {
 		GJQ_FOREACH(*head, cbp) {
 			GJ_DEBUG(3, "ELEMENT: %p (%jd, %jd, %jd, %p)", cbp,
 			    (intmax_t)cbp->bio_offset,
 			    (intmax_t)cbp->bio_length,
 			    (intmax_t)cbp->bio_joffset, cbp->bio_data);
 		}
 		GJ_DEBUG(3, "INSERT(%p): DONE %d", *head, n);
 	}
 	return (n);
 }
 
 /*
  * The function combines neighbour bios trying to squeeze as much data as
  * possible into one bio.
  *
  * The function returns the number of bios combined (negative value).
  */
 static int
 g_journal_optimize(struct bio *head)
 {
 	struct bio *cbp, *pbp;
 	int n;
 
 	n = 0;
 	/* pbp is the candidate bio that cbp may be merged into. */
 	pbp = NULL;
 	GJQ_FOREACH(head, cbp) {
 		/* Skip bios which has to be read first. */
 		if (cbp->bio_data == NULL) {
 			pbp = NULL;
 			continue;
 		}
 		/* There is no previous bio yet. */
 		if (pbp == NULL) {
 			pbp = cbp;
 			continue;
 		}
 		/* Is this a neighbour bio? */
 		if (pbp->bio_offset + pbp->bio_length != cbp->bio_offset) {
 			/* Be sure that bios queue is sorted. */
 			KASSERT(pbp->bio_offset + pbp->bio_length < cbp->bio_offset,
 			    ("poffset=%jd plength=%jd coffset=%jd",
 			    (intmax_t)pbp->bio_offset,
 			    (intmax_t)pbp->bio_length,
 			    (intmax_t)cbp->bio_offset));
 			pbp = cbp;
 			continue;
 		}
 		/* Be sure we don't end up with too big bio. */
 		if (pbp->bio_length + cbp->bio_length > maxphys) {
 			pbp = cbp;
 			continue;
 		}
 		/* Ok, we can join bios. */
 		GJ_LOGREQ(4, pbp, "Join: ");
 		GJ_LOGREQ(4, cbp, "and: ");
 		/* Grow pbp's buffer and append cbp's data to it. */
 		pbp->bio_data = gj_realloc(pbp->bio_data,
 		    pbp->bio_length + cbp->bio_length, pbp->bio_length);
 		bcopy(cbp->bio_data, pbp->bio_data + pbp->bio_length,
 		    cbp->bio_length);
 		gj_free(cbp->bio_data, cbp->bio_length);
 		pbp->bio_length += cbp->bio_length;
 		/* Unlink cbp from the list before destroying it. */
 		pbp->bio_next = cbp->bio_next;
 		g_destroy_bio(cbp);
 		/*
 		 * cbp has been destroyed; point the cursor at the merged bio
 		 * instead (presumably so GJQ_FOREACH advances from it --
 		 * confirm against the macro definition).
 		 */
 		cbp = pbp;
 		g_journal_stats_combined_ios++;
 		n--;
 		GJ_LOGREQ(4, pbp, "Got: ");
 	}
 	return (n);
 }
 
 /*
  * TODO: Update comment.
  * These are functions responsible for copying one portion of data from journal
  * to the destination provider.
  * The order goes like this:
  * 1. Read the header, which contains information about data blocks
  *    following it.
  * 2. Read the data blocks from the journal.
  * 3. Write the data blocks on the data provider.
  *
  * g_journal_copy_start()
  * g_journal_copy_done() - got finished write request, logs potential errors.
  */
 
 /*
  * Read the data of bp back from the journal.
  *
  * Used for a request that has no data buffer (we were short in memory, so
  * the data was freed). A child READ request covering bp's journal offset
  * and length is allocated, given a fresh buffer and sent to the journal
  * consumer; bp is recorded as its parent.
  */
 static void
 g_journal_read_first(struct g_journal_softc *sc, struct bio *bp)
 {
 	struct bio *rbp;
 
 	rbp = g_alloc_bio();
 	rbp->bio_cmd = BIO_READ;
 	rbp->bio_done = g_journal_std_done;
 	rbp->bio_parent = bp;
 	rbp->bio_cflags = bp->bio_cflags;
 	/* The read targets the journal, hence bio_joffset as the offset. */
 	rbp->bio_offset = bp->bio_joffset;
 	rbp->bio_length = bp->bio_length;
 	rbp->bio_data = gj_malloc(bp->bio_length, M_WAITOK);
 	GJ_LOGREQ(4, rbp, "READ FIRST");
 	g_io_request(rbp, sc->sc_jconsumer);
 	g_journal_cache_misses++;
 }
 
 /*
  * Start copying queued journal data to the data provider.
  *
  * Bios are taken off the inactive journal queue until
  * g_journal_parallel_copies copies are in progress, neighbours are merged
  * when g_journal_do_optimize is set, and every resulting bio is put on
  * sc_copy_queue and sent out: directly to the data consumer when it still
  * has its data, or through g_journal_read_first() when it does not.
  */
 static void
 g_journal_copy_send(struct g_journal_softc *sc)
 {
 	struct bio *batch, *bp, *tail;
 
 	batch = NULL;
 	tail = NULL;
 	mtx_lock(&sc->sc_mtx);
 	while (sc->sc_copy_in_progress < g_journal_parallel_copies &&
 	    (bp = GJQ_FIRST(sc->sc_inactive.jj_queue)) != NULL) {
 		GJQ_REMOVE(sc->sc_inactive.jj_queue, bp);
 		sc->sc_copy_in_progress++;
 		/* Append, so the batch keeps the order of the source queue. */
 		GJQ_INSERT_AFTER(batch, bp, tail);
 		tail = bp;
 	}
 	mtx_unlock(&sc->sc_mtx);
 	/* Merging returns a non-positive count of bios that went away. */
 	if (g_journal_do_optimize)
 		sc->sc_copy_in_progress += g_journal_optimize(batch);
 	while ((bp = GJQ_FIRST(batch)) != NULL) {
 		GJQ_REMOVE(batch, bp);
 		GJQ_INSERT_HEAD(sc->sc_copy_queue, bp);
 		bp->bio_cflags = GJ_BIO_COPY;
 		if (bp->bio_data != NULL) {
 			bp->bio_joffset = 0;
 			GJ_LOGREQ(4, bp, "SEND");
 			g_io_request(bp, sc->sc_dconsumer);
 		} else {
 			/* No data in memory, fetch it from the journal first. */
 			g_journal_read_first(sc, bp);
 		}
 	}
 }
 
 /*
  * Begin copying journaled data to the data provider.
  *
  * The first call of a copy cycle sets sc_journal_copying and updates the
  * on-disk metadata, so that in case of power failure the data is copied
  * once again on boot. Every call then sends out copy requests.
  */
 static void
 g_journal_copy_start(struct g_journal_softc *sc)
 {
 
 	if (sc->sc_journal_copying == 0) {
 		sc->sc_journal_copying = 1;
 		GJ_DEBUG(1, "Starting copy of journal.");
 		g_journal_metadata_update(sc);
 	}
 	g_journal_copy_send(sc);
 }
 
 /*
  * Data block has been read from the journal provider.
  *
  * bp is the finished READ request; its parent is the copy request that was
  * waiting for the data. On success the buffer that was read is handed over
  * to the parent, which is then written to the data consumer.
  *
  * Returns 0 when the write request was issued, or 1 when the read failed
  * and the copy request was dropped (one copy slot has been released, so the
  * caller may send further copy requests).
  */
 static int
 g_journal_copy_read_done(struct bio *bp)
 {
 	struct g_journal_softc *sc;
 	struct g_consumer *cp;
 	struct bio *pbp;
 
 	KASSERT(bp->bio_cflags == GJ_BIO_COPY,
 	    ("Invalid bio (%d != %d).", bp->bio_cflags, GJ_BIO_COPY));
 
 	sc = bp->bio_from->geom->softc;
 	pbp = bp->bio_parent;
 
 	if (bp->bio_error != 0) {
 		GJ_DEBUG(0, "Error while reading data from %s (error=%d).",
 		    bp->bio_to->name, bp->bio_error);
 		/*
 		 * We will not be able to deliver WRITE request as well.
 		 * The parent is still linked on sc_copy_queue (it is put
 		 * there before the read is issued), so unlink it before
 		 * destroying it; otherwise the queue would keep a pointer to
 		 * freed memory.
 		 */
 		gj_free(bp->bio_data, bp->bio_length);
 		GJQ_REMOVE(sc->sc_copy_queue, pbp);
 		g_destroy_bio(pbp);
 		g_destroy_bio(bp);
 		sc->sc_copy_in_progress--;
 		return (1);
 	}
 	/* The parent takes over ownership of the data buffer. */
 	pbp->bio_data = bp->bio_data;
 	cp = sc->sc_dconsumer;
 	g_io_request(pbp, cp);
 	GJ_LOGREQ(4, bp, "READ DONE");
 	g_destroy_bio(bp);
 	return (0);
 }
 
 /*
  * Data block has been written to the data provider.
  *
  * Releases one copy slot, logs a write error if there was one, takes the
  * request off sc_copy_queue and frees it together with its data. When no
  * copy is in progress any more, sc_journal_copying is cleared.
  */
 static void
 g_journal_copy_write_done(struct bio *bp)
 {
 	struct g_journal_softc *sc;
 
 	KASSERT(bp->bio_cflags == GJ_BIO_COPY,
 	    ("Invalid bio (%d != %d).", bp->bio_cflags, GJ_BIO_COPY));
 
 	sc = bp->bio_from->geom->softc;
 	sc->sc_copy_in_progress--;
 
 	if (bp->bio_error != 0) {
 		GJ_LOGREQ(0, bp, "[copy] Error while writing data (error=%d)",
 		    bp->bio_error);
 	}
 	GJQ_REMOVE(sc->sc_copy_queue, bp);
 	gj_free(bp->bio_data, bp->bio_length);
 	GJ_LOGREQ(4, bp, "DONE");
 	g_destroy_bio(bp);
 
 	if (sc->sc_copy_in_progress != 0)
 		return;
 
 	/* This was the last write request for this journal. */
 	GJ_DEBUG(1, "Data has been copied.");
 	sc->sc_journal_copying = 0;
 }
 
 static void g_journal_flush_done(struct bio *bp);
 
 /*
  * Flush one record onto active journal provider.
  *
  * A record consists of a one-sector header followed by the data of up to
  * g_journal_record_entries requests taken from the head of
  * sc_current_queue. The header bio and every data bio are appended to
  * sc_flush_queue (they are actually sent by g_journal_flush_send()), and
  * every entry is also inserted into the active journal queue, which serves
  * as a cache. sc_journal_offset is advanced past the record.
  *
  * Does nothing when the current queue is empty.
  */
 static void
 g_journal_flush(struct g_journal_softc *sc)
 {
 	struct g_journal_record_header hdr;
 	struct g_journal_entry *ent;
 	struct g_provider *pp;
 	struct bio **bioq;
 	struct bio *bp, *fbp, *pbp;
 	off_t joffset;
 	u_char *data, hash[16];
 	MD5_CTX ctx;
 	u_int i;
 
 	if (sc->sc_current_count == 0)
 		return;
 
 	pp = sc->sc_jprovider;
 	GJ_VALIDATE_OFFSET(sc->sc_journal_offset, sc);
 	joffset = sc->sc_journal_offset;
 
 	GJ_DEBUG(2, "Storing %d journal entries on %s at %jd.",
 	    sc->sc_current_count, pp->name, (intmax_t)joffset);
 
 	/*
 	 * Store 'journal id', so we know to which journal this record belongs.
 	 */
 	hdr.jrh_journal_id = sc->sc_journal_id;
 	/* Could be less than g_journal_record_entries if called due to a timeout. */
 	hdr.jrh_nentries = MIN(sc->sc_current_count, g_journal_record_entries);
 	strlcpy(hdr.jrh_magic, GJ_RECORD_HEADER_MAGIC, sizeof(hdr.jrh_magic));
 
 	bioq = &sc->sc_active.jj_queue;
 	/* pbp tracks the tail of the flush queue; new bios are appended. */
 	GJQ_LAST(sc->sc_flush_queue, pbp);
 
 	/*
 	 * Queue the header bio first. Its data buffer is attached only at
 	 * the end of this function, once all entries are known.
 	 */
 	fbp = g_alloc_bio();
 	fbp->bio_parent = NULL;
 	fbp->bio_cflags = GJ_BIO_JOURNAL;
 	/* NOTE(review): -1 presumably marks "no data provider offset". */
 	fbp->bio_offset = -1;
 	fbp->bio_joffset = joffset;
 	fbp->bio_length = pp->sectorsize;
 	fbp->bio_cmd = BIO_WRITE;
 	fbp->bio_done = g_journal_std_done;
 	GJQ_INSERT_AFTER(sc->sc_flush_queue, fbp, pbp);
 	pbp = fbp;
 	fbp->bio_to = pp;
 	GJ_LOGREQ(4, fbp, "FLUSH_OUT");
 	joffset += pp->sectorsize;
 	sc->sc_flush_count++;
 	if (sc->sc_flags & GJF_DEVICE_CHECKSUM)
 		MD5Init(&ctx);
 
 	for (i = 0; i < hdr.jrh_nentries; i++) {
 		/* Detach the first request from the current queue. */
 		bp = sc->sc_current_queue;
 		KASSERT(bp != NULL, ("NULL bp"));
 		bp->bio_to = pp;
 		GJ_LOGREQ(4, bp, "FLUSHED");
 		sc->sc_current_queue = bp->bio_next;
 		bp->bio_next = NULL;
 		sc->sc_current_count--;
 
 		/* Add to the header. */
 		ent = &hdr.jrh_entries[i];
 		ent->je_offset = bp->bio_offset;
 		ent->je_joffset = joffset;
 		ent->je_length = bp->bio_length;
 
 		/* Keep the data pointer; the bio is reset and refilled below. */
 		data = bp->bio_data;
 		if (sc->sc_flags & GJF_DEVICE_CHECKSUM)
 			MD5Update(&ctx, data, ent->je_length);
 		g_reset_bio(bp);
 		bp->bio_cflags = GJ_BIO_JOURNAL;
 		bp->bio_offset = ent->je_offset;
 		bp->bio_joffset = ent->je_joffset;
 		bp->bio_length = ent->je_length;
 		bp->bio_data = data;
 		bp->bio_cmd = BIO_WRITE;
 		bp->bio_done = g_journal_std_done;
 		GJQ_INSERT_AFTER(sc->sc_flush_queue, bp, pbp);
 		pbp = bp;
 		bp->bio_to = pp;
 		GJ_LOGREQ(4, bp, "FLUSH_OUT");
 		joffset += bp->bio_length;
 		sc->sc_flush_count++;
 
 		/*
 		 * Add request to the active sc_journal_queue queue.
 		 * This is our cache. After journal switch we don't have to
 		 * read the data from the inactive journal, because we keep
 		 * it in memory.
 		 */
 		g_journal_insert(bioq, ent->je_offset,
 		    ent->je_offset + ent->je_length, ent->je_joffset, data,
 		    M_NOWAIT);
 	}
 
 	/*
 	 * After all requests, store valid header.
 	 */
 	data = gj_malloc(pp->sectorsize, M_WAITOK);
 	if (sc->sc_flags & GJF_DEVICE_CHECKSUM) {
 		MD5Final(hash, &ctx);
 		bcopy(hash, hdr.jrh_sum, sizeof(hdr.jrh_sum));
 	}
 	g_journal_record_header_encode(&hdr, data);
 	fbp->bio_data = data;
 
 	/* The next record starts right after this one. */
 	sc->sc_journal_offset = joffset;
 
 	g_journal_check_overflow(sc);
 }
 
 /*
  * Flush request finished.
  *
  * Releases one flush slot, logs a write error if there was one and frees
  * the request together with its data buffer.
  */
 static void
 g_journal_flush_done(struct bio *bp)
 {
 	struct g_journal_softc *sc;
 
 	KASSERT((bp->bio_cflags & GJ_BIO_MASK) == GJ_BIO_JOURNAL,
 	    ("Invalid bio (%d != %d).", bp->bio_cflags, GJ_BIO_JOURNAL));
 
 	sc = bp->bio_from->geom->softc;
 	sc->sc_flush_in_progress--;
 
 	if (bp->bio_error != 0) {
 		GJ_LOGREQ(0, bp, "[flush] Error while writing data (error=%d)",
 		    bp->bio_error);
 	}
 	gj_free(bp->bio_data, bp->bio_length);
 	GJ_LOGREQ(4, bp, "DONE");
 	g_destroy_bio(bp);
 }
 
 static void g_journal_release_delayed(struct g_journal_softc *sc);
 
 /*
  * Send queued flush requests to the journal consumer.
  *
  * Requests are moved from sc_flush_queue to a private list while fewer than
  * g_journal_parallel_flushes flushes are in progress; after each step the
  * delayed requests get a chance to be released. The collected requests are
  * optionally merged and then issued.
  */
 static void
 g_journal_flush_send(struct g_journal_softc *sc)
 {
 	struct g_consumer *jcp;
 	struct bio *batch, *bp, *tail;
 
 	jcp = sc->sc_jconsumer;
 	batch = NULL;
 	tail = NULL;
 	for (;;) {
 		if (sc->sc_flush_in_progress >= g_journal_parallel_flushes)
 			break;
 		/* Send one flush requests to the active journal. */
 		bp = GJQ_FIRST(sc->sc_flush_queue);
 		if (bp != NULL) {
 			GJQ_REMOVE(sc->sc_flush_queue, bp);
 			sc->sc_flush_count--;
 			/* From now on the request addresses the journal. */
 			bp->bio_offset = bp->bio_joffset;
 			bp->bio_joffset = 0;
 			sc->sc_flush_in_progress++;
 			GJQ_INSERT_AFTER(batch, bp, tail);
 			tail = bp;
 		}
 		/* Try to release delayed requests. */
 		g_journal_release_delayed(sc);
 		/* If there are no requests to flush, leave. */
 		if (GJQ_FIRST(sc->sc_flush_queue) == NULL)
 			break;
 	}
 	/* Merging returns a non-positive count of bios that went away. */
 	if (g_journal_do_optimize)
 		sc->sc_flush_in_progress += g_journal_optimize(batch);
 	while ((bp = GJQ_FIRST(batch)) != NULL) {
 		GJQ_REMOVE(batch, bp);
 		GJ_LOGREQ(3, bp, "Flush request send");
 		g_io_request(bp, jcp);
 	}
 }
 
 /*
  * Put a write request on the current queue and complete it right away.
  *
  * sc_current_count is adjusted by the number of elements the insertion
  * added and by the (non-positive) result of merging neighbours. Once the
  * queue holds enough entries for a full record, the record is flushed.
  */
 static void
 g_journal_add_current(struct g_journal_softc *sc, struct bio *bp)
 {
 
 	GJ_LOGREQ(4, bp, "CURRENT %d", sc->sc_current_count);
 	sc->sc_current_count += g_journal_insert_bio(&sc->sc_current_queue,
 	    bp, M_WAITOK);
 	sc->sc_current_count += g_journal_optimize(sc->sc_current_queue);
 	/*
 	 * For requests which are added to the current queue we deliver
 	 * response immediately.
 	 */
 	bp->bio_completed = bp->bio_length;
 	g_io_deliver(bp, 0);
 	/* Let's flush one record onto active journal provider. */
 	if (sc->sc_current_count >= g_journal_record_entries)
 		g_journal_flush(sc);
 }
 
 /*
  * Move delayed requests to the current queue, one at a time, for as long as
  * the flush queue is not full and there are delayed requests left.
  */
 static void
 g_journal_release_delayed(struct g_journal_softc *sc)
 {
 	struct bio *bp;
 
 	while (sc->sc_flush_count < g_journal_accept_immediately &&
 	    (bp = bioq_takefirst(&sc->sc_delayed_queue)) != NULL) {
 		sc->sc_delayed_count--;
 		g_journal_add_current(sc, bp);
 	}
 }
 
 /*
  * Accept a write request.
  *
  * The request goes straight to the current queue unless the flush queue is
  * full or other requests are already waiting; in that case it is appended
  * to the delayed queue so that the order of requests is kept.
  */
 static void
 g_journal_add_request(struct g_journal_softc *sc, struct bio *bp)
 {
 
 	if (sc->sc_delayed_count <= 0 &&
 	    sc->sc_flush_count < g_journal_accept_immediately) {
 		KASSERT(TAILQ_EMPTY(&sc->sc_delayed_queue.queue),
 		    ("DELAYED queue not empty."));
 		g_journal_add_current(sc, bp);
 		return;
 	}
 
 	/*
 	 * The flush queue is full, we need to delay the request.
 	 */
 	GJ_LOGREQ(4, bp, "DELAYED");
 	bioq_insert_tail(&sc->sc_delayed_queue, bp);
 	sc->sc_delayed_count++;
 }
 
 static void g_journal_read_done(struct bio *bp);
 
 /*
  * Try to find requested data in cache.
  *
  * Scans the queue starting at head for the first bio overlapping the range
  * [ostart, oend) of the read request pbp. Bios with bio_offset == -1 are
  * ignored. When sorted is non-zero, the scan stops at the first bio that
  * starts at or after oend.
  *
  * Return value:
  * - NULL if no bio in the queue overlaps the range;
  * - pbp if the overlap was copied into pbp and that completed the whole
  *   request, in which case the request has already been delivered;
  * - otherwise the overlapping bio. If it has data, the overlap has been
  *   copied into pbp; if it has none (bio_data == NULL), nothing was copied
  *   and the caller has to read the range from the journal.
  */
 static struct bio *
 g_journal_read_find(struct bio *head, int sorted, struct bio *pbp, off_t ostart,
     off_t oend)
 {
 	off_t cstart, cend;
 	struct bio *bp;
 
 	GJQ_FOREACH(head, bp) {
 		if (bp->bio_offset == -1)
 			continue;
 		/* Intersection of the request with this bio. */
 		cstart = MAX(ostart, bp->bio_offset);
 		cend = MIN(oend, bp->bio_offset + bp->bio_length);
 		if (cend <= ostart)
 			continue;
 		else if (cstart >= oend) {
 			if (!sorted)
 				continue;
 			else {
 				/* Everything that follows is past the range. */
 				bp = NULL;
 				break;
 			}
 		}
 		if (bp->bio_data == NULL)
 			break;
 		GJ_DEBUG(3, "READ(%p): (%jd, %jd) (bp=%p)", head,
 		    (intmax_t)cstart, (intmax_t)cend, bp);
 		bcopy(bp->bio_data + cstart - bp->bio_offset,
 		    pbp->bio_data + cstart - pbp->bio_offset, cend - cstart);
 		pbp->bio_completed += cend - cstart;
 		if (pbp->bio_completed == pbp->bio_length) {
 			/*
 			 * The whole request is complete. Deliver it with the
 			 * error recorded so far: a child read that finished
 			 * earlier may have failed, and passing 0 here would
 			 * silently discard that error.
 			 */
 			g_io_deliver(pbp, pbp->bio_error);
 			return (pbp);
 		}
 		break;
 	}
 	return (bp);
 }
 
 /*
  * This function is used for collecting data on read.
  * The complexity is because parts of the data can be stored in four different
  * places:
  * - in memory - the data not yet send to the active journal provider
  * - in the active journal
  * - in the inactive journal
  * - in the data provider
  *
  * The function handles the range [ostart, oend) of the read request pbp.
  * The queues are searched in a fixed order for a bio overlapping the range.
  * The overlapping part is served from that bio (copied from memory, or read
  * from the journal when the bio holds no data), and the parts of the range
  * before and after it are handled by recursive calls. When no queue has
  * anything for the range, it is read from the data provider.
  */
 static void
 g_journal_read(struct g_journal_softc *sc, struct bio *pbp, off_t ostart,
     off_t oend)
 {
 	struct bio *bp, *nbp, *head;
 	off_t cstart, cend;
 	u_int i, sorted = 0;
 
 	GJ_DEBUG(3, "READ: (%jd, %jd)", (intmax_t)ostart, (intmax_t)oend);
 
 	cstart = cend = -1;
 	bp = NULL;
 	head = NULL;
 	for (i = 1; i <= 5; i++) {
 		switch (i) {
 		case 1:	/* Not-yet-send data. */
 			head = sc->sc_current_queue;
 			sorted = 1;
 			break;
 		case 2: /* Skip flush queue as they are also in active queue */
 			continue;
 		case 3:	/* Active journal. */
 			head = sc->sc_active.jj_queue;
 			sorted = 1;
 			break;
 		case 4:	/* Inactive journal. */
 			/*
 			 * XXX: Here could be a race with g_journal_lowmem().
 			 */
 			head = sc->sc_inactive.jj_queue;
 			sorted = 1;
 			break;
 		case 5:	/* In-flight to the data provider. */
 			head = sc->sc_copy_queue;
 			sorted = 0;
 			break;
 		default:
 			panic("gjournal %s: i=%u", __func__, i);
 		}
 		bp = g_journal_read_find(head, sorted, pbp, ostart, oend);
 		if (bp == pbp) { /* Got the whole request. */
 			GJ_DEBUG(2, "Got the whole request from %u.", i);
 			return;
 		} else if (bp != NULL) {
 			/* Remember which part of the range this bio covers. */
 			cstart = MAX(ostart, bp->bio_offset);
 			cend = MIN(oend, bp->bio_offset + bp->bio_length);
 			GJ_DEBUG(2, "Got part of the request from %u (%jd-%jd).",
 			    i, (intmax_t)cstart, (intmax_t)cend);
 			break;
 		}
 	}
 	if (bp != NULL) {
 		if (bp->bio_data == NULL) {
 			/*
 			 * The bio is known but its data is not in memory:
 			 * read the overlapping part from the journal directly
 			 * into the right place of the parent's buffer.
 			 */
 			nbp = g_duplicate_bio(pbp);
 			nbp->bio_cflags = GJ_BIO_READ;
 			nbp->bio_data =
 			    pbp->bio_data + cstart - pbp->bio_offset;
 			nbp->bio_offset =
 			    bp->bio_joffset + cstart - bp->bio_offset;
 			nbp->bio_length = cend - cstart;
 			nbp->bio_done = g_journal_read_done;
 			g_io_request(nbp, sc->sc_jconsumer);
 		}
 		/*
 		 * If we don't have the whole request yet, call g_journal_read()
 		 * recursively.
 		 */
 		if (ostart < cstart)
 			g_journal_read(sc, pbp, ostart, cstart);
 		if (oend > cend)
 			g_journal_read(sc, pbp, cend, oend);
 	} else {
 		/*
 		 * No data in memory, no data in journal.
 		 * Its time for asking data provider.
 		 */
 		GJ_DEBUG(3, "READ(data): (%jd, %jd)", (intmax_t)ostart,
 		    (intmax_t)oend);
 		nbp = g_duplicate_bio(pbp);
 		nbp->bio_cflags = GJ_BIO_READ;
 		nbp->bio_data = pbp->bio_data + ostart - pbp->bio_offset;
 		nbp->bio_offset = ostart;
 		nbp->bio_length = oend - ostart;
 		nbp->bio_done = g_journal_read_done;
 		g_io_request(nbp, sc->sc_dconsumer);
 		/* We have the whole request, return here. */
 		return;
 	}
 }
 
 /*
  * Function responsible for handling finished READ requests.
  * It behaves like g_std_done(), the only difference is that we log error.
  *
  * The child's length is credited to the parent and the first error seen
  * among the children is remembered in the parent. Once all children have
  * finished and the whole length is accounted for, the parent is delivered
  * with that remembered error.
  */
 static void
 g_journal_read_done(struct bio *bp)
 {
 	struct bio *pbp;
 
 	KASSERT(bp->bio_cflags == GJ_BIO_READ,
 	    ("Invalid bio (%d != %d).", bp->bio_cflags, GJ_BIO_READ));
 
 	pbp = bp->bio_parent;
 	pbp->bio_inbed++;
 	pbp->bio_completed += bp->bio_length;
 
 	if (bp->bio_error != 0) {
 		if (pbp->bio_error == 0)
 			pbp->bio_error = bp->bio_error;
 		GJ_DEBUG(0, "Error while reading data from %s (error=%d).",
 		    bp->bio_to->name, bp->bio_error);
 	}
 	g_destroy_bio(bp);
 	if (pbp->bio_children == pbp->bio_inbed &&
 	    pbp->bio_completed == pbp->bio_length) {
 		/*
 		 * We're done. Pass on the recorded error; delivering 0 would
 		 * overwrite it and report a failed read as successful.
 		 */
 		g_io_deliver(pbp, pbp->bio_error);
 	}
 }
 
 /*
  * Deactivate the current journal and activate the next one.
  *
  * When the journal is not empty, the journal IDs are rotated, a new journal
  * header is written, the active journal becomes the inactive one and copying
  * of its data to the data provider is started. When it is empty, no switch
  * is needed; the device is only marked clean if it is not marked yet and
  * its provider is not open for writing.
  *
  * In both cases GJF_DEVICE_SWITCH is cleared at the end.
  */
 static void
 g_journal_switch(struct g_journal_softc *sc)
 {
 	struct g_provider *pp;
 
 	if (!JEMPTY(sc)) {
 		GJ_DEBUG(3, "Switching journal %s.", sc->sc_geom->name);
 
 		pp = sc->sc_jprovider;
 
 		/* Rotate the IDs: current -> previous, next -> current. */
 		sc->sc_journal_previous_id = sc->sc_journal_id;
 		sc->sc_journal_id = sc->sc_journal_next_id;
 		sc->sc_journal_next_id = arc4random();
 
 		GJ_VALIDATE_OFFSET(sc->sc_journal_offset, sc);
 
 		g_journal_write_header(sc);
 
 		/* What was active until now becomes the inactive journal. */
 		sc->sc_inactive.jj_offset = sc->sc_active.jj_offset;
 		sc->sc_inactive.jj_queue = sc->sc_active.jj_queue;
 
 		sc->sc_active.jj_queue = NULL;
 		sc->sc_active.jj_offset =
 		    sc->sc_journal_offset - pp->sectorsize;
 
 		/*
 		 * Switch is done, start copying data from the (now) inactive
 		 * journal to the data provider.
 		 */
 		g_journal_copy_start(sc);
 	} else {
 		GJ_DEBUG(3, "No need for %s switch.", sc->sc_name);
 		pp = LIST_FIRST(&sc->sc_geom->provider);
 		if ((sc->sc_flags & GJF_DEVICE_CLEAN) == 0 && pp->acw == 0) {
 			sc->sc_flags |= GJF_DEVICE_CLEAN;
 			GJ_DEBUG(1, "Marking %s as clean.", sc->sc_name);
 			g_journal_metadata_update(sc);
 		}
 	}
 	mtx_lock(&sc->sc_mtx);
 	sc->sc_flags &= ~GJF_DEVICE_SWITCH;
 	mtx_unlock(&sc->sc_mtx);
 }
 
 /*
  * Start a fresh journal: pick random current and next journal IDs (the
  * previous ID equals the current one), reset all journal offsets to
  * sc_jstart and write the journal header.
  */
 static void
 g_journal_initialize(struct g_journal_softc *sc)
 {
 
 	sc->sc_journal_previous_id = sc->sc_journal_id = arc4random();
 	sc->sc_journal_next_id = arc4random();
 	sc->sc_inactive.jj_offset = sc->sc_jstart;
 	sc->sc_journal_offset = sc->sc_jstart;
 	g_journal_write_header(sc);
 	sc->sc_active.jj_offset = sc->sc_jstart;
 }
 
 /*
  * Call the jd_dirty method of every entry in the NULL-terminated
  * g_journal_filesystems table on the data consumer.
  */
 static void
 g_journal_mark_as_dirty(struct g_journal_softc *sc)
 {
 	const struct g_journal_desc *desc;
 	int idx;
 
 	GJ_DEBUG(1, "Marking file system %s as dirty.", sc->sc_name);
 	idx = 0;
 	while ((desc = g_journal_filesystems[idx]) != NULL) {
 		desc->jd_dirty(sc->sc_dconsumer);
 		idx++;
 	}
 }
 
 /*
  * Synchronously read one sector at the given offset through consumer cp.
  * It is very similar to g_read_data(9), but it doesn't allocate memory for
  * bio and data on every call: the caller supplies both bp and the buffer.
  *
  * Returns the result of biowait().
  */
 static int
 g_journal_sync_read(struct g_consumer *cp, struct bio *bp, off_t offset,
     void *data)
 {
 
 	g_reset_bio(bp);
 	bp->bio_data = data;
 	bp->bio_length = cp->provider->sectorsize;
 	bp->bio_offset = offset;
 	bp->bio_done = NULL;
 	bp->bio_cmd = BIO_READ;
 	g_io_request(bp, cp);
 	return (biowait(bp, "gjs_read"));
 }
 
 #if 0
 /*
  * Function is called when we start the journal device and we detect that
  * one of the journals was not fully copied.
  * The purpose of this function is to read all record headers from the journal
  * and place them in the inactive queue, so we can start the journal
  * synchronization process and the journal provider itself.
  * Design decision was taken to not synchronize the whole journal here as it
  * can take too much time. Reading headers only and delaying synchronization
  * process until after journal provider is started should be the best choice.
  */
 #endif
 
 /*
  * Scan the journal starting at sc_journal_offset and rebuild the inactive
  * queue from the record headers found there.
  *
  * Entries of each record are first collected on a private list (fbp). They
  * are moved to sc_inactive.jj_queue only when the header of the next journal
  * is found, i.e. when the journal they belong to is known to be terminated.
  * Entries that are still on the private list when the scan stops are
  * discarded.
  *
  * If no termination was found (and the starting offset is greater than 0),
  * the journal is considered broken: the inactive queue is emptied, the
  * journal is re-initialized and the file system is marked dirty. Otherwise
  * copying of the collected data to the data provider is started.
  */
 static void
 g_journal_sync(struct g_journal_softc *sc)
 {
 	struct g_journal_record_header rhdr;
 	struct g_journal_entry *ent;
 	struct g_journal_header jhdr;
 	struct g_consumer *cp;
 	struct bio *bp, *fbp, *tbp;
 	off_t joffset, offset;
 	u_char *buf, sum[16];
 	uint64_t id;
 	MD5_CTX ctx;
 	int error, found, i;
 
 	found = 0;
 	fbp = NULL;
 	cp = sc->sc_jconsumer;
 	/* One bio and one sector buffer are reused for all header reads. */
 	bp = g_alloc_bio();
 	buf = gj_malloc(cp->provider->sectorsize, M_WAITOK);
 	offset = joffset = sc->sc_inactive.jj_offset = sc->sc_journal_offset;
 
 	GJ_DEBUG(2, "Looking for termination at %jd.", (intmax_t)joffset);
 
 	/*
 	 * Read and decode first journal header.
 	 */
 	error = g_journal_sync_read(cp, bp, offset, buf);
 	if (error != 0) {
 		GJ_DEBUG(0, "Error while reading journal header from %s.",
 		    cp->provider->name);
 		goto end;
 	}
 	error = g_journal_header_decode(buf, &jhdr);
 	if (error != 0) {
 		GJ_DEBUG(0, "Cannot decode journal header from %s.",
 		    cp->provider->name);
 		goto end;
 	}
 	id = sc->sc_journal_id;
 	if (jhdr.jh_journal_id != sc->sc_journal_id) {
 		GJ_DEBUG(1, "Journal ID mismatch at %jd (0x%08x != 0x%08x).",
 		    (intmax_t)offset, (u_int)jhdr.jh_journal_id, (u_int)id);
 		goto end;
 	}
 	offset += cp->provider->sectorsize;
 	/* From now on 'id' is the ID expected in the next journal header. */
 	id = sc->sc_journal_next_id = jhdr.jh_journal_next_id;
 
 	for (;;) {
 		/*
 		 * If the biggest record won't fit, look for a record header or
 		 * journal header from the beginning.
 		 */
 		GJ_VALIDATE_OFFSET(offset, sc);
 		error = g_journal_sync_read(cp, bp, offset, buf);
 		if (error != 0) {
 			/*
 			 * Not good. Having an error while reading header
 			 * means, that we cannot read next headers and in
 			 * consequence we cannot find termination.
 			 */
 			GJ_DEBUG(0,
 			    "Error while reading record header from %s.",
 			    cp->provider->name);
 			break;
 		}
 
 		error = g_journal_record_header_decode(buf, &rhdr);
 		if (error != 0) {
 			GJ_DEBUG(2, "Not a record header at %jd (error=%d).",
 			    (intmax_t)offset, error);
 			/*
 			 * This is not a record header.
 			 * If we are lucky, this is next journal header.
 			 */
 			error = g_journal_header_decode(buf, &jhdr);
 			if (error != 0) {
 				GJ_DEBUG(1, "Not a journal header at %jd (error=%d).",
 				    (intmax_t)offset, error);
 				/*
 				 * Nope, this is not journal header, which
 				 * basically means that journal is not
 				 * terminated properly.
 				 */
 				error = ENOENT;
 				break;
 			}
 			/*
 			 * Ok. This is header of _some_ journal. Now we need to
 			 * verify if this is header of the _next_ journal.
 			 */
 			if (jhdr.jh_journal_id != id) {
 				GJ_DEBUG(1, "Journal ID mismatch at %jd "
 				    "(0x%08x != 0x%08x).", (intmax_t)offset,
 				    (u_int)jhdr.jh_journal_id, (u_int)id);
 				error = ENOENT;
 				break;
 			}
 
 			/* Found termination. */
 			found++;
 			GJ_DEBUG(1, "Found termination at %jd (id=0x%08x).",
 			    (intmax_t)offset, (u_int)id);
 			/* The journal starting here becomes the active one. */
 			sc->sc_active.jj_offset = offset;
 			sc->sc_journal_offset =
 			    offset + cp->provider->sectorsize;
 			sc->sc_journal_id = id;
 			id = sc->sc_journal_next_id = jhdr.jh_journal_next_id;
 
 			/*
 			 * The journal scanned so far is terminated, so the
 			 * entries collected from it can go to the inactive
 			 * queue.
 			 */
 			while ((tbp = fbp) != NULL) {
 				fbp = tbp->bio_next;
 				GJ_LOGREQ(3, tbp, "Adding request.");
 				g_journal_insert_bio(&sc->sc_inactive.jj_queue,
 				    tbp, M_WAITOK);
 			}
 
 			/* Skip journal's header. */
 			offset += cp->provider->sectorsize;
 			continue;
 		}
 
 		/* Skip record's header. */
 		offset += cp->provider->sectorsize;
 
 		/*
 		 * Add information about every record entry to the inactive
 		 * queue.
 		 */
 		if (sc->sc_flags & GJF_DEVICE_CHECKSUM)
 			MD5Init(&ctx);
 		for (i = 0; i < rhdr.jrh_nentries; i++) {
 			ent = &rhdr.jrh_entries[i];
 			GJ_DEBUG(3, "Insert entry: %jd %jd.",
 			    (intmax_t)ent->je_offset, (intmax_t)ent->je_length);
 			/* Only the location is recorded here (data is NULL). */
 			g_journal_insert(&fbp, ent->je_offset,
 			    ent->je_offset + ent->je_length, ent->je_joffset,
 			    NULL, M_WAITOK);
 			if (sc->sc_flags & GJF_DEVICE_CHECKSUM) {
 				u_char *buf2;
 
 				/*
 				 * TODO: Should use faster function (like
 				 *       g_journal_sync_read()).
 				 */
 				buf2 = g_read_data(cp, offset, ent->je_length,
 				    NULL);
 				if (buf2 == NULL)
 					GJ_DEBUG(0, "Cannot read data at %jd.",
 					    (intmax_t)offset);
 				else {
 					MD5Update(&ctx, buf2, ent->je_length);
 					g_free(buf2);
 				}
 			}
 			/* Skip entry's data. */
 			offset += ent->je_length;
 		}
 		if (sc->sc_flags & GJF_DEVICE_CHECKSUM) {
 			MD5Final(sum, &ctx);
 			/* A mismatch is only reported; the scan continues. */
 			if (bcmp(sum, rhdr.jrh_sum, sizeof(rhdr.jrh_sum)) != 0) {
 				GJ_DEBUG(0, "MD5 hash mismatch at %jd!",
 				    (intmax_t)offset);
 			}
 		}
 	}
 end:
 	/* bp->bio_data is the sector buffer set up by g_journal_sync_read(). */
 	gj_free(bp->bio_data, cp->provider->sectorsize);
 	g_destroy_bio(bp);
 
 	/* Remove bios from unterminated journal. */
 	while ((tbp = fbp) != NULL) {
 		fbp = tbp->bio_next;
 		g_destroy_bio(tbp);
 	}
 
 	if (found < 1 && joffset > 0) {
 		GJ_DEBUG(0, "Journal on %s is broken/corrupted. Initializing.",
 		    sc->sc_name);
 		while ((tbp = sc->sc_inactive.jj_queue) != NULL) {
 			sc->sc_inactive.jj_queue = tbp->bio_next;
 			g_destroy_bio(tbp);
 		}
 		g_journal_initialize(sc);
 		g_journal_mark_as_dirty(sc);
 	} else {
 		GJ_DEBUG(0, "Journal %s consistent.", sc->sc_name);
 		g_journal_copy_start(sc);
 	}
 }
 
 /*
  * Wait for requests.
  *
  * With an empty current queue the function simply sleeps until woken up
  * (when g_journal_debug is 2 or more it wakes every 3 seconds to report the
  * queue counters). With requests in the current queue it sleeps at most
  * until 3 seconds have passed since last_write and then flushes them, so
  * that records which are not full don't wait forever (or for a journal
  * switch).
  *
  * sc_mtx is used as the sleep interlock and is released on every return
  * path.
  */
 static void
 g_journal_wait(struct g_journal_softc *sc, time_t last_write)
 {
 	int error, timeout;
 
 	GJ_DEBUG(3, "%s: enter", __func__);
 	if (sc->sc_current_count == 0) {
 		if (g_journal_debug >= 2) {
 			/*
 			 * If we have debug turned on, show number of elements
 			 * in various queues each time the sleep ends with a
 			 * non-zero result.
 			 */
 			while (msleep(sc, &sc->sc_mtx, PRIBIO, "gj:work",
 			    hz * 3) != 0) {
 				GJ_DEBUG(3, "Report: current count=%d",
 				    sc->sc_current_count);
 				GJ_DEBUG(3, "Report: flush count=%d",
 				    sc->sc_flush_count);
 				GJ_DEBUG(3, "Report: flush in progress=%d",
 				    sc->sc_flush_in_progress);
 				GJ_DEBUG(3, "Report: copy in progress=%d",
 				    sc->sc_copy_in_progress);
 				GJ_DEBUG(3, "Report: delayed=%d",
 				    sc->sc_delayed_count);
 			}
 			/* This sleep does not drop the mutex for us. */
 			mtx_unlock(&sc->sc_mtx);
 		} else {
 			msleep(sc, &sc->sc_mtx, PRIBIO | PDROP, "gj:work", 0);
 		}
 		GJ_DEBUG(3, "%s: exit 1", __func__);
 		return;
 	}
 
 	/*
 	 * Flush even not full records every 3 seconds.
 	 */
 	timeout = (last_write + 3 - time_second) * hz;
 	if (timeout > 0) {
 		error = msleep(sc, &sc->sc_mtx, PRIBIO | PDROP, "gj:work",
 		    timeout);
 		if (error == EWOULDBLOCK)
 			g_journal_flush_send(sc);
 		GJ_DEBUG(3, "%s: exit 3", __func__);
 		return;
 	}
 	/* The deadline has already passed: flush right now. */
 	mtx_unlock(&sc->sc_mtx);
 	g_journal_flush(sc);
 	g_journal_flush_send(sc);
 	GJ_DEBUG(3, "%s: exit 2", __func__);
 }
 
 /*
  * Worker thread.
  *
  * arg is the softc of the journal device. On startup the thread either
  * initializes a clean journal or synchronizes an unclean one, probes
  * BIO_FLUSH support, creates the "<name>.journal" provider and releases the
  * root mount hold. It then loops forever taking requests from the back
  * queue (finished internal requests) and the regular queue (new requests),
  * and handles journal switch and device destruction when no request can be
  * processed.
  */
 static void
 g_journal_worker(void *arg)
 {
 	struct g_journal_softc *sc;
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct bio *bp;
 	time_t last_write;
 	int type;
 
 	thread_lock(curthread);
 	sched_prio(curthread, PRIBIO);
 	thread_unlock(curthread);
 
 	sc = arg;
 	type = 0;	/* gcc */
 
 	if (sc->sc_flags & GJF_DEVICE_CLEAN) {
 		GJ_DEBUG(0, "Journal %s clean.", sc->sc_name);
 		g_journal_initialize(sc);
 	} else {
 		g_journal_sync(sc);
 	}
 	/*
 	 * Check if we can use BIO_FLUSH.
 	 */
 	sc->sc_bio_flush = 0;
 	if (g_io_flush(sc->sc_jconsumer) == 0) {
 		sc->sc_bio_flush |= GJ_FLUSH_JOURNAL;
 		GJ_DEBUG(1, "BIO_FLUSH supported by %s.",
 		    sc->sc_jconsumer->provider->name);
 	} else {
 		GJ_DEBUG(0, "BIO_FLUSH not supported by %s.",
 		    sc->sc_jconsumer->provider->name);
 	}
 	/* Probe the data consumer only if it is a separate one. */
 	if (sc->sc_jconsumer != sc->sc_dconsumer) {
 		if (g_io_flush(sc->sc_dconsumer) == 0) {
 			sc->sc_bio_flush |= GJ_FLUSH_DATA;
 			GJ_DEBUG(1, "BIO_FLUSH supported by %s.",
 			    sc->sc_dconsumer->provider->name);
 		} else {
 			GJ_DEBUG(0, "BIO_FLUSH not supported by %s.",
 			    sc->sc_dconsumer->provider->name);
 		}
 	}
 
 	/* Create the provider through which the journaled device is used. */
 	gp = sc->sc_geom;
 	g_topology_lock();
 	pp = g_new_providerf(gp, "%s.journal", sc->sc_name);
 	pp->mediasize = sc->sc_mediasize;
 	/*
 	 * There could be a problem when data provider and journal providers
 	 * have different sectorsize, but such scenario is prevented on journal
 	 * creation.
 	 */
 	pp->sectorsize = sc->sc_sectorsize;
 	g_error_provider(pp, 0);
 	g_topology_unlock();
 	last_write = time_second;
 
 	if (sc->sc_rootmount != NULL) {
 		GJ_DEBUG(1, "root_mount_rel %p", sc->sc_rootmount);
 		root_mount_rel(sc->sc_rootmount);
 		sc->sc_rootmount = NULL;
 	}
 
 	for (;;) {
 		/* Get first request from the queue. */
 		mtx_lock(&sc->sc_mtx);
 		/* The back queue takes precedence over the regular queue. */
 		bp = bioq_first(&sc->sc_back_queue);
 		if (bp != NULL)
 			type = (bp->bio_cflags & GJ_BIO_MASK);
 		if (bp == NULL) {
 			bp = bioq_first(&sc->sc_regular_queue);
 			if (bp != NULL)
 				type = GJ_BIO_REGULAR;
 		}
 		if (bp == NULL) {
 try_switch:
 			/*
 			 * Before a switch or a destroy, everything pending
 			 * has to reach the disk: flush the current queue and
 			 * wait for flushes and copies in progress to finish.
 			 */
 			if ((sc->sc_flags & GJF_DEVICE_SWITCH) ||
 			    (sc->sc_flags & GJF_DEVICE_DESTROY)) {
 				if (sc->sc_current_count > 0) {
 					mtx_unlock(&sc->sc_mtx);
 					g_journal_flush(sc);
 					g_journal_flush_send(sc);
 					continue;
 				}
 				if (sc->sc_flush_in_progress > 0)
 					goto sleep;
 				if (sc->sc_copy_in_progress > 0)
 					goto sleep;
 			}
 			if (sc->sc_flags & GJF_DEVICE_SWITCH) {
 				mtx_unlock(&sc->sc_mtx);
 				g_journal_switch(sc);
 				/* Wake up whoever waits for the switch. */
 				wakeup(&sc->sc_journal_copying);
 				continue;
 			}
 			if (sc->sc_flags & GJF_DEVICE_DESTROY) {
 				GJ_DEBUG(1, "Shutting down worker "
 				    "thread for %s.", gp->name);
 				sc->sc_worker = NULL;
 				wakeup(&sc->sc_worker);
 				mtx_unlock(&sc->sc_mtx);
 				/* kproc_exit() is not expected to return. */
 				kproc_exit(0);
 			}
 sleep:
 			/* Entered with sc_mtx held; g_journal_wait() drops it. */
 			g_journal_wait(sc, last_write);
 			continue;
 		}
 		/*
 		 * If we're in switch process, we need to delay all new
 		 * write requests until its done.
 		 * The request is left on its queue and picked up again later.
 		 */
 		if ((sc->sc_flags & GJF_DEVICE_SWITCH) &&
 		    type == GJ_BIO_REGULAR && bp->bio_cmd == BIO_WRITE) {
 			GJ_LOGREQ(2, bp, "WRITE on SWITCH");
 			goto try_switch;
 		}
 		if (type == GJ_BIO_REGULAR)
 			bioq_remove(&sc->sc_regular_queue, bp);
 		else
 			bioq_remove(&sc->sc_back_queue, bp);
 		mtx_unlock(&sc->sc_mtx);
 		switch (type) {
 		case GJ_BIO_REGULAR:
 			/* Regular request. */
 			switch (bp->bio_cmd) {
 			case BIO_READ:
 				g_journal_read(sc, bp, bp->bio_offset,
 				    bp->bio_offset + bp->bio_length);
 				break;
 			case BIO_WRITE:
 				last_write = time_second;
 				g_journal_add_request(sc, bp);
 				g_journal_flush_send(sc);
 				break;
 			default:
 				panic("Invalid bio_cmd (%d).", bp->bio_cmd);
 			}
 			break;
 		case GJ_BIO_COPY:
 			/* A step of copying journal data has finished. */
 			switch (bp->bio_cmd) {
 			case BIO_READ:
 				/*
 				 * Non-zero means the read failed and a copy
 				 * slot was released, so send more requests.
 				 */
 				if (g_journal_copy_read_done(bp))
 					g_journal_copy_send(sc);
 				break;
 			case BIO_WRITE:
 				g_journal_copy_write_done(bp);
 				g_journal_copy_send(sc);
 				break;
 			default:
 				panic("Invalid bio_cmd (%d).", bp->bio_cmd);
 			}
 			break;
 		case GJ_BIO_JOURNAL:
 			/* A write to the journal has finished. */
 			g_journal_flush_done(bp);
 			g_journal_flush_send(sc);
 			break;
 		case GJ_BIO_READ:
 		default:
 			panic("Invalid bio (%d).", type);
 		}
 	}
 }
 
 /*
  * GEOM event handler: destroy the journal device whose softc is passed as
  * arg. The topology lock is asserted to be held.
  */
 static void
 g_journal_destroy_event(void *arg, int flags __unused)
 {
 	struct g_journal_softc *sc = arg;
 
 	g_topology_assert();
 	g_journal_destroy(sc);
 }
 
 /*
  * Callout handler: the journal device (softc in arg) could not be
  * completed in time. Log the fact and post an event that destroys it.
  */
 static void
 g_journal_timeout(void *arg)
 {
 	struct g_journal_softc *sc = arg;
 
 	GJ_DEBUG(0, "Timeout. Journal %s cannot be completed.",
 	    sc->sc_geom->name);
 	g_post_event(g_journal_destroy_event, sc, M_NOWAIT, NULL);
 }
 
 /*
  * Create a new gjournal geom for provider 'pp', or attach 'pp' to an
  * already existing geom that carries the same journal ID but does not yet
  * have a provider of this type.
  *
  * 'md' is the metadata read from 'pp'; md_type tells whether the provider
  * holds the data, the journal, or both.  Once both parts are present
  * (GJ_TYPE_COMPLETE) the pending timeout is cancelled and the worker
  * thread is created.
  *
  * Returns the geom on success (including the "still incomplete" case) and
  * NULL when the metadata is rejected, the provider cannot be opened, or
  * the worker thread cannot be created.
  */
 static struct g_geom *
 g_journal_create(struct g_class *mp, struct g_provider *pp,
     const struct g_journal_metadata *md)
 {
 	struct g_journal_softc *sc;
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	int error;
 
 	sc = NULL;	/* gcc */
 
 	g_topology_assert();
 	/*
 	 * There are two possibilities:
 	 * 1. Data and both journals are on the same provider.
 	 * 2. Data and journals are all on separated providers.
 	 */
 	/* Look for journal device with the same ID. */
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		if (sc->sc_id == md->md_id)
 			break;
 	}
 	/* gp == NULL means the loop finished without finding a match. */
 	if (gp == NULL)
 		sc = NULL;
 	else if (sc != NULL && (sc->sc_type & md->md_type) != 0) {
 		/* The existing geom already has a provider of this type. */
 		GJ_DEBUG(1, "Journal device %u already configured.", sc->sc_id);
 		return (NULL);
 	}
 	/* The type must be non-zero and contain only known type bits. */
 	if (md->md_type == 0 || (md->md_type & ~GJ_TYPE_COMPLETE) != 0) {
 		GJ_DEBUG(0, "Invalid type on %s.", pp->name);
 		return (NULL);
 	}
 	if (md->md_type & GJ_TYPE_DATA) {
 		GJ_DEBUG(0, "Journal %u: %s contains data.", md->md_id,
 		    pp->name);
 	}
 	if (md->md_type & GJ_TYPE_JOURNAL) {
 		GJ_DEBUG(0, "Journal %u: %s contains journal.", md->md_id,
 		    pp->name);
 	}
 
 	if (sc == NULL) {
 		/* Action geom. */
 		sc = malloc(sizeof(*sc), M_JOURNAL, M_WAITOK | M_ZERO);
 		sc->sc_id = md->md_id;
 		/* No parts attached yet; sc_type accumulates them below. */
 		sc->sc_type = 0;
 		sc->sc_flags = 0;
 		sc->sc_worker = NULL;
 
 		gp = g_new_geomf(mp, "gjournal %u", sc->sc_id);
 		gp->start = g_journal_start;
 		gp->orphan = g_journal_orphan;
 		gp->access = g_journal_access;
 		gp->softc = sc;
 		gp->flags |= G_GEOM_VOLATILE_BIO;
 		sc->sc_geom = gp;
 
 		mtx_init(&sc->sc_mtx, "gjournal", NULL, MTX_DEF);
 
 		bioq_init(&sc->sc_back_queue);
 		bioq_init(&sc->sc_regular_queue);
 		bioq_init(&sc->sc_delayed_queue);
 		sc->sc_delayed_count = 0;
 		sc->sc_current_queue = NULL;
 		sc->sc_current_count = 0;
 		sc->sc_flush_queue = NULL;
 		sc->sc_flush_count = 0;
 		sc->sc_flush_in_progress = 0;
 		sc->sc_copy_queue = NULL;
 		sc->sc_copy_in_progress = 0;
 		sc->sc_inactive.jj_queue = NULL;
 		sc->sc_active.jj_queue = NULL;
 
 		/* The hold is released again in g_journal_destroy(). */
 		sc->sc_rootmount = root_mount_hold("GJOURNAL");
 		GJ_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
 
 		callout_init(&sc->sc_callout, 1);
 		if (md->md_type != GJ_TYPE_COMPLETE) {
 			/*
 			 * Journal and data are on separate providers.
 			 * At this point we have only one of them.
 			 * We setup a timeout in case the other part will not
 			 * appear, so we won't wait forever.
 			 */
 			callout_reset(&sc->sc_callout, 5 * hz,
 			    g_journal_timeout, sc);
 		}
 	}
 
 	/* Remember type of the data provider. */
 	if (md->md_type & GJ_TYPE_DATA)
 		sc->sc_orig_type = md->md_type;
 	sc->sc_type |= md->md_type;
 	cp = NULL;
 
 	if (md->md_type & GJ_TYPE_DATA) {
 		/* Carry the relevant metadata flags over into the softc. */
 		if (md->md_flags & GJ_FLAG_CLEAN)
 			sc->sc_flags |= GJF_DEVICE_CLEAN;
 		if (md->md_flags & GJ_FLAG_CHECKSUM)
 			sc->sc_flags |= GJF_DEVICE_CHECKSUM;
 		cp = g_new_consumer(gp);
 		error = g_attach(cp, pp);
 		KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
 		    pp->name, error));
 		error = g_access(cp, 1, 1, 1);
 		if (error != 0) {
 			GJ_DEBUG(0, "Cannot access %s (error=%d).", pp->name,
 			    error);
 			g_journal_destroy(sc);
 			return (NULL);
 		}
 		sc->sc_dconsumer = cp;
 		/*
 		 * The last sector of the provider is excluded from the media
 		 * size (presumably it holds the metadata -- confirm).
 		 */
 		sc->sc_mediasize = pp->mediasize - pp->sectorsize;
 		sc->sc_sectorsize = pp->sectorsize;
 		sc->sc_jstart = md->md_jstart;
 		sc->sc_jend = md->md_jend;
 		if (md->md_provider[0] != '\0')
 			sc->sc_flags |= GJF_DEVICE_HARDCODED;
 		sc->sc_journal_offset = md->md_joffset;
 		sc->sc_journal_id = md->md_jid;
 		sc->sc_journal_previous_id = md->md_jid;
 	}
 	if (md->md_type & GJ_TYPE_JOURNAL) {
 		/* cp is still NULL when this provider carries no data part. */
 		if (cp == NULL) {
 			cp = g_new_consumer(gp);
 			error = g_attach(cp, pp);
 			KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
 			    pp->name, error));
 			error = g_access(cp, 1, 1, 1);
 			if (error != 0) {
 				GJ_DEBUG(0, "Cannot access %s (error=%d).",
 				    pp->name, error);
 				g_journal_destroy(sc);
 				return (NULL);
 			}
 		} else {
 			/*
 			 * Journal is on the same provider as data, which means
 			 * that data provider ends where journal starts.
 			 */
 			sc->sc_mediasize = md->md_jstart;
 		}
 		sc->sc_jconsumer = cp;
 	}
 
 	/* Start switcher kproc if needed. */
 	if (g_journal_switcher_proc == NULL)
 		g_journal_start_switcher(mp);
 
 	if ((sc->sc_type & GJ_TYPE_COMPLETE) != GJ_TYPE_COMPLETE) {
 		/* Journal is not complete yet. */
 		return (gp);
 	} else {
 		/* Journal complete, cancel timeout. */
 		callout_drain(&sc->sc_callout);
 	}
 
 	/* Both parts are present: start the per-journal worker thread. */
 	error = kproc_create(g_journal_worker, sc, &sc->sc_worker, 0, 0,
 	    "g_journal %s", sc->sc_name);
 	if (error != 0) {
 		GJ_DEBUG(0, "Cannot create worker thread for %s.journal.",
 		    sc->sc_name);
 		g_journal_destroy(sc);
 		return (NULL);
 	}
 
 	return (gp);
 }
 
 /*
  * GEOM event callback: detach and destroy the consumer passed as the event
  * argument.  Posted from g_journal_destroy() so that the consumer goes away
  * later, from the event, rather than inside g_journal_destroy() itself.
  */
 static void
 g_journal_destroy_consumer(void *arg, int flags __unused)
 {
 	struct g_consumer *consumer = arg;
 
 	g_topology_assert();
 	g_detach(consumer);
 	g_destroy_consumer(consumer);
 }
 
 /*
  * Tear down the journal described by 'sc'.
  *
  * Must be called with the topology lock held.  The lock is dropped while
  * the worker thread is being stopped and is held again on return.
  *
  * Returns ENXIO if 'sc' is NULL, EBUSY if the journal's provider is still
  * open, and 0 once the geom has been withered and the softc freed.
  */
 static int
 g_journal_destroy(struct g_journal_softc *sc)
 {
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct g_consumer *cp;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 
 	gp = sc->sc_geom;
 	/* pp stays NULL when the geom has no provider on its list. */
 	pp = LIST_FIRST(&gp->provider);
 	if (pp != NULL) {
 		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
 			GJ_DEBUG(1, "Device %s is still open (r%dw%de%d).",
 			    pp->name, pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		}
 		g_error_provider(pp, ENXIO);
 
 		/* Push out what is still pending before shutting down. */
 		g_journal_flush(sc);
 		g_journal_flush_send(sc);
 		g_journal_switch(sc);
 	}
 
 	sc->sc_flags |= (GJF_DEVICE_DESTROY | GJF_DEVICE_CLEAN);
 
 	g_topology_unlock();
 
 	if (sc->sc_rootmount != NULL) {
 		GJ_DEBUG(1, "root_mount_rel %p", sc->sc_rootmount);
 		root_mount_rel(sc->sc_rootmount);
 		sc->sc_rootmount = NULL;
 	}
 
 	callout_drain(&sc->sc_callout);
 	/*
 	 * Wake up whoever sleeps on the softc and wait until sc_worker
 	 * becomes NULL (presumably cleared by the exiting worker thread --
 	 * confirm against g_journal_worker()).
 	 */
 	mtx_lock(&sc->sc_mtx);
 	wakeup(sc);
 	while (sc->sc_worker != NULL)
 		msleep(&sc->sc_worker, &sc->sc_mtx, PRIBIO, "gj:destroy", 0);
 	mtx_unlock(&sc->sc_mtx);
 
 	if (pp != NULL) {
 		GJ_DEBUG(1, "Marking %s as clean.", sc->sc_name);
 		/* Metadata is written while the topology lock is dropped. */
 		g_journal_metadata_update(sc);
 		g_topology_lock();
 		g_wither_provider(pp, ENXIO);
 	} else {
 		g_topology_lock();
 	}
 	mtx_destroy(&sc->sc_mtx);
 
 	if (sc->sc_current_count != 0) {
 		GJ_DEBUG(0, "Warning! Number of current requests %d.",
 		    sc->sc_current_count);
 	}
 
 	/* From here on the geom no longer refers to the softc. */
 	gp->softc = NULL;
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (cp->acr + cp->acw + cp->ace > 0)
 			g_access(cp, -1, -1, -1);
 		/*
 		 * We keep all consumers open for writing, so if I'll detach
 		 * and destroy consumer here, I'll get providers for taste, so
 		 * journal will be started again.
 		 * Sending an event here, prevents this from happening.
 		 */
 		g_post_event(g_journal_destroy_consumer, cp, M_WAITOK, NULL);
 	}
 	g_wither_geom(gp, ENXIO);
 	free(sc, M_JOURNAL);
 	return (0);
 }
 
 /*
  * Orphan method installed on the temporary geom used while tasting.
  * It is not expected to run; the assertion fails unconditionally if it does.
  */
 static void
 g_journal_taste_orphan(struct g_consumer *cp)
 {
 
 	KASSERT(0, ("%s called while tasting %s.", __func__,
 	    cp->provider->name));
 }
 
 /*
  * Taste method of the class: read gjournal metadata from provider 'pp'
  * through a temporary geom/consumer pair and, if the metadata is accepted,
  * hand it to g_journal_create().
  *
  * Returns the geom returned by g_journal_create(), or NULL when 'pp'
  * belongs to this class, the metadata cannot be read, or the metadata does
  * not match this provider (hardcoded name or recorded size differ).
  */
 static struct g_geom *
 g_journal_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_journal_metadata md;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	GJ_DEBUG(2, "Tasting %s.", pp->name);
 	/* Do not taste providers that belong to this class. */
 	if (pp->geom->class == mp)
 		return (NULL);
 
 	/* Temporary geom and consumer, used only to read the metadata. */
 	gp = g_new_geomf(mp, "journal:taste");
 	/* This orphan function should never be called. */
 	gp->orphan = g_journal_taste_orphan;
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		error = g_journal_metadata_read(cp, &md);
 		g_detach(cp);
 	}
 	/* The temporary objects are destroyed whether or not the read worked. */
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	/* A provider name stored in the metadata must match this provider. */
 	if (md.md_provider[0] != '\0' &&
 	    !g_compare_names(md.md_provider, pp->name))
 		return (NULL);
 	/* A non-zero recorded provider size must match as well. */
 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
 		return (NULL);
 	if (g_journal_debug >= 2)
 		journal_metadata_dump(&md);
 
 	gp = g_journal_create(mp, pp, &md);
 	return (gp);
 }
 
 /*
  * Look up a journal of class 'mp' by name.
  *
  * A leading _PATH_DEV (compared over 5 characters) is skipped.  Journals
  * that are being destroyed or are not complete are ignored.  'name' is
  * matched against sc_name first and then against the name of the geom's
  * first provider.
  *
  * Returns the matching softc, or NULL if there is none.
  */
 static struct g_journal_softc *
 g_journal_find_device(struct g_class *mp, const char *name)
 {
 	struct g_journal_softc *softc;
 	struct g_provider *prov;
 	struct g_geom *cur;
 
 	if (strncmp(name, _PATH_DEV, 5) == 0)
 		name += 5;
 
 	LIST_FOREACH(cur, &mp->geom, geom) {
 		softc = cur->softc;
 		if (softc == NULL ||
 		    (softc->sc_flags & GJF_DEVICE_DESTROY) != 0 ||
 		    (softc->sc_type & GJ_TYPE_COMPLETE) != GJ_TYPE_COMPLETE)
 			continue;
 
 		if (strcmp(softc->sc_name, name) == 0)
 			return (softc);
 
 		prov = LIST_FIRST(&cur->provider);
 		if (prov != NULL && strcmp(prov->name, name) == 0)
 			return (softc);
 	}
 
 	return (NULL);
 }
 
 /*
  * Handler for the "destroy"/"stop" verbs: destroy every journal named in
  * the request parameters "arg0" .. "arg<nargs-1>".
  *
  * Processing stops at the first problem, which is reported through
  * gctl_error(); journals destroyed before that point stay destroyed.
  */
 static void
 g_journal_ctl_destroy(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_journal_softc *softc;
 	const char *devname;
 	char key[16];
 	int *nargs;
 	int count, idx, rv;
 
 	g_topology_assert();
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "No '%s' argument.", "nargs");
 		return;
 	}
 	count = *nargs;
 	if (count <= 0) {
 		gctl_error(req, "Missing device(s).");
 		return;
 	}
 
 	for (idx = 0; idx < count; idx++) {
 		snprintf(key, sizeof(key), "arg%d", idx);
 		devname = gctl_get_asciiparam(req, key);
 		if (devname == NULL) {
 			gctl_error(req, "No 'arg%d' argument.", idx);
 			return;
 		}
 
 		softc = g_journal_find_device(mp, devname);
 		if (softc == NULL) {
 			gctl_error(req, "No such device: %s.", devname);
 			return;
 		}
 
 		rv = g_journal_destroy(softc);
 		if (rv == 0)
 			continue;
 		gctl_error(req, "Cannot destroy device %s (error=%d).",
 		    LIST_FIRST(&softc->sc_geom->provider)->name, rv);
 		return;
 	}
 }
 
 /*
  * Handler for the "sync" verb: request a journal switch from the switcher
  * thread and wait until g_journal_sync_requested drops back to zero.
  *
  * The topology lock is released for the duration of the wait and taken
  * again before returning.
  */
 static void
 g_journal_ctl_sync(struct gctl_req *req __unused, struct g_class *mp __unused)
 {
 
 	g_topology_assert();
 	g_topology_unlock();
 	g_journal_sync_requested++;
 	/* The switcher thread sleeps on g_journal_switcher_state. */
 	wakeup(&g_journal_switcher_state);
 	/* Re-check at least every hz / 2 ticks even without a wakeup. */
 	while (g_journal_sync_requested > 0)
 		tsleep(&g_journal_sync_requested, PRIBIO, "j:sreq", hz / 2);
 	g_topology_lock();
 }
 
 /*
  * Control request entry point of the class.
  *
  * The request must carry a "version" parameter equal to G_JOURNAL_VERSION.
  * "destroy" and "stop" are handled by g_journal_ctl_destroy(), "sync" by
  * g_journal_ctl_sync(); any other verb is reported as an error.
  */
 static void
 g_journal_config(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	uint32_t *vers;
 
 	g_topology_assert();
 
 	vers = gctl_get_paraml(req, "version", sizeof(*vers));
 	if (vers == NULL) {
 		gctl_error(req, "No '%s' argument.", "version");
 		return;
 	}
 	if (*vers != G_JOURNAL_VERSION) {
 		gctl_error(req, "Userland and kernel parts are out of sync.");
 		return;
 	}
 
 	if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0)
 		g_journal_ctl_destroy(req, mp);
 	else if (strcmp(verb, "sync") == 0)
 		g_journal_ctl_sync(req, mp);
 	else
 		gctl_error(req, "Unknown verb.");
 }
 
 /*
  * dumpconf method: append the class-specific XML configuration to 'sb'.
  *
  * - For a provider (pp != NULL) nothing is emitted.
  * - For a consumer (cp != NULL) a <Role> element is emitted ("Data",
  *   "Journal" or "Data,Journal"); for the journal consumer <Jstart> and
  *   <Jend> follow.
  * - For the geom itself the journal <ID> is emitted.
  *
  * Every element is prefixed with 'indent'.  Nothing is emitted for a geom
  * without a softc.
  */
 static void
 g_journal_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_journal_softc *sc;
 
 	g_topology_assert();
 
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	if (pp != NULL) {
 		/* Nothing here. */
 	} else if (cp != NULL) {
 		int first = 1;
 
 		sbuf_printf(sb, "%s<Role>", indent);
 		if (cp == sc->sc_dconsumer) {
 			sbuf_cat(sb, "Data");
 			first = 0;
 		}
 		if (cp == sc->sc_jconsumer) {
 			/* Separate the roles when one consumer has both. */
 			if (!first)
 				sbuf_cat(sb, ",");
 			sbuf_cat(sb, "Journal");
 		}
 		sbuf_cat(sb, "</Role>\n");
 		if (cp == sc->sc_jconsumer) {
 			/* Indent these like every other element. */
 			sbuf_printf(sb, "%s<Jstart>%jd</Jstart>\n", indent,
 			    (intmax_t)sc->sc_jstart);
 			sbuf_printf(sb, "%s<Jend>%jd</Jend>\n", indent,
 			    (intmax_t)sc->sc_jend);
 		}
 	} else {
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
 	}
 }
 
 /*
  * Event handler registration tags.  Set in g_journal_init(); a NULL tag
  * means the handler is not registered and g_journal_fini() skips it.
  */
 static eventhandler_tag g_journal_event_shutdown = NULL;
 static eventhandler_tag g_journal_event_lowmem = NULL;
 
 /*
  * shutdown_post_sync event handler: destroy every journal of the class
  * passed in 'arg'.  Does nothing when the kernel has panicked.
  */
 static void
 g_journal_shutdown(void *arg, int howto __unused)
 {
 	struct g_class *classp;
 	struct g_geom *cur, *nxt;
 
 	if (KERNEL_PANICKED())
 		return;
 
 	classp = arg;
 	g_topology_lock();
 	/* Safe iteration: the successor is fetched before the body runs. */
 	LIST_FOREACH_SAFE(cur, &classp->geom, geom, nxt) {
 		if (cur->softc != NULL) {
 			GJ_DEBUG(0, "Shutting down geom %s.", cur->name);
 			g_journal_destroy(cur->softc);
 		}
 	}
 	g_topology_unlock();
 }
 
 /*
  * vm_lowmem event handler: free cached request data from the inactive
  * queues when memory is low.
  *
  * At most GJ_FREE_AT_ONCE inactive-queue entries are visited per call.
  * The budget is shared by all journals of the class and is consumed by
  * every visited entry, whether or not that entry had data to free.
  */
 #define	GJ_FREE_AT_ONCE	4
 static void
 g_journal_lowmem(void *arg, int howto __unused)
 {
 	struct g_journal_softc *sc;
 	struct g_class *mp;
 	struct g_geom *gp;
 	struct bio *bp;
 	u_int nfree = GJ_FREE_AT_ONCE;
 
 	g_journal_stats_low_mem++;
 	mp = arg;
 	g_topology_lock();
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		/* Skip geoms without a softc and journals being destroyed. */
 		if (sc == NULL || (sc->sc_flags & GJF_DEVICE_DESTROY))
 			continue;
 		mtx_lock(&sc->sc_mtx);
 		for (bp = sc->sc_inactive.jj_queue; nfree > 0 && bp != NULL;
 		    nfree--, bp = bp->bio_next) {
 			/*
 			 * It is safe to free the bio_data, because:
 			 * 1. If bio_data is NULL it will be read from the
 			 *    inactive journal.
 			 * 2. If bp is sent down, it is first removed from the
 			 *    inactive queue, so it's impossible to free the
 			 *    data from under an in-flight bio.
 			 * On the other hand, freeing elements from the active
 			 * queue is not safe.
 			 */
 			if (bp->bio_data != NULL) {
 				GJ_DEBUG(2, "Freeing data from %s.",
 				    sc->sc_name);
 				gj_free(bp->bio_data, bp->bio_length);
 				bp->bio_data = NULL;
 			}
 		}
 		mtx_unlock(&sc->sc_mtx);
 		/* Budget used up: do not look at the remaining journals. */
 		if (nfree == 0)
 			break;
 	}
 	g_topology_unlock();
 }
 
 static void g_journal_switcher(void *arg);
 
 /*
  * Class init method: sanitize the cache tunables and register the
  * shutdown and low-memory event handlers.  A failed registration is only
  * logged.
  */
 static void
 g_journal_init(struct g_class *mp)
 {
 
 	/* Fall back to a conservative divisor if the configured one is bad. */
 	if (g_journal_cache_divisor <= 0)
 		g_journal_cache_divisor = 5;
 	else if (vm_kmem_size / g_journal_cache_divisor == 0)
 		g_journal_cache_divisor = 5;
 
 	/* A positive limit is replaced by one derived from vm_kmem_size. */
 	if (g_journal_cache_limit > 0) {
 		g_journal_cache_limit = vm_kmem_size / g_journal_cache_divisor;
 		g_journal_cache_low = g_journal_cache_limit / 100 *
 		    g_journal_cache_switch;
 	}
 
 	g_journal_event_shutdown = EVENTHANDLER_REGISTER(shutdown_post_sync,
 	    g_journal_shutdown, mp, EVENTHANDLER_PRI_FIRST);
 	if (g_journal_event_shutdown == NULL) {
 		GJ_DEBUG(0, "Warning! Cannot register shutdown event.");
 	}
 
 	g_journal_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem,
 	    g_journal_lowmem, mp, EVENTHANDLER_PRI_FIRST);
 	if (g_journal_event_lowmem == NULL) {
 		GJ_DEBUG(0, "Warning! Cannot register lowmem event.");
 	}
 }
 
 /*
  * Class fini method: deregister the event handlers that were registered
  * and stop the switcher thread if one is running.
  */
 static void
 g_journal_fini(struct g_class *mp)
 {
 
 	if (g_journal_event_shutdown != NULL)
 		EVENTHANDLER_DEREGISTER(shutdown_post_sync,
 		    g_journal_event_shutdown);
 
 	if (g_journal_event_lowmem != NULL) {
 		EVENTHANDLER_DEREGISTER(vm_lowmem, g_journal_event_lowmem);
 	}
 
 	if (g_journal_switcher_proc != NULL) {
 		g_journal_stop_switcher();
 	}
 }
 
 DECLARE_GEOM_CLASS(g_journal_class, g_journal);
 
 /*
  * Find the file system descriptor whose jd_fstype equals 'fstype' in the
  * NULL-terminated g_journal_filesystems[] table.
  *
  * Returns the descriptor, or NULL if the table has no such entry.
  */
 static const struct g_journal_desc *
 g_journal_find_desc(const char *fstype)
 {
 	const struct g_journal_desc *entry;
 	int idx;
 
 	for (idx = 0; (entry = g_journal_filesystems[idx]) != NULL; idx++) {
 		if (strcmp(entry->jd_fstype, fstype) == 0)
 			return (entry);
 	}
 	return (NULL);
 }
 
 /*
  * Request a journal switch for 'sc' and sleep until it is done.
  *
  * Must be called with sc_mtx held.  The function replaces the
  * BEFORE_SWITCH flag with GJF_DEVICE_SWITCH, wakes up sleepers on 'sc' and
  * then sleeps on &sc->sc_journal_copying until GJF_DEVICE_SWITCH is no
  * longer set (the flag is cleared elsewhere -- presumably by the worker
  * thread; confirm).
  */
 static void
 g_journal_switch_wait(struct g_journal_softc *sc)
 {
 	struct bintime bt;
 
 	mtx_assert(&sc->sc_mtx, MA_OWNED);
 	/* Report outstanding work; this is debugging output only. */
 	if (g_journal_debug >= 2) {
 		if (sc->sc_flush_in_progress > 0) {
 			GJ_DEBUG(2, "%d requests flushing.",
 			    sc->sc_flush_in_progress);
 		}
 		if (sc->sc_copy_in_progress > 0) {
 			GJ_DEBUG(2, "%d requests copying.",
 			    sc->sc_copy_in_progress);
 		}
 		if (sc->sc_flush_count > 0) {
 			GJ_DEBUG(2, "%d requests to flush.",
 			    sc->sc_flush_count);
 		}
 		if (sc->sc_delayed_count > 0) {
 			GJ_DEBUG(2, "%d requests delayed.",
 			    sc->sc_delayed_count);
 		}
 	}
 	g_journal_stats_switches++;
 	if (sc->sc_copy_in_progress > 0)
 		g_journal_stats_wait_for_copy++;
 	GJ_TIMER_START(1, &bt);
 	sc->sc_flags &= ~GJF_DEVICE_BEFORE_SWITCH;
 	sc->sc_flags |= GJF_DEVICE_SWITCH;
 	wakeup(sc);
 	/* msleep() releases sc_mtx while asleep and reacquires it after. */
 	while (sc->sc_flags & GJF_DEVICE_SWITCH) {
 		msleep(&sc->sc_journal_copying, &sc->sc_mtx, PRIBIO,
 		    "gj:switch", 0);
 	}
 	GJ_TIMER_STOP(1, &bt, "Switch time of %s", sc->sc_name);
 }
 
 /*
  * Perform one journal switch pass over the journals of class 'classp'.
  *
  * The pass has three stages:
  * 1. Mark every complete journal that is not being destroyed with
  *    GJF_DEVICE_BEFORE_SWITCH.
  * 2. For every writable mount that sits on a gjournal provider and whose
  *    file system type has a descriptor: sync the file system, flush the
  *    cache, suspend writes, let the descriptor mark the file system clean,
  *    switch the journal and resume writes.  Empty journals only get their
  *    BEFORE_SWITCH mark removed.
  * 3. Switch every journal that still carries the BEFORE_SWITCH mark, i.e.
  *    the ones not handled in stage 2.
  */
 static void
 g_journal_do_switch(struct g_class *classp)
 {
 	struct g_journal_softc *sc;
 	const struct g_journal_desc *desc;
 	struct g_geom *gp;
 	struct mount *mp;
 	struct bintime bt;
 	char *mountpoint;
 	int error, save;
 
 	/* Stage 1: mark the candidates. */
 	g_topology_lock();
 	LIST_FOREACH(gp, &classp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		if (sc->sc_flags & GJF_DEVICE_DESTROY)
 			continue;
 		if ((sc->sc_type & GJ_TYPE_COMPLETE) != GJ_TYPE_COMPLETE)
 			continue;
 		mtx_lock(&sc->sc_mtx);
 		sc->sc_flags |= GJF_DEVICE_BEFORE_SWITCH;
 		mtx_unlock(&sc->sc_mtx);
 	}
 	g_topology_unlock();
 
 	/* Stage 2: journals with a mounted, writable file system on top. */
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		if (mp->mnt_gjprovider == NULL)
 			continue;
 		if (mp->mnt_flag & MNT_RDONLY)
 			continue;
 		desc = g_journal_find_desc(mp->mnt_stat.f_fstypename);
 		if (desc == NULL)
 			continue;
 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
 			continue;
 		/* mtx_unlock(&mountlist_mtx) was done inside vfs_busy() */
 
 		g_topology_lock();
 		sc = g_journal_find_device(classp, mp->mnt_gjprovider);
 		g_topology_unlock();
 
 		if (sc == NULL) {
 			GJ_DEBUG(0, "Cannot find journal geom for %s.",
 			    mp->mnt_gjprovider);
 			goto next;
 		} else if (JEMPTY(sc)) {
 			/* Nothing journaled: just drop the mark. */
 			mtx_lock(&sc->sc_mtx);
 			sc->sc_flags &= ~GJF_DEVICE_BEFORE_SWITCH;
 			mtx_unlock(&sc->sc_mtx);
 			GJ_DEBUG(3, "No need for %s switch.", sc->sc_name);
 			goto next;
 		}
 
 		mountpoint = mp->mnt_stat.f_mntonname;
 
 		error = vn_start_write(NULL, &mp, V_WAIT);
 		if (error != 0) {
 			GJ_DEBUG(0, "vn_start_write(%s) failed (error=%d).",
 			    mountpoint, error);
 			goto next;
 		}
 
 		save = curthread_pflags_set(TDP_SYNCIO);
 
 		GJ_TIMER_START(1, &bt);
 		vfs_periodic(mp, MNT_NOWAIT);
 		GJ_TIMER_STOP(1, &bt, "Msync time of %s", mountpoint);
 
 		GJ_TIMER_START(1, &bt);
 		error = VFS_SYNC(mp, MNT_NOWAIT);
 		if (error == 0)
 			GJ_TIMER_STOP(1, &bt, "Sync time of %s", mountpoint);
 		else {
 			GJ_DEBUG(0, "Cannot sync file system %s (error=%d).",
 			    mountpoint, error);
 		}
 
 		curthread_pflags_restore(save);
 
 		vn_finished_write(mp);
 
 		if (error != 0)
 			goto next;
 
 		/*
 		 * Send BIO_FLUSH before freezing the file system, so it can be
 		 * faster after the freeze.
 		 */
 		GJ_TIMER_START(1, &bt);
 		g_journal_flush_cache(sc);
 		GJ_TIMER_STOP(1, &bt, "BIO_FLUSH time of %s", sc->sc_name);
 
 		GJ_TIMER_START(1, &bt);
 		error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT);
 		GJ_TIMER_STOP(1, &bt, "Suspend time of %s", mountpoint);
 		if (error != 0) {
 			GJ_DEBUG(0, "Cannot suspend file system %s (error=%d).",
 			    mountpoint, error);
 			goto next;
 		}
 
 		error = desc->jd_clean(mp);
 		if (error != 0) {
 			/*
 			 * The file system is suspended at this point; resume
 			 * it before giving up so it is not left frozen.
 			 */
 			vfs_write_resume(mp, 0);
 			goto next;
 		}
 
 		mtx_lock(&sc->sc_mtx);
 		g_journal_switch_wait(sc);
 		mtx_unlock(&sc->sc_mtx);
 
 		vfs_write_resume(mp, 0);
 next:
 		/* Retake the list lock before advancing to the next mount. */
 		mtx_lock(&mountlist_mtx);
 		vfs_unbusy(mp);
 	}
 	mtx_unlock(&mountlist_mtx);
 
 	/*
 	 * Stage 3: switch the journals that are still marked.  The list is
 	 * rescanned from the start each time because the topology lock is
 	 * dropped while waiting for a switch.
 	 */
 	sc = NULL;
 	for (;;) {
 		g_topology_lock();
 		LIST_FOREACH(gp, &g_journal_class.geom, geom) {
 			sc = gp->softc;
 			if (sc == NULL)
 				continue;
 			mtx_lock(&sc->sc_mtx);
 			if ((sc->sc_type & GJ_TYPE_COMPLETE) == GJ_TYPE_COMPLETE &&
 			    !(sc->sc_flags & GJF_DEVICE_DESTROY) &&
 			    (sc->sc_flags & GJF_DEVICE_BEFORE_SWITCH)) {
 				/* Leave the loop with sc_mtx still held. */
 				break;
 			}
 			mtx_unlock(&sc->sc_mtx);
 			sc = NULL;
 		}
 		g_topology_unlock();
 		if (sc == NULL)
 			break;
 		mtx_assert(&sc->sc_mtx, MA_OWNED);
 		g_journal_switch_wait(sc);
 		mtx_unlock(&sc->sc_mtx);
 	}
 }
 
 /*
  * Start the switcher kernel process for class 'mp'.
  * Must be called with the topology lock held and with no switcher
  * running (g_journal_switcher_proc == NULL).
  */
 static void
 g_journal_start_switcher(struct g_class *mp)
 {
 	int error;
 
 	g_topology_assert();
 	MPASS(g_journal_switcher_proc == NULL);
 	/* Set the state before the new thread gets a chance to look at it. */
 	g_journal_switcher_state = GJ_SWITCHER_WORKING;
 	error = kproc_create(g_journal_switcher, mp, &g_journal_switcher_proc,
 	    0, 0, "g_journal switcher");
 	KASSERT(error == 0, ("Cannot create switcher thread."));
 }
 
 /*
  * Ask the switcher kernel process to exit and wait until it reports that
  * it has done so.  Must be called with the topology lock held and with a
  * switcher running.
  */
 static void
 g_journal_stop_switcher(void)
 {
 	g_topology_assert();
 	MPASS(g_journal_switcher_proc != NULL);
 	g_journal_switcher_state = GJ_SWITCHER_DIE;
 	wakeup(&g_journal_switcher_state);
 	/* g_journal_switcher() answers GJ_SWITCHER_DIE with GJ_SWITCHER_DIED. */
 	while (g_journal_switcher_state != GJ_SWITCHER_DIED)
 		tsleep(&g_journal_switcher_state, PRIBIO, "jfini:wait", hz / 5);
 	GJ_DEBUG(1, "Switcher died.");
 	g_journal_switcher_proc = NULL;
 }
 
 /*
  * Main loop of the switcher kernel process.  'arg' is the g_class.
  *
  * The thread sleeps on g_journal_switcher_state for up to
  * g_journal_switch_time seconds, then runs g_journal_do_switch().  It
  * exits when the state is set to GJ_SWITCHER_DIE, acknowledging with
  * GJ_SWITCHER_DIED.  After each pass a pending sync request is completed
  * and its waiters are woken up.
  *
  * TODO: Kill switcher thread on last geom destruction?
  */
 static void
 g_journal_switcher(void *arg)
 {
 	struct g_class *mp;
 	struct bintime bt;
 	int error;
 
 	mp = arg;
 	curthread->td_pflags |= TDP_NORUNNINGBUF;
 	for (;;) {
 		g_journal_switcher_wokenup = 0;
 		error = tsleep(&g_journal_switcher_state, PRIBIO, "jsw:wait",
 		    g_journal_switch_time * hz);
 		if (g_journal_switcher_state == GJ_SWITCHER_DIE) {
 			g_journal_switcher_state = GJ_SWITCHER_DIED;
 			GJ_DEBUG(1, "Switcher exiting.");
 			wakeup(&g_journal_switcher_state);
 			kproc_exit(0);
 		}
 		/*
 		 * tsleep() returned 0 and nobody requested a sync: the code
 		 * treats this as a wakeup caused by cache pressure.
 		 */
 		if (error == 0 && g_journal_sync_requested == 0) {
 			GJ_DEBUG(1, "Out of cache, force switch (used=%jd "
 			    "limit=%jd).", (intmax_t)g_journal_cache_used,
 			    (intmax_t)g_journal_cache_limit);
 		}
 		GJ_TIMER_START(1, &bt);
 		g_journal_do_switch(mp);
 		GJ_TIMER_STOP(1, &bt, "Entire switch time");
 		/* Release threads waiting in g_journal_ctl_sync(). */
 		if (g_journal_sync_requested > 0) {
 			g_journal_sync_requested = 0;
 			wakeup(&g_journal_sync_requested);
 		}
 	}
 }
diff --git a/sys/geom/linux_lvm/g_linux_lvm.c b/sys/geom/linux_lvm/g_linux_lvm.c
index b835baecc93d..f17827757139 100644
--- a/sys/geom/linux_lvm/g_linux_lvm.c
+++ b/sys/geom/linux_lvm/g_linux_lvm.c
@@ -1,1200 +1,1201 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2008 Andrew Thompson <thompsa@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/ctype.h>
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 #include <sys/endian.h>
 
 #include <geom/linux_lvm/g_linux_lvm.h>
 
 FEATURE(geom_linux_lvm, "GEOM Linux LVM partitioning support");
 
 /* Declare malloc(9) label */
 static MALLOC_DEFINE(M_GLLVM, "gllvm", "GEOM_LINUX_LVM Data");
 
 /* GEOM class methods */
 static g_access_t g_llvm_access;
 static g_init_t g_llvm_init;
 static g_orphan_t g_llvm_orphan;
 static g_orphan_t g_llvm_taste_orphan;
 static g_start_t g_llvm_start;
 static g_taste_t g_llvm_taste;
 static g_ctl_destroy_geom_t g_llvm_destroy_geom;
 
 static void	g_llvm_done(struct bio *);
 static void	g_llvm_remove_disk(struct g_llvm_vg *, struct g_consumer *);
 static int	g_llvm_activate_lv(struct g_llvm_vg *, struct g_llvm_lv *);
 static int	g_llvm_add_disk(struct g_llvm_vg *, struct g_provider *, char *);
 static void	g_llvm_free_vg(struct g_llvm_vg *);
 static int	g_llvm_destroy(struct g_llvm_vg *, int);
 static int	g_llvm_read_label(struct g_consumer *, struct g_llvm_label *);
 static int	g_llvm_read_md(struct g_consumer *, struct g_llvm_metadata *,
 		    struct g_llvm_label *);
 
 static int	llvm_label_decode(const u_char *, struct g_llvm_label *, int);
 static int	llvm_md_decode(const u_char *, struct g_llvm_metadata *,
 		    struct g_llvm_label *);
 static int	llvm_textconf_decode(u_char *, int,
 		    struct g_llvm_metadata *);
 static int	llvm_textconf_decode_pv(char **, char *, struct g_llvm_vg *);
 static int	llvm_textconf_decode_lv(char **, char *, struct g_llvm_vg *);
 static int	llvm_textconf_decode_sg(char **, char *, struct g_llvm_lv *);
 
 SYSCTL_DECL(_kern_geom);
 SYSCTL_NODE(_kern_geom, OID_AUTO, linux_lvm, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_LINUX_LVM stuff");
 static u_int g_llvm_debug = 0;
 SYSCTL_UINT(_kern_geom_linux_lvm, OID_AUTO, debug, CTLFLAG_RWTUN, &g_llvm_debug, 0,
     "Debug level");
 
 LIST_HEAD(, g_llvm_vg) vg_list;
 
 /*
  * GEOM .access method: called when the access counts of provider 'pp'
  * change by (dr, dw, de).
  *
  * The change is propagated to every consumer of the geom, with one extra
  * exclusive count taken on first open and dropped on last close.  If a
  * consumer refuses the change, the consumers already updated are reverted
  * and the error is returned.
  *
  * Returns 0 on success, the g_access() error on failure, or ENXIO when the
  * geom has no consumers.  For a geom without softc only the debug message
  * is printed and 0 is returned.
  */
 static int
 g_llvm_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *c;
 	struct g_llvm_vg *vg;
 	struct g_geom *gp;
 	int error;
 
 	KASSERT(pp != NULL, ("%s: NULL provider", __func__));
 	gp = pp->geom;
 	KASSERT(gp != NULL, ("%s: NULL geom", __func__));
 	vg = gp->softc;
 
 	if (vg == NULL) {
 		/* It seems that .access can be called with negative dr,dw,dx
 		 * in this case but I want to check for myself */
 		G_LLVM_DEBUG(0, "access(%d, %d, %d) for %s",
 		    dr, dw, de, pp->name);
 
 		/* This should only happen when geom is withered so
 		 * allow only negative requests */
 		KASSERT(dr <= 0 && dw <= 0 && de <= 0,
 		    ("%s: Positive access for %s", __func__, pp->name));
 		if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0)
 			G_LLVM_DEBUG(0,
 			    "Device %s definitely destroyed", pp->name);
 		return (0);
 	}
 
 	/* Grab an exclusive bit to propagate on our consumers on first open */
 	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
 		de++;
 	/* ... drop it on close */
 	if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0)
 		de--;
 
 	/* Stays ENXIO if the consumer list is empty. */
 	error = ENXIO;
 	LIST_FOREACH(c, &gp->consumer, consumer) {
 		KASSERT(c != NULL, ("%s: consumer is NULL", __func__));
 		error = g_access(c, dr, dw, de);
 		if (error != 0) {
 			struct g_consumer *c2;
 
 			/* Backout earlier changes */
 			LIST_FOREACH(c2, &gp->consumer, consumer) {
 				if (c2 == c) /* all earlier components fixed */
 					return (error);
 				g_access(c2, -dr, -dw, -de);
 			}
 		}
 	}
 
 	return (error);
 }
 
 /*
  * Empty the bio queue 'bq', destroying every bio that was on it.
  */
 static void
 bioq_dismantle(struct bio_queue_head *bq)
 {
 	struct bio *head;
 
 	while ((head = bioq_first(bq)) != NULL) {
 		bioq_remove(bq, head);
 		g_destroy_bio(head);
 	}
 }
 
 /*
  * GEOM .done handler for the child bios created by g_llvm_start().
  * The standard handler cannot be used because one request may be split
  * into several child bios.
  *
  * The first child error is recorded in the parent.  When the last child
  * has come back, the parent is delivered with bio_completed set to its
  * full length.  The child bio is destroyed in every case.
  */
 static void
 g_llvm_done(struct bio *b)
 {
 	struct bio *parent = b->bio_parent;
 
 	if (b->bio_error != 0) {
 		G_LLVM_DEBUG(0, "Error %d for offset=%ju, length=%ju on %s",
 		    b->bio_error, b->bio_offset, b->bio_length,
 		    b->bio_to->name);
 		/* Keep only the first error seen. */
 		if (parent->bio_error == 0)
 			parent->bio_error = b->bio_error;
 	}
 
 	parent->bio_inbed++;
 	parent->bio_completed += b->bio_completed;
 
 	if (parent->bio_inbed == parent->bio_children) {
 		parent->bio_completed = parent->bio_length;
 		g_io_deliver(parent, parent->bio_error);
 	}
 
 	g_destroy_bio(b);
 }
 
 static void
 g_llvm_start(struct bio *bp)
 {
 	struct g_provider *pp;
 	struct g_llvm_vg *vg;
 	struct g_llvm_pv *pv;
 	struct g_llvm_lv *lv;
 	struct g_llvm_segment *sg;
 	struct bio *cb;
 	struct bio_queue_head bq;
 	size_t chunk_size;
 	off_t offset, length;
 	char *addr;
 	u_int count;
 
 	pp = bp->bio_to;
 	lv = pp->private;
 	vg = pp->geom->softc;
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 	/* XXX BIO_GETATTR allowed? */
 		break;
 	default:
 		/*
 		 * BIO_SPEEDUP and BIO_FLUSH should pass through to all sg
 		 * elements, but aren't.
 		 */
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 
 	bioq_init(&bq);
 
 	chunk_size = vg->vg_extentsize;
 	addr = bp->bio_data;
 	offset = bp->bio_offset;	/* virtual offset and length */
 	length = bp->bio_length;
 
 	while (length > 0) {
 		size_t chunk_index, in_chunk_offset, in_chunk_length;
 
 		pv = NULL;
 		cb = g_clone_bio(bp);
 		if (cb == NULL) {
 			bioq_dismantle(&bq);
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
 			return;
 		}
 
 		/* get the segment and the pv */
 		if (lv->lv_sgcount == 1) {
 			/* skip much of the calculations for a single sg */
 			chunk_index = 0;
 			in_chunk_offset = 0;
 			in_chunk_length = length;
 			sg = lv->lv_firstsg;
 			pv = sg->sg_pv;
 			cb->bio_offset = offset + sg->sg_pvoffset;
 		} else {
 			chunk_index = offset / chunk_size; /* round downwards */
 			in_chunk_offset = offset % chunk_size;
 			in_chunk_length =
 			    min(length, chunk_size - in_chunk_offset);
 
 			/* XXX could be faster */
 			LIST_FOREACH(sg, &lv->lv_segs, sg_next) {
 				if (chunk_index >= sg->sg_start &&
 				    chunk_index <= sg->sg_end) {
 					/* adjust chunk index for sg start */
 					chunk_index -= sg->sg_start;
 					pv = sg->sg_pv;
 					break;
 				}
 			}
 			cb->bio_offset =
 			    (off_t)chunk_index * (off_t)chunk_size
 			    + in_chunk_offset + sg->sg_pvoffset;
 		}
 
 		KASSERT(pv != NULL, ("Can't find PV for chunk %zu",
 		    chunk_index));
 
 		cb->bio_to = pv->pv_gprov;
 		cb->bio_done = g_llvm_done;
 		cb->bio_length = in_chunk_length;
 		cb->bio_data = addr;
 		cb->bio_caller1 = pv;
 		bioq_disksort(&bq, cb);
 
 		G_LLVM_DEBUG(5,
 		    "Mapped %s(%ju, %ju) on %s to %zu(%zu,%zu) @ %s:%ju",
 		    bp->bio_cmd == BIO_READ ? "R" : "W",
 		    offset, length, lv->lv_name,
 		    chunk_index, in_chunk_offset, in_chunk_length,
 		    pv->pv_name, cb->bio_offset);
 
 		addr += in_chunk_length;
 		length -= in_chunk_length;
 		offset += in_chunk_length;
 	}
 
 	/* Fire off bio's here */
 	count = 0;
 	for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) {
 		bioq_remove(&bq, cb);
 		pv = cb->bio_caller1;
 		cb->bio_caller1 = NULL;
 		G_LLVM_DEBUG(6, "firing bio to %s, offset=%ju, length=%ju",
 		    cb->bio_to->name, cb->bio_offset, cb->bio_length);
 		g_io_request(cb, pv->pv_gcons);
 		count++;
 	}
 	if (count == 0) { /* We handled everything locally */
 		bp->bio_completed = bp->bio_length;
 		g_io_deliver(bp, 0);
 	}
 }
 
 /*
  * Remove the physical volume behind consumer 'cp' from volume group 'vg'.
  *
  * Every segment mapped to that physical volume is unmapped, and the
  * provider of each affected logical volume is withered if the volume was
  * active.  Finally the consumer is closed, detached and destroyed.
  */
 static void
 g_llvm_remove_disk(struct g_llvm_vg *vg, struct g_consumer *cp)
 {
 	struct g_llvm_pv *pv;
 	struct g_llvm_lv *lv;
 	struct g_llvm_segment *sg;
 	int found;
 
 	KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
 	pv = (struct g_llvm_pv *)cp->private;
 
 	G_LLVM_DEBUG(0, "Disk %s removed from %s.", cp->provider->name,
 	    pv->pv_name);
 
 	LIST_FOREACH(lv, &vg->vg_lvs, lv_next) {
 		/*
 		 * Find segments that map to this disk.  Every matching
 		 * segment is unmapped, mirroring g_llvm_add_disk(), which
 		 * maps every matching segment and counts each one in
 		 * lv_sgactive.
 		 */
 		found = 0;
 		LIST_FOREACH(sg, &lv->lv_segs, sg_next) {
 			if (sg->sg_pv == pv) {
 				sg->sg_pv = NULL;
 				lv->lv_sgactive--;
 				found = 1;
 			}
 		}
 		/*
 		 * lv_gprov is NULL for a volume that was never activated
 		 * (some of its segments were still missing); such a volume
 		 * has no provider to wither.
 		 */
 		if (found && lv->lv_gprov != NULL) {
 			G_LLVM_DEBUG(0, "Device %s removed.",
 			    lv->lv_gprov->name);
 			g_wither_provider(lv->lv_gprov, ENXIO);
 			lv->lv_gprov = NULL;
 		}
 	}
 
 	/* Drop whatever access counts the consumer still holds. */
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 }
 
 /*
  * GEOM .orphan method: the provider behind consumer 'cp' is gone.
  * The disk is removed from its volume group and the group is then
  * destroyed with the force flag set.  A geom without softc is ignored.
  */
 static void
 g_llvm_orphan(struct g_consumer *cp)
 {
 	struct g_llvm_vg *group;
 
 	g_topology_assert();
 	group = cp->geom->softc;
 	if (group != NULL) {
 		g_llvm_remove_disk(group, cp);
 		g_llvm_destroy(group, 1);
 	}
 }
 
 /*
  * Create the provider "linux_lvm/<vg>-<lv>" for logical volume 'lv' of
  * volume group 'vg'.  All segments of the volume must be active.
  *
  * Always returns 0.
  */
 static int
 g_llvm_activate_lv(struct g_llvm_vg *vg, struct g_llvm_lv *lv)
 {
 	struct g_provider *prov;
 
 	g_topology_assert();
 
 	KASSERT(lv->lv_sgactive == lv->lv_sgcount, ("segment missing"));
 
 	prov = g_new_providerf(vg->vg_geom, "linux_lvm/%s-%s", vg->vg_name,
 	    lv->lv_name);
 	/* Size is the extent size times the number of extents. */
 	prov->mediasize = vg->vg_extentsize * (off_t)lv->lv_extentcount;
 	prov->sectorsize = vg->vg_sectorsize;
 	g_error_provider(prov, 0);
 
 	/* Link the volume and its provider to each other. */
 	lv->lv_gprov = prov;
 	prov->private = lv;
 
 	G_LLVM_DEBUG(1, "Created %s, %juM", prov->name,
 	    prov->mediasize / (1024*1024));
 
 	return (0);
 }
 
 /*
  * Attach provider `pp' to volume group `vg' as the physical volume whose
  * uuid equals `uuid'.
  *
  * A new consumer is created and attached; when the geom already has a
  * consumer, the new provider must have the same sector size and the new
  * consumer inherits the first consumer's access counts.  Afterwards every
  * segment naming this physical volume is mapped to it and each logical
  * volume that became complete is activated.
  *
  * Returns 0 on success, ENOENT when the uuid is not part of the volume
  * group, EEXIST when the physical volume already has a provider, EINVAL on
  * a sector size mismatch, or the error from g_attach()/g_access()/
  * g_llvm_activate_lv().  On every failure before the commit point the new
  * consumer is torn down again and the physical volume is left untouched.
  */
 static int
 g_llvm_add_disk(struct g_llvm_vg *vg, struct g_provider *pp, char *uuid)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp, *fcp;
 	struct g_llvm_pv *pv;
 	struct g_llvm_lv *lv;
 	struct g_llvm_segment *sg;
 	int error;
 
 	g_topology_assert();
 
 	LIST_FOREACH(pv, &vg->vg_pvs, pv_next) {
 		if (strcmp(pv->pv_uuid, uuid) == 0)
 			break;	/* found it */
 	}
 	if (pv == NULL) {
 		G_LLVM_DEBUG(3, "uuid %s not found in pv list", uuid);
 		return (ENOENT);
 	}
 	if (pv->pv_gprov != NULL) {
 		G_LLVM_DEBUG(0, "disk %s already initialised in %s",
 		    pv->pv_name, vg->vg_name);
 		return (EEXIST);
 	}
 
 	gp = vg->vg_geom;
 	/* Sample the first consumer before the new one is created. */
 	fcp = LIST_FIRST(&gp->consumer);
 
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		G_LLVM_DEBUG(0, "cannot attach %s to %s",
 		    pp->name, vg->vg_name);
 		g_destroy_consumer(cp);
 		return (error);
 	}
 
 	if (fcp != NULL) {
 		if (fcp->provider->sectorsize != pp->sectorsize) {
 			G_LLVM_DEBUG(0, "Provider %s of %s has invalid "
 			    "sector size (%d)", pp->name, vg->vg_name,
 			    pp->sectorsize);
 			/* Do not leak the consumer attached above. */
 			g_detach(cp);
 			g_destroy_consumer(cp);
 			return (EINVAL);
 		}
 		if (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0) {
 			/* Replicate access permissions from first "live"
 			 * consumer to the new one */
 			error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
 			if (error != 0) {
 				g_detach(cp);
 				g_destroy_consumer(cp);
 				return (error);
 			}
 		}
 	}
 
 	/*
 	 * Commit point.  pv_start is converted from sectors to bytes only
 	 * now, so that a failed attempt above cannot leave it scaled and
 	 * have a later retry scale it a second time.
 	 */
 	pv->pv_start *= vg->vg_sectorsize;
 	G_LLVM_DEBUG(1, "Attached %s to %s at offset %ju",
 	    pp->name, pv->pv_name, pv->pv_start);
 
 	cp->private = pv;
 	pv->pv_gcons = cp;
 	pv->pv_gprov = pp;
 
 	error = 0;
 	LIST_FOREACH(lv, &vg->vg_lvs, lv_next) {
 		/* Find segments that map to this disk */
 		LIST_FOREACH(sg, &lv->lv_segs, sg_next) {
 			if (strcmp(sg->sg_pvname, pv->pv_name) == 0) {
 				/* activate the segment */
 				KASSERT(sg->sg_pv == NULL,
 				    ("segment already mapped"));
 				sg->sg_pvoffset =
 				    (off_t)sg->sg_pvstart * vg->vg_extentsize
 				    + pv->pv_start;
 				sg->sg_pv = pv;
 				lv->lv_sgactive++;
 
 				G_LLVM_DEBUG(2, "%s: %d to %d @ %s:%d"
 				    " offset %ju sector %ju",
 				    lv->lv_name, sg->sg_start, sg->sg_end,
 				    sg->sg_pvname, sg->sg_pvstart,
 				    sg->sg_pvoffset,
 				    sg->sg_pvoffset / vg->vg_sectorsize);
 			}
 		}
 		/* Activate any lvs waiting on this disk */
 		if (lv->lv_gprov == NULL && lv->lv_sgactive == lv->lv_sgcount) {
 			error = g_llvm_activate_lv(vg, lv);
 			if (error)
 				break;
 		}
 	}
 	return (error);
 }
 
 /* Class init hook: start out with an empty list of volume groups. */
 static void
 g_llvm_init(struct g_class *mp)
 {
 
 	LIST_INIT(&vg_list);
 }
 
 /*
  * Release a volume group: free every physical volume, every logical volume
  * together with its segments, then unlink the group from the list it is on
  * and free the group itself.
  */
 static void
 g_llvm_free_vg(struct g_llvm_vg *vg)
 {
 	struct g_llvm_segment *seg;
 	struct g_llvm_lv *vol;
 	struct g_llvm_pv *disk;
 
 	/* Physical volumes. */
 	for (;;) {
 		disk = LIST_FIRST(&vg->vg_pvs);
 		if (disk == NULL)
 			break;
 		LIST_REMOVE(disk, pv_next);
 		free(disk, M_GLLVM);
 	}
 
 	/* Logical volumes; the segments of each one go first. */
 	for (;;) {
 		vol = LIST_FIRST(&vg->vg_lvs);
 		if (vol == NULL)
 			break;
 		for (;;) {
 			seg = LIST_FIRST(&vol->lv_segs);
 			if (seg == NULL)
 				break;
 			LIST_REMOVE(seg, sg_next);
 			free(seg, M_GLLVM);
 		}
 		LIST_REMOVE(vol, lv_next);
 		free(vol, M_GLLVM);
 	}
 
 	/* Finally the volume group itself. */
 	LIST_REMOVE(vg, vg_next);
 	free(vg, M_GLLVM);
 }
 
 /*
  * Orphan method of the temporary tasting geom.  It is not expected to run;
  * the assertion below always fails if it does.
  */
 static void
 g_llvm_taste_orphan(struct g_consumer *cp)
 {
 
 	KASSERT(0, ("%s called while tasting %s.", __func__,
 	    cp->provider->name));
 }
 
 /*
  * Taste method: probe provider `pp' for an LVM2 label and metadata.
  *
  * A throw-away geom and consumer are used for the reads and destroyed again
  * before anything else happens.  When label or metadata cannot be read or
  * decoded, NULL is returned.  Otherwise the volume group found in the
  * metadata gets a geom (created here if it has none yet) and the provider
  * is added to it as a disk.  Returns the volume group's geom.
  */
 static struct g_geom *
 g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	struct g_llvm_label ll;
 	struct g_llvm_metadata md;
 	struct g_llvm_vg *vg;
 	int error;
 
 	bzero(&md, sizeof(md));
 
 	g_topology_assert();
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	gp = g_new_geomf(mp, "linux_lvm:taste");
 	/* This orphan function should never be called. */
 	gp->orphan = g_llvm_taste_orphan;
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		/* The metadata is only read when the label decoded. */
 		error = g_llvm_read_label(cp, &ll);
 		if (error == 0)
 			error = g_llvm_read_md(cp, &md, &ll);
 		g_detach(cp);
 	}
 	/* The tasting consumer and geom are temporary in every case. */
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	if (error != 0)
 		return (NULL);
 
 	vg = md.md_vg;
 	if (vg->vg_geom == NULL) {
 		/* new volume group */
 		gp = g_new_geomf(mp, "%s", vg->vg_name);
 		gp->start = g_llvm_start;
 		gp->spoiled = g_llvm_orphan;
 		gp->orphan = g_llvm_orphan;
 		gp->access = g_llvm_access;
 		vg->vg_sectorsize = pp->sectorsize;
 		/* Convert the extent size to bytes (it is multiplied by the
 		 * sector size here). */
 		vg->vg_extentsize *= vg->vg_sectorsize;
 		vg->vg_geom = gp;
 		gp->softc = vg;
 		G_LLVM_DEBUG(1, "Created volume %s, extent size %zuK",
 		    vg->vg_name, vg->vg_extentsize / 1024);
 	}
 
 	/* initialise this disk in the volume group */
 	/* NOTE(review): the result of g_llvm_add_disk() is not checked. */
 	g_llvm_add_disk(vg, pp, ll.ll_uuid);
 	return (vg->vg_geom);
 }
 
 /*
  * Destroy a volume group and wither its geom.
  *
  * Returns ENXIO when `vg' is NULL and EBUSY when one of the geom's
  * providers is still open and `force' is zero.  With `force' set, open
  * providers are only logged.  Returns 0 once the group has been freed.
  */
 static int
 g_llvm_destroy(struct g_llvm_vg *vg, int force)
 {
 	struct g_provider *prov;
 	struct g_geom *geom;
 
 	g_topology_assert();
 	if (vg == NULL)
 		return (ENXIO);
 	geom = vg->vg_geom;
 
 	LIST_FOREACH(prov, &geom->provider, provider) {
 		if (prov->acr == 0 && prov->acw == 0 && prov->ace == 0)
 			continue;
 		G_LLVM_DEBUG(1, "Device %s is still open (r%dw%de%d)",
 		    prov->name, prov->acr, prov->acw, prov->ace);
 		if (!force)
 			return (EBUSY);
 	}
 
 	g_llvm_free_vg(geom->softc);
 	geom->softc = NULL;
 	g_wither_geom(geom, ENXIO);
 	return (0);
 }
 
 /*
  * destroy_geom class method: non-forced destruction of the volume group
  * stored in the geom's softc.
  */
 static int
 g_llvm_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused,
     struct g_geom *gp)
 {
 
 	return (g_llvm_destroy(gp->softc, 0));
 }
 
 int
 g_llvm_read_label(struct g_consumer *cp, struct g_llvm_label *ll)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int i, error = 0;
 
 	g_topology_assert();
 
 	/* The LVM label is stored on the first four sectors */
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	buf = g_read_data(cp, 0, pp->sectorsize * 4, &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL) {
 		G_LLVM_DEBUG(1, "Cannot read metadata from %s (error=%d)",
 		    pp->name, error);
 		return (error);
 	}
 
 	/* Search the four sectors for the LVM label. */
 	for (i = 0; i < 4; i++) {
 		error = llvm_label_decode(&buf[i * pp->sectorsize], ll, i);
 		if (error == 0)
 			break;	/* found it */
 	}
 	g_free(buf);
 	return (error);
 }
 
 int
 g_llvm_read_md(struct g_consumer *cp, struct g_llvm_metadata *md,
     struct g_llvm_label *ll)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 	int size;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	buf = g_read_data(cp, ll->ll_md_offset, pp->sectorsize, &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL) {
 		G_LLVM_DEBUG(0, "Cannot read metadata from %s (error=%d)",
 		    cp->provider->name, error);
 		return (error);
 	}
 
 	error = llvm_md_decode(buf, md, ll);
 	g_free(buf);
 	if (error != 0) {
 		return (error);
 	}
 
 	G_LLVM_DEBUG(1, "reading LVM2 config @ %s:%ju", pp->name,
 		    ll->ll_md_offset + md->md_reloffset);
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	/* round up to the nearest sector */
 	size = md->md_relsize +
 	    (pp->sectorsize - md->md_relsize % pp->sectorsize);
 	buf = g_read_data(cp, ll->ll_md_offset + md->md_reloffset, size, &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL) {
 		G_LLVM_DEBUG(0, "Cannot read LVM2 config from %s (error=%d)",
 		    pp->name, error);
 		return (error);
 	}
 	buf[md->md_relsize] = '\0';
 	G_LLVM_DEBUG(10, "LVM config:\n%s\n", buf);
 	error = llvm_textconf_decode(buf, md->md_relsize, md);
 	g_free(buf);
 
 	return (error);
 }
 
 /*
  * Decode an LVM2 label from one sector.
  *
  * `data' points at the start of the sector and `sector' is its index, which
  * must match the sector number recorded in the label.  On success the
  * label fields, the textual uuid (6-4-4-4-4-4-6 with dashes) and the
  * location of the single metadata area are stored in `ll' and 0 is
  * returned.  EINVAL is returned when the magic strings do not match, the
  * sector number disagrees, the header offset points outside the label
  * sector, or more than one data or metadata area is described.
  */
 static int
 llvm_label_decode(const u_char *data, struct g_llvm_label *ll, int sector)
 {
 	/*
 	 * The code below consumes 96 bytes starting at the header offset:
 	 * 32 (uuid) + 8 (size) + 16 (data area) + 16 (terminator) +
 	 * 16 (metadata area) + 8 (start of terminator).  A label occupies
 	 * one 512-byte sector.
 	 */
 	enum { LLVM_LABEL_BYTES = 512, LLVM_PVHDR_BYTES = 96 };
 	uint64_t off;
 	char *uuid;
 
 	/* Magic string */
 	if (bcmp("LABELONE", data , 8) != 0)
 		return (EINVAL);
 
 	/* We only support LVM2 text format */
 	if (bcmp("LVM2 001", data + 24, 8) != 0) {
 		G_LLVM_DEBUG(0, "Unsupported LVM format");
 		return (EINVAL);
 	}
 
 	ll->ll_sector = le64dec(data + 8);
 	ll->ll_crc = le32dec(data + 16);
 	ll->ll_offset = le32dec(data + 20);
 
 	if (ll->ll_sector != sector) {
 		G_LLVM_DEBUG(0, "Expected sector %ju, found at %d",
 		    ll->ll_sector, sector);
 		return (EINVAL);
 	}
 
 	/*
 	 * The header offset is read from the disk.  Refuse values that
 	 * would make the reads below run past the label sector instead of
 	 * indexing the buffer with an unchecked on-disk value.
 	 */
 	if (ll->ll_offset > LLVM_LABEL_BYTES - LLVM_PVHDR_BYTES) {
 		G_LLVM_DEBUG(0, "Invalid label header offset (%u)",
 		    ll->ll_offset);
 		return (EINVAL);
 	}
 
 	off = ll->ll_offset;
 	/*
 	 * convert the binary uuid to string format, the format is
 	 * xxxxxx-xxxx-xxxx-xxxx-xxxx-xxxx-xxxxxx (6-4-4-4-4-4-6)
 	 */
 	uuid = ll->ll_uuid;
 	bcopy(data + off, uuid, 6);
 	off += 6;
 	uuid += 6;
 	*uuid++ = '-';
 	for (int i = 0; i < 5; i++) {
 		bcopy(data + off, uuid, 4);
 		off += 4;
 		uuid += 4;
 		*uuid++ = '-';
 	}
 	bcopy(data + off, uuid, 6);
 	off += 6;
 	uuid += 6;
 	*uuid++ = '\0';
 
 	ll->ll_size = le64dec(data + off);
 	off += 8;
 	ll->ll_pestart = le64dec(data + off);
 	off += 16;
 
 	/* Only one data section is supported */
 	if (le64dec(data + off) != 0) {
 		G_LLVM_DEBUG(0, "Only one data section supported");
 		return (EINVAL);
 	}
 
 	off += 16;
 	ll->ll_md_offset = le64dec(data + off);
 	off += 8;
 	ll->ll_md_size = le64dec(data + off);
 	off += 8;
 
 	G_LLVM_DEBUG(1, "LVM metadata: offset=%ju, size=%ju", ll->ll_md_offset,
 	    ll->ll_md_size);
 
 	/* Only one metadata section is supported */
 	if (le64dec(data + off) != 0) {
 		G_LLVM_DEBUG(0, "Only one metadata section supported");
 		return (EINVAL);
 	}
 
 	G_LLVM_DEBUG(2, "label uuid=%s", ll->ll_uuid);
 	G_LLVM_DEBUG(2, "sector=%ju, crc=%u, offset=%u, size=%ju, pestart=%ju",
 	    ll->ll_sector, ll->ll_crc, ll->ll_offset, ll->ll_size,
 	    ll->ll_pestart);
 
 	return (0);
 }
 
 /*
  * Decode the metadata area header found in `data'.
  *
  * Layout used below (byte offsets): checksum 0, magic 4, version 20,
  * start 24, size 32, first location offset 40, first location size 48,
  * second location offset 64 (must be zero).  Returns EINVAL on a bad
  * magic, a version other than 1, a start offset that differs from the one
  * in the label, or when a second location is present; 0 otherwise.
  */
 static int
 llvm_md_decode(const u_char *data, struct g_llvm_metadata *md,
     struct g_llvm_label *ll)
 {
 
 	md->md_csum = le32dec(data);
 	md->md_version = le32dec(data + 20);
 	md->md_start = le64dec(data + 24);
 	md->md_size = le64dec(data + 32);
 
 	if (bcmp(G_LLVM_MAGIC, data + 4, 16) != 0) {
 		G_LLVM_DEBUG(0, "Incorrect md magic number");
 		return (EINVAL);
 	}
 	if (md->md_version != 1) {
 		G_LLVM_DEBUG(0, "Incorrect md version number (%u)",
 		    md->md_version);
 		return (EINVAL);
 	}
 	if (md->md_start != ll->ll_md_offset) {
 		G_LLVM_DEBUG(0, "Incorrect md offset (%ju)", md->md_start);
 		return (EINVAL);
 	}
 
 	/* Apparently only one location is ever present. */
 	md->md_reloffset = le64dec(data + 40);
 	md->md_relsize = le64dec(data + 48);
 
 	/* The 8 bytes at offset 56 (checksum, XXX) are skipped. */
 	if (le64dec(data + 64) != 0) {
 		G_LLVM_DEBUG(0, "Only one reloc supported");
 		return (EINVAL);
 	}
 
 	G_LLVM_DEBUG(3, "reloc: offset=%ju, size=%ju",
 	    md->md_reloffset, md->md_relsize);
 	G_LLVM_DEBUG(3, "md: version=%u, start=%ju, size=%ju",
 	    md->md_version, md->md_start, md->md_size);
 
 	return (0);
 }
 
 /*
  * Helpers for parsing 'key = value' lines of the LVM2 text configuration.
  *
  * They expand to bare statements that use `continue' (and, for GRAB_INT,
  * `goto bad'), so they may only be used directly inside a loop, GRAB_INT
  * additionally only in a function that has a `bad' label.  For the same
  * reason they cannot be wrapped in do { } while (0).
  *
  * GRAB_INT: when key `tok1' equals `key', parse `tok2' as a decimal number
  *     into `v'; jump to `bad' if no digits were consumed.
  * GRAB_STR: when key `tok1' equals `key', copy the double-quoted string in
  *     `tok2' into `v' (a buffer of `len' bytes).  The result is always NUL
  *     terminated; an over-long value is truncated.
  * SPLIT: split `value' at the first character from `str'; the part before
  *     it becomes `key', cut at its first whitespace character.
  */
 #define	GRAB_INT(key, tok1, tok2, v)					\
 	if (tok1 && tok2 && strncmp(tok1, key, sizeof(key)) == 0) {	\
 		v = strtol(tok2, &tok1, 10);				\
 		if (tok1 == tok2)					\
 			/* strtol did not eat any of the buffer */	\
 			goto bad;					\
 		continue;						\
 	}
 
 #define	GRAB_STR(key, tok1, tok2, v, len)				\
 	if (tok1 && tok2 && strncmp(tok1, key, sizeof(key)) == 0) {	\
 		strsep(&tok2, "\"");					\
 		if (tok2 == NULL)					\
 			continue;					\
 		tok1 = strsep(&tok2, "\"");				\
 		if (tok2 == NULL)					\
 			continue;					\
 		strncpy(v, tok1, len);					\
 		/* strncpy() does not terminate a full buffer */	\
 		(v)[(len) - 1] = '\0';					\
 		continue;						\
 	}
 
 #define	SPLIT(key, value, str)						\
 	key = strsep(&value, str);					\
 	/* strip trailing whitespace on the key */			\
 	for (char *t = key; *t != '\0'; t++)				\
 		if (isspace(*t)) {					\
 			*t = '\0';					\
 			break;						\
 		}
 
 /*
  * Copy the leading name out of `tok' into `name' and return its length.
  *
  * A name consists of letters, digits, '.', '_', '-' and '+' and is cut off
  * at G_LLVM_NAMELEN - 1 characters.  A NULL token, a token starting with
  * '-' and the tokens "." and ".." yield 0 without touching `name'.  In all
  * other cases `name' is NUL terminated, even when the length is 0.
  */
 static size_t 
 llvm_grab_name(char *name, const char *tok)
 {
 	size_t n;
 	char c;
 
 	if (tok == NULL || tok[0] == '-')
 		return (0);
 	if (strcmp(tok, ".") == 0 || strcmp(tok, "..") == 0)
 		return (0);
 
 	for (n = 0; n < G_LLVM_NAMELEN - 1; n++) {
 		c = tok[n];
 		if (c == '\0')
 			break;
 		if (!isalpha(c) && !isdigit(c) && c != '.' && c != '_' &&
 		    c != '-' && c != '+')
 			break;
 	}
 	bcopy(tok, name, n);
 	name[n] = '\0';
 	return (n);
 }
 
 /*
  * Parse the LVM2 text configuration in `data' (NUL terminated, modified in
  * place by strsep()).
  *
  * The first line names the volume group.  When a group of that name is
  * already known, only its uuid is compared: a match stores the existing
  * group in md->md_vg and returns 0, a mismatch returns EINVAL.  Otherwise
  * a new group is allocated, its physical_volumes and logical_volumes
  * sections and its id and extent_size keys are parsed, and the group is
  * stored in md->md_vg.
  *
  * Returns 0 on success, EINVAL or ENOMEM on the early failures, and -1 when
  * parsing of a new group fails (the group is freed again).
  */
 static int
 llvm_textconf_decode(u_char *data, int buflen, struct g_llvm_metadata *md)
 {
 	struct g_llvm_vg	*vg;
 	char *buf = data;
 	char *tok, *v;
 	char name[G_LLVM_NAMELEN];
 	char uuid[G_LLVM_UUIDLEN];
 	size_t len;
 
 	if (buf == NULL || *buf == '\0')
 		return (EINVAL);
 
 	tok = strsep(&buf, "\n");
 	if (tok == NULL)
 		return (EINVAL);
 	len = llvm_grab_name(name, tok);
 	if (len == 0)
 		return (EINVAL);
 
 	/* check to see if the vg has already been loaded off another disk */
 	LIST_FOREACH(vg, &vg_list, vg_next) {
 		if (strcmp(vg->vg_name, name) == 0) {
 			uuid[0] = '\0';
 			/* grab the volume group uuid */
 			while ((tok = strsep(&buf, "\n")) != NULL) {
 				if (strstr(tok, "{"))
 					break;
 				if (strstr(tok, "=")) {
 					SPLIT(v, tok, "=");
 					GRAB_STR("id", v, tok, uuid,
 					    sizeof(uuid));
 				}
 			}
 			if (strcmp(vg->vg_uuid, uuid) == 0) {
 				/* existing vg */
 				md->md_vg = vg;
 				return (0);
 			}
 			/* XXX different volume group with name clash! */
 			G_LLVM_DEBUG(0,
 			    "%s already exists, volume group not loaded", name);
 			return (EINVAL);
 		}
 	}
 
 	vg = malloc(sizeof(*vg), M_GLLVM, M_NOWAIT|M_ZERO);
 	if (vg == NULL)
 		return (ENOMEM);
 
 	strncpy(vg->vg_name, name, sizeof(vg->vg_name));
 	/* strncpy() does not terminate a full buffer */
 	vg->vg_name[sizeof(vg->vg_name) - 1] = '\0';
 	LIST_INIT(&vg->vg_pvs);
 	LIST_INIT(&vg->vg_lvs);
 	/*
 	 * Link the group into vg_list before parsing.  The error path below
 	 * calls g_llvm_free_vg(), which does LIST_REMOVE(vg, vg_next); on a
 	 * group that was never inserted that would write through the zeroed
 	 * le_prev pointer.
 	 */
 	LIST_INSERT_HEAD(&vg_list, vg, vg_next);
 
 #define	VOL_FOREACH(func, tok, buf, p)					\
 	while ((tok = strsep(buf, "\n")) != NULL) {			\
 		if (strstr(tok, "{")) {					\
 			func(buf, tok, p);				\
 			continue;					\
 		}							\
 		if (strstr(tok, "}"))					\
 			break;						\
 	}
 
 	while ((tok = strsep(&buf, "\n")) != NULL) {
 		if (strcmp(tok, "physical_volumes {") == 0) {
 			VOL_FOREACH(llvm_textconf_decode_pv, tok, &buf, vg);
 			continue;
 		}
 		if (strcmp(tok, "logical_volumes {") == 0) {
 			VOL_FOREACH(llvm_textconf_decode_lv, tok, &buf, vg);
 			continue;
 		}
 		if (strstr(tok, "{")) {
 			G_LLVM_DEBUG(2, "unknown section %s", tok);
 			continue;
 		}
 
 		/* parse 'key = value' lines */
 		if (strstr(tok, "=")) {
 			SPLIT(v, tok, "=");
 			GRAB_STR("id", v, tok, vg->vg_uuid, sizeof(vg->vg_uuid));
 			GRAB_INT("extent_size", v, tok, vg->vg_extentsize);
 			continue;
 		}
 	}
 	/* basic checking */
 	if (vg->vg_extentsize == 0)
 		goto bad;
 
 	md->md_vg = vg;
 	G_LLVM_DEBUG(3, "vg: name=%s uuid=%s", vg->vg_name, vg->vg_uuid);
 	return(0);
 
 bad:
 	/* Unlinks the group from vg_list and frees it with its contents. */
 	g_llvm_free_vg(vg);
 	return (-1);
 }
 #undef	VOL_FOREACH
 
 /*
  * Parse one physical volume section.  `tok' is the line that opened the
  * section (it carries the name) and `*buf' the remaining text, which is
  * consumed up to and including the line with the closing brace.
  *
  * The id, pe_start and pe_count keys are recorded.  Returns 0 after linking
  * the new physical volume into `vg'; EINVAL when no text is left, ENOMEM
  * when allocation fails, and -1 when the section is malformed (no name, a
  * nested section, no closing brace, a bad number or a zero pe_count).
  */
 static int
 llvm_textconf_decode_pv(char **buf, char *tok, struct g_llvm_vg *vg)
 {
 	struct g_llvm_pv	*pv;
 	char *v;
 
 	if (*buf == NULL || **buf == '\0')
 		return (EINVAL);
 
 	pv = malloc(sizeof(*pv), M_GLLVM, M_NOWAIT|M_ZERO);
 	if (pv == NULL)
 		return (ENOMEM);
 	pv->pv_vg = vg;
 
 	/* llvm_grab_name() also yields 0 for a NULL token. */
 	if (llvm_grab_name(pv->pv_name, tok) == 0)
 		goto bad;
 
 	for (;;) {
 		tok = strsep(buf, "\n");
 		if (tok == NULL)
 			goto bad;	/* text ended inside the section */
 		if (strstr(tok, "{") != NULL)
 			goto bad;	/* nested sections are not expected */
 		if (strstr(tok, "}") != NULL)
 			break;
 		if (strstr(tok, "=") == NULL)
 			continue;
 
 		/* parse 'key = value' lines */
 		SPLIT(v, tok, "=");
 		GRAB_STR("id", v, tok, pv->pv_uuid, sizeof(pv->pv_uuid));
 		GRAB_INT("pe_start", v, tok, pv->pv_start);
 		GRAB_INT("pe_count", v, tok, pv->pv_count);
 	}
 
 	/* basic checking */
 	if (pv->pv_count == 0)
 		goto bad;
 
 	LIST_INSERT_HEAD(&vg->vg_pvs, pv, pv_next);
 	G_LLVM_DEBUG(3, "pv: name=%s uuid=%s", pv->pv_name, pv->pv_uuid);
 
 	return (0);
 bad:
 	free(pv, M_GLLVM);
 	return (-1);
 }
 
 /*
  * Parse one logical volume section.  `tok' is the line that opened the
  * section (it carries the name) and `*buf' the remaining text, which is
  * consumed up to and including the line with the closing brace.
  *
  * Nested sections whose opening line contains "segment" are passed to
  * llvm_textconf_decode_sg(); any other nested section is an error.  The id
  * and segment_count keys are recorded.  The volume is accepted only when
  * segment_count is non-zero and equals the number of segments decoded.
  *
  * Returns 0 after linking the volume into `vg'; EINVAL when no text is
  * left, ENOMEM when allocation fails, -1 otherwise (the volume and any
  * segments decoded so far are freed).
  */
 static int
 llvm_textconf_decode_lv(char **buf, char *tok, struct g_llvm_vg *vg)
 {
 	struct g_llvm_lv	*lv;
 	struct g_llvm_segment *seg;
 	char *v;
 
 	if (*buf == NULL || **buf == '\0')
 		return (EINVAL);
 
 	lv = malloc(sizeof(*lv), M_GLLVM, M_NOWAIT|M_ZERO);
 	if (lv == NULL)
 		return (ENOMEM);
 	LIST_INIT(&lv->lv_segs);
 	lv->lv_vg = vg;
 
 	/* llvm_grab_name() also yields 0 for a NULL token. */
 	if (llvm_grab_name(lv->lv_name, tok) == 0)
 		goto bad;
 
 	for (;;) {
 		tok = strsep(buf, "\n");
 		if (tok == NULL)
 			goto bad;	/* text ended inside the section */
 
 		if (strstr(tok, "{") != NULL) {
 			if (strstr(tok, "segment") == NULL)
 				goto bad;	/* unexpected section */
 			llvm_textconf_decode_sg(buf, tok, lv);
 			continue;
 		}
 		if (strstr(tok, "}") != NULL)
 			break;
 		if (strstr(tok, "=") == NULL)
 			continue;
 
 		/* parse 'key = value' lines */
 		SPLIT(v, tok, "=");
 		GRAB_STR("id", v, tok, lv->lv_uuid, sizeof(lv->lv_uuid));
 		GRAB_INT("segment_count", v, tok, lv->lv_sgcount);
 	}
 
 	/* zero or incomplete segment list */
 	if (lv->lv_sgcount == 0)
 		goto bad;
 	if (lv->lv_sgcount != lv->lv_numsegs)
 		goto bad;
 
 	/* Optimize for only one segment on the pv */
 	lv->lv_firstsg = LIST_FIRST(&lv->lv_segs);
 	LIST_INSERT_HEAD(&vg->vg_lvs, lv, lv_next);
 	G_LLVM_DEBUG(3, "lv: name=%s uuid=%s", lv->lv_name, lv->lv_uuid);
 
 	return (0);
 bad:
 	for (;;) {
 		seg = LIST_FIRST(&lv->lv_segs);
 		if (seg == NULL)
 			break;
 		LIST_REMOVE(seg, sg_next);
 		free(seg, M_GLLVM);
 	}
 	free(lv, M_GLLVM);
 	return (-1);
 }
 
 /*
  * Parse one segment section of a logical volume.  `*buf' is the remaining
  * text, consumed up to and including the line with the closing brace; the
  * incoming `tok' is not examined.
  *
  * Only segments with stripe_count = 1 are accepted.  The start_extent and
  * extent_count keys and the single entry of the "stripes = [" list
  * (physical volume name and start extent) are recorded.
  *
  * Returns 0 after linking the segment into `lv' and updating its segment
  * and extent counters; EINVAL when no text is left, ENOMEM when allocation
  * fails, -1 when the section is malformed.
  */
 static int
 llvm_textconf_decode_sg(char **buf, char *tok, struct g_llvm_lv *lv)
 {
 	struct g_llvm_segment *sg;
 	char *v;
 	int count = 0;
 
 	if (*buf == NULL || **buf == '\0')
 		return (EINVAL);
 
 	sg = malloc(sizeof(*sg), M_GLLVM, M_NOWAIT|M_ZERO);
 	if (sg == NULL)
 		return (ENOMEM);
 
 	while ((tok = strsep(buf, "\n")) != NULL) {
 		/* only a single linear stripe is supported */
 		if (strstr(tok, "stripe_count")) {
 			/*
 			 * SPLIT() leaves tok NULL when the line has no
 			 * '='; the strstr() calls further down would then
 			 * dereference a NULL pointer.
 			 */
 			if (strstr(tok, "=") == NULL)
 				goto bad;
 			SPLIT(v, tok, "=");
 			GRAB_INT("stripe_count", v, tok, count);
 			if (count != 1)
 				goto bad;
 		}
 
 		if (strstr(tok, "{"))
 			goto bad;
 
 		if (strstr(tok, "}"))
 			break;
 
 		if (strcmp(tok, "stripes = [") == 0) {
 			tok = strsep(buf, "\n");
 			if (tok == NULL)
 				goto bad;
 
 			strsep(&tok, "\"");
 			if (tok == NULL)
 				goto bad;	/* missing open quotes */
 			v = strsep(&tok, "\"");
 			if (tok == NULL)
 				goto bad;	/* missing close quotes */
 			strncpy(sg->sg_pvname, v, sizeof(sg->sg_pvname));
 			/* strncpy() does not terminate a full buffer */
 			sg->sg_pvname[sizeof(sg->sg_pvname) - 1] = '\0';
 			if (*tok != ',')
 				goto bad;	/* missing comma for stripe */
 			tok++;
 
 			sg->sg_pvstart = strtol(tok, &v, 10);
 			if (v == tok)
 				/* strtol did not eat any of the buffer */
 				goto bad;
 
 			continue;
 		}
 
 		/* parse 'key = value' lines */
 		if (strstr(tok, "=")) {
 			SPLIT(v, tok, "=");
 			GRAB_INT("start_extent", v, tok, sg->sg_start);
 			GRAB_INT("extent_count", v, tok, sg->sg_count);
 			continue;
 		}
 	}
 	if (tok == NULL)
 		goto bad;
 	/* basic checking */
 	if (count != 1 || sg->sg_count == 0)
 		goto bad;
 
 	sg->sg_end = sg->sg_start + sg->sg_count - 1;
 	lv->lv_numsegs++;
 	lv->lv_extentcount += sg->sg_count;
 	LIST_INSERT_HEAD(&lv->lv_segs, sg, sg_next);
 
 	return (0);
 bad:
 	free(sg, M_GLLVM);
 	return (-1);
 }
 #undef	GRAB_INT
 #undef	GRAB_STR
 #undef	SPLIT
 
 /* GEOM class descriptor: the methods this class provides. */
 static struct g_class g_llvm_class = {
 	.name = G_LLVM_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_llvm_taste,
 	.init = g_llvm_init,
 	.destroy_geom = g_llvm_destroy_geom,
 };
 
 DECLARE_GEOM_CLASS(g_llvm_class, g_linux_lvm);
 MODULE_VERSION(geom_linux_lvm, 0);
diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c
index c0641d15673e..455aae4bebf8 100644
--- a/sys/geom/mirror/g_mirror.c
+++ b/sys/geom/mirror/g_mirror.c
@@ -1,3600 +1,3601 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/eventhandler.h>
 #include <sys/fail.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 #include <geom/mirror/g_mirror.h>
 
 FEATURE(geom_mirror, "GEOM mirroring support");
 
 /* Malloc type used for the allocations made in this file. */
 static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
 
 /*
  * Tunables exported under kern.geom.mirror.  Each variable is followed by
  * the sysctl that publishes it; the description string states its purpose.
  */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_MIRROR stuff");
 /* Debug level; 0 by default. */
 int g_mirror_debug = 0;
 SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
     "Debug level");
 bool g_launch_mirror_before_timeout = true;
 SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout,
     CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0,
     "If false, force gmirror to wait out the full kern.geom.mirror.timeout "
     "before launching mirrors");
 static u_int g_mirror_timeout = 4;
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
     0, "Time to wait on all mirror components");
 static u_int g_mirror_idletime = 5;
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
     &g_mirror_idletime, 0, "Mark components as clean when idling");
 static u_int g_mirror_disconnect_on_failure = 1;
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
     &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
 /* Declared CTLFLAG_RDTUN, unlike the CTLFLAG_RWTUN knobs around it. */
 static u_int g_mirror_syncreqs = 2;
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
     &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
 static u_int g_mirror_sync_period = 5;
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
     &g_mirror_sync_period, 0,
     "Metadata update period during synchronization, in seconds");
 
 /*
  * msleep() wrapped in level 4 debug messages that log the wait channel
  * before going to sleep and after waking up.
  */
 #define	MSLEEP(chan, lock, prio, wmesg, timo)	do {			\
 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (chan));	\
 	msleep((chan), (lock), (prio), (wmesg), (timo));		\
 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (chan));	\
 } while (0)
 
 /* Event handler tag and shutdown flag; both start out cleared. */
 static int g_mirror_shutdown = 0;
 static eventhandler_tag g_mirror_post_sync = NULL;
 
 /* Class methods defined in this file. */
 static g_init_t g_mirror_init;
 static g_fini_t g_mirror_fini;
 static g_taste_t g_mirror_taste;
 static g_resize_t g_mirror_resize;
 static g_provgone_t g_mirror_providergone;
 static g_ctl_destroy_geom_t g_mirror_destroy_geom;
 
 /* GEOM class descriptor for the mirror class. */
 struct g_class g_mirror_class = {
 	.name = G_MIRROR_CLASS_NAME,
 	.version = G_VERSION,
 	.init = g_mirror_init,
 	.fini = g_mirror_fini,
 	.taste = g_mirror_taste,
 	.ctlreq = g_mirror_config,
 	.destroy_geom = g_mirror_destroy_geom,
 	.providergone = g_mirror_providergone,
 	.resize = g_mirror_resize,
 };
 
 /* Forward declarations: device and disk state handling. */
 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
 static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
 static int g_mirror_refresh_device(struct g_mirror_softc *sc,
     const struct g_provider *pp, const struct g_mirror_metadata *md);
 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
 static void g_mirror_timeout_drain(struct g_mirror_softc *sc);
 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
 
 /* Forward declarations: synchronization and request handling. */
 static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
     struct bio *bp, off_t offset);
 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
 static void g_mirror_sync_release(struct g_mirror_softc *sc);
 static void g_mirror_register_request(struct g_mirror_softc *sc,
     struct bio *bp);
 
 /*
  * Map a G_MIRROR_DISK_STATE_* value to its name; values not listed below
  * yield "INVALID".
  */
 static const char *
 g_mirror_disk_state2str(int state)
 {
 	const char *name;
 
 	switch (state) {
 	case G_MIRROR_DISK_STATE_NONE:
 		name = "NONE";
 		break;
 	case G_MIRROR_DISK_STATE_NEW:
 		name = "NEW";
 		break;
 	case G_MIRROR_DISK_STATE_ACTIVE:
 		name = "ACTIVE";
 		break;
 	case G_MIRROR_DISK_STATE_STALE:
 		name = "STALE";
 		break;
 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 		name = "SYNCHRONIZING";
 		break;
 	case G_MIRROR_DISK_STATE_DISCONNECTED:
 		name = "DISCONNECTED";
 		break;
 	case G_MIRROR_DISK_STATE_DESTROY:
 		name = "DESTROY";
 		break;
 	default:
 		name = "INVALID";
 		break;
 	}
 	return (name);
 }
 
 /*
  * Map a G_MIRROR_DEVICE_STATE_* value to its name; anything other than
  * STARTING and RUNNING yields "INVALID".
  */
 static const char *
 g_mirror_device_state2str(int state)
 {
 
 	if (state == G_MIRROR_DEVICE_STATE_STARTING)
 		return ("STARTING");
 	if (state == G_MIRROR_DEVICE_STATE_RUNNING)
 		return ("RUNNING");
 	return ("INVALID");
 }
 
 /*
  * Return the disk's name, or "[unknown]" while the disk has no consumer
  * or its consumer has no provider.
  */
 static const char *
 g_mirror_get_diskname(struct g_mirror_disk *disk)
 {
 
 	if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL)
 		return (disk->d_name);
 	return ("[unknown]");
 }
 
 /*
  * --- Events handling functions ---
  * Events in geom_mirror are used to maintain disks and device status
  * from one thread to simplify locking.
  */
 /* Release an event structure. */
 static void
 g_mirror_event_free(struct g_mirror_event *ep)
 {
 
 	free(ep, M_MIRROR);
 }
 
 /*
  * Fill in event `ep', append it to the device's event queue and wake up
  * whoever sleeps on the softc.
  *
  * `arg' is the softc when G_MIRROR_EVENT_DEVICE is set in `flags' and a
  * disk otherwise.  With G_MIRROR_EVENT_DONTWAIT the function returns 0
  * right after queueing and does not free the event.  Without it, sc_lock
  * is dropped, the caller sleeps until G_MIRROR_EVENT_DONE appears in the
  * event's flags, the event is freed, sc_lock is taken again and the
  * event's error is returned.
  */
 static int
 g_mirror_event_dispatch(struct g_mirror_event *ep, void *arg, int state,
     int flags)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *disk;
 	int error;
 
 	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
 	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
 		disk = NULL;
 		sc = arg;
 	} else {
 		disk = arg;
 		sc = disk->d_softc;
 	}
 	ep->e_disk = disk;
 	ep->e_state = state;
 	ep->e_flags = flags;
 	ep->e_error = 0;
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
 	mtx_unlock(&sc->sc_events_mtx);
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
 		return (0);
 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
 	sx_xunlock(&sc->sc_lock);
 	/*
 	 * The flag is tested before the mutex is taken; the sleep is given
 	 * a timeout of hz * 5 and the flag is tested again after every
 	 * wakeup.  PDROP makes msleep() return with the mutex released,
 	 * which is why it is locked anew on each iteration.
 	 */
 	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
 		mtx_lock(&sc->sc_events_mtx);
 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
 		    hz * 5);
 	}
 	error = ep->e_error;
 	g_mirror_event_free(ep);
 	sx_xlock(&sc->sc_lock);
 	return (error);
 }
 
 /*
  * Allocate an event (sleeping for memory if necessary) and dispatch it;
  * returns whatever g_mirror_event_dispatch() returns.
  */
 int
 g_mirror_event_send(void *arg, int state, int flags)
 {
 	struct g_mirror_event *event;
 
 	event = malloc(sizeof(*event), M_MIRROR, M_WAITOK);
 
 	return (g_mirror_event_dispatch(event, arg, state, flags));
 }
 
 /*
  * Return the event at the head of the device's event queue without
  * removing it, or NULL when the queue is empty.
  */
 static struct g_mirror_event *
 g_mirror_event_first(struct g_mirror_softc *sc)
 {
 	struct g_mirror_event *head;
 
 	mtx_lock(&sc->sc_events_mtx);
 	head = TAILQ_FIRST(&sc->sc_events);
 	mtx_unlock(&sc->sc_events_mtx);
 
 	return (head);
 }
 
 /* Unlink `ep' from the device's event queue under the events mutex. */
 static void
 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
 {
 	struct mtx *lock;
 
 	lock = &sc->sc_events_mtx;
 	mtx_lock(lock);
 	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 	mtx_unlock(lock);
 }
 
 static void
 g_mirror_event_cancel(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_event *ep, *tmpep;
 
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
 			continue;
 		if (ep->e_disk != disk)
 			continue;
 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
 			g_mirror_event_free(ep);
 		else {
 			ep->e_error = ECANCELED;
 			wakeup(ep);
 		}
 	}
 	mtx_unlock(&sc->sc_events_mtx);
 }
 
 /*
  * Count the disks of the mirror that are in the given state.
  * Passing -1 as the state counts every disk on the list.
  */
 u_int
 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
 {
 	struct g_mirror_disk *d;
 	u_int count;
 
 	count = 0;
 	LIST_FOREACH(d, &sc->sc_disks, d_next) {
 		if (state != -1 && d->d_state != state)
 			continue;
 		count++;
 	}
 	return (count);
 }
 
 /*
  * Look up a disk of the mirror by its disk ID.
  * Returns NULL when no disk carries that ID.
  */
 static struct g_mirror_disk *
 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
 {
 	struct g_mirror_disk *d;
 
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	LIST_FOREACH(d, &sc->sc_disks, d_next) {
 		if (d->d_id == id)
 			break;
 	}
 	/* LIST_FOREACH leaves d NULL when the list was exhausted. */
 	return (d);
 }
 
 /*
  * Count the requests in the device queue whose bio_from is consumer `cp'.
  */
 static u_int
 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 	struct bio *req;
 	u_int count;
 
 	count = 0;
 	mtx_lock(&sc->sc_queue_mtx);
 	TAILQ_FOREACH(req, &sc->sc_queue, bio_queue) {
 		if (req->bio_from != cp)
 			continue;
 		count++;
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 
 	return (count);
 }
 
 /*
  * Return 1 when consumer `cp' cannot be destroyed yet: either its index is
  * positive or requests from it are still sitting in the device queue.
  * Return 0 otherwise.
  */
 static int
 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 	int busy;
 
 	busy = 0;
 	if (cp->index > 0) {
 		G_MIRROR_DEBUG(2,
 		    "I/O requests for %s exist, can't destroy it now.",
 		    cp->provider->name);
 		busy = 1;
 	} else if (g_mirror_nrequests(sc, cp) > 0) {
 		G_MIRROR_DEBUG(2,
 		    "I/O requests for %s in queue, can't destroy it now.",
 		    cp->provider->name);
 		busy = 1;
 	}
 	return (busy);
 }
 
 /*
  * Event callback: detach and destroy the consumer passed as `arg'.
  */
 static void
 g_mirror_destroy_consumer(void *arg, int flags __unused)
 {
 	struct g_consumer *consumer;
 
 	g_topology_assert();
 
 	consumer = arg;
 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", consumer->provider->name);
 	g_detach(consumer);
 	g_destroy_consumer(consumer);
 }
 
 /*
  * Close and get rid of consumer `cp'.
  *
  * The consumer's private pointer is cleared first.  Nothing else happens
  * while the consumer is still busy.  Otherwise any access it holds is
  * released and the consumer is detached and destroyed, either right away
  * or, when it was open for writing on a geom that is not withering, from a
  * posted event.
  */
 static void
 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	int defer;
 
 	g_topology_assert();
 
 	cp->private = NULL;
 	if (g_mirror_is_busy(sc, cp))
 		return;
 
 	pp = cp->provider;
 	defer = (cp->acw == 1 && (pp->geom->flags & G_GEOM_WITHER) == 0);
 
 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
 	    -cp->acw, -cp->ace, 0);
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
 
 	if (defer) {
 		/*
 		 * The retaste event has been sent by now (inside
 		 * g_access()), so the event that detaches and destroys the
 		 * consumer can be posted.  A class that has a consumer
 		 * connected to the given provider does not receive the
 		 * retaste event for it.  This is how retaste events are
 		 * ignored when consumers opened for writing are closed:
 		 * the consumer is detached and destroyed only after the
 		 * retaste event was sent.
 		 */
 		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
 	} else {
 		G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
 		g_detach(cp);
 		g_destroy_consumer(cp);
 	}
 }
 
 static int
 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
 {
 	struct g_consumer *cp;
 	int error;
 
 	g_topology_assert_not();
 	KASSERT(disk->d_consumer == NULL,
 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
 
 	g_topology_lock();
 	cp = g_new_consumer(disk->d_softc->sc_geom);
 	cp->flags |= G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		g_topology_unlock();
 		return (error);
 	}
 	error = g_access(cp, 1, 1, 1);
 	if (error != 0) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		g_topology_unlock();
 		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
 		    pp->name, error);
 		return (error);
 	}
 	g_topology_unlock();
 	disk->d_consumer = cp;
 	disk->d_consumer->private = disk;
 	disk->d_consumer->index = 0;
 
 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
 	return (0);
 }
 
 /*
  * Dispose of consumer `cp' (NULL is accepted): an attached consumer goes
  * through g_mirror_kill_consumer(), a detached one is destroyed directly.
  */
 static void
 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 
 	g_topology_assert();
 
 	if (cp == NULL)
 		return;
 	if (cp->provider == NULL) {
 		g_destroy_consumer(cp);
 		return;
 	}
 	g_mirror_kill_consumer(sc, cp);
 }
 
 /*
  * Initialize disk. This means allocate memory, create consumer, attach it
  * to the provider and open access (r1w1e1) to it.
  */
 static struct g_mirror_disk *
 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
     struct g_mirror_metadata *md, int *errorp)
 {
 	struct g_mirror_disk *disk;
 	int i, error;
 
 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
 	if (disk == NULL) {
 		error = ENOMEM;
 		goto fail;
 	}
 	disk->d_softc = sc;
 	error = g_mirror_connect_disk(disk, pp);
 	if (error != 0)
 		goto fail;
 	disk->d_id = md->md_did;
 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
 	disk->d_priority = md->md_priority;
 	disk->d_flags = md->md_dflags;
 	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
 	if (error == 0 && i != 0)
 		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
 	if (md->md_provider[0] != '\0')
 		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
 	disk->d_sync.ds_consumer = NULL;
 	disk->d_sync.ds_offset = md->md_sync_offset;
 	disk->d_sync.ds_offset_done = md->md_sync_offset;
 	disk->d_sync.ds_update_ts = time_uptime;
 	disk->d_genid = md->md_genid;
 	disk->d_sync.ds_syncid = md->md_syncid;
 	disk->d_init_ndisks = md->md_all;
 	disk->d_init_slice = md->md_slice;
 	disk->d_init_balance = md->md_balance;
 	disk->d_init_mediasize = md->md_mediasize;
 	if (errorp != NULL)
 		*errorp = 0;
 	return (disk);
 fail:
 	if (errorp != NULL)
 		*errorp = error;
 	if (disk != NULL)
 		free(disk, M_MIRROR);
 	return (NULL);
 }
 
 /*
  * Unlink a disk from its device's disk list and release it.
  *
  * Must be called without the topology lock and with sc_lock held
  * exclusively (both are asserted).  The topology lock is taken briefly
  * around the list removal and again around the consumer disconnect.
  */
 static void
 g_mirror_destroy_disk(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	g_topology_lock();
 	LIST_REMOVE(disk, d_next);
 	g_topology_unlock();
 	g_mirror_event_cancel(disk);
 	/* Do not leave the hint pointing at a disk that is going away. */
 	if (sc->sc_hint == disk)
 		sc->sc_hint = NULL;
 	switch (disk->d_state) {
 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 		/* Stop the synchronization first, then release like the rest. */
 		g_mirror_sync_stop(disk, 1);
 		/* FALLTHROUGH */
 	case G_MIRROR_DISK_STATE_NEW:
 	case G_MIRROR_DISK_STATE_STALE:
 	case G_MIRROR_DISK_STATE_ACTIVE:
 		g_topology_lock();
 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
 		g_topology_unlock();
 		free(disk, M_MIRROR);
 		break;
 	default:
 		/*
 		 * Any other state is a programming error.  Note that this
 		 * path neither disconnects the consumer nor frees the disk.
 		 */
 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 	}
 }
 
 /*
  * Final release of a device softc: destroy its locks and free the memory.
  * Callers in this file invoke it once sc_refcnt has dropped to zero.
  *
  * Must be called with the topology lock held.
  */
 static void
 g_mirror_free_device(struct g_mirror_softc *sc)
 {
 
 	g_topology_assert();
 
 	mtx_destroy(&sc->sc_queue_mtx);
 	mtx_destroy(&sc->sc_events_mtx);
 	mtx_destroy(&sc->sc_done_mtx);
 	sx_destroy(&sc->sc_lock);
 	free(sc, M_MIRROR);
 }
 
 /*
  * Drop the reference held on the softc through the provider's private
  * pointer, and free the softc if that was the last reference.
  */
 static void
 g_mirror_providergone(struct g_provider *pp)
 {
 	struct g_mirror_softc *sc;
 
 	sc = pp->private;
 	sc->sc_refcnt--;
 	if (sc->sc_refcnt == 0)
 		g_mirror_free_device(sc);
 }
 
 /*
  * Tear down a whole mirror device: provider, disks, pending events,
  * synchronization consumers and both geoms.
  *
  * Must be called without the topology lock and with sc_lock held
  * exclusively.  sc_lock is released here, and the softc itself is freed
  * if this drops the last reference, so the caller must not touch sc
  * afterwards.
  */
 static void
 g_mirror_destroy_device(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 	struct g_mirror_event *ep;
 	struct g_geom *gp;
 	struct g_consumer *cp, *tmpcp;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	gp = sc->sc_geom;
 	if (sc->sc_provider != NULL)
 		g_mirror_destroy_provider(sc);
 	/*
 	 * g_mirror_destroy_disk() unlinks the disk from sc_disks, so taking
 	 * the list head again on every iteration drains the list.
 	 */
 	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
 	    disk = LIST_FIRST(&sc->sc_disks)) {
 		/* Record the disk as clean in its metadata before dropping it. */
 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		g_mirror_update_metadata(disk);
 		g_mirror_destroy_disk(disk);
 	}
 	/*
 	 * Flush the event queue: DONTWAIT events are simply freed, all others
 	 * are marked done with ECANCELED and a wakeup is issued on them.
 	 */
 	while ((ep = g_mirror_event_first(sc)) != NULL) {
 		g_mirror_event_remove(sc, ep);
 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
 			g_mirror_event_free(ep);
 		else {
 			ep->e_error = ECANCELED;
 			ep->e_flags |= G_MIRROR_EVENT_DONE;
 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
 			mtx_lock(&sc->sc_events_mtx);
 			wakeup(ep);
 			mtx_unlock(&sc->sc_events_mtx);
 		}
 	}
 	g_mirror_timeout_drain(sc);
 
 	g_topology_lock();
 	/* The SAFE variant is needed: consumers may be destroyed in the loop. */
 	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
 		g_mirror_disconnect_consumer(sc, cp);
 	}
 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
 	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom(gp, ENXIO);
 	/*
 	 * Release sc_lock before dropping our reference: if it is the last
 	 * one, g_mirror_free_device() destroys the lock and frees sc.
 	 */
 	sx_xunlock(&sc->sc_lock);
 	if ((--sc->sc_refcnt) == 0)
 		g_mirror_free_device(sc);
 	g_topology_unlock();
 }
 
 /*
  * Orphan handler for a component consumer.  If the consumer still belongs
  * to a disk, request a syncid bump on the device and queue a non-blocking
  * event that moves the disk to the DISCONNECTED state.
  *
  * Called with the topology lock held.
  */
 static void
 g_mirror_orphan(struct g_consumer *cp)
 {
 	struct g_mirror_disk *disk;
 
 	g_topology_assert();
 
 	disk = cp->private;
 	if (disk != NULL) {
 		disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
 		    G_MIRROR_EVENT_DONTWAIT);
 	}
 }
 
 /*
  * Return the next active disk after the given one, treating the disk list
  * as circular.  The search ends with the given disk itself, so the same
  * disk is returned when it is the only active one.
  * If there are no active disks on the list, NULL is returned.
  *
  * The given disk must be linked on sc->sc_disks.
  */
 static __inline struct g_mirror_disk *
 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
 {
 	struct g_mirror_disk *dp;
 
 	dp = disk;
 	do {
 		dp = LIST_NEXT(dp, d_next);
 		/* Wrap around at the end of the list. */
 		if (dp == NULL)
 			dp = LIST_FIRST(&sc->sc_disks);
 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
 			return (dp);
 		/*
 		 * The termination test must come after the wrap-around: when
 		 * the starting disk is the list head, the wrap lands exactly
 		 * on it, and stepping past it before comparing would loop
 		 * forever if no disk is active.
 		 */
 	} while (dp != disk);
 	return (NULL);
 }
 
 /*
  * Pick the disk to use for the next request and advance sc_hint to the
  * active disk that follows it.  Returns NULL when the disk list is empty
  * or no active disk can be found.
  */
 static struct g_mirror_disk *
 g_mirror_get_disk(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *cur;
 
 	/* Without a hint, start from the head of the disk list. */
 	if (sc->sc_hint == NULL)
 		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
 	cur = sc->sc_hint;
 	if (cur == NULL)
 		return (NULL);
 
 	/* The hinted disk may have left the ACTIVE state since it was set. */
 	if (cur->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
 		cur = g_mirror_find_next(sc, cur);
 		if (cur == NULL)
 			return (NULL);
 	}
 	sc->sc_hint = g_mirror_find_next(sc, cur);
 	return (cur);
 }
 
 /*
  * Write one sector of metadata to the last sector of the disk's provider.
  *
  * When md is NULL, or the device has the WIPE flag set, the sector is
  * written zero-filled; otherwise md is encoded into it.  Returns 0 or an
  * errno value.  On failure the disk is flagged BROKEN and, if
  * disconnect-on-failure is enabled and more than one disk is active, a
  * genid bump and a DISCONNECTED event are requested for it.
  *
  * Must be called without the topology lock and with sc_lock held.
  */
 static int
 g_mirror_write_metadata(struct g_mirror_disk *disk,
     struct g_mirror_metadata *md)
 {
 	struct g_mirror_softc *sc;
 	struct g_consumer *cp;
 	off_t offset, length;
 	u_char *sector;
 	int error = 0;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	cp = disk->d_consumer;
 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	/* The metadata occupies the very last sector of the provider. */
 	length = cp->provider->sectorsize;
 	offset = cp->provider->mediasize - length;
 	/* M_ZERO: the buffer stays all zeroes unless md is encoded below. */
 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
 	if (md != NULL &&
 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
 		/*
 		 * Handle the case when the size of the parent provider was
 		 * reduced: the metadata sector would then fall inside the
 		 * mirrored data area.
 		 */
 		if (offset < md->md_mediasize)
 			error = ENOSPC;
 		else
 			mirror_metadata_encode(md, sector);
 	}
 	/* NOTE(review): fail point, presumably for injecting test errors. */
 	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
 	if (error == 0)
 		error = g_write_data(cp, offset, sector, length);
 	free(sector, M_MIRROR);
 	if (error != 0) {
 		/* Log at level 0 only the first time the disk is seen broken. */
 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
 			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
 			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
 			    "(device=%s, error=%d).",
 			    g_mirror_get_diskname(disk), sc->sc_name, error);
 		} else {
 			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
 			    "(device=%s, error=%d).",
 			    g_mirror_get_diskname(disk), sc->sc_name, error);
 		}
 		/* The disk is kept when it is the only active one. */
 		if (g_mirror_disconnect_on_failure &&
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
 			g_mirror_event_send(disk,
 			    G_MIRROR_DISK_STATE_DISCONNECTED,
 			    G_MIRROR_EVENT_DONTWAIT);
 		}
 	}
 	return (error);
 }
 
 static int
 g_mirror_clear_metadata(struct g_mirror_disk *disk)
 {
 	int error;
 
 	g_topology_assert_not();
 	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
 
 	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
 		return (0);
 	error = g_mirror_write_metadata(disk, NULL);
 	if (error == 0) {
 		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
 		    g_mirror_get_diskname(disk));
 	} else {
 		G_MIRROR_DEBUG(0,
 		    "Cannot clear metadata on disk %s (error=%d).",
 		    g_mirror_get_diskname(disk), error);
 	}
 	return (error);
 }
 
 /*
  * Build the metadata describing the device and, optionally, one of its
  * disks.  md is zeroed first.  When disk is NULL only the device-wide
  * fields are filled in and a random disk ID is generated.
  */
 void
 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
     struct g_mirror_metadata *md)
 {
 
 	bzero(md, sizeof(*md));
 
 	/* Device-wide part. */
 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
 	md->md_version = G_MIRROR_VERSION;
 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
 	md->md_mid = sc->sc_id;
 	md->md_all = sc->sc_ndisks;
 	md->md_slice = sc->sc_slice;
 	md->md_balance = sc->sc_balance;
 	md->md_genid = sc->sc_genid;
 	md->md_mediasize = sc->sc_mediasize;
 	md->md_sectorsize = sc->sc_sectorsize;
 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
 
 	if (disk == NULL) {
 		md->md_did = arc4random();
 		return;
 	}
 
 	/* Per-disk part. */
 	md->md_did = disk->d_id;
 	md->md_priority = disk->d_priority;
 	md->md_syncid = disk->d_sync.ds_syncid;
 	md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
 	/* The sync offset is only recorded while a sync is in progress. */
 	if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
 		md->md_sync_offset = disk->d_sync.ds_offset_done;
 	/* The provider name is stored only for hardcoded disks. */
 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
 		strlcpy(md->md_provider,
 		    disk->d_consumer->provider->name,
 		    sizeof(md->md_provider));
 	}
 	md->md_provsize = disk->d_consumer->provider->mediasize;
 }
 
 void
 g_mirror_update_metadata(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_metadata md;
 	int error;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
 		return;
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
 		g_mirror_fill_metadata(sc, disk, &md);
 	error = g_mirror_write_metadata(disk, &md);
 	if (error == 0) {
 		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
 		    g_mirror_get_diskname(disk));
 	} else {
 		G_MIRROR_DEBUG(0,
 		    "Cannot update metadata on disk %s (error=%d).",
 		    g_mirror_get_diskname(disk), error);
 	}
 }
 
 /*
  * Increment the device's syncid and propagate the new value, together
  * with a metadata update, to every ACTIVE or SYNCHRONIZING disk.
  *
  * Must be called without the topology lock and with sc_lock held
  * exclusively.
  */
 static void
 g_mirror_bump_syncid(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *dp;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_syncid++;
 	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
 	    sc->sc_syncid);
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		switch (dp->d_state) {
 		case G_MIRROR_DISK_STATE_ACTIVE:
 		case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 			dp->d_sync.ds_syncid = sc->sc_syncid;
 			g_mirror_update_metadata(dp);
 			break;
 		default:
 			/* Disks in other states keep their old syncid. */
 			break;
 		}
 	}
 }
 
 /*
  * Increment the device's genid and propagate the new value, together
  * with a metadata update, to every ACTIVE or SYNCHRONIZING disk.
  *
  * Must be called without the topology lock and with sc_lock held
  * exclusively.
  */
 static void
 g_mirror_bump_genid(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *dp;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_genid++;
 	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
 	    sc->sc_genid);
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		/* Disks in other states keep their old genid. */
 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE &&
 		    dp->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			continue;
 		dp->d_genid = sc->sc_genid;
 		g_mirror_update_metadata(dp);
 	}
 }
 
 /*
  * Try to move the device into the idle state, marking all active disks
  * as clean in their metadata.
  *
  * Returns 0 when nothing (more) needs to be done, or the number of
  * seconds left until the idle time expires when the device was written
  * to too recently.  The wait applies when acw is positive, or when acw
  * is -1 and the provider is open for writing; it is skipped once
  * g_mirror_shutdown is set.
  *
  * Must be called without the topology lock and with sc_lock held
  * exclusively.
  */
 static int
 g_mirror_idle(struct g_mirror_softc *sc, int acw)
 {
 	struct g_mirror_disk *dp;
 	int remaining;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	/* No provider, idling disabled, already idle, or writes pending. */
 	if (sc->sc_provider == NULL ||
 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0 ||
 	    sc->sc_idle ||
 	    sc->sc_writes > 0)
 		return (0);
 
 	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
 		remaining = g_mirror_idletime -
 		    (time_uptime - sc->sc_last_write);
 		if (!g_mirror_shutdown && remaining > 0)
 			return (remaining);
 	}
 
 	sc->sc_idle = 1;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) {
 			G_MIRROR_DEBUG(2,
 			    "Disk %s (device %s) marked as clean.",
 			    g_mirror_get_diskname(dp), sc->sc_name);
 			dp->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 			g_mirror_update_metadata(dp);
 		}
 	}
 	return (0);
 }
 
 /*
  * Leave the idle state: record the time of the write and mark all active
  * disks as dirty in their metadata.  Does nothing for devices with the
  * NOFAILSYNC flag set.
  *
  * Must be called without the topology lock and with sc_lock held
  * exclusively.
  */
 static void
 g_mirror_unidle(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *dp;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
 		return;
 
 	sc->sc_idle = 0;
 	sc->sc_last_write = time_uptime;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) {
 			G_MIRROR_DEBUG(2,
 			    "Disk %s (device %s) marked as dirty.",
 			    g_mirror_get_diskname(dp), sc->sc_name);
 			dp->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 			g_mirror_update_metadata(dp);
 		}
 	}
 }
 
 /*
  * Completion callback for regular component requests: tag the bio as a
  * regular one, put it on the device queue and wake up whoever sleeps on
  * the softc.
  */
 static void
 g_mirror_done(struct bio *bp)
 {
 	struct g_mirror_softc *sc = bp->bio_from->geom->softc;
 
 	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
 
 	mtx_lock(&sc->sc_queue_mtx);
 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 
 	wakeup(sc);
 }
 
 /*
  * React to a failed regular request on the given disk: flag the disk as
  * BROKEN, log the failure and, when disconnect-on-failure is enabled and
  * more than one disk is active, request an ID bump and the disconnection
  * of the disk.  EOPNOTSUPP from BIO_FLUSH or BIO_SPEEDUP is ignored.
  */
 static void
 g_mirror_regular_request_error(struct g_mirror_softc *sc,
     struct g_mirror_disk *disk, struct bio *bp)
 {
 
 	if (bp->bio_error == EOPNOTSUPP &&
 	    (bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_SPEEDUP))
 		return;
 
 	/* Only the first failure of a disk is logged at level 0. */
 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) != 0) {
 		G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
 		    bp->bio_error);
 	} else {
 		disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
 		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
 		    bp->bio_error);
 	}
 
 	if (!g_mirror_disconnect_on_failure ||
 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) <= 1)
 		return;
 
 	/*
 	 * ENXIO asks for a syncid bump (the _NOW variant unless the request
 	 * was a read); any other error asks for a genid bump.
 	 */
 	if (bp->bio_error != ENXIO)
 		sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
 	else if (bp->bio_cmd == BIO_READ)
 		sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 	else
 		sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
 	    G_MIRROR_EVENT_DONTWAIT);
 }
 
 /*
  * Handle the completion of a regular component request (bp), whose
  * parent (pbp) is the request originally sent to the mirror provider.
  *
  * The parent is delivered once all of its children are accounted for.
  * For write-like commands failed children are dropped from the parent's
  * counters, so the parent succeeds if at least one child succeeded.  A
  * failed read is put back on the device queue when more than one disk
  * is active.
  *
  * Must be called without the topology lock.
  */
 static void
 g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *disk;
 	struct bio *pbp;
 
 	g_topology_assert_not();
 	KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
 	    ("regular request %p with unexpected origin", bp));
 
 	pbp = bp->bio_parent;
 	/* Undo the increment done when the request was sent to the consumer. */
 	bp->bio_from->index--;
 	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
 		sc->sc_writes--;
 	disk = bp->bio_from->private;
 	if (disk == NULL) {
 		/* The consumer no longer belongs to a disk: get rid of it. */
 		g_topology_lock();
 		g_mirror_kill_consumer(sc, bp->bio_from);
 		g_topology_unlock();
 	}
 
 	/* NOTE(review): fail points, presumably for injecting test errors. */
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
 		    bp->bio_error);
 		break;
 	case BIO_WRITE:
 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
 		    bp->bio_error);
 		break;
 	case BIO_DELETE:
 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
 		    bp->bio_error);
 		break;
 	case BIO_FLUSH:
 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
 		    bp->bio_error);
 		break;
 	case BIO_SPEEDUP:
 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_speedup,
 		    bp->bio_error);
 		break;
 	}
 
 	pbp->bio_inbed++;
 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
 	    pbp->bio_children));
 	if (bp->bio_error == 0 && pbp->bio_error == 0) {
 		/* Fast path: neither this child nor an earlier one failed. */
 		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
 		g_destroy_bio(bp);
 		if (pbp->bio_children == pbp->bio_inbed) {
 			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
 			pbp->bio_completed = pbp->bio_length;
 			if (pbp->bio_cmd == BIO_WRITE ||
 			    pbp->bio_cmd == BIO_DELETE) {
 				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
 				/* Release delayed sync requests if possible. */
 				g_mirror_sync_release(sc);
 			}
 			g_io_deliver(pbp, pbp->bio_error);
 		}
 		return;
 	} else if (bp->bio_error != 0) {
 		/* Remember the first error seen on the parent. */
 		if (pbp->bio_error == 0)
 			pbp->bio_error = bp->bio_error;
 		if (disk != NULL)
 			g_mirror_regular_request_error(sc, disk, bp);
 		switch (pbp->bio_cmd) {
 		case BIO_DELETE:
 		case BIO_WRITE:
 		case BIO_FLUSH:
 		case BIO_SPEEDUP:
 			/*
 			 * Forget the failed child entirely, so that the
 			 * counters only cover successful and pending ones.
 			 */
 			pbp->bio_inbed--;
 			pbp->bio_children--;
 			break;
 		}
 	}
 	g_destroy_bio(bp);
 
 	switch (pbp->bio_cmd) {
 	case BIO_READ:
 		if (pbp->bio_inbed < pbp->bio_children)
 			break;
 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
 			g_io_deliver(pbp, pbp->bio_error);
 		else {
 			/* Clear the error and queue the parent once more. */
 			pbp->bio_error = 0;
 			mtx_lock(&sc->sc_queue_mtx);
 			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
 			mtx_unlock(&sc->sc_queue_mtx);
 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 			wakeup(sc);
 		}
 		break;
 	case BIO_DELETE:
 	case BIO_WRITE:
 	case BIO_FLUSH:
 	case BIO_SPEEDUP:
 		if (pbp->bio_children == 0) {
 			/*
 			 * All requests failed.
 			 */
 		} else if (pbp->bio_inbed < pbp->bio_children) {
 			/* Do nothing. */
 			break;
 		} else if (pbp->bio_children == pbp->bio_inbed) {
 			/* Some requests succeeded. */
 			pbp->bio_error = 0;
 			pbp->bio_completed = pbp->bio_length;
 		}
 		if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
 			TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
 			/* Release delayed sync requests if possible. */
 			g_mirror_sync_release(sc);
 		}
 		g_io_deliver(pbp, pbp->bio_error);
 		break;
 	default:
 		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
 		break;
 	}
 }
 
 /*
  * Completion callback for synchronization requests: tag the bio as a
  * synchronization one, put it on the device queue and wake up whoever
  * sleeps on the softc.
  */
 static void
 g_mirror_sync_done(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 
 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
 
 	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
 	sc = bp->bio_from->geom->softc;
 
 	mtx_lock(&sc->sc_queue_mtx);
 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 
 	wakeup(sc);
 }
 
 /*
  * Answer a "GEOM::candelete" attribute request: report 1 when at least
  * one disk of the mirror has the CANDELETE flag set, 0 otherwise.
  */
 static void
 g_mirror_candelete(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *dp;
 	int val;
 
 	sc = bp->bio_to->private;
 	val = 0;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if ((dp->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) != 0) {
 			val = 1;
 			break;
 		}
 	}
 	g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
 }
 
 /*
  * Answer a "GEOM::kerneldump" attribute request by forwarding it to the
  * first component of the mirror.
  */
 static void
 g_mirror_kernel_dump(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *first;
 	struct g_kerneldump *gkd;
 	struct bio *clone;
 
 	/*
 	 * Dumping is configured on the first component because that is the
 	 * one the 'prefer' balance algorithm reads from.
 	 * If the component with the highest priority is disconnected right
 	 * now, the dump will not be readable after the reboot should that
 	 * component be connected and synchronized later.  Can we do
 	 * something better?
 	 */
 	sc = bp->bio_to->private;
 	first = LIST_FIRST(&sc->sc_disks);
 
 	/* Clamp the requested dump length to the size of the mirror. */
 	gkd = (struct g_kerneldump *)bp->bio_data;
 	if (gkd->length > bp->bio_to->mediasize)
 		gkd->length = bp->bio_to->mediasize;
 
 	clone = g_clone_bio(bp);
 	if (clone != NULL) {
 		clone->bio_done = g_std_done;
 		g_io_request(clone, first->d_consumer);
 		G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
 		    g_mirror_get_diskname(first));
 		return;
 	}
 	g_io_deliver(bp, ENOMEM);
 }
 
 static void
 g_mirror_start(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 
 	sc = bp->bio_to->private;
 	/*
 	 * If sc == NULL or there are no valid disks, provider's error
 	 * should be set and g_mirror_start() should not be called at all.
 	 */
 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 	    ("Provider's error should be set (error=%d)(mirror=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 	G_MIRROR_LOGREQ(3, bp, "Request received.");
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 	case BIO_SPEEDUP:
 	case BIO_FLUSH:
 		break;
 	case BIO_GETATTR:
 		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
 			g_mirror_candelete(bp);
 			return;
 		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
 			g_mirror_kernel_dump(bp);
 			return;
 		}
 		/* FALLTHROUGH */
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 	mtx_lock(&sc->sc_queue_mtx);
 	if (bp->bio_to->error != 0) {
 		mtx_unlock(&sc->sc_queue_mtx);
 		g_io_deliver(bp, bp->bio_to->error);
 		return;
 	}
 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	wakeup(sc);
 }
 
 /*
  * Return true if the byte range of the given request overlaps the range
  * of any outstanding synchronization request of a disk that is being
  * synchronized.
  */
 static bool
 g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *dp;
 	struct bio *syncbp;
 	off_t req_lo, req_hi, sync_lo, sync_hi;
 	u_int slot;
 
 	/* Nothing is being synchronized, so nothing can collide. */
 	if (sc->sc_sync.ds_ndisks == 0)
 		return (false);
 
 	req_lo = bp->bio_offset;
 	req_hi = bp->bio_offset + bp->bio_length;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			continue;
 		for (slot = 0; slot < g_mirror_syncreqs; slot++) {
 			syncbp = dp->d_sync.ds_bios[slot];
 			if (syncbp == NULL)
 				continue;
 			sync_lo = syncbp->bio_offset;
 			sync_hi = syncbp->bio_offset + syncbp->bio_length;
 			/* Half-open ranges overlap. */
 			if (req_lo < sync_hi && sync_lo < req_hi)
 				return (true);
 		}
 	}
 	return (false);
 }
 
 /*
  * Return true if the byte range of the given synchronization request
  * overlaps the range of any request on the in-flight queue.
  */
 static bool
 g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
 {
 	struct bio *inflight;
 	off_t req_lo, req_hi, sync_lo, sync_hi;
 
 	/* Nothing is being synchronized, so nothing can collide. */
 	if (sc->sc_sync.ds_ndisks == 0)
 		return (false);
 
 	sync_lo = sbp->bio_offset;
 	sync_hi = sbp->bio_offset + sbp->bio_length;
 	TAILQ_FOREACH(inflight, &sc->sc_inflight, bio_queue) {
 		req_lo = inflight->bio_offset;
 		req_hi = inflight->bio_offset + inflight->bio_length;
 		/* Half-open ranges overlap. */
 		if (req_lo < sync_hi && sync_lo < req_hi)
 			return (true);
 	}
 	return (false);
 }
 
 /*
  * Put a regular request at the tail of the delayed-regular queue.
  * g_mirror_regular_release() moves the delayed requests back to the
  * device queue.
  */
 static void
 g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
 {
 
 	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
 	TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
 }
 
 /*
  * Put a synchronization request at the tail of the delayed-sync queue.
  * g_mirror_sync_release() issues the delayed requests once they no longer
  * collide with regular requests.
  */
 static void
 g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
 {
 
 	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
 	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
 }
 
 /*
  * Move all delayed regular requests to the front of the device queue,
  * provided the first delayed request no longer collides with a
  * synchronization request.
  */
 static void
 g_mirror_regular_release(struct g_mirror_softc *sc)
 {
 	struct bio *head;
 
 	head = TAILQ_FIRST(&sc->sc_regular_delayed);
 	if (head == NULL || g_mirror_sync_collision(sc, head))
 		return;
 
 	G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
 	mtx_lock(&sc->sc_queue_mtx);
 	/*
 	 * Append the device queue to the delayed one and swap the two heads:
 	 * the delayed requests end up first on sc_queue and the delayed
 	 * queue ends up empty.
 	 */
 	TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
 	TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 }
 
 /*
  * Issue every delayed synchronization request that no longer collides
  * with an in-flight regular request; the others stay on the delayed
  * queue.
  */
 static void
 g_mirror_sync_release(struct g_mirror_softc *sc)
 {
 	struct bio *cur, *next;
 
 	TAILQ_FOREACH_SAFE(cur, &sc->sc_sync_delayed, bio_queue, next) {
 		if (!g_mirror_regular_collision(sc, cur)) {
 			TAILQ_REMOVE(&sc->sc_sync_delayed, cur, bio_queue);
 			G_MIRROR_LOGREQ(2, cur,
 			    "Releasing delayed synchronization request.");
 			g_io_request(cur, cur->bio_from);
 		}
 	}
 }
 
 /*
  * Release a synchronization request together with its data buffer.  When
  * a disk with a request array is given, the array slot that held the
  * request (its index is kept in bio_caller1) is cleared as well.
  */
 static void
 g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
 {
 	int slot;
 
 	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
 		slot = (int)(uintptr_t)bp->bio_caller1;
 		KASSERT(disk->d_sync.ds_bios[slot] == bp,
 		    ("unexpected sync BIO at %p:%d", disk, slot));
 		disk->d_sync.ds_bios[slot] = NULL;
 	}
 
 	free(bp->bio_data, M_MIRROR);
 	g_destroy_bio(bp);
 }
 
 /*
  * Handle synchronization requests.
  * Every synchronization request is a two-step process: first, a read request is
  * sent to the mirror provider via the sync consumer. If that request completes
  * successfully, it is converted to a write and sent to the disk being
  * synchronized. If the write also completes successfully, the synchronization
  * offset is advanced and a new read request is submitted.
  *
  * This function is called with a completed request of either step.  A
  * failed read or write frees the request and asks for the disk to be
  * disconnected.
  */
 static void
 g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *disk;
 	struct g_mirror_disk_sync *sync;
 
 	KASSERT((bp->bio_cmd == BIO_READ &&
 	    bp->bio_from->geom == sc->sc_sync.ds_geom) ||
 	    (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
 	    ("Sync BIO %p with unexpected origin", bp));
 
 	/* Undo the increment done when the request was sent to the consumer. */
 	bp->bio_from->index--;
 	disk = bp->bio_from->private;
 	if (disk == NULL) {
 		/*
 		 * The consumer no longer belongs to a disk: get rid of it and
 		 * of the request.  There is no disk whose slot could be
 		 * cleared, hence the NULL passed to the free routine.
 		 */
 		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
 		g_topology_lock();
 		g_mirror_kill_consumer(sc, bp->bio_from);
 		g_topology_unlock();
 		g_mirror_sync_request_free(NULL, bp);
 		sx_xlock(&sc->sc_lock);
 		return;
 	}
 
 	sync = &disk->d_sync;
 
 	/*
 	 * Synchronization request.
 	 */
 	switch (bp->bio_cmd) {
 	case BIO_READ: {
 		struct g_consumer *cp;
 
 		/* NOTE(review): fail point, presumably for test injection. */
 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
 		    bp->bio_error);
 
 		if (bp->bio_error != 0) {
 			G_MIRROR_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 
 			/*
 			 * The read error will trigger a syncid bump, so there's
 			 * no need to do that here.
 			 *
 			 * The read error handling for regular requests will
 			 * retry the read from all active mirrors before passing
 			 * the error back up, so there's no need to retry here.
 			 */
 			g_mirror_sync_request_free(disk, bp);
 			g_mirror_event_send(disk,
 			    G_MIRROR_DISK_STATE_DISCONNECTED,
 			    G_MIRROR_EVENT_DONTWAIT);
 			return;
 		}
 		G_MIRROR_LOGREQ(3, bp,
 		    "Synchronization request half-finished.");
 		/* Second step: reuse the same bio to write the data just read. */
 		bp->bio_cmd = BIO_WRITE;
 		bp->bio_cflags = 0;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		cp->index++;
 		g_io_request(bp, cp);
 		return;
 	}
 	case BIO_WRITE: {
 		off_t offset;
 		int i;
 
 		/* NOTE(review): fail point, presumably for test injection. */
 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
 		    bp->bio_error);
 
 		if (bp->bio_error != 0) {
 			G_MIRROR_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 			g_mirror_sync_request_free(disk, bp);
 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
 			g_mirror_event_send(disk,
 			    G_MIRROR_DISK_STATE_DISCONNECTED,
 			    G_MIRROR_EVENT_DONTWAIT);
 			return;
 		}
 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
 		if (sync->ds_offset >= sc->sc_mediasize ||
 		    sync->ds_consumer == NULL ||
 		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 			/* Don't send more synchronization requests. */
 			sync->ds_inflight--;
 			g_mirror_sync_request_free(disk, bp);
 			/* Wait until the remaining requests have drained. */
 			if (sync->ds_inflight > 0)
 				return;
 			/* Stopped early: the disk must not be activated. */
 			if (sync->ds_consumer == NULL ||
 			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 				return;
 			}
 			/* Disk up-to-date, activate it. */
 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
 			    G_MIRROR_EVENT_DONTWAIT);
 			return;
 		}
 
 		/* Send next synchronization request. */
 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
 		sync->ds_offset += bp->bio_length;
 
 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
 		sync->ds_consumer->index++;
 
 		/*
 		 * Delay the request if it is colliding with a regular request.
 		 */
 		if (g_mirror_regular_collision(sc, bp))
 			g_mirror_sync_delay(sc, bp);
 		else
 			g_io_request(bp, sync->ds_consumer);
 
 		/* Requeue delayed requests if possible. */
 		g_mirror_regular_release(sc);
 
 		/*
 		 * Find the smallest offset among the outstanding requests.
 		 * Note that bp is reused as the loop cursor from here on.
 		 */
 		offset = sc->sc_mediasize;
 		for (i = 0; i < g_mirror_syncreqs; i++) {
 			bp = sync->ds_bios[i];
 			if (bp != NULL && bp->bio_offset < offset)
 				offset = bp->bio_offset;
 		}
 		/* Periodically record the progress in the disk's metadata. */
 		if (g_mirror_sync_period > 0 &&
 		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
 			sync->ds_offset_done = offset;
 			g_mirror_update_metadata(disk);
 			sync->ds_update_ts = time_uptime;
 		}
 		return;
 	}
 	default:
 		panic("Invalid I/O request %p", bp);
 	}
 }
 
 static void
 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
 			break;
 	}
 	if (disk == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = ENXIO;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = ENOMEM;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 	/*
 	 * Fill in the component buf structure.
 	 */
 	cp = disk->d_consumer;
 	cbp->bio_done = g_mirror_done;
 	cbp->bio_to = cp->provider;
 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	cp->index++;
 	g_io_request(cbp, cp);
 }
 
 /*
  * 'round-robin' balance algorithm: send the request to the disk chosen
  * by g_mirror_get_disk().  The request fails with ENXIO when no disk is
  * available and with ENOMEM when it cannot be cloned; an error already
  * set on the request takes precedence over both.
  */
 static void
 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 
 	disk = g_mirror_get_disk(sc);
 
 	/* Cloning is attempted only when there is a disk to send to. */
 	cbp = (disk != NULL) ? g_clone_bio(bp) : NULL;
 	if (cbp == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = (disk == NULL) ? ENXIO : ENOMEM;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 
 	/*
 	 * Fill in the component buf structure.
 	 */
 	cp = disk->d_consumer;
 	cbp->bio_to = cp->provider;
 	cbp->bio_done = g_mirror_done;
 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	cp->index++;
 	g_io_request(cbp, cp);
 }
 
 #define TRACK_SIZE  (1 * 1024 * 1024)
 #define LOAD_SCALE	256
 #define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
 
 static void
 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *disk, *dp;
 	struct g_consumer *cp;
 	struct bio *cbp;
 	int prio, best;
 
 	/* Find a disk with the smallest load. */
 	disk = NULL;
 	best = INT_MAX;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
 			continue;
 		prio = dp->load;
 		/* If disk head is precisely in position - highly prefer it. */
 		if (dp->d_last_offset == bp->bio_offset)
 			prio -= 2 * LOAD_SCALE;
 		else
 		/* If disk head is close to position - prefer it. */
 		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
 			prio -= 1 * LOAD_SCALE;
 		if (prio <= best) {
 			disk = dp;
 			best = prio;
 		}
 	}
 	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = ENOMEM;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 	/*
 	 * Fill in the component buf structure.
 	 */
 	cp = disk->d_consumer;
 	cbp->bio_done = g_mirror_done;
 	cbp->bio_to = cp->provider;
 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	cp->index++;
 	/* Remember last head position */
 	disk->d_last_offset = bp->bio_offset + bp->bio_length;
 	/* Update loads. */
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		dp->load = (dp->d_consumer->index * LOAD_SCALE +
 		    dp->load * 7) / 8;
 	}
 	g_io_request(cbp, cp);
 }
 
 /*
  * Split a read across all ACTIVE components.
  *
  * Requests no longer than sc_slice are passed to the round-robin algorithm
  * unchanged.  Longer requests are cut into one piece per ACTIVE disk: the
  * piece size is bio_length / ndisks rounded up to a multiple of the
  * provider's sector size, and the last piece takes whatever remains.
  *
  * All clones are allocated before any of them is sent, so an allocation
  * failure leaves nothing in flight: the clones are destroyed and bp is
  * delivered with ENOMEM (or with the error already stored in it).  If there
  * is no ACTIVE disk at all, bp is delivered with ENXIO instead of dividing
  * by zero while computing the piece size.
  */
 static void
 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct bio_queue queue;
 	struct g_mirror_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 	off_t left, mod, offset, slice;
 	u_char *data;
 	u_int ndisks;
 
 	if (bp->bio_length <= sc->sc_slice) {
 		g_mirror_request_round_robin(sc, bp);
 		return;
 	}
 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
 	if (ndisks == 0) {
 		/* Nothing to read from; also avoids a division by zero. */
 		if (bp->bio_error == 0)
 			bp->bio_error = ENXIO;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 	/* Per-disk piece size, rounded up to a whole number of sectors. */
 	slice = bp->bio_length / ndisks;
 	mod = slice % sc->sc_provider->sectorsize;
 	if (mod != 0)
 		slice += sc->sc_provider->sectorsize - mod;
 	/*
 	 * Allocate all bios before sending any request, so we can
 	 * return ENOMEM in nice and clean way.
 	 */
 	left = bp->bio_length;
 	offset = bp->bio_offset;
 	data = bp->bio_data;
 	TAILQ_INIT(&queue);
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
 			continue;
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 				TAILQ_REMOVE(&queue, cbp, bio_queue);
 				g_destroy_bio(cbp);
 			}
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
 			return;
 		}
 		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
 		cbp->bio_done = g_mirror_done;
 		/* Remember the target disk until the clone is sent. */
 		cbp->bio_caller1 = disk;
 		cbp->bio_to = disk->d_consumer->provider;
 		cbp->bio_offset = offset;
 		cbp->bio_data = data;
 		cbp->bio_length = MIN(left, slice);
 		left -= cbp->bio_length;
 		if (left == 0)
 			break;
 		offset += cbp->bio_length;
 		data += cbp->bio_length;
 	}
 	/* Every clone was allocated; send them all. */
 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 		TAILQ_REMOVE(&queue, cbp, bio_queue);
 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 		disk = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		cp->index++;
 		g_io_request(cbp, cp);
 	}
 }
 
 /*
  * Start processing of a regular request addressed to the mirror provider.
  *
  * Must be called with sc_lock exclusively held.  Reads are handed to the
  * balance algorithm selected by sc_balance.  Writes and deletes are cloned
  * to every eligible component and the parent is put on sc_inflight; they
  * are postponed instead when they collide with a synchronization request.
  * Flush and speedup requests are cloned to every ACTIVE component.  If a
  * clone cannot be allocated, nothing is sent and bp is delivered with ENOMEM
  * (or with the error already stored in it).
  */
 static void
 g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct bio_queue queue;
 	struct bio *cbp;
 	struct g_consumer *cp;
 	struct g_mirror_disk *disk;
 
 	sx_assert(&sc->sc_lock, SA_XLOCKED);
 
 	/*
 	 * To avoid ordering issues, if a write is deferred because of a
 	 * collision with a sync request, all I/O is deferred until that
 	 * write is initiated.  Requests coming from the synchronization geom
 	 * are exempt from this.
 	 */
 	if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
 	    !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
 		g_mirror_regular_delay(sc, bp);
 		return;
 	}
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		/* A read is served by the configured balance algorithm. */
 		switch (sc->sc_balance) {
 		case G_MIRROR_BALANCE_LOAD:
 			g_mirror_request_load(sc, bp);
 			break;
 		case G_MIRROR_BALANCE_PREFER:
 			g_mirror_request_prefer(sc, bp);
 			break;
 		case G_MIRROR_BALANCE_ROUND_ROBIN:
 			g_mirror_request_round_robin(sc, bp);
 			break;
 		case G_MIRROR_BALANCE_SPLIT:
 			g_mirror_request_split(sc, bp);
 			break;
 		}
 		return;
 	case BIO_WRITE:
 	case BIO_DELETE:
 		/*
 		 * Delay the request if it is colliding with a synchronization
 		 * request.
 		 */
 		if (g_mirror_sync_collision(sc, bp)) {
 			g_mirror_regular_delay(sc, bp);
 			return;
 		}
 
 		/* Leave the idle state, or just record the time of this write. */
 		if (sc->sc_idle)
 			g_mirror_unidle(sc);
 		else
 			sc->sc_last_write = time_uptime;
 
 		/*
 		 * Bump syncid on first write.
 		 */
 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
 			g_mirror_bump_syncid(sc);
 		}
 
 		/*
 		 * Allocate all bios before sending any request, so we can
 		 * return ENOMEM in nice and clean way.
 		 */
 		TAILQ_INIT(&queue);
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			switch (disk->d_state) {
 			case G_MIRROR_DISK_STATE_ACTIVE:
 				break;
 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 				/*
 				 * A synchronizing disk only receives requests
 				 * that start below its ds_offset.
 				 */
 				if (bp->bio_offset >= disk->d_sync.ds_offset)
 					continue;
 				break;
 			default:
 				continue;
 			}
 			/* Skip components that cannot handle BIO_DELETE. */
 			if (bp->bio_cmd == BIO_DELETE &&
 			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
 				continue;
 			cbp = g_clone_bio(bp);
 			if (cbp == NULL) {
 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 					TAILQ_REMOVE(&queue, cbp, bio_queue);
 					g_destroy_bio(cbp);
 				}
 				if (bp->bio_error == 0)
 					bp->bio_error = ENOMEM;
 				g_io_deliver(bp, bp->bio_error);
 				return;
 			}
 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
 			cbp->bio_done = g_mirror_done;
 			cp = disk->d_consumer;
 			/* Remember the consumer until the clone is sent. */
 			cbp->bio_caller1 = cp;
 			cbp->bio_to = cp->provider;
 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 			    ("Consumer %s not opened (r%dw%de%d).",
 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
 		}
 		/*
 		 * No clone was created.  This is only expected for BIO_DELETE,
 		 * when no component supports it.
 		 */
 		if (TAILQ_EMPTY(&queue)) {
 			KASSERT(bp->bio_cmd == BIO_DELETE,
 			    ("No consumers for regular request %p", bp));
 			g_io_deliver(bp, EOPNOTSUPP);
 			return;
 		}
 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 			TAILQ_REMOVE(&queue, cbp, bio_queue);
 			cp = cbp->bio_caller1;
 			cbp->bio_caller1 = NULL;
 			cp->index++;
 			sc->sc_writes++;
 			g_io_request(cbp, cp);
 		}
 		/*
 		 * Put request onto inflight queue, so we can check if new
 		 * synchronization requests don't collide with it.
 		 */
 		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
 		return;
 	case BIO_SPEEDUP:
 	case BIO_FLUSH:
 		/* Clone the request to every ACTIVE component. */
 		TAILQ_INIT(&queue);
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
 				continue;
 			cbp = g_clone_bio(bp);
 			if (cbp == NULL) {
 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 					TAILQ_REMOVE(&queue, cbp, bio_queue);
 					g_destroy_bio(cbp);
 				}
 				if (bp->bio_error == 0)
 					bp->bio_error = ENOMEM;
 				g_io_deliver(bp, bp->bio_error);
 				return;
 			}
 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
 			cbp->bio_done = g_mirror_done;
 			cbp->bio_caller1 = disk;
 			cbp->bio_to = disk->d_consumer->provider;
 		}
 		KASSERT(!TAILQ_EMPTY(&queue),
 		    ("No consumers for regular request %p", bp));
 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 			TAILQ_REMOVE(&queue, cbp, bio_queue);
 			disk = cbp->bio_caller1;
 			cbp->bio_caller1 = NULL;
 			cp = disk->d_consumer;
 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 			    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 			    cp->acr, cp->acw, cp->ace));
 			cp->index++;
 			g_io_request(cbp, cp);
 		}
 		break;
 	default:
 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
 		    bp->bio_cmd, sc->sc_name));
 		break;
 	}
 }
 
 /*
  * Tell whether the device can be destroyed right now.
  *
  * Returns 1 if the softc pointer of the main geom is already cleared, or if
  * no consumer of the main geom and of the synchronization geom is busy.
  * Returns 0 while the device is being tasted or while any consumer is busy.
  * The topology lock must be held.
  */
 static int
 g_mirror_can_destroy(struct g_mirror_softc *sc)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	int pass;
 
 	g_topology_assert();
 	if (sc->sc_geom->softc == NULL)
 		return (1);
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
 		return (0);
 
 	/* First pass checks the main geom, second pass the sync geom. */
 	for (pass = 0; pass < 2; pass++) {
 		gp = (pass == 0) ? sc->sc_geom : sc->sc_sync.ds_geom;
 		LIST_FOREACH(cp, &gp->consumer, consumer) {
 			if (g_mirror_is_busy(sc, cp))
 				return (0);
 		}
 	}
 
 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
 	    sc->sc_name);
 	return (1);
 }
 
 /*
  * Destroy the device if nothing is using it any more.
  *
  * Any root mount hold is released first.  Returns 0 when
  * g_mirror_can_destroy() refuses.  Otherwise the softc pointers of both
  * geoms are cleared and 1 is returned after either waking up whoever sleeps
  * on &sc->sc_worker (DRAIN flag set) or destroying the device directly.
  * In the DRAIN case sc_lock is released here.
  */
 static int
 g_mirror_try_destroy(struct g_mirror_softc *sc)
 {
 
 	/* Do not keep the root mount waiting for a device that is going away. */
 	if (sc->sc_rootmount != NULL) {
 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 		    sc->sc_rootmount);
 		root_mount_rel(sc->sc_rootmount);
 		sc->sc_rootmount = NULL;
 	}
 	g_topology_lock();
 	if (!g_mirror_can_destroy(sc)) {
 		g_topology_unlock();
 		return (0);
 	}
 	/* Detach the softc from both geoms. */
 	sc->sc_geom->softc = NULL;
 	sc->sc_sync.ds_geom->softc = NULL;
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
 		g_topology_unlock();
 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
 		    &sc->sc_worker);
 		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
 		sx_xunlock(&sc->sc_lock);
 		wakeup(&sc->sc_worker);
 		sc->sc_worker = NULL;
 	} else {
 		g_topology_unlock();
 		g_mirror_destroy_device(sc);
 	}
 	return (1);
 }
 
 /*
  * Worker thread.
  *
  * arg is the device softc.  The thread takes sc_lock exclusively and loops
  * forever: each pass either handles one pending event, or takes one bio off
  * sc_queue and routes it, or sleeps when there is nothing to do (sc_lock is
  * dropped around the sleep).  When the DESTROY flag is set and
  * g_mirror_try_destroy() succeeds, the thread exits.
  */
 static void
 g_mirror_worker(void *arg)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_event *ep;
 	struct bio *bp;
 	int timeout;
 
 	sc = arg;
 	thread_lock(curthread);
 	sched_prio(curthread, PRIBIO);
 	thread_unlock(curthread);
 
 	sx_xlock(&sc->sc_lock);
 	for (;;) {
 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
 		/*
 		 * First take a look at events.
 		 * It is important to handle events before any I/O requests.
 		 */
 		ep = g_mirror_event_first(sc);
 		if (ep != NULL) {
 			g_mirror_event_remove(sc, ep);
 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
 				/* Update only device status. */
 				G_MIRROR_DEBUG(3,
 				    "Running event for device %s.",
 				    sc->sc_name);
 				ep->e_error = 0;
 				g_mirror_update_device(sc, true);
 			} else {
 				/* Update disk status. */
 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
 				     g_mirror_get_diskname(ep->e_disk));
 				ep->e_error = g_mirror_update_disk(ep->e_disk,
 				    ep->e_state);
 				if (ep->e_error == 0)
 					g_mirror_update_device(sc, false);
 			}
 			/*
 			 * Nobody waits for a DONTWAIT event, so free it here;
 			 * otherwise mark it done and wake up the waiter.
 			 */
 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
 				KASSERT(ep->e_error == 0,
 				    ("Error cannot be handled."));
 				g_mirror_event_free(ep);
 			} else {
 				ep->e_flags |= G_MIRROR_EVENT_DONE;
 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
 				    ep);
 				mtx_lock(&sc->sc_events_mtx);
 				wakeup(ep);
 				mtx_unlock(&sc->sc_events_mtx);
 			}
 			if ((sc->sc_flags &
 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 				if (g_mirror_try_destroy(sc)) {
 					curthread->td_pflags &= ~TDP_GEOM;
 					G_MIRROR_DEBUG(1, "Thread exiting.");
 					kproc_exit(0);
 				}
 			}
 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
 			continue;
 		}
 
 		/*
 		 * Check whether the array can be marked as CLEAN and, if it
 		 * cannot, how many seconds we should wait.
 		 */
 		timeout = g_mirror_idle(sc, -1);
 
 		/*
 		 * Handle I/O requests.
 		 */
 		mtx_lock(&sc->sc_queue_mtx);
 		bp = TAILQ_FIRST(&sc->sc_queue);
 		if (bp != NULL)
 			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
 		else {
 			/* The queue is empty: destroy, retry or go to sleep. */
 			if ((sc->sc_flags &
 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 				mtx_unlock(&sc->sc_queue_mtx);
 				if (g_mirror_try_destroy(sc)) {
 					curthread->td_pflags &= ~TDP_GEOM;
 					G_MIRROR_DEBUG(1, "Thread exiting.");
 					kproc_exit(0);
 				}
 				/* Requests may have arrived in the meantime. */
 				mtx_lock(&sc->sc_queue_mtx);
 				if (!TAILQ_EMPTY(&sc->sc_queue)) {
 					mtx_unlock(&sc->sc_queue_mtx);
 					continue;
 				}
 			}
 			/* Do not sleep while an event is pending. */
 			if (g_mirror_event_first(sc) != NULL) {
 				mtx_unlock(&sc->sc_queue_mtx);
 				continue;
 			}
 			sx_xunlock(&sc->sc_lock);
 			/* PDROP: sc_queue_mtx is not held after the sleep. */
 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
 			    timeout * hz);
 			sx_xlock(&sc->sc_lock);
 			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
 			continue;
 		}
 		mtx_unlock(&sc->sc_queue_mtx);
 
 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
 		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
 			/*
 			 * Handle completion of the first half (the read) of a
 			 * block synchronization operation.
 			 */
 			g_mirror_sync_request(sc, bp);
 		} else if (bp->bio_to != sc->sc_provider) {
 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
 				/*
 				 * Handle completion of a regular I/O request.
 				 */
 				g_mirror_regular_request(sc, bp);
 			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
 				/*
 				 * Handle completion of the second half (the
 				 * write) of a block synchronization operation.
 				 */
 				g_mirror_sync_request(sc, bp);
 			else {
 				KASSERT(0,
 				    ("Invalid request cflags=0x%hx to=%s.",
 				    bp->bio_cflags, bp->bio_to->name));
 			}
 		} else {
 			/*
 			 * Initiate an I/O request.
 			 */
 			g_mirror_register_request(sc, bp);
 		}
 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
 	}
 }
 
 /*
  * Bring the DIRTY flag of a disk in line with the idle state of the device:
  * an idle device gets its dirty disk marked clean, a non-idle device gets
  * its clean disk marked dirty.  Nothing is done when the device has the
  * NOFAILSYNC flag set.  sc_lock must be held.
  */
 static void
 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
 {
 	int isdirty;
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
 		return;
 
 	isdirty = (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0;
 	if (sc->sc_idle) {
 		if (!isdirty)
 			return;
 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
 		    g_mirror_get_diskname(disk), sc->sc_name);
 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 	} else {
 		if (isdirty)
 			return;
 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
 		    g_mirror_get_diskname(disk), sc->sc_name);
 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 	}
 }
 
 /*
  * Prepare a synchronization bio for the next read.
  *
  * The bio is reset and set up as a BIO_READ from the disk's synchronization
  * consumer to the mirror provider, starting at 'offset'.  Its length is
  * maxphys, or less when fewer bytes remain before sc_mediasize.  The data
  * buffer and the integer kept in bio_caller1 are carried over the reset.
  */
 static void
 g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
     off_t offset)
 {
 	void *saved_data;
 	int slot;
 
 	/* Keep the buffer and the bio_caller1 value across g_reset_bio(). */
 	saved_data = bp->bio_data;
 	slot = (int)(uintptr_t)bp->bio_caller1;
 	g_reset_bio(bp);
 
 	bp->bio_data = saved_data;
 	bp->bio_caller1 = (void *)(uintptr_t)slot;
 
 	bp->bio_cmd = BIO_READ;
 	bp->bio_done = g_mirror_sync_done;
 	bp->bio_from = disk->d_sync.ds_consumer;
 	bp->bio_to = disk->d_softc->sc_provider;
 
 	bp->bio_offset = offset;
 	bp->bio_length = MIN(maxphys,
 	    disk->d_softc->sc_mediasize - offset);
 }
 
 /*
  * Start synchronization of a disk in the SYNCHRONIZING state.
  *
  * A new consumer is created on the synchronization geom, attached to the
  * mirror provider and opened for reading; sc_lock is dropped while this is
  * done under the topology lock and is re-acquired afterwards.  Then
  * g_mirror_syncreqs bios with maxphys-sized buffers are allocated and the
  * first synchronization requests are fired off, except those that collide
  * with a regular request, which are delayed.
  */
 static void
 g_mirror_sync_start(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk_sync *sync;
 	struct g_consumer *cp;
 	struct bio *bp;
 	int error, i;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sync = &disk->d_sync;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 	    ("Disk %s is not marked for synchronization.",
 	    g_mirror_get_diskname(disk)));
 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
 	    sc->sc_state));
 
 	/* sc_lock is released before the topology lock is taken. */
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	cp = g_new_consumer(sc->sc_sync.ds_geom);
 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, sc->sc_provider);
 	KASSERT(error == 0,
 	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
 	/* Read access only. */
 	error = g_access(cp, 1, 0, 0);
 	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 
 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
 	    g_mirror_get_diskname(disk));
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 	KASSERT(sync->ds_consumer == NULL,
 	    ("Sync consumer already exists (device=%s, disk=%s).",
 	    sc->sc_name, g_mirror_get_diskname(disk)));
 
 	sync->ds_consumer = cp;
 	sync->ds_consumer->private = disk;
 	sync->ds_consumer->index = 0;
 
 	/*
 	 * Allocate memory for synchronization bios and initialize them.
 	 * Each bio remembers its index in ds_bios in bio_caller1 and covers
 	 * the range that follows the previous one.
 	 */
 	sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
 	    M_MIRROR, M_WAITOK);
 	for (i = 0; i < g_mirror_syncreqs; i++) {
 		bp = g_alloc_bio();
 		sync->ds_bios[i] = bp;
 
 		bp->bio_data = malloc(maxphys, M_MIRROR, M_WAITOK);
 		bp->bio_caller1 = (void *)(uintptr_t)i;
 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
 		sync->ds_offset += bp->bio_length;
 	}
 
 	/* Increase the number of disks in SYNCHRONIZING state. */
 	sc->sc_sync.ds_ndisks++;
 	/* Set the number of in-flight synchronization requests. */
 	sync->ds_inflight = g_mirror_syncreqs;
 
 	/*
 	 * Fire off first synchronization requests.
 	 */
 	for (i = 0; i < g_mirror_syncreqs; i++) {
 		bp = sync->ds_bios[i];
 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
 		sync->ds_consumer->index++;
 		/*
 		 * Delay the request if it is colliding with a regular request.
 		 */
 		if (g_mirror_regular_collision(sc, bp))
 			g_mirror_sync_delay(sc, bp);
 		else
 			g_io_request(bp, sync->ds_consumer);
 	}
 }
 
 /*
  * Stop synchronization process.
  * type: 0 - synchronization finished
  *       1 - synchronization stopped
  */
 static void
 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
 {
 	struct g_mirror_softc *sc;
 	struct g_consumer *cp;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 	    g_mirror_disk_state2str(disk->d_state)));
 	if (disk->d_sync.ds_consumer == NULL)
 		return;
 
 	if (type == 0) {
 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 	} else /* if (type == 1) */ {
 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 	}
 	g_mirror_regular_release(sc);
 	free(disk->d_sync.ds_bios, M_MIRROR);
 	disk->d_sync.ds_bios = NULL;
 	cp = disk->d_sync.ds_consumer;
 	disk->d_sync.ds_consumer = NULL;
 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 	sc->sc_sync.ds_ndisks--;
 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
 	g_topology_lock();
 	g_mirror_kill_consumer(sc, cp);
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 }
 
 /*
  * Create the "mirror/<name>" provider of the device and make it available.
  *
  * Media and sector size come from the softc.  The stripe size is the
  * largest one found among the component providers, together with the
  * matching stripe offset.  Unmapped bios are accepted only when the balance
  * algorithm is not SPLIT and every component provider accepts them as well.
  * Once the provider is up, synchronization is started for every disk in the
  * SYNCHRONIZING state.  sc_lock must be held.
  */
 static void
 g_mirror_launch_provider(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 	struct g_provider *newpp, *under;
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	g_topology_lock();
 	newpp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
 	newpp->flags |= G_PF_DIRECT_RECEIVE;
 	newpp->mediasize = sc->sc_mediasize;
 	newpp->sectorsize = sc->sc_sectorsize;
 	newpp->stripesize = 0;
 	newpp->stripeoffset = 0;
 
 	/* Splitting of unmapped bios could work but is not implemented. */
 	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
 		newpp->flags |= G_PF_ACCEPT_UNMAPPED;
 
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_consumer == NULL ||
 		    disk->d_consumer->provider == NULL)
 			continue;
 		under = disk->d_consumer->provider;
 		/* Adopt the largest stripe geometry seen so far. */
 		if (under->stripesize > newpp->stripesize) {
 			newpp->stripesize = under->stripesize;
 			newpp->stripeoffset = under->stripeoffset;
 		}
 		/* One component without unmapped support disables it. */
 		if ((under->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
 			G_MIRROR_DEBUG(0, "Cancelling unmapped because of %s.",
 			    under->name);
 			newpp->flags &= ~G_PF_ACCEPT_UNMAPPED;
 		}
 	}
 
 	newpp->private = sc;
 	sc->sc_refcnt++;
 	sc->sc_provider = newpp;
 	g_error_provider(newpp, 0);
 	g_topology_unlock();
 
 	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", newpp->name,
 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
 
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			continue;
 		g_mirror_sync_start(disk);
 	}
 }
 
 /*
  * Take down the mirror provider.
  *
  * Running synchronizations are stopped first.  Then, under the topology
  * lock, the provider is marked with ENXIO, sc_queue is emptied and the
  * provider is withered.  Queued requests that were addressed to the
  * provider by someone other than the synchronization geom are failed with
  * ENXIO; all other queued bios are destroyed, and the data buffer of those
  * carrying the SYNC flag is freed as well.
  */
 static void
 g_mirror_destroy_provider(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 	struct bio *req;
 
 	g_topology_assert_not();
 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
 	    sc->sc_name));
 
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			continue;
 		g_mirror_sync_stop(disk, 1);
 	}
 
 	g_topology_lock();
 	g_error_provider(sc->sc_provider, ENXIO);
 	mtx_lock(&sc->sc_queue_mtx);
 	for (;;) {
 		req = TAILQ_FIRST(&sc->sc_queue);
 		if (req == NULL)
 			break;
 		TAILQ_REMOVE(&sc->sc_queue, req, bio_queue);
 		if (req->bio_to != sc->sc_provider ||
 		    req->bio_from->geom == sc->sc_sync.ds_geom) {
 			/*
 			 * Synchronization requests and requests destined for
 			 * individual mirror components can be destroyed
 			 * immediately.
 			 */
 			if ((req->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
 				free(req->bio_data, M_MIRROR);
 			g_destroy_bio(req);
 			continue;
 		}
 		/* Abort pending I/O that was not generated by us. */
 		g_io_deliver(req, ENXIO);
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 	g_wither_provider(sc->sc_provider, ENXIO);
 	sc->sc_provider = NULL;
 	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
 	g_topology_unlock();
 }
 
 /*
  * Force the device to start after the start timeout.
  * arg is the device softc.  The event stored in sc_timeout_event is
  * detached from the softc and dispatched as a non-waiting device event.
  * NOTE(review): presumably invoked from sc_callout; the registration is not
  * visible in this part of the file.
  */
 static void
 g_mirror_go(void *arg)
 {
 	struct g_mirror_softc *sc = arg;
 	struct g_mirror_event *timeout_ev;
 
 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
 
 	timeout_ev = sc->sc_timeout_event;
 	sc->sc_timeout_event = NULL;
 	g_mirror_event_dispatch(timeout_ev, sc, 0,
 	    G_MIRROR_EVENT_DEVICE | G_MIRROR_EVENT_DONTWAIT);
 }
 
 /*
  * Cancel the start timeout: drain sc_callout, then free the event kept in
  * sc_timeout_event and clear the pointer.  sc_lock must be held
  * exclusively.
  */
 static void
 g_mirror_timeout_drain(struct g_mirror_softc *sc)
 {
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	/* The callout is drained before the event it would use is freed. */
 	callout_drain(&sc->sc_callout);
 	g_mirror_event_free(sc->sc_timeout_event);
 	sc->sc_timeout_event = NULL;
 }
 
 /*
  * Work out the state a disk should enter on a running device.
  *
  * The decision is based on the disk's syncid compared with the device's:
  *  - newer than the device: the disk is destroyed and
  *    G_MIRROR_DISK_STATE_NONE is returned;
  *  - older than the device: the disk's synchronization data is reset and it
  *    must be synchronized from the beginning;
  *  - equal: the disk is ACTIVE unless it is flagged SYNCHRONIZING, or it is
  *    DIRTY while another ACTIVE disk exists.
  * A disk that needs synchronization becomes SYNCHRONIZING when autosync is
  * allowed or FORCE_SYNC is set on the disk, and STALE otherwise.
  */
 static u_int
 g_mirror_determine_state(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	u_int state;
 	int may_sync;
 
 	sc = disk->d_softc;
 
 	if (disk->d_sync.ds_syncid > sc->sc_syncid) {
 		/*
 		 * The mirror was started on stale disks and a fresher disk
 		 * has just arrived.  If there were writes in the meantime
 		 * the mirror is broken.  The best choice is to leave this
 		 * disk alone and to inform the user loudly.
 		 */
 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
 		    "disk (%s) arrives!! It will not be connected to the "
 		    "running device.", sc->sc_name,
 		    g_mirror_get_diskname(disk));
 		g_mirror_destroy_disk(disk);
 		/* Return immediately, because the disk was destroyed. */
 		return (G_MIRROR_DISK_STATE_NONE);
 	}
 
 	/* May a disk that needs it be synchronized automatically? */
 	may_sync = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
 	    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0;
 
 	if (disk->d_sync.ds_syncid < sc->sc_syncid) {
 		/*
 		 * Reset all synchronization data for this disk: even if it
 		 * was synchronized once, that was against disks with a
 		 * different syncid.
 		 */
 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		disk->d_sync.ds_syncid = sc->sc_syncid;
 		state = may_sync ? G_MIRROR_DISK_STATE_SYNCHRONIZING :
 		    G_MIRROR_DISK_STATE_STALE;
 	} else if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
 	    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
 	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
 		/* Disk does not need synchronization. */
 		state = G_MIRROR_DISK_STATE_ACTIVE;
 	} else {
 		/* Synchronization can continue from the stored offset. */
 		state = may_sync ? G_MIRROR_DISK_STATE_SYNCHRONIZING :
 		    G_MIRROR_DISK_STATE_STALE;
 	}
 
 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
 	return (state);
 }
 
 /*
  * Update device state.
  *
  * Must be called with sc_lock exclusively held.  'force' is true when the
  * call is made because the start timeout has expired (see the readiness
  * check below).
  *
  * STARTING: components with an older genid or with mismatching metadata are
  * destroyed, then it is decided whether the device is ready to run, which
  * components have to be resynchronized, and the device is switched to
  * RUNNING with an initial state event sent to every component.
  * RUNNING: the device is flagged for destruction when neither ACTIVE nor NEW
  * components remain, the provider is launched when appropriate, and pending
  * genid/syncid bumps are carried out.
  */
 static void
 g_mirror_update_device(struct g_mirror_softc *sc, bool force)
 {
 	struct g_mirror_disk *disk;
 	u_int state;
 
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	switch (sc->sc_state) {
 	case G_MIRROR_DEVICE_STATE_STARTING:
 	    {
 		struct g_mirror_disk *pdisk, *tdisk;
 		const char *mismatch;
 		uintmax_t found, newest;
 		u_int dirty, ndisks;
 
 		/* Pre-flight checks */
 		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
 			/*
 			 * Confirm we already detected the newest genid.
 			 */
 			KASSERT(sc->sc_genid >= disk->d_genid,
 			    ("%s: found newer genid %u (sc:%p had %u).", __func__,
 			    disk->d_genid, sc, sc->sc_genid));
 
 			/* Kick out any previously tasted stale components. */
 			if (disk->d_genid < sc->sc_genid) {
 				G_MIRROR_DEBUG(0, "Stale 'genid' field on %s "
 				    "(device %s) (component=%u latest=%u), skipping.",
 				    g_mirror_get_diskname(disk), sc->sc_name,
 				    disk->d_genid, sc->sc_genid);
 				g_mirror_destroy_disk(disk);
 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 				continue;
 			}
 
 			/*
 			 * Confirm we already detected the newest syncid.
 			 */
 			KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid,
 			    ("%s: found newer syncid %u (sc:%p had %u).",
 			     __func__, disk->d_sync.ds_syncid, sc,
 			     sc->sc_syncid));
 
 /*
  * Record the first field whose value stored on the disk (d_init_<field>)
  * differs from the value in the softc (sc_<field>).
  */
 #define DETECT_MISMATCH(field, name) \
 			if (mismatch == NULL &&					\
 			    disk->d_init_ ## field != sc->sc_ ## field) {	\
 				mismatch = name;				\
 				found = (intmax_t)disk->d_init_ ## field;	\
 				newest = (intmax_t)sc->sc_ ## field;		\
 			}
 			mismatch = NULL;
 			DETECT_MISMATCH(ndisks, "md_all");
 			DETECT_MISMATCH(balance, "md_balance");
 			DETECT_MISMATCH(slice, "md_slice");
 			DETECT_MISMATCH(mediasize, "md_mediasize");
 #undef DETECT_MISMATCH
 			/* Kick out a component whose metadata does not match. */
 			if (mismatch != NULL) {
 				G_MIRROR_DEBUG(0, "Found a mismatching '%s' "
 				    "field on %s (device %s) (found=%ju "
 				    "newest=%ju).", mismatch,
 				    g_mirror_get_diskname(disk), sc->sc_name,
 				    found, newest);
 				g_mirror_destroy_disk(disk);
 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 				continue;
 			}
 		}
 
 		KASSERT(sc->sc_provider == NULL,
 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
 		/*
 		 * Are we ready? If the timeout (force is true) has expired, and
 		 * any disks are present, then yes. If we're permitted to launch
 		 * before the timeout has expired and the expected number of
 		 * current-generation mirror disks have been tasted, then yes.
 		 */
 		ndisks = g_mirror_ndisks(sc, -1);
 		if ((force && ndisks > 0) ||
 		    (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) {
 			;
 		} else if (ndisks == 0) {
 			/*
 			 * Disks went down in starting phase, so destroy
 			 * device.
 			 */
 			g_mirror_timeout_drain(sc);
 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 			    sc->sc_rootmount);
 			root_mount_rel(sc->sc_rootmount);
 			sc->sc_rootmount = NULL;
 			return;
 		} else {
 			/* Not ready yet: keep waiting for more disks. */
 			return;
 		}
 
 		/*
 		 * Activate all disks with the biggest syncid.
 		 */
 		if (force) {
 			/*
 			 * If 'force' is true, we have been called due to
 			 * timeout, so don't bother canceling timeout.
 			 */
 			ndisks = 0;
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
 					ndisks++;
 				}
 			}
 			if (ndisks == 0) {
 				/* No valid disks found, destroy device. */
 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
 				    __LINE__, sc->sc_rootmount);
 				root_mount_rel(sc->sc_rootmount);
 				sc->sc_rootmount = NULL;
 				return;
 			}
 		} else {
 			/* Cancel timeout. */
 			g_mirror_timeout_drain(sc);
 		}
 
 		/*
 		 * Here we need to look for dirty disks and if all disks
 		 * with the biggest syncid are dirty, we have to choose
 		 * one with the biggest priority and rebuild the rest.
 		 */
 		/*
 		 * Find the number of dirty disks with the biggest syncid.
 		 * Find the number of disks with the biggest syncid.
 		 * While here, find a disk with the biggest priority.
 		 */
 		dirty = ndisks = 0;
 		pdisk = NULL;
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_sync.ds_syncid != sc->sc_syncid)
 				continue;
 			if ((disk->d_flags &
 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
 				continue;
 			}
 			ndisks++;
 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
 				dirty++;
 				if (pdisk == NULL ||
 				    pdisk->d_priority < disk->d_priority) {
 					pdisk = disk;
 				}
 			}
 		}
 		if (dirty == 0) {
 			/* No dirty disks at all, great. */
 		} else if (dirty == ndisks) {
 			/*
 			 * Force synchronization for all dirty disks except one
 			 * with the biggest priority.
 			 */
 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
 			    "master disk for synchronization.",
 			    g_mirror_get_diskname(pdisk), sc->sc_name);
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_sync.ds_syncid != sc->sc_syncid)
 					continue;
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
 					continue;
 				}
 				KASSERT((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
 				    ("Disk %s isn't marked as dirty.",
 				    g_mirror_get_diskname(disk)));
 				/* Skip the disk with the biggest priority. */
 				if (disk == pdisk)
 					continue;
 				/* A zero syncid marks the disk for resync. */
 				disk->d_sync.ds_syncid = 0;
 			}
 		} else if (dirty < ndisks) {
 			/*
 			 * Force synchronization for all dirty disks.
 			 * We have some non-dirty disks.
 			 */
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_sync.ds_syncid != sc->sc_syncid)
 					continue;
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
 					continue;
 				}
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
 					continue;
 				}
 				disk->d_sync.ds_syncid = 0;
 			}
 		}
 
 		/* Reset hint. */
 		sc->sc_hint = NULL;
 		if (force) {
 			/* Remember to bump syncid on first write. */
 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 		}
 		state = G_MIRROR_DEVICE_STATE_RUNNING;
 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_device_state2str(state));
 		sc->sc_state = state;
 		/* Tell every disk which state it should enter. */
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			state = g_mirror_determine_state(disk);
 			g_mirror_event_send(disk, state,
 			    G_MIRROR_EVENT_DONTWAIT);
 			if (state == G_MIRROR_DISK_STATE_STALE)
 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 		}
 		break;
 	    }
 	case G_MIRROR_DEVICE_STATE_RUNNING:
 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
 			/*
 			 * No usable disks, so destroy the device.
 			 */
 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 			break;
 		} else if (g_mirror_ndisks(sc,
 		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
 			/*
 			 * We have active disks, launch provider if it doesn't
 			 * exist.
 			 */
 			if (sc->sc_provider == NULL)
 				g_mirror_launch_provider(sc);
 			if (sc->sc_rootmount != NULL) {
 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
 				    __LINE__, sc->sc_rootmount);
 				root_mount_rel(sc->sc_rootmount);
 				sc->sc_rootmount = NULL;
 			}
 		}
 		/*
 		 * Genid should be bumped immediately, so do it here.
 		 */
 		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
 			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
 			g_mirror_bump_genid(sc);
 		}
 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
 			g_mirror_bump_syncid(sc);
 		}
 		break;
 	default:
 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
 		break;
 	}
 }
 
 /*
  * Update disk state and device state if needed.
  *
  * Moves "disk" into the requested "state" and performs the side effects
  * attached to that transition.  The caller must hold sc_lock exclusively
  * (asserted below).  As written the function always returns 0, including
  * when clearing the metadata fails in the DESTROY case.
  *
  * DISK_STATE_CHANGED() only logs the transition; it expands in place and
  * relies on the local variables "disk", "state" and "sc", so it has to be
  * used before disk->d_state is overwritten.
  */
 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
 	"Disk %s state changed from %s to %s (device %s).",		\
 	g_mirror_get_diskname(disk),					\
 	g_mirror_disk_state2str(disk->d_state),				\
 	g_mirror_disk_state2str(state), sc->sc_name)
 static int
 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
 {
 	struct g_mirror_softc *sc;
 
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	/* Re-entered from the NEW case once a follow-up state is chosen. */
 again:
 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
 	    g_mirror_disk_state2str(state));
 	switch (state) {
 	case G_MIRROR_DISK_STATE_NEW:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk arrives.
 		 */
 		/* Previous state should be NONE. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		disk->d_state = state;
 		/*
 		 * Insert the disk into sc_disks under the topology lock.  The
 		 * disk goes in front of the first entry whose priority is not
 		 * higher than its own; if no such entry exists it is appended
 		 * after the last one.
 		 */
 		g_topology_lock();
 		if (LIST_EMPTY(&sc->sc_disks))
 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
 		else {
 			struct g_mirror_disk *dp;
 
 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 				if (disk->d_priority >= dp->d_priority) {
 					LIST_INSERT_BEFORE(dp, disk, d_next);
 					/* Mark as inserted. */
 					dp = NULL;
 					break;
 				}
 				/* Stop on the tail so dp stays non-NULL. */
 				if (LIST_NEXT(dp, d_next) == NULL)
 					break;
 			}
 			if (dp != NULL)
 				LIST_INSERT_AFTER(dp, disk, d_next);
 		}
 		g_topology_unlock();
 		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		/* While the device is still STARTING the disk stays NEW. */
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
 			break;
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		/*
 		 * Device is already running: pick the follow-up state and,
 		 * unless it is NONE, process that transition right away.
 		 */
 		state = g_mirror_determine_state(disk);
 		if (state != G_MIRROR_DISK_STATE_NONE)
 			goto again;
 		break;
 	case G_MIRROR_DISK_STATE_ACTIVE:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk does not need synchronization.
 		 * 2. Synchronization process finished successfully.
 		 */
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		/* Previous state should be NEW or SYNCHRONIZING. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		/* Leaving synchronization: drop sync flags and stop the sync. */
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
 			g_mirror_sync_stop(disk, 0);
 		}
 		disk->d_state = state;
 		/* Reset synchronization progress counters. */
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		g_mirror_update_idle(sc, disk);
 		g_mirror_update_metadata(disk);
 		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		break;
 	case G_MIRROR_DISK_STATE_STALE:
 		/*
 		 * Possible scenarios:
 		 * 1. Stale disk was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		/*
 		 * STALE state is only possible if device is marked
 		 * NOAUTOSYNC.
 		 */
 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		/* Clear DIRTY and persist the new state in the metadata. */
 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		g_mirror_update_metadata(disk);
 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		break;
 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 		/*
 		 * Possible scenarios:
 		 * 1. Disk which needs synchronization was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		/* Synchronization is only started once a provider exists. */
 		if (sc->sc_provider != NULL) {
 			g_mirror_sync_start(disk);
 			g_mirror_update_metadata(disk);
 		}
 		break;
 	case G_MIRROR_DISK_STATE_DISCONNECTED:
 		/*
 		 * Possible scenarios:
 		 * 1. Device wasn't running yet, but disk disappeared.
 		 * 2. Disk was active and disappeared.
 		 * 3. Disk disappeared during synchronization process.
 		 */
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
 			/*
 			 * Previous state should be ACTIVE, STALE or
 			 * SYNCHRONIZING.
 			 */
 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 			    ("Wrong disk state (%s, %s).",
 			    g_mirror_get_diskname(disk),
 			    g_mirror_disk_state2str(disk->d_state)));
 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
 			/* Previous state should be NEW. */
 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
 			    ("Wrong disk state (%s, %s).",
 			    g_mirror_get_diskname(disk),
 			    g_mirror_disk_state2str(disk->d_state)));
 			/*
 			 * Reset bumping syncid if disk disappeared in STARTING
 			 * state.
 			 */
 			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
 				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
 #ifdef	INVARIANTS
 		} else {
 			/* Any other device state is a programming error. */
 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
 			    sc->sc_name,
 			    g_mirror_device_state2str(sc->sc_state),
 			    g_mirror_get_diskname(disk),
 			    g_mirror_disk_state2str(disk->d_state)));
 #endif
 		}
 		DISK_STATE_CHANGED();
 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 
 		g_mirror_destroy_disk(disk);
 		break;
 	case G_MIRROR_DISK_STATE_DESTROY:
 	    {
 		int error;
 
 		/*
 		 * If the metadata cannot be cleared the disk is left in
 		 * place; the failure is only logged and 0 is still returned.
 		 */
 		error = g_mirror_clear_metadata(disk);
 		if (error != 0) {
 			G_MIRROR_DEBUG(0,
 			    "Device %s: failed to clear metadata on %s: %d.",
 			    sc->sc_name, g_mirror_get_diskname(disk), error);
 			break;
 		}
 		DISK_STATE_CHANGED();
 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 
 		g_mirror_destroy_disk(disk);
 		sc->sc_ndisks--;
 		/*
 		 * "disk" is reused as the loop cursor from here on: rewrite
 		 * the metadata of every remaining component so it reflects
 		 * the reduced component count.
 		 */
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			g_mirror_update_metadata(disk);
 		}
 		break;
 	    }
 	default:
 		KASSERT(1 == 0, ("Unknown state (%u).", state));
 		break;
 	}
 	return (0);
 }
 #undef	DISK_STATE_CHANGED
 
 /*
  * Read and decode the gmirror metadata stored in the last sector of the
  * provider attached to consumer "cp".
  *
  * Must be called with the topology lock held (asserted).  The lock is
  * dropped around the read itself and re-taken before returning.  Read access
  * on the consumer is acquired for the read and released afterwards.
  *
  * Returns 0 with "md" filled in; the error from g_access() or g_read_data();
  * EINVAL for a wrong magic string or a metadata version newer than
  * G_MIRROR_VERSION; or the error reported by mirror_metadata_decode().
  */
 int
 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	/* Take one read reference for the duration of the read. */
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	/* Metadata are stored on last sector. */
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	/* Drop the read reference regardless of the read result. */
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL) {
 		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
 		    cp->provider->name, error);
 		return (error);
 	}
 
 	/* Decode metadata. */
 	error = mirror_metadata_decode(buf, md);
 	g_free(buf);
 	/*
 	 * The magic and version checks run before the decode result is
 	 * examined, so a foreign or too-new sector yields EINVAL rather than
 	 * the decode error.
 	 */
 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
 		return (EINVAL);
 	if (md->md_version > G_MIRROR_VERSION) {
 		G_MIRROR_DEBUG(0,
 		    "Kernel module is too old to handle metadata from %s.",
 		    cp->provider->name);
 		return (EINVAL);
 	}
 	if (error != 0) {
 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
 		    cp->provider->name);
 		return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Validate the metadata "md" read from provider "pp" against the state of
  * device "sc" before the provider is accepted as a component.
  *
  * Returns EEXIST when a disk with the same id is already part of the device,
  * EINVAL when the provider is too small, the sector sizes are incompatible,
  * or the metadata carries flag bits outside the known masks, and 0 when the
  * metadata is acceptable.
  */
 static int
 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
     struct g_mirror_metadata *md)
 {
 
 	/* md_did is printed in hex like every other "0x"-prefixed field. */
 	G_MIRROR_DEBUG(2, "%s: md_did 0x%x disk %s device %s md_all 0x%x "
 	    "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x "
 	    "sc_balance 0x%x sc_mediasize 0x%jx pp_mediasize 0x%jx "
 	    "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%jx "
 	    "md_dflags 0x%jx md_syncid 0x%x md_genid 0x%x md_priority 0x%x "
 	    "sc_state 0x%x.",
 	    __func__, md->md_did, pp->name, sc->sc_name, md->md_all,
 	    sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance,
 	    sc->sc_balance, (uintmax_t)sc->sc_mediasize,
 	    (uintmax_t)pp->mediasize, md->md_sectorsize, sc->sc_sectorsize,
 	    (uintmax_t)md->md_mflags, (uintmax_t)md->md_dflags, md->md_syncid,
 	    md->md_genid, md->md_priority, sc->sc_state);
 
 	/* A component with this disk id is already attached. */
 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
 		    pp->name, md->md_did);
 		return (EEXIST);
 	}
 	/* The provider must be at least as large as the mirror. */
 	if (sc->sc_mediasize > pp->mediasize) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
 		    sc->sc_name);
 		return (EINVAL);
 	}
 	/* Sector size recorded in the metadata must match the device. */
 	if (md->md_sectorsize != sc->sc_sectorsize) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_sectorsize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	/* Device sector size must be a multiple of the provider's. */
 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid sector size of disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	/* Reject flag bits this module does not know about. */
 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid device flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid disk flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
  * Attach provider "pp", described by metadata "md", to device "sc" as a new
  * component.  Must be called without the topology lock (asserted).
  *
  * Returns 0 on success or the first error encountered while validating the
  * metadata, refreshing the device, initializing the disk or delivering the
  * NEW event.
  */
 int
 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
     struct g_mirror_metadata *md)
 {
 	struct g_mirror_disk *newdisk;
 	int rc;
 
 	g_topology_assert_not();
 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
 
 	/* Basic sanity checks of the on-disk metadata. */
 	rc = g_mirror_check_metadata(sc, pp, md);
 	if (rc != 0)
 		return (rc);
 
 	/* A component from an older generation cannot be used. */
 	if (md->md_genid < sc->sc_genid) {
 		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 
 	/*
 	 * The tasted component may carry newer metadata than the STARTING
 	 * device; give the device a chance to adopt it first.
 	 */
 	rc = g_mirror_refresh_device(sc, pp, md);
 	if (rc != 0)
 		return (rc);
 
 	newdisk = g_mirror_init_disk(sc, pp, md, &rc);
 	if (newdisk == NULL)
 		return (rc);
 
 	/* Announce the disk and wait until the event has been handled. */
 	rc = g_mirror_event_send(newdisk, G_MIRROR_DISK_STATE_NEW,
 	    G_MIRROR_EVENT_WAIT);
 	if (rc != 0)
 		return (rc);
 
 	/* Metadata already at the current version: nothing left to do. */
 	if (md->md_version >= G_MIRROR_VERSION)
 		return (0);
 
 	G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
 	    pp->name, md->md_version, G_MIRROR_VERSION);
 	g_mirror_update_metadata(newdisk);
 	return (0);
 }
 
 /*
  * GEOM event callback that performs a destroy postponed until last close.
  * "arg" is the softc; "flag" is EV_CANCEL when the event was cancelled.
  * Entered with the topology lock held, which is dropped while the device is
  * torn down and re-taken before returning.
  */
 static void
 g_mirror_destroy_delayed(void *arg, int flag)
 {
 	struct g_mirror_softc *softc;
 	int rc;
 
 	if (flag == EV_CANCEL) {
 		G_MIRROR_DEBUG(1, "Destroying canceled.");
 		return;
 	}
 
 	softc = arg;
 	g_topology_unlock();
 	sx_xlock(&softc->sc_lock);
 
 	/* The device must be waiting for close and not yet being destroyed. */
 	KASSERT((softc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
 	    ("DESTROY flag set on %s.", softc->sc_name));
 	KASSERT((softc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
 	    ("CLOSEWAIT flag not set on %s.", softc->sc_name));
 
 	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", softc->sc_name);
 	rc = g_mirror_destroy(softc, G_MIRROR_DESTROY_SOFT);
 	if (rc != 0) {
 		/* Destroy failed, so the softc lock is still ours to drop. */
 		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
 		    softc->sc_name, rc);
 		sx_xunlock(&softc->sc_lock);
 	}
 	g_topology_lock();
 }
 
 /*
  * GEOM access method for the mirror provider.  "acr", "acw" and "ace" are
  * the requested changes of the read, write and exclusive counts.
  *
  * Called with the topology lock held (asserted); the lock is dropped while
  * sc_lock is taken and re-acquired before returning.
  *
  * Returns ENXIO when an open is attempted on a device that is being
  * destroyed, is waiting for last close, or has no disks; otherwise 0.
  */
 static int
 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
 {
 	struct g_mirror_softc *sc;
 	int error = 0;
 
 	g_topology_assert();
 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
 	    acw, ace);
 
 	sc = pp->private;
 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
 
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 	/*
 	 * On a dying or empty device, refuse new opens but let closes pass
 	 * through (without touching the open count).
 	 */
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
 	    LIST_EMPTY(&sc->sc_disks)) {
 		if (acr > 0 || acw > 0 || ace > 0)
 			error = ENXIO;
 		goto end;
 	}
 	sc->sc_provider_open += acr + acw + ace;
 	/* Last writer is going away: let the mirror go idle. */
 	if (pp->acw + acw == 0)
 		g_mirror_idle(sc, 0);
 	/*
 	 * NOTE(review): the early exit above already leaves whenever
 	 * CLOSEWAIT is set, so this branch looks unreachable unless the flag
 	 * can be set by g_mirror_idle() -- confirm the intended behavior.
 	 */
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
 	    sc->sc_provider_open == 0)
 		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
 end:
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	return (error);
 }
 
 /*
  * Copy the device-wide settings stored in metadata "md" into softc "sc".
  * Only the flag bits covered by G_MIRROR_DEVICE_FLAG_MASK are replaced; all
  * other bits of sc_flags are preserved.
  */
 static void
 g_mirror_reinit_from_metadata(struct g_mirror_softc *sc,
     const struct g_mirror_metadata *md)
 {
 
 	/* Layout and balancing parameters. */
 	sc->sc_ndisks = md->md_all;
 	sc->sc_mediasize = md->md_mediasize;
 	sc->sc_balance = md->md_balance;
 	sc->sc_slice = md->md_slice;
 
 	/* Generation and synchronization identifiers. */
 	sc->sc_syncid = md->md_syncid;
 	sc->sc_genid = md->md_genid;
 
 	/* Swap in the metadata-backed flags, keep the runtime-only ones. */
 	sc->sc_flags = (sc->sc_flags & ~G_MIRROR_DEVICE_FLAG_MASK) |
 	    (md->md_mflags & G_MIRROR_DEVICE_FLAG_MASK);
 }
 
 /*
  * Create a new mirror device of the given "type" from metadata "md".
  *
  * Two geoms are created: the action geom, which is returned, and a
  * "<name>.sync" geom used for synchronization.  A worker kernel process is
  * started and a startup timeout is armed.  Must be called with the topology
  * lock held (asserted).
  *
  * Returns the action geom, or NULL when md_all is less than one or the
  * worker process cannot be created.
  */
 struct g_geom *
 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
     u_int type)
 {
 	struct g_mirror_softc *sc;
 	struct g_geom *gp;
 	int error, timeout;
 
 	g_topology_assert();
 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
 	    md->md_mid);
 
 	/* One disk is minimum. */
 	if (md->md_all < 1)
 		return (NULL);
 	/*
 	 * Action geom.
 	 */
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
 	gp->start = g_mirror_start;
 	gp->orphan = g_mirror_orphan;
 	gp->access = g_mirror_access;
 	gp->dumpconf = g_mirror_dumpconf;
 
 	/* Seed the softc from the metadata and initialize its primitives. */
 	sc->sc_type = type;
 	sc->sc_id = md->md_mid;
 	g_mirror_reinit_from_metadata(sc, md);
 	sc->sc_sectorsize = md->md_sectorsize;
 	sc->sc_bump_id = 0;
 	sc->sc_idle = 1;
 	sc->sc_last_write = time_uptime;
 	sc->sc_writes = 0;
 	sc->sc_refcnt = 1;
 	sx_init(&sc->sc_lock, "gmirror:lock");
 	TAILQ_INIT(&sc->sc_queue);
 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
 	TAILQ_INIT(&sc->sc_regular_delayed);
 	TAILQ_INIT(&sc->sc_inflight);
 	TAILQ_INIT(&sc->sc_sync_delayed);
 	LIST_INIT(&sc->sc_disks);
 	TAILQ_INIT(&sc->sc_events);
 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
 	callout_init(&sc->sc_callout, 1);
 	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
 	gp->softc = sc;
 	sc->sc_geom = gp;
 	sc->sc_provider = NULL;
 	sc->sc_provider_open = 0;
 	/*
 	 * Synchronization geom.
 	 */
 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
 	gp->softc = sc;
 	gp->orphan = g_mirror_orphan;
 	sc->sc_sync.ds_geom = gp;
 	sc->sc_sync.ds_ndisks = 0;
 	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
 	    "g_mirror %s", md->md_name);
 	if (error != 0) {
 		/* Undo both geoms and the softc before giving up. */
 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
 		    sc->sc_name);
 		g_destroy_geom(sc->sc_sync.ds_geom);
 		g_destroy_geom(sc->sc_geom);
 		g_mirror_free_device(sc);
 		return (NULL);
 	}
 
 	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
 
 	/* Take a root mount hold; the token is kept in sc_rootmount. */
 	sc->sc_rootmount = root_mount_hold("GMIRROR");
 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
 
 	/*
 	 * Schedule startup timeout.
 	 */
 	timeout = g_mirror_timeout * hz;
 	/* Event storage is allocated up front, before the callout is armed. */
 	sc->sc_timeout_event = malloc(sizeof(struct g_mirror_event), M_MIRROR,
 	    M_WAITOK);
 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
 	return (sc->sc_geom);
 }
 
 /*
  * Destroy mirror device "sc".  "how" selects the behavior when the provider
  * is still open:
  *   G_MIRROR_DESTROY_SOFT    - refuse with EBUSY;
  *   G_MIRROR_DESTROY_DELAYED - stop running synchronizations, set CLOSEWAIT
  *                              and return EBUSY;
  *   G_MIRROR_DESTROY_HARD    - log and destroy anyway.
  *
  * Must be called without the topology lock and with sc_lock held
  * exclusively (both asserted).  When EBUSY is returned sc_lock is still
  * held.  On the return-0 paths this function has either released sc_lock
  * itself (DESTROY already set) or handed the re-acquired lock to
  * g_mirror_destroy_device(); callers in this file do not unlock after a
  * successful return.
  */
 int
 g_mirror_destroy(struct g_mirror_softc *sc, int how)
 {
 	struct g_mirror_disk *disk;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	if (sc->sc_provider_open != 0) {
 		switch (how) {
 		case G_MIRROR_DESTROY_SOFT:
 			G_MIRROR_DEBUG(1,
 			    "Device %s is still open (%d).", sc->sc_name,
 			    sc->sc_provider_open);
 			return (EBUSY);
 		case G_MIRROR_DESTROY_DELAYED:
 			G_MIRROR_DEBUG(1,
 			    "Device %s will be destroyed on last close.",
 			    sc->sc_name);
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_state ==
 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
 					g_mirror_sync_stop(disk, 1);
 				}
 			}
 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
 			return (EBUSY);
 		case G_MIRROR_DESTROY_HARD:
 			/* Only logged; destruction continues below. */
 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
 			    "can't be definitely removed.", sc->sc_name);
 		}
 	}
 
 	/* Destruction already in progress: just release the lock. */
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 		sx_xunlock(&sc->sc_lock);
 		return (0);
 	}
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	/* Drop sc_lock before waking the worker and waiting for it. */
 	sx_xunlock(&sc->sc_lock);
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
 	/* Re-check every hz / 5 ticks until sc_worker becomes NULL. */
 	while (sc->sc_worker != NULL)
 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
 	sx_xlock(&sc->sc_lock);
 	g_mirror_destroy_device(sc);
 	return (0);
 }
 
 /*
  * Orphan method installed on the temporary tasting geom.  It is not expected
  * to run; reaching it trips an assertion that names the provider.
  */
 static void
 g_mirror_taste_orphan(struct g_consumer *cp)
 {
 
 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
 	    cp->provider->name));
 }
 
 /*
  * GEOM taste method: probe provider "pp" for gmirror metadata and, if it
  * belongs to a mirror, attach it to the matching device, creating the
  * device first when it does not exist yet.
  *
  * Called with the topology lock held (asserted); the lock is dropped while
  * the disk is added and re-taken before returning.
  *
  * Returns the mirror's geom when the provider was added, NULL otherwise.
  */
 static struct g_geom *
 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_mirror_metadata md;
 	struct g_mirror_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
 
 	/* Throw-away geom and consumer used only to read the metadata. */
 	gp = g_new_geomf(mp, "mirror:taste");
 	/*
 	 * This orphan function should be never called.
 	 */
 	gp->orphan = g_mirror_taste_orphan;
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		error = g_mirror_read_metadata(cp, &md);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	/* Skip metadata hardcoded to another provider name or size. */
 	if (md.md_provider[0] != '\0' &&
 	    !g_compare_names(md.md_provider, pp->name))
 		return (NULL);
 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
 		return (NULL);
 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
 		G_MIRROR_DEBUG(0,
 		    "Device %s: provider %s marked as inactive, skipping.",
 		    md.md_name, pp->name);
 		return (NULL);
 	}
 	if (g_mirror_debug >= 2)
 		mirror_metadata_dump(&md);
 
 	/*
 	 * Let's check if device already exists.
 	 */
 	sc = NULL;
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
 			continue;
 		/* Ignore the synchronization geom of a device. */
 		if (sc->sc_sync.ds_geom == gp)
 			continue;
 		if (strcmp(md.md_name, sc->sc_name) != 0)
 			continue;
 		/* Same name but a different mirror id: refuse. */
 		if (md.md_mid != sc->sc_id) {
 			G_MIRROR_DEBUG(0, "Device %s already configured.",
 			    sc->sc_name);
 			return (NULL);
 		}
 		break;
 	}
 	/* gp is NULL here when the loop ran to completion without a match. */
 	if (gp == NULL) {
 		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
 		if (gp == NULL) {
 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
 			    md.md_name);
 			return (NULL);
 		}
 		sc = gp->softc;
 	}
 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 	/* TASTING is set only for the duration of g_mirror_add_disk(). */
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
 	error = g_mirror_add_disk(sc, pp, &md);
 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
 	if (error != 0) {
 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
 		    pp->name, gp->name, error);
 		/* A device left without any disk is torn down again. */
 		if (LIST_EMPTY(&sc->sc_disks)) {
 			g_cancel_event(sc);
 			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
 			g_topology_lock();
 			return (NULL);
 		}
 		gp = NULL;
 	}
 	/* Destruction was requested in the meantime: carry it out now. */
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
 		g_topology_lock();
 		return (NULL);
 	}
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	return (gp);
 }
 
 /*
  * GEOM resize method for a component consumer: rewrite the metadata of the
  * disk attached to "cp", if there is one.  Entered and left with the
  * topology lock held; the lock is dropped around the metadata update.
  */
 static void
 g_mirror_resize(struct g_consumer *cp)
 {
 	struct g_mirror_disk *component;
 
 	g_topology_assert();
 	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
 
 	component = cp->private;
 	if (component != NULL) {
 		g_topology_unlock();
 		g_mirror_update_metadata(component);
 		g_topology_lock();
 	}
 }
 
 /*
  * GEOM class destroy_geom method: soft-destroy the mirror behind "gp".
  * The topology lock is dropped for the duration of the destroy and re-taken
  * before returning.  Returns the result of g_mirror_destroy().
  */
 static int
 g_mirror_destroy_geom(struct gctl_req *req __unused,
     struct g_class *mp __unused, struct g_geom *gp)
 {
 	struct g_mirror_softc *softc;
 	int rc;
 
 	g_topology_unlock();
 
 	softc = gp->softc;
 	sx_xlock(&softc->sc_lock);
 	g_cancel_event(softc);
 
 	rc = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
 	if (rc == 0) {
 		/* On success the softc lock is not released here. */
 		g_topology_lock();
 		return (0);
 	}
 
 	/* Destroy refused: the device stays, so drop its lock ourselves. */
 	sx_xunlock(&softc->sc_lock);
 	g_topology_lock();
 	return (rc);
 }
 
 /*
  * GEOM dumpconf method: append the XML description of the mirror to "sb",
  * each line prefixed with "indent".
  *
  * Nothing is emitted for the synchronization geom or for a provider.  For a
  * consumer the per-disk fields are written; with neither "pp" nor "cp" set
  * the device-wide fields are written.  Requires the topology lock
  * (asserted).
  */
 static void
 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_mirror_softc *sc;
 
 	g_topology_assert();
 
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	/* Skip synchronization geom. */
 	if (gp == sc->sc_sync.ds_geom)
 		return;
 	if (pp != NULL) {
 		/* Nothing here. */
 	} else if (cp != NULL) {
 		struct g_mirror_disk *disk;
 
 		/* Per-component section. */
 		disk = cp->private;
 		if (disk == NULL)
 			return;
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
 			/* Progress as a percentage of the mirror size. */
 			sbuf_printf(sb, "%s<Synchronized>", indent);
 			if (disk->d_sync.ds_offset == 0)
 				sbuf_cat(sb, "0%");
 			else
 				sbuf_printf(sb, "%u%%",
 				    (u_int)((disk->d_sync.ds_offset * 100) /
 				    sc->sc_mediasize));
 			sbuf_cat(sb, "</Synchronized>\n");
 			if (disk->d_sync.ds_offset > 0)
 				sbuf_printf(sb, "%s<BytesSynced>%jd"
 				    "</BytesSynced>\n", indent,
 				    (intmax_t)disk->d_sync.ds_offset);
 		}
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
 		    disk->d_sync.ds_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
 		    disk->d_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (disk->d_flags == 0)
 			sbuf_cat(sb, "NONE");
 		else {
 			/* Tracks whether a ", " separator is needed. */
 			int first = 1;
 
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((disk->d_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_cat(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_cat(sb, name);					\
 	}								\
 } while (0)
 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
 			    "SYNCHRONIZING");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
 #undef	ADD_FLAG
 		}
 		sbuf_cat(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
 		    disk->d_priority);
 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 		    g_mirror_disk_state2str(disk->d_state));
 	} else {
 		/* Device-wide section. */
 		sbuf_printf(sb, "%s<Type>", indent);
 		switch (sc->sc_type) {
 		case G_MIRROR_TYPE_AUTOMATIC:
 			sbuf_cat(sb, "AUTOMATIC");
 			break;
 		case G_MIRROR_TYPE_MANUAL:
 			sbuf_cat(sb, "MANUAL");
 			break;
 		default:
 			sbuf_cat(sb, "UNKNOWN");
 			break;
 		}
 		sbuf_cat(sb, "</Type>\n");
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (sc->sc_flags == 0)
 			sbuf_cat(sb, "NONE");
 		else {
 			/* Tracks whether a ", " separator is needed. */
 			int first = 1;
 
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((sc->sc_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_cat(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_cat(sb, name);					\
 	}								\
 } while (0)
 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
 #undef	ADD_FLAG
 		}
 		sbuf_cat(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
 		    (u_int)sc->sc_slice);
 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
 		    balance_name(sc->sc_balance));
 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
 		    sc->sc_ndisks);
 		/*
 		 * STARTING while the device is starting, COMPLETE when every
 		 * configured component is ACTIVE, DEGRADED otherwise.
 		 */
 		sbuf_printf(sb, "%s<State>", indent);
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
 			sbuf_printf(sb, "%s", "STARTING");
 		else if (sc->sc_ndisks ==
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
 			sbuf_printf(sb, "%s", "COMPLETE");
 		else
 			sbuf_printf(sb, "%s", "DEGRADED");
 		sbuf_cat(sb, "</State>\n");
 	}
 }
 
 /*
  * shutdown_post_sync event handler.  "arg" is the mirror class.  Unless the
  * kernel has panicked, every mirror device of the class is idled and asked
  * to destroy itself, delayed until last close when it is still open.
  */
 static void
 g_mirror_shutdown_post_sync(void *arg, int howto)
 {
 	struct g_mirror_softc *sc;
 	struct g_geom *cur, *next;
 	struct g_class *cls;
 	int rc;
 
 	if (KERNEL_PANICKED())
 		return;
 
 	cls = arg;
 	g_topology_lock();
 	g_mirror_shutdown = 1;
 	LIST_FOREACH_SAFE(cur, &cls->geom, geom, next) {
 		sc = cur->softc;
 		/* Skip geoms without a softc and synchronization geoms. */
 		if (sc == NULL || cur == sc->sc_sync.ds_geom)
 			continue;
 
 		/* The topology lock is not held while working on a device. */
 		g_topology_unlock();
 		sx_xlock(&sc->sc_lock);
 		g_mirror_idle(sc, -1);
 		g_cancel_event(sc);
 		rc = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
 		if (rc != 0) {
 			/* Device survived; its lock is still held by us. */
 			sx_xunlock(&sc->sc_lock);
 		}
 		g_topology_lock();
 	}
 	g_topology_unlock();
 }
 
 /*
  * Class init method: hook g_mirror_shutdown_post_sync() into the
  * shutdown_post_sync event, passing the class as its argument.  A failed
  * registration is only logged.
  */
 static void
 g_mirror_init(struct g_class *mp)
 {
 
 	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
 	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
 	if (g_mirror_post_sync != NULL)
 		return;
 	G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
 }
 
 /*
  * Class fini method: remove the shutdown_post_sync handler if one was
  * registered by g_mirror_init().
  */
 static void
 g_mirror_fini(struct g_class *mp)
 {
 
 	if (g_mirror_post_sync == NULL)
 		return;
 	EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
 }
 
 /*
  * Refresh the mirror device's metadata when gmirror encounters a newer
  * generation as the individual components are being added to the mirror set.
  */
 static int
 g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp,
     const struct g_mirror_metadata *md)
 {
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	KASSERT(sc->sc_genid <= md->md_genid,
 	    ("%s: attempted to refresh from stale component %s (device %s) "
 	    "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid,
 	    sc->sc_genid));
 
 	if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid &&
 	    sc->sc_syncid >= md->md_syncid))
 		return (0);
 
 	G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u "
 	    "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; "
 	    "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid,
 	    sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name);
 
 	if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) {
 		/* Probable data corruption detected */
 		G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state "
 		    "(device=%s genid=%u). A stale mirror device was launched.",
 		    g_mirror_device_state2str(sc->sc_state), sc->sc_name,
 		    sc->sc_genid);
 		return (EINVAL);
 	}
 
 	/* Update softc */
 	g_mirror_reinit_from_metadata(sc, md);
 
 	G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s "
 	    "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid,
 	    g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid,
 	    md->md_syncid, (unsigned)md->md_all);
 
 	return (0);
 }
 
 /* Declare the g_mirror GEOM class and the geom_mirror module version (0). */
 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
 MODULE_VERSION(geom_mirror, 0);
diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c
index 120fced0a8f1..a721b0bc4459 100644
--- a/sys/geom/multipath/g_multipath.c
+++ b/sys/geom/multipath/g_multipath.c
@@ -1,1568 +1,1569 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011-2013 Alexander Motin <mav@FreeBSD.org>
  * Copyright (c) 2006-2007 Matthew Jacob <mjacob@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 /*
  * Based upon work by Pawel Jakub Dawidek <pjd@FreeBSD.org> for all of the
  * fine geom examples, and by Poul Henning Kamp <phk@FreeBSD.org> for GEOM
  * itself, all of which is most gratefully acknowledged.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sdt.h>
 #include <sys/sysctl.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <geom/geom.h>
 #include <geom/multipath/g_multipath.h>
 
 FEATURE(geom_multipath, "GEOM multipath support");
 
 /* Sysctl subtree "multipath" under _kern_geom, holding the knobs below. */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, multipath,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_MULTIPATH tunables");
 /* Non-zero enables the extra printf diagnostics in g_multipath_taste(). */
 static u_int g_multipath_debug = 0;
 SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, debug, CTLFLAG_RW,
     &g_multipath_debug, 0, "Debug level");
 /*
  * Non-zero makes g_multipath_add_disk() take one extra r/w/e access count
  * on every provider it attaches.
  */
 static u_int g_multipath_exclusive = 1;
 SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, exclusive, CTLFLAG_RW,
     &g_multipath_exclusive, 0, "Exclusively open providers");
 
 /* SDT probes fired from the configuration and I/O restart paths below. */
 SDT_PROVIDER_DECLARE(geom);
 SDT_PROBE_DEFINE2(geom, multipath, config, restore, "char*", "char*");
 SDT_PROBE_DEFINE2(geom, multipath, config, remove, "char*", "char*");
 SDT_PROBE_DEFINE2(geom, multipath, config, disconnect, "char*", "char*");
 SDT_PROBE_DEFINE3(geom, multipath, config, fail, "char*", "char*", "int");
 SDT_PROBE_DEFINE2(geom, multipath, config, taste, "char*", "char*");
 SDT_PROBE_DEFINE2(geom, multipath, io, restart, "struct bio*", "struct bio*");
 
 /*
  * State of the error-handling thread g_multipath_kt() and the queue of
  * failed bios it services.  g_multipath_done() appends to gmtbq under
  * gmtbq_mtx; the address of g_multipath_kt_state is also used as the
  * sleep/wakeup channel between the thread, g_multipath_done() and
  * g_multipath_fini().
  */
 static enum {
 	GKT_NIL,
 	GKT_RUN,
 	GKT_DIE
 } g_multipath_kt_state;
 static struct bio_queue_head gmtbq;
 static struct mtx gmtbq_mtx;
 
 /* Forward declarations: on-disk metadata helpers. */
 static int g_multipath_read_metadata(struct g_consumer *cp,
     struct g_multipath_metadata *md);
 static int g_multipath_write_metadata(struct g_consumer *cp,
     struct g_multipath_metadata *md);
 
 /* Forward declarations: per-geom callbacks and the I/O completion path. */
 static void g_multipath_orphan(struct g_consumer *);
 static void g_multipath_resize(struct g_consumer *);
 static void g_multipath_start(struct bio *);
 static void g_multipath_done(struct bio *);
 static void g_multipath_done_error(struct bio *);
 static void g_multipath_kt(void *);
 
 /* Forward declarations: geom teardown. */
 static int g_multipath_destroy(struct g_geom *);
 static int
 g_multipath_destroy_geom(struct gctl_req *, struct g_class *, struct g_geom *);
 
 static struct g_geom *g_multipath_find_geom(struct g_class *, const char *);
 static int g_multipath_rotate(struct g_geom *);
 
 /* Forward declarations: class-level methods referenced by g_multipath_class. */
 static g_taste_t g_multipath_taste;
 static g_ctl_req_t g_multipath_config;
 static g_init_t g_multipath_init;
 static g_fini_t g_multipath_fini;
 static g_dumpconf_t g_multipath_dumpconf;
 
 /* GEOM class descriptor: hooks the class-level methods into GEOM. */
 struct g_class g_multipath_class = {
 	.name		= G_MULTIPATH_CLASS_NAME,
 	.version	= G_VERSION,
 	.ctlreq		= g_multipath_config,
 	.taste		= g_multipath_taste,
 	.destroy_geom	= g_multipath_destroy_geom,
 	.init		= g_multipath_init,
 	.fini		= g_multipath_fini
 };
 
 /*
  * Per-consumer state, kept in cp->index.
  *
  * MP_FAIL	path was marked failed (I/O error or explicit request)
  * MP_LOST	underlying provider was orphaned
  * MP_NEW	consumer is being attached by g_multipath_add_disk()
  * MP_POSTED	teardown through g_mpd() has been started or queued
  * MP_BAD	any of the states that exclude a consumer from path selection
  * MP_WITHER	g_mpd() dropped write access and re-posted itself;
  *		g_multipath_access() skips such consumers
  * MP_IDLE	increment applied by g_multipath_choose() to every usable
  *		consumer it considers; MP_IDLE_MASK covers the bits that
  *		accumulate those increments and is cleared on the chosen one
  */
 #define	MP_FAIL		0x00000001
 #define	MP_LOST		0x00000002
 #define	MP_NEW		0x00000004
 #define	MP_POSTED	0x00000008
 #define	MP_BAD		(MP_FAIL | MP_LOST | MP_NEW)
 #define	MP_WITHER	0x00000010
 #define	MP_IDLE		0x00000020
 #define	MP_IDLE_MASK	0xffffffe0
 
 /*
  * Count the consumers of gp that carry none of the MP_BAD flags, i.e. the
  * paths currently eligible for I/O.
  */
 static int
 g_multipath_good(struct g_geom *gp)
 {
 	struct g_consumer *cur;
 	int usable;
 
 	usable = 0;
 	LIST_FOREACH(cur, &gp->consumer, consumer) {
 		if ((cur->index & MP_BAD) != 0)
 			continue;
 		usable++;
 	}
 	return (usable);
 }
 
 /*
  * Flag consumer cp with the given cause bit(s) and repair the path state.
  *
  * If that leaves the geom without a single usable path while disks are
  * still present, MP_FAIL is cleared again on the consumers that are neither
  * lost nor new (except cp itself when more than one disk exists).  If cp
  * was the active path, the first usable consumer takes over, or sc_active
  * becomes NULL when none is left.
  */
 static void
 g_multipath_fault(struct g_consumer *cp, int cause)
 {
 	struct g_geom *gp = cp->geom;
 	struct g_multipath_softc *sc = gp->softc;
 	struct g_consumer *it;
 
 	cp->index |= cause;
 
 	/* No usable path left: give the failed-but-present ones another try. */
 	if (g_multipath_good(gp) == 0 && sc->sc_ndisks > 0) {
 		LIST_FOREACH(it, &gp->consumer, consumer) {
 			if (it->provider == NULL)
 				continue;
 			if ((it->index & (MP_LOST | MP_NEW)) != 0)
 				continue;
 			if (it == cp && sc->sc_ndisks > 1)
 				continue;
 			printf("GEOM_MULTIPATH: "
 			    "all paths in %s were marked FAIL, restore %s\n",
 			    sc->sc_name, it->provider->name);
 			SDT_PROBE2(geom, multipath, config, restore,
 			    sc->sc_name, it->provider->name);
 			it->index &= ~MP_FAIL;
 		}
 	}
 
 	/* Only a fault on the active path requires a switch. */
 	if (sc->sc_active != cp)
 		return;
 
 	sc->sc_active = NULL;
 	LIST_FOREACH(it, &gp->consumer, consumer) {
 		if ((it->index & MP_BAD) != 0)
 			continue;
 		sc->sc_active = it;
 		break;
 	}
 
 	if (sc->sc_active != NULL) {
 		if (sc->sc_active_active != 1) {
 			printf("GEOM_MULTIPATH: %s is now active path in %s\n",
 			    sc->sc_active->provider->name, sc->sc_name);
 		}
 	} else {
 		printf("GEOM_MULTIPATH: out of providers for %s\n",
 		    sc->sc_name);
 	}
 }
 
 /*
  * Pick the consumer that should carry request bp.
  *
  * With sc_active_active == 0, or == 2 for anything but a read, the current
  * active path is returned.  Otherwise every usable consumer has its idle
  * counter bumped and the one with the smallest cp->private value wins; ties
  * go to the larger cp->index.  The winner's idle counter is reset.
  * Returns NULL when no usable consumer exists.
  */
 static struct g_consumer *
 g_multipath_choose(struct g_geom *gp, struct bio *bp)
 {
 	struct g_multipath_softc *sc = gp->softc;
 	struct g_consumer *cand, *pick;
 
 	if (sc->sc_active_active == 0)
 		return (sc->sc_active);
 	if (sc->sc_active_active == 2 && bp->bio_cmd != BIO_READ)
 		return (sc->sc_active);
 
 	pick = NULL;
 	LIST_FOREACH(cand, &gp->consumer, consumer) {
 		if ((cand->index & MP_BAD) != 0)
 			continue;
 		cand->index += MP_IDLE;
 		if (pick == NULL) {
 			pick = cand;
 			continue;
 		}
 		if (cand->private < pick->private)
 			pick = cand;
 		else if (cand->private == pick->private &&
 		    cand->index > pick->index)
 			pick = cand;
 	}
 	if (pick != NULL)
 		pick->index &= ~MP_IDLE_MASK;
 	return (pick);
 }
 
 /*
  * Tear down one consumer of a multipath geom.  Invoked either directly or
  * as an event queued with g_post_event(); arg is the consumer.  The
  * topology lock must be held.
  *
  * Access counts still held on the consumer are released first.  If write
  * access was held and the provider's geom is not already withering, the
  * consumer is flagged MP_WITHER and the function queues itself again and
  * returns, leaving detach/destroy to that later invocation.  When the last
  * consumer of the geom is gone, the geom itself is destroyed.
  */
 static void
 g_mpd(void *arg, int flags __unused)
 {
 	struct g_geom *gp;
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	int w;
 
 	g_topology_assert();
 	cp = arg;
 	gp = cp->geom;
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
 		/* Sample the write count before the counts are released. */
 		w = cp->acw;
 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
 		if (w > 0 && cp->provider != NULL &&
 		    (cp->provider->geom->flags & G_GEOM_WITHER) == 0) {
 			/* Defer the rest of the teardown to a second pass. */
 			cp->index |= MP_WITHER;
 			g_post_event(g_mpd, cp, M_WAITOK, NULL);
 			return;
 		}
 	}
 	sc = gp->softc;
 	mtx_lock(&sc->sc_mtx);
 	if (cp->provider) {
 		printf("GEOM_MULTIPATH: %s removed from %s\n",
 		    cp->provider->name, gp->name);
 		SDT_PROBE2(geom, multipath, config, remove,
 		    gp->name, cp->provider->name);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	mtx_unlock(&sc->sc_mtx);
 	/* That was the last path: take the whole geom down. */
 	if (LIST_EMPTY(&gp->consumer))
 		g_multipath_destroy(gp);
 }
 
 /*
  * Orphan callback: the provider behind cp went away.
  *
  * The disk count is decremented and the consumer is marked MP_LOST.  If no
  * requests are outstanding on it (cp->private used as a counter) and its
  * teardown has not been started yet, g_mpd() is run right away; otherwise
  * teardown is left to whoever completes the last request.
  */
 static void
 g_multipath_orphan(struct g_consumer *cp)
 {
 	struct g_multipath_softc *sc;
 	uintptr_t *inflight;
 	int teardown;
 
 	g_topology_assert();
 	printf("GEOM_MULTIPATH: %s in %s was disconnected\n",
 	    cp->provider->name, cp->geom->name);
 	SDT_PROBE2(geom, multipath, config, disconnect,
 	    cp->geom->name, cp->provider->name);
 	sc = cp->geom->softc;
 	inflight = (uintptr_t *)&cp->private;
 
 	mtx_lock(&sc->sc_mtx);
 	sc->sc_ndisks--;
 	g_multipath_fault(cp, MP_LOST);
 	teardown = (*inflight == 0 && (cp->index & MP_POSTED) == 0);
 	if (teardown)
 		cp->index |= MP_POSTED;
 	mtx_unlock(&sc->sc_mtx);
 
 	if (teardown)
 		g_mpd(cp, 0);
 }
 
 static void
 g_multipath_resize(struct g_consumer *cp)
 {
 	struct g_multipath_softc *sc;
 	struct g_geom *gp;
 	struct g_consumer *cp1;
 	struct g_provider *pp;
 	struct g_multipath_metadata md;
 	off_t size, psize, ssize;
 	int error;
 
 	g_topology_assert();
 
 	gp = cp->geom;
 	pp = cp->provider;
 	sc = gp->softc;
 
 	if (sc->sc_stopping)
 		return;
 
 	if (pp->mediasize < sc->sc_size) {
 		size = pp->mediasize;
 		ssize = pp->sectorsize;
 	} else {
 		size = ssize = OFF_MAX;
 		mtx_lock(&sc->sc_mtx);
 		LIST_FOREACH(cp1, &gp->consumer, consumer) {
 			pp = cp1->provider;
 			if (pp == NULL)
 				continue;
 			if (pp->mediasize < size) {
 				size = pp->mediasize;
 				ssize = pp->sectorsize;
 			}
 		}
 		mtx_unlock(&sc->sc_mtx);
 		if (size == OFF_MAX || size == sc->sc_size)
 			return;
 	}
 	psize = size - ((sc->sc_uuid[0] != 0) ? ssize : 0);
 	printf("GEOM_MULTIPATH: %s size changed from %jd to %jd\n",
 	    sc->sc_name, sc->sc_pp->mediasize, psize);
 	if (sc->sc_uuid[0] != 0 && size < sc->sc_size) {
 		error = g_multipath_read_metadata(cp, &md);
 		if (error ||
 		    (strcmp(md.md_magic, G_MULTIPATH_MAGIC) != 0) ||
 		    (memcmp(md.md_uuid, sc->sc_uuid, sizeof(sc->sc_uuid)) != 0) ||
 		    (strcmp(md.md_name, sc->sc_name) != 0) ||
 		    (md.md_size != 0 && md.md_size != size) ||
 		    (md.md_sectorsize != 0 && md.md_sectorsize != ssize)) {
 			g_multipath_destroy(gp);
 			return;
 		}
 	}
 	sc->sc_size = size;
 	g_resize_provider(sc->sc_pp, psize);
 
 	if (sc->sc_uuid[0] != 0) {
 		pp = cp->provider;
 		strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic));
 		memcpy(md.md_uuid, sc->sc_uuid, sizeof (sc->sc_uuid));
 		strlcpy(md.md_name, sc->sc_name, sizeof(md.md_name));
 		md.md_version = G_MULTIPATH_VERSION;
 		md.md_size = size;
 		md.md_sectorsize = ssize;
 		md.md_active_active = sc->sc_active_active;
 		error = g_multipath_write_metadata(cp, &md);
 		if (error != 0)
 			printf("GEOM_MULTIPATH: Can't update metadata on %s "
 			    "(%d)\n", pp->name, error);
 	}
 }
 
 /*
  * Start routine: clone bp and send the clone down the path selected by
  * g_multipath_choose().
  *
  * Fails the request with ENOMEM if it cannot be cloned and with ENXIO if no
  * path is available.  bp->bio_driver1 is raised to the current disk count
  * when it is lower (g_multipath_done_error() compares it with the number
  * of children to decide on a retry), and the chosen consumer's in-flight
  * counter (cp->private) is incremented.
  */
 static void
 g_multipath_start(struct bio *bp)
 {
 	struct g_geom *gp = bp->bio_to->geom;
 	struct g_multipath_softc *sc = gp->softc;
 	struct g_consumer *path;
 	struct bio *clone;
 	uintptr_t *inflight;
 
 	KASSERT(sc != NULL, ("NULL sc"));
 	clone = g_clone_bio(bp);
 	if (clone == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 
 	mtx_lock(&sc->sc_mtx);
 	path = g_multipath_choose(gp, bp);
 	if (path != NULL) {
 		if ((uintptr_t)bp->bio_driver1 < sc->sc_ndisks)
 			bp->bio_driver1 = (void *)(uintptr_t)sc->sc_ndisks;
 		inflight = (uintptr_t *)&path->private;
 		(*inflight)++;
 		mtx_unlock(&sc->sc_mtx);
 		clone->bio_done = g_multipath_done;
 		g_io_request(clone, path);
 		return;
 	}
 	mtx_unlock(&sc->sc_mtx);
 
 	/* No usable path. */
 	g_destroy_bio(clone);
 	g_io_deliver(bp, ENXIO);
 }
 
 /*
  * Completion handler for cloned requests.
  *
  * Requests that failed with ENXIO or EIO are queued on gmtbq and the error
  * thread is woken up.  Everything else is finished here: the consumer's
  * in-flight counter is dropped, teardown of a lost consumer is queued once
  * it is idle, "/mp" is appended to a successful GEOM::physpath answer, and
  * the request is handed to g_std_done().
  */
 static void
 g_multipath_done(struct bio *bp)
 {
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	uintptr_t *inflight;
 
 	if (bp->bio_error == ENXIO || bp->bio_error == EIO) {
 		mtx_lock(&gmtbq_mtx);
 		bioq_insert_tail(&gmtbq, bp);
 		mtx_unlock(&gmtbq_mtx);
 		wakeup(&g_multipath_kt_state);
 		return;
 	}
 
 	cp = bp->bio_from;
 	sc = cp->geom->softc;
 	inflight = (uintptr_t *)&cp->private;
 
 	mtx_lock(&sc->sc_mtx);
 	(*inflight)--;
 	/* Mark the consumer as posted only if the event was queued. */
 	if (*inflight == 0 && (cp->index & MP_LOST) &&
 	    g_post_event(g_mpd, cp, M_NOWAIT, NULL) == 0)
 		cp->index |= MP_POSTED;
 	mtx_unlock(&sc->sc_mtx);
 
 	if (bp->bio_error == 0 && bp->bio_cmd == BIO_GETATTR &&
 	    strcmp(bp->bio_attribute, "GEOM::physpath") == 0)
 		strlcat(bp->bio_data, "/mp", bp->bio_length);
 	g_std_done(bp);
 }
 
 /*
  * Handle a cloned request that completed with ENXIO or EIO.  Called from
  * the g_multipath_kt() thread for each bio taken off gmtbq.
  *
  * The consumer the request was sent through is marked MP_FAIL (unless it
  * already is), its in-flight counter is dropped, and teardown is queued if
  * the consumer is lost, idle and not yet posted.  The parent request is
  * then either restarted on another path or completed with the error.
  */
 static void
 g_multipath_done_error(struct bio *bp)
 {
 	struct bio *pbp;
 	struct g_geom *gp;
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	struct g_provider *pp;
 	uintptr_t *cnt;
 
 	/*
 	 * If we had a failure, we have to check first to see
 	 * whether the consumer it failed on was the currently
 	 * active consumer (i.e., this is the first in perhaps
 	 * a number of failures). If so, we then switch consumers
 	 * to the next available consumer.
 	 */
 
 	pbp = bp->bio_parent;
 	gp = pbp->bio_to->geom;
 	sc = gp->softc;
 	cp = bp->bio_from;
 	pp = cp->provider;
 	cnt = (uintptr_t *)&cp->private;
 
 	mtx_lock(&sc->sc_mtx);
 	if ((cp->index & MP_FAIL) == 0) {
 		printf("GEOM_MULTIPATH: Error %d, %s in %s marked FAIL\n",
 		    bp->bio_error, pp->name, sc->sc_name);
 		SDT_PROBE3(geom, multipath, config, fail,
 		    sc->sc_name, pp->name, bp->bio_error);
 		g_multipath_fault(cp, MP_FAIL);
 	}
 	(*cnt)--;
 	/* Lost, idle and not yet posted: queue the consumer's teardown. */
 	if (*cnt == 0 && (cp->index & (MP_LOST | MP_POSTED)) == MP_LOST) {
 		cp->index |= MP_POSTED;
 		mtx_unlock(&sc->sc_mtx);
 		g_post_event(g_mpd, cp, M_WAITOK, NULL);
 	} else
 		mtx_unlock(&sc->sc_mtx);
 
 	/*
 	 * If we can fruitfully restart the I/O, do so.
 	 * bio_driver1 holds the disk count recorded by g_multipath_start();
 	 * a retry is made while fewer clones than that have been issued.
 	 */
 	if (pbp->bio_children < (uintptr_t)pbp->bio_driver1) {
 		pbp->bio_inbed++;
 		SDT_PROBE2(geom, multipath, io, restart, bp, pbp);
 		g_destroy_bio(bp);
 		g_multipath_start(pbp);
 	} else {
 		g_std_done(bp);
 	}
 }
 
 /*
  * Body of the error-handling thread.
  *
  * While the state is GKT_RUN, failed bios are taken off gmtbq and passed to
  * g_multipath_done_error() with gmtbq_mtx dropped; when the queue is empty
  * the thread sleeps on g_multipath_kt_state.  Once the state changes, the
  * thread issues a wakeup on the same channel and exits.
  */
 static void
 g_multipath_kt(void *arg)
 {
 	struct bio *bp;
 
 	g_multipath_kt_state = GKT_RUN;
 	mtx_lock(&gmtbq_mtx);
 	while (g_multipath_kt_state == GKT_RUN) {
 		/* Drain everything that is queued right now. */
 		while ((bp = bioq_takefirst(&gmtbq)) != NULL) {
 			mtx_unlock(&gmtbq_mtx);
 			g_multipath_done_error(bp);
 			mtx_lock(&gmtbq_mtx);
 		}
 		if (g_multipath_kt_state == GKT_RUN) {
 			msleep(&g_multipath_kt_state, &gmtbq_mtx, PRIBIO,
 			    "gkt:wait", 0);
 		}
 	}
 	mtx_unlock(&gmtbq_mtx);
 	wakeup(&g_multipath_kt_state);
 	kproc_exit(0);
 }
 
 static int
 g_multipath_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp, *badcp = NULL;
 	struct g_multipath_softc *sc;
 	int error;
 
 	gp = pp->geom;
 
 	/* Error used if we have no valid consumers. */
 	error = (dr > 0 || dw > 0 || de > 0) ? ENXIO : 0;
 
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (cp->index & MP_WITHER)
 			continue;
 
 		error = g_access(cp, dr, dw, de);
 		if (error) {
 			badcp = cp;
 			goto fail;
 		}
 	}
 
 	if (error != 0)
 		return (error);
 
 	sc = gp->softc;
 	sc->sc_opened += dr + dw + de;
 	if (sc->sc_stopping && sc->sc_opened == 0)
 		g_multipath_destroy(gp);
 
 	return (0);
 
 fail:
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (cp == badcp)
 			break;
 		if (cp->index & MP_WITHER)
 			continue;
 
 		(void) g_access(cp, -dr, -dw, -de);
 	}
 	return (error);
 }
 
 /*
  * Create a multipath geom and its "multipath/<name>" provider from md.
  *
  * Returns NULL if a geom of that name exists and is not being stopped.
  * When md carries a size, the provider is sized from it, minus one sector
  * if a uuid is present; otherwise the size is left for
  * g_multipath_add_disk() to fill in.
  */
 static struct g_geom *
 g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md)
 {
 	struct g_multipath_softc *sc;
 	struct g_geom *gp;
 	struct g_provider *pp;
 
 	g_topology_assert();
 
 	/* Refuse duplicate names among live geoms of this class. */
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc != NULL && !sc->sc_stopping &&
 		    strcmp(gp->name, md->md_name) == 0) {
 			printf("GEOM_MULTIPATH: name %s already exists\n",
 			    md->md_name);
 			return (NULL);
 		}
 	}
 
 	gp = g_new_geomf(mp, "%s", md->md_name);
 
 	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
 	mtx_init(&sc->sc_mtx, "multipath", NULL, MTX_DEF);
 	memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid));
 	memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name));
 	sc->sc_size = md->md_size;
 	sc->sc_active_active = md->md_active_active;
 
 	gp->softc = sc;
 	gp->start = g_multipath_start;
 	gp->access = g_multipath_access;
 	gp->orphan = g_multipath_orphan;
 	gp->resize = g_multipath_resize;
 	gp->dumpconf = g_multipath_dumpconf;
 
 	pp = g_new_providerf(gp, "multipath/%s", md->md_name);
 	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
 	if (md->md_size != 0) {
 		pp->mediasize = md->md_size;
 		/* The last sector holds the metadata when a uuid is set. */
 		if (md->md_uuid[0] != 0)
 			pp->mediasize -= md->md_sectorsize;
 		pp->sectorsize = md->md_sectorsize;
 	}
 	sc->sc_pp = pp;
 	g_error_provider(pp, 0);
 	printf("GEOM_MULTIPATH: %s created\n", gp->name);
 	return (gp);
 }
 
 /*
  * Attach provider pp to multipath geom gp as an additional path.
  *
  * Returns EEXIST if pp is already attached, or the error from g_attach() /
  * g_access() if attaching or opening fails (the new consumer is destroyed
  * in that case).  On success the consumer is opened with the same access
  * counts as the multipath provider, plus one of each when the "exclusive"
  * sysctl is set; size, stripe and unmapped-I/O properties are inherited
  * where not yet set; and the path becomes the active one if there was none.
  */
 static int
 g_multipath_add_disk(struct g_geom *gp, struct g_provider *pp)
 {
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	int error, acr, acw, ace;
 
 	g_topology_assert();
 
 	sc = gp->softc;
 	KASSERT(sc, ("no softc"));
 
 	/*
 	 * Make sure that the passed provider isn't already attached
 	 */
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (cp->provider == pp)
 			break;
 	}
 	if (cp) {
 		printf("GEOM_MULTIPATH: provider %s already attached to %s\n",
 		    pp->name, gp->name);
 		return (EEXIST);
 	}
 	cp = g_new_consumer(gp);
 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	cp->private = NULL;
 	/* MP_NEW keeps the path out of selection until it is fully set up. */
 	cp->index = MP_NEW;
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		printf("GEOM_MULTIPATH: cannot attach %s to %s\n",
 		    pp->name, sc->sc_name);
 		g_destroy_consumer(cp);
 		return (error);
 	}
 
 	/*
 	 * Set access permissions on new consumer to match other consumers
 	 */
 	if (sc->sc_pp) {
 		acr = sc->sc_pp->acr;
 		acw = sc->sc_pp->acw;
 		ace = sc->sc_pp->ace;
 	} else
 		acr = acw = ace = 0;
 	if (g_multipath_exclusive) {
 		acr++;
 		acw++;
 		ace++;
 	}
 	error = g_access(cp, acr, acw, ace);
 	if (error) {
 		printf("GEOM_MULTIPATH: cannot set access in "
 		    "attaching %s to %s (%d)\n",
 		    pp->name, sc->sc_name, error);
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		return (error);
 	}
 	/* First disk of a geom created without a size: adopt this one's. */
 	if (sc->sc_size == 0) {
 		sc->sc_size = pp->mediasize -
 		    ((sc->sc_uuid[0] != 0) ? pp->sectorsize : 0);
 		sc->sc_pp->mediasize = sc->sc_size;
 		sc->sc_pp->sectorsize = pp->sectorsize;
 	}
 	if (sc->sc_pp->stripesize == 0 && sc->sc_pp->stripeoffset == 0) {
 		sc->sc_pp->stripesize = pp->stripesize;
 		sc->sc_pp->stripeoffset = pp->stripeoffset;
 	}
 	sc->sc_pp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED;
 	mtx_lock(&sc->sc_mtx);
 	cp->index = 0;
 	sc->sc_ndisks++;
 	mtx_unlock(&sc->sc_mtx);
 	printf("GEOM_MULTIPATH: %s added to %s\n",
 	    pp->name, sc->sc_name);
 	if (sc->sc_active == NULL) {
 		sc->sc_active = cp;
 		if (sc->sc_active_active != 1)
 			printf("GEOM_MULTIPATH: %s is now active path in %s\n",
 			    pp->name, sc->sc_name);
 	}
 	return (0);
 }
 
 /*
  * Destroy a multipath geom, or start doing so.
  *
  * Returns ENXIO if the geom has no softc (already destroyed).  If the
  * provider is still open, it is withered, sc_pp is cleared and EINPROGRESS
  * is returned; g_multipath_access() calls back in once the last opener is
  * gone.  Otherwise every consumer whose teardown has not been started is
  * run through g_mpd().  EINPROGRESS is returned while consumers remain;
  * when none remain, the softc is freed and the geom withered.
  */
 static int
 g_multipath_destroy(struct g_geom *gp)
 {
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp, *cp1;
 
 	g_topology_assert();
 	if (gp->softc == NULL)
 		return (ENXIO);
 	sc = gp->softc;
 	if (!sc->sc_stopping) {
 		printf("GEOM_MULTIPATH: destroying %s\n", gp->name);
 		sc->sc_stopping = 1;
 	}
 	if (sc->sc_opened != 0) {
 		g_wither_provider(sc->sc_pp, ENXIO);
 		sc->sc_pp = NULL;
 		return (EINPROGRESS);
 	}
 	LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) {
 		mtx_lock(&sc->sc_mtx);
 		if ((cp->index & MP_POSTED) == 0) {
 			cp->index |= MP_POSTED;
 			mtx_unlock(&sc->sc_mtx);
 			g_mpd(cp, 0);
 			/*
 			 * g_mpd() calls this function again when it removes
 			 * the last consumer, so after the final list element
 			 * there may be nothing left to do here.
 			 */
 			if (cp1 == NULL)
 				return(0);	/* Recursion happened. */
 		} else
 			mtx_unlock(&sc->sc_mtx);
 	}
 	if (!LIST_EMPTY(&gp->consumer))
 		return (EINPROGRESS);
 	mtx_destroy(&sc->sc_mtx);
 	g_free(gp->softc);
 	gp->softc = NULL;
 	printf("GEOM_MULTIPATH: %s destroyed\n", gp->name);
 	g_wither_geom(gp, ENXIO);
 	return (0);
 }
 
 /*
  * Class destroy_geom method: a thin wrapper around g_multipath_destroy().
  * The request and class arguments are not used.
  */
 static int
 g_multipath_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp)
 {
 	int error;
 
 	error = g_multipath_destroy(gp);
 	return (error);
 }
 
 /*
  * Make the next usable consumer after the current active one the new
  * active path, wrapping around to the first usable consumer in the list.
  * Returns ENXIO if the geom has no softc, 0 otherwise (also when nothing
  * changed).
  */
 static int
 g_multipath_rotate(struct g_geom *gp)
 {
 	struct g_multipath_softc *sc = gp->softc;
 	struct g_consumer *cur, *wrap = NULL;
 	int past_active = 0;
 	int usable;
 
 	g_topology_assert();
 	if (sc == NULL)
 		return (ENXIO);
 
 	LIST_FOREACH(cur, &gp->consumer, consumer) {
 		usable = ((cur->index & MP_BAD) == 0);
 		if (usable && wrap == NULL)
 			wrap = cur;
 		/* First usable consumer behind the active one: take it. */
 		if (usable && past_active)
 			break;
 		if (sc->sc_active == cur)
 			past_active = 1;
 	}
 	/* Ran off the end of the list: wrap around. */
 	if (cur == NULL)
 		cur = wrap;
 
 	if (cur != NULL && cur != sc->sc_active) {
 		sc->sc_active = cur;
 		if (sc->sc_active_active != 1) {
 			printf("GEOM_MULTIPATH: %s is now active path in %s\n",
 			    cur->provider->name, sc->sc_name);
 		}
 	}
 	return (0);
 }
 
 /*
  * Class init method: set up the failed-bio queue and its mutex, then start
  * the g_multipath_kt() thread that services the queue.
  */
 static void
 g_multipath_init(struct g_class *mp)
 {
 
 	mtx_init(&gmtbq_mtx, "gmtbq", NULL, MTX_DEF);
 	bioq_init(&gmtbq);
 	kproc_create(g_multipath_kt, mp, NULL, 0, 0, "g_mp_kt");
 }
 
 /*
  * Class fini method: if the error thread is running, ask it to exit by
  * setting GKT_DIE, wake it up, and sleep until it signals back on the same
  * channel.
  */
 static void
 g_multipath_fini(struct g_class *mp)
 {
 
 	if (g_multipath_kt_state != GKT_RUN)
 		return;
 
 	mtx_lock(&gmtbq_mtx);
 	g_multipath_kt_state = GKT_DIE;
 	wakeup(&g_multipath_kt_state);
 	msleep(&g_multipath_kt_state, &gmtbq_mtx, PRIBIO,
 	    "gmp:fini", 0);
 	mtx_unlock(&gmtbq_mtx);
 }
 
 static int
 g_multipath_read_metadata(struct g_consumer *cp,
     struct g_multipath_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize,
 	    pp->sectorsize, &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL)
 		return (error);
 	multipath_metadata_decode(buf, md);
 	g_free(buf);
 	return (0);
 }
 
 static int
 g_multipath_write_metadata(struct g_consumer *cp,
     struct g_multipath_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 	error = g_access(cp, 1, 1, 1);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO);
 	multipath_metadata_encode(md, buf);
 	error = g_write_data(cp, pp->mediasize - pp->sectorsize,
 	    buf, pp->sectorsize);
 	g_topology_lock();
 	g_access(cp, -1, -1, -1);
 	g_free(buf);
 	return (error);
 }
 
 /*
  * Taste method: check whether provider pp carries multipath metadata and,
  * if so, attach it to the matching multipath geom, creating that geom when
  * it does not exist yet.
  *
  * The metadata is read through a temporary geom and consumer that are
  * destroyed again before it is evaluated.  Returns the geom pp was added
  * to, or NULL if pp is not a (compatible) multipath member or could not be
  * added.
  */
 static struct g_geom *
 g_multipath_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_multipath_metadata md;
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp, *gp1;
 	int error, isnew;
 
 	g_topology_assert();
 
 	/* Throw-away geom/consumer used only to read the last sector. */
 	gp = g_new_geomf(mp, "multipath:taste");
 	gp->start = g_multipath_start;
 	gp->access = g_multipath_access;
 	gp->orphan = g_multipath_orphan;
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		error = g_multipath_read_metadata(cp, &md);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	/* Validate magic, version and the recorded size/sector size. */
 	if (strcmp(md.md_magic, G_MULTIPATH_MAGIC) != 0) {
 		if (g_multipath_debug)
 			printf("%s is not MULTIPATH\n", pp->name);
 		return (NULL);
 	}
 	if (md.md_version != G_MULTIPATH_VERSION) {
 		printf("%s has version %d multipath id- this module is version "
 		    " %d: rejecting\n", pp->name, md.md_version,
 		    G_MULTIPATH_VERSION);
 		return (NULL);
 	}
 	if (md.md_size != 0 && md.md_size != pp->mediasize)
 		return (NULL);
 	if (md.md_sectorsize != 0 && md.md_sectorsize != pp->sectorsize)
 		return (NULL);
 	if (g_multipath_debug)
 		printf("MULTIPATH: %s/%s\n", md.md_name, md.md_uuid);
 	SDT_PROBE2(geom, multipath, config, taste, md.md_name, md.md_uuid);
 
 	/*
 	 * Let's check if such a device already is present. We check against
 	 * uuid alone first because that's the true distinguishor. If that
 	 * passes, then we check for name conflicts. If there are conflicts,
 	 * modify the name.
 	 *
 	 * The whole purpose of this is to solve the problem that people don't
 	 * pick good unique names, but good unique names (like uuids) are a
 	 * pain to use. So, we allow people to build GEOMs with friendly names
 	 * and uuids, and modify the names in case there's a collision.
 	 */
 	sc = NULL;
 	/* gp ends up as the live geom with this uuid, or NULL. */
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL || sc->sc_stopping)
 			continue;
 		if (strncmp(md.md_uuid, sc->sc_uuid, sizeof(md.md_uuid)) == 0)
 			break;
 	}
 
 	/* gp1 ends up as another live geom with this name, or NULL. */
 	LIST_FOREACH(gp1, &mp->geom, geom) {
 		if (gp1 == gp)
 			continue;
 		sc = gp1->softc;
 		if (sc == NULL || sc->sc_stopping)
 			continue;
 		if (strncmp(md.md_name, sc->sc_name, sizeof(md.md_name)) == 0)
 			break;
 	}
 
 	/*
 	 * If gp is NULL, we had no extant MULTIPATH geom with this uuid.
 	 *
 	 * If gp1 is *not* NULL, that means we have a MULTIPATH geom extant
 	 * with the same name (but a different UUID).
 	 *
 	 * If gp is NULL, then modify the name with a random number and
 	 * complain, but allow the creation of the geom to continue.
 	 *
 	 * If gp is *not* NULL, just use the geom's name as we're attaching
 	 * this disk to the (previously generated) name.
 	 */
 
 	if (gp1) {
 		sc = gp1->softc;
 		if (gp == NULL) {
 			char buf[16];
 			u_long rand = random();
 
 			snprintf(buf, sizeof (buf), "%s-%lu", md.md_name, rand);
 			printf("GEOM_MULTIPATH: geom %s/%s exists already\n",
 			    sc->sc_name, sc->sc_uuid);
 			printf("GEOM_MULTIPATH: %s will be (temporarily) %s\n",
 			    md.md_uuid, buf);
 			strlcpy(md.md_name, buf, sizeof(md.md_name));
 		} else {
 			strlcpy(md.md_name, sc->sc_name, sizeof(md.md_name));
 		}
 	}
 
 	if (gp == NULL) {
 		gp = g_multipath_create(mp, &md);
 		if (gp == NULL) {
 			printf("GEOM_MULTIPATH: cannot create geom %s/%s\n",
 			    md.md_name, md.md_uuid);
 			return (NULL);
 		}
 		isnew = 1;
 	} else {
 		isnew = 0;
 	}
 
 	sc = gp->softc;
 	KASSERT(sc != NULL, ("sc is NULL"));
 	error = g_multipath_add_disk(gp, pp);
 	if (error != 0) {
 		/* Do not leave behind a geom created only for this disk. */
 		if (isnew)
 			g_multipath_destroy(gp);
 		return (NULL);
 	}
 	return (gp);
 }
 
 static void
 g_multipath_ctl_add_name(struct gctl_req *req, struct g_class *mp,
     const char *name)
 {
 	struct g_multipath_softc *sc;
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_provider *pp;
 	const char *mpname;
 	static const char devpf[6] = _PATH_DEV;
 	int error;
 
 	g_topology_assert();
 
 	mpname = gctl_get_asciiparam(req, "arg0");
         if (mpname == NULL) {
                 gctl_error(req, "No 'arg0' argument");
                 return;
         }
 	gp = g_multipath_find_geom(mp, mpname);
 	if (gp == NULL) {
 		gctl_error(req, "Device %s is invalid", mpname);
 		return;
 	}
 	sc = gp->softc;
 
 	if (strncmp(name, devpf, 5) == 0)
 		name += 5;
 	pp = g_provider_by_name(name);
 	if (pp == NULL) {
 		gctl_error(req, "Provider %s is invalid", name);
 		return;
 	}
 
 	/*
 	 * Check to make sure parameters match.
 	 */
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (cp->provider == pp) {
 			gctl_error(req, "provider %s is already there",
 			    pp->name);
 			return;
 		}
 	}
 	if (sc->sc_pp->mediasize != 0 &&
 	    sc->sc_pp->mediasize + (sc->sc_uuid[0] != 0 ? pp->sectorsize : 0)
 	     != pp->mediasize) {
 		gctl_error(req, "Providers size mismatch %jd != %jd",
 		    (intmax_t) sc->sc_pp->mediasize +
 			(sc->sc_uuid[0] != 0 ? pp->sectorsize : 0),
 		    (intmax_t) pp->mediasize);
 		return;
 	}
 	if (sc->sc_pp->sectorsize != 0 &&
 	    sc->sc_pp->sectorsize != pp->sectorsize) {
 		gctl_error(req, "Providers sectorsize mismatch %u != %u",
 		    sc->sc_pp->sectorsize, pp->sectorsize);
 		return;
 	}
 
 	error = g_multipath_add_disk(gp, pp);
 	if (error != 0)
 		gctl_error(req, "Provider addition error: %d", error);
 }
 
 /*
  * Control verb handler: make the provider given as "arg1" the active path
  * of the multipath device given as "arg0".
  *
  * Exactly two arguments are required.  Errors are reported through
  * gctl_error() when an argument is missing, the device or provider is not
  * found, or the consumer carries any of the MP_BAD flags.
  */
 static void
 g_multipath_ctl_prefer(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_geom *gp;
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	const char *name, *mpname;
 	static const char devpf[6] = _PATH_DEV;
 	int *nargs;
 
 	g_topology_assert();
 
 	mpname = gctl_get_asciiparam(req, "arg0");
 	if (mpname == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	gp = g_multipath_find_geom(mp, mpname);
 	if (gp == NULL) {
 		gctl_error(req, "Device %s is invalid", mpname);
 		return;
 	}
 	sc = gp->softc;
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "No 'nargs' argument");
 		return;
 	}
 	if (*nargs != 2) {
 		gctl_error(req, "missing device");
 		return;
 	}
 
 	name = gctl_get_asciiparam(req, "arg1");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg1' argument");
 		return;
 	}
 	if (strncmp(name, devpf, 5) == 0)
 		name += 5;
 
 	/* Locate the consumer attached to the named provider. */
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (cp->provider == NULL)
 			continue;
 		if (strcmp(cp->provider->name, name) == 0)
 			break;
 	}
 	if (cp == NULL) {
 		gctl_error(req, "Provider %s not found", name);
 		return;
 	}
 
 	mtx_lock(&sc->sc_mtx);
 	if ((cp->index & MP_BAD) == 0) {
 		/* Here when the consumer is present and in good shape */
 		sc->sc_active = cp;
 		if (sc->sc_active_active == 0) {
 			printf("GEOM_MULTIPATH: %s now active path in %s\n",
 			    sc->sc_active->provider->name, sc->sc_name);
 		}
 	} else {
 		gctl_error(req, "Consumer %s is invalid", name);
 	}
 	mtx_unlock(&sc->sc_mtx);
 }
 
 /*
  * Control verb handler: add the provider given as "arg1" to the multipath
  * device given as "arg0".
  *
  * Only validates that both arguments are present and that the device
  * exists; the actual work, including all further checks, is done by
  * g_multipath_ctl_add_name().
  */
 static void
 g_multipath_ctl_add(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_geom *gp;
 	const char *mpname, *name;
 
 	mpname = gctl_get_asciiparam(req, "arg0");
 	if (mpname == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	gp = g_multipath_find_geom(mp, mpname);
 	if (gp == NULL) {
 		gctl_error(req, "Device %s not found", mpname);
 		return;
 	}
 
 	name = gctl_get_asciiparam(req, "arg1");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg1' argument");
 		return;
 	}
 	g_multipath_ctl_add_name(req, mp, name);
 }
 
 /*
  * Control verb handler: create a multipath device without on-disk metadata
  * (empty uuid) named "arg0" from the providers "arg1" .. "argN".
  *
  * The optional "active_active" and "active_read" parameters select the
  * operating mode (1 and 2 respectively; default 0).  If not every listed
  * provider could be added, the freshly created geom is destroyed again.
  * Errors are reported through gctl_error().
  */
 static void
 g_multipath_ctl_create(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_multipath_metadata md;
 	struct g_multipath_softc *sc;
 	struct g_geom *gp;
 	const char *mpname, *name;
 	char param[16];
 	int *nargs, i, *val;
 
 	g_topology_assert();
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "No 'nargs' argument");
 		return;
 	}
 	if (*nargs < 2) {
 		gctl_error(req, "wrong number of arguments.");
 		return;
 	}
 
 	mpname = gctl_get_asciiparam(req, "arg0");
 	if (mpname == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	gp = g_multipath_find_geom(mp, mpname);
 	if (gp != NULL) {
 		gctl_error(req, "Device %s already exist", mpname);
 		return;
 	}
 
 	memset(&md, 0, sizeof(md));
 	strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic));
 	md.md_version = G_MULTIPATH_VERSION;
 	strlcpy(md.md_name, mpname, sizeof(md.md_name));
 	md.md_size = 0;
 	md.md_sectorsize = 0;
 	md.md_uuid[0] = 0;
 	md.md_active_active = 0;
 	val = gctl_get_paraml(req, "active_active", sizeof(*val));
 	if (val != NULL && *val != 0)
 		md.md_active_active = 1;
 	val = gctl_get_paraml(req, "active_read", sizeof(*val));
 	if (val != NULL && *val != 0)
 		md.md_active_active = 2;
 	gp = g_multipath_create(mp, &md);
 	if (gp == NULL) {
 		gctl_error(req, "GEOM_MULTIPATH: cannot create geom %s/%s\n",
 		    md.md_name, md.md_uuid);
 		return;
 	}
 	sc = gp->softc;
 
 	for (i = 1; i < *nargs; i++) {
 		snprintf(param, sizeof(param), "arg%d", i);
 		name = gctl_get_asciiparam(req, param);
 		if (name == NULL) {
 			/*
 			 * g_multipath_ctl_add_name() hands the name straight
 			 * to strncmp(); stop here and let the disk count
 			 * check below undo the creation.
 			 */
 			gctl_error(req, "No '%s' argument", param);
 			break;
 		}
 		g_multipath_ctl_add_name(req, mp, name);
 	}
 
 	/* All or nothing: every listed provider must have been attached. */
 	if (sc->sc_ndisks != (*nargs - 1))
 		g_multipath_destroy(gp);
 }
 
 /*
  * Control verb handler: change the operating mode of the multipath device
  * given as "arg0".
  *
  * "active_active", "active_read" and "active_passive" select mode 1, 2 and
  * 0 respectively; when several are set, the one checked last wins.  If the
  * device uses on-disk metadata (sc_uuid set) and has an active path, the
  * metadata is rewritten through that path to record the mode.  Errors are
  * reported through gctl_error().
  */
 static void
 g_multipath_ctl_configure(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_multipath_softc *sc;
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_provider *pp;
 	struct g_multipath_metadata md;
 	const char *name;
 	int error, *val;
 
 	g_topology_assert();
 
 	name = gctl_get_asciiparam(req, "arg0");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	gp = g_multipath_find_geom(mp, name);
 	if (gp == NULL) {
 		gctl_error(req, "Device %s is invalid", name);
 		return;
 	}
 	sc = gp->softc;
 	val = gctl_get_paraml(req, "active_active", sizeof(*val));
 	if (val != NULL && *val != 0)
 		sc->sc_active_active = 1;
 	val = gctl_get_paraml(req, "active_read", sizeof(*val));
 	if (val != NULL && *val != 0)
 		sc->sc_active_active = 2;
 	val = gctl_get_paraml(req, "active_passive", sizeof(*val));
 	if (val != NULL && *val != 0)
 		sc->sc_active_active = 0;
 	if (sc->sc_uuid[0] != 0 && sc->sc_active != NULL) {
 		cp = sc->sc_active;
 		pp = cp->provider;
 		/*
 		 * Start from a zeroed structure, as g_multipath_ctl_create()
 		 * does, so that bytes following the string terminators are
 		 * well defined before the metadata is encoded to disk.
 		 */
 		memset(&md, 0, sizeof(md));
 		strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic));
 		memcpy(md.md_uuid, sc->sc_uuid, sizeof (sc->sc_uuid));
 		strlcpy(md.md_name, name, sizeof(md.md_name));
 		md.md_version = G_MULTIPATH_VERSION;
 		md.md_size = pp->mediasize;
 		md.md_sectorsize = pp->sectorsize;
 		md.md_active_active = sc->sc_active_active;
 		error = g_multipath_write_metadata(cp, &md);
 		if (error != 0)
 			gctl_error(req, "Can't update metadata on %s (%d)",
 			    pp->name, error);
 	}
 }
 
 /*
  * "fail" / "restore" verbs: set (fail != 0) or clear (fail == 0) the
  * MP_FAIL mark on every consumer of device "arg0" whose provider is named
  * "arg1" and which is not already marked MP_LOST.  Consumers that are
  * already in the requested state are left alone.  An error is reported
  * through gctl_error() when no consumer matches.
  */
 static void
 g_multipath_ctl_fail(struct gctl_req *req, struct g_class *mp, int fail)
 {
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	const char *mpname, *name;
 	int matched;
 
 	mpname = gctl_get_asciiparam(req, "arg0");
 	if (mpname == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	gp = g_multipath_find_geom(mp, mpname);
 	if (gp == NULL) {
 		gctl_error(req, "Device %s not found", mpname);
 		return;
 	}
 	sc = gp->softc;
 
 	name = gctl_get_asciiparam(req, "arg1");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg1' argument");
 		return;
 	}
 
 	matched = 0;
 	mtx_lock(&sc->sc_mtx);
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		/* Skip consumers that are detached, differently named or lost. */
 		if (cp->provider == NULL ||
 		    strcmp(cp->provider->name, name) != 0 ||
 		    (cp->index & MP_LOST) != 0)
 			continue;
 		matched = 1;
 		/* Nothing to do when the mark already has the wanted value. */
 		if (!fail == !(cp->index & MP_FAIL))
 			continue;
 		printf("GEOM_MULTIPATH: %s in %s is marked %s.\n",
 			name, sc->sc_name, fail ? "FAIL" : "OK");
 		if (!fail) {
 			cp->index &= ~MP_FAIL;
 			SDT_PROBE2(geom, multipath, config, restore,
 			    sc->sc_name, cp->provider->name);
 		} else {
 			g_multipath_fault(cp, MP_FAIL);
 			SDT_PROBE3(geom, multipath, config, fail,
 			    sc->sc_name, cp->provider->name, 0);
 		}
 	}
 	mtx_unlock(&sc->sc_mtx);
 	if (matched == 0)
 		gctl_error(req, "Provider %s not found", name);
 }
 
 /*
  * "remove" verb: drop every consumer of device "arg0" whose provider is
  * named "arg1" and which is not already marked MP_LOST.  Each match is
  * logged, counted out of sc_ndisks and handed to g_multipath_fault() with
  * MP_LOST.  An error is reported through gctl_error() when nothing matched.
  */
 static void
 g_multipath_ctl_remove(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_multipath_softc *sc;
 	struct g_geom *gp;
 	struct g_consumer *cp, *cp1;
 	const char *mpname, *name;
 	uintptr_t *cnt;
 	int found;
 
 	mpname = gctl_get_asciiparam(req, "arg0");
         if (mpname == NULL) {
                 gctl_error(req, "No 'arg0' argument");
                 return;
         }
 	gp = g_multipath_find_geom(mp, mpname);
 	if (gp == NULL) {
 		gctl_error(req, "Device %s not found", mpname);
 		return;
 	}
 	sc = gp->softc;
 
 	name = gctl_get_asciiparam(req, "arg1");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg1' argument");
 		return;
 	}
 
 	found = 0;
 	mtx_lock(&sc->sc_mtx);
 	/* The SAFE variant is needed: g_mpd() below may dispose of cp. */
 	LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) {
 		if (cp->provider != NULL &&
 		    strcmp(cp->provider->name, name) == 0 &&
 		    (cp->index & MP_LOST) == 0) {
 			found = 1;
 			printf("GEOM_MULTIPATH: removing %s from %s\n",
 			    cp->provider->name, cp->geom->name);
 			SDT_PROBE2(geom, multipath, config, remove,
 			    cp->geom->name, cp->provider->name);
 			sc->sc_ndisks--;
 			g_multipath_fault(cp, MP_LOST);
 			/*
 			 * The consumer's private pointer is reused as an
 			 * integer counter (presumably outstanding requests --
 			 * NOTE(review): confirm against the I/O path).
 			 */
 			cnt = (uintptr_t *)&cp->private;
 			if (*cnt == 0 && (cp->index & MP_POSTED) == 0) {
 				/* Mark as posted so g_mpd() runs only once. */
 				cp->index |= MP_POSTED;
 				/* g_mpd() is called with sc_mtx released. */
 				mtx_unlock(&sc->sc_mtx);
 				g_mpd(cp, 0);
 				/*
 				 * No successor was saved: leave without
 				 * retaking sc_mtx or touching the list again.
 				 */
 				if (cp1 == NULL)
 					return;	/* Recursion happened. */
 				mtx_lock(&sc->sc_mtx);
 			}
 		}
 	}
 	mtx_unlock(&sc->sc_mtx);
 	if (found == 0)
 		gctl_error(req, "Provider %s not found", name);
 }
 
 /*
  * Look up a geom of class mp by name.  Geoms without a softc, or whose
  * softc is flagged as stopping, are never returned.  Returns NULL when no
  * usable geom carries that name.
  */
 static struct g_geom *
 g_multipath_find_geom(struct g_class *mp, const char *name)
 {
 	struct g_multipath_softc *softc;
 	struct g_geom *cur;
 
 	LIST_FOREACH(cur, &mp->geom, geom) {
 		softc = cur->softc;
 		if (softc != NULL && !softc->sc_stopping &&
 		    strcmp(cur->name, name) == 0)
 			return (cur);
 	}
 	return (NULL);
 }
 
 /*
  * "stop" verb: destroy the multipath device named by "arg0".  A result
  * of EINPROGRESS from g_multipath_destroy() is not treated as a failure;
  * any other non-zero result is reported through gctl_error().
  */
 static void
 g_multipath_ctl_stop(struct gctl_req *req, struct g_class *mp)
 {
 	const char *devname;
 	struct g_geom *geom;
 	int rc;
 
 	g_topology_assert();
 
 	devname = gctl_get_asciiparam(req, "arg0");
 	if (devname == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	geom = g_multipath_find_geom(mp, devname);
 	if (geom == NULL) {
 		gctl_error(req, "Device %s is invalid", devname);
 		return;
 	}
 	rc = g_multipath_destroy(geom);
 	if (rc == 0 || rc == EINPROGRESS)
 		return;
 	gctl_error(req, "failed to stop %s (err=%d)", devname, rc);
 }
 
 /*
  * "destroy" verb: erase the on-disk metadata of the device named by
  * "arg0" (only when the device carries a UUID and has an active consumer)
  * and then destroy the device.
  *
  * The metadata is erased by writing one zero-filled sector at the very end
  * of the active provider.  A failure to open the provider or to write the
  * sector is reported through gctl_error() but does not stop the device
  * from being destroyed.  EINPROGRESS from g_multipath_destroy() is not
  * treated as an error.
  */
 static void
 g_multipath_ctl_destroy(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_geom *gp;
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	struct g_provider *pp;
 	const char *name;
 	uint8_t *buf;
 	int error;
 
 	g_topology_assert();
 
 	name = gctl_get_asciiparam(req, "arg0");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	gp = g_multipath_find_geom(mp, name);
 	if (gp == NULL) {
 		gctl_error(req, "Device %s is invalid", name);
 		return;
 	}
 	sc = gp->softc;
 
 	if (sc->sc_uuid[0] != 0 && sc->sc_active != NULL) {
 		cp = sc->sc_active;
 		pp = cp->provider;
 		error = g_access(cp, 1, 1, 1);
 		if (error != 0) {
 			gctl_error(req, "Can't open %s (%d)", pp->name, error);
 			goto destroy;
 		}
 		/* The write is issued with the topology lock released. */
 		g_topology_unlock();
 		buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO);
 		error = g_write_data(cp, pp->mediasize - pp->sectorsize,
 		    buf, pp->sectorsize);
 		/* The scratch sector is ours; release it once written. */
 		g_free(buf);
 		g_topology_lock();
 		g_access(cp, -1, -1, -1);
 		if (error != 0)
 			gctl_error(req, "Can't erase metadata on %s (%d)",
 			    pp->name, error);
 	}
 
 destroy:
 	error = g_multipath_destroy(gp);
 	if (error != 0 && error != EINPROGRESS)
 		gctl_error(req, "failed to destroy %s (err=%d)", name, error);
 }
 
 /*
  * "rotate" verb: call g_multipath_rotate() on the device named by "arg0"
  * and report any non-zero result through gctl_error().
  */
 static void
 g_multipath_ctl_rotate(struct gctl_req *req, struct g_class *mp)
 {
 	const char *devname;
 	struct g_geom *geom;
 	int rc;
 
 	g_topology_assert();
 
 	devname = gctl_get_asciiparam(req, "arg0");
 	if (devname == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	geom = g_multipath_find_geom(mp, devname);
 	if (geom == NULL) {
 		gctl_error(req, "Device %s is invalid", devname);
 		return;
 	}
 	rc = g_multipath_rotate(geom);
 	if (rc == 0)
 		return;
 	gctl_error(req, "failed to rotate %s (err=%d)", devname, rc);
 }
 
 /*
  * "getactive" verb: return, in the "output" request parameter, the names
  * of the providers currently in use by the device named by "arg0".
  *
  * When sc_active_active is 1 every consumer not marked MP_BAD is listed,
  * separated by single spaces; otherwise only the provider of sc_active is
  * listed.  "none" is produced when there is nothing to list.  The text is
  * always terminated by a newline.
  *
  * The sbuf is allocated only after the arguments have been validated so
  * that the early error returns cannot leak it.
  */
 static void
 g_multipath_ctl_getactive(struct gctl_req *req, struct g_class *mp)
 {
 	struct sbuf *sb;
 	struct g_geom *gp;
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	const char *name;
 	int empty;
 
 	g_topology_assert();
 	name = gctl_get_asciiparam(req, "arg0");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg0' argument");
 		return;
 	}
 	gp = g_multipath_find_geom(mp, name);
 	if (gp == NULL) {
 		gctl_error(req, "Device %s is invalid", name);
 		return;
 	}
 	sc = gp->softc;
 
 	sb = sbuf_new_auto();
 	if (sc->sc_active_active == 1) {
 		empty = 1;
 		LIST_FOREACH(cp, &gp->consumer, consumer) {
 			if (cp->index & MP_BAD)
 				continue;
 			if (!empty)
 				sbuf_cat(sb, " ");
 			sbuf_cat(sb, cp->provider->name);
 			empty = 0;
 		}
 		if (empty)
 			sbuf_cat(sb, "none");
 		sbuf_cat(sb, "\n");
 	} else if (sc->sc_active && sc->sc_active->provider) {
 		sbuf_printf(sb, "%s\n", sc->sc_active->provider->name);
 	} else {
 		sbuf_cat(sb, "none\n");
 	}
 	sbuf_finish(sb);
 	/* The length passed includes the terminating NUL. */
 	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
 	sbuf_delete(sb);
 }
 
 /*
  * Control-request entry point for the class: check the "version"
  * parameter against G_MULTIPATH_VERSION and dispatch the verb to its
  * handler.  "fail" and "restore" share one handler and differ only in its
  * last argument.  A missing or mismatching version, or an unknown verb, is
  * reported through gctl_error().
  */
 static void
 g_multipath_config(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	uint32_t *version;
 
 	g_topology_assert();
 	version = gctl_get_paraml(req, "version", sizeof(*version));
 	if (version == NULL) {
 		gctl_error(req, "No 'version' argument");
 		return;
 	}
 	if (*version != G_MULTIPATH_VERSION) {
 		gctl_error(req, "Userland and kernel parts are out of sync");
 		return;
 	}
 
 	if (strcmp(verb, "add") == 0)
 		g_multipath_ctl_add(req, mp);
 	else if (strcmp(verb, "prefer") == 0)
 		g_multipath_ctl_prefer(req, mp);
 	else if (strcmp(verb, "create") == 0)
 		g_multipath_ctl_create(req, mp);
 	else if (strcmp(verb, "configure") == 0)
 		g_multipath_ctl_configure(req, mp);
 	else if (strcmp(verb, "stop") == 0)
 		g_multipath_ctl_stop(req, mp);
 	else if (strcmp(verb, "destroy") == 0)
 		g_multipath_ctl_destroy(req, mp);
 	else if (strcmp(verb, "fail") == 0)
 		g_multipath_ctl_fail(req, mp, 1);
 	else if (strcmp(verb, "restore") == 0)
 		g_multipath_ctl_fail(req, mp, 0);
 	else if (strcmp(verb, "remove") == 0)
 		g_multipath_ctl_remove(req, mp);
 	else if (strcmp(verb, "rotate") == 0)
 		g_multipath_ctl_rotate(req, mp);
 	else if (strcmp(verb, "getactive") == 0)
 		g_multipath_ctl_getactive(req, mp);
 	else
 		gctl_error(req, "Unknown verb %s", verb);
 }
 
 /*
  * Append XML state information for the geom to sb.
  *
  * With a consumer (cp != NULL) the <State> element describes that path:
  * NEW, LOST, FAIL, ACTIVE, READ or PASSIVE, tested in that order.  Without
  * a consumer it describes the device: BROKEN when g_multipath_good()
  * returns 0, DEGRADED when not every disk is good or there is only one
  * disk, OPTIMAL otherwise.  When neither cp nor pp is given the <UUID>,
  * <Mode> and <Type> elements are added as well.  Nothing is emitted for a
  * geom without a softc.
  */
 static void
 g_multipath_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_multipath_softc *sc;
 	const char *state, *mode, *type;
 	int good;
 
 	g_topology_assert();
 
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 
 	if (cp != NULL) {
 		if (cp->index & MP_NEW)
 			state = "NEW";
 		else if (cp->index & MP_LOST)
 			state = "LOST";
 		else if (cp->index & MP_FAIL)
 			state = "FAIL";
 		else if (sc->sc_active_active == 1 || sc->sc_active == cp)
 			state = "ACTIVE";
 		else if (sc->sc_active_active == 2)
 			state = "READ";
 		else
 			state = "PASSIVE";
 	} else {
 		good = g_multipath_good(gp);
 		if (good == 0)
 			state = "BROKEN";
 		else if (good != sc->sc_ndisks || sc->sc_ndisks == 1)
 			state = "DEGRADED";
 		else
 			state = "OPTIMAL";
 	}
 	sbuf_printf(sb, "%s<State>%s</State>\n", indent, state);
 
 	/* The remaining elements belong to the geom itself only. */
 	if (cp != NULL || pp != NULL)
 		return;
 
 	if (sc->sc_active_active == 2)
 		mode = "Read";
 	else if (sc->sc_active_active == 1)
 		mode = "Active";
 	else
 		mode = "Passive";
 	if (sc->sc_uuid[0] == 0)
 		type = "MANUAL";
 	else
 		type = "AUTOMATIC";
 
 	sbuf_printf(sb, "%s<UUID>%s</UUID>\n", indent, sc->sc_uuid);
 	sbuf_printf(sb, "%s<Mode>Active/%s</Mode>\n", indent, mode);
 	sbuf_printf(sb, "%s<Type>%s</Type>\n", indent, type);
 }
 
 /* Register the multipath class with GEOM and declare the module version. */
 DECLARE_GEOM_CLASS(g_multipath_class, g_multipath);
 MODULE_VERSION(geom_multipath, 0);
diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c
index 159eff990892..27925b5e49f3 100644
--- a/sys/geom/raid3/g_raid3.c
+++ b/sys/geom/raid3/g_raid3.c
@@ -1,3584 +1,3585 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/eventhandler.h>
 #include <vm/uma.h>
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
 #include <sys/sched.h>
 #include <geom/raid3/g_raid3.h>
 
 /* Kernel feature announcement and the malloc(9) type used by this file. */
 FEATURE(geom_raid3, "GEOM RAID-3 functionality");
 
 static MALLOC_DEFINE(M_RAID3, "raid3_data", "GEOM_RAID3 Data");
 
 /*
  * Knobs published under the "raid3" node below _kern_geom.  Each variable
  * is followed by the sysctl that exposes it; the default is the
  * initializer of the variable.
  */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_RAID3 stuff");
 u_int g_raid3_debug = 0;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RWTUN, &g_raid3_debug, 0,
     "Debug level");
 static u_int g_raid3_timeout = 4;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_raid3_timeout,
     0, "Time to wait on all raid3 components");
 static u_int g_raid3_idletime = 5;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RWTUN,
     &g_raid3_idletime, 0, "Mark components as clean when idling");
 static u_int g_raid3_disconnect_on_failure = 1;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
     &g_raid3_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
 static u_int g_raid3_syncreqs = 2;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
     &g_raid3_syncreqs, 0, "Parallel synchronization I/O requests.");
 static u_int g_raid3_use_malloc = 0;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, use_malloc, CTLFLAG_RDTUN,
     &g_raid3_use_malloc, 0, "Use malloc(9) instead of uma(9).");
 
 /* Upper bounds on the number of allocations of each buffer size. */
 static u_int g_raid3_n64k = 50;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RDTUN, &g_raid3_n64k, 0,
     "Maximum number of 64kB allocations");
 static u_int g_raid3_n16k = 200;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n16k, CTLFLAG_RDTUN, &g_raid3_n16k, 0,
     "Maximum number of 16kB allocations");
 static u_int g_raid3_n4k = 1200;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RDTUN, &g_raid3_n4k, 0,
     "Maximum number of 4kB allocations");
 
 /* Read-only statistics, published under the "stat" sub-node. */
 static SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_RAID3 statistics");
 static u_int g_raid3_parity_mismatch = 0;
 SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD,
     &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode");
 
 /*
  * msleep(9) wrapper that logs at debug level 4 before going to sleep and
  * after waking up.  The arguments are passed to msleep() unchanged and its
  * return value is discarded.
  */
 #define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
 	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
 	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
 	G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
 } while (0)
 
 /* File-scope state; NOTE(review): set up outside this view, presumably in init/fini. */
 static eventhandler_tag g_raid3_post_sync = NULL;
 static int g_raid3_shutdown = 0;
 
 /* Class methods referenced by the class descriptor below. */
 static int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 static g_taste_t g_raid3_taste;
 static void g_raid3_init(struct g_class *mp);
 static void g_raid3_fini(struct g_class *mp);
 
 /* GEOM class descriptor wiring the RAID3 entry points into the framework. */
 struct g_class g_raid3_class = {
 	.name = G_RAID3_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_raid3_config,
 	.taste = g_raid3_taste,
 	.destroy_geom = g_raid3_destroy_geom,
 	.init = g_raid3_init,
 	.fini = g_raid3_fini
 };
 
 /* Forward declarations of helpers that are used before they are defined. */
 static void g_raid3_destroy_provider(struct g_raid3_softc *sc);
 static int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state);
 static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force);
 static void g_raid3_dumpconf(struct sbuf *sb, const char *indent,
     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
 static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type);
 static int g_raid3_register_request(struct bio *pbp);
 static void g_raid3_sync_release(struct g_raid3_softc *sc);
 
 /*
  * Translate a G_RAID3_DISK_STATE_* value into its printable name.
  * Unknown values yield "INVALID".
  */
 static const char *
 g_raid3_disk_state2str(int state)
 {
 	const char *label;
 
 	switch (state) {
 	case G_RAID3_DISK_STATE_NODISK:
 		label = "NODISK";
 		break;
 	case G_RAID3_DISK_STATE_NONE:
 		label = "NONE";
 		break;
 	case G_RAID3_DISK_STATE_NEW:
 		label = "NEW";
 		break;
 	case G_RAID3_DISK_STATE_ACTIVE:
 		label = "ACTIVE";
 		break;
 	case G_RAID3_DISK_STATE_STALE:
 		label = "STALE";
 		break;
 	case G_RAID3_DISK_STATE_SYNCHRONIZING:
 		label = "SYNCHRONIZING";
 		break;
 	case G_RAID3_DISK_STATE_DISCONNECTED:
 		label = "DISCONNECTED";
 		break;
 	default:
 		label = "INVALID";
 		break;
 	}
 	return (label);
 }
 
 /*
  * Translate a G_RAID3_DEVICE_STATE_* value into its printable name.
  * Unknown values yield "INVALID".
  */
 static const char *
 g_raid3_device_state2str(int state)
 {
 	const char *label;
 
 	switch (state) {
 	case G_RAID3_DEVICE_STATE_STARTING:
 		label = "STARTING";
 		break;
 	case G_RAID3_DEVICE_STATE_DEGRADED:
 		label = "DEGRADED";
 		break;
 	case G_RAID3_DEVICE_STATE_COMPLETE:
 		label = "COMPLETE";
 		break;
 	default:
 		label = "INVALID";
 		break;
 	}
 	return (label);
 }
 
 const char *
 g_raid3_get_diskname(struct g_raid3_disk *disk)
 {
 
 	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
 		return ("[unknown]");
 	return (disk->d_name);
 }
 
 /*
  * Allocate a buffer of the given size.
  *
  * malloc(9) is used when g_raid3_use_malloc is set or when g_raid3_zone()
  * maps the size to no zone (G_RAID3_NUM_ZONES); otherwise the buffer comes
  * from the matching UMA zone of the device and the zone's request and
  * failure counters are updated.  flags are passed to the allocator.
  */
 static void *
 g_raid3_alloc(struct g_raid3_softc *sc, size_t size, int flags)
 {
 	enum g_raid3_zones zone;
 	void *buf;
 
 	if (g_raid3_use_malloc)
 		return (malloc(size, M_RAID3, flags));
 	zone = g_raid3_zone(size);
 	if (zone == G_RAID3_NUM_ZONES)
 		return (malloc(size, M_RAID3, flags));
 
 	buf = uma_zalloc_arg(sc->sc_zones[zone].sz_zone, &sc->sc_zones[zone],
 	    flags);
 	sc->sc_zones[zone].sz_requested++;
 	if (buf == NULL)
 		sc->sc_zones[zone].sz_failed++;
 	return (buf);
 }
 
 /*
  * Release a buffer obtained from g_raid3_alloc().  size must be the size
  * that was requested, because it selects between free(9) and the UMA zone
  * in the same way the allocation did.
  */
 static void
 g_raid3_free(struct g_raid3_softc *sc, void *ptr, size_t size)
 {
 	enum g_raid3_zones zone;
 
 	if (g_raid3_use_malloc) {
 		free(ptr, M_RAID3);
 		return;
 	}
 	zone = g_raid3_zone(size);
 	if (zone == G_RAID3_NUM_ZONES) {
 		free(ptr, M_RAID3);
 		return;
 	}
 	uma_zfree_arg(sc->sc_zones[zone].sz_zone, ptr, &sc->sc_zones[zone]);
 }
 
 /*
  * UMA constructor: account for one more item in the zone passed as arg.
  * When the zone has a limit (sz_max > 0) and sz_inuse has reached it, the
  * allocation is refused with ENOMEM and the counter is left untouched.
  */
 static int
 g_raid3_uma_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct g_raid3_zone *zone;
 
 	zone = arg;
 	if (zone->sz_max > 0 && zone->sz_inuse == zone->sz_max)
 		return (ENOMEM);
 	zone->sz_inuse++;
 	return (0);
 }
 
 /*
  * UMA destructor: one item fewer is in use in the zone passed as arg.
  */
 static void
 g_raid3_uma_dtor(void *mem, int size, void *arg)
 {
 	struct g_raid3_zone *zone;
 
 	zone = arg;
 	zone->sz_inuse--;
 }
 
 /*
  * XOR size bytes of src into dst (dst ^= src), 64 bits at a time.
  * size must be a multiple of 128 bytes; this is asserted.  The macro only
  * casts its arguments to the types the function expects.
  */
 #define	g_raid3_xor(src, dst, size)					\
 	_g_raid3_xor((uint64_t *)(src),					\
 	    (uint64_t *)(dst), (size_t)size)
 static void
 _g_raid3_xor(uint64_t *src, uint64_t *dst, size_t size)
 {
 	u_int w;
 
 	KASSERT((size % 128) == 0, ("Invalid size: %zu.", size));
 	while (size > 0) {
 		/* One 128-byte chunk is sixteen 64-bit words. */
 		for (w = 0; w < 16; w++)
 			dst[w] ^= src[w];
 		dst += 16;
 		src += 16;
 		size -= 128;
 	}
 }
 
 /*
  * Return 1 when the data of the bio consists of zero bytes only, 0
  * otherwise.  The data is compared in steps of sizeof(zeros) bytes
  * (sixteen 64-bit words) starting at bio_data, for bio_length bytes.
  */
 static int
 g_raid3_is_zero(struct bio *bp)
 {
 	static const uint64_t zeros[] = {
 	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 	};
 	u_char *cursor;
 	ssize_t left;
 
 	left = bp->bio_length;
 	cursor = (u_char *)bp->bio_data;
 	while (left > 0) {
 		if (bcmp(cursor, zeros, sizeof(zeros)) != 0)
 			return (0);
 		left -= sizeof(zeros);
 		cursor += sizeof(zeros);
 	}
 	return (1);
 }
 
 /*
  * --- Events handling functions ---
  * Events in geom_raid3 are used to maintain disks and device status
  * from one thread to simplify locking.
  */
 /* Release the memory of an event allocated by g_raid3_event_send(). */
 static void
 g_raid3_event_free(struct g_raid3_event *ep)
 {
 	free(ep, M_RAID3);
 }
 
 /*
  * Queue a state-change event and wake up whoever sleeps on the softc or
  * on its request queue.
  *
  * arg   - the softc when G_RAID3_EVENT_DEVICE is set in flags, otherwise
  *         the disk the event refers to.
  * state - requested state, stored in the event.
  * flags - G_RAID3_EVENT_* flags, stored in the event.
  *
  * With G_RAID3_EVENT_DONTWAIT the function returns 0 right after queueing.
  * Otherwise the caller must hold sc_lock exclusively; the lock is released
  * while waiting for G_RAID3_EVENT_DONE to appear in the event, the event
  * is freed here, the lock is taken again and the event's e_error is
  * returned.
  */
 int
 g_raid3_event_send(void *arg, int state, int flags)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct g_raid3_event *ep;
 	int error;
 
 	ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK);
 	G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep);
 	if ((flags & G_RAID3_EVENT_DEVICE) != 0) {
 		disk = NULL;
 		sc = arg;
 	} else {
 		disk = arg;
 		sc = disk->d_softc;
 	}
 	ep->e_disk = disk;
 	ep->e_state = state;
 	ep->e_flags = flags;
 	ep->e_error = 0;
 	/* Append to the event queue under its own mutex. */
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
 	mtx_unlock(&sc->sc_events_mtx);
 	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	wakeup(&sc->sc_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 	if ((flags & G_RAID3_EVENT_DONTWAIT) != 0)
 		return (0);
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
 	sx_xunlock(&sc->sc_lock);
 	/*
 	 * The flag is polled without the mutex; the mutex is taken for each
 	 * sleep and PDROP leaves it released afterwards.  The sleep is
 	 * bounded by a timeout of 5 * hz, after which the flag is rechecked.
 	 */
 	while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) {
 		mtx_lock(&sc->sc_events_mtx);
 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event",
 		    hz * 5);
 	}
 	error = ep->e_error;
 	g_raid3_event_free(ep);
 	sx_xlock(&sc->sc_lock);
 	return (error);
 }
 
 /*
  * Return the event at the head of the device's event queue without
  * removing it, or NULL when the queue is empty.
  */
 static struct g_raid3_event *
 g_raid3_event_get(struct g_raid3_softc *sc)
 {
 	struct g_raid3_event *head;
 
 	mtx_lock(&sc->sc_events_mtx);
 	head = TAILQ_FIRST(&sc->sc_events);
 	mtx_unlock(&sc->sc_events_mtx);
 
 	return (head);
 }
 
 /* Unlink ep from the device's event queue; the event is not freed. */
 static void
 g_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep)
 {
 	struct mtx *lock;
 
 	lock = &sc->sc_events_mtx;
 	mtx_lock(lock);
 	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 	mtx_unlock(lock);
 }
 
 /*
  * Cancel every queued event that refers to the given disk.  Device
  * events and events of other disks are left in the queue.
  *
  * Events sent with G_RAID3_EVENT_DONTWAIT have no waiter and are freed
  * here.  For the others the sender sleeps in g_raid3_event_send() until
  * G_RAID3_EVENT_DONE is set and then frees the event itself, so the event
  * is completed with ECANCELED and marked done before the sender is woken;
  * without the flag the sender would go back to sleep forever.  This is the
  * same completion sequence g_raid3_destroy_device() uses.
  *
  * The caller must hold sc_lock exclusively.
  */
 static void
 g_raid3_event_cancel(struct g_raid3_disk *disk)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_event *ep, *tmpep;
 
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
 		if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0)
 			continue;
 		if (ep->e_disk != disk)
 			continue;
 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 		if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) {
 			g_raid3_event_free(ep);
 			continue;
 		}
 		/* ep belongs to the sender once the DONE flag is visible. */
 		ep->e_error = ECANCELED;
 		ep->e_flags |= G_RAID3_EVENT_DONE;
 		wakeup(ep);
 	}
 	mtx_unlock(&sc->sc_events_mtx);
 }
 
 /*
  * Count the disks of the device that are in the given state.  Slots in
  * state G_RAID3_DISK_STATE_NODISK are never counted; passing -1 as state
  * counts every other slot.  The caller must hold sc_lock.
  */
 u_int
 g_raid3_ndisks(struct g_raid3_softc *sc, int state)
 {
 	struct g_raid3_disk *disk;
 	u_int count, idx;
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	count = 0;
 	for (idx = 0; idx < sc->sc_ndisks; idx++) {
 		disk = &sc->sc_disks[idx];
 		if (disk->d_state != G_RAID3_DISK_STATE_NODISK &&
 		    (state == -1 || disk->d_state == state))
 			count++;
 	}
 	return (count);
 }
 
 /*
  * Count the bios in the device's request queue whose bio_from is the
  * given consumer.  The queue is walked under sc_queue_mtx.
  */
 static u_int
 g_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp)
 {
 	struct bio *cur;
 	u_int pending;
 
 	pending = 0;
 	mtx_lock(&sc->sc_queue_mtx);
 	TAILQ_FOREACH(cur, &sc->sc_queue.queue, bio_queue) {
 		if (cur->bio_from != cp)
 			continue;
 		pending++;
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 	return (pending);
 }
 
 /*
  * Tell whether the consumer still has work attached to it: returns 1 when
  * its index field is positive or when requests from it sit in the
  * device's queue, 0 otherwise.  The queue is only inspected when the index
  * check did not already answer the question.
  */
 static int
 g_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp)
 {
 
 	if (cp->index > 0) {
 		G_RAID3_DEBUG(2,
 		    "I/O requests for %s exist, can't destroy it now.",
 		    cp->provider->name);
 		return (1);
 	}
 	if (g_raid3_nrequests(sc, cp) == 0)
 		return (0);
 	G_RAID3_DEBUG(2,
 	    "I/O requests for %s in queue, can't destroy it now.",
 	    cp->provider->name);
 	return (1);
 }
 
 /*
  * Event callback: detach and destroy the consumer passed as arg.  Runs
  * with the topology lock held; flags is unused.
  */
 static void
 g_raid3_destroy_consumer(void *arg, int flags __unused)
 {
 	struct g_consumer *victim;
 
 	g_topology_assert();
 
 	victim = arg;
 	G_RAID3_DEBUG(1, "Consumer %s destroyed.", victim->provider->name);
 	g_detach(victim);
 	g_destroy_consumer(victim);
 }
 
 /*
  * Close, detach and destroy a consumer, unless it is still busy.
  *
  * The consumer's private pointer is cleared first in every case.  If
  * g_raid3_is_busy() reports outstanding work nothing else happens.
  * Otherwise all access counts are dropped and the consumer is destroyed,
  * either right here or, when it was open for writing, from a posted event
  * (see the comment in the body).  Requires the topology lock.
  */
 static void
 g_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	int retaste_wait;
 
 	g_topology_assert();
 
 	cp->private = NULL;
 	if (g_raid3_is_busy(sc, cp))
 		return;
 	G_RAID3_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
 	pp = cp->provider;
 	retaste_wait = 0;
 	/*
 	 * Destruction is deferred only for a consumer with exactly one write
 	 * reference whose provider's geom is not withering.
 	 */
 	if (cp->acw == 1) {
 		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
 			retaste_wait = 1;
 	}
 	G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
 	    -cp->acw, -cp->ace, 0);
 	/* Drop whatever access counts are still held. */
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
 	if (retaste_wait) {
 		/*
 		 * After the retaste event has been sent (inside g_access()),
 		 * we can send the event to detach and destroy the consumer.
 		 * A class that has a consumer connected to the given provider
 		 * will not receive the retaste event for that provider.
 		 * This is how retaste events are ignored when consumers
 		 * opened for writing are closed: the consumer is detached and
 		 * destroyed only after the retaste event has been sent.
 		 */
 		g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL);
 		return;
 	}
 	G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 }
 
 /*
  * Create a consumer for the disk, attach it to provider pp and open it
  * with r1w1e1 access.
  *
  * Must be called without the topology lock; the lock is taken and
  * released internally.  On success the consumer is recorded in the disk,
  * its private pointer refers back to the disk, its index is reset to 0 and
  * 0 is returned.  On failure the consumer is destroyed and the error from
  * g_attach() or g_access() is returned.
  *
  * Both failure paths tear the consumer down while the topology lock is
  * still held, since g_detach() and g_destroy_consumer() modify the
  * topology.
  */
 static int
 g_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp)
 {
 	struct g_consumer *cp;
 	int error;
 
 	g_topology_assert_not();
 	KASSERT(disk->d_consumer == NULL,
 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
 
 	g_topology_lock();
 	cp = g_new_consumer(disk->d_softc->sc_geom);
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		g_topology_unlock();
 		return (error);
 	}
 	error = g_access(cp, 1, 1, 1);
 	if (error != 0) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		g_topology_unlock();
 		G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).",
 		    pp->name, error);
 		return (error);
 	}
 	g_topology_unlock();
 	disk->d_consumer = cp;
 	disk->d_consumer->private = disk;
 	disk->d_consumer->index = 0;
 	G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk));
 	return (0);
 }
 
 /*
  * Get rid of a consumer.  NULL is accepted and ignored.  A consumer that
  * is attached to a provider goes through g_raid3_kill_consumer(); one that
  * is not attached is destroyed directly.  Requires the topology lock.
  */
 static void
 g_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp)
 {
 
 	g_topology_assert();
 
 	if (cp == NULL)
 		return;
 	if (cp->provider == NULL) {
 		g_destroy_consumer(cp);
 		return;
 	}
 	g_raid3_kill_consumer(sc, cp);
 }
 
 /*
  * Initialize the disk slot selected by md->md_no: connect it to provider
  * pp (which creates the consumer, attaches it and opens it r1w1e1) and
  * seed the slot's state, flags, generation and synchronization fields from
  * the metadata.
  *
  * Returns the disk, or NULL when the connection failed.  When errorp is
  * not NULL it receives 0 on success or the connection error.
  */
 static struct g_raid3_disk *
 g_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp,
     struct g_raid3_metadata *md, int *errorp)
 {
 	struct g_raid3_disk *slot;
 	int rc;
 
 	slot = &sc->sc_disks[md->md_no];
 	rc = g_raid3_connect_disk(slot, pp);
 	if (errorp != NULL)
 		*errorp = rc;
 	if (rc != 0)
 		return (NULL);
 
 	slot->d_state = G_RAID3_DISK_STATE_NONE;
 	slot->d_flags = md->md_dflags;
 	/* A provider name stored in the metadata marks the disk hardcoded. */
 	if (md->md_provider[0] != '\0')
 		slot->d_flags |= G_RAID3_DISK_FLAG_HARDCODED;
 	slot->d_genid = md->md_genid;
 	slot->d_sync.ds_consumer = NULL;
 	slot->d_sync.ds_offset = md->md_sync_offset;
 	slot->d_sync.ds_offset_done = md->md_sync_offset;
 	slot->d_sync.ds_syncid = md->md_syncid;
 	return (slot);
 }
 
 /*
  * Take a disk out of service: cancel its pending events, stop a running
  * synchronization if the disk was synchronizing, disconnect its consumer
  * and mark the slot G_RAID3_DISK_STATE_NODISK.  A slot that is already in
  * NODISK state is left untouched.
  *
  * Called with sc_lock held exclusively and without the topology lock,
  * which is taken only around the consumer disconnect.
  */
 static void
 g_raid3_destroy_disk(struct g_raid3_disk *disk)
 {
 	struct g_raid3_softc *sc;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 		return;
 	g_raid3_event_cancel(disk);
 	switch (disk->d_state) {
 	case G_RAID3_DISK_STATE_SYNCHRONIZING:
 		if (sc->sc_syncdisk != NULL)
 			g_raid3_sync_stop(sc, 1);
 		/* FALLTHROUGH */
 	case G_RAID3_DISK_STATE_NEW:
 	case G_RAID3_DISK_STATE_STALE:
 	case G_RAID3_DISK_STATE_ACTIVE:
 		g_topology_lock();
 		g_raid3_disconnect_consumer(sc, disk->d_consumer);
 		g_topology_unlock();
 		disk->d_consumer = NULL;
 		break;
 	default:
 		/* Any other state reaching this point is a programming error. */
 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 	}
 	disk->d_state = G_RAID3_DISK_STATE_NODISK;
 }
 
 /*
  * Tear down a whole device: destroy its provider, write final metadata to
  * and destroy every present disk, complete or free all queued events,
  * drain the callout, wither the synchronization geom and the main geom,
  * and finally destroy the UMA zones, mutexes and the sx lock.
  *
  * Called with sc_lock held exclusively and without the topology lock.
  * sc_lock is released and destroyed here, so the caller must not use it
  * afterwards.
  */
 static void
 g_raid3_destroy_device(struct g_raid3_softc *sc)
 {
 	struct g_raid3_event *ep;
 	struct g_raid3_disk *disk;
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	u_int n;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	gp = sc->sc_geom;
 	if (sc->sc_provider != NULL)
 		g_raid3_destroy_provider(sc);
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		disk = &sc->sc_disks[n];
 		if (disk->d_state != G_RAID3_DISK_STATE_NODISK) {
 			/* Record the disk as clean before letting it go. */
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 			g_raid3_update_metadata(disk);
 			g_raid3_destroy_disk(disk);
 		}
 	}
 	/*
 	 * Flush the event queue: fire-and-forget events are freed, waited-for
 	 * events are completed with ECANCELED and their sender is woken.
 	 */
 	while ((ep = g_raid3_event_get(sc)) != NULL) {
 		g_raid3_event_remove(sc, ep);
 		if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0)
 			g_raid3_event_free(ep);
 		else {
 			ep->e_error = ECANCELED;
 			ep->e_flags |= G_RAID3_EVENT_DONE;
 			G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep);
 			mtx_lock(&sc->sc_events_mtx);
 			wakeup(ep);
 			mtx_unlock(&sc->sc_events_mtx);
 		}
 	}
 	callout_drain(&sc->sc_callout);
 	/* Only the first consumer of the synchronization geom is handled. */
 	cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer);
 	g_topology_lock();
 	if (cp != NULL)
 		g_raid3_disconnect_consumer(sc, cp);
 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
 	G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom(gp, ENXIO);
 	g_topology_unlock();
 	/* The zones exist only when the UMA allocator is in use. */
 	if (!g_raid3_use_malloc) {
 		uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone);
 		uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone);
 		uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone);
 	}
 	mtx_destroy(&sc->sc_queue_mtx);
 	mtx_destroy(&sc->sc_events_mtx);
 	sx_xunlock(&sc->sc_lock);
 	sx_destroy(&sc->sc_lock);
 }
 
 /*
  * Orphan method: the provider below the consumer is going away.  If the
  * consumer still belongs to a disk, request a syncid bump on the device and
  * queue a non-blocking event that moves the disk to DISCONNECTED.
  */
 static void
 g_raid3_orphan(struct g_consumer *cp)
 {
 	struct g_raid3_disk *owner;
 
 	g_topology_assert();
 
 	owner = cp->private;
 	if (owner != NULL) {
 		owner->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID;
 		g_raid3_event_send(owner, G_RAID3_DISK_STATE_DISCONNECTED,
 		    G_RAID3_EVENT_DONTWAIT);
 	}
 }
 
 /*
  * Write one sector of metadata to the last sector of the disk's provider.
  * With md == NULL the sector is written as all zeros, which clears the
  * metadata.
  *
  * Returns 0 or the error from g_write_data().  On error the disk is
  * flagged G_RAID3_DISK_FLAG_BROKEN (the first failure is logged at level
  * 0, later ones at level 1) and, when disconnect-on-failure is enabled and
  * the device is COMPLETE, a genid bump is requested and a non-blocking
  * DISCONNECTED event is queued for the disk.
  *
  * Called with sc_lock held and without the topology lock.
  */
 static int
 g_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
 {
 	struct g_raid3_softc *sc;
 	struct g_consumer *cp;
 	off_t offset, length;
 	u_char *sector;
 	int error;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	cp = disk->d_consumer;
 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 
 	length = cp->provider->sectorsize;
 	offset = cp->provider->mediasize - length;
 	sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO);
 	if (md != NULL)
 		raid3_metadata_encode(md, sector);
 	error = g_write_data(cp, offset, sector, length);
 	free(sector, M_RAID3);
 	if (error == 0)
 		return (0);
 
 	if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) != 0) {
 		G_RAID3_DEBUG(1, "Cannot write metadata on %s "
 		    "(device=%s, error=%d).",
 		    g_raid3_get_diskname(disk), sc->sc_name, error);
 	} else {
 		G_RAID3_DEBUG(0, "Cannot write metadata on %s "
 		    "(device=%s, error=%d).",
 		    g_raid3_get_diskname(disk), sc->sc_name, error);
 		disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN;
 	}
 	if (g_raid3_disconnect_on_failure &&
 	    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
 		sc->sc_bump_id |= G_RAID3_BUMP_GENID;
 		g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED,
 		    G_RAID3_EVENT_DONTWAIT);
 	}
 	return (error);
 }
 
 /*
  * Clear the metadata of a disk by writing it with a NULL record.  The
  * outcome is logged and the write error, if any, is returned.  Called
  * with sc_lock held and without the topology lock.
  */
 int
 g_raid3_clear_metadata(struct g_raid3_disk *disk)
 {
 	int rc;
 
 	g_topology_assert_not();
 	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
 
 	rc = g_raid3_write_metadata(disk, NULL);
 	if (rc != 0) {
 		G_RAID3_DEBUG(0,
 		    "Cannot clear metadata on disk %s (error=%d).",
 		    g_raid3_get_diskname(disk), rc);
 	} else {
 		G_RAID3_DEBUG(2, "Metadata on %s cleared.",
 		    g_raid3_get_diskname(disk));
 	}
 	return (rc);
 }
 
 /*
  * Build the metadata record for a disk from the current device and disk
  * state.  The record is zeroed first, so fields that are not set below
  * stay zero: md_sync_offset unless the disk is synchronizing, md_provider
  * unless the disk is hardcoded and attached, md_provsize unless attached.
  */
 void
 g_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
 {
 	struct g_raid3_softc *sc;
 	struct g_provider *pp;
 
 	bzero(md, sizeof(*md));
 	sc = disk->d_softc;
 
 	/* Device-wide part. */
 	strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic));
 	md->md_version = G_RAID3_VERSION;
 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
 	md->md_id = sc->sc_id;
 	md->md_all = sc->sc_ndisks;
 	md->md_genid = sc->sc_genid;
 	md->md_mediasize = sc->sc_mediasize;
 	md->md_sectorsize = sc->sc_sectorsize;
 	md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK);
 
 	/* Per-disk part. */
 	md->md_no = disk->d_no;
 	md->md_syncid = disk->d_sync.ds_syncid;
 	md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK);
 	if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
 		md->md_sync_offset =
 		    disk->d_sync.ds_offset_done / (sc->sc_ndisks - 1);
 	}
 
 	/* Provider-dependent part, only for an attached consumer. */
 	pp = NULL;
 	if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL)
 		pp = disk->d_consumer->provider;
 	if (pp != NULL) {
 		if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0) {
 			strlcpy(md->md_provider, pp->name,
 			    sizeof(md->md_provider));
 		}
 		md->md_provsize = pp->mediasize;
 	}
 }
 
 /*
  * Regenerate the metadata of the given component from the current in-core
  * state and write it out.  A write failure is only logged; nothing is
  * returned to the caller.
  *
  * Asserts that the topology lock is not held and that sc_lock is held.
  */
 void
 g_raid3_update_metadata(struct g_raid3_disk *disk)
 {
 	struct g_raid3_metadata md;
 	struct g_raid3_softc *sc;
 	int rv;
 
 	sc = disk->d_softc;
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	g_raid3_fill_metadata(disk, &md);
 	rv = g_raid3_write_metadata(disk, &md);
 	if (rv != 0) {
 		G_RAID3_DEBUG(0,
 		    "Cannot update metadata on disk %s (error=%d).",
 		    g_raid3_get_diskname(disk), rv);
 		return;
 	}
 	G_RAID3_DEBUG(2, "Metadata on %s updated.",
 	    g_raid3_get_diskname(disk));
 }
 
 /*
  * Increment the device's syncid and store the new value in the metadata of
  * every ACTIVE or SYNCHRONIZING component.
  *
  * Asserts that the topology lock is not held, that sc_lock is exclusively
  * held and that at least one component is ACTIVE.
  */
 static void
 g_raid3_bump_syncid(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *dp;
 	u_int i;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 	KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_syncid++;
 	G_RAID3_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
 	    sc->sc_syncid);
 	for (i = 0; i < sc->sc_ndisks; i++) {
 		dp = &sc->sc_disks[i];
 		if (dp->d_state != G_RAID3_DISK_STATE_ACTIVE &&
 		    dp->d_state != G_RAID3_DISK_STATE_SYNCHRONIZING)
 			continue;
 		dp->d_sync.ds_syncid = sc->sc_syncid;
 		g_raid3_update_metadata(dp);
 	}
 }
 
 /*
  * Increment the device's genid and store the new value in the metadata of
  * every ACTIVE or SYNCHRONIZING component.
  *
  * Asserts that the topology lock is not held, that sc_lock is exclusively
  * held and that at least one component is ACTIVE.
  */
 static void
 g_raid3_bump_genid(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *dp;
 	u_int i;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 	KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_genid++;
 	G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
 	    sc->sc_genid);
 	for (i = 0; i < sc->sc_ndisks; i++) {
 		dp = &sc->sc_disks[i];
 		if (dp->d_state != G_RAID3_DISK_STATE_ACTIVE &&
 		    dp->d_state != G_RAID3_DISK_STATE_SYNCHRONIZING)
 			continue;
 		dp->d_genid = sc->sc_genid;
 		g_raid3_update_metadata(dp);
 	}
 }
 
 /*
  * Mark the device idle and its ACTIVE components clean, if possible.
  *
  * Nothing is done (and 0 is returned) when there is no provider, the device
  * has G_RAID3_DEVICE_FLAG_NOFAILSYNC set, it is already idle, or writes are
  * still outstanding.  When 'acw' is positive, or is -1 and the provider is
  * open for writing, the device is marked idle only after g_raid3_idletime
  * has elapsed since the last write, unless g_raid3_shutdown is set; until
  * then the remaining time is returned.  Otherwise sc_idle is set, the DIRTY
  * flag is cleared on every ACTIVE component, its metadata is rewritten and
  * 0 is returned.
  */
 static int
 g_raid3_idle(struct g_raid3_softc *sc, int acw)
 {
 	struct g_raid3_disk *dp;
 	u_int n;
 	int remaining;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	if (sc->sc_provider == NULL ||
 	    (sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0 ||
 	    sc->sc_idle || sc->sc_writes > 0)
 		return (0);
 	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
 		remaining = g_raid3_idletime -
 		    (time_uptime - sc->sc_last_write);
 		if (remaining > 0 && !g_raid3_shutdown)
 			return (remaining);
 	}
 	sc->sc_idle = 1;
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		dp = &sc->sc_disks[n];
 		if (dp->d_state == G_RAID3_DISK_STATE_ACTIVE) {
 			G_RAID3_DEBUG(1,
 			    "Disk %s (device %s) marked as clean.",
 			    g_raid3_get_diskname(dp), sc->sc_name);
 			dp->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 			g_raid3_update_metadata(dp);
 		}
 	}
 	return (0);
 }
 
 /*
  * Leave the idle state: clear sc_idle, record the current uptime as the
  * time of the last write and set the DIRTY flag on every ACTIVE component,
  * rewriting its metadata.  Does nothing when the device has
  * G_RAID3_DEVICE_FLAG_NOFAILSYNC set.
  */
 static void
 g_raid3_unidle(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *dp;
 	u_int n;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0)
 		return;
 	sc->sc_idle = 0;
 	sc->sc_last_write = time_uptime;
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		dp = &sc->sc_disks[n];
 		if (dp->d_state == G_RAID3_DISK_STATE_ACTIVE) {
 			G_RAID3_DEBUG(1,
 			    "Disk %s (device %s) marked as dirty.",
 			    g_raid3_get_diskname(dp), sc->sc_name);
 			dp->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
 			g_raid3_update_metadata(dp);
 		}
 	}
 }
 
 /*
  * Child bios of a request are kept on a singly-linked list: the bio_driver1
  * field of the parent bio is the list head and the bio_caller1 field of each
  * child bio points to the next element on the list.
  */
 /* Head of the child list kept in parent bio 'pbp'; usable as an lvalue. */
 #define	G_RAID3_HEAD_BIO(pbp)	(pbp)->bio_driver1
 
 /* Successor of child bio 'cbp' on the list; usable as an lvalue. */
 #define	G_RAID3_NEXT_BIO(cbp)	(cbp)->bio_caller1
 
 /*
  * Walk 'bp' over every child of 'pbp'.  The successor is read from 'bp'
  * after the body runs, so the body must not unlink or free 'bp'.
  */
 #define	G_RAID3_FOREACH_BIO(pbp, bp)					\
 	for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL;		\
 	    (bp) = G_RAID3_NEXT_BIO(bp))
 
 /*
  * Same walk, but the successor is saved in 'tmpbp' before the body runs, so
  * the body may unlink or destroy 'bp'.
  */
 #define	G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp)			\
 	for ((bp) = G_RAID3_HEAD_BIO(pbp);				\
 	    (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1);	\
 	    (bp) = (tmpbp))
 
 /*
  * Initialize the child list of parent bio 'pbp' to empty.
  */
 static void
 g_raid3_init_bio(struct bio *pbp)
 {
 
 	G_RAID3_HEAD_BIO(pbp) = NULL;
 }
 
 /*
  * Unlink child bio 'cbp' from its parent's child list without destroying
  * it.  The child's own next pointer is cleared in every case, including
  * when the child was not found on the list.
  */
 static void
 g_raid3_remove_bio(struct bio *cbp)
 {
 	struct bio *parent, *prev;
 
 	parent = cbp->bio_parent;
 	if (G_RAID3_HEAD_BIO(parent) != cbp) {
 		/* Not the head: find the predecessor and bypass cbp. */
 		G_RAID3_FOREACH_BIO(parent, prev) {
 			if (G_RAID3_NEXT_BIO(prev) != cbp)
 				continue;
 			G_RAID3_NEXT_BIO(prev) = G_RAID3_NEXT_BIO(cbp);
 			break;
 		}
 	} else {
 		G_RAID3_HEAD_BIO(parent) = G_RAID3_NEXT_BIO(cbp);
 	}
 	G_RAID3_NEXT_BIO(cbp) = NULL;
 }
 
 /*
  * Move child bio 'sbp' into the list position currently occupied by 'dbp'.
  *
  * 'sbp' is first unlinked from its own parent's list, then linked in place
  * of 'dbp' on the list of dbp's parent.  'dbp' ends up off the list with
  * its next pointer cleared; it is not destroyed here.
  */
 static void
 g_raid3_replace_bio(struct bio *sbp, struct bio *dbp)
 {
 	struct bio *parent, *prev;
 
 	g_raid3_remove_bio(sbp);
 	parent = dbp->bio_parent;
 	/* sbp inherits dbp's successor. */
 	G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp);
 	if (G_RAID3_HEAD_BIO(parent) != dbp) {
 		/* Redirect dbp's predecessor to sbp. */
 		G_RAID3_FOREACH_BIO(parent, prev) {
 			if (G_RAID3_NEXT_BIO(prev) != dbp)
 				continue;
 			G_RAID3_NEXT_BIO(prev) = sbp;
 			break;
 		}
 	} else {
 		G_RAID3_HEAD_BIO(parent) = sbp;
 	}
 	G_RAID3_NEXT_BIO(dbp) = NULL;
 }
 
 /*
  * Destroy child bio 'cbp': drop it from the parent's child count, release
  * its data buffer, unlink it from the parent's child list and free the bio.
  *
  * The buffer size passed to g_raid3_free() is derived from the parent's
  * length (bio_length / (sc_ndisks - 1)), not from the child itself.  A
  * child that is not found on the list is still destroyed.
  */
 static void
 g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp)
 {
 	struct bio *bp, *pbp;
 	size_t size;
 
 	pbp = cbp->bio_parent;
 	pbp->bio_children--;
 	KASSERT(cbp->bio_data != NULL, ("NULL bio_data"));
 	size = pbp->bio_length / (sc->sc_ndisks - 1);
 	g_raid3_free(sc, cbp->bio_data, size);
 	if (G_RAID3_HEAD_BIO(pbp) == cbp) {
 		G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp);
 		G_RAID3_NEXT_BIO(cbp) = NULL;
 	} else {
 		/* Look for the predecessor of cbp. */
 		G_RAID3_FOREACH_BIO(pbp, bp) {
 			if (G_RAID3_NEXT_BIO(bp) == cbp)
 				break;
 		}
 		if (bp != NULL) {
 			/* The next pointer lives in bio_caller1. */
 			KASSERT(G_RAID3_NEXT_BIO(bp) != NULL,
 			    ("NULL bp->bio_caller1"));
 			G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp);
 			G_RAID3_NEXT_BIO(cbp) = NULL;
 		}
 	}
 	g_destroy_bio(cbp);
 }
 
 /*
  * Clone parent bio 'pbp', give the clone its own data buffer of
  * bio_length / (sc_ndisks - 1) bytes and append it to the parent's child
  * list.
  *
  * The buffer is requested with M_WAITOK when the parent is flagged
  * G_RAID3_BIO_CFLAG_REGULAR and with M_NOWAIT otherwise.  Returns the new
  * child, or NULL when either the clone or the buffer could not be
  * allocated; in the latter case the parent's child count is restored.
  */
 static struct bio *
 g_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp)
 {
 	struct bio *cbp, *tail;
 	size_t size;
 	int memflag;
 
 	cbp = g_clone_bio(pbp);
 	if (cbp == NULL)
 		return (NULL);
 	size = pbp->bio_length / (sc->sc_ndisks - 1);
 	memflag = ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) ?
 	    M_WAITOK : M_NOWAIT;
 	cbp->bio_data = g_raid3_alloc(sc, size, memflag);
 	if (cbp->bio_data == NULL) {
 		pbp->bio_children--;
 		g_destroy_bio(cbp);
 		return (NULL);
 	}
 
 	/* Append to the end of the parent's child list. */
 	G_RAID3_NEXT_BIO(cbp) = NULL;
 	tail = G_RAID3_HEAD_BIO(pbp);
 	if (tail == NULL) {
 		G_RAID3_HEAD_BIO(pbp) = cbp;
 		return (cbp);
 	}
 	while (G_RAID3_NEXT_BIO(tail) != NULL)
 		tail = G_RAID3_NEXT_BIO(tail);
 	G_RAID3_NEXT_BIO(tail) = cbp;
 	return (cbp);
 }
 
 /*
  * Distribute the data of parent request 'pbp' over its child bios and send
  * the children to their components.
  *
  * Unless the parent carries G_RAID3_BIO_PFLAG_NOPARITY, the child flagged
  * G_RAID3_BIO_CFLAG_PARITY is located first; it receives no data from the
  * parent and is instead filled with the XOR of all other children.
  * Children flagged G_RAID3_BIO_CFLAG_NODISK contribute to the parity and
  * are then destroyed.  Every child still on the list is issued to its
  * disk's consumer, bumping the consumer's index and sc_writes.
  */
 static void
 g_raid3_scatter(struct bio *pbp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct bio *bp, *cbp, *tmpbp;
 	off_t atom, cadd, padd, left;
 	int first;
 
 	sc = pbp->bio_to->geom->softc;
 	bp = NULL;
 	if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) {
 		/*
 		 * Find bio for which we should calculate data.
 		 */
 		G_RAID3_FOREACH_BIO(pbp, cbp) {
 			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
 				bp = cbp;
 				break;
 			}
 		}
 		KASSERT(bp != NULL, ("NULL parity bio."));
 	}
 	/*
 	 * Each sector of the parent is split into 'atom'-sized pieces, one
 	 * per non-parity child: 'padd' advances through the parent buffer,
 	 * 'cadd' through the children's buffers.
 	 */
 	atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
 	cadd = padd = 0;
 	for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) {
 		G_RAID3_FOREACH_BIO(pbp, cbp) {
 			if (cbp == bp)
 				continue;
 			bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom);
 			padd += atom;
 		}
 		cadd += atom;
 	}
 	if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) {
 		/*
 		 * Calculate parity.
 		 */
 		first = 1;
 		/* Safe iteration: NODISK children are destroyed in the body. */
 		G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) {
 			if (cbp == bp)
 				continue;
 			if (first) {
 				/* Seed the parity buffer with the first child. */
 				bcopy(cbp->bio_data, bp->bio_data,
 				    bp->bio_length);
 				first = 0;
 			} else {
 				g_raid3_xor(cbp->bio_data, bp->bio_data,
 				    bp->bio_length);
 			}
 			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0)
 				g_raid3_destroy_bio(sc, cbp);
 		}
 	}
 	/* Send every child that is still on the list. */
 	G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) {
 		struct g_consumer *cp;
 
 		disk = cbp->bio_caller2;
 		cp = disk->d_consumer;
 		cbp->bio_to = cp->provider;
 		G_RAID3_LOGREQ(3, cbp, "Sending request.");
 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		cp->index++;
 		sc->sc_writes++;
 		g_io_request(cbp, cp);
 	}
 }
 
 /*
  * Finish parent READ request 'pbp' once all of its children have completed.
  *
  * The children are inspected for errors.  A single failure can be handled
  * either by substituting the parity child already read for verification, or
  * by re-issuing the failed child to the disk stored in pbp->bio_driver2 (in
  * which case this function returns and runs again later).  When a parity
  * child is present, the missing data is rebuilt (or, in VERIFY mode, the
  * parity is checked) by XOR.  Finally the children's buffers are interleaved
  * back into the parent buffer, all children are destroyed and the parent is
  * delivered with pbp->bio_error.
  */
 static void
 g_raid3_gather(struct bio *pbp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct bio *xbp, *fbp, *cbp;
 	off_t atom, cadd, padd, left;
 
 	sc = pbp->bio_to->geom->softc;
 	/*
 	 * Find bio for which we have to calculate data.
 	 * While going through this path, check if all requests
 	 * succeeded, if not, deny whole request.
 	 * If we're in COMPLETE mode, we allow one request to fail,
 	 * so if we find one, we're sending it to the parity consumer.
 	 * If there are more failed requests, we deny whole request.
 	 */
 	/* xbp: the parity child, if any; fbp: the first failed child. */
 	xbp = fbp = NULL;
 	G_RAID3_FOREACH_BIO(pbp, cbp) {
 		if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
 			KASSERT(xbp == NULL, ("More than one parity bio."));
 			xbp = cbp;
 		}
 		if (cbp->bio_error == 0)
 			continue;
 		/*
 		 * Found failed request.
 		 */
 		if (fbp == NULL) {
 			if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) {
 				/*
 				 * We are already in degraded mode, so we can't
 				 * accept any failures.
 				 */
 				if (pbp->bio_error == 0)
 					pbp->bio_error = cbp->bio_error;
 			} else {
 				fbp = cbp;
 			}
 		} else {
 			/*
 			 * Next failed request, that's too many.
 			 */
 			if (pbp->bio_error == 0)
 				pbp->bio_error = fbp->bio_error;
 		}
 		/* Mark the failing disk broken and optionally disconnect it. */
 		disk = cbp->bio_caller2;
 		if (disk == NULL)
 			continue;
 		if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) {
 			disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN;
 			G_RAID3_LOGREQ(0, cbp, "Request failed (error=%d).",
 			    cbp->bio_error);
 		} else {
 			G_RAID3_LOGREQ(1, cbp, "Request failed (error=%d).",
 			    cbp->bio_error);
 		}
 		if (g_raid3_disconnect_on_failure &&
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
 			sc->sc_bump_id |= G_RAID3_BUMP_GENID;
 			g_raid3_event_send(disk,
 			    G_RAID3_DISK_STATE_DISCONNECTED,
 			    G_RAID3_EVENT_DONTWAIT);
 		}
 	}
 	if (pbp->bio_error != 0)
 		goto finish;
 	if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) {
 		/*
 		 * The parity child was read as well, so give up on
 		 * verification: move the parity child into the failed child's
 		 * list position and discard the failed child.  The parity
 		 * pass below then turns the parity buffer into the missing
 		 * data.
 		 */
 		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY;
 		if (xbp != fbp)
 			g_raid3_replace_bio(xbp, fbp);
 		g_raid3_destroy_bio(sc, fbp);
 	} else if (fbp != NULL) {
 		struct g_consumer *cp;
 
 		/*
 		 * One request failed, so send the same request to
 		 * the parity consumer.
 		 */
 		disk = pbp->bio_driver2;
 		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
 			pbp->bio_error = fbp->bio_error;
 			goto finish;
 		}
 		pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
 		/* The re-issued child will be counted again on completion. */
 		pbp->bio_inbed--;
 		/* Reset the failed child so that it can be issued again. */
 		fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR);
 		if (disk->d_no == sc->sc_ndisks - 1)
 			fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
 		fbp->bio_error = 0;
 		fbp->bio_completed = 0;
 		fbp->bio_children = 0;
 		fbp->bio_inbed = 0;
 		cp = disk->d_consumer;
 		fbp->bio_caller2 = disk;
 		fbp->bio_to = cp->provider;
 		G_RAID3_LOGREQ(3, fbp, "Sending request (recover).");
 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		cp->index++;
 		g_io_request(fbp, cp);
 		return;
 	}
 	if (xbp != NULL) {
 		/*
 		 * Calculate parity.
 		 */
 		/* XOR every non-parity child into the parity child's buffer. */
 		G_RAID3_FOREACH_BIO(pbp, cbp) {
 			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0)
 				continue;
 			g_raid3_xor(cbp->bio_data, xbp->bio_data,
 			    xbp->bio_length);
 		}
 		xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY;
 		if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) {
 			/* In VERIFY mode the XOR result must be all zeros. */
 			if (!g_raid3_is_zero(xbp)) {
 				g_raid3_parity_mismatch++;
 				pbp->bio_error = EIO;
 				goto finish;
 			}
 			g_raid3_destroy_bio(sc, xbp);
 		}
 	}
 	/*
 	 * Interleave the children's buffers back into the parent: for each
 	 * sector, one 'atom'-sized piece is taken from every child in list
 	 * order.
 	 */
 	atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
 	cadd = padd = 0;
 	for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) {
 		G_RAID3_FOREACH_BIO(pbp, cbp) {
 			bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom);
 			pbp->bio_completed += atom;
 			padd += atom;
 		}
 		cadd += atom;
 	}
 finish:
 	if (pbp->bio_error == 0)
 		G_RAID3_LOGREQ(3, pbp, "Request finished.");
 	else {
 		if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0)
 			G_RAID3_LOGREQ(1, pbp, "Verification error.");
 		else
 			G_RAID3_LOGREQ(0, pbp, "Request failed.");
 	}
 	pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK;
 	/* Release all remaining children before completing the parent. */
 	while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL)
 		g_raid3_destroy_bio(sc, cbp);
 	g_io_deliver(pbp, pbp->bio_error);
 }
 
 /*
  * Completion callback installed on the children of regular requests.
  *
  * The finished bio is flagged G_RAID3_BIO_CFLAG_REGULAR, put at the head of
  * the device queue under sc_queue_mtx, and wakeups are issued on both 'sc'
  * and '&sc->sc_queue'.
  */
 static void
 g_raid3_done(struct bio *bp)
 {
 	struct g_raid3_softc *softc;
 
 	softc = bp->bio_from->geom->softc;
 	bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR;
 	G_RAID3_LOGREQ(3, bp, "Regular request done (error=%d).", bp->bio_error);
 
 	mtx_lock(&softc->sc_queue_mtx);
 	bioq_insert_head(&softc->sc_queue, bp);
 	mtx_unlock(&softc->sc_queue_mtx);
 
 	wakeup(softc);
 	wakeup(&softc->sc_queue);
 }
 
 /*
  * Handle the completion of one child of a regular request.
  *
  * The child's consumer index and, for write-type children, the device's
  * outstanding-writes counter are decremented.  If the consumer no longer
  * has a disk attached (its 'private' pointer is NULL), the consumer is
  * killed under the topology lock.  Nothing else happens until every child
  * of the parent has come in.  Then:
  *  - READ parents are handed to g_raid3_gather();
  *  - WRITE/DELETE parents have their children inspected and destroyed, the
  *    failing disks are marked broken (and optionally disconnected), the
  *    parent is removed from the in-flight queue, delayed synchronization
  *    requests are released and the parent is delivered.  The parent fails
  *    only when more than one child failed.
  */
 static void
 g_raid3_regular_request(struct bio *cbp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct bio *pbp;
 
 	g_topology_assert_not();
 
 	pbp = cbp->bio_parent;
 	sc = pbp->bio_to->geom->softc;
 	cbp->bio_from->index--;
 	/*
 	 * g_raid3_scatter() bumps sc_writes for every child it sends and is
 	 * used for BIO_DELETE as well as BIO_WRITE, so both commands have to
 	 * be balanced here; otherwise the counter never returns to zero and
 	 * g_raid3_idle() never marks the device idle.
 	 */
 	if (cbp->bio_cmd == BIO_WRITE || cbp->bio_cmd == BIO_DELETE)
 		sc->sc_writes--;
 	disk = cbp->bio_from->private;
 	if (disk == NULL) {
 		g_topology_lock();
 		g_raid3_kill_consumer(sc, cbp->bio_from);
 		g_topology_unlock();
 	}
 
 	G_RAID3_LOGREQ(3, cbp, "Request finished.");
 	pbp->bio_inbed++;
 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
 	    pbp->bio_children));
 	/* Wait until all children of the parent have completed. */
 	if (pbp->bio_inbed != pbp->bio_children)
 		return;
 	switch (pbp->bio_cmd) {
 	case BIO_READ:
 		g_raid3_gather(pbp);
 		break;
 	case BIO_WRITE:
 	case BIO_DELETE:
 	    {
 		/* Error of the first failed child, 0 if none so far. */
 		int error = 0;
 
 		pbp->bio_completed = pbp->bio_length;
 		while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) {
 			if (cbp->bio_error == 0) {
 				g_raid3_destroy_bio(sc, cbp);
 				continue;
 			}
 
 			if (error == 0)
 				error = cbp->bio_error;
 			else if (pbp->bio_error == 0) {
 				/*
 				 * Next failed request, that's too many.
 				 */
 				pbp->bio_error = error;
 			}
 
 			disk = cbp->bio_caller2;
 			if (disk == NULL) {
 				g_raid3_destroy_bio(sc, cbp);
 				continue;
 			}
 
 			if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) {
 				disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN;
 				G_RAID3_LOGREQ(0, cbp,
 				    "Request failed (error=%d).",
 				    cbp->bio_error);
 			} else {
 				G_RAID3_LOGREQ(1, cbp,
 				    "Request failed (error=%d).",
 				    cbp->bio_error);
 			}
 			if (g_raid3_disconnect_on_failure &&
 			    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
 				sc->sc_bump_id |= G_RAID3_BUMP_GENID;
 				g_raid3_event_send(disk,
 				    G_RAID3_DISK_STATE_DISCONNECTED,
 				    G_RAID3_EVENT_DONTWAIT);
 			}
 			g_raid3_destroy_bio(sc, cbp);
 		}
 		if (pbp->bio_error == 0)
 			G_RAID3_LOGREQ(3, pbp, "Request finished.");
 		else
 			G_RAID3_LOGREQ(0, pbp, "Request failed.");
 		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED;
 		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY;
 		bioq_remove(&sc->sc_inflight, pbp);
 		/* Release delayed sync requests if possible. */
 		g_raid3_sync_release(sc);
 		g_io_deliver(pbp, pbp->bio_error);
 		break;
 	    }
 	}
 }
 
 /*
  * Completion callback installed on synchronization requests.
  *
  * The finished bio is flagged G_RAID3_BIO_CFLAG_SYNC, put at the head of the
  * device queue under sc_queue_mtx, and wakeups are issued on both 'sc' and
  * '&sc->sc_queue'.
  */
 static void
 g_raid3_sync_done(struct bio *bp)
 {
 	struct g_raid3_softc *softc;
 
 	G_RAID3_LOGREQ(3, bp, "Synchronization request delivered.");
 	softc = bp->bio_from->geom->softc;
 	bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC;
 
 	mtx_lock(&softc->sc_queue_mtx);
 	bioq_insert_head(&softc->sc_queue, bp);
 	mtx_unlock(&softc->sc_queue_mtx);
 
 	wakeup(softc);
 	wakeup(&softc->sc_queue);
 }
 
 static void
 g_raid3_flush(struct g_raid3_softc *sc, struct bio *bp)
 {
 	struct bio_queue_head queue;
 	struct g_raid3_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 	u_int i;
 
 	bioq_init(&queue);
 	for (i = 0; i < sc->sc_ndisks; i++) {
 		disk = &sc->sc_disks[i];
 		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
 			continue;
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			for (cbp = bioq_first(&queue); cbp != NULL;
 			    cbp = bioq_first(&queue)) {
 				bioq_remove(&queue, cbp);
 				g_destroy_bio(cbp);
 			}
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
 			return;
 		}
 		bioq_insert_tail(&queue, cbp);
 		cbp->bio_done = g_std_done;
 		cbp->bio_caller1 = disk;
 		cbp->bio_to = disk->d_consumer->provider;
 	}
 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
 		bioq_remove(&queue, cbp);
 		G_RAID3_LOGREQ(3, cbp, "Sending request.");
 		disk = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		g_io_request(cbp, disk->d_consumer);
 	}
 }
 
 /*
  * Entry point for I/O requests addressed to the device's provider.
  *
  * BIO_READ, BIO_WRITE and BIO_DELETE are appended to the device queue and a
  * wakeup is issued on 'sc'.  BIO_FLUSH and BIO_SPEEDUP are passed to
  * g_raid3_flush().  Everything else, including BIO_GETATTR, is completed
  * with EOPNOTSUPP.
  */
 static void
 g_raid3_start(struct bio *bp)
 {
 	struct g_raid3_softc *sc;
 
 	sc = bp->bio_to->geom->softc;
 	/*
 	 * With no softc or no usable device state, the provider's error
 	 * should already be set and this function should never be reached.
 	 */
 	KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 	    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE),
 	    ("Provider's error should be set (error=%d)(device=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 	G_RAID3_LOGREQ(3, bp, "Request received.");
 
 	switch (bp->bio_cmd) {
 	case BIO_SPEEDUP:
 	case BIO_FLUSH:
 		g_raid3_flush(sc, bp);
 		break;
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 		mtx_lock(&sc->sc_queue_mtx);
 		bioq_insert_tail(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 		G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 		wakeup(sc);
 		break;
 	case BIO_GETATTR:
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		break;
 	}
 }
 
 /*
  * Return TRUE if the given regular request overlaps an in-progress
  * synchronization request.
  *
  * Synchronization bios whose command is BIO_WRITE have their offset and
  * length multiplied by (sc_ndisks - 1) before the comparison; other
  * synchronization bios are compared as they are.  Returns 0 when no disk is
  * being synchronized.
  */
 static int
 g_raid3_sync_collision(struct g_raid3_softc *sc, struct bio *bp)
 {
 	struct g_raid3_disk *syncdisk;
 	struct bio *sbp;
 	off_t rstart, rend, sstart, send;
 	int slot;
 
 	syncdisk = sc->sc_syncdisk;
 	if (syncdisk == NULL)
 		return (0);
 	rstart = bp->bio_offset;
 	rend = bp->bio_offset + bp->bio_length;
 	for (slot = 0; slot < g_raid3_syncreqs; slot++) {
 		sbp = syncdisk->d_sync.ds_bios[slot];
 		if (sbp == NULL)
 			continue;
 		if (sbp->bio_cmd == BIO_WRITE) {
 			sstart = sbp->bio_offset * (sc->sc_ndisks - 1);
 			send = sstart + sbp->bio_length * (sc->sc_ndisks - 1);
 		} else {
 			sstart = sbp->bio_offset;
 			send = sstart + sbp->bio_length;
 		}
 		/* Half-open ranges [rstart, rend) and [sstart, send). */
 		if (rstart < send && rend > sstart)
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Return TRUE if the given synchronization request overlaps a regular
  * request that is currently on the in-flight queue.  Returns 0 when no disk
  * is being synchronized.
  */
 static int
 g_raid3_regular_collision(struct g_raid3_softc *sc, struct bio *sbp)
 {
 	struct bio *rbp;
 	off_t rstart, rend, sstart, send;
 
 	if (sc->sc_syncdisk == NULL)
 		return (0);
 	sstart = sbp->bio_offset;
 	send = sbp->bio_offset + sbp->bio_length;
 	TAILQ_FOREACH(rbp, &sc->sc_inflight.queue, bio_queue) {
 		rstart = rbp->bio_offset;
 		rend = rstart + rbp->bio_length;
 		/* Half-open ranges [rstart, rend) and [sstart, send). */
 		if (rstart < send && rend > sstart)
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Put a regular request onto the delayed queue.  The request is inserted at
  * the head of sc_regular_delayed.
  */
 static void
 g_raid3_regular_delay(struct g_raid3_softc *sc, struct bio *bp)
 {
 
 	G_RAID3_LOGREQ(2, bp, "Delaying request.");
 	bioq_insert_head(&sc->sc_regular_delayed, bp);
 }
 
 /*
  * Put a synchronization request onto the delayed queue.  The request is
  * appended to the tail of sc_sync_delayed.
  */
 static void
 g_raid3_sync_delay(struct g_raid3_softc *sc, struct bio *bp)
 {
 
 	G_RAID3_LOGREQ(2, bp, "Delaying synchronization request.");
 	bioq_insert_tail(&sc->sc_sync_delayed, bp);
 }
 
 /*
  * Release delayed regular requests that no longer collide with any
  * synchronization request.  Each released request is moved from
  * sc_regular_delayed to the head of the device queue.
  */
 static void
 g_raid3_regular_release(struct g_raid3_softc *sc)
 {
 	struct bio *cur, *next;
 
 	TAILQ_FOREACH_SAFE(cur, &sc->sc_regular_delayed.queue, bio_queue,
 	    next) {
 		if (!g_raid3_sync_collision(sc, cur)) {
 			bioq_remove(&sc->sc_regular_delayed, cur);
 			G_RAID3_LOGREQ(2, cur,
 			    "Releasing delayed request (%p).", cur);
 			/*
 			 * No wakeup(&sc->sc_queue) is issued after queueing:
 			 * it is not needed, because this function is called
 			 * from the worker thread.
 			 */
 			mtx_lock(&sc->sc_queue_mtx);
 			bioq_insert_head(&sc->sc_queue, cur);
 			mtx_unlock(&sc->sc_queue_mtx);
 		}
 	}
 }
 
 /*
  * Release delayed synchronization requests that no longer collide with any
  * in-flight regular request.  Each released request is removed from
  * sc_sync_delayed and issued through the consumer recorded in its bio_from
  * field.
  */
 static void
 g_raid3_sync_release(struct g_raid3_softc *sc)
 {
 	struct bio *cur, *next;
 
 	TAILQ_FOREACH_SAFE(cur, &sc->sc_sync_delayed.queue, bio_queue, next) {
 		if (!g_raid3_regular_collision(sc, cur)) {
 			bioq_remove(&sc->sc_sync_delayed, cur);
 			G_RAID3_LOGREQ(2, cur,
 			    "Releasing delayed synchronization request.");
 			g_io_request(cur, cur->bio_from);
 		}
 	}
 }
 
 /*
  * Handle synchronization requests.
  * Every synchronization request is a two-step process: first, a READ request
  * is sent to the active provider and then a WRITE request (with the read
  * data) is sent to the provider being synchronized. When the WRITE is
  * finished, a new synchronization request is sent.
  *
  * 'bp' is a completed synchronization bio.  A finished READ is converted in
  * place into the WRITE for the component being synchronized; a finished
  * WRITE either ends the synchronization (when the end of the media has been
  * reached, the sync consumer is gone or the device is being destroyed) or is
  * recycled into the next READ.
  */
 static void
 g_raid3_sync_request(struct bio *bp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 
 	bp->bio_from->index--;
 	sc = bp->bio_from->geom->softc;
 	disk = bp->bio_from->private;
 	if (disk == NULL) {
 		/* The consumer has no disk attached any more; get rid of it. */
 		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
 		g_topology_lock();
 		g_raid3_kill_consumer(sc, bp->bio_from);
 		g_topology_unlock();
 		free(bp->bio_data, M_RAID3);
 		g_destroy_bio(bp);
 		sx_xlock(&sc->sc_lock);
 		return;
 	}
 
 	/*
 	 * Synchronization request.
 	 */
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	    {
 		struct g_consumer *cp;
 		u_char *dst, *src;
 		off_t left;
 		u_int atom;
 
 		if (bp->bio_error != 0) {
 			G_RAID3_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 			/*
 			 * NOTE(review): bp->bio_data is not freed on this
 			 * path, unlike the paths that call
 			 * free(bp->bio_data, M_RAID3) -- confirm that the
 			 * buffer is released elsewhere.
 			 */
 			g_destroy_bio(bp);
 			return;
 		}
 		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
 		/*
 		 * Extract this component's share of the data that was read,
 		 * compacting it in place at the start of the buffer ('dst'
 		 * never runs ahead of 'src').
 		 */
 		atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
 		dst = src = bp->bio_data;
 		if (disk->d_no == sc->sc_ndisks - 1) {
 			u_int n;
 
 			/* Parity component. */
 			/* XOR together all the atoms of each sector. */
 			for (left = bp->bio_length; left > 0;
 			    left -= sc->sc_sectorsize) {
 				bcopy(src, dst, atom);
 				src += atom;
 				for (n = 1; n < sc->sc_ndisks - 1; n++) {
 					g_raid3_xor(src, dst, atom);
 					src += atom;
 				}
 				dst += atom;
 			}
 		} else {
 			/* Regular component. */
 			/* Pick atom number d_no out of each sector. */
 			src += atom * disk->d_no;
 			for (left = bp->bio_length; left > 0;
 			    left -= sc->sc_sectorsize) {
 				bcopy(src, dst, atom);
 				src += sc->sc_sectorsize;
 				dst += atom;
 			}
 		}
 		/* Turn the bio into a WRITE in component coordinates. */
 		bp->bio_driver1 = bp->bio_driver2 = NULL;
 		bp->bio_pflags = 0;
 		bp->bio_offset /= sc->sc_ndisks - 1;
 		bp->bio_length /= sc->sc_ndisks - 1;
 		bp->bio_cmd = BIO_WRITE;
 		bp->bio_cflags = 0;
 		bp->bio_children = bp->bio_inbed = 0;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		cp->index++;
 		g_io_request(bp, cp);
 		return;
 	    }
 	case BIO_WRITE:
 	    {
 		struct g_raid3_disk_sync *sync;
 		off_t boffset, moffset;
 		void *data;
 		int i;
 
 		if (bp->bio_error != 0) {
 			G_RAID3_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 			/*
 			 * NOTE(review): as on the READ error path,
 			 * bp->bio_data is not freed here -- confirm.
 			 */
 			g_destroy_bio(bp);
 			sc->sc_bump_id |= G_RAID3_BUMP_GENID;
 			g_raid3_event_send(disk,
 			    G_RAID3_DISK_STATE_DISCONNECTED,
 			    G_RAID3_EVENT_DONTWAIT);
 			return;
 		}
 		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
 		sync = &disk->d_sync;
 		if (sync->ds_offset == sc->sc_mediasize / (sc->sc_ndisks - 1) ||
 		    sync->ds_consumer == NULL ||
 		    (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
 			/* Don't send more synchronization requests. */
 			sync->ds_inflight--;
 			if (sync->ds_bios != NULL) {
 				/* bio_caller1 holds this bio's slot index. */
 				i = (int)(uintptr_t)bp->bio_caller1;
 				sync->ds_bios[i] = NULL;
 			}
 			free(bp->bio_data, M_RAID3);
 			g_destroy_bio(bp);
 			/* Wait for the other outstanding sync requests. */
 			if (sync->ds_inflight > 0)
 				return;
 			if (sync->ds_consumer == NULL ||
 			    (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
 				return;
 			}
 			/*
 			 * Disk up-to-date, activate it.
 			 */
 			g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE,
 			    G_RAID3_EVENT_DONTWAIT);
 			return;
 		}
 
 		/* Send next synchronization request. */
 		/* g_reset_bio() is followed by re-setting every needed field. */
 		data = bp->bio_data;
 		g_reset_bio(bp);
 		bp->bio_cmd = BIO_READ;
 		bp->bio_offset = sync->ds_offset * (sc->sc_ndisks - 1);
 		bp->bio_length = MIN(maxphys, sc->sc_mediasize - bp->bio_offset);
 		sync->ds_offset += bp->bio_length / (sc->sc_ndisks - 1);
 		bp->bio_done = g_raid3_sync_done;
 		bp->bio_data = data;
 		bp->bio_from = sync->ds_consumer;
 		bp->bio_to = sc->sc_provider;
 		G_RAID3_LOGREQ(3, bp, "Sending synchronization request.");
 		sync->ds_consumer->index++;
 		/*
 		 * Delay the request if it is colliding with a regular request.
 		 */
 		if (g_raid3_regular_collision(sc, bp))
 			g_raid3_sync_delay(sc, bp);
 		else
 			g_io_request(bp, sync->ds_consumer);
 
 		/* Release delayed requests if possible. */
 		g_raid3_regular_release(sc);
 
 		/* Find the smallest offset. */
 		/*
 		 * Offsets of WRITE bios are scaled by (sc_ndisks - 1) so that
 		 * they can be compared with READ offsets and sc_mediasize.
 		 * Every ds_bios[] slot is dereferenced without a NULL check.
 		 */
 		moffset = sc->sc_mediasize;
 		for (i = 0; i < g_raid3_syncreqs; i++) {
 			bp = sync->ds_bios[i];
 			boffset = bp->bio_offset;
 			if (bp->bio_cmd == BIO_WRITE)
 				boffset *= sc->sc_ndisks - 1;
 			if (boffset < moffset)
 				moffset = boffset;
 		}
 		if (sync->ds_offset_done + maxphys * 100 < moffset) {
 			/* Update offset_done on every 100 blocks. */
 			sync->ds_offset_done = moffset;
 			g_raid3_update_metadata(disk);
 		}
 		return;
 	    }
 	default:
 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
 		    bp->bio_cmd, sc->sc_name));
 		break;
 	}
 }
 
 /*
  * Split regular parent request 'pbp' into per-component child requests and
  * start them.
  *
  * Each child covers the parent's offset and length divided by
  * (sc_ndisks - 1).
  *  - READ: one child per data component is cloned, plus one for the parity
  *    component when verification is in effect.  A child whose component is
  *    not ACTIVE, or the one picked by round-robin, is redirected to the
  *    parity component and flagged G_RAID3_BIO_CFLAG_PARITY.  The children
  *    are sent from here.
  *  - WRITE/DELETE: the request is put on the delayed queue if it collides
  *    with a synchronization request.  Otherwise the device leaves the idle
  *    state, one child per component is cloned, the parent is put on the
  *    in-flight queue and g_raid3_scatter() fills and sends the children.
  *
  * Returns 0 when the request was sent, delayed, or completed with an error
  * through g_io_deliver().  Returns ENOMEM when a child could not be
  * allocated and the parent is not flagged G_RAID3_BIO_CFLAG_REGULAR; the
  * parent is not delivered in that case.
  */
 static int
 g_raid3_register_request(struct bio *pbp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp, *tmpbp;
 	off_t offset, length;
 	u_int n, ndisks;
 	int round_robin, verify;
 
 	ndisks = 0;
 	sc = pbp->bio_to->geom->softc;
 	/* A REGSYNC-flagged request makes no sense without a sync disk. */
 	if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 &&
 	    sc->sc_syncdisk == NULL) {
 		g_io_deliver(pbp, EIO);
 		return (0);
 	}
 	g_raid3_init_bio(pbp);
 	length = pbp->bio_length / (sc->sc_ndisks - 1);
 	offset = pbp->bio_offset / (sc->sc_ndisks - 1);
 	round_robin = verify = 0;
 	/* First pass: decide how many children are needed. */
 	switch (pbp->bio_cmd) {
 	case BIO_READ:
 		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 &&
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
 			pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY;
 			verify = 1;
 			ndisks = sc->sc_ndisks;
 		} else {
 			verify = 0;
 			ndisks = sc->sc_ndisks - 1;
 		}
 		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 &&
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
 			round_robin = 1;
 		} else {
 			round_robin = 0;
 		}
 		KASSERT(!round_robin || !verify,
 		    ("ROUND-ROBIN and VERIFY are mutually exclusive."));
 		/* g_raid3_gather() re-issues a failed child to this disk. */
 		pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1];
 		break;
 	case BIO_WRITE:
 	case BIO_DELETE:
 		/*
 		 * Delay the request if it is colliding with a synchronization
 		 * request.
 		 */
 		if (g_raid3_sync_collision(sc, pbp)) {
 			g_raid3_regular_delay(sc, pbp);
 			return (0);
 		}
 
 		if (sc->sc_idle)
 			g_raid3_unidle(sc);
 		else
 			sc->sc_last_write = time_uptime;
 
 		ndisks = sc->sc_ndisks;
 		break;
 	}
 	/* Second pass: clone the children and bind each one to a disk. */
 	for (n = 0; n < ndisks; n++) {
 		disk = &sc->sc_disks[n];
 		cbp = g_raid3_clone_bio(sc, pbp);
 		if (cbp == NULL) {
 			while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL)
 				g_raid3_destroy_bio(sc, cbp);
 			/*
 			 * To prevent deadlock, we must run back up
 			 * with the ENOMEM for failed requests of any
 			 * of our consumers.  Our own sync requests
 			 * can stick around, as they are finite.
 			 */
 			if ((pbp->bio_cflags &
 			    G_RAID3_BIO_CFLAG_REGULAR) != 0) {
 				g_io_deliver(pbp, ENOMEM);
 				return (0);
 			}
 			return (ENOMEM);
 		}
 		cbp->bio_offset = offset;
 		cbp->bio_length = length;
 		cbp->bio_done = g_raid3_done;
 		switch (pbp->bio_cmd) {
 		case BIO_READ:
 			if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
 				/*
 				 * Replace invalid component with the parity
 				 * component.
 				 */
 				disk = &sc->sc_disks[sc->sc_ndisks - 1];
 				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
 				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
 			} else if (round_robin &&
 			    disk->d_no == sc->sc_round_robin) {
 				/*
 				 * In round-robin mode skip one data component
 				 * and use parity component when reading.
 				 */
 				pbp->bio_driver2 = disk;
 				disk = &sc->sc_disks[sc->sc_ndisks - 1];
 				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
 				sc->sc_round_robin++;
 				round_robin = 0;
 			} else if (verify && disk->d_no == sc->sc_ndisks - 1) {
 				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
 			}
 			break;
 		case BIO_WRITE:
 		case BIO_DELETE:
 			if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
 			    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
 				if (n == ndisks - 1) {
 					/*
 					 * Active parity component, mark it as such.
 					 */
 					cbp->bio_cflags |=
 					    G_RAID3_BIO_CFLAG_PARITY;
 				}
 			} else {
 				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
 				if (n == ndisks - 1) {
 					/*
 					 * Parity component is not connected,
 					 * so destroy its request.
 					 */
 					pbp->bio_pflags |=
 					    G_RAID3_BIO_PFLAG_NOPARITY;
 					g_raid3_destroy_bio(sc, cbp);
 					cbp = NULL;
 				} else {
 					/*
 					 * Keep the child so that its data
 					 * still goes into the parity;
 					 * g_raid3_scatter() destroys NODISK
 					 * children after using them.
 					 */
 					cbp->bio_cflags |=
 					    G_RAID3_BIO_CFLAG_NODISK;
 					disk = NULL;
 				}
 			}
 			break;
 		}
 		if (cbp != NULL)
 			cbp->bio_caller2 = disk;
 	}
 	/* Third pass: issue the children. */
 	switch (pbp->bio_cmd) {
 	case BIO_READ:
 		if (round_robin) {
 			/*
 			 * If we are in round-robin mode and 'round_robin' is
 			 * still 1, it means, that we skipped parity component
 			 * for this read and must reset sc_round_robin field.
 			 */
 			sc->sc_round_robin = 0;
 		}
 		G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) {
 			disk = cbp->bio_caller2;
 			cp = disk->d_consumer;
 			cbp->bio_to = cp->provider;
 			G_RAID3_LOGREQ(3, cbp, "Sending request.");
 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 			    ("Consumer %s not opened (r%dw%de%d).",
 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
 			cp->index++;
 			g_io_request(cbp, cp);
 		}
 		break;
 	case BIO_WRITE:
 	case BIO_DELETE:
 		/*
 		 * Put request onto inflight queue, so we can check if new
 		 * synchronization requests don't collide with it.
 		 */
 		bioq_insert_tail(&sc->sc_inflight, pbp);
 
 		/*
 		 * Bump syncid on first write.
 		 */
 		if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) {
 			sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID;
 			g_raid3_bump_syncid(sc);
 		}
 		g_raid3_scatter(pbp);
 		break;
 	}
 	return (0);
 }
 
 static int
 g_raid3_can_destroy(struct g_raid3_softc *sc)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 
 	g_topology_assert();
 	gp = sc->sc_geom;
 	if (gp->softc == NULL)
 		return (1);
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (g_raid3_is_busy(sc, cp))
 			return (0);
 	}
 	gp = sc->sc_sync.ds_geom;
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (g_raid3_is_busy(sc, cp))
 			return (0);
 	}
 	G_RAID3_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
 	    sc->sc_name);
 	return (1);
 }
 
 /*
  * Try to tear the device down.
  *
  * The root mount hold, if any, is released first.  Then, under the topology
  * lock, g_raid3_can_destroy() decides whether destruction is possible; if
  * not, 0 is returned and nothing else changes.  Otherwise the softc pointers
  * of both geoms are cleared and 1 is returned after one of the following:
  *  - with G_RAID3_DEVICE_FLAG_WAIT set, sc_lock is released, a wakeup is
  *    issued on &sc->sc_worker and sc_worker is cleared;
  *  - without it, the device is destroyed and the softc freed here.
  *
  * Asserts that the topology lock is not held and that sc_lock is exclusively
  * held on entry.  When 1 is returned the caller must not touch 'sc' or its
  * lock any more.
  */
 static int
 g_raid3_try_destroy(struct g_raid3_softc *sc)
 {
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	if (sc->sc_rootmount != NULL) {
 		G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 		    sc->sc_rootmount);
 		root_mount_rel(sc->sc_rootmount);
 		sc->sc_rootmount = NULL;
 	}
 
 	g_topology_lock();
 	if (!g_raid3_can_destroy(sc)) {
 		g_topology_unlock();
 		return (0);
 	}
 	sc->sc_geom->softc = NULL;
 	sc->sc_sync.ds_geom->softc = NULL;
 	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) != 0) {
 		g_topology_unlock();
 		G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__,
 		    &sc->sc_worker);
 		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
 		sx_xunlock(&sc->sc_lock);
 		wakeup(&sc->sc_worker);
 		/*
 		 * NOTE(review): sc is written after the wakeup although the
 		 * comment above says it may be destroyed by then; presumably
 		 * the waiter only proceeds once sc_worker is NULL -- confirm
 		 * against the code sleeping on &sc->sc_worker.
 		 */
 		sc->sc_worker = NULL;
 	} else {
 		g_topology_unlock();
 		g_raid3_destroy_device(sc);
 		free(sc->sc_disks, M_RAID3);
 		free(sc, M_RAID3);
 	}
 	return (1);
 }
 
 /*
  * Worker thread.
  *
  * 'arg' is the softc of the device this thread serves.  After raising its
  * priority to PRIBIO the thread loops forever with sc_lock held exclusively;
  * within this function the lock is dropped only around the two MSLEEP()
  * calls.  Each pass handles one pending event or, when there is none, one
  * bio taken from sc_queue.  The loop is left only through kproc_exit(),
  * after g_raid3_try_destroy() succeeded for a device flagged
  * G_RAID3_DEVICE_FLAG_DESTROY.
  */
 static void
 g_raid3_worker(void *arg)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_event *ep;
 	struct bio *bp;
 	int timeout;
 
 	sc = arg;
 	thread_lock(curthread);
 	sched_prio(curthread, PRIBIO);
 	thread_unlock(curthread);
 
 	sx_xlock(&sc->sc_lock);
 	for (;;) {
 		G_RAID3_DEBUG(5, "%s: Let's see...", __func__);
 		/*
 		 * First take a look at events.
 		 * This is important to handle events before any I/O requests.
 		 */
 		ep = g_raid3_event_get(sc);
 		if (ep != NULL) {
 			g_raid3_event_remove(sc, ep);
 			if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) {
 				/* Update only device status. */
 				G_RAID3_DEBUG(3,
 				    "Running event for device %s.",
 				    sc->sc_name);
 				ep->e_error = 0;
 				g_raid3_update_device(sc, 1);
 			} else {
 				/* Update disk status. */
 				G_RAID3_DEBUG(3, "Running event for disk %s.",
 				     g_raid3_get_diskname(ep->e_disk));
 				ep->e_error = g_raid3_update_disk(ep->e_disk,
 				    ep->e_state);
 				if (ep->e_error == 0)
 					g_raid3_update_device(sc, 0);
 			}
 			/*
 			 * A DONTWAIT event is freed right here.  Any other
 			 * event is marked DONE and the thread sleeping on
 			 * 'ep' is woken up; it is not freed by this function.
 			 */
 			if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) {
 				KASSERT(ep->e_error == 0,
 				    ("Error cannot be handled."));
 				g_raid3_event_free(ep);
 			} else {
 				ep->e_flags |= G_RAID3_EVENT_DONE;
 				G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__,
 				    ep);
 				mtx_lock(&sc->sc_events_mtx);
 				wakeup(ep);
 				mtx_unlock(&sc->sc_events_mtx);
 			}
 			/*
 			 * The event may have flagged the device for
 			 * destruction; if it can be destroyed now, this
 			 * thread exits and never touches 'sc' again.
 			 */
 			if ((sc->sc_flags &
 			    G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
 				if (g_raid3_try_destroy(sc)) {
 					curthread->td_pflags &= ~TDP_GEOM;
 					G_RAID3_DEBUG(1, "Thread exiting.");
 					kproc_exit(0);
 				}
 			}
 			G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__);
 			/* Drain all events before looking at I/O requests. */
 			continue;
 		}
 		/*
 		 * Check if we can mark array as CLEAN and if we can't take
 		 * how much seconds should we wait.
 		 */
 		timeout = g_raid3_idle(sc, -1);
 		/*
 		 * Now I/O requests.
 		 */
 		/* Get first request from the queue. */
 		mtx_lock(&sc->sc_queue_mtx);
 		bp = bioq_first(&sc->sc_queue);
 		if (bp == NULL) {
 			/* Nothing queued: maybe destroy, otherwise sleep. */
 			if ((sc->sc_flags &
 			    G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
 				/*
 				 * The queue mutex is released across the
 				 * destroy attempt and retaken afterwards,
 				 * because MSLEEP() below is passed that mutex.
 				 */
 				mtx_unlock(&sc->sc_queue_mtx);
 				if (g_raid3_try_destroy(sc)) {
 					curthread->td_pflags &= ~TDP_GEOM;
 					G_RAID3_DEBUG(1, "Thread exiting.");
 					kproc_exit(0);
 				}
 				mtx_lock(&sc->sc_queue_mtx);
 			}
 			sx_xunlock(&sc->sc_lock);
 			/*
 			 * XXX: We can miss an event here, because an event
 			 *      can be added without sx-device-lock and without
 			 *      mtx-queue-lock. Maybe I should just stop using
 			 *      dedicated mutex for events synchronization and
 			 *      stick with the queue lock?
 			 *      The event will hang here until next I/O request
 			 *      or next event is received.
 			 */
 			/*
 			 * Sleep on 'sc' for at most 'timeout' seconds.
 			 * NOTE(review): PDROP is passed, so the queue mutex is
 			 * presumably not held on return (see sleep(9)); the
 			 * next pass of the loop takes it again itself.
 			 */
 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w1",
 			    timeout * hz);
 			sx_xlock(&sc->sc_lock);
 			G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__);
 			continue;
 		}
 		/*
 		 * Reached either by falling through with the first queued bio,
 		 * or by 'goto' from the low-memory path below with another
 		 * queued bio.  In both cases sc_queue_mtx is held here.
 		 */
 process:
 		bioq_remove(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 
 		/*
 		 * Dispatch on the kind of bio:
 		 * - issued by the synchronization geom and flagged SYNC;
 		 * - not addressed to our provider: a bio flagged REGULAR or
 		 *   SYNC, anything else trips the KASSERT;
 		 * - addressed to our provider: a new request to register.
 		 */
 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
 		    (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) {
 			g_raid3_sync_request(bp);	/* READ */
 		} else if (bp->bio_to != sc->sc_provider) {
 			if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0)
 				g_raid3_regular_request(bp);
 			else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0)
 				g_raid3_sync_request(bp);	/* WRITE */
 			else {
 				KASSERT(0,
 				    ("Invalid request cflags=0x%hx to=%s.",
 				    bp->bio_cflags, bp->bio_to->name));
 			}
 		} else if (g_raid3_register_request(bp) != 0) {
 			/*
 			 * Registration failed: put the request back at the
 			 * head of the queue so it is retried first.
 			 */
 			mtx_lock(&sc->sc_queue_mtx);
 			bioq_insert_head(&sc->sc_queue, bp);
 			/*
 			 * We are short in memory, let see if there are finished
 			 * request we can free.
 			 */
 			TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
 				if (bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR)
 					goto process;
 			}
 			/*
 			 * No finished regular request, so at least keep
 			 * synchronization running.
 			 */
 			TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
 				if (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC)
 					goto process;
 			}
 			/*
 			 * Nothing else can be processed: back off for hz / 10
 			 * ticks (or until woken up on &sc->sc_queue) with
 			 * sc_lock released, then retry.
 			 */
 			sx_xunlock(&sc->sc_lock);
 			MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx, PRIBIO | PDROP,
 			    "r3:lowmem", hz / 10);
 			sx_xlock(&sc->sc_lock);
 		}
 		G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__);
 	}
 }
 
 /*
  * Bring the DIRTY flag of 'disk' in line with the idle state of the device:
  * on a busy device a clean disk is marked dirty, on an idle device a dirty
  * disk is marked clean.  Nothing is done when the device is flagged
  * G_RAID3_DEVICE_FLAG_NOFAILSYNC.  Only the in-memory flag is changed here.
  */
 static void
 g_raid3_update_idle(struct g_raid3_softc *sc, struct g_raid3_disk *disk)
 {
 	int dirty, idle;
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0)
 		return;
 
 	idle = (sc->sc_idle != 0);
 	dirty = ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0);
 	/* busy+dirty and idle+clean are already consistent. */
 	if (idle != dirty)
 		return;
 
 	if (dirty) {
 		G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.",
 		    g_raid3_get_diskname(disk), sc->sc_name);
 		disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 	} else {
 		G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
 		    g_raid3_get_diskname(disk), sc->sc_name);
 		disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
 	}
 }
 
 /*
  * Start rebuilding the first component found in SYNCHRONIZING state.
  *
  * The device must be DEGRADED with no synchronization already running.
  * A new consumer is created on the synchronization geom, attached to our
  * own provider and opened for reading; sc_lock is released while the
  * topology lock is held for that.  Then g_raid3_syncreqs read bios of up to
  * maxphys bytes are prepared and issued, each one being delayed instead
  * when it collides with a regular request.  Returns silently when no
  * component is in SYNCHRONIZING state.
  */
 static void
 g_raid3_sync_start(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *disk;
 	struct g_consumer *cp;
 	struct bio *bp;
 	int error;
 	u_int i;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
 	    ("Device not in DEGRADED state (%s, %u).", sc->sc_name,
 	    sc->sc_state));
 	KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).",
 	    sc->sc_name, sc->sc_state));
 
 	/* Pick the first component waiting for synchronization. */
 	disk = NULL;
 	for (i = 0; i < sc->sc_ndisks && disk == NULL; i++) {
 		if (sc->sc_disks[i].d_state == G_RAID3_DISK_STATE_SYNCHRONIZING)
 			disk = &sc->sc_disks[i];
 	}
 	if (disk == NULL)
 		return;
 
 	/* Consumer setup needs the topology lock, so give up sc_lock. */
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	cp = g_new_consumer(sc->sc_sync.ds_geom);
 	error = g_attach(cp, sc->sc_provider);
 	KASSERT(error == 0,
 	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
 	error = g_access(cp, 1, 0, 0);
 	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 
 	G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
 	    g_raid3_get_diskname(disk));
 	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) == 0)
 		disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
 	KASSERT(disk->d_sync.ds_consumer == NULL,
 	    ("Sync consumer already exists (device=%s, disk=%s).",
 	    sc->sc_name, g_raid3_get_diskname(disk)));
 
 	/* Tie the new consumer and the disk together. */
 	cp->private = disk;
 	cp->index = 0;
 	disk->d_sync.ds_consumer = cp;
 	sc->sc_syncdisk = disk;
 
 	/*
 	 * Allocate the synchronization bios.  Every bio reads the next
 	 * chunk of the provider; ds_offset is kept in per-component units,
 	 * hence the scaling by the number of data components.
 	 */
 	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_raid3_syncreqs,
 	    M_RAID3, M_WAITOK);
 	for (i = 0; i < g_raid3_syncreqs; i++) {
 		bp = g_alloc_bio();
 		bp->bio_parent = NULL;
 		bp->bio_cmd = BIO_READ;
 		bp->bio_cflags = 0;
 		bp->bio_data = malloc(maxphys, M_RAID3, M_WAITOK);
 		bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1);
 		bp->bio_length = MIN(maxphys, sc->sc_mediasize - bp->bio_offset);
 		disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1);
 		bp->bio_done = g_raid3_sync_done;
 		bp->bio_from = cp;
 		bp->bio_to = sc->sc_provider;
 		/* Remember the slot of this bio in ds_bios[]. */
 		bp->bio_caller1 = (void *)(uintptr_t)i;
 		disk->d_sync.ds_bios[i] = bp;
 	}
 
 	/* Set the number of in-flight synchronization requests. */
 	disk->d_sync.ds_inflight = g_raid3_syncreqs;
 
 	/*
 	 * Fire off first synchronization requests.
 	 */
 	for (i = 0; i < g_raid3_syncreqs; i++) {
 		bp = disk->d_sync.ds_bios[i];
 		G_RAID3_LOGREQ(3, bp, "Sending synchronization request.");
 		disk->d_sync.ds_consumer->index++;
 		/*
 		 * Delay the request if it is colliding with a regular request.
 		 */
 		if (!g_raid3_regular_collision(sc, bp))
 			g_io_request(bp, disk->d_sync.ds_consumer);
 		else
 			g_raid3_sync_delay(sc, bp);
 	}
 }
 
 /*
  * Stop synchronization process.
  * type: 0 - synchronization finished
  *       1 - synchronization stopped
  *
  * Clears sc_syncdisk, frees the ds_bios array, clears the DIRTY flag of the
  * disk and kills the synchronization consumer.  sc_lock is released while
  * the consumer is killed under the topology lock and is taken again before
  * returning.  Nothing beyond clearing sc_syncdisk happens when the disk has
  * no synchronization consumer.
  */
 static void
 g_raid3_sync_stop(struct g_raid3_softc *sc, int type)
 {
 	struct g_raid3_disk *syncdisk;
 	struct g_consumer *sync_cp;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
 	    ("Device not in DEGRADED state (%s, %u).", sc->sc_name,
 	    sc->sc_state));
 	syncdisk = sc->sc_syncdisk;
 	sc->sc_syncdisk = NULL;
 	KASSERT(syncdisk != NULL, ("No disk was synchronized (%s).",
 	    sc->sc_name));
 	KASSERT(syncdisk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
 	    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(syncdisk),
 	    g_raid3_disk_state2str(syncdisk->d_state)));
 
 	sync_cp = syncdisk->d_sync.ds_consumer;
 	if (sync_cp == NULL)
 		return;
 
 	if (type != 0) {
 		/* type == 1 */
 		G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
 		    sc->sc_name, g_raid3_get_diskname(syncdisk));
 	} else {
 		G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.",
 		    sc->sc_name, g_raid3_get_diskname(syncdisk));
 	}
 
 	free(syncdisk->d_sync.ds_bios, M_RAID3);
 	syncdisk->d_sync.ds_bios = NULL;
 	syncdisk->d_sync.ds_consumer = NULL;
 	syncdisk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 
 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
 	g_topology_lock();
 	g_raid3_kill_consumer(sc, sync_cp);
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 }
 
 static void
 g_raid3_launch_provider(struct g_raid3_softc *sc)
 {
 	struct g_provider *pp;
 	struct g_raid3_disk *disk;
 	int n;
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	g_topology_lock();
 	pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name);
 	pp->mediasize = sc->sc_mediasize;
 	pp->sectorsize = sc->sc_sectorsize;
 	pp->stripesize = 0;
 	pp->stripeoffset = 0;
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		disk = &sc->sc_disks[n];
 		if (disk->d_consumer && disk->d_consumer->provider &&
 		    disk->d_consumer->provider->stripesize > pp->stripesize) {
 			pp->stripesize = disk->d_consumer->provider->stripesize;
 			pp->stripeoffset = disk->d_consumer->provider->stripeoffset;
 		}
 	}
 	pp->stripesize *= sc->sc_ndisks - 1;
 	pp->stripeoffset *= sc->sc_ndisks - 1;
 	sc->sc_provider = pp;
 	g_error_provider(pp, 0);
 	g_topology_unlock();
 	G_RAID3_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
 	    g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE), sc->sc_ndisks);
 
 	if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED)
 		g_raid3_sync_start(sc);
 }
 
 static void
 g_raid3_destroy_provider(struct g_raid3_softc *sc)
 {
 	struct bio *bp;
 
 	g_topology_assert_not();
 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
 	    sc->sc_name));
 
 	g_topology_lock();
 	g_error_provider(sc->sc_provider, ENXIO);
 	mtx_lock(&sc->sc_queue_mtx);
 	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
 		bioq_remove(&sc->sc_queue, bp);
 		g_io_deliver(bp, ENXIO);
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
 	    sc->sc_provider->name);
 	g_wither_provider(sc->sc_provider, ENXIO);
 	g_topology_unlock();
 	sc->sc_provider = NULL;
 	if (sc->sc_syncdisk != NULL)
 		g_raid3_sync_stop(sc, 1);
 }
 
 /*
  * Force the device described by 'arg' (its softc) to start: log the
  * timeout and queue a device event without waiting for its completion.
  * NOTE(review): presumably installed as the start timeout handler of
  * sc_callout -- confirm where the callout is armed.
  */
 static void
 g_raid3_go(void *arg)
 {
 	struct g_raid3_softc *sc = arg;
 
 	G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
 	g_raid3_event_send(sc, 0,
 	    G_RAID3_EVENT_DEVICE | G_RAID3_EVENT_DONTWAIT);
 }
 
 /*
  * Work out the state a connected disk should enter, from its syncid and
  * flags compared with those of the device.
  *
  * - disk syncid newer than the device's: the disk is destroyed and
  *   G_RAID3_DISK_STATE_NONE is returned;
  * - disk syncid older: its synchronization data is reset, it is flagged
  *   SYNCHRONIZING and adopts the device syncid, then is treated as below;
  * - same syncid: ACTIVE when the SYNCHRONIZING flag is clear, otherwise
  *   SYNCHRONIZING when automatic synchronization is allowed or forced for
  *   this disk, otherwise STALE.
  */
 static u_int
 g_raid3_determine_state(struct g_raid3_disk *disk)
 {
 	struct g_raid3_softc *sc;
 	u_int newstate;
 
 	sc = disk->d_softc;
 	if (disk->d_sync.ds_syncid > sc->sc_syncid) {
 		/*
 		 * Not good, NOT GOOD!
 		 * It means that device was started on stale disks
 		 * and more fresh disk just arrive.
 		 * If there were writes, device is broken, sorry.
 		 * I think the best choice here is don't touch
 		 * this disk and inform the user loudly.
 		 */
 		G_RAID3_DEBUG(0, "Device %s was started before the freshest "
 		    "disk (%s) arrives!! It will not be connected to the "
 		    "running device.", sc->sc_name,
 		    g_raid3_get_diskname(disk));
 		g_raid3_destroy_disk(disk);
 		/* Return immediately, because disk was destroyed. */
 		return (G_RAID3_DISK_STATE_NONE);
 	}
 
 	if (disk->d_sync.ds_syncid < sc->sc_syncid) {
 		/*
 		 * Reset all synchronization data for this disk,
 		 * because if it even was synchronized, it was
 		 * synchronized to disks with different syncid.
 		 */
 		disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING;
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		disk->d_sync.ds_syncid = sc->sc_syncid;
 	}
 
 	/* From here on the disk carries the syncid of the device. */
 	if ((disk->d_flags & G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) {
 		/* Disk does not need synchronization. */
 		newstate = G_RAID3_DISK_STATE_ACTIVE;
 	} else if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
 	    (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) {
 		/*
 		 * Synchronization may run; for a disk that already had the
 		 * right syncid it continues from the stored offset.
 		 */
 		newstate = G_RAID3_DISK_STATE_SYNCHRONIZING;
 	} else {
 		newstate = G_RAID3_DISK_STATE_STALE;
 	}
 	G_RAID3_DEBUG(3, "State for %s disk: %s.",
 	    g_raid3_get_diskname(disk), g_raid3_disk_state2str(newstate));
 	return (newstate);
 }
 
 /*
  * Update device state.
  *
  * Must be called with sc_lock held exclusively.  Behaviour depends on the
  * current device state:
  *
  * STARTING: the device is started once all components are connected, or
  *	once exactly one is missing and 'force' is set.  When 'force' is set
  *	and that condition does not hold, the device is flagged for
  *	destruction.  Starting means: drop components with an outdated genid,
  *	pick the newest syncid, decide which component (if any) must be
  *	synchronized, switch to COMPLETE or DEGRADED and queue a state event
  *	for every connected component.
  * DEGRADED / COMPLETE: bump genid if requested, wait while any component
  *	is still in NEW state, switch between DEGRADED and COMPLETE according
  *	to the number of ACTIVE components, launch the provider if there is
  *	none yet and release the root mount hold.  In DEGRADED state fewer
  *	than sc_ndisks - 1 ACTIVE components flags the device for destruction.
  *
  * NOTE(review): 'force' is used arithmetically below, so callers are
  * expected to pass exactly 0 or 1.
  */
 static void
 g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force)
 {
 	struct g_raid3_disk *disk;
 	u_int state;
 
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	switch (sc->sc_state) {
 	case G_RAID3_DEVICE_STATE_STARTING:
 	    {
 		u_int n, ndirty, ndisks, genid, syncid;
 
 		KASSERT(sc->sc_provider == NULL,
 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
 		/*
 		 * Are we ready? We are, if all disks are connected or
 		 * one disk is missing and 'force' is true.
 		 */
 		if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) {
 			/*
 			 * Not forced, i.e. every component showed up: the
 			 * callout is no longer needed.
 			 */
 			if (!force)
 				callout_drain(&sc->sc_callout);
 		} else {
 			if (force) {
 				/*
 				 * Timeout expired, so destroy device.
 				 */
 				sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
 				G_RAID3_DEBUG(1, "root_mount_rel[%u] %p",
 				    __LINE__, sc->sc_rootmount);
 				root_mount_rel(sc->sc_rootmount);
 				sc->sc_rootmount = NULL;
 			}
 			/* Otherwise keep waiting for more components. */
 			return;
 		}
 
 		/*
 		 * Find the biggest genid.
 		 */
 		genid = 0;
 		for (n = 0; n < sc->sc_ndisks; n++) {
 			disk = &sc->sc_disks[n];
 			if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 				continue;
 			if (disk->d_genid > genid)
 				genid = disk->d_genid;
 		}
 		sc->sc_genid = genid;
 		/*
 		 * Remove all disks without the biggest genid.
 		 */
 		for (n = 0; n < sc->sc_ndisks; n++) {
 			disk = &sc->sc_disks[n];
 			if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 				continue;
 			if (disk->d_genid < genid) {
 				G_RAID3_DEBUG(0,
 				    "Component %s (device %s) broken, skipping.",
 				    g_raid3_get_diskname(disk), sc->sc_name);
 				g_raid3_destroy_disk(disk);
 			}
 		}
 
 		/*
 		 * There must be at least 'sc->sc_ndisks - 1' components
 		 * with the same syncid and without SYNCHRONIZING flag.
 		 */
 
 		/*
 		 * Find the biggest syncid, number of valid components and
 		 * number of dirty components.
 		 */
 		ndirty = ndisks = syncid = 0;
 		for (n = 0; n < sc->sc_ndisks; n++) {
 			disk = &sc->sc_disks[n];
 			if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 				continue;
 			if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0)
 				ndirty++;
 			/*
 			 * A newer syncid invalidates every component counted
 			 * so far; components with an older one are skipped.
 			 */
 			if (disk->d_sync.ds_syncid > syncid) {
 				syncid = disk->d_sync.ds_syncid;
 				ndisks = 0;
 			} else if (disk->d_sync.ds_syncid < syncid) {
 				continue;
 			}
 			if ((disk->d_flags &
 			    G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) {
 				continue;
 			}
 			ndisks++;
 		}
 		/*
 		 * Do we have enough valid components?
 		 */
 		if (ndisks + 1 < sc->sc_ndisks) {
 			G_RAID3_DEBUG(0,
 			    "Device %s is broken, too few valid components.",
 			    sc->sc_name);
 			sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
 			return;
 		}
 		/*
 		 * If there is one DIRTY component and all disks are present,
 		 * mark it for synchronization. If there is more than one DIRTY
 		 * component, mark parity component for synchronization.
 		 */
 		if (ndisks == sc->sc_ndisks && ndirty == 1) {
 			for (n = 0; n < sc->sc_ndisks; n++) {
 				disk = &sc->sc_disks[n];
 				if ((disk->d_flags &
 				    G_RAID3_DISK_FLAG_DIRTY) == 0) {
 					continue;
 				}
 				disk->d_flags |=
 				    G_RAID3_DISK_FLAG_SYNCHRONIZING;
 			}
 		} else if (ndisks == sc->sc_ndisks && ndirty > 1) {
 			/* The last component is the one treated as parity. */
 			disk = &sc->sc_disks[sc->sc_ndisks - 1];
 			disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING;
 		}
 
 		sc->sc_syncid = syncid;
 		if (force) {
 			/* Remember to bump syncid on first write. */
 			sc->sc_bump_id |= G_RAID3_BUMP_SYNCID;
 		}
 		if (ndisks == sc->sc_ndisks)
 			state = G_RAID3_DEVICE_STATE_COMPLETE;
 		else /* if (ndisks == sc->sc_ndisks - 1) */
 			state = G_RAID3_DEVICE_STATE_DEGRADED;
 		G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.",
 		    sc->sc_name, g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_device_state2str(state));
 		sc->sc_state = state;
 		/*
 		 * Queue a state change for every connected component; note
 		 * that 'state' is reused here for the per-disk state.  A
 		 * STALE component also requests a syncid bump.
 		 */
 		for (n = 0; n < sc->sc_ndisks; n++) {
 			disk = &sc->sc_disks[n];
 			if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 				continue;
 			state = g_raid3_determine_state(disk);
 			g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT);
 			if (state == G_RAID3_DISK_STATE_STALE)
 				sc->sc_bump_id |= G_RAID3_BUMP_SYNCID;
 		}
 		break;
 	    }
 	case G_RAID3_DEVICE_STATE_DEGRADED:
 		/*
 		 * Genid need to be bumped immediately, so do it here.
 		 */
 		if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) {
 			sc->sc_bump_id &= ~G_RAID3_BUMP_GENID;
 			g_raid3_bump_genid(sc);
 		}
 
 		/* Components still in NEW state: nothing to decide yet. */
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0)
 			return;
 		/* More than one component missing: the device is lost. */
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) <
 		    sc->sc_ndisks - 1) {
 			if (sc->sc_provider != NULL)
 				g_raid3_destroy_provider(sc);
 			sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
 			return;
 		}
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) ==
 		    sc->sc_ndisks) {
 			state = G_RAID3_DEVICE_STATE_COMPLETE;
 			G_RAID3_DEBUG(1,
 			    "Device %s state changed from %s to %s.",
 			    sc->sc_name, g_raid3_device_state2str(sc->sc_state),
 			    g_raid3_device_state2str(state));
 			sc->sc_state = state;
 		}
 		if (sc->sc_provider == NULL)
 			g_raid3_launch_provider(sc);
 		if (sc->sc_rootmount != NULL) {
 			G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 			    sc->sc_rootmount);
 			root_mount_rel(sc->sc_rootmount);
 			sc->sc_rootmount = NULL;
 		}
 		break;
 	case G_RAID3_DEVICE_STATE_COMPLETE:
 		/*
 		 * Genid need to be bumped immediately, so do it here.
 		 */
 		if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) {
 			sc->sc_bump_id &= ~G_RAID3_BUMP_GENID;
 			g_raid3_bump_genid(sc);
 		}
 
 		/* Components still in NEW state: nothing to decide yet. */
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0)
 			return;
 		KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >=
 		    sc->sc_ndisks - 1,
 		    ("Too few ACTIVE components in COMPLETE state (device %s).",
 		    sc->sc_name));
 		/* Exactly one component is gone: fall back to DEGRADED. */
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) ==
 		    sc->sc_ndisks - 1) {
 			state = G_RAID3_DEVICE_STATE_DEGRADED;
 			G_RAID3_DEBUG(1,
 			    "Device %s state changed from %s to %s.",
 			    sc->sc_name, g_raid3_device_state2str(sc->sc_state),
 			    g_raid3_device_state2str(state));
 			sc->sc_state = state;
 		}
 		if (sc->sc_provider == NULL)
 			g_raid3_launch_provider(sc);
 		if (sc->sc_rootmount != NULL) {
 			G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 			    sc->sc_rootmount);
 			root_mount_rel(sc->sc_rootmount);
 			sc->sc_rootmount = NULL;
 		}
 		break;
 	default:
 		KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state)));
 		break;
 	}
 }
 
 /*
  * Update disk state and device state if needed.
  *
  * Moves 'disk' into 'state'.  Must be called with sc_lock of the owning
  * device held exclusively.  A disk entering NEW state on a device that is
  * already running (DEGRADED or COMPLETE) is moved on at once to the state
  * chosen by g_raid3_determine_state(), via the 'again' label.  Every
  * transition asserts the states it may legally start from.  The function
  * always returns 0.
  *
  * DISK_STATE_CHANGED() logs the transition; it relies on the local
  * variables 'disk', 'state' and 'sc' and must be used before d_state is
  * overwritten.
  */
 #define	DISK_STATE_CHANGED()	G_RAID3_DEBUG(1,			\
 	"Disk %s state changed from %s to %s (device %s).",		\
 	g_raid3_get_diskname(disk),					\
 	g_raid3_disk_state2str(disk->d_state),				\
 	g_raid3_disk_state2str(state), sc->sc_name)
 static int
 g_raid3_update_disk(struct g_raid3_disk *disk, u_int state)
 {
 	struct g_raid3_softc *sc;
 
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 again:
 	G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.",
 	    g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state),
 	    g_raid3_disk_state2str(state));
 	switch (state) {
 	case G_RAID3_DISK_STATE_NEW:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk arrives.
 		 */
 		/* Previous state should be NONE. */
 		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE,
 		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		disk->d_state = state;
 		G_RAID3_DEBUG(1, "Device %s: provider %s detected.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 		/* A starting device only collects its disks for now. */
 		if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING)
 			break;
 		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		/*
 		 * The device is running: continue with the follow-up state
 		 * right away.  NONE means the disk was destroyed.
 		 */
 		state = g_raid3_determine_state(disk);
 		if (state != G_RAID3_DISK_STATE_NONE)
 			goto again;
 		break;
 	case G_RAID3_DISK_STATE_ACTIVE:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk does not need synchronization.
 		 * 2. Synchronization process finished successfully.
 		 */
 		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		/* Previous state should be NEW or SYNCHRONIZING. */
 		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW ||
 		    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
 		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		/* Scenario 2: wrap up the finished synchronization. */
 		if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING;
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC;
 			g_raid3_sync_stop(sc, 0);
 		}
 		disk->d_state = state;
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		g_raid3_update_idle(sc, disk);
 		g_raid3_update_metadata(disk);
 		G_RAID3_DEBUG(1, "Device %s: provider %s activated.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 		break;
 	case G_RAID3_DISK_STATE_STALE:
 		/*
 		 * Possible scenarios:
 		 * 1. Stale disk was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		/*
 		 * STALE state is only possible if device is marked
 		 * NOAUTOSYNC.
 		 */
 		KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		g_raid3_update_metadata(disk);
 		G_RAID3_DEBUG(0, "Device %s: provider %s is stale.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 		break;
 	case G_RAID3_DISK_STATE_SYNCHRONIZING:
 		/*
 		 * Possible scenarios:
 		 * 1. Disk which needs synchronization was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		if (disk->d_state == G_RAID3_DISK_STATE_NEW)
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		/*
 		 * Synchronization is started here only when the provider
 		 * already exists.
 		 */
 		if (sc->sc_provider != NULL) {
 			g_raid3_sync_start(sc);
 			g_raid3_update_metadata(disk);
 		}
 		break;
 	case G_RAID3_DISK_STATE_DISCONNECTED:
 		/*
 		 * Possible scenarios:
 		 * 1. Device wasn't running yet, but disk disappeared.
 		 * 2. Disk was active and disappeared.
 		 * 3. Disk disappeared during synchronization process.
 		 */
 		if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
 			/*
 			 * Previous state should be ACTIVE, STALE or
 			 * SYNCHRONIZING.
 			 */
 			KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
 			    disk->d_state == G_RAID3_DISK_STATE_STALE ||
 			    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
 			    ("Wrong disk state (%s, %s).",
 			    g_raid3_get_diskname(disk),
 			    g_raid3_disk_state2str(disk->d_state)));
 		} else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) {
 			/* Previous state should be NEW. */
 			KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW,
 			    ("Wrong disk state (%s, %s).",
 			    g_raid3_get_diskname(disk),
 			    g_raid3_disk_state2str(disk->d_state)));
 			/*
 			 * Reset bumping syncid if disk disappeared in STARTING
 			 * state.
 			 */
 			if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0)
 				sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID;
 		/* The final 'else' exists only in INVARIANTS kernels. */
 #ifdef	INVARIANTS
 		} else {
 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
 			    sc->sc_name,
 			    g_raid3_device_state2str(sc->sc_state),
 			    g_raid3_get_diskname(disk),
 			    g_raid3_disk_state2str(disk->d_state)));
 #endif
 		}
 		DISK_STATE_CHANGED();
 		G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 
 		/* 'disk' must not be used after this call. */
 		g_raid3_destroy_disk(disk);
 		break;
 	default:
 		KASSERT(1 == 0, ("Unknown state (%u).", state));
 		break;
 	}
 	return (0);
 }
 #undef	DISK_STATE_CHANGED
 
 int
 g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	/* Metadata are stored on last sector. */
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL) {
 		G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).",
 		    cp->provider->name, error);
 		return (error);
 	}
 
 	/* Decode metadata. */
 	error = raid3_metadata_decode(buf, md);
 	g_free(buf);
 	if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0)
 		return (EINVAL);
 	if (md->md_version > G_RAID3_VERSION) {
 		G_RAID3_DEBUG(0,
 		    "Kernel module is too old to handle metadata from %s.",
 		    cp->provider->name);
 		return (EINVAL);
 	}
 	if (error != 0) {
 		G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
 		    cp->provider->name);
 		return (error);
 	}
 	if (md->md_sectorsize > maxphys) {
 		G_RAID3_DEBUG(0, "The blocksize is too big.");
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 static int
 g_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp,
     struct g_raid3_metadata *md)
 {
 
 	if (md->md_no >= sc->sc_ndisks) {
 		G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.",
 		    pp->name, md->md_no);
 		return (EINVAL);
 	}
 	if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) {
 		G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.",
 		    pp->name, md->md_no);
 		return (EEXIST);
 	}
 	if (md->md_all != sc->sc_ndisks) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_all", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_mediasize % md->md_sectorsize) != 0) {
 		G_RAID3_DEBUG(1, "Invalid metadata (mediasize %% sectorsize != "
 		    "0) on disk %s (device %s), skipping.", pp->name,
 		    sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_mediasize != sc->sc_mediasize) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_mediasize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_mediasize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((sc->sc_mediasize / (sc->sc_ndisks - 1)) > pp->mediasize) {
 		G_RAID3_DEBUG(1,
 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
 		    sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_sectorsize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_sectorsize != sc->sc_sectorsize) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_sectorsize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
 		G_RAID3_DEBUG(1,
 		    "Invalid sector size of disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) {
 		G_RAID3_DEBUG(1,
 		    "Invalid device flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 &&
 	    (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) {
 		/*
 		 * VERIFY and ROUND-ROBIN options are mutally exclusive.
 		 */
 		G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on "
 		    "disk %s (device %s), skipping.", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) {
 		G_RAID3_DEBUG(1,
 		    "Invalid disk flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
  * Validate the metadata read from provider pp and attach the provider to
  * device sc as a new component.
  *
  * Must be called without the topology lock (asserted).  Returns 0 on
  * success or an errno value when the metadata is rejected or the disk
  * cannot be initialized or announced.
  */
 int
 g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp,
     struct g_raid3_metadata *md)
 {
 	struct g_raid3_disk *newdisk;
 	int rc;
 
 	g_topology_assert_not();
 	G_RAID3_DEBUG(2, "Adding disk %s.", pp->name);
 
 	rc = g_raid3_check_metadata(sc, pp, md);
 	if (rc != 0)
 		return (rc);
 
 	/*
 	 * Outside of the STARTING state a component carrying an older
 	 * generation ID than the device is refused.
 	 */
 	if (md->md_genid < sc->sc_genid &&
 	    sc->sc_state != G_RAID3_DEVICE_STATE_STARTING) {
 		G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 
 	newdisk = g_raid3_init_disk(sc, pp, md, &rc);
 	if (newdisk == NULL)
 		return (rc);
 
 	rc = g_raid3_event_send(newdisk, G_RAID3_DISK_STATE_NEW,
 	    G_RAID3_EVENT_WAIT);
 	if (rc != 0)
 		return (rc);
 
 	/* Nothing more to do unless the on-disk metadata is outdated. */
 	if (md->md_version >= G_RAID3_VERSION)
 		return (0);
 
 	G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
 	    pp->name, md->md_version, G_RAID3_VERSION);
 	g_raid3_update_metadata(newdisk);
 	return (0);
 }
 
 /*
  * Event handler posted by g_raid3_access() once the last reference to a
  * device marked DESTROYING goes away.  arg is the softc.
  *
  * Entered with the topology lock held; it is dropped around the destroy
  * attempt and re-taken before returning.
  */
 static void
 g_raid3_destroy_delayed(void *arg, int flag)
 {
 	struct g_raid3_softc *sc = arg;
 
 	if (flag == EV_CANCEL) {
 		G_RAID3_DEBUG(1, "Destroying canceled.");
 		return;
 	}
 
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 	KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) == 0,
 	    ("DESTROY flag set on %s.", sc->sc_name));
 	KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0,
 	    ("DESTROYING flag not set on %s.", sc->sc_name));
 	G_RAID3_DEBUG(0, "Destroying %s (delayed).", sc->sc_name);
 
 	/* Only a failed destroy leaves sc_lock for us to release. */
 	if (g_raid3_destroy(sc, G_RAID3_DESTROY_SOFT) != 0) {
 		G_RAID3_DEBUG(0, "Cannot destroy %s.", sc->sc_name);
 		sx_xunlock(&sc->sc_lock);
 	}
 	g_topology_lock();
 }
 
 /*
  * GEOM access method for the raid3 provider.
  *
  * acr/acw/ace are the requested deltas to the read/write/exclusive
  * counts.  Returns 0 when the change is acceptable and ENXIO when an
  * open is attempted on a device that is being destroyed or has too few
  * active components.
  */
 static int
 g_raid3_access(struct g_provider *pp, int acr, int acw, int ace)
 {
 	struct g_raid3_softc *sc;
 	int newr, neww, newe;
 	int opening, rc;
 
 	g_topology_assert();
 	G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
 	    acw, ace);
 
 	sc = pp->geom->softc;
 	/* Closing a provider whose softc is already gone always succeeds. */
 	if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0)
 		return (0);
 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
 
 	/* Counts as they would be after this request is applied. */
 	newr = pp->acr + acr;
 	neww = pp->acw + acw;
 	newe = pp->ace + ace;
 	opening = (acr > 0 || acw > 0 || ace > 0);
 	rc = 0;
 
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0 ||
 	    g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) {
 		if (opening)
 			rc = ENXIO;
 	} else {
 		if (neww == 0)
 			g_raid3_idle(sc, neww);
 		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0) {
 			if (opening) {
 				rc = ENXIO;
 			} else if (newr == 0 && neww == 0 && newe == 0) {
 				/* Last close: schedule the delayed destroy. */
 				g_post_event(g_raid3_destroy_delayed, sc,
 				    M_WAITOK, sc, NULL);
 			}
 		}
 	}
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	return (rc);
 }
 
 /*
  * Create a new raid3 device described by the metadata md.
  *
  * Allocates the softc and its per-disk array, creates the action geom and
  * the ".sync" synchronization geom, optionally creates the three UMA
  * zones, starts the worker kernel process and arms the startup callout.
  *
  * Called with the topology lock held (asserted).  Returns the action
  * geom, or NULL when md describes no disks or the worker process cannot
  * be created (in which case everything allocated here is released).
  */
 static struct g_geom *
 g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md)
 {
 	struct g_raid3_softc *sc;
 	struct g_geom *gp;
 	int error, timeout;
 	u_int n;
 
 	g_topology_assert();
 	G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id);
 
 	/* One disk is minimum. */
 	if (md->md_all < 1)
 		return (NULL);
 	/*
 	 * Action geom.
 	 */
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO);
 	sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3,
 	    M_WAITOK | M_ZERO);
 	gp->start = g_raid3_start;
 	gp->orphan = g_raid3_orphan;
 	gp->access = g_raid3_access;
 	gp->dumpconf = g_raid3_dumpconf;
 
 	/* Seed the softc from the on-disk metadata. */
 	sc->sc_id = md->md_id;
 	sc->sc_mediasize = md->md_mediasize;
 	sc->sc_sectorsize = md->md_sectorsize;
 	sc->sc_ndisks = md->md_all;
 	sc->sc_round_robin = 0;
 	sc->sc_flags = md->md_mflags;
 	sc->sc_bump_id = 0;
 	sc->sc_idle = 1;
 	sc->sc_last_write = time_uptime;
 	sc->sc_writes = 0;
 	/* Every slot starts out empty until a component is tasted. */
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		sc->sc_disks[n].d_softc = sc;
 		sc->sc_disks[n].d_no = n;
 		sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK;
 	}
 	sx_init(&sc->sc_lock, "graid3:lock");
 	bioq_init(&sc->sc_queue);
 	mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF);
 	bioq_init(&sc->sc_regular_delayed);
 	bioq_init(&sc->sc_inflight);
 	bioq_init(&sc->sc_sync_delayed);
 	TAILQ_INIT(&sc->sc_events);
 	mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF);
 	callout_init(&sc->sc_callout, 1);
 	sc->sc_state = G_RAID3_DEVICE_STATE_STARTING;
 	gp->softc = sc;
 	sc->sc_geom = gp;
 	sc->sc_provider = NULL;
 	/*
 	 * Synchronization geom.
 	 */
 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
 	gp->softc = sc;
 	gp->orphan = g_raid3_orphan;
 	sc->sc_sync.ds_geom = gp;
 
 	/* Per-device 64k/16k/4k zones, unless plain malloc was requested. */
 	if (!g_raid3_use_malloc) {
 		sc->sc_zones[G_RAID3_ZONE_64K].sz_zone = uma_zcreate("gr3:64k",
 		    65536, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
 		sc->sc_zones[G_RAID3_ZONE_64K].sz_inuse = 0;
 		sc->sc_zones[G_RAID3_ZONE_64K].sz_max = g_raid3_n64k;
 		sc->sc_zones[G_RAID3_ZONE_64K].sz_requested =
 		    sc->sc_zones[G_RAID3_ZONE_64K].sz_failed = 0;
 		sc->sc_zones[G_RAID3_ZONE_16K].sz_zone = uma_zcreate("gr3:16k",
 		    16384, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
 		sc->sc_zones[G_RAID3_ZONE_16K].sz_inuse = 0;
 		sc->sc_zones[G_RAID3_ZONE_16K].sz_max = g_raid3_n16k;
 		sc->sc_zones[G_RAID3_ZONE_16K].sz_requested =
 		    sc->sc_zones[G_RAID3_ZONE_16K].sz_failed = 0;
 		sc->sc_zones[G_RAID3_ZONE_4K].sz_zone = uma_zcreate("gr3:4k",
 		    4096, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
 		sc->sc_zones[G_RAID3_ZONE_4K].sz_inuse = 0;
 		sc->sc_zones[G_RAID3_ZONE_4K].sz_max = g_raid3_n4k;
 		sc->sc_zones[G_RAID3_ZONE_4K].sz_requested =
 		    sc->sc_zones[G_RAID3_ZONE_4K].sz_failed = 0;
 	}
 
 	error = kproc_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0,
 	    "g_raid3 %s", md->md_name);
 	if (error != 0) {
 		/* Undo everything set up above. */
 		G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.",
 		    sc->sc_name);
 		if (!g_raid3_use_malloc) {
 			uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone);
 			uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone);
 			uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone);
 		}
 		g_destroy_geom(sc->sc_sync.ds_geom);
 		mtx_destroy(&sc->sc_events_mtx);
 		mtx_destroy(&sc->sc_queue_mtx);
 		sx_destroy(&sc->sc_lock);
 		g_destroy_geom(sc->sc_geom);
 		free(sc->sc_disks, M_RAID3);
 		free(sc, M_RAID3);
 		return (NULL);
 	}
 
 	G_RAID3_DEBUG(1, "Device %s created (%u components, id=%u).",
 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
 
 	sc->sc_rootmount = root_mount_hold("GRAID3");
 	G_RAID3_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
 
 	/*
 	 * Run timeout.
 	 */
 	/* g_raid3_go() fires after g_raid3_timeout seconds. */
 	timeout = atomic_load_acq_int(&g_raid3_timeout);
 	callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc);
 	return (sc->sc_geom);
 }
 
 /*
  * Destroy device sc.
  *
  * 'how' selects the behaviour when the provider is still open:
  * G_RAID3_DESTROY_SOFT refuses, G_RAID3_DESTROY_DELAYED marks the device
  * DESTROYING so it goes away on last close, and G_RAID3_DESTROY_HARD
  * proceeds regardless.
  *
  * Must be called without the topology lock and with sc_lock exclusively
  * held (both asserted).  Returns ENXIO for a NULL sc, EBUSY when
  * destruction was refused or deferred, and 0 either when the geom's softc
  * had already been cleared or after the device was torn down; in the
  * latter case sc has been freed and must not be touched by the caller.
  */
 int
 g_raid3_destroy(struct g_raid3_softc *sc, int how)
 {
 	struct g_provider *pp;
 
 	g_topology_assert_not();
 	if (sc == NULL)
 		return (ENXIO);
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	pp = sc->sc_provider;
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		switch (how) {
 		case G_RAID3_DESTROY_SOFT:
 			G_RAID3_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		case G_RAID3_DESTROY_DELAYED:
 			G_RAID3_DEBUG(1,
 			    "Device %s will be destroyed on last close.",
 			    pp->name);
 			if (sc->sc_syncdisk != NULL)
 				g_raid3_sync_stop(sc, 1);
 			sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROYING;
 			return (EBUSY);
 		case G_RAID3_DESTROY_HARD:
 			G_RAID3_DEBUG(1, "Device %s is still open, so it "
 			    "can't be definitely removed.", pp->name);
 			break;
 		}
 	}
 
 	/*
 	 * Detach the softc from both geoms under the topology lock; a
 	 * softc that is already NULL means there is nothing left to do.
 	 */
 	g_topology_lock();
 	if (sc->sc_geom->softc == NULL) {
 		g_topology_unlock();
 		return (0);
 	}
 	sc->sc_geom->softc = NULL;
 	sc->sc_sync.ds_geom->softc = NULL;
 	g_topology_unlock();
 
 	sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
 	sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT;
 	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	/* sc_lock is dropped while waiting for sc_worker to become NULL. */
 	sx_xunlock(&sc->sc_lock);
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	wakeup(&sc->sc_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
 	/* Re-check sc_worker every hz / 5 ticks. */
 	while (sc->sc_worker != NULL)
 		tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5);
 	G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
 	sx_xlock(&sc->sc_lock);
 	g_raid3_destroy_device(sc);
 	free(sc->sc_disks, M_RAID3);
 	free(sc, M_RAID3);
 	return (0);
 }
 
 /*
  * Orphan method installed on the temporary tasting geom.  It is not
  * expected to run, so it unconditionally trips an assertion.
  */
 static void
 g_raid3_taste_orphan(struct g_consumer *cp)
 {
 	KASSERT(0, ("%s called while tasting %s.", __func__,
 	    cp->provider->name));
 }
 
 /*
  * GEOM taste method: probe provider pp for raid3 metadata and, when
  * found, add it to the matching device, creating the device first if it
  * does not exist yet.
  *
  * Called with the topology lock held (asserted); the lock is dropped
  * while the disk is added and re-taken before returning.  Returns the
  * device's geom on success and NULL when pp is not a usable component.
  */
 static struct g_geom *
 g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_raid3_metadata md;
 	struct g_raid3_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	G_RAID3_DEBUG(2, "Tasting %s.", pp->name);
 
 	/* Throw-away geom and consumer used only to read the metadata. */
 	gp = g_new_geomf(mp, "raid3:taste");
 	/* This orphan function should be never called. */
 	gp->orphan = g_raid3_taste_orphan;
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		error = g_raid3_read_metadata(cp, &md);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	/* Reject metadata hardcoded to another provider name or size. */
 	if (md.md_provider[0] != '\0' &&
 	    !g_compare_names(md.md_provider, pp->name))
 		return (NULL);
 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
 		return (NULL);
 	if (g_raid3_debug >= 2)
 		raid3_metadata_dump(&md);
 
 	/*
 	 * Let's check if device already exists.
 	 */
 	sc = NULL;
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		if (sc->sc_sync.ds_geom == gp)
 			continue;
 		if (strcmp(md.md_name, sc->sc_name) != 0)
 			continue;
 		/* Same name but a different ID: refuse the component. */
 		if (md.md_id != sc->sc_id) {
 			G_RAID3_DEBUG(0, "Device %s already configured.",
 			    sc->sc_name);
 			return (NULL);
 		}
 		break;
 	}
 	/* gp is NULL here when the loop ran to completion without a match. */
 	if (gp == NULL) {
 		gp = g_raid3_create(mp, &md);
 		if (gp == NULL) {
 			G_RAID3_DEBUG(0, "Cannot create device %s.",
 			    md.md_name);
 			return (NULL);
 		}
 		sc = gp->softc;
 	}
 	G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 	error = g_raid3_add_disk(sc, pp, &md);
 	if (error != 0) {
 		G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
 		    pp->name, gp->name, error);
 		/* No component at all: tear the device down again. */
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) ==
 		    sc->sc_ndisks) {
 			g_cancel_event(sc);
 			g_raid3_destroy(sc, G_RAID3_DESTROY_HARD);
 			g_topology_lock();
 			return (NULL);
 		}
 		gp = NULL;
 	}
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	return (gp);
 }
 
 /*
  * GEOM destroy_geom method: attempt a soft destroy of the device behind
  * geom gp.
  *
  * Entered and left with the topology lock held.  Returns ENXIO when the
  * geom no longer has a softc, otherwise the result of g_raid3_destroy().
  */
 static int
 g_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused,
     struct g_geom *gp)
 {
 	struct g_raid3_softc *sc;
 	int error;
 
 	/*
 	 * g_raid3_destroy() clears gp->softc when it starts tearing the
 	 * device down, so a NULL softc is possible here; bail out instead
 	 * of dereferencing it to reach sc_lock.
 	 */
 	sc = gp->softc;
 	if (sc == NULL)
 		return (ENXIO);
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 	g_cancel_event(sc);
 	error = g_raid3_destroy(gp->softc, G_RAID3_DESTROY_SOFT);
 	/* On success the softc (and its lock) no longer exist. */
 	if (error != 0)
 		sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	return (error);
 }
 
 /*
  * GEOM dumpconf method: append XML describing the raid3 device to sb.
  *
  * Nothing is emitted for providers or for the synchronization geom.  For
  * a consumer (cp != NULL) the per-disk state is written; with neither pp
  * nor cp the device-wide state is written.  Called with the topology
  * lock held (asserted); it is dropped while sc_lock is held and re-taken
  * before returning.
  */
 static void
 g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_raid3_softc *sc;
 
 	g_topology_assert();
 
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	/* Skip synchronization geom. */
 	if (gp == sc->sc_sync.ds_geom)
 		return;
 	if (pp != NULL) {
 		/* Nothing here. */
 	} else if (cp != NULL) {
 		struct g_raid3_disk *disk;
 
 		disk = cp->private;
 		if (disk == NULL)
 			return;
 		g_topology_unlock();
 		sx_xlock(&sc->sc_lock);
 		/* The highest-numbered component is reported as parity. */
 		sbuf_printf(sb, "%s<Type>", indent);
 		if (disk->d_no == sc->sc_ndisks - 1)
 			sbuf_cat(sb, "PARITY");
 		else
 			sbuf_cat(sb, "DATA");
 		sbuf_cat(sb, "</Type>\n");
 		sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
 		    (u_int)disk->d_no);
 		if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
 			sbuf_printf(sb, "%s<Synchronized>", indent);
 			if (disk->d_sync.ds_offset == 0)
 				sbuf_cat(sb, "0%");
 			else {
 				/*
 				 * Progress relative to the per-component
 				 * share of the media size.
 				 */
 				sbuf_printf(sb, "%u%%",
 				    (u_int)((disk->d_sync.ds_offset * 100) /
 				    (sc->sc_mediasize / (sc->sc_ndisks - 1))));
 			}
 			sbuf_cat(sb, "</Synchronized>\n");
 			if (disk->d_sync.ds_offset > 0) {
 				sbuf_printf(sb, "%s<BytesSynced>%jd"
 				    "</BytesSynced>\n", indent,
 				    (intmax_t)disk->d_sync.ds_offset);
 			}
 		}
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
 		    disk->d_sync.ds_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, disk->d_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (disk->d_flags == 0)
 			sbuf_cat(sb, "NONE");
 		else {
 			int first = 1;
 
 /* Append 'name' if 'flag' is set, comma-separating successive names. */
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((disk->d_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_cat(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_cat(sb, name);					\
 	}								\
 } while (0)
 			ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY");
 			ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED");
 			ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING,
 			    "SYNCHRONIZING");
 			ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
 			ADD_FLAG(G_RAID3_DISK_FLAG_BROKEN, "BROKEN");
 #undef	ADD_FLAG
 		}
 		sbuf_cat(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 		    g_raid3_disk_state2str(disk->d_state));
 		sx_xunlock(&sc->sc_lock);
 		g_topology_lock();
 	} else {
 		g_topology_unlock();
 		sx_xlock(&sc->sc_lock);
 		/* Zone counters are only reported when the zones are in use. */
 		if (!g_raid3_use_malloc) {
 			sbuf_printf(sb,
 			    "%s<Zone4kRequested>%u</Zone4kRequested>\n", indent,
 			    sc->sc_zones[G_RAID3_ZONE_4K].sz_requested);
 			sbuf_printf(sb,
 			    "%s<Zone4kFailed>%u</Zone4kFailed>\n", indent,
 			    sc->sc_zones[G_RAID3_ZONE_4K].sz_failed);
 			sbuf_printf(sb,
 			    "%s<Zone16kRequested>%u</Zone16kRequested>\n", indent,
 			    sc->sc_zones[G_RAID3_ZONE_16K].sz_requested);
 			sbuf_printf(sb,
 			    "%s<Zone16kFailed>%u</Zone16kFailed>\n", indent,
 			    sc->sc_zones[G_RAID3_ZONE_16K].sz_failed);
 			sbuf_printf(sb,
 			    "%s<Zone64kRequested>%u</Zone64kRequested>\n", indent,
 			    sc->sc_zones[G_RAID3_ZONE_64K].sz_requested);
 			sbuf_printf(sb,
 			    "%s<Zone64kFailed>%u</Zone64kFailed>\n", indent,
 			    sc->sc_zones[G_RAID3_ZONE_64K].sz_failed);
 		}
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (sc->sc_flags == 0)
 			sbuf_cat(sb, "NONE");
 		else {
 			int first = 1;
 
 /* Same helper as above, but testing the device-wide flags. */
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((sc->sc_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_cat(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_cat(sb, name);					\
 	}								\
 } while (0)
 			ADD_FLAG(G_RAID3_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
 			ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
 			ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN,
 			    "ROUND-ROBIN");
 			ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY");
 #undef	ADD_FLAG
 		}
 		sbuf_cat(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
 		    sc->sc_ndisks);
 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 		    g_raid3_device_state2str(sc->sc_state));
 		sx_xunlock(&sc->sc_lock);
 		g_topology_lock();
 	}
 }
 
 /*
  * shutdown_post_sync event handler; arg is the raid3 class.
  *
  * Sets g_raid3_shutdown and asks every device of the class to go idle
  * and be destroyed (on last close if it is still open).
  */
 static void
 g_raid3_shutdown_post_sync(void *arg, int howto)
 {
 	struct g_class *mp = arg;
 	struct g_geom *gp, *next;
 	struct g_raid3_softc *sc;
 
 	g_topology_lock();
 	g_raid3_shutdown = 1;
 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, next) {
 		sc = gp->softc;
 		/* Skip geoms without a softc and the synchronization geom. */
 		if (sc == NULL || gp == sc->sc_sync.ds_geom)
 			continue;
 		g_topology_unlock();
 		sx_xlock(&sc->sc_lock);
 		g_raid3_idle(sc, -1);
 		g_cancel_event(sc);
 		/* A non-zero result means sc still exists and is locked. */
 		if (g_raid3_destroy(sc, G_RAID3_DESTROY_DELAYED) != 0)
 			sx_xunlock(&sc->sc_lock);
 		g_topology_lock();
 	}
 	g_topology_unlock();
 }
 
 /*
  * Class init method: hook g_raid3_shutdown_post_sync() into the
  * shutdown_post_sync event, warning when registration fails.
  */
 static void
 g_raid3_init(struct g_class *mp)
 {
 	g_raid3_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
 	    g_raid3_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
 	if (g_raid3_post_sync != NULL)
 		return;
 	G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event.");
 }
 
 /*
  * Class fini method: remove the shutdown_post_sync hook if g_raid3_init()
  * managed to register one.
  */
 static void
 g_raid3_fini(struct g_class *mp)
 {
 	if (g_raid3_post_sync == NULL)
 		return;
 	EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_post_sync);
 }
 
 /* Declare the raid3 GEOM class module and its module version (0). */
 DECLARE_GEOM_CLASS(g_raid3_class, g_raid3);
 MODULE_VERSION(geom_raid3, 0);
diff --git a/sys/geom/shsec/g_shsec.c b/sys/geom/shsec/g_shsec.c
index 2b9e127ce350..5ca00b64219d 100644
--- a/sys/geom/shsec/g_shsec.c
+++ b/sys/geom/shsec/g_shsec.c
@@ -1,844 +1,845 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <vm/uma.h>
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 #include <geom/shsec/g_shsec.h>
 
 FEATURE(geom_shsec, "GEOM shared secret device support");
 
 /* malloc(9) type used for softc and disk-array allocations. */
 static MALLOC_DEFINE(M_SHSEC, "shsec_data", "GEOM_SHSEC Data");
 
 /* UMA zone supplying the per-request I/O buffers. */
 static uma_zone_t g_shsec_zone;
 
 static int g_shsec_destroy(struct g_shsec_softc *sc, boolean_t force);
 static int g_shsec_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 
 static g_taste_t g_shsec_taste;
 static g_ctl_req_t g_shsec_config;
 static g_dumpconf_t g_shsec_dumpconf;
 static g_init_t g_shsec_init;
 static g_fini_t g_shsec_fini;
 
 /* Method table of the shsec GEOM class. */
 struct g_class g_shsec_class = {
 	.name = G_SHSEC_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_shsec_config,
 	.taste = g_shsec_taste,
 	.destroy_geom = g_shsec_destroy_geom,
 	.init = g_shsec_init,
 	.fini = g_shsec_fini
 };
 
 /* kern.geom.shsec.* sysctl knobs and counters. */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, shsec, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_SHSEC stuff");
 static u_int g_shsec_debug;
 SYSCTL_UINT(_kern_geom_shsec, OID_AUTO, debug, CTLFLAG_RWTUN, &g_shsec_debug, 0,
     "Debug level");
 /* Set in g_shsec_init(); CTLFLAG_NOFETCH, so it is fetched there by hand. */
 static u_long g_shsec_maxmem;
 SYSCTL_ULONG(_kern_geom_shsec, OID_AUTO, maxmem,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &g_shsec_maxmem,
     0, "Maximum memory that can be allocated for I/O (in bytes)");
 /* Incremented in g_shsec_start() whenever a buffer allocation fails. */
 static u_int g_shsec_alloc_failed = 0;
 SYSCTL_UINT(_kern_geom_shsec, OID_AUTO, alloc_failed, CTLFLAG_RD,
     &g_shsec_alloc_failed, 0, "How many times I/O allocation failed");
 
 /*
  * Greatest Common Divisor.
  *
  * Euclid's algorithm.  gcd(a, 0) and gcd(0, a) both yield a, so the
  * result is 0 only when both arguments are 0.
  */
 static u_int
 gcd(u_int a, u_int b)
 {
 	u_int c;
 
 	while (b != 0) {
 		c = a;
 		a = b;
 		b = (c % b);
 	}
 	return (a);
 }
 
 /*
  * Least Common Multiple.
  *
  * Returns 0 when either argument is 0 (which also avoids dividing by a
  * zero gcd).  The division is done before the multiplication so the
  * intermediate value never exceeds the final result; a * b could wrap
  * around u_int even when the LCM itself is representable.
  */
 static u_int
 lcm(u_int a, u_int b)
 {
 
 	if (a == 0 || b == 0)
 		return (0);
 	return ((a / gcd(a, b)) * b);
 }
 
 /*
  * Class init method: determine the I/O memory limit and create the UMA
  * zone that supplies maxphys-sized request buffers.
  *
  * The limit defaults to 100 buffers, may be overridden through the
  * kern.geom.shsec.maxmem tunable, and is rounded down to a whole number
  * of buffers before being applied to the zone.
  */
 static void
 g_shsec_init(struct g_class *mp __unused)
 {
 
 	g_shsec_maxmem = maxphys * 100;
 	/*
 	 * The sysctl is declared CTLFLAG_NOFETCH, so this is the only place
 	 * the tunable is read; the name has to match the sysctl OID exactly
 	 * (a stray trailing comma used to make it unmatchable).
 	 */
 	TUNABLE_ULONG_FETCH("kern.geom.shsec.maxmem", &g_shsec_maxmem);
 	g_shsec_zone = uma_zcreate("g_shsec_zone", maxphys, NULL, NULL, NULL,
 	    NULL, 0, 0);
 	g_shsec_maxmem -= g_shsec_maxmem % maxphys;
 	uma_zone_set_max(g_shsec_zone, g_shsec_maxmem / maxphys);
 }
 
 /* Class fini method: destroy the I/O buffer zone created by g_shsec_init(). */
 static void
 g_shsec_fini(struct g_class *mp __unused)
 {
 
 	uma_zdestroy(g_shsec_zone);
 }
 
 /*
  * Count the slots of sc->sc_disks that currently hold a consumer.
  */
 static u_int
 g_shsec_nvalid(struct g_shsec_softc *sc)
 {
 	u_int count, idx;
 
 	count = 0;
 	for (idx = 0; idx < sc->sc_ndisks; idx++)
 		count += (sc->sc_disks[idx] != NULL) ? 1 : 0;
 	return (count);
 }
 
 /*
  * Drop consumer cp from its device: clear its slot, wither the device's
  * provider if one exists, and detach and destroy the consumer unless it
  * still has access counts outstanding.
  */
 static void
 g_shsec_remove_disk(struct g_consumer *cp)
 {
 	struct g_shsec_softc *sc;
 	u_int slot;
 
 	KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
 	sc = (struct g_shsec_softc *)cp->private;
 	KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
 	slot = cp->index;
 
 	G_SHSEC_DEBUG(0, "Disk %s removed from %s.", cp->provider->name,
 	    sc->sc_name);
 	sc->sc_disks[slot] = NULL;
 
 	if (sc->sc_provider != NULL) {
 		g_wither_provider(sc->sc_provider, ENXIO);
 		sc->sc_provider = NULL;
 		G_SHSEC_DEBUG(0, "Device %s removed.", sc->sc_name);
 	}
 
 	/* A consumer that is still open is left attached. */
 	if (cp->acr <= 0 && cp->acw <= 0 && cp->ace <= 0) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 	}
 }
 
 /*
  * Orphan (and spoiled) method: remove the disk behind cp and destroy the
  * device once its geom has no consumers left.
  */
 static void
 g_shsec_orphan(struct g_consumer *cp)
 {
 	struct g_geom *gp;
 	struct g_shsec_softc *sc;
 
 	g_topology_assert();
 	gp = cp->geom;
 	if ((sc = gp->softc) == NULL)
 		return;
 
 	g_shsec_remove_disk(cp);
 	/* If there are no valid disks anymore, remove device. */
 	if (!LIST_EMPTY(&gp->consumer))
 		return;
 	g_shsec_destroy(sc, 1);
 }
 
 /*
  * GEOM access method: propagate the access count change (dr, dw, de) of
  * provider pp to every consumer of its geom.
  *
  * Returns 0 when all consumers accepted the change, the error of the
  * first consumer that refused it (after backing out the consumers that
  * had already been updated), or ENXIO when the geom has no consumers.
  */
 static int
 g_shsec_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *cp1, *cp2, *tmp;
 	struct g_shsec_softc *sc;
 	struct g_geom *gp;
 	int error;
 
 	gp = pp->geom;
 	sc = gp->softc;
 
 	/* On first open, grab an extra "exclusive" bit */
 	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
 		de++;
 	/* ... and let go of it on last close */
 	if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
 		de--;
 
 	/* Stays ENXIO if the consumer list turns out to be empty. */
 	error = ENXIO;
 	LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) {
 		error = g_access(cp1, dr, dw, de);
 		if (error != 0)
 			goto fail;
 		/* An orphaned consumer is released once fully closed. */
 		if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 &&
 		    cp1->flags & G_CF_ORPHAN) {
 			g_detach(cp1);
 			g_destroy_consumer(cp1);
 		}
 	}
 
 	/* If there are no valid disks anymore, remove device. */
 	if (LIST_EMPTY(&gp->consumer))
 		g_shsec_destroy(sc, 1);
 
 	return (error);
 
 fail:
 	/* If we fail here, backout all previous changes. */
 	LIST_FOREACH(cp2, &gp->consumer, consumer) {
 		/* cp1 is the consumer that failed; it was never updated. */
 		if (cp1 == cp2)
 			break;
 		g_access(cp2, -dr, -dw, -de);
 	}
 	return (error);
 }
 
 /*
  * XOR len bytes of src into dst, one 32-bit word at a time.  len is
  * asserted to have been a multiple of the word size.
  */
 static void
 g_shsec_xor1(uint32_t *src, uint32_t *dst, ssize_t len)
 {
 
 	while (len > 0) {
 		*dst++ ^= *src++;
 		len -= sizeof(uint32_t);
 	}
 	KASSERT(len == 0, ("len != 0 (len=%zd)", len));
 }
 
 static void
 g_shsec_done(struct bio *bp)
 {
 	struct g_shsec_softc *sc;
 	struct bio *pbp;
 
 	pbp = bp->bio_parent;
 	sc = pbp->bio_to->geom->softc;
 	if (bp->bio_error == 0)
 		G_SHSEC_LOGREQ(2, bp, "Request done.");
 	else {
 		G_SHSEC_LOGREQ(0, bp, "Request failed (error=%d).",
 		    bp->bio_error);
 		if (pbp->bio_error == 0)
 			pbp->bio_error = bp->bio_error;
 	}
 	if (pbp->bio_cmd == BIO_READ) {
 		if ((pbp->bio_pflags & G_SHSEC_BFLAG_FIRST) != 0) {
 			bcopy(bp->bio_data, pbp->bio_data, pbp->bio_length);
 			pbp->bio_pflags = 0;
 		} else {
 			g_shsec_xor1((uint32_t *)bp->bio_data,
 			    (uint32_t *)pbp->bio_data,
 			    (ssize_t)pbp->bio_length);
 		}
 	}
 	if (bp->bio_data != NULL) {
 		explicit_bzero(bp->bio_data, bp->bio_length);
 		uma_zfree(g_shsec_zone, bp->bio_data);
 	}
 	g_destroy_bio(bp);
 	pbp->bio_inbed++;
 	if (pbp->bio_children == pbp->bio_inbed) {
 		pbp->bio_completed = pbp->bio_length;
 		g_io_deliver(pbp, pbp->bio_error);
 	}
 }
 
 /*
  * Fill rand with len bytes of arc4random() output and XOR the same
  * words into dst.  len is asserted to have been a multiple of the word
  * size.
  */
 static void
 g_shsec_xor2(uint32_t *rand, uint32_t *dst, ssize_t len)
 {
 	uint32_t word;
 
 	while (len > 0) {
 		word = arc4random();
 		*rand++ = word;
 		*dst++ ^= word;
 		len -= sizeof(uint32_t);
 	}
 	KASSERT(len == 0, ("len != 0 (len=%zd)", len));
 }
 
 /*
  * GEOM start method: fan request bp out to every disk of the device.
  *
  * READ, WRITE, FLUSH and SPEEDUP are supported; anything else is
  * completed with EOPNOTSUPP.  One clone per disk is prepared up front
  * (with its own zone buffer for READ/WRITE) and only when all of them
  * could be set up are they issued.  For writes, disk 0's buffer starts
  * as a copy of the caller's data and each further disk gets random data
  * that is also XORed into disk 0's buffer.  If a clone or a buffer
  * cannot be allocated, everything prepared so far is released and bp is
  * completed with ENOMEM (or bp's own error if already set).
  */
 static void
 g_shsec_start(struct bio *bp)
 {
 	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
 	struct g_shsec_softc *sc;
 	struct bio *cbp;
 	uint32_t *dst;
 	ssize_t len;
 	u_int no;
 	int error;
 
 	sc = bp->bio_to->geom->softc;
 	/*
 	 * If sc == NULL, provider's error should be set and g_shsec_start()
 	 * should not be called at all.
 	 */
 	KASSERT(sc != NULL,
 	    ("Provider's error should be set (error=%d)(device=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 
 	G_SHSEC_LOGREQ(2, bp, "Request received.");
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_FLUSH:
 	case BIO_SPEEDUP:
 		/*
 		 * Only those requests are supported.
 		 */
 		break;
 	case BIO_DELETE:
 	case BIO_GETATTR:
 		/* To which provider it should be delivered? */
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 
 	/*
 	 * Allocate all bios first and calculate XOR.
 	 */
 	dst = NULL;
 	len = bp->bio_length;
 	/* Tells g_shsec_done() to copy, not XOR, the first completed read. */
 	if (bp->bio_cmd == BIO_READ)
 		bp->bio_pflags = G_SHSEC_BFLAG_FIRST;
 	for (no = 0; no < sc->sc_ndisks; no++) {
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			error = ENOMEM;
 			goto failure;
 		}
 		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
 
 		/*
 		 * Fill in the component buf structure.
 		 */
 		cbp->bio_done = g_shsec_done;
 		/* bio_caller2 carries the target consumer until dispatch. */
 		cbp->bio_caller2 = sc->sc_disks[no];
 		if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
 			cbp->bio_data = uma_zalloc(g_shsec_zone, M_NOWAIT);
 			if (cbp->bio_data == NULL) {
 				g_shsec_alloc_failed++;
 				error = ENOMEM;
 				goto failure;
 			}
 			if (bp->bio_cmd == BIO_WRITE) {
 				if (no == 0) {
 					/* dst stays pointed at disk 0's buffer. */
 					dst = (uint32_t *)cbp->bio_data;
 					bcopy(bp->bio_data, dst, len);
 				} else {
 					g_shsec_xor2((uint32_t *)cbp->bio_data,
 					    dst, len);
 				}
 			}
 		}
 	}
 	/*
 	 * Fire off all allocated requests!
 	 */
 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 		struct g_consumer *cp;
 
 		TAILQ_REMOVE(&queue, cbp, bio_queue);
 		cp = cbp->bio_caller2;
 		cbp->bio_caller2 = NULL;
 		cbp->bio_to = cp->provider;
 		G_SHSEC_LOGREQ(2, cbp, "Sending request.");
 		g_io_request(cbp, cp);
 	}
 	return;
 failure:
 	/* Release every clone queued so far; none has been issued yet. */
 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 		TAILQ_REMOVE(&queue, cbp, bio_queue);
 		bp->bio_children--;
 		if (cbp->bio_data != NULL) {
 			explicit_bzero(cbp->bio_data, cbp->bio_length);
 			uma_zfree(g_shsec_zone, cbp->bio_data);
 		}
 		g_destroy_bio(cbp);
 	}
 	if (bp->bio_error == 0)
 		bp->bio_error = error;
 	g_io_deliver(bp, bp->bio_error);
 }
 
 static void
 g_shsec_check_and_run(struct g_shsec_softc *sc)
 {
 	off_t mediasize, ms;
 	u_int no, sectorsize = 0;
 
 	if (g_shsec_nvalid(sc) != sc->sc_ndisks)
 		return;
 
 	sc->sc_provider = g_new_providerf(sc->sc_geom, "shsec/%s", sc->sc_name);
 	/*
 	 * Find the smallest disk.
 	 */
 	mediasize = sc->sc_disks[0]->provider->mediasize;
 	mediasize -= sc->sc_disks[0]->provider->sectorsize;
 	sectorsize = sc->sc_disks[0]->provider->sectorsize;
 	for (no = 1; no < sc->sc_ndisks; no++) {
 		ms = sc->sc_disks[no]->provider->mediasize;
 		ms -= sc->sc_disks[no]->provider->sectorsize;
 		if (ms < mediasize)
 			mediasize = ms;
 		sectorsize = lcm(sectorsize,
 		    sc->sc_disks[no]->provider->sectorsize);
 	}
 	sc->sc_provider->sectorsize = sectorsize;
 	sc->sc_provider->mediasize = mediasize;
 	g_error_provider(sc->sc_provider, 0);
 
 	G_SHSEC_DEBUG(0, "Device %s activated.", sc->sc_name);
 }
 
 static int
 g_shsec_read_metadata(struct g_consumer *cp, struct g_shsec_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL)
 		return (error);
 
 	/* Decode metadata. */
 	shsec_metadata_decode(buf, md);
 	g_free(buf);
 
 	return (0);
 }
 
 /*
  * Add disk to given device.
  *
  * Attaches provider pp to device sc as disk number 'no', mirroring the
  * access counts of the geom's first consumer when that one is open, and
  * re-reads the metadata through the new consumer to make sure it still
  * belongs to this device.
  *
  * Returns 0 on success, EINVAL when 'no' is out of range or the metadata
  * no longer matches the device, EEXIST when the slot is already taken,
  * or the error from attaching, opening or reading the provider.
  */
 static int
 g_shsec_add_disk(struct g_shsec_softc *sc, struct g_provider *pp, u_int no)
 {
 	struct g_consumer *cp, *fcp;
 	struct g_geom *gp;
 	struct g_shsec_metadata md;
 	int error;
 
 	/* Metadata corrupted? */
 	if (no >= sc->sc_ndisks)
 		return (EINVAL);
 
 	/* Check if disk is not already attached. */
 	if (sc->sc_disks[no] != NULL)
 		return (EEXIST);
 
 	gp = sc->sc_geom;
 	/* Remember the first existing consumer before adding a new one. */
 	fcp = LIST_FIRST(&gp->consumer);
 
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		return (error);
 	}
 
 	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
 		error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
 		if (error != 0) {
 			g_detach(cp);
 			g_destroy_consumer(cp);
 			return (error);
 		}
 	}
 
 	/* Reread metadata. */
 	error = g_shsec_read_metadata(cp, &md);
 	if (error != 0)
 		goto fail;
 
 	if (strcmp(md.md_magic, G_SHSEC_MAGIC) != 0 ||
 	    strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) {
 		G_SHSEC_DEBUG(0, "Metadata on %s changed.", pp->name);
 		/*
 		 * error is still 0 from the successful read above; without
 		 * this the caller would be told the disk had been added.
 		 */
 		error = EINVAL;
 		goto fail;
 	}
 
 	cp->private = sc;
 	cp->index = no;
 	sc->sc_disks[no] = cp;
 
 	G_SHSEC_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);
 
 	g_shsec_check_and_run(sc);
 
 	return (0);
 fail:
 	/* Give back the access counts mirrored from the first consumer. */
 	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
 		g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	return (error);
 }
 
 /*
  * Create the geom and softc for the device described by md.
  *
  * Returns the new geom, or NULL when md lists fewer than two disks or a
  * device with the same name already exists in class mp.  No provider is
  * created here.
  */
 static struct g_geom *
 g_shsec_create(struct g_class *mp, const struct g_shsec_metadata *md)
 {
 	struct g_shsec_softc *sc;
 	struct g_geom *gp;
 	u_int idx;
 
 	G_SHSEC_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id);
 
 	/* Two disks is minimum. */
 	if (md->md_all < 2) {
 		G_SHSEC_DEBUG(0, "Too few disks defined for %s.", md->md_name);
 		return (NULL);
 	}
 
 	/* Check for duplicate unit */
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL || strcmp(sc->sc_name, md->md_name) != 0)
 			continue;
 		G_SHSEC_DEBUG(0, "Device %s already configured.",
 		    sc->sc_name);
 		return (NULL);
 	}
 
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	sc = malloc(sizeof(*sc), M_SHSEC, M_WAITOK | M_ZERO);
 
 	/* Softc: identity plus an empty slot per expected disk. */
 	sc->sc_id = md->md_id;
 	sc->sc_ndisks = md->md_all;
 	sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
 	    M_SHSEC, M_WAITOK | M_ZERO);
 	for (idx = 0; idx < sc->sc_ndisks; idx++)
 		sc->sc_disks[idx] = NULL;
 	sc->sc_provider = NULL;
 	sc->sc_geom = gp;
 
 	/* Geom: method table and back pointer to the softc. */
 	gp->start = g_shsec_start;
 	gp->spoiled = g_shsec_orphan;
 	gp->orphan = g_shsec_orphan;
 	gp->access = g_shsec_access;
 	gp->dumpconf = g_shsec_dumpconf;
 	gp->softc = sc;
 
 	G_SHSEC_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
 
 	return (gp);
 }
 
 /*
  * Destroy device sc: remove all of its disks, free the softc and wither
  * the geom.
  *
  * Called with the topology lock held (asserted).  Returns ENXIO for a
  * NULL sc, EBUSY when the provider is still open and force is not set,
  * and 0 otherwise.
  */
 static int
 g_shsec_destroy(struct g_shsec_softc *sc, boolean_t force)
 {
 	struct g_provider *pp;
 	struct g_geom *gp;
 	u_int idx;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 
 	pp = sc->sc_provider;
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		if (!force) {
 			G_SHSEC_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		}
 		G_SHSEC_DEBUG(0, "Device %s is still open, so it "
 		    "can't be definitely removed.", pp->name);
 	}
 
 	for (idx = 0; idx < sc->sc_ndisks; idx++) {
 		if (sc->sc_disks[idx] == NULL)
 			continue;
 		g_shsec_remove_disk(sc->sc_disks[idx]);
 	}
 
 	gp = sc->sc_geom;
 	gp->softc = NULL;
 	KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
 	    gp->name));
 	free(sc->sc_disks, M_SHSEC);
 	free(sc, M_SHSEC);
 
 	/* Only announce destruction when no provider is left open. */
 	pp = LIST_FIRST(&gp->provider);
 	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
 		G_SHSEC_DEBUG(0, "Device %s destroyed.", gp->name);
 
 	g_wither_geom(gp, ENXIO);
 
 	return (0);
 }
 
 /*
  * GEOM destroy_geom method: non-forced destroy of the device behind gp.
  */
 static int
 g_shsec_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused,
     struct g_geom *gp)
 {
 
 	return (g_shsec_destroy(gp->softc, 0));
 }
 
 /*
  * Taste method: read shsec metadata from pp and, when it is acceptable,
  * add pp to the matching device, creating that device first if no geom of
  * this class has the same name and id.
  *
  * Returns the geom the provider was added to, or NULL when the provider is
  * open for writing, the metadata cannot be read or is rejected, or the disk
  * could not be added.
  */
 static struct g_geom *
 g_shsec_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_shsec_metadata md;
 	struct g_shsec_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 
 	/* Skip providers that are already open for writing. */
 	if (pp->acw > 0)
 		return (NULL);
 
 	G_SHSEC_DEBUG(3, "Tasting %s.", pp->name);
 
 	/*
 	 * A throw-away geom and consumer are used only to read the metadata;
 	 * both are destroyed again before the result is examined.
 	 */
 	gp = g_new_geomf(mp, "shsec:taste");
 	gp->start = g_shsec_start;
 	gp->access = g_shsec_access;
 	gp->orphan = g_shsec_orphan;
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		error = g_shsec_read_metadata(cp, &md);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	/* error is the result of the attach or of the metadata read. */
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	if (strcmp(md.md_magic, G_SHSEC_MAGIC) != 0)
 		return (NULL);
 	if (md.md_version > G_SHSEC_VERSION) {
 		G_SHSEC_DEBUG(0, "Kernel module is too old to handle %s.\n",
 		    pp->name);
 		return (NULL);
 	}
 	/*
 	 * Backward compatibility:
 	 */
 	/* There was no md_provsize field in earlier versions of metadata. */
 	if (md.md_version < 1)
 		md.md_provsize = pp->mediasize;
 
 	/* A recorded provider name or size that does not match pp rejects it. */
 	if (md.md_provider[0] != '\0' &&
 	    !g_compare_names(md.md_provider, pp->name))
 		return (NULL);
 	if (md.md_provsize != pp->mediasize)
 		return (NULL);
 
 	/*
 	 * Let's check if device already exists.
 	 * The loop leaves gp non-NULL only when it broke out on a geom whose
 	 * name and id both match the metadata.
 	 */
 	sc = NULL;
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		if (strcmp(md.md_name, sc->sc_name) != 0)
 			continue;
 		if (md.md_id != sc->sc_id)
 			continue;
 		break;
 	}
 	if (gp != NULL) {
 		G_SHSEC_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 		error = g_shsec_add_disk(sc, pp, md.md_no);
 		if (error != 0) {
 			G_SHSEC_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
 			    pp->name, gp->name, error);
 			return (NULL);
 		}
 	} else {
 		gp = g_shsec_create(mp, &md);
 		if (gp == NULL) {
 			G_SHSEC_DEBUG(0, "Cannot create device %s.", md.md_name);
 			return (NULL);
 		}
 		sc = gp->softc;
 		G_SHSEC_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 		error = g_shsec_add_disk(sc, pp, md.md_no);
 		if (error != 0) {
 			G_SHSEC_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
 			    pp->name, gp->name, error);
 			/* The device was created just above; tear it down again. */
 			g_shsec_destroy(sc, 1);
 			return (NULL);
 		}
 	}
 	return (gp);
 }
 
 /*
  * Look up a configured device of class mp by name.
  *
  * Geoms without a softc are ignored.  Returns the softc of the first geom
  * whose device name equals name, or NULL if there is none.
  */
 static struct g_shsec_softc *
 g_shsec_find_device(struct g_class *mp, const char *name)
 {
 	struct g_geom *cur;
 	struct g_shsec_softc *softc;
 
 	LIST_FOREACH(cur, &mp->geom, geom) {
 		softc = cur->softc;
 		if (softc != NULL && strcmp(softc->sc_name, name) == 0)
 			return (softc);
 	}
 	return (NULL);
 }
 
 static void
 g_shsec_ctl_destroy(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_shsec_softc *sc;
 	int *force, *nargs, error;
 	const char *name;
 	char param[16];
 	u_int i;
 
 	g_topology_assert();
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "No '%s' argument.", "nargs");
 		return;
 	}
 	if (*nargs <= 0) {
 		gctl_error(req, "Missing device(s).");
 		return;
 	}
 	force = gctl_get_paraml(req, "force", sizeof(*force));
 	if (force == NULL) {
 		gctl_error(req, "No '%s' argument.", "force");
 		return;
 	}
 
 	for (i = 0; i < (u_int)*nargs; i++) {
 		snprintf(param, sizeof(param), "arg%u", i);
 		name = gctl_get_asciiparam(req, param);
 		if (name == NULL) {
 			gctl_error(req, "No 'arg%u' argument.", i);
 			return;
 		}
 		sc = g_shsec_find_device(mp, name);
 		if (sc == NULL) {
 			gctl_error(req, "No such device: %s.", name);
 			return;
 		}
 		error = g_shsec_destroy(sc, *force);
 		if (error != 0) {
 			gctl_error(req, "Cannot destroy device %s (error=%d).",
 			    sc->sc_name, error);
 			return;
 		}
 	}
 }
 
 /*
  * Control-request entry point of the class.
  *
  * The request must carry a "version" parameter equal to G_SHSEC_VERSION.
  * The only verb handled is "stop"; anything else is reported as an error.
  */
 static void
 g_shsec_config(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	uint32_t *versionp;
 
 	g_topology_assert();
 
 	versionp = gctl_get_paraml(req, "version", sizeof(*versionp));
 	if (versionp == NULL) {
 		gctl_error(req, "No '%s' argument.", "version");
 		return;
 	}
 	if (*versionp != G_SHSEC_VERSION) {
 		gctl_error(req, "Userland and kernel parts are out of sync.");
 		return;
 	}
 
 	if (strcmp(verb, "stop") != 0) {
 		gctl_error(req, "Unknown verb.");
 		return;
 	}
 	g_shsec_ctl_destroy(req, mp);
 }
 
 static void
 g_shsec_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_shsec_softc *sc;
 
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	if (pp != NULL) {
 		/* Nothing here. */
 	} else if (cp != NULL) {
 		sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
 		    (u_int)cp->index);
 	} else {
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
 		sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n",
 		    indent, sc->sc_ndisks, g_shsec_nvalid(sc));
 		sbuf_printf(sb, "%s<State>", indent);
 		if (sc->sc_provider != NULL && sc->sc_provider->error == 0)
 			sbuf_printf(sb, "UP");
 		else
 			sbuf_printf(sb, "DOWN");
 		sbuf_printf(sb, "</State>\n");
 	}
 }
 
 /* Declare the shsec GEOM class and the geom_shsec module version (0). */
 DECLARE_GEOM_CLASS(g_shsec_class, g_shsec);
 MODULE_VERSION(geom_shsec, 0);
diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c
index 9b4df1b8dba6..3ae6a0e3f871 100644
--- a/sys/geom/stripe/g_stripe.c
+++ b/sys/geom/stripe/g_stripe.c
@@ -1,1261 +1,1262 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <vm/uma.h>
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 #include <geom/stripe/g_stripe.h>
 
 FEATURE(geom_stripe, "GEOM striping support");
 
 /* Malloc type used for the softc and the per-device disk array. */
 static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data");
 
 /* UMA zone of maxphys-sized buffers used by "fast" mode (see g_stripe_init). */
 static uma_zone_t g_stripe_zone;
 
 /* Forward declarations: g_stripe_destroy is used before its definition. */
 static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force);
 static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 
 /* Class methods referenced by g_stripe_class below. */
 static g_taste_t g_stripe_taste;
 static g_ctl_req_t g_stripe_config;
 static g_dumpconf_t g_stripe_dumpconf;
 static g_init_t g_stripe_init;
 static g_fini_t g_stripe_fini;
 
 /*
  * Class descriptor for the stripe class: name, GEOM ABI version and the
  * control-request, taste, destroy, init and fini methods.
  */
 struct g_class g_stripe_class = {
 	.name = G_STRIPE_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_stripe_config,
 	.taste = g_stripe_taste,
 	.destroy_geom = g_stripe_destroy_geom,
 	.init = g_stripe_init,
 	.fini = g_stripe_fini
 };
 
 /* Knobs and counters published under kern.geom.stripe. */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_STRIPE stuff");
 /* Debug level for the class (read/write). */
 static u_int g_stripe_debug = 0;
 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0,
     "Debug level");
 /* Non-zero selects "fast" mode in g_stripe_start(). */
 static int g_stripe_fast = 0;
 SYSCTL_INT(_kern_geom_stripe, OID_AUTO, fast,
     CTLFLAG_RWTUN, &g_stripe_fast, 0,
     "Fast, but memory-consuming, mode");
 /*
  * Memory limit for "fast" mode.  Declared with CTLFLAG_NOFETCH; the value is
  * set up in g_stripe_init().
  */
 static u_long g_stripe_maxmem;
 SYSCTL_ULONG(_kern_geom_stripe, OID_AUTO, maxmem,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &g_stripe_maxmem, 0,
     "Maximum memory that can be allocated in \"fast\" mode (in bytes)");
 /* Incremented by g_stripe_start() each time g_stripe_start_fast() fails. */
 static u_int g_stripe_fast_failed = 0;
 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD,
     &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed");
 
 /*
  * Greatest Common Divisor (Euclid's algorithm).
  *
  * gcd(a, 0) and gcd(0, a) are both a; gcd(0, 0) is 0.
  */
 static u_int
 gcd(u_int a, u_int b)
 {
 	u_int c;
 
 	while (b != 0) {
 		c = a;
 		a = b;
 		b = (c % b);
 	}
 	return (a);
 }
 
 /*
  * Least Common Multiple.
  *
  * The division is done before the multiplication so that the intermediate
  * value cannot wrap when the result itself fits in a u_int (a * b can
  * overflow even when lcm(a, b) does not).  Returns 0 when both arguments
  * are 0 instead of dividing by zero.
  */
 static u_int
 lcm(u_int a, u_int b)
 {
 	u_int g;
 
 	g = gcd(a, b);
 	if (g == 0)
 		return (0);
 	return ((a / g) * b);
 }
 
 /*
  * Class init method: set the "fast" mode memory limit and create the UMA
  * zone the fast path allocates its buffers from.
  *
  * The limit defaults to 100 items of maxphys bytes and can be overridden
  * with the kern.geom.stripe.maxmem tunable.  It is rounded down to a
  * multiple of maxphys, which is the zone's item size, and then applied to
  * the zone as an item count.
  */
 static void
 g_stripe_init(struct g_class *mp __unused)
 {
 
 	g_stripe_maxmem = maxphys * 100;
 	/*
 	 * The sysctl is declared CTLFLAG_NOFETCH, so the tunable has to be
 	 * fetched by hand.  The name must match the sysctl path exactly; a
 	 * stray trailing character makes the lookup miss silently.
 	 */
 	TUNABLE_ULONG_FETCH("kern.geom.stripe.maxmem", &g_stripe_maxmem);
 	g_stripe_zone = uma_zcreate("g_stripe_zone", maxphys, NULL, NULL,
 	    NULL, NULL, 0, 0);
 	g_stripe_maxmem -= g_stripe_maxmem % maxphys;
 	uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / maxphys);
 }
 
 /*
  * Class fini method: destroy the UMA zone created by g_stripe_init().
  */
 static void
 g_stripe_fini(struct g_class *mp __unused)
 {
 
 	uma_zdestroy(g_stripe_zone);
 }
 
 /*
  * Count the populated slots in the device's disk array, i.e. the number of
  * disks currently present out of sc_ndisks.
  */
 static u_int
 g_stripe_nvalid(struct g_stripe_softc *sc)
 {
 	u_int count, slot;
 
 	count = 0;
 	for (slot = 0; slot < sc->sc_ndisks; slot++)
 		count += (sc->sc_disks[slot] != NULL) ? 1 : 0;
 
 	return (count);
 }
 
 /*
  * Remove the disk behind consumer cp from its stripe device.
  *
  * The device's provider, if any, is withered first.  If cp still has a
  * non-zero access count the consumer is left attached (but marked through
  * cp->private) and the function returns; g_stripe_access() calls back in
  * once the counts reach zero.  Otherwise the consumer is detached and
  * destroyed, and when it was the last one the whole device is destroyed.
  */
 static void
 g_stripe_remove_disk(struct g_consumer *cp)
 {
 	struct g_stripe_softc *sc;
 
 	g_topology_assert();
 	KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
 	sc = (struct g_stripe_softc *)cp->geom->softc;
 	KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
 
 	/*
 	 * A NULL private pointer means this disk has not been marked yet: log
 	 * the removal once and mark it with a non-NULL sentinel.
 	 */
 	if (cp->private == NULL) {
 		G_STRIPE_DEBUG(0, "Disk %s removed from %s.",
 		    cp->provider->name, sc->sc_name);
 		cp->private = (void *)(uintptr_t)-1;
 	}
 
 	if (sc->sc_provider != NULL) {
 		G_STRIPE_DEBUG(0, "Device %s deactivated.",
 		    sc->sc_provider->name);
 		g_wither_provider(sc->sc_provider, ENXIO);
 		sc->sc_provider = NULL;
 	}
 
 	/* Still open: keep the consumer until the last close. */
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
 		return;
 	sc->sc_disks[cp->index] = NULL;
 	cp->index = 0;
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	/* If there are no valid disks anymore, remove device. */
 	if (LIST_EMPTY(&sc->sc_geom->consumer))
 		g_stripe_destroy(sc, 1);
 }
 
 /*
  * Orphan (and spoiled) method: the provider under cp is going away, so
  * remove the corresponding disk.  Nothing is done for a geom that has no
  * softc.
  */
 static void
 g_stripe_orphan(struct g_consumer *cp)
 {
 
 	g_topology_assert();
 	if (cp->geom->softc == NULL)
 		return;
 
 	g_stripe_remove_disk(cp);
 }
 
 /*
  * Access method: apply the access-count deltas (dr, dw, de) requested on
  * provider pp to every consumer of the geom.
  *
  * Returns 0 on success.  If g_access() fails on a consumer, the deltas are
  * reverted on the consumers processed before it and the error is returned.
  */
 static int
 g_stripe_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *cp1, *cp2, *tmp;
 	struct g_stripe_softc *sc;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	gp = pp->geom;
 	sc = gp->softc;
 	KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
 
 	/* On first open, grab an extra "exclusive" bit */
 	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
 		de++;
 	/* ... and let go of it on last close */
 	if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
 		de--;
 
 	/* The _SAFE variant is needed because a consumer may be destroyed. */
 	LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) {
 		error = g_access(cp1, dr, dw, de);
 		if (error != 0)
 			goto fail;
 		/*
 		 * A fully closed consumer with a non-NULL private pointer was
 		 * marked by g_stripe_remove_disk(); finish its removal now.
 		 */
 		if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 &&
 		    cp1->private != NULL) {
 			g_stripe_remove_disk(cp1); /* May destroy geom. */
 		}
 	}
 	return (0);
 
 fail:
 	/* Undo the change on every consumer that precedes the failing one. */
 	LIST_FOREACH(cp2, &gp->consumer, consumer) {
 		if (cp1 == cp2)
 			break;
 		g_access(cp2, -dr, -dw, -de);
 	}
 	return (error);
 }
 
 /*
  * Copy length bytes between a contiguous buffer and a buffer in which the
  * pieces belonging to one disk are sc_ndisks stripes apart.
  *
  * With mode != 0, src is read contiguously and dst is advanced by an extra
  * stripesize * (sc_ndisks - 1) after every piece (used on read completion).
  * With mode == 0 the roles are swapped: src is strided and dst is
  * contiguous (used before issuing a write).
  *
  * offset is only used to size the first piece, which runs up to the next
  * stripe boundary; the mask arithmetic relies on the stripe size being a
  * power of 2.
  */
 static void
 g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset,
     off_t length, int mode)
 {
 	off_t stripesize;
 	size_t len;
 
 	stripesize = sc->sc_stripesize;
 	/* Bytes from offset to the end of the stripe it falls in. */
 	len = (size_t)(stripesize - (offset & (stripesize - 1)));
 	do {
 		bcopy(src, dst, len);
 		if (mode) {
 			dst += len + stripesize * (sc->sc_ndisks - 1);
 			src += len;
 		} else {
 			dst += len;
 			src += len + stripesize * (sc->sc_ndisks - 1);
 		}
 		length -= len;
 		KASSERT(length >= 0,
 		    ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).",
 		    (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length));
 		/* Every later piece is a whole stripe, except possibly the last. */
 		if (length > stripesize)
 			len = stripesize;
 		else
 			len = length;
 	} while (length > 0);
 }
 
 /*
  * Completion handler for the child bios issued by this class.
  *
  * For a read that went through a bounce buffer (bio_caller1 != NULL) the
  * data is first copied back into the caller's buffer.  The child's error
  * and completed byte count are then folded into the parent under sc_lock,
  * and the parent is delivered once all of its children have come in.
  */
 static void
 g_stripe_done(struct bio *bp)
 {
 	struct g_stripe_softc *sc;
 	struct bio *pbp;
 
 	pbp = bp->bio_parent;
 	sc = pbp->bio_to->geom->softc;
 	if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
 		/* bio_caller1 holds the original destination pointer. */
 		g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset,
 		    bp->bio_length, 1);
 		bp->bio_data = bp->bio_caller1;
 		bp->bio_caller1 = NULL;
 	}
 	mtx_lock(&sc->sc_lock);
 	/* The first error seen is the one reported for the parent. */
 	if (pbp->bio_error == 0)
 		pbp->bio_error = bp->bio_error;
 	pbp->bio_completed += bp->bio_completed;
 	pbp->bio_inbed++;
 	if (pbp->bio_children == pbp->bio_inbed) {
 		mtx_unlock(&sc->sc_lock);
 		/* bio_driver1 carries the fast-mode bounce buffer, if any. */
 		if (pbp->bio_driver1 != NULL)
 			uma_zfree(g_stripe_zone, pbp->bio_driver1);
 		if (bp->bio_cmd == BIO_SPEEDUP)
 			pbp->bio_completed = pbp->bio_length;
 		g_io_deliver(pbp, pbp->bio_error);
 	} else
 		mtx_unlock(&sc->sc_lock);
 	g_destroy_bio(bp);
 }
 
 /*
  * "Fast" mode: issue at most one child bio per disk.
  *
  * no, offset and length describe the first piece of the request: the disk
  * it starts on, the offset on that disk and the number of bytes up to the
  * end of the first stripe.  One clone is created per disk; once every disk
  * has a clone, further stripes only extend the length of the clone for
  * their disk.  A clone that covers more than one stripe cannot use the
  * caller's buffer directly, so it gets a slice of a bounce buffer taken
  * from g_stripe_zone, and its original data pointer is kept in bio_caller1.
  *
  * Returns 0 when all children were sent, or ENOMEM after undoing all
  * allocations made here.
  */
 static int
 g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length)
 {
 	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
 	struct g_stripe_softc *sc;
 	char *addr, *data = NULL;
 	struct bio *cbp;
 	off_t stripesize;
 	u_int nparts = 0;
 	int error;
 
 	sc = bp->bio_to->geom->softc;
 
 	addr = bp->bio_data;
 	stripesize = sc->sc_stripesize;
 
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		error = ENOMEM;
 		goto failure;
 	}
 	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
 	nparts++;
 	/*
 	 * Fill in the component buf structure.
 	 * bio_caller2 temporarily carries the target consumer.
 	 */
 	cbp->bio_done = g_stripe_done;
 	cbp->bio_offset = offset;
 	cbp->bio_data = addr;
 	cbp->bio_caller1 = NULL;
 	cbp->bio_length = length;
 	cbp->bio_caller2 = sc->sc_disks[no];
 
 	/* offset -= offset % stripesize; */
 	offset -= offset & (stripesize - 1);
 	addr += length;
 	length = bp->bio_length - length;
 	for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
 		/* Wrapped past the last disk: move on to the next row. */
 		if (no > sc->sc_ndisks - 1) {
 			no = 0;
 			offset += stripesize;
 		}
 		if (nparts >= sc->sc_ndisks) {
 			/* Every disk has a clone; walk them round-robin. */
 			cbp = TAILQ_NEXT(cbp, bio_queue);
 			if (cbp == NULL)
 				cbp = TAILQ_FIRST(&queue);
 			nparts++;
 			/*
 			 * Update bio structure.
 			 */
 			/*
 			 * MIN() is in case when
 			 * (bp->bio_length % sc->sc_stripesize) != 0.
 			 */
 			cbp->bio_length += MIN(stripesize, length);
 			if (cbp->bio_caller1 == NULL) {
 				/*
 				 * First extension of this clone: remember the
 				 * caller's pointer and allocate the shared
 				 * bounce buffer if that has not happened yet.
 				 */
 				cbp->bio_caller1 = cbp->bio_data;
 				cbp->bio_data = NULL;
 				if (data == NULL) {
 					data = uma_zalloc(g_stripe_zone,
 					    M_NOWAIT);
 					if (data == NULL) {
 						error = ENOMEM;
 						goto failure;
 					}
 				}
 			}
 		} else {
 			cbp = g_clone_bio(bp);
 			if (cbp == NULL) {
 				error = ENOMEM;
 				goto failure;
 			}
 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
 			nparts++;
 			/*
 			 * Fill in the component buf structure.
 			 */
 			cbp->bio_done = g_stripe_done;
 			cbp->bio_offset = offset;
 			cbp->bio_data = addr;
 			cbp->bio_caller1 = NULL;
 			/*
 			 * MIN() is in case when
 			 * (bp->bio_length % sc->sc_stripesize) != 0.
 			 */
 			cbp->bio_length = MIN(stripesize, length);
 			cbp->bio_caller2 = sc->sc_disks[no];
 		}
 	}
 	/* The parent owns the bounce buffer; g_stripe_done() frees it. */
 	if (data != NULL)
 		bp->bio_driver1 = data;
 	/*
 	 * Fire off all allocated requests!
 	 */
 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 		struct g_consumer *cp;
 
 		TAILQ_REMOVE(&queue, cbp, bio_queue);
 		cp = cbp->bio_caller2;
 		cbp->bio_caller2 = NULL;
 		cbp->bio_to = cp->provider;
 		if (cbp->bio_caller1 != NULL) {
 			/*
 			 * Hand this clone the next slice of the bounce buffer;
 			 * for a write, gather the data into it first.
 			 */
 			cbp->bio_data = data;
 			if (bp->bio_cmd == BIO_WRITE) {
 				g_stripe_copy(sc, cbp->bio_caller1, data,
 				    cbp->bio_offset, cbp->bio_length, 0);
 			}
 			data += cbp->bio_length;
 		}
 		G_STRIPE_LOGREQ(cbp, "Sending request.");
 		g_io_request(cbp, cp);
 	}
 	return (0);
 failure:
 	/* Nothing has been sent yet: release the buffer and every clone. */
 	if (data != NULL)
 		uma_zfree(g_stripe_zone, data);
 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 		TAILQ_REMOVE(&queue, cbp, bio_queue);
 		if (cbp->bio_caller1 != NULL) {
 			cbp->bio_data = cbp->bio_caller1;
 			cbp->bio_caller1 = NULL;
 		}
 		bp->bio_children--;
 		g_destroy_bio(cbp);
 	}
 	return (error);
 }
 
 /*
  * "Economic" mode: issue one child bio per stripe-sized piece of the
  * request, each pointing straight into the caller's buffer (or page list
  * for unmapped bios), without any bounce buffer.
  *
  * no, offset and length describe the first piece: the disk it starts on,
  * the offset on that disk and the number of bytes up to the end of the
  * first stripe.
  *
  * Returns 0 when all children were sent, or ENOMEM after destroying the
  * clones created so far.
  */
 static int
 g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
 {
 	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
 	struct g_stripe_softc *sc;
 	off_t stripesize;
 	struct bio *cbp;
 	char *addr;
 	int error;
 
 	sc = bp->bio_to->geom->softc;
 
 	stripesize = sc->sc_stripesize;
 
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		error = ENOMEM;
 		goto failure;
 	}
 	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
 	/*
 	 * Fill in the component buf structure.
 	 */
 	if (bp->bio_length == length)
 		cbp->bio_done = g_std_done;	/* Optimized lockless case. */
 	else
 		cbp->bio_done = g_stripe_done;
 	cbp->bio_offset = offset;
 	cbp->bio_length = length;
 	if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
 		/*
 		 * NOTE(review): this updates bio_ma_n of the parent bp, not of
 		 * the clone; confirm that this is intended.
 		 */
 		bp->bio_ma_n = round_page(bp->bio_ma_offset +
 		    bp->bio_length) / PAGE_SIZE;
 		/* For unmapped bios addr is used as a running byte offset. */
 		addr = NULL;
 	} else
 		addr = bp->bio_data;
 	/* bio_caller2 temporarily carries the target consumer. */
 	cbp->bio_caller2 = sc->sc_disks[no];
 
 	/* offset -= offset % stripesize; */
 	offset -= offset & (stripesize - 1);
 	/* BIO_DELETE carries no data, so the data pointer is not advanced. */
 	if (bp->bio_cmd != BIO_DELETE)
 		addr += length;
 	length = bp->bio_length - length;
 	for (no++; length > 0; no++, length -= stripesize) {
 		/* Wrapped past the last disk: move on to the next row. */
 		if (no > sc->sc_ndisks - 1) {
 			no = 0;
 			offset += stripesize;
 		}
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			error = ENOMEM;
 			goto failure;
 		}
 		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
 
 		/*
 		 * Fill in the component buf structure.
 		 */
 		cbp->bio_done = g_stripe_done;
 		cbp->bio_offset = offset;
 		/*
 		 * MIN() is in case when
 		 * (bp->bio_length % sc->sc_stripesize) != 0.
 		 */
 		cbp->bio_length = MIN(stripesize, length);
 		if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
 			/*
 			 * Advance the clone's page array and in-page offset
 			 * by the number of bytes consumed so far.
 			 */
 			cbp->bio_ma_offset += (uintptr_t)addr;
 			cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
 			cbp->bio_ma_offset %= PAGE_SIZE;
 			cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
 			    cbp->bio_length) / PAGE_SIZE;
 		} else
 			cbp->bio_data = addr;
 
 		cbp->bio_caller2 = sc->sc_disks[no];
 
 		if (bp->bio_cmd != BIO_DELETE)
 			addr += stripesize;
 	}
 	/*
 	 * Fire off all allocated requests!
 	 */
 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 		struct g_consumer *cp;
 
 		TAILQ_REMOVE(&queue, cbp, bio_queue);
 		cp = cbp->bio_caller2;
 		cbp->bio_caller2 = NULL;
 		cbp->bio_to = cp->provider;
 		G_STRIPE_LOGREQ(cbp, "Sending request.");
 		g_io_request(cbp, cp);
 	}
 	return (0);
 failure:
 	/* Nothing has been sent yet: destroy every clone made so far. */
 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
 		TAILQ_REMOVE(&queue, cbp, bio_queue);
 		bp->bio_children--;
 		g_destroy_bio(cbp);
 	}
 	return (error);
 }
 
 /*
  * Forward a request unchanged to every disk of the device (used by
  * g_stripe_start() for BIO_SPEEDUP and BIO_FLUSH).
  *
  * All clones are created before any is sent.  If a clone cannot be
  * allocated, the ones already made are destroyed and the parent is
  * delivered with ENOMEM (unless it already carries an error).
  */
 static void
 g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp)
 {
 	struct bio_queue_head queue;
 	struct g_consumer *cp;
 	struct bio *cbp;
 	u_int no;
 
 	bioq_init(&queue);
 	for (no = 0; no < sc->sc_ndisks; no++) {
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			for (cbp = bioq_first(&queue); cbp != NULL;
 			    cbp = bioq_first(&queue)) {
 				bioq_remove(&queue, cbp);
 				g_destroy_bio(cbp);
 			}
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
 			return;
 		}
 		bioq_insert_tail(&queue, cbp);
 		cbp->bio_done = g_stripe_done;
 		/* bio_caller2 temporarily carries the target consumer. */
 		cbp->bio_caller2 = sc->sc_disks[no];
 		cbp->bio_to = sc->sc_disks[no]->provider;
 	}
 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
 		bioq_remove(&queue, cbp);
 		G_STRIPE_LOGREQ(cbp, "Sending request.");
 		cp = cbp->bio_caller2;
 		cbp->bio_caller2 = NULL;
 		g_io_request(cbp, cp);
 	}
 }
 
 /*
  * Start method: dispatch an incoming bio.
  *
  * BIO_SPEEDUP and BIO_FLUSH are pushed down to every disk; BIO_GETATTR and
  * unknown commands are rejected with EOPNOTSUPP.  BIO_READ, BIO_WRITE and
  * BIO_DELETE are split across the disks, using "fast" mode when the
  * conditions below hold and "economic" mode otherwise or as a fallback.
  */
 static void
 g_stripe_start(struct bio *bp)
 {
 	off_t offset, start, length, nstripe, stripesize;
 	struct g_stripe_softc *sc;
 	u_int no;
 	int error, fast = 0;
 
 	sc = bp->bio_to->geom->softc;
 	/*
 	 * If sc == NULL, provider's error should be set and g_stripe_start()
 	 * should not be called at all.
 	 */
 	KASSERT(sc != NULL,
 	    ("Provider's error should be set (error=%d)(device=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 
 	G_STRIPE_LOGREQ(bp, "Request received.");
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 		break;
 	case BIO_SPEEDUP:
 	case BIO_FLUSH:
 		g_stripe_pushdown(sc, bp);
 		return;
 	case BIO_GETATTR:
 		/* To which provider it should be delivered? */
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 
 	stripesize = sc->sc_stripesize;
 
 	/*
 	 * Calculations are quite messy, but fast I hope.
 	 * The shifts and masks rely on the stripe size being a power of 2.
 	 */
 
 	/* Stripe number. */
 	/* nstripe = bp->bio_offset / stripesize; */
 	nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits;
 	/* Disk number. */
 	no = nstripe % sc->sc_ndisks;
 	/* Start position in stripe. */
 	/* start = bp->bio_offset % stripesize; */
 	start = bp->bio_offset & (stripesize - 1);
 	/* Start position in disk. */
 	/* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */
 	offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start;
 	/* Length of data to operate. */
 	length = MIN(bp->bio_length, stripesize - start);
 
 	/*
 	 * Do use "fast" mode when:
 	 * 1. "Fast" mode is ON.
 	 * and
 	 * 2. Request size is less than or equal to maxphys,
 	 *    which should always be true.
 	 * and
 	 * 3. Request size is at least stripesize * ndisks. If it isn't,
 	 *    there will be no need to send more than one I/O request to
 	 *    a provider, so there is nothing to optimize.
 	 * and
 	 * 4. Request is not unmapped.
 	 * and
 	 * 5. It is not a BIO_DELETE.
 	 */
 	if (g_stripe_fast && bp->bio_length <= maxphys &&
 	    bp->bio_length >= stripesize * sc->sc_ndisks &&
 	    (bp->bio_flags & BIO_UNMAPPED) == 0 &&
 	    bp->bio_cmd != BIO_DELETE) {
 		fast = 1;
 	}
 	error = 0;
 	if (fast) {
 		error = g_stripe_start_fast(bp, no, offset, length);
 		if (error != 0)
 			g_stripe_fast_failed++;
 	}
 	/*
 	 * Do use "economic" when:
 	 * 1. "Economic" mode is ON.
 	 * or
 	 * 2. "Fast" mode failed. It can only fail if there is no memory.
 	 */
 	if (!fast || error != 0)
 		error = g_stripe_start_economic(bp, no, offset, length);
 	/* Both modes failed (or economic alone did): fail the request. */
 	if (error != 0) {
 		if (bp->bio_error == 0)
 			bp->bio_error = error;
 		g_io_deliver(bp, bp->bio_error);
 	}
 }
 
 /*
  * Create and activate the device's provider once every disk slot is
  * populated; do nothing while disks are still missing.
  *
  * The provider's size is the smallest usable per-disk size times the number
  * of disks, where "usable" means rounded down to a whole number of stripes
  * and, for AUTOMATIC devices, excluding the last sector (the one
  * g_stripe_read_metadata() reads).  Its sector size is the least common
  * multiple of the disks' sector sizes.  Unmapped I/O is advertised only
  * when "fast" mode is off and every underlying provider accepts it.
  */
 static void
 g_stripe_check_and_run(struct g_stripe_softc *sc)
 {
 	struct g_provider *dp;
 	off_t mediasize, ms;
 	u_int no, sectorsize = 0;
 
 	g_topology_assert();
 	if (g_stripe_nvalid(sc) != sc->sc_ndisks)
 		return;
 
 	sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
 	    sc->sc_name);
 	sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
 	if (g_stripe_fast == 0)
 		sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED;
 	/*
 	 * Find the smallest disk.
 	 */
 	dp = sc->sc_disks[0]->provider;
 	mediasize = dp->mediasize;
 	if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
 		mediasize -= dp->sectorsize;
 	mediasize -= mediasize % sc->sc_stripesize;
 	sectorsize = dp->sectorsize;
 	/*
 	 * The loop below starts at disk 1, so disk 0 has to be vetted for
 	 * unmapped I/O support here; otherwise it would never be checked.
 	 */
 	if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
 		G_STRIPE_DEBUG(1, "Cancelling unmapped "
 		    "because of %s.", dp->name);
 		sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED;
 	}
 	for (no = 1; no < sc->sc_ndisks; no++) {
 		dp = sc->sc_disks[no]->provider;
 		ms = dp->mediasize;
 		if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
 			ms -= dp->sectorsize;
 		ms -= ms % sc->sc_stripesize;
 		if (ms < mediasize)
 			mediasize = ms;
 		sectorsize = lcm(sectorsize, dp->sectorsize);
 
 		/* A provider underneath us doesn't support unmapped */
 		if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
 			G_STRIPE_DEBUG(1, "Cancelling unmapped "
 			    "because of %s.", dp->name);
 			sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED;
 		}
 	}
 	sc->sc_provider->sectorsize = sectorsize;
 	sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
 	sc->sc_provider->stripesize = sc->sc_stripesize;
 	sc->sc_provider->stripeoffset = 0;
 	g_error_provider(sc->sc_provider, 0);
 
 	G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name);
 }
 
 static int
 g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL)
 		return (error);
 
 	/* Decode metadata. */
 	stripe_metadata_decode(buf, md);
 	g_free(buf);
 
 	return (0);
 }
 
 /*
  * Add disk to given device.
  *
  * Attaches provider pp to the device as disk number no.  If the device's
  * other consumers are already open, the new consumer is opened with the
  * same access counts.  For AUTOMATIC devices the metadata is read again and
  * must still name this device.  When the last missing disk arrives, the
  * device's provider is created by g_stripe_check_and_run().
  *
  * Returns 0 on success, EINVAL if no is out of range or the re-read
  * metadata no longer matches the device, EEXIST if the slot is already
  * taken, or the error from g_attach(), g_access() or the metadata read.
  */
 static int
 g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
 {
 	struct g_consumer *cp, *fcp;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	/* Metadata corrupted? */
 	if (no >= sc->sc_ndisks)
 		return (EINVAL);
 
 	/* Check if disk is not already attached. */
 	if (sc->sc_disks[no] != NULL)
 		return (EEXIST);
 
 	gp = sc->sc_geom;
 	/* Sample an existing consumer before the new one is linked in. */
 	fcp = LIST_FIRST(&gp->consumer);
 
 	cp = g_new_consumer(gp);
 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	cp->private = NULL;
 	cp->index = no;
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		return (error);
 	}
 
 	/* Mirror the access counts of the consumers that are already open. */
 	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
 		error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
 		if (error != 0) {
 			g_detach(cp);
 			g_destroy_consumer(cp);
 			return (error);
 		}
 	}
 	if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) {
 		struct g_stripe_metadata md;
 
 		/* Reread metadata. */
 		error = g_stripe_read_metadata(cp, &md);
 		if (error != 0)
 			goto fail;
 
 		if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 ||
 		    strcmp(md.md_name, sc->sc_name) != 0 ||
 		    md.md_id != sc->sc_id) {
 			G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name);
 			/*
 			 * error is still 0 from the successful read; without
 			 * this the caller would be told the disk was added.
 			 */
 			error = EINVAL;
 			goto fail;
 		}
 	}
 
 	sc->sc_disks[no] = cp;
 	G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);
 	g_stripe_check_and_run(sc);
 
 	return (0);
 fail:
 	/* Give back the access counts taken above before detaching. */
 	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
 		g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	return (error);
 }
 
 /*
  * Create a new, still disk-less stripe device from metadata md.
  *
  * type is stored in the softc as the device type (the callers pass
  * G_STRIPE_TYPE_AUTOMATIC or G_STRIPE_TYPE_MANUAL).
  *
  * Returns the new geom, or NULL if the metadata describes fewer than two
  * disks, the stripe size is zero or not a power of 2, or a device with the
  * same name already exists in class mp.
  */
 static struct g_geom *
 g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
     u_int type)
 {
 	struct g_stripe_softc *sc;
 	struct g_geom *gp;
 
 	g_topology_assert();
 	G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
 	    md->md_id);
 
 	/* Two disks is minimum. */
 	if (md->md_all < 2) {
 		G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name);
 		return (NULL);
 	}
 #if 0
 	/* Stripe size has to be greater than or equal to sector size. */
 	if (md->md_stripesize < sectorsize) {
 		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
 		return (NULL);
 	}
 #endif
 	/*
 	 * Stripe size has to be a non-zero power of 2.  powerof2() alone
 	 * accepts 0, and a zero stripe size would later be used as a modulus
 	 * in g_stripe_check_and_run().
 	 */
 	if (md->md_stripesize == 0 || !powerof2(md->md_stripesize)) {
 		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
 		return (NULL);
 	}
 
 	/* Check for duplicate unit */
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) {
 			G_STRIPE_DEBUG(0, "Device %s already configured.",
 			    sc->sc_name);
 			return (NULL);
 		}
 	}
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO);
 	gp->start = g_stripe_start;
 	gp->spoiled = g_stripe_orphan;
 	gp->orphan = g_stripe_orphan;
 	gp->access = g_stripe_access;
 	gp->dumpconf = g_stripe_dumpconf;
 
 	sc->sc_id = md->md_id;
 	sc->sc_stripesize = md->md_stripesize;
 	/* For a power of 2, the set bits of (size - 1) give log2(size). */
 	sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1);
 	sc->sc_ndisks = md->md_all;
 	/* M_ZERO leaves every disk slot NULL, i.e. "not attached yet". */
 	sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
 	    M_STRIPE, M_WAITOK | M_ZERO);
 	sc->sc_type = type;
 	mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF);
 
 	gp->softc = sc;
 	sc->sc_geom = gp;
 	sc->sc_provider = NULL;
 
 	G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
 
 	return (gp);
 }
 
 /*
  * Destroy the stripe device described by sc.
  *
  * Returns ENXIO if sc is NULL, EBUSY if the provider still has a non-zero
  * access count and force is not set, EINPROGRESS if some consumers could
  * not be removed yet, and 0 once the device has been torn down (possibly by
  * a nested call, see below).
  */
 static int
 g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
 {
 	struct g_provider *pp;
 	struct g_consumer *cp, *cp1;
 	struct g_geom *gp;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 
 	/* An open provider blocks a non-forced destroy; a forced one only logs. */
 	pp = sc->sc_provider;
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		if (force) {
 			G_STRIPE_DEBUG(0, "Device %s is still open, so it "
 			    "can't be definitely removed.", pp->name);
 		} else {
 			G_STRIPE_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		}
 	}
 
 	gp = sc->sc_geom;
 	/*
 	 * g_stripe_remove_disk() calls back into this function when it removes
 	 * the last consumer.  After the call for the final list entry
 	 * (cp1 == NULL) this function returns without touching sc again.
 	 */
 	LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) {
 		g_stripe_remove_disk(cp);
 		if (cp1 == NULL)
 			return (0);	/* Recursion happened. */
 	}
 	/* Consumers that are still open stay attached until their last close. */
 	if (!LIST_EMPTY(&gp->consumer))
 		return (EINPROGRESS);
 
 	gp->softc = NULL;
 	KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
 	    gp->name));
 	free(sc->sc_disks, M_STRIPE);
 	mtx_destroy(&sc->sc_lock);
 	free(sc, M_STRIPE);
 	G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom(gp, ENXIO);
 	return (0);
 }
 
 /*
  * destroy_geom class method: attempt a non-forced destroy of the device
  * whose softc hangs off gp.  The request and class arguments are unused.
  */
 static int
 g_stripe_destroy_geom(struct gctl_req *req __unused,
     struct g_class *mp __unused, struct g_geom *gp)
 {
 
 	return (g_stripe_destroy(gp->softc, 0));
 }
 
 /*
  * Taste method: read stripe metadata from pp and, when it is acceptable,
  * add pp to the matching AUTOMATIC device, creating that device first if no
  * geom of this class has the same name and id.
  *
  * Returns the geom the provider was added to, or NULL when the provider is
  * open for writing, the metadata cannot be read or is rejected, or the disk
  * could not be added.
  */
 static struct g_geom *
 g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_stripe_metadata md;
 	struct g_stripe_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 
 	/* Skip providers that are already open for writing. */
 	if (pp->acw > 0)
 		return (NULL);
 
 	G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);
 
 	/*
 	 * A throw-away geom and consumer are used only to read the metadata;
 	 * both are destroyed again before the result is examined.
 	 */
 	gp = g_new_geomf(mp, "stripe:taste");
 	gp->start = g_stripe_start;
 	gp->access = g_stripe_access;
 	gp->orphan = g_stripe_orphan;
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		error = g_stripe_read_metadata(cp, &md);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	/* error is the result of the attach or of the metadata read. */
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0)
 		return (NULL);
 	if (md.md_version > G_STRIPE_VERSION) {
 		printf("geom_stripe.ko module is too old to handle %s.\n",
 		    pp->name);
 		return (NULL);
 	}
 	/*
 	 * Backward compatibility:
 	 */
 	/* There was no md_provider field in earlier versions of metadata. */
 	if (md.md_version < 2)
 		bzero(md.md_provider, sizeof(md.md_provider));
 	/* There was no md_provsize field in earlier versions of metadata. */
 	if (md.md_version < 3)
 		md.md_provsize = pp->mediasize;
 
 	/* A recorded provider name or size that does not match pp rejects it. */
 	if (md.md_provider[0] != '\0' &&
 	    !g_compare_names(md.md_provider, pp->name))
 		return (NULL);
 	if (md.md_provsize != pp->mediasize)
 		return (NULL);
 
 	/*
 	 * Let's check if device already exists.
 	 * The loop leaves gp non-NULL only when it broke out on an AUTOMATIC
 	 * device whose name and id both match the metadata.
 	 */
 	sc = NULL;
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC)
 			continue;
 		if (strcmp(md.md_name, sc->sc_name) != 0)
 			continue;
 		if (md.md_id != sc->sc_id)
 			continue;
 		break;
 	}
 	if (gp != NULL) {
 		G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 		error = g_stripe_add_disk(sc, pp, md.md_no);
 		if (error != 0) {
 			G_STRIPE_DEBUG(0,
 			    "Cannot add disk %s to %s (error=%d).", pp->name,
 			    gp->name, error);
 			return (NULL);
 		}
 	} else {
 		gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC);
 		if (gp == NULL) {
 			G_STRIPE_DEBUG(0, "Cannot create device %s.",
 			    md.md_name);
 			return (NULL);
 		}
 		sc = gp->softc;
 		G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 		error = g_stripe_add_disk(sc, pp, md.md_no);
 		if (error != 0) {
 			G_STRIPE_DEBUG(0,
 			    "Cannot add disk %s to %s (error=%d).", pp->name,
 			    gp->name, error);
 			/* The device was created just above; tear it down again. */
 			g_stripe_destroy(sc, 1);
 			return (NULL);
 		}
 	}
 
 	return (gp);
 }
 
 /*
  * Handle the "create" verb: build a manually configured stripe device.
  *
  * "arg0" names the new device, "arg1" .. "arg<nargs-1>" name the providers to
  * attach to it, and "stripesize" supplies the stripe size.  Failures are
  * reported on the request through gctl_error(); nothing is returned.
  */
 static void
 g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp)
 {
 	u_int attached, no;
 	struct g_stripe_metadata md;
 	struct g_provider *pp;
 	struct g_stripe_softc *sc;
 	struct g_geom *gp;
 	struct sbuf *sb;
 	off_t *stripesize;
 	const char *name;
 	char param[16];
 	int *nargs;
 
 	g_topology_assert();
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "No '%s' argument.", "nargs");
 		return;
 	}
 	/* arg0 is the device name; at least two more arguments are required. */
 	if (*nargs <= 2) {
 		gctl_error(req, "Too few arguments.");
 		return;
 	}
 
 	/* Build the metadata describing the new device. */
 	strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic));
 	md.md_version = G_STRIPE_VERSION;
 	name = gctl_get_asciiparam(req, "arg0");
 	if (name == NULL) {
 		gctl_error(req, "No 'arg%u' argument.", 0);
 		return;
 	}
 	strlcpy(md.md_name, name, sizeof(md.md_name));
 	md.md_id = arc4random();
 	md.md_no = 0;
 	/* Every argument except arg0 is a provider. */
 	md.md_all = *nargs - 1;
 	stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize));
 	if (stripesize == NULL) {
 		gctl_error(req, "No '%s' argument.", "stripesize");
 		return;
 	}
 	/* NOTE(review): the off_t value is narrowed to 32 bits here. */
 	md.md_stripesize = (uint32_t)*stripesize;
 	bzero(md.md_provider, sizeof(md.md_provider));
 	/* This field is not important here. */
 	md.md_provsize = 0;
 
 	/*
 	 * Check all providers are valid before creating anything.
 	 * No gctl_error() is issued here; presumably gctl_get_provider()
 	 * has already reported the problem on req -- verify.
 	 */
 	for (no = 1; no < *nargs; no++) {
 		snprintf(param, sizeof(param), "arg%u", no);
 		pp = gctl_get_provider(req, param);
 		if (pp == NULL)
 			return;
 	}
 
 	gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL);
 	if (gp == NULL) {
 		gctl_error(req, "Can't configure %s.", md.md_name);
 		return;
 	}
 
 	/*
 	 * Attach each provider as disk number (no - 1), collecting the names
 	 * of the ones that could not be attached into sb.
 	 */
 	sc = gp->softc;
 	sb = sbuf_new_auto();
 	sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name);
 	for (attached = 0, no = 1; no < *nargs; no++) {
 		snprintf(param, sizeof(param), "arg%u", no);
 		pp  = gctl_get_provider(req, param);
 		if (pp == NULL) {
 			/* Lookup failed this time; report the raw argument. */
 			name = gctl_get_asciiparam(req, param);
 			MPASS(name != NULL);
 			sbuf_printf(sb, " %s", name);
 			continue;
 		}
 		if (g_stripe_add_disk(sc, pp, no - 1) != 0) {
 			G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.",
 			    no, pp->name, gp->name);
 			sbuf_printf(sb, " %s", pp->name);
 			continue;
 		}
 		attached++;
 	}
 	sbuf_finish(sb);
 	/* All or nothing: destroy the device unless every disk attached. */
 	if (md.md_all != attached) {
 		g_stripe_destroy(gp->softc, 1);
 		gctl_error(req, "%s", sbuf_data(sb));
 	}
 	sbuf_delete(sb);
 }
 
 /*
  * Look up a stripe device by name among the geoms of class mp.
  *
  * Geoms without a softc are ignored.  Returns the softc whose sc_name equals
  * name, or NULL when there is no such device.
  */
 static struct g_stripe_softc *
 g_stripe_find_device(struct g_class *mp, const char *name)
 {
 	struct g_geom *cur;
 	struct g_stripe_softc *found;
 
 	LIST_FOREACH(cur, &mp->geom, geom) {
 		found = cur->softc;
 		if (found != NULL && strcmp(found->sc_name, name) == 0)
 			return (found);
 	}
 
 	return (NULL);
 }
 
 /*
  * Handle the "destroy"/"stop" verbs: destroy every device named by
  * "arg0" .. "arg<nargs-1>", honouring the "force" flag.
  *
  * Processing stops at the first argument that is missing, names no known
  * device, or cannot be destroyed; the problem is reported via gctl_error().
  */
 static void
 g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp)
 {
 	struct g_stripe_softc *target;
 	const char *devname;
 	char key[16];
 	int *countp, *forcep, rc;
 	u_int idx;
 
 	g_topology_assert();
 
 	countp = gctl_get_paraml(req, "nargs", sizeof(*countp));
 	if (countp == NULL) {
 		gctl_error(req, "No '%s' argument.", "nargs");
 		return;
 	}
 	if (*countp < 1) {
 		gctl_error(req, "Missing device(s).");
 		return;
 	}
 
 	forcep = gctl_get_paraml(req, "force", sizeof(*forcep));
 	if (forcep == NULL) {
 		gctl_error(req, "No '%s' argument.", "force");
 		return;
 	}
 
 	for (idx = 0; idx < (u_int)*countp; idx++) {
 		snprintf(key, sizeof(key), "arg%u", idx);
 
 		devname = gctl_get_asciiparam(req, key);
 		if (devname == NULL) {
 			gctl_error(req, "No 'arg%u' argument.", idx);
 			return;
 		}
 
 		target = g_stripe_find_device(mp, devname);
 		if (target == NULL) {
 			gctl_error(req, "No such device: %s.", devname);
 			return;
 		}
 
 		rc = g_stripe_destroy(target, *forcep);
 		if (rc != 0) {
 			gctl_error(req, "Cannot destroy device %s (error=%d).",
 			    target->sc_name, rc);
 			return;
 		}
 	}
 }
 
 /*
  * Control-request entry point for the stripe class.
  *
  * Rejects requests whose "version" parameter is absent or differs from
  * G_STRIPE_VERSION, then dispatches on verb: "create", or "destroy"/"stop".
  * Any other verb is reported as unknown.
  */
 static void
 g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	uint32_t *userver;
 
 	g_topology_assert();
 
 	userver = gctl_get_paraml(req, "version", sizeof(*userver));
 	if (userver == NULL) {
 		gctl_error(req, "No '%s' argument.", "version");
 		return;
 	}
 	if (*userver != G_STRIPE_VERSION) {
 		gctl_error(req, "Userland and kernel parts are out of sync.");
 		return;
 	}
 
 	if (strcmp(verb, "create") == 0)
 		g_stripe_ctl_create(req, mp);
 	else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0)
 		g_stripe_ctl_destroy(req, mp);
 	else
 		gctl_error(req, "Unknown verb.");
 }
 
 static void
 g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_stripe_softc *sc;
 
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	if (pp != NULL) {
 		/* Nothing here. */
 	} else if (cp != NULL) {
 		sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
 		    (u_int)cp->index);
 	} else {
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
 		sbuf_printf(sb, "%s<Stripesize>%ju</Stripesize>\n", indent,
 		    (uintmax_t)sc->sc_stripesize);
 		sbuf_printf(sb, "%s<Type>", indent);
 		switch (sc->sc_type) {
 		case G_STRIPE_TYPE_AUTOMATIC:
 			sbuf_cat(sb, "AUTOMATIC");
 			break;
 		case G_STRIPE_TYPE_MANUAL:
 			sbuf_cat(sb, "MANUAL");
 			break;
 		default:
 			sbuf_cat(sb, "UNKNOWN");
 			break;
 		}
 		sbuf_cat(sb, "</Type>\n");
 		sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n",
 		    indent, sc->sc_ndisks, g_stripe_nvalid(sc));
 		sbuf_printf(sb, "%s<State>", indent);
 		if (sc->sc_provider != NULL && sc->sc_provider->error == 0)
 			sbuf_cat(sb, "UP");
 		else
 			sbuf_cat(sb, "DOWN");
 		sbuf_cat(sb, "</State>\n");
 	}
 }
 
 /* Class declaration and module version for geom_stripe. */
 DECLARE_GEOM_CLASS(g_stripe_class, g_stripe);
 MODULE_VERSION(geom_stripe, 0);
diff --git a/sys/geom/vinum/geom_vinum.c b/sys/geom/vinum/geom_vinum.c
index 0c60a051619f..86f5c9f08e1f 100644
--- a/sys/geom/vinum/geom_vinum.c
+++ b/sys/geom/vinum/geom_vinum.c
@@ -1,1049 +1,1050 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  *  Copyright (c) 2004, 2007 Lukas Ertl
  *  Copyright (c) 2007, 2009 Ulf Lilleengen
  *  All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 #include <geom/vinum/geom_vinum_var.h>
 #include <geom/vinum/geom_vinum.h>
 #include <geom/vinum/geom_vinum_raid5.h>
 
 SYSCTL_DECL(_kern_geom);
 /* Sysctl node for this module, created under _kern_geom as "vinum". */
 static SYSCTL_NODE(_kern_geom, OID_AUTO, vinum, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_VINUM stuff");
 /* Debug level, exported through the "debug" OID of the node above. */
 u_int g_vinum_debug = 0;
 SYSCTL_UINT(_kern_geom_vinum, OID_AUTO, debug, CTLFLAG_RWTUN, &g_vinum_debug, 0,
     "Debug level");
 
 /* Control-request handlers defined later in this file. */
 static int	gv_create(struct g_geom *, struct gctl_req *);
 static void	gv_attach(struct gv_softc *, struct gctl_req *);
 static void	gv_detach(struct gv_softc *, struct gctl_req *);
 static void	gv_parityop(struct gv_softc *, struct gctl_req *);
 
 /*
  * Orphan handler (also installed as the spoiled handler by gv_init()):
  * report the drive behind the consumer as lost by queueing a
  * GV_EVENT_DRIVE_LOST event.
  */
 static void
 gv_orphan(struct g_consumer *cp)
 {
 	struct g_geom *geom;
 	struct gv_softc *softc;
 	struct gv_drive *drive;
 
 	g_topology_assert();
 
 	KASSERT(cp != NULL, ("gv_orphan: null cp"));
 	geom = cp->geom;
 	KASSERT(geom != NULL, ("gv_orphan: null gp"));
 	softc = geom->softc;
 	KASSERT(softc != NULL, ("gv_orphan: null sc"));
 	drive = cp->private;
 	KASSERT(drive != NULL, ("gv_orphan: null d"));
 
 	g_trace(G_T_TOPOLOGY, "gv_orphan(%s)", geom->name);
 
 	gv_post_event(softc, GV_EVENT_DRIVE_LOST, drive, NULL, 0, 0);
 }
 
 /*
  * Start routine: accept READ, WRITE and DELETE requests by queueing them on
  * the softc's "down" queue and waking the worker; every other command is
  * completed immediately with EOPNOTSUPP.
  */
 void
 gv_start(struct bio *bp)
 {
 	struct gv_softc *softc;
 
 	softc = bp->bio_to->geom->softc;
 
 	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
 	    bp->bio_cmd != BIO_DELETE) {
 		/* BIO_GETATTR and anything else is not supported. */
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 
 	mtx_lock(&softc->bqueue_mtx);
 	bioq_disksort(softc->bqueue_down, bp);
 	wakeup(softc);
 	mtx_unlock(&softc->bqueue_mtx);
 }
 
 /*
  * Completion callback: queue the finished bio on the softc's "up" queue and
  * wake the worker, which does the actual completion processing.
  */
 void
 gv_done(struct bio *bp)
 {
 	struct gv_softc *softc;
 
 	KASSERT(bp != NULL, ("NULL bp"));
 
 	softc = bp->bio_from->geom->softc;
 
 	mtx_lock(&softc->bqueue_mtx);
 	bioq_disksort(softc->bqueue_up, bp);
 	wakeup(softc);
 	mtx_unlock(&softc->bqueue_mtx);
 }
 
 /*
  * Access method: propagate an access-count change to the consumer of every
  * drive that has one.
  *
  * dr, dw and de are the read, write and exclusive count deltas.  Returns 0
  * on success.  If g_access() fails on some drive, the change is undone on
  * the drives already updated and that error is returned.
  */
 int
 gv_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_geom *gp;
 	struct gv_softc *sc;
 	struct gv_drive *d, *d2;
 	int error;
 
 	gp = pp->geom;
 	sc = gp->softc;
 	/*
 	 * We want to modify the read count with the write count in case we have
 	 * plexes in a RAID-5 organization.
 	 */
 	dr += dw;
 
 	LIST_FOREACH(d, &sc->drives, drive) {
 		if (d->consumer == NULL)
 			continue;
 		error = g_access(d->consumer, dr, dw, de);
 		if (error) {
 			/* Undo the change on every drive handled so far. */
 			LIST_FOREACH(d2, &sc->drives, drive) {
 				if (d == d2)
 					break;
 				/*
 				 * Drives without a consumer were skipped on
 				 * the way in, so skip them on the way out.
 				 */
 				if (d2->consumer == NULL)
 					continue;
 				g_access(d2->consumer, -dr, -dw, -de);
 			}
 			G_VINUM_DEBUG(0, "g_access '%s' failed: %d", d->name,
 			    error);
 			return (error);
 		}
 	}
 	return (0);
 }
 
 /*
  * Class init hook: create the single "VINUM" geom, allocate and initialise
  * its softc (bio queues, object lists, event queue, mutexes) and start the
  * gv_worker kernel process that services them.
  */
 static void
 gv_init(struct g_class *mp)
 {
 	struct g_geom *gp;
 	struct gv_softc *sc;
 
 	g_trace(G_T_TOPOLOGY, "gv_init(%p)", mp);
 
 	gp = g_new_geomf(mp, "VINUM");
 	/* Spoiling and orphaning share one handler. */
 	gp->spoiled = gv_orphan;
 	gp->orphan = gv_orphan;
 	gp->access = gv_access;
 	gp->start = gv_start;
 	gp->softc = g_malloc(sizeof(struct gv_softc), M_WAITOK | M_ZERO);
 	sc = gp->softc;
 	sc->geom = gp;
 	/* Separate queues for new requests (down) and completions (up). */
 	sc->bqueue_down = g_malloc(sizeof(struct bio_queue_head),
 	    M_WAITOK | M_ZERO);
 	sc->bqueue_up = g_malloc(sizeof(struct bio_queue_head),
 	    M_WAITOK | M_ZERO);
 	bioq_init(sc->bqueue_down);
 	bioq_init(sc->bqueue_up);
 	LIST_INIT(&sc->drives);
 	LIST_INIT(&sc->subdisks);
 	LIST_INIT(&sc->plexes);
 	LIST_INIT(&sc->volumes);
 	TAILQ_INIT(&sc->equeue);
 	mtx_init(&sc->config_mtx, "gv_config", NULL, MTX_DEF);
 	mtx_init(&sc->equeue_mtx, "gv_equeue", NULL, MTX_DEF);
 	mtx_init(&sc->bqueue_mtx, "gv_bqueue", NULL, MTX_DEF);
 	/*
 	 * Start the worker last, once everything it touches is set up.
 	 * NOTE(review): the return value of kproc_create() is not checked.
 	 */
 	kproc_create(gv_worker, sc, &sc->worker, 0, 0, "gv_worker");
 }
 
 /*
  * destroy_geom hook: if the geom still has a softc, ask the worker to exit,
  * detach the softc from the geom and wither the geom.  Always returns 0.
  */
 static int
 gv_unload(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
 {
 	struct gv_softc *softc;
 
 	g_trace(G_T_TOPOLOGY, "gv_unload(%p)", mp);
 
 	g_topology_assert();
 
 	softc = gp->softc;
 	if (softc == NULL)
 		return (0);
 
 	gv_worker_exit(softc);
 	gp->softc = NULL;
 	g_wither_geom(gp, ENXIO);
 
 	return (0);
 }
 
 /*
  * Userland "attach" request: attach a plex to a volume, or a subdisk to a
  * plex.  The request is validated here and then handed to the worker as a
  * GV_EVENT_ATTACH_PLEX / GV_EVENT_ATTACH_SD event.
  */
 static void
 gv_attach(struct gv_softc *sc, struct gctl_req *req)
 {
 	struct gv_volume *vol;
 	struct gv_plex *plex;
 	struct gv_sd *sd;
 	char *cname, *pname;
 	off_t *offp;
 	int *renp, ctype, ptype;
 
 	cname = gctl_get_param(req, "child", NULL);
 	if (cname == NULL) {
 		gctl_error(req, "no child given");
 		return;
 	}
 
 	pname = gctl_get_param(req, "parent", NULL);
 	if (pname == NULL) {
 		gctl_error(req, "no parent given");
 		return;
 	}
 
 	offp = gctl_get_paraml(req, "offset", sizeof(*offp));
 	if (offp == NULL) {
 		gctl_error(req, "no offset given");
 		return;
 	}
 
 	renp = gctl_get_paraml(req, "rename", sizeof(*renp));
 	if (renp == NULL) {
 		gctl_error(req, "no rename flag given");
 		return;
 	}
 
 	ctype = gv_object_type(sc, cname);
 	ptype = gv_object_type(sc, pname);
 
 	if (ctype == GV_TYPE_PLEX) {
 		/* A plex can only be attached to a volume. */
 		if (ptype != GV_TYPE_VOL) {
 			gctl_error(req, "no such volume to attach to");
 			return;
 		}
 		vol = gv_find_vol(sc, pname);
 		plex = gv_find_plex(sc, cname);
 		gv_post_event(sc, GV_EVENT_ATTACH_PLEX, plex, vol, *offp,
 		    *renp);
 	} else if (ctype == GV_TYPE_SD) {
 		/* A subdisk can only be attached to a plex. */
 		if (ptype != GV_TYPE_PLEX) {
 			gctl_error(req, "no such plex to attach to");
 			return;
 		}
 		plex = gv_find_plex(sc, pname);
 		sd = gv_find_sd(sc, cname);
 		gv_post_event(sc, GV_EVENT_ATTACH_SD, sd, plex, *offp, *renp);
 	} else {
 		gctl_error(req, "invalid child type");
 	}
 }
 
 /*
  * Handle userland request of detaching object.
  *
  * "object" names a plex or subdisk and "flags" carries the detach flags.
  * The request is validated here and then handed to the worker as a
  * GV_EVENT_DETACH_PLEX / GV_EVENT_DETACH_SD event.
  */
 static void
 gv_detach(struct gv_softc *sc, struct gctl_req *req)
 {
 	struct gv_plex *p;
 	struct gv_sd *s;
 	int *flags, type;
 	char *object;
 
 	object = gctl_get_param(req, "object", NULL);
 	if (object == NULL) {
 		gctl_error(req, "no argument given");
 		return;
 	}
 
 	/* The flags are dereferenced below, so they must be present. */
 	flags = gctl_get_paraml(req, "flags", sizeof(*flags));
 	if (flags == NULL) {
 		gctl_error(req, "no flags given");
 		return;
 	}
 
 	type = gv_object_type(sc, object);
 	switch (type) {
 	case GV_TYPE_PLEX:
 		p = gv_find_plex(sc, object);
 		gv_post_event(sc, GV_EVENT_DETACH_PLEX, p, NULL, *flags, 0);
 		break;
 	case GV_TYPE_SD:
 		s = gv_find_sd(sc, object);
 		gv_post_event(sc, GV_EVENT_DETACH_SD, s, NULL, *flags, 0);
 		break;
 	default:
 		gctl_error(req, "invalid object type");
 		break;
 	}
 }
 
 /*
  * Handle userland requests for creating new objects.
  *
  * The request carries the counts "drives", "volumes", "plexes" and
  * "subdisks", a "flags" word, and one definition per object named
  * "drive<i>", "volume<i>", "plex<i>" and "sd<i>".  Each accepted definition
  * is copied and queued for the worker as a GV_EVENT_CREATE_* event.
  *
  * Returns -1 when a count, the flags or a definition is missing; returns 0
  * otherwise, including when processing stopped early via "goto error".
  */
 static int
 gv_create(struct g_geom *gp, struct gctl_req *req)
 {
 	struct gv_softc *sc;
 	struct gv_drive *d, *d2;
 	struct gv_plex *p, *p2;
 	struct gv_sd *s, *s2;
 	struct gv_volume *v, *v2;
 	struct g_provider *pp;
 	/* NOTE(review): "error" is assigned below but never read. */
 	int error, i, *drives, *flags, *plexes, *subdisks, *volumes;
 	char buf[20];
 
 	g_topology_assert();
 
 	sc = gp->softc;
 
 	/* Find out how many of each object have been passed in. */
 	volumes = gctl_get_paraml(req, "volumes", sizeof(*volumes));
 	plexes = gctl_get_paraml(req, "plexes", sizeof(*plexes));
 	subdisks = gctl_get_paraml(req, "subdisks", sizeof(*subdisks));
 	drives = gctl_get_paraml(req, "drives", sizeof(*drives));
 	if (volumes == NULL || plexes == NULL || subdisks == NULL ||
 	    drives == NULL) {
 		gctl_error(req, "number of objects not given");
 		return (-1);
 	}
 	flags = gctl_get_paraml(req, "flags", sizeof(*flags));
 	if (flags == NULL) {
 		gctl_error(req, "flags not given");
 		return (-1);
 	}
 
 	/* First, handle drive definitions ... */
 	for (i = 0; i < *drives; i++) {
 		snprintf(buf, sizeof(buf), "drive%d", i);
 		d2 = gctl_get_paraml(req, buf, sizeof(*d2));
 		if (d2 == NULL) {
 			gctl_error(req, "no drive definition given");
 			return (-1);
 		}
 		/*
 		 * Make sure that the device specified in the drive config is
 		 * an active GEOM provider.
 		 */
 		pp = g_provider_by_name(d2->device);
 		if (pp == NULL) {
 			gctl_error(req, "%s: device not found", d2->device);
 			goto error;
 		}
 		if (gv_find_drive(sc, d2->name) != NULL) {
 			/* Ignore error. */
 			if (*flags & GV_FLAG_F)
 				continue;
 			gctl_error(req, "drive '%s' already exists", d2->name);
 			goto error;
 		}
 		if (gv_find_drive_device(sc, d2->device) != NULL) {
 			gctl_error(req, "device '%s' already configured in "
 			    "gvinum", d2->device);
 			goto error;
 		}
 
 		/* Queue a private copy of the definition for the worker. */
 		d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
 		bcopy(d2, d, sizeof(*d));
 
 		gv_post_event(sc, GV_EVENT_CREATE_DRIVE, d, NULL, 0, 0);
 	}
 
 	/* ... then volume definitions ... */
 	for (i = 0; i < *volumes; i++) {
 		error = 0;
 		snprintf(buf, sizeof(buf), "volume%d", i);
 		v2 = gctl_get_paraml(req, buf, sizeof(*v2));
 		if (v2 == NULL) {
 			gctl_error(req, "no volume definition given");
 			return (-1);
 		}
 		if (gv_find_vol(sc, v2->name) != NULL) {
 			/* Ignore error. */
 			if (*flags & GV_FLAG_F)
 				continue;
 			gctl_error(req, "volume '%s' already exists", v2->name);
 			goto error;
 		}
 
 		v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO);
 		bcopy(v2, v, sizeof(*v));
 
 		gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0);
 	}
 
 	/* ... then plex definitions ... */
 	for (i = 0; i < *plexes; i++) {
 		error = 0;
 		snprintf(buf, sizeof(buf), "plex%d", i);
 		p2 = gctl_get_paraml(req, buf, sizeof(*p2));
 		if (p2 == NULL) {
 			gctl_error(req, "no plex definition given");
 			return (-1);
 		}
 		if (gv_find_plex(sc, p2->name) != NULL) {
 			/* Ignore error. */
 			if (*flags & GV_FLAG_F)
 				continue;
 			gctl_error(req, "plex '%s' already exists", p2->name);
 			goto error;
 		}
 
 		p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO);
 		bcopy(p2, p, sizeof(*p));
 
 		gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0);
 	}
 
 	/* ... and, finally, subdisk definitions. */
 	for (i = 0; i < *subdisks; i++) {
 		error = 0;
 		snprintf(buf, sizeof(buf), "sd%d", i);
 		s2 = gctl_get_paraml(req, buf, sizeof(*s2));
 		if (s2 == NULL) {
 			gctl_error(req, "no subdisk definition given");
 			return (-1);
 		}
 		if (gv_find_sd(sc, s2->name) != NULL) {
 			/* Ignore error. */
 			if (*flags & GV_FLAG_F)
 				continue;
 			gctl_error(req, "sd '%s' already exists", s2->name);
 			goto error;
 		}
 
 		s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO);
 		bcopy(s2, s, sizeof(*s));
 
 		gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0);
 	}
 
 	/*
 	 * Reached on success and from every "goto error" above: queue the
 	 * setup and save events behind whatever create events were posted.
 	 * NOTE(review): the "return (-1)" paths above skip these two events
 	 * even though earlier create events may already be queued.
 	 */
 error:
 	gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0);
 	gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
 
 	return (0);
 }
 
 /*
  * Control-request entry point for the VINUM class: dispatch on verb to the
  * matching handler, or queue the corresponding event for the worker.
  * Problems are reported on the request through gctl_error().
  */
 static void
 gv_config(struct gctl_req *req, struct g_class *mp, char const *verb)
 {
 	struct g_geom *gp;
 	struct gv_softc *sc;
 	struct sbuf *sb;
 	char *comment;
 
 	g_topology_assert();
 
 	/*
 	 * The class may have no geom, and gv_unload() clears gp->softc
 	 * before the geom withers away.  Refuse the request rather than
 	 * hand a NULL geom or softc to the handlers below.
 	 */
 	gp = LIST_FIRST(&mp->geom);
 	if (gp == NULL || gp->softc == NULL) {
 		gctl_error(req, "vinum geom not initialized");
 		return;
 	}
 	sc = gp->softc;
 
 	if (!strcmp(verb, "attach")) {
 		gv_attach(sc, req);
 
 	} else if (!strcmp(verb, "concat")) {
 		gv_concat(gp, req);
 
 	} else if (!strcmp(verb, "detach")) {
 		gv_detach(sc, req);
 
 	} else if (!strcmp(verb, "list")) {
 		gv_list(gp, req);
 
 	/* Save our configuration back to disk. */
 	} else if (!strcmp(verb, "saveconfig")) {
 		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
 
 	/* Return configuration in string form. */
 	} else if (!strcmp(verb, "getconfig")) {
 		comment = gctl_get_param(req, "comment", NULL);
 		if (comment == NULL) {
 			gctl_error(req, "no comment parameter given");
 			return;
 		}
 		/* Fixed-length buffer of GV_CFG_LEN bytes. */
 		sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
 		gv_format_config(sc, sb, 0, comment);
 		sbuf_finish(sb);
 		/* The length passed includes the terminating NUL. */
 		gctl_set_param(req, "config", sbuf_data(sb), sbuf_len(sb) + 1);
 		sbuf_delete(sb);
 
 	} else if (!strcmp(verb, "create")) {
 		gv_create(gp, req);
 
 	} else if (!strcmp(verb, "mirror")) {
 		gv_mirror(gp, req);
 
 	} else if (!strcmp(verb, "move")) {
 		gv_move(gp, req);
 
 	} else if (!strcmp(verb, "raid5")) {
 		gv_raid5(gp, req);
 
 	} else if (!strcmp(verb, "rebuildparity") ||
 	    !strcmp(verb, "checkparity")) {
 		gv_parityop(sc, req);
 
 	} else if (!strcmp(verb, "remove")) {
 		gv_remove(gp, req);
 
 	} else if (!strcmp(verb, "rename")) {
 		gv_rename(gp, req);
 
 	} else if (!strcmp(verb, "resetconfig")) {
 		gv_post_event(sc, GV_EVENT_RESET_CONFIG, sc, NULL, 0, 0);
 
 	} else if (!strcmp(verb, "start")) {
 		gv_start_obj(gp, req);
 
 	} else if (!strcmp(verb, "stripe")) {
 		gv_stripe(gp, req);
 
 	} else if (!strcmp(verb, "setstate")) {
 		gv_setstate(gp, req);
 	} else
 		gctl_error(req, "Unknown verb parameter");
 }
 
 /*
  * Userland "rebuildparity"/"checkparity" request.
  *
  * Validates that "plex" names a RAID5 plex in the UP state and that "flags"
  * and "rebuild" are present, then queues GV_EVENT_PARITY_REBUILD when
  * *rebuild is non-zero and GV_EVENT_PARITY_CHECK otherwise.
  */
 static void
 gv_parityop(struct gv_softc *sc, struct gctl_req *req)
 {
 	struct gv_plex *plx;
 	char *plexname;
 	int *flagsp, *rebuildp;
 
 	plexname = gctl_get_param(req, "plex", NULL);
 	if (plexname == NULL) {
 		gctl_error(req, "no plex given");
 		return;
 	}
 
 	flagsp = gctl_get_paraml(req, "flags", sizeof(*flagsp));
 	if (flagsp == NULL) {
 		gctl_error(req, "no flags given");
 		return;
 	}
 
 	rebuildp = gctl_get_paraml(req, "rebuild", sizeof(*rebuildp));
 	if (rebuildp == NULL) {
 		gctl_error(req, "no operation given");
 		return;
 	}
 
 	if (gv_object_type(sc, plexname) != GV_TYPE_PLEX) {
 		gctl_error(req, "'%s' is not a plex", plexname);
 		return;
 	}
 
 	plx = gv_find_plex(sc, plexname);
 	if (plx->state != GV_PLEX_UP) {
 		gctl_error(req, "plex %s is not completely accessible",
 		    plx->name);
 		return;
 	}
 	if (plx->org != GV_PLEX_RAID5) {
 		gctl_error(req, "plex %s is not a RAID5 plex", plx->name);
 		return;
 	}
 
 	/*
 	 * Hand the operation to the worker.
 	 * XXX: the plex state may have changed by the time the event is
 	 * picked up; the worker should perhaps check it again.
 	 */
 	gv_post_event(sc, *rebuildp ? GV_EVENT_PARITY_REBUILD :
 	    GV_EVENT_PARITY_CHECK, plx, NULL, 0, 0);
 }
 
 /*
  * Taste hook: attach a temporary consumer to pp, try to read a vinum header
  * through it and, if that succeeds, queue GV_EVENT_DRIVE_TASTED for the
  * worker.  The consumer is always torn down again and the function always
  * returns NULL.
  */
 static struct g_geom *
 gv_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct gv_softc *sc;
 	struct gv_hdr vhdr;
 	int error;
 
  	g_topology_assert();
 	g_trace(G_T_TOPOLOGY, "gv_taste(%s, %s)", mp->name, pp->name);
 
 	/* The geom created by gv_init() is expected to be first in the class. */
 	gp = LIST_FIRST(&mp->geom);
 	if (gp == NULL) {
 		G_VINUM_DEBUG(0, "error: tasting, but not initialized?");
 		return (NULL);
 	}
 	sc = gp->softc;
 
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	if (g_attach(cp, pp) != 0) {
 		g_destroy_consumer(cp);
 		return (NULL);
 	}
 	/* Open for reading only. */
 	if (g_access(cp, 1, 0, 0) != 0) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		return (NULL);
 	}
 	/* The topology lock is dropped around the header read. */
 	g_topology_unlock();
 
 	error = gv_read_header(cp, &vhdr);
 
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 
 	/* Check if what we've been given is a valid vinum drive. */
 	if (!error)
 		gv_post_event(sc, GV_EVENT_DRIVE_TASTED, pp, NULL, 0, 0);
 
 	return (NULL);
 }
 
 void
 gv_worker(void *arg)
 {
 	struct g_provider *pp;
 	struct gv_softc *sc;
 	struct gv_event *ev;
 	struct gv_volume *v;
 	struct gv_plex *p;
 	struct gv_sd *s;
 	struct gv_drive *d;
 	struct bio *bp;
 	int newstate, flags, err, rename;
 	char *newname;
 	off_t offset;
 
 	sc = arg;
 	KASSERT(sc != NULL, ("NULL sc"));
 	for (;;) {
 		/* Look at the events first... */
 		ev = gv_get_event(sc);
 		if (ev != NULL) {
 			gv_remove_event(sc, ev);
 
 			switch (ev->type) {
 			case GV_EVENT_DRIVE_TASTED:
 				G_VINUM_DEBUG(2, "event 'drive tasted'");
 				pp = ev->arg1;
 				gv_drive_tasted(sc, pp);
 				break;
 
 			case GV_EVENT_DRIVE_LOST:
 				G_VINUM_DEBUG(2, "event 'drive lost'");
 				d = ev->arg1;
 				gv_drive_lost(sc, d);
 				break;
 
 			case GV_EVENT_CREATE_DRIVE:
 				G_VINUM_DEBUG(2, "event 'create drive'");
 				d = ev->arg1;
 				gv_create_drive(sc, d);
 				break;
 
 			case GV_EVENT_CREATE_VOLUME:
 				G_VINUM_DEBUG(2, "event 'create volume'");
 				v = ev->arg1;
 				gv_create_volume(sc, v);
 				break;
 
 			case GV_EVENT_CREATE_PLEX:
 				G_VINUM_DEBUG(2, "event 'create plex'");
 				p = ev->arg1;
 				gv_create_plex(sc, p);
 				break;
 
 			case GV_EVENT_CREATE_SD:
 				G_VINUM_DEBUG(2, "event 'create sd'");
 				s = ev->arg1;
 				gv_create_sd(sc, s);
 				break;
 
 			case GV_EVENT_RM_DRIVE:
 				G_VINUM_DEBUG(2, "event 'remove drive'");
 				d = ev->arg1;
 				flags = ev->arg3;
 				gv_rm_drive(sc, d, flags);
 				/*gv_setup_objects(sc);*/
 				break;
 
 			case GV_EVENT_RM_VOLUME:
 				G_VINUM_DEBUG(2, "event 'remove volume'");
 				v = ev->arg1;
 				gv_rm_vol(sc, v);
 				/*gv_setup_objects(sc);*/
 				break;
 
 			case GV_EVENT_RM_PLEX:
 				G_VINUM_DEBUG(2, "event 'remove plex'");
 				p = ev->arg1;
 				gv_rm_plex(sc, p);
 				/*gv_setup_objects(sc);*/
 				break;
 
 			case GV_EVENT_RM_SD:
 				G_VINUM_DEBUG(2, "event 'remove sd'");
 				s = ev->arg1;
 				gv_rm_sd(sc, s);
 				/*gv_setup_objects(sc);*/
 				break;
 
 			case GV_EVENT_SAVE_CONFIG:
 				G_VINUM_DEBUG(2, "event 'save config'");
 				gv_save_config(sc);
 				break;
 
 			case GV_EVENT_SET_SD_STATE:
 				G_VINUM_DEBUG(2, "event 'setstate sd'");
 				s = ev->arg1;
 				newstate = ev->arg3;
 				flags = ev->arg4;
 				err = gv_set_sd_state(s, newstate, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error setting subdisk"
 					    " state: error code %d", err);
 				break;
 
 			case GV_EVENT_SET_DRIVE_STATE:
 				G_VINUM_DEBUG(2, "event 'setstate drive'");
 				d = ev->arg1;
 				newstate = ev->arg3;
 				flags = ev->arg4;
 				err = gv_set_drive_state(d, newstate, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error setting drive "
 					    "state: error code %d", err);
 				break;
 
 			case GV_EVENT_SET_VOL_STATE:
 				G_VINUM_DEBUG(2, "event 'setstate volume'");
 				v = ev->arg1;
 				newstate = ev->arg3;
 				flags = ev->arg4;
 				err = gv_set_vol_state(v, newstate, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error setting volume "
 					    "state: error code %d", err);
 				break;
 
 			case GV_EVENT_SET_PLEX_STATE:
 				G_VINUM_DEBUG(2, "event 'setstate plex'");
 				p = ev->arg1;
 				newstate = ev->arg3;
 				flags = ev->arg4;
 				err = gv_set_plex_state(p, newstate, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error setting plex "
 					    "state: error code %d", err);
 				break;
 
 			case GV_EVENT_SETUP_OBJECTS:
 				G_VINUM_DEBUG(2, "event 'setup objects'");
 				gv_setup_objects(sc);
 				break;
 
 			case GV_EVENT_RESET_CONFIG:
 				G_VINUM_DEBUG(2, "event 'resetconfig'");
 				err = gv_resetconfig(sc);
 				if (err)
 					G_VINUM_DEBUG(0, "error resetting "
 					    "config: error code %d", err);
 				break;
 
 			case GV_EVENT_PARITY_REBUILD:
 				/*
 				 * Start the rebuild. The gv_plex_done will
 				 * handle issuing of the remaining rebuild bio's
 				 * until it's finished. 
 				 */
 				G_VINUM_DEBUG(2, "event 'rebuild'");
 				p = ev->arg1;
 				if (p->state != GV_PLEX_UP) {
 					G_VINUM_DEBUG(0, "plex %s is not "
 					    "completely accessible", p->name);
 					break;
 				}
 				if (p->flags & GV_PLEX_SYNCING ||
 				    p->flags & GV_PLEX_REBUILDING ||
 				    p->flags & GV_PLEX_GROWING) {
 					G_VINUM_DEBUG(0, "plex %s is busy with "
 					    "syncing or parity build", p->name);
 					break;
 				}
 				p->synced = 0;
 				p->flags |= GV_PLEX_REBUILDING;
 				g_topology_assert_not();
 				g_topology_lock();
 				err = gv_access(p->vol_sc->provider, 1, 1, 0);
 				if (err) {
 					G_VINUM_DEBUG(0, "unable to access "
 					    "provider");
 					break;
 				}
 				g_topology_unlock();
 				gv_parity_request(p, GV_BIO_CHECK |
 				    GV_BIO_PARITY, 0);
 				break;
 
 			case GV_EVENT_PARITY_CHECK:
 				/* Start parity check. */
 				G_VINUM_DEBUG(2, "event 'check'");
 				p = ev->arg1;
 				if (p->state != GV_PLEX_UP) {
 					G_VINUM_DEBUG(0, "plex %s is not "
 					    "completely accessible", p->name);
 					break;
 				}
 				if (p->flags & GV_PLEX_SYNCING ||
 				    p->flags & GV_PLEX_REBUILDING ||
 				    p->flags & GV_PLEX_GROWING) {
 					G_VINUM_DEBUG(0, "plex %s is busy with "
 					    "syncing or parity build", p->name);
 					break;
 				}
 				p->synced = 0;
 				g_topology_assert_not();
 				g_topology_lock();
 				err = gv_access(p->vol_sc->provider, 1, 1, 0);
 				if (err) {
 					G_VINUM_DEBUG(0, "unable to access "
 					    "provider");
 					break;
 				}
 				g_topology_unlock();
 				gv_parity_request(p, GV_BIO_CHECK, 0);
 				break;
 
 			case GV_EVENT_START_PLEX:
 				G_VINUM_DEBUG(2, "event 'start' plex");
 				p = ev->arg1;
 				gv_start_plex(p);
 				break;
 
 			case GV_EVENT_START_VOLUME:
 				G_VINUM_DEBUG(2, "event 'start' volume");
 				v = ev->arg1;
 				gv_start_vol(v);
 				break;
 
 			case GV_EVENT_ATTACH_PLEX:
 				G_VINUM_DEBUG(2, "event 'attach' plex");
 				p = ev->arg1;
 				v = ev->arg2;
 				rename = ev->arg4;
 				err = gv_attach_plex(p, v, rename);
 				if (err)
 					G_VINUM_DEBUG(0, "error attaching %s to"
 					    " %s: error code %d", p->name,
 					    v->name, err);
 				break;
 
 			case GV_EVENT_ATTACH_SD:
 				G_VINUM_DEBUG(2, "event 'attach' sd");
 				s = ev->arg1;
 				p = ev->arg2;
 				offset = ev->arg3;
 				rename = ev->arg4;
 				err = gv_attach_sd(s, p, offset, rename);
 				if (err)
 					G_VINUM_DEBUG(0, "error attaching %s to"
 					    " %s: error code %d", s->name,
 					    p->name, err);
 				break;
 
 			case GV_EVENT_DETACH_PLEX:
 				G_VINUM_DEBUG(2, "event 'detach' plex");
 				p = ev->arg1;
 				flags = ev->arg3;
 				err = gv_detach_plex(p, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error detaching %s: "
 					    "error code %d", p->name, err);
 				break;
 
 			case GV_EVENT_DETACH_SD:
 				G_VINUM_DEBUG(2, "event 'detach' sd");
 				s = ev->arg1;
 				flags = ev->arg3;
 				err = gv_detach_sd(s, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error detaching %s: "
 					    "error code %d", s->name, err);
 				break;
 
 			case GV_EVENT_RENAME_VOL:
 				G_VINUM_DEBUG(2, "event 'rename' volume");
 				v = ev->arg1;
 				newname = ev->arg2;
 				flags = ev->arg3;
 				err = gv_rename_vol(sc, v, newname, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error renaming %s to "
 					    "%s: error code %d", v->name,
 					    newname, err);
 				g_free(newname);
 				/* Destroy and recreate the provider if we can. */
 				if (gv_provider_is_open(v->provider)) {
 					G_VINUM_DEBUG(0, "unable to rename "
 					    "provider to %s: provider in use",
 					    v->name);
 					break;
 				}
 				g_topology_lock();
 				g_wither_provider(v->provider, ENOENT);
 				g_topology_unlock();
 				v->provider = NULL;
 				gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc,
 				    NULL, 0, 0);
 				break;
 
 			case GV_EVENT_RENAME_PLEX:
 				G_VINUM_DEBUG(2, "event 'rename' plex");
 				p = ev->arg1;
 				newname = ev->arg2;
 				flags = ev->arg3;
 				err = gv_rename_plex(sc, p, newname, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error renaming %s to "
 					    "%s: error code %d", p->name,
 					    newname, err);
 				g_free(newname);
 				break;
 
 			case GV_EVENT_RENAME_SD:
 				G_VINUM_DEBUG(2, "event 'rename' sd");
 				s = ev->arg1;
 				newname = ev->arg2;
 				flags = ev->arg3;
 				err = gv_rename_sd(sc, s, newname, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error renaming %s to "
 					    "%s: error code %d", s->name,
 					    newname, err);
 				g_free(newname);
 				break;
 
 			case GV_EVENT_RENAME_DRIVE:
 				G_VINUM_DEBUG(2, "event 'rename' drive");
 				d = ev->arg1;
 				newname = ev->arg2;
 				flags = ev->arg3;
 				err = gv_rename_drive(sc, d, newname, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error renaming %s to "
 					    "%s: error code %d", d->name,
 					    newname, err);
 				g_free(newname);
 				break;
 
 			case GV_EVENT_MOVE_SD:
 				G_VINUM_DEBUG(2, "event 'move' sd");
 				s = ev->arg1;
 				d = ev->arg2;
 				flags = ev->arg3;
 				err = gv_move_sd(sc, s, d, flags);
 				if (err)
 					G_VINUM_DEBUG(0, "error moving %s to "
 					    "%s: error code %d", s->name,
 					    d->name, err);
 				break;
 
 			case GV_EVENT_THREAD_EXIT:
 				G_VINUM_DEBUG(2, "event 'thread exit'");
 				g_free(ev);
 				mtx_lock(&sc->equeue_mtx);
 				mtx_lock(&sc->bqueue_mtx);
 				gv_cleanup(sc);
 				mtx_destroy(&sc->bqueue_mtx);
 				mtx_destroy(&sc->equeue_mtx);
 				g_free(sc->bqueue_down);
 				g_free(sc->bqueue_up);
 				g_free(sc);
 				kproc_exit(0);
 				/* NOTREACHED */
 
 			default:
 				G_VINUM_DEBUG(1, "unknown event %d", ev->type);
 			}
 
 			g_free(ev);
 			continue;
 		}
 
 		/* ... then do I/O processing. */
 		mtx_lock(&sc->bqueue_mtx);
 		/* First do new requests. */
 		bp = bioq_takefirst(sc->bqueue_down);
 		if (bp != NULL) {
 			mtx_unlock(&sc->bqueue_mtx);
 			/* A bio that interfered with another bio. */
 			if (bp->bio_pflags & GV_BIO_ONHOLD) {
 				s = bp->bio_caller1;
 				p = s->plex_sc;
 				/* Is it still locked out? */
 				if (gv_stripe_active(p, bp)) {
 					/* Park the bio on the waiting queue. */
 					bioq_disksort(p->wqueue, bp);
 				} else {
 					bp->bio_pflags &= ~GV_BIO_ONHOLD;
 					g_io_request(bp, s->drive_sc->consumer);
 				}
 			/* A special request requireing special handling. */
 			} else if (bp->bio_pflags & GV_BIO_INTERNAL) {
 				p = bp->bio_caller1;
 				gv_plex_start(p, bp);
 			} else {
 				gv_volume_start(sc, bp);
 			}
 			mtx_lock(&sc->bqueue_mtx);
 		}
 		/* Then do completed requests. */
 		bp = bioq_takefirst(sc->bqueue_up);
 		if (bp == NULL) {
 			msleep(sc, &sc->bqueue_mtx, PRIBIO, "-", hz/10);
 			mtx_unlock(&sc->bqueue_mtx);
 			continue;
 		}
 		mtx_unlock(&sc->bqueue_mtx);
 		gv_bio_done(sc, bp);
 	}
 }
 
 /* Class name; gv_init() creates its geom under the same "VINUM" string. */
 #define	VINUM_CLASS_NAME "VINUM"
 
 /* Method table of the VINUM class, wiring in the handlers defined above. */
 static struct g_class g_vinum_class	= {
 	.name = VINUM_CLASS_NAME,
 	.version = G_VERSION,
 	.init = gv_init,
 	.taste = gv_taste,
 	.ctlreq = gv_config,
 	.destroy_geom = gv_unload,
 };
 
 /* Class declaration and module version for geom_vinum. */
 DECLARE_GEOM_CLASS(g_vinum_class, g_vinum);
 MODULE_VERSION(geom_vinum, 0);
diff --git a/sys/geom/virstor/g_virstor.c b/sys/geom/virstor/g_virstor.c
index e27d92b509d4..8e4725997ab2 100644
--- a/sys/geom/virstor/g_virstor.c
+++ b/sys/geom/virstor/g_virstor.c
@@ -1,1878 +1,1879 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2006-2007 Ivan Voras <ivoras@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /* Implementation notes:
  * - "Components" are wrappers around providers that make up the
  *   virtual storage (i.e. a virstor has "physical" components)
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sx.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/time.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
 #include <sys/mutex.h>
 #include <vm/uma.h>
 #include <geom/geom.h>
 #include <geom/geom_dbg.h>
 
 #include <geom/virstor/g_virstor.h>
 #include <geom/virstor/g_virstor_md.h>
 
 FEATURE(g_virstor, "GEOM virtual storage support");
 
 /* Declare malloc(9) label */
 static MALLOC_DEFINE(M_GVIRSTOR, "gvirstor", "GEOM_VIRSTOR Data");
 
 /* GEOM class methods (per-class callbacks, defined further down) */
 static g_init_t g_virstor_init;
 static g_fini_t g_virstor_fini;
 static g_taste_t g_virstor_taste;
 static g_ctl_req_t g_virstor_config;
 static g_ctl_destroy_geom_t g_virstor_destroy_geom;
 
 /*
  * Declare & initialize class structure ("geom class").
  * Only the per-class callbacks are wired up here.
  */
 struct g_class g_virstor_class = {
 	.name =		G_VIRSTOR_CLASS_NAME,
 	.version =	G_VERSION,
 	.init =		g_virstor_init,
 	.fini =		g_virstor_fini,
 	.taste =	g_virstor_taste,
 	.ctlreq =	g_virstor_config,
 	.destroy_geom = g_virstor_destroy_geom
 	/* The .dumpconf and the rest are only usable for a geom instance, so
 	 * they will be set when such instance is created. */
 };
 
 /* Declare sysctl's and loader tunables (all live under kern.geom.virstor) */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, virstor,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_GVIRSTOR information");
 
 /* Debug level; the default of 2 is the "production" level described below. */
 static u_int g_virstor_debug = 2;
 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, debug, CTLFLAG_RWTUN, &g_virstor_debug,
     0, "Debug level (2=production, 5=normal, 15=excessive)");
 
 /* Free-chunk threshold for the administrative warning. */
 static u_int g_virstor_chunk_watermark = 100;
 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, chunk_watermark, CTLFLAG_RWTUN,
     &g_virstor_chunk_watermark, 0,
     "Minimum number of free chunks before issuing administrative warning");
 
 /* Free-component threshold for the administrative warning. */
 static u_int g_virstor_component_watermark = 1;
 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, component_watermark, CTLFLAG_RWTUN,
     &g_virstor_component_watermark, 0,
     "Minimum number of free components before issuing administrative warning");
 
 /* On-disk metadata helpers and geom/component management */
 static int read_metadata(struct g_consumer *, struct g_virstor_metadata *);
 static void write_metadata(struct g_consumer *, struct g_virstor_metadata *);
 static int clear_metadata(struct g_virstor_component *);
 static int add_provider_to_geom(struct g_virstor_softc *, struct g_provider *,
     struct g_virstor_metadata *);
 static struct g_geom *create_virstor_geom(struct g_class *,
     struct g_virstor_metadata *);
 static void virstor_check_and_run(struct g_virstor_softc *);
 static u_int virstor_valid_components(struct g_virstor_softc *);
 static int virstor_geom_destroy(struct g_virstor_softc *, boolean_t,
     boolean_t);
 static void remove_component(struct g_virstor_softc *,
     struct g_virstor_component *, boolean_t);
 static void bioq_dismantle(struct bio_queue_head *);
 static int allocate_chunk(struct g_virstor_softc *,
     struct g_virstor_component **, u_int *, u_int *);
 static void delay_destroy_consumer(void *, int);
 static void dump_component(struct g_virstor_component *comp);
 #if 0
 static void dump_me(struct virstor_map_entry *me, unsigned int nr);
 #endif
 
 /* Handlers for the control verbs dispatched by g_virstor_config() */
 static void virstor_ctl_stop(struct gctl_req *, struct g_class *);
 static void virstor_ctl_add(struct gctl_req *, struct g_class *);
 static void virstor_ctl_remove(struct gctl_req *, struct g_class *);
 static struct g_virstor_softc * virstor_find_geom(const struct g_class *,
     const char *);
 static void update_metadata(struct g_virstor_softc *);
 static void fill_metadata(struct g_virstor_softc *, struct g_virstor_metadata *,
     u_int, u_int);
 
 /* Per-geom methods, installed on each instance by create_virstor_geom() */
 static void g_virstor_orphan(struct g_consumer *);
 static int g_virstor_access(struct g_provider *, int, int, int);
 static void g_virstor_start(struct bio *);
 static void g_virstor_dumpconf(struct sbuf *, const char *, struct g_geom *,
     struct g_consumer *, struct g_provider *);
 static void g_virstor_done(struct bio *);
 
 /* Placeholder method installed on the temporary geom used while tasting */
 static void invalid_call(void);
 /*
  * Initialise GEOM class (per-class callback).
  * Currently this only sanity-checks the map geometry; there is no global
  * state to set up.
  */
 static void
 g_virstor_init(struct g_class *mp __unused)
 {
 
 	/* Catch map struct size mismatch when the class is initialised (the
 	 * check is an MPASS() inside the function body, so it is evaluated
 	 * at run time, not at compile time); Map entries must fit into
 	 * maxphys exactly, with no wasted space. */
 	MPASS(VIRSTOR_MAP_BLOCK_ENTRIES * VIRSTOR_MAP_ENTRY_SIZE == maxphys);
 
 	/* Init UMA zones, TAILQ's, other global vars */
 }
 
 /*
  * Finalise GEOM class (per-class callback).
  * Intentionally empty: g_virstor_init() sets up no global state, so there
  * is nothing to tear down here.
  */
 static void
 g_virstor_fini(struct g_class *mp __unused)
 {
 
 	/* Deinit UMA zones & global vars */
 }
 
 /*
  * Control-request entry point for the class (per-class callback).
  *
  * Checks the request's "version" parameter against G_VIRSTOR_VERSION and
  * then hands the request to the handler for the given verb.  Entered with
  * the topology lock held; the lock is released while the verb handler runs
  * and taken again before returning.
  */
 static void
 g_virstor_config(struct gctl_req *req, struct g_class *cp, char const *verb)
 {
 	uint32_t *vers;
 
 	g_topology_assert();
 
 	vers = gctl_get_paraml(req, "version", sizeof(*vers));
 	if (vers == NULL) {
 		gctl_error(req, "Failed to get 'version' argument");
 		return;
 	}
 	if (*vers != G_VIRSTOR_VERSION) {
 		gctl_error(req, "Userland and kernel versions out of sync");
 		return;
 	}
 
 	/* The verb handlers take the topology lock themselves as needed. */
 	g_topology_unlock();
 	if (strcmp(verb, "stop") == 0 || strcmp(verb, "destroy") == 0) {
 		virstor_ctl_stop(req, cp);
 	} else if (strcmp(verb, "add") == 0) {
 		virstor_ctl_add(req, cp);
 	} else if (strcmp(verb, "remove") == 0) {
 		virstor_ctl_remove(req, cp);
 	} else {
 		gctl_error(req, "unknown verb: '%s'", verb);
 	}
 	g_topology_lock();
 }
 
 /*
  * "stop" verb from userland
  */
 static void
 virstor_ctl_stop(struct gctl_req *req, struct g_class *cp)
 {
 	int *force, *nargs;
 	int i;
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof *nargs);
 	if (nargs == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "nargs");
 		return;
 	}
 	if (*nargs < 1) {
 		gctl_error(req, "Invalid number of arguments");
 		return;
 	}
 	force = gctl_get_paraml(req, "force", sizeof *force);
 	if (force == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "force");
 		return;
 	}
 
 	g_topology_lock();
 	for (i = 0; i < *nargs; i++) {
 		char param[8];
 		const char *name;
 		struct g_virstor_softc *sc;
 		int error;
 
 		snprintf(param, sizeof(param), "arg%d", i);
 		name = gctl_get_asciiparam(req, param);
 		if (name == NULL) {
 			gctl_error(req, "No 'arg%d' argument", i);
 			g_topology_unlock();
 			return;
 		}
 		sc = virstor_find_geom(cp, name);
 		if (sc == NULL) {
 			gctl_error(req, "Don't know anything about '%s'", name);
 			g_topology_unlock();
 			return;
 		}
 
 		LOG_MSG(LVL_INFO, "Stopping %s by the userland command",
 		    sc->geom->name);
 		update_metadata(sc);
 		if ((error = virstor_geom_destroy(sc, TRUE, TRUE)) != 0) {
 			LOG_MSG(LVL_ERROR, "Cannot destroy %s: %d",
 			    sc->geom->name, error);
 		}
 	}
 	g_topology_unlock();
 }
 
 /*
  * "add" verb from userland - add new component(s) to the structure.
  * This will be done all at once in here, without going through the
  * .taste function for new components.
  */
 static void
 virstor_ctl_add(struct gctl_req *req, struct g_class *cp)
 {
 	/* Note: while this is going on, I/O is being done on
 	 * the g_up and g_down threads. The idea is to make changes
 	 * to softc members in a way that can atomically activate
 	 * them all at once. */
 	struct g_virstor_softc *sc;
 	int *hardcode, *nargs;
 	const char *geom_name;	/* geom to add a component to */
 	struct g_consumer *fcp;
 	struct g_virstor_bio_q *bq;
 	u_int added;
 	int error;
 	int i;
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "nargs");
 		return;
 	}
 	if (*nargs < 2) {
 		gctl_error(req, "Invalid number of arguments");
 		return;
 	}
 	hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode));
 	if (hardcode == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "hardcode");
 		return;
 	}
 
 	/* Find "our" geom */
 	geom_name = gctl_get_asciiparam(req, "arg0");
 	if (geom_name == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)");
 		return;
 	}
 	sc = virstor_find_geom(cp, geom_name);
 	if (sc == NULL) {
 		gctl_error(req, "Don't know anything about '%s'", geom_name);
 		return;
 	}
 
 	if (virstor_valid_components(sc) != sc->n_components) {
 		LOG_MSG(LVL_ERROR, "Cannot add components to incomplete "
 		    "virstor %s", sc->geom->name);
 		gctl_error(req, "Virstor %s is incomplete", sc->geom->name);
 		return;
 	}
 
 	fcp = sc->components[0].gcons;
 	added = 0;
 	g_topology_lock();
 	for (i = 1; i < *nargs; i++) {
 		struct g_virstor_metadata md;
 		char aname[8];
 		struct g_provider *pp;
 		struct g_consumer *cp;
 		u_int nc;
 		u_int j;
 
 		snprintf(aname, sizeof aname, "arg%d", i);
 		pp = gctl_get_provider(req, aname);
 		if (pp == NULL) {
 			/* This is the most common error so be verbose about it */
 			if (added != 0) {
 				gctl_error(req, "Invalid provider. (added"
 				    " %u components)", added);
 				update_metadata(sc);
 			}
 			g_topology_unlock();
 			return;
 		}
 		cp = g_new_consumer(sc->geom);
 		if (cp == NULL) {
 			gctl_error(req, "Cannot create consumer");
 			g_topology_unlock();
 			return;
 		}
 		error = g_attach(cp, pp);
 		if (error != 0) {
 			gctl_error(req, "Cannot attach a consumer to %s",
 			    pp->name);
 			g_destroy_consumer(cp);
 			g_topology_unlock();
 			return;
 		}
 		if (fcp->acr != 0 || fcp->acw != 0 || fcp->ace != 0) {
 			error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
 			if (error != 0) {
 				gctl_error(req, "Access request failed for %s",
 				    pp->name);
 				g_destroy_consumer(cp);
 				g_topology_unlock();
 				return;
 			}
 		}
 		if (fcp->provider->sectorsize != pp->sectorsize) {
 			gctl_error(req, "Sector size doesn't fit for %s",
 			    pp->name);
 			g_destroy_consumer(cp);
 			g_topology_unlock();
 			return;
 		}
 		for (j = 0; j < sc->n_components; j++) {
 			if (strcmp(sc->components[j].gcons->provider->name,
 			    pp->name) == 0) {
 				gctl_error(req, "Component %s already in %s",
 				    pp->name, sc->geom->name);
 				g_destroy_consumer(cp);
 				g_topology_unlock();
 				return;
 			}
 		}
 		sc->components = realloc(sc->components,
 		    sizeof(*sc->components) * (sc->n_components + 1),
 		    M_GVIRSTOR, M_WAITOK);
 
 		nc = sc->n_components;
 		sc->components[nc].gcons = cp;
 		sc->components[nc].sc = sc;
 		sc->components[nc].index = nc;
 		sc->components[nc].chunk_count = cp->provider->mediasize /
 		    sc->chunk_size;
 		sc->components[nc].chunk_next = 0;
 		sc->components[nc].chunk_reserved = 0;
 
 		if (sc->components[nc].chunk_count < 4) {
 			gctl_error(req, "Provider too small: %s",
 			    cp->provider->name);
 			g_destroy_consumer(cp);
 			g_topology_unlock();
 			return;
 		}
 		fill_metadata(sc, &md, nc, *hardcode);
 		write_metadata(cp, &md);
 		/* The new component becomes visible when n_components is
 		 * incremented */
 		sc->n_components++;
 		added++;
 	}
 	/* This call to update_metadata() is critical. In case there's a
 	 * power failure in the middle of it and some components are updated
 	 * while others are not, there will be trouble on next .taste() iff
 	 * a non-updated component is detected first */
 	update_metadata(sc);
 	g_topology_unlock();
 	LOG_MSG(LVL_INFO, "Added %d component(s) to %s", added,
 	    sc->geom->name);
 	/* Fire off BIOs previously queued because there wasn't any
 	 * physical space left. If the BIOs still can't be satisfied
 	 * they will again be added to the end of the queue (during
 	 * which the mutex will be recursed) */
 	bq = malloc(sizeof(*bq), M_GVIRSTOR, M_WAITOK);
 	bq->bio = NULL;
 	mtx_lock(&sc->delayed_bio_q_mtx);
 	/* First, insert a sentinel to the queue end, so we don't
 	 * end up in an infinite loop if there's still no free
 	 * space available. */
 	STAILQ_INSERT_TAIL(&sc->delayed_bio_q, bq, linkage);
 	while (!STAILQ_EMPTY(&sc->delayed_bio_q)) {
 		bq = STAILQ_FIRST(&sc->delayed_bio_q);
 		if (bq->bio != NULL) {
 			g_virstor_start(bq->bio);
 			STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
 			free(bq, M_GVIRSTOR);
 		} else {
 			STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
 			free(bq, M_GVIRSTOR);
 			break;
 		}
 	}
 	mtx_unlock(&sc->delayed_bio_q_mtx);
 
 }
 
 /*
  * Look up a geom of the given class by name.
  * Returns the softc pointer stored in the first geom whose name matches,
  * or NULL when the class has no geom with that name.
  */
 static struct g_virstor_softc *
 virstor_find_geom(const struct g_class *cp, const char *name)
 {
 	struct g_geom *candidate;
 
 	LIST_FOREACH(candidate, &cp->geom, geom) {
 		if (strcmp(candidate->name, name) != 0)
 			continue;
 		return (candidate->softc);
 	}
 	return (NULL);
 }
 
 /*
  * Update metadata on all components to reflect the current state
  * of these fields:
  *    - chunk_next
  *    - flags
  *    - md_count
  * Expects things to be set up so write_metadata() can work, i.e.
  * the topology lock must be held.
  */
 static void
 update_metadata(struct g_virstor_softc *sc)
 {
 	struct g_virstor_metadata md;
 	u_int n;
 
 	if (virstor_valid_components(sc) != sc->n_components)
 		return; /* Incomplete device */
 	LOG_MSG(LVL_DEBUG, "Updating metadata on components for %s",
 	    sc->geom->name);
 	/* Update metadata on components */
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__,
 	    sc->geom->class->name, sc->geom->name);
 	g_topology_assert();
 	for (n = 0; n < sc->n_components; n++) {
 		read_metadata(sc->components[n].gcons, &md);
 		md.chunk_next = sc->components[n].chunk_next;
 		md.flags = sc->components[n].flags;
 		md.md_count = sc->n_components;
 		write_metadata(sc->components[n].gcons, &md);
 	}
 }
 
 /*
  * Build a metadata record for the nc'th component of a virstor.
  *
  * The record is zeroed first and then populated from the softc (device-wide
  * fields) and from the component itself (per-component fields).  The
  * provider name is only recorded when "hardcode" is non-zero.
  */
 static void
 fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md,
     u_int nc, u_int hardcode)
 {
 	struct g_virstor_component *comp;
 
 	memset(md, 0, sizeof(*md));
 	comp = &sc->components[nc];
 
 	/* Device-wide part */
 	strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof(md->md_magic));
 	md->md_version = G_VIRSTOR_VERSION;
 	strncpy(md->md_name, sc->geom->name, sizeof(md->md_name));
 	md->md_id = sc->id;
 	md->md_virsize = sc->virsize;
 	md->md_chunk_size = sc->chunk_size;
 	md->md_count = sc->n_components;
 
 	/* Per-component part */
 	if (hardcode != 0)
 		strncpy(md->provider, comp->gcons->provider->name,
 		    sizeof(md->provider));
 	md->no = nc;
 	md->provsize = comp->gcons->provider->mediasize;
 	md->chunk_count = comp->chunk_count;
 	md->chunk_next = comp->chunk_next;
 	md->chunk_reserved = comp->chunk_reserved;
 	md->flags = comp->flags;
 }
 
 /*
  * Remove a component from virstor device.
  * Can only be done if the component is unallocated.
  */
 static void
 virstor_ctl_remove(struct gctl_req *req, struct g_class *cp)
 {
 	/* As this is executed in parallel to I/O, operations on virstor
 	 * structures must be as atomic as possible. */
 	struct g_virstor_softc *sc;
 	int *nargs;
 	const char *geom_name;
 	u_int removed;
 	int i;
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "nargs");
 		return;
 	}
 	if (*nargs < 2) {
 		gctl_error(req, "Invalid number of arguments");
 		return;
 	}
 	/* Find "our" geom */
 	geom_name = gctl_get_asciiparam(req, "arg0");
 	if (geom_name == NULL) {
 		gctl_error(req, "Error fetching argument '%s'",
 		    "geom_name (arg0)");
 		return;
 	}
 	sc = virstor_find_geom(cp, geom_name);
 	if (sc == NULL) {
 		gctl_error(req, "Don't know anything about '%s'", geom_name);
 		return;
 	}
 
 	if (virstor_valid_components(sc) != sc->n_components) {
 		LOG_MSG(LVL_ERROR, "Cannot remove components from incomplete "
 		    "virstor %s", sc->geom->name);
 		gctl_error(req, "Virstor %s is incomplete", sc->geom->name);
 		return;
 	}
 
 	removed = 0;
 	for (i = 1; i < *nargs; i++) {
 		char param[8];
 		const char *prov_name;
 		int j, found;
 		struct g_virstor_component *newcomp, *compbak;
 
 		snprintf(param, sizeof(param), "arg%d", i);
 		prov_name = gctl_get_asciiparam(req, param);
 		if (prov_name == NULL) {
 			gctl_error(req, "Error fetching argument '%s'", param);
 			return;
 		}
 		if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
 			prov_name += sizeof(_PATH_DEV) - 1;
 
 		found = -1;
 		for (j = 0; j < sc->n_components; j++) {
 			if (strcmp(sc->components[j].gcons->provider->name,
 			    prov_name) == 0) {
 				found = j;
 				break;
 			}
 		}
 		if (found == -1) {
 			LOG_MSG(LVL_ERROR, "No %s component in %s",
 			    prov_name, sc->geom->name);
 			continue;
 		}
 
 		compbak = sc->components;
 		newcomp = malloc(sc->n_components * sizeof(*sc->components),
 		    M_GVIRSTOR, M_WAITOK | M_ZERO);
 		bcopy(sc->components, newcomp, found * sizeof(*sc->components));
 		bcopy(&sc->components[found + 1], newcomp + found,
 		    found * sizeof(*sc->components));
 		if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) != 0) {
 			LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be "
 			    "removed from %s",
 			    prov_name, sc->geom->name);
 			free(newcomp, M_GVIRSTOR);
 			/* We'll consider this non-fatal error */
 			continue;
 		}
 		/* Renumerate unallocated components */
 		for (j = 0; j < sc->n_components-1; j++) {
 			if ((sc->components[j].flags &
 			    VIRSTOR_PROVIDER_ALLOCATED) == 0) {
 				sc->components[j].index = j;
 			}
 		}
 		/* This is the critical section. If a component allocation
 		 * event happens while both variables are not yet set,
 		 * there will be trouble. Something will panic on encountering
 		 * NULL sc->components[x].gcomp member.
 		 * Luckily, component allocation happens very rarely and
 		 * removing components is an abnormal action in any case. */
 		sc->components = newcomp;
 		sc->n_components--;
 		/* End critical section */
 
 		g_topology_lock();
 		if (clear_metadata(&compbak[found]) != 0) {
 			LOG_MSG(LVL_WARNING, "Trouble ahead: cannot clear "
 			    "metadata on %s", prov_name);
 		}
 		g_detach(compbak[found].gcons);
 		g_destroy_consumer(compbak[found].gcons);
 		g_topology_unlock();
 
 		free(compbak, M_GVIRSTOR);
 
 		removed++;
 	}
 
 	/* This call to update_metadata() is critical. In case there's a
 	 * power failure in the middle of it and some components are updated
 	 * while others are not, there will be trouble on next .taste() iff
 	 * a non-updated component is detected first */
 	g_topology_lock();
 	update_metadata(sc);
 	g_topology_unlock();
 	LOG_MSG(LVL_INFO, "Removed %d component(s) from %s", removed,
 	    sc->geom->name);
 }
 
 /*
  * Clear metadata sector on component
  */
 static int
 clear_metadata(struct g_virstor_component *comp)
 {
 	char *buf;
 	int error;
 
 	LOG_MSG(LVL_INFO, "Clearing metadata on %s",
 	    comp->gcons->provider->name);
 	g_topology_assert();
 	error = g_access(comp->gcons, 0, 1, 0);
 	if (error != 0)
 		return (error);
 	buf = malloc(comp->gcons->provider->sectorsize, M_GVIRSTOR,
 	    M_WAITOK | M_ZERO);
 	error = g_write_data(comp->gcons,
 	    comp->gcons->provider->mediasize -
 	    comp->gcons->provider->sectorsize,
 	    buf,
 	    comp->gcons->provider->sectorsize);
 	free(buf, M_GVIRSTOR);
 	g_access(comp->gcons, 0, -1, 0);
 	return (error);
 }
 
 /*
  * Destroy geom forcibly (per-class destroy_geom callback).
  *
  * When the geom still has a softc, the component metadata is updated and
  * the geom is handed to virstor_geom_destroy() (non-forced, non-delayed);
  * EAGAIN is then returned regardless of that call's result.  Returns 0
  * when there is no softc.
  *
  * On INVARIANTS kernels the in-memory allocation map is additionally
  * compared, sector by sector, with the copy read from the first component,
  * and any mismatch is logged.
  */
 static int
 g_virstor_destroy_geom(struct gctl_req *req __unused, struct g_class *mp,
     struct g_geom *gp)
 {
 	struct g_virstor_softc *sc;
 	int exitval;
 
 	sc = gp->softc;
 	KASSERT(sc != NULL, ("%s: NULL sc", __func__));
 
 	exitval = 0;
 	LOG_MSG(LVL_DEBUG, "%s called for %s, sc=%p", __func__, gp->name,
 	    gp->softc);
 
 	if (sc != NULL) {
 #ifdef INVARIANTS
 		char *buf;
 		int error;
 		off_t off;
 		int isclean, count;
 		int n;
 
 		LOG_MSG(LVL_INFO, "INVARIANTS detected");
 		LOG_MSG(LVL_INFO, "Verifying allocation "
 		    "table for %s", sc->geom->name);
 		/* Count the map entries that have the ALLOCATED bit set. */
 		count = 0;
 		for (n = 0; n < sc->chunk_count; n++) {
 			if ((sc->map[n].flags & VIRSTOR_MAP_ALLOCATED) != 0)
 				count++;
 		}
 		LOG_MSG(LVL_INFO, "Device %s has %d allocated chunks",
 		    sc->geom->name, count);
 		/* From here on "count" counts mismatching sectors. */
 		n = off = count = 0;
 		isclean = 1;
 		if (virstor_valid_components(sc) != sc->n_components) {
 			/* This is a incomplete virstor device (not all
 			 * components have been found) */
 			LOG_MSG(LVL_ERROR, "Device %s is incomplete",
 			    sc->geom->name);
 			goto bailout;
 		}
 		error = g_access(sc->components[0].gcons, 1, 0, 0);
 		KASSERT(error == 0, ("%s: g_access failed (%d)", __func__,
 		    error));
 		/* Compare the whole on-disk allocation table with what's
 		 * currently in memory */
 		while (n < sc->chunk_count) {
 			buf = g_read_data(sc->components[0].gcons, off,
 			    sc->sectorsize, &error);
 			KASSERT(buf != NULL, ("g_read_data returned NULL (%d) "
 			    "for read at %jd", error, off));
 			if (bcmp(buf, &sc->map[n], sc->sectorsize) != 0) {
 				LOG_MSG(LVL_ERROR, "ERROR in allocation table, "
 				    "entry %d, offset %jd", n, off);
 				isclean = 0;
 				count++;
 			}
 			n += sc->me_per_sector;
 			off += sc->sectorsize;
 			g_free(buf);
 		}
 		error = g_access(sc->components[0].gcons, -1, 0, 0);
 		KASSERT(error == 0, ("%s: g_access failed (%d) on exit",
 		    __func__, error));
 		if (isclean != 1) {
 			LOG_MSG(LVL_ERROR, "ALLOCATION TABLE CORRUPTED FOR %s "
 			    "(%d sectors don't match, max %zu allocations)",
 			    sc->geom->name, count,
 			    count * sc->me_per_sector);
 		} else {
 			LOG_MSG(LVL_INFO, "Allocation table ok for %s",
 			    sc->geom->name);
 		}
 bailout:
 #endif
 		update_metadata(sc);
 		virstor_geom_destroy(sc, FALSE, FALSE);
 		exitval = EAGAIN;
 	}
 	return (exitval);
 }
 
 /*
  * Taste event (per-class callback)
  * Examines a provider and creates geom instances if needed
  *
  * The provider's last sector is read through a throw-away geom/consumer
  * pair and decoded as virstor metadata.  If the metadata is acceptable
  * (magic, version, provider size and, when hardcoded, provider name all
  * match), the provider is added to the already existing geom with the same
  * name and id, or to a newly created one.
  *
  * Returns the geom the provider was added to, or NULL if the provider was
  * rejected or could not be added.  Needs the topology lock.
  */
 static struct g_geom *
 g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags)
 {
 	struct g_virstor_metadata md;
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_virstor_softc *sc;
 	int error;
 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 	LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name);
 
 	/* We need a dummy geom to attach a consumer to the given provider */
 	gp = g_new_geomf(mp, "virstor:taste.helper");
 	gp->start = (void *)invalid_call;	/* XXX: hacked up so the        */
 	gp->access = (void *)invalid_call;	/* compiler doesn't complain.   */
 	gp->orphan = (void *)invalid_call;	/* I really want these to fail. */
 
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	error = g_attach(cp, pp);
 	if (error == 0) {
 		error = read_metadata(cp, &md);
 		g_detach(cp);
 	}
 	/* The helper consumer and geom are not needed past this point,
 	 * whether or not the metadata could be read. */
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 
 	if (error != 0)
 		return (NULL);
 
 	/* Reject anything that is not (compatible) virstor metadata. */
 	if (strcmp(md.md_magic, G_VIRSTOR_MAGIC) != 0)
 		return (NULL);
 	if (md.md_version != G_VIRSTOR_VERSION) {
 		LOG_MSG(LVL_ERROR, "Kernel module version invalid "
 		    "to handle %s (%s) : %d should be %d",
 		    md.md_name, pp->name, md.md_version, G_VIRSTOR_VERSION);
 		return (NULL);
 	}
 	if (md.provsize != pp->mediasize)
 		return (NULL);
 
 	/* If the provider name is hardcoded, use the offered provider only
 	 * if it's been offered with its proper name (the one used in
 	 * the label command). */
 	if (md.provider[0] != '\0' &&
 	    !g_compare_names(md.provider, pp->name))
 		return (NULL);
 
 	/* Iterate all geoms this class already knows about to see if a new
 	 * geom instance of this class needs to be created (in case the provider
 	 * is first from a (possibly) multi-consumer geom) or it just needs
 	 * to be added to an existing instance. */
 	sc = NULL;
 	gp = NULL;
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		if (strcmp(md.md_name, sc->geom->name) != 0)
 			continue;
 		if (md.md_id != sc->id)
 			continue;
 		break;
 	}
 	/* gp is only non-NULL here if the loop above was left via break. */
 	if (gp != NULL) { /* We found an existing geom instance; add to it */
 		LOG_MSG(LVL_INFO, "Adding %s to %s", pp->name, md.md_name);
 		error = add_provider_to_geom(sc, pp, &md);
 		if (error != 0) {
 			LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)",
 			    pp->name, md.md_name, error);
 			return (NULL);
 		}
 	} else { /* New geom instance needs to be created */
 		gp = create_virstor_geom(mp, &md);
 		if (gp == NULL) {
 			LOG_MSG(LVL_ERROR, "Error creating new instance of "
 			    "class %s: %s", mp->name, md.md_name);
 			LOG_MSG(LVL_DEBUG, "Error creating %s at %s",
 			    md.md_name, pp->name);
 			return (NULL);
 		}
 		sc = gp->softc;
 		LOG_MSG(LVL_INFO, "Adding %s to %s (first found)", pp->name,
 		    md.md_name);
 		error = add_provider_to_geom(sc, pp, &md);
 		if (error != 0) {
 			LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)",
 			    pp->name, md.md_name, error);
 			/* Don't leave the freshly created, empty geom
 			 * behind. */
 			virstor_geom_destroy(sc, TRUE, FALSE);
 			return (NULL);
 		}
 	}
 
 	return (gp);
 }
 
 /*
  * Event callback: detach and destroy the consumer passed in "arg".
  * Posted on the g_event queue by remove_component() when the consumer
  * cannot be destroyed right away.
  */
 static void
 delay_destroy_consumer(void *arg, int flags __unused)
 {
 	struct g_consumer *cons;
 
 	cons = arg;
 	KASSERT(cons != NULL, ("%s: invalid consumer", __func__));
 	LOG_MSG(LVL_DEBUG, "Consumer %s destroyed with delay",
 	    cons->provider->name);
 	g_detach(cons);
 	g_destroy_consumer(cons);
 }
 
 /*
  * Remove a component (consumer) from a geom instance.
  *
  * The component's consumer pointer is cleared and, if the geom still has
  * a provider, that provider is withered to announce that the geom is being
  * dismantled.  The consumer itself is detached and destroyed only when it
  * holds no access counts: immediately, or through the event queue when
  * "delay" is set.  A consumer that is still open is left alone.
  */
 static void
 remove_component(struct g_virstor_softc *sc, struct g_virstor_component *comp,
     boolean_t delay)
 {
 	struct g_consumer *cons;
 
 	KASSERT(comp->gcons != NULL, ("Component with no consumer in %s",
 	    sc->geom->name));
 	cons = comp->gcons;
 	comp->gcons = NULL;
 
 	KASSERT(cons->provider != NULL, ("%s: no provider", __func__));
 	LOG_MSG(LVL_DEBUG, "Component %s removed from %s",
 	    cons->provider->name, sc->geom->name);
 
 	/* Only the first removal finds a provider to get rid of. */
 	if (sc->provider != NULL) {
 		LOG_MSG(LVL_INFO, "Removing provider %s", sc->provider->name);
 		g_wither_provider(sc->provider, ENXIO);
 		sc->provider = NULL;
 	}
 
 	if (cons->acr <= 0 && cons->acw <= 0 && cons->ace <= 0) {
 		if (!delay) {
 			g_detach(cons);
 			g_destroy_consumer(cons);
 		} else {
 			/* Destroy consumer after it's tasted */
 			g_post_event(delay_destroy_consumer, cons, M_WAITOK,
 			    NULL);
 		}
 	}
 }
 
 /*
  * Destroy geom - called internally
  * See g_virstor_destroy_geom for the other one
  *
  * sc:    softc of the geom to destroy
  * force: destroy the geom even if its provider is still open
  * delay: passed on to remove_component() for every component
  *
  * Returns ENXIO if sc is NULL, EBUSY if the provider is open and force is
  * not set, 0 otherwise.  On a non-zero return nothing has been freed.
  * On success sc is freed and must not be used by the caller any more.
  * Needs the topology lock.
  */
 static int
 virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force,
     boolean_t delay)
 {
 	struct g_provider *pp;
 	struct g_geom *gp;
 	u_int n;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 
 	pp = sc->provider;
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		LOG_MSG(force ? LVL_WARNING : LVL_ERROR,
 		    "Device %s is still open.", pp->name);
 		if (!force)
 			return (EBUSY);
 	}
 
 	/* The first remove_component() call also withers sc->provider. */
 	for (n = 0; n < sc->n_components; n++) {
 		if (sc->components[n].gcons != NULL)
 			remove_component(sc, &sc->components[n], delay);
 	}
 
 	/* Unhook the softc from the geom before it is freed below. */
 	gp = sc->geom;
 	gp->softc = NULL;
 
 	KASSERT(sc->provider == NULL, ("Provider still exists for %s",
 	    gp->name));
 
 	/* XXX: This might or might not work, since we're called with
 	 * the topology lock held. Also, it might panic the kernel if
 	 * the error'd BIO is in softupdates code. */
 	mtx_lock(&sc->delayed_bio_q_mtx);
 	while (!STAILQ_EMPTY(&sc->delayed_bio_q)) {
 		struct g_virstor_bio_q *bq;
 		bq = STAILQ_FIRST(&sc->delayed_bio_q);
 		/* NOTE(review): the bio is delivered with EIO on the next
 		 * line; presumably that overrides the ENOSPC stored here -
 		 * confirm which error is intended. */
 		bq->bio->bio_error = ENOSPC;
 		g_io_deliver(bq->bio, EIO);
 		STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
 		free(bq, M_GVIRSTOR);
 	}
 	mtx_unlock(&sc->delayed_bio_q_mtx);
 	mtx_destroy(&sc->delayed_bio_q_mtx);
 
 	/* Release everything owned by the softc, then the softc itself. */
 	free(sc->map, M_GVIRSTOR);
 	free(sc->components, M_GVIRSTOR);
 	bzero(sc, sizeof *sc);
 	free(sc, M_GVIRSTOR);
 
 	pp = LIST_FIRST(&gp->provider); /* We only offer one provider */
 	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
 		LOG_MSG(LVL_DEBUG, "Device %s destroyed", gp->name);
 
 	g_wither_geom(gp, ENXIO);
 
 	return (0);
 }
 
 /*
  * Utility function: read the last sector of the consumer's provider and
  * decode it into *md.
  *
  * Wants the topology lock to be held; the lock is dropped around the read
  * itself and re-taken afterwards.  Read access is acquired for the
  * duration of the call.  Returns 0 on success, otherwise the error from
  * g_access() or g_read_data() (in which case *md is left untouched).
  */
 static int
 read_metadata(struct g_consumer *cp, struct g_virstor_metadata *md)
 {
 	struct g_provider *prov;
 	char *sector;
 	int err;
 
 	g_topology_assert();
 	err = g_access(cp, 1, 0, 0);
 	if (err != 0)
 		return (err);
 	prov = cp->provider;
 
 	g_topology_unlock();
 	sector = g_read_data(cp, prov->mediasize - prov->sectorsize,
 	    prov->sectorsize, &err);
 	g_topology_lock();
 
 	g_access(cp, -1, 0, 0);
 	if (sector != NULL) {
 		virstor_metadata_decode(sector, md);
 		g_free(sector);
 		return (0);
 	}
 	return (err);
 }
 
 /**
  * Utility function: encode & write metadata. Assumes topology lock is
  * held.
  *
  * There is no useful way of recovering from errors in this function,
  * not involving panicking the kernel. If the metadata cannot be written
  * the most we can do is notify the operator and hope he spots it and
  * replaces the broken drive.
  */
 static void
 write_metadata(struct g_consumer *cp, struct g_virstor_metadata *md)
 {
 	struct g_provider *pp;
 	char *buf;
 	int error;
 
 	KASSERT(cp != NULL && md != NULL && cp->provider != NULL,
 	    ("Something's fishy in %s", __func__));
 	LOG_MSG(LVL_DEBUG, "Writing metadata on %s", cp->provider->name);
 	g_topology_assert();
 	error = g_access(cp, 0, 1, 0);
 	if (error != 0) {
 		LOG_MSG(LVL_ERROR, "g_access(0,1,0) failed for %s: %d",
 		    cp->provider->name, error);
 		return;
 	}
 	pp = cp->provider;
 
 	buf = malloc(pp->sectorsize, M_GVIRSTOR, M_WAITOK);
 	bzero(buf, pp->sectorsize);
 	virstor_metadata_encode(md, buf);
 	g_topology_unlock();
 	error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf,
 	    pp->sectorsize);
 	g_topology_lock();
 	g_access(cp, 0, -1, 0);
 	free(buf, M_GVIRSTOR);
 
 	if (error != 0)
 		LOG_MSG(LVL_ERROR, "Error %d writing metadata to %s",
 		    error, cp->provider->name);
 }
 
 /*
  * Create a new geom of this class, named after the metadata, and hang a
  * freshly initialised softc off it. The provider is not created here.
  *
  * Returns the new geom, or NULL when the metadata is implausible or a geom
  * with the same name already exists in the class.
  */
 static struct g_geom *
 create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md)
 {
 	struct g_geom *gp;
 	struct g_virstor_softc *sc;
 
 	LOG_MSG(LVL_DEBUG, "Creating geom instance for %s (id=%u)",
 	    md->md_name, md->md_id);
 
 	/*
 	 * A device needs at least one component, a non-empty chunk size and
 	 * room for at least one chunk. Anything else is a bogus
 	 * configuration and probably means the data is somehow corrupted.
 	 */
 	if (md->md_count < 1 || md->md_chunk_size < 1 ||
 	    md->md_virsize < md->md_chunk_size) {
 		LOG_MSG(LVL_ERROR, "Nonsensical metadata information for %s",
 		    md->md_name);
 		return (NULL);
 	}
 
 	/* Refuse to create a second geom with the same name */
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL || strcmp(sc->geom->name, md->md_name) != 0)
 			continue;
 
 		LOG_MSG(LVL_WARNING, "Geom %s already exists",
 		    md->md_name);
 		if (sc->id != md->md_id) {
 			LOG_MSG(LVL_ERROR,
 			    "Some stale or invalid components "
 			    "exist for virstor device named %s. "
 			    "You will need to <CLEAR> all stale "
 			    "components and maybe reconfigure "
 			    "the virstor device. Tune "
 			    "kern.geom.virstor.debug sysctl up "
 			    "for more information.",
 			    sc->geom->name);
 		}
 		return (NULL);
 	}
 
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	gp->softc = NULL; /* to circumvent races that test softc */
 
 	/* Hook up the class methods */
 	gp->start = g_virstor_start;
 	gp->access = g_virstor_access;
 	gp->orphan = g_virstor_orphan;
 	gp->spoiled = g_virstor_orphan;
 	gp->dumpconf = g_virstor_dumpconf;
 
 	/* Build the softc; everything not set below starts out zeroed */
 	sc = malloc(sizeof(*sc), M_GVIRSTOR, M_WAITOK | M_ZERO);
 	sc->id = md->md_id;
 	sc->virsize = md->md_virsize;
 	sc->chunk_size = md->md_chunk_size;
 	sc->n_components = md->md_count;
 	sc->components = malloc(sizeof(*sc->components) * md->md_count,
 	    M_GVIRSTOR, M_WAITOK | M_ZERO);
 	STAILQ_INIT(&sc->delayed_bio_q);
 	mtx_init(&sc->delayed_bio_q_mtx, "gvirstor_delayed_bio_q_mtx",
 	    "gvirstor", MTX_DEF | MTX_RECURSE);
 
 	sc->provider = NULL; /* virstor_check_and_run will create it */
 	sc->geom = gp;
 	gp->softc = sc;
 
 	LOG_MSG(LVL_ANNOUNCE, "Device %s created", gp->name);
 
 	return (gp);
 }
 
 /*
  * Attach provider pp to the virstor device as the component slot named by
  * md->no, then try to bring the device up.
  *
  * Returns 0 on success, EINVAL if the slot number is out of range or the
  * provider's sector size differs from that of an already attached
  * component, EEXIST if the slot is already occupied, or the error returned
  * by g_attach()/g_access(). On every failure the consumer created here is
  * detached and destroyed again, so no half-initialised consumer (one with
  * a NULL private pointer) is ever left on the geom.
  */
 static int
 add_provider_to_geom(struct g_virstor_softc *sc, struct g_provider *pp,
     struct g_virstor_metadata *md)
 {
 	struct g_virstor_component *component;
 	struct g_consumer *cp, *fcp;
 	struct g_geom *gp;
 	int error;
 
 	if (md->no >= sc->n_components)
 		return (EINVAL);
 
 	/* "Current" component */
 	component = &(sc->components[md->no]);
 	if (component->gcons != NULL)
 		return (EEXIST);
 
 	gp = sc->geom;
 	/* Taken before the new consumer is created, so it is an older one */
 	fcp = LIST_FIRST(&gp->consumer);
 
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		return (error);
 	}
 
 	if (fcp != NULL) {
 		if (fcp->provider->sectorsize != pp->sectorsize) {
 			/* TODO: this can be made to work */
 			LOG_MSG(LVL_ERROR, "Provider %s of %s has invalid "
 			    "sector size (%d)", pp->name, sc->geom->name,
 			    pp->sectorsize);
 			/* Don't leave the rejected consumer attached */
 			g_detach(cp);
 			g_destroy_consumer(cp);
 			return (EINVAL);
 		}
 		if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) {
 			/* Replicate access permissions from first "live" consumer
 			 * to the new one */
 			error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
 			if (error != 0) {
 				g_detach(cp);
 				g_destroy_consumer(cp);
 				return (error);
 			}
 		}
 	}
 
 	/* Bring up a new component */
 	cp->private = component;
 	component->gcons = cp;
 	component->sc = sc;
 	component->index = md->no;
 	component->chunk_count = md->chunk_count;
 	component->chunk_next = md->chunk_next;
 	component->chunk_reserved = md->chunk_reserved;
 	component->flags = md->flags;
 
 	LOG_MSG(LVL_DEBUG, "%s attached to %s", pp->name, sc->geom->name);
 
 	/* Creates the provider once every slot has a consumer */
 	virstor_check_and_run(sc);
 	return (0);
 }
 
 /*
  * Check if everything's ready to create the geom provider & device entry,
  * create and start provider.
  * Called ultimately by .taste, from g_event thread
  */
 static void
 virstor_check_and_run(struct g_virstor_softc *sc)
 {
 	off_t off;
 	size_t n, count;
 	int index;
 	int error;
 
 	if (virstor_valid_components(sc) != sc->n_components)
 		return;
 
 	if (virstor_valid_components(sc) == 0) {
 		/* This is actually a candidate for panic() */
 		LOG_MSG(LVL_ERROR, "No valid components for %s?",
 		    sc->provider->name);
 		return;
 	}
 
 	sc->sectorsize = sc->components[0].gcons->provider->sectorsize;
 
 	/* Initialise allocation map from the first consumer */
 	sc->chunk_count = sc->virsize / sc->chunk_size;
 	if (sc->chunk_count * (off_t)sc->chunk_size != sc->virsize) {
 		LOG_MSG(LVL_WARNING, "Device %s truncated to %ju bytes",
 		    sc->provider->name,
 		    sc->chunk_count * (off_t)sc->chunk_size);
 	}
 	sc->map_size = sc->chunk_count * sizeof *(sc->map);
 	/* The following allocation is in order of 4MB - 8MB */
 	sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK);
 	KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s",
 	    __func__, sc->map_size, sc->provider->name));
 	sc->map_sectors = sc->map_size / sc->sectorsize;
 
 	count = 0;
 	for (n = 0; n < sc->n_components; n++)
 		count += sc->components[n].chunk_count;
 	LOG_MSG(LVL_INFO, "Device %s has %zu physical chunks and %zu virtual "
 	    "(%zu KB chunks)",
 	    sc->geom->name, count, sc->chunk_count, sc->chunk_size / 1024);
 
 	error = g_access(sc->components[0].gcons, 1, 0, 0);
 	if (error != 0) {
 		LOG_MSG(LVL_ERROR, "Cannot acquire read access for %s to "
 		    "read allocation map for %s",
 		    sc->components[0].gcons->provider->name,
 		    sc->geom->name);
 		return;
 	}
 	/* Read in the allocation map */
 	LOG_MSG(LVL_DEBUG, "Reading map for %s from %s", sc->geom->name,
 	    sc->components[0].gcons->provider->name);
 	off = count = n = 0;
 	while (count < sc->map_size) {
 		struct g_virstor_map_entry *mapbuf;
 		size_t bs;
 
 		bs = MIN(maxphys, sc->map_size - count);
 		if (bs % sc->sectorsize != 0) {
 			/* Check for alignment errors */
 			bs = rounddown(bs, sc->sectorsize);
 			if (bs == 0)
 				break;
 			LOG_MSG(LVL_ERROR, "Trouble: map is not sector-aligned "
 			    "for %s on %s", sc->geom->name,
 			    sc->components[0].gcons->provider->name);
 		}
 		mapbuf = g_read_data(sc->components[0].gcons, off, bs, &error);
 		if (mapbuf == NULL) {
 			free(sc->map, M_GVIRSTOR);
 			LOG_MSG(LVL_ERROR, "Error reading allocation map "
 			    "for %s from %s (offset %ju) (error %d)",
 			    sc->geom->name,
 			    sc->components[0].gcons->provider->name,
 			    off, error);
 			return;
 		}
 
 		bcopy(mapbuf, &sc->map[n], bs);
 		off += bs;
 		count += bs;
 		n += bs / sizeof *(sc->map);
 		g_free(mapbuf);
 	}
 	g_access(sc->components[0].gcons, -1, 0, 0);
 	LOG_MSG(LVL_DEBUG, "Read map for %s", sc->geom->name);
 
 	/* find first component with allocatable chunks */
 	index = -1;
 	for (n = 0; n < sc->n_components; n++) {
 		if (sc->components[n].chunk_next <
 		    sc->components[n].chunk_count) {
 			index = n;
 			break;
 		}
 	}
 	if (index == -1)
 		/* not found? set it to the last component and handle it
 		 * later */
 		index = sc->n_components - 1;
 
 	if (index >= sc->n_components - g_virstor_component_watermark - 1) {
 		LOG_MSG(LVL_WARNING, "Device %s running out of components "
 		    "(%d/%u: %s)", sc->geom->name,
 		    index+1,
 		    sc->n_components,
 		    sc->components[index].gcons->provider->name);
 	}
 	sc->curr_component = index;
 
 	if (sc->components[index].chunk_next >=
 	    sc->components[index].chunk_count - g_virstor_chunk_watermark) {
 		LOG_MSG(LVL_WARNING,
 		    "Component %s of %s is running out of free space "
 		    "(%u chunks left)",
 		    sc->components[index].gcons->provider->name,
 		    sc->geom->name, sc->components[index].chunk_count -
 		    sc->components[index].chunk_next);
 	}
 
 	sc->me_per_sector = sc->sectorsize / sizeof *(sc->map);
 	if (sc->sectorsize % sizeof *(sc->map) != 0) {
 		LOG_MSG(LVL_ERROR,
 		    "%s: Map entries don't fit exactly in a sector (%s)",
 		    __func__, sc->geom->name);
 		return;
 	}
 
 	/* Recalculate allocated chunks in components & at the same time
 	 * verify map data is sane. We could trust metadata on this, but
 	 * we want to make sure. */
 	for (n = 0; n < sc->n_components; n++)
 		sc->components[n].chunk_next = sc->components[n].chunk_reserved;
 
 	for (n = 0; n < sc->chunk_count; n++) {
 		if (sc->map[n].provider_no >= sc->n_components ||
 			sc->map[n].provider_chunk >=
 			sc->components[sc->map[n].provider_no].chunk_count) {
 			LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s",
 			    __func__, (u_int)n, sc->geom->name);
 			LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u"
 			    " provider_chunk: %u, chunk_count: %u", __func__,
 			    sc->map[n].provider_no, sc->n_components,
 			    sc->map[n].provider_chunk,
 			    sc->components[sc->map[n].provider_no].chunk_count);
 			return;
 		}
 		if (sc->map[n].flags & VIRSTOR_MAP_ALLOCATED)
 			sc->components[sc->map[n].provider_no].chunk_next++;
 	}
 
 	sc->provider = g_new_providerf(sc->geom, "virstor/%s",
 	    sc->geom->name);
 
 	sc->provider->sectorsize = sc->sectorsize;
 	sc->provider->mediasize = sc->virsize;
 	g_error_provider(sc->provider, 0);
 
 	LOG_MSG(LVL_INFO, "%s activated", sc->provider->name);
 	LOG_MSG(LVL_DEBUG, "%s starting with current component %u, starting "
 	    "chunk %u", sc->provider->name, sc->curr_component,
 	    sc->components[sc->curr_component].chunk_next);
 }
 
 /*
  * Returns the number of component slots of this geom instance that
  * currently have a consumer attached.
  */
 static u_int
 virstor_valid_components(struct g_virstor_softc *sc)
 {
 	unsigned int idx, online;
 
 	KASSERT(sc != NULL, ("%s: softc is NULL", __func__));
 	KASSERT(sc->components != NULL, ("%s: sc->components is NULL", __func__));
 
 	online = 0;
 	for (idx = 0; idx < sc->n_components; idx++) {
 		if (sc->components[idx].gcons == NULL)
 			continue;
 		online++;
 	}
 	return (online);
 }
 
 /*
  * GEOM orphan handler (also installed as the spoiled handler): removes
  * the component behind the consumer and destroys the whole geom once no
  * consumers are left. Requires the topology lock.
  */
 static void
 g_virstor_orphan(struct g_consumer *cp)
 {
 	struct g_virstor_component *comp;
 	struct g_virstor_softc *sc;
 	struct g_geom *gp;
 
 	g_topology_assert();
 
 	gp = cp->geom;
 	sc = gp->softc;
 	if (sc != NULL) {
 		comp = cp->private;
 		KASSERT(comp != NULL,
 		    ("%s: No component in private part of consumer",
 		    __func__));
 		remove_component(sc, comp, FALSE);
 
 		/* That was the last one: tear the device down */
 		if (LIST_EMPTY(&gp->consumer))
 			virstor_geom_destroy(sc, TRUE, FALSE);
 	}
 }
 
 /*
  * GEOM access handler: called when the access counts of our provider are
  * about to change by (dr, dw, de). The change is propagated to every
  * consumer of the geom; if one of them refuses, the consumers already
  * updated are rolled back and the error is returned. Returns ENXIO when
  * the geom has no consumers at all.
  */
 static int
 g_virstor_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *cur, *next, *undo;
 	struct g_virstor_softc *sc;
 	struct g_geom *gp;
 	int error;
 
 	KASSERT(pp != NULL, ("%s: NULL provider", __func__));
 	gp = pp->geom;
 	KASSERT(gp != NULL, ("%s: NULL geom", __func__));
 	sc = gp->softc;
 
 	/* First open: take an extra exclusive bit on our consumers ... */
 	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
 		de++;
 	/* ... last close: give it back and flush the metadata */
 	if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) {
 		de--;
 		if (sc != NULL)
 			update_metadata(sc);
 	}
 
 	error = ENXIO;
 	LIST_FOREACH_SAFE(cur, &gp->consumer, consumer, next) {
 		error = g_access(cur, dr, dw, de);
 		if (error != 0) {
 			/* Back out the consumers changed before this one */
 			LIST_FOREACH(undo, &gp->consumer, consumer) {
 				if (undo == cur)
 					break;
 				g_access(undo, -dr, -dw, -de);
 			}
 			return (error);
 		}
 
 		/* An orphaned consumer nobody uses any more can go now */
 		if ((cur->flags & G_CF_ORPHAN) != 0 &&
 		    cur->acr == 0 && cur->acw == 0 && cur->ace == 0) {
 			g_detach(cur);
 			g_destroy_consumer(cur);
 		}
 	}
 
 	if (sc != NULL && LIST_EMPTY(&gp->consumer))
 		virstor_geom_destroy(sc, TRUE, FALSE);
 
 	return (error);
 }
 
 /*
  * Generate XML dump of current state
  */
 static void
 g_virstor_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_virstor_softc *sc;
 
 	g_topology_assert();
 	sc = gp->softc;
 
 	if (sc == NULL || pp != NULL)
 		return;
 
 	if (cp != NULL) {
 		/* For each component */
 		struct g_virstor_component *comp;
 
 		comp = cp->private;
 		if (comp == NULL)
 			return;
 		sbuf_printf(sb, "%s<ComponentIndex>%u</ComponentIndex>\n",
 		    indent, comp->index);
 		sbuf_printf(sb, "%s<ChunkCount>%u</ChunkCount>\n",
 		    indent, comp->chunk_count);
 		sbuf_printf(sb, "%s<ChunksUsed>%u</ChunksUsed>\n",
 		    indent, comp->chunk_next);
 		sbuf_printf(sb, "%s<ChunksReserved>%u</ChunksReserved>\n",
 		    indent, comp->chunk_reserved);
 		sbuf_printf(sb, "%s<StorageFree>%u%%</StorageFree>\n",
 		    indent,
 		    comp->chunk_next > 0 ? 100 -
 		    ((comp->chunk_next + comp->chunk_reserved) * 100) /
 		    comp->chunk_count : 100);
 	} else {
 		/* For the whole thing */
 		u_int count, used, i;
 		off_t size;
 
 		count = used = size = 0;
 		for (i = 0; i < sc->n_components; i++) {
 			if (sc->components[i].gcons != NULL) {
 				count += sc->components[i].chunk_count;
 				used += sc->components[i].chunk_next +
 				    sc->components[i].chunk_reserved;
 				size += sc->components[i].gcons->
 				    provider->mediasize;
 			}
 		}
 
 		sbuf_printf(sb, "%s<Status>"
 		    "Components=%u, Online=%u</Status>\n", indent,
 		    sc->n_components, virstor_valid_components(sc));
 		sbuf_printf(sb, "%s<State>%u%% physical free</State>\n",
 		    indent, 100-(used * 100) / count);
 		sbuf_printf(sb, "%s<ChunkSize>%zu</ChunkSize>\n", indent,
 		    sc->chunk_size);
 		sbuf_printf(sb, "%s<PhysicalFree>%u%%</PhysicalFree>\n",
 		    indent, used > 0 ? 100 - (used * 100) / count : 100);
 		sbuf_printf(sb, "%s<ChunkPhysicalCount>%u</ChunkPhysicalCount>\n",
 		    indent, count);
 		sbuf_printf(sb, "%s<ChunkVirtualCount>%zu</ChunkVirtualCount>\n",
 		    indent, sc->chunk_count);
 		sbuf_printf(sb, "%s<PhysicalBacking>%zu%%</PhysicalBacking>\n",
 		    indent,
 		    (count * 100) / sc->chunk_count);
 		sbuf_printf(sb, "%s<PhysicalBackingSize>%jd</PhysicalBackingSize>\n",
 		    indent, size);
 		sbuf_printf(sb, "%s<VirtualSize>%jd</VirtualSize>\n", indent,
 		    sc->virsize);
 	}
 }
 
 /*
  * GEOM .done handler
  * Can't use standard handler because one requested IO may
  * fork into additional data IOs
  *
  * Called once per child bio. Records the first child error in the parent,
  * counts the child as finished, and delivers the parent once every child
  * is back. The child bio is always destroyed.
  */
 static void
 g_virstor_done(struct bio *b)
 {
 	struct bio *parent_b;
 
 	parent_b = b->bio_parent;
 
 	if (b->bio_error != 0) {
 		LOG_MSG(LVL_ERROR, "Error %d for offset=%ju, length=%ju, %s",
 		    b->bio_error, b->bio_offset, b->bio_length,
 		    b->bio_to->name);
 		/* Only the first error is reported to the caller */
 		if (parent_b->bio_error == 0)
 			parent_b->bio_error = b->bio_error;
 	}
 
 	parent_b->bio_inbed++;
 	parent_b->bio_completed += b->bio_completed;
 
 	if (parent_b->bio_children == parent_b->bio_inbed) {
 		/*
 		 * Children may include map updates and unallocated chunks
 		 * are served without a child, so the sum above is replaced
 		 * by the requested length.
 		 */
 		parent_b->bio_completed = parent_b->bio_length;
 		g_io_deliver(parent_b, parent_b->bio_error);
 	}
 	g_destroy_bio(b);
 }
 
 /*
  * I/O starts here
  * Called in g_down thread
  *
  * Splits the request at chunk boundaries and translates every piece from
  * virtual to physical location through sc->map:
  *  - reads/deletes of unallocated chunks are handled locally (reads
  *    return zeroes),
  *  - writes to unallocated chunks first allocate a physical chunk and
  *    queue a write of the map sector holding the new entry to
  *    component 0,
  *  - everything else becomes a cloned bio aimed at the owning component.
  * Clones are collected on a local queue and fired off only after the
  * whole request has been translated, so a failure half way through can
  * simply throw the queue away.
  *
  * Commands other than READ/WRITE/DELETE fail with EOPNOTSUPP, allocation
  * failures of bios with ENOMEM. A write that finds no free physical chunk
  * is parked on sc->delayed_bio_q.
  */
 static void
 g_virstor_start(struct bio *b)
 {
 	struct g_virstor_softc *sc;
 	struct g_virstor_component *comp;
 	struct bio *cb;
 	struct g_provider *pp;
 	char *addr;
 	off_t offset, length;
 	struct bio_queue_head bq;
 	size_t chunk_size;	/* cached for convenience */
 	u_int count;
 
 	pp = b->bio_to;
 	sc = pp->geom->softc;
 	KASSERT(sc != NULL, ("%s: no softc (error=%d, device=%s)", __func__,
 	    b->bio_to->error, b->bio_to->name));
 
 	LOG_REQ(LVL_MOREDEBUG, b, "%s", __func__);
 
 	switch (b->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 		break;
 	default:
 		g_io_deliver(b, EOPNOTSUPP);
 		return;
 	}
 
 	LOG_MSG(LVL_DEBUG2, "BIO arrived, size=%ju", b->bio_length);
 	bioq_init(&bq);
 
 	chunk_size = sc->chunk_size;
 	addr = b->bio_data;
 	offset = b->bio_offset;	/* virtual offset and length */
 	length = b->bio_length;
 
 	while (length > 0) {
 		size_t chunk_index, in_chunk_offset, in_chunk_length;
 		struct virstor_map_entry *me;
 
 		chunk_index = offset / chunk_size; /* round downwards */
 		in_chunk_offset = offset % chunk_size;
 		in_chunk_length = min(length, chunk_size - in_chunk_offset);
 		LOG_MSG(LVL_DEBUG, "Mapped %s(%ju, %ju) to (%zu,%zu,%zu)",
 		    b->bio_cmd == BIO_READ ? "R" : "W",
 		    offset, length,
 		    chunk_index, in_chunk_offset, in_chunk_length);
 		me = &sc->map[chunk_index];
 
 		if (b->bio_cmd == BIO_READ || b->bio_cmd == BIO_DELETE) {
 			if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) {
 				/* Reads from unallocated chunks return zeroed
 				 * buffers */
 				if (b->bio_cmd == BIO_READ)
 					bzero(addr, in_chunk_length);
 			} else {
 				comp = &sc->components[me->provider_no];
 
 				cb = g_clone_bio(b);
 				if (cb == NULL)
 					goto nomem;
 				cb->bio_to = comp->gcons->provider;
 				cb->bio_done = g_virstor_done;
 				cb->bio_offset =
 				    (off_t)me->provider_chunk * (off_t)chunk_size
 				    + in_chunk_offset;
 				cb->bio_length = in_chunk_length;
 				cb->bio_data = addr;
 				cb->bio_caller1 = comp;
 				bioq_disksort(&bq, cb);
 			}
 		} else { /* handle BIO_WRITE */
 			KASSERT(b->bio_cmd == BIO_WRITE,
 			    ("%s: Unknown command %d", __func__,
 			    b->bio_cmd));
 
 			if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) {
 				/* We have a virtual chunk, represented by
 				 * the "me" entry, but it's not yet allocated
 				 * (tied to) a physical chunk. So do it now. */
 				struct virstor_map_entry *data_me;
 				u_int phys_chunk, comp_no;
 				off_t s_offset;
 				int error;
 
 				error = allocate_chunk(sc, &comp, &comp_no,
 				    &phys_chunk);
 				if (error != 0) {
 					/* We cannot allocate a physical chunk
 					 * to satisfy this request, so we'll
 					 * delay it to when we can...
 					 * XXX: this will prevent the fs from
 					 * being umounted! */
 					struct g_virstor_bio_q *biq;
 
 					biq = malloc(sizeof *biq, M_GVIRSTOR,
 					    M_NOWAIT);
 					if (biq == NULL)
 						goto nomem;
 					/*
 					 * The parked request is expected to
 					 * be started from scratch later, so
 					 * drop the clones made so far and
 					 * forget about them in the parent.
 					 * Otherwise they leak and the parent
 					 * could never see all its children
 					 * complete.
 					 * NOTE(review): map entries allocated
 					 * earlier in this loop stay allocated
 					 * in memory while their map sector
 					 * write is discarded here - confirm
 					 * the map reaches the disk some other
 					 * way.
 					 */
 					bioq_dismantle(&bq);
 					b->bio_children = 0;
 					/* Log first; b isn't ours once queued */
 					LOG_MSG(LVL_WARNING, "Delaying BIO "
 					    "(size=%ju) until free physical "
 					    "space can be found on %s",
 					    b->bio_length,
 					    sc->provider->name);
 					biq->bio = b;
 					mtx_lock(&sc->delayed_bio_q_mtx);
 					STAILQ_INSERT_TAIL(&sc->delayed_bio_q,
 					    biq, linkage);
 					mtx_unlock(&sc->delayed_bio_q_mtx);
 					return;
 				}
 				LOG_MSG(LVL_DEBUG, "Allocated chunk %u on %s "
 				    "for %s",
 				    phys_chunk,
 				    comp->gcons->provider->name,
 				    sc->provider->name);
 
 				me->provider_no = comp_no;
 				me->provider_chunk = phys_chunk;
 				me->flags |= VIRSTOR_MAP_ALLOCATED;
 
 				cb = g_clone_bio(b);
 				if (cb == NULL) {
 					/* Take back the map entry */
 					me->flags &= ~VIRSTOR_MAP_ALLOCATED;
 					me->provider_no = 0;
 					me->provider_chunk = 0;
 					goto nomem;
 				}
 
 				/* The allocation table is stored continuously
 				 * at the start of the drive. We need to
 				 * calculate the offset of the sector that holds
 				 * this map entry both on the drive and in the
 				 * map array.
 				 * s_offset will end up pointing to the drive
 				 * sector. */
 				s_offset = chunk_index * sizeof *me;
 				s_offset = rounddown(s_offset, sc->sectorsize);
 
 				/* data_me points to map entry sector
 				 * in memory (analogous to offset) */
 				data_me = &sc->map[rounddown(chunk_index,
 				    sc->me_per_sector)];
 
 				/* Commit sector with map entry to storage */
 				cb->bio_to = sc->components[0].gcons->provider;
 				cb->bio_done = g_virstor_done;
 				cb->bio_offset = s_offset;
 				cb->bio_data = (char *)data_me;
 				cb->bio_length = sc->sectorsize;
 				cb->bio_caller1 = &sc->components[0];
 				bioq_disksort(&bq, cb);
 			}
 
 			comp = &sc->components[me->provider_no];
 			cb = g_clone_bio(b);
 			if (cb == NULL)
 				goto nomem;
 			/* Finally, handle the data */
 			cb->bio_to = comp->gcons->provider;
 			cb->bio_done = g_virstor_done;
 			cb->bio_offset = (off_t)me->provider_chunk*(off_t)chunk_size +
 			    in_chunk_offset;
 			cb->bio_length = in_chunk_length;
 			cb->bio_data = addr;
 			cb->bio_caller1 = comp;
 			bioq_disksort(&bq, cb);
 		}
 		addr += in_chunk_length;
 		length -= in_chunk_length;
 		offset += in_chunk_length;
 	}
 
 	/* Fire off bio's here */
 	count = 0;
 	for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) {
 		bioq_remove(&bq, cb);
 		LOG_REQ(LVL_MOREDEBUG, cb, "Firing request");
 		/* bio_caller1 only carried the target component to here */
 		comp = cb->bio_caller1;
 		cb->bio_caller1 = NULL;
 		LOG_MSG(LVL_DEBUG, " firing bio, offset=%ju, length=%ju",
 		    cb->bio_offset, cb->bio_length);
 		g_io_request(cb, comp->gcons);
 		count++;
 	}
 	if (count == 0) { /* We handled everything locally */
 		b->bio_completed = b->bio_length;
 		g_io_deliver(b, 0);
 	}
 	return;
 
 nomem:
 	/* Out of memory: nothing was fired yet, so just fail the request */
 	bioq_dismantle(&bq);
 	if (b->bio_error == 0)
 		b->bio_error = ENOMEM;
 	g_io_deliver(b, b->bio_error);
 }
 
 /*
  * Allocate a chunk from a physical provider. Returns physical component,
  * chunk index relative to the component and the component's index.
  *
  * Allocation is sequential: chunks are handed out from the current
  * component until it is full, then the next component becomes current.
  * Components that turn out to have no free chunk either (for instance
  * because they are entirely reserved) are skipped, so the chunk index
  * returned is always below the component's chunk_count.
  *
  * Returns 0 on success. Returns -1 when all components are full; in that
  * case *comp_no_p and *chunk are not written.
  */
 static int
 allocate_chunk(struct g_virstor_softc *sc, struct g_virstor_component **comp,
     u_int *comp_no_p, u_int *chunk)
 {
 	u_int comp_no;
 
 	KASSERT(sc->curr_component < sc->n_components,
 	    ("%s: Invalid curr_component: %u",  __func__, sc->curr_component));
 
 	comp_no = sc->curr_component;
 	*comp = &sc->components[comp_no];
 	dump_component(*comp);
 
 	while ((*comp)->chunk_next >= (*comp)->chunk_count) {
 		/* This component is full. Allocate next component */
 		if (comp_no >= sc->n_components-1) {
 			LOG_MSG(LVL_ERROR, "All physical space allocated for %s",
 			    sc->geom->name);
 			return (-1);
 		}
 		(*comp)->flags &= ~VIRSTOR_PROVIDER_CURRENT;
 		sc->curr_component = ++comp_no;
 
 		*comp = &sc->components[comp_no];
 		if (comp_no >= sc->n_components - g_virstor_component_watermark-1)
 			LOG_MSG(LVL_WARNING, "Device %s running out of components "
 			    "(switching to %u/%u: %s)", sc->geom->name,
 			    comp_no+1, sc->n_components,
 			    (*comp)->gcons->provider->name);
 		/* Take care not to overwrite reserved chunks */
 		if ( (*comp)->chunk_reserved > 0 &&
 		    (*comp)->chunk_next < (*comp)->chunk_reserved)
 			(*comp)->chunk_next = (*comp)->chunk_reserved;
 
 		(*comp)->flags |=
 		    VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT;
 		dump_component(*comp);
 		/* Loop: the new component must have room as well */
 	}
 
 	*comp_no_p = comp_no;
 	*chunk = (*comp)->chunk_next++;
 	return (0);
 }
 
 /* Print a component's counters; only at debug level LVL_DEBUG2 and up */
 static void
 dump_component(struct g_virstor_component *comp)
 {
 
 	if (g_virstor_debug >= LVL_DEBUG2) {
 		printf("Component %d: %s\n", comp->index,
 		    comp->gcons->provider->name);
 		printf("  chunk_count: %u\n", comp->chunk_count);
 		printf("   chunk_next: %u\n", comp->chunk_next);
 		printf("        flags: %u\n", comp->flags);
 	}
 }
 
 #if 0
 /* Print map entry number nr; only at debug level LVL_DEBUG and up */
 static void
 dump_me(struct virstor_map_entry *me, unsigned int nr)
 {
 	if (g_virstor_debug >= LVL_DEBUG) {
 		printf("VIRT. CHUNK #%d: ", nr);
 		if ((me->flags & VIRSTOR_MAP_ALLOCATED) != 0)
 			printf("allocated at provider %u, provider_chunk %u\n",
 			    me->provider_no, me->provider_chunk);
 		else
 			printf("(unallocated)\n");
 	}
 }
 #endif
 
 /*
  * Empty the bio queue, destroying every bio that was on it
  */
 static void
 bioq_dismantle(struct bio_queue_head *bq)
 {
 	struct bio *head;
 
 	while ((head = bioq_first(bq)) != NULL) {
 		bioq_remove(bq, head);
 		g_destroy_bio(head);
 	}
 }
 
 /*
  * The function that shouldn't be called.
  * When this is called, the stack is already garbled because of an
  * argument mismatch. There's nothing left to do but panic, which is,
  * incidentally, the whole purpose of this function.
  * Motivation: to guard against accidentally calling geom methods when
  * they shouldn't be called. (see g_..._taste)
  */
 static void
 invalid_call(void)
 {
 	panic("invalid_call() has just been called. Something's fishy here.");
 }
 
 /* Declare g_virstor_class as a GEOM class, under the name g_virstor */
 DECLARE_GEOM_CLASS(g_virstor_class, g_virstor); /* Let there be light */
 /* Version 0 of the geom_virstor module */
 MODULE_VERSION(geom_virstor, 0);