Index: stable/11/sys/geom/bde/g_bde.c =================================================================== --- stable/11/sys/geom/bde/g_bde.c (revision 332639) +++ stable/11/sys/geom/bde/g_bde.c (revision 332640) @@ -1,292 +1,293 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define BDE_CLASS_NAME "BDE" FEATURE(geom_bde, "GEOM-based Disk Encryption"); static void g_bde_start(struct bio *bp) { switch (bp->bio_cmd) { case BIO_DELETE: case BIO_READ: case BIO_WRITE: g_bde_start1(bp); break; case BIO_GETATTR: g_io_deliver(bp, EOPNOTSUPP); break; default: g_io_deliver(bp, EOPNOTSUPP); return; } return; } static void g_bde_orphan(struct g_consumer *cp) { struct g_geom *gp; struct g_provider *pp; struct g_bde_softc *sc; g_trace(G_T_TOPOLOGY, "g_bde_orphan(%p/%s)", cp, cp->provider->name); g_topology_assert(); gp = cp->geom; sc = gp->softc; gp->flags |= G_GEOM_WITHER; LIST_FOREACH(pp, &gp->provider, provider) g_wither_provider(pp, ENXIO); bzero(sc, sizeof(struct g_bde_softc)); /* destroy evidence */ return; } static int g_bde_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0) { de++; dr++; } /* ... 
and let go of it on last close */ if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1) { de--; dr--; } return (g_access(cp, dr, dw, de)); } static void g_bde_create_geom(struct gctl_req *req, struct g_class *mp, struct g_provider *pp) { struct g_geom *gp; struct g_consumer *cp; struct g_bde_key *kp; int error, i; u_int sectorsize; off_t mediasize; struct g_bde_softc *sc; void *pass; void *key; g_trace(G_T_TOPOLOGY, "g_bde_create_geom(%s, %s)", mp->name, pp->name); g_topology_assert(); gp = NULL; gp = g_new_geomf(mp, "%s.bde", pp->name); cp = g_new_consumer(gp); g_attach(cp, pp); error = g_access(cp, 1, 1, 1); if (error) { g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); gctl_error(req, "could not access consumer"); return; } pass = NULL; key = NULL; do { pass = gctl_get_param(req, "pass", &i); if (pass == NULL || i != SHA512_DIGEST_LENGTH) { gctl_error(req, "No usable key presented"); break; } key = gctl_get_param(req, "key", &i); if (key != NULL && i != 16) { gctl_error(req, "Invalid key presented"); break; } sectorsize = cp->provider->sectorsize; mediasize = cp->provider->mediasize; sc = g_malloc(sizeof(struct g_bde_softc), M_WAITOK | M_ZERO); gp->softc = sc; sc->geom = gp; sc->consumer = cp; error = g_bde_decrypt_lock(sc, pass, key, mediasize, sectorsize, NULL); bzero(sc->sha2, sizeof sc->sha2); if (error) break; kp = &sc->key; /* Initialize helper-fields */ kp->keys_per_sector = kp->sectorsize / G_BDE_SKEYLEN; kp->zone_cont = kp->keys_per_sector * kp->sectorsize; kp->zone_width = kp->zone_cont + kp->sectorsize; kp->media_width = kp->sectorN - kp->sector0 - G_BDE_MAXKEYS * kp->sectorsize; /* Our external parameters */ sc->zone_cont = kp->zone_cont; sc->mediasize = g_bde_max_sector(kp); sc->sectorsize = kp->sectorsize; TAILQ_INIT(&sc->freelist); TAILQ_INIT(&sc->worklist); mtx_init(&sc->worklist_mutex, "g_bde_worklist", NULL, MTX_DEF); /* XXX: error check */ kproc_create(g_bde_worker, gp, &sc->thread, 0, 0, "g_bde %s", gp->name); pp = g_new_providerf(gp, "%s", gp->name); pp->stripesize = kp->zone_cont; pp->stripeoffset = 0; pp->mediasize = sc->mediasize; pp->sectorsize = sc->sectorsize; g_error_provider(pp, 0); break; } while (0); if (pass != NULL) bzero(pass, SHA512_DIGEST_LENGTH); if (key != NULL) bzero(key, 16); if (error == 0) return; g_access(cp, -1, -1, -1); g_detach(cp); g_destroy_consumer(cp); if (gp->softc != NULL) g_free(gp->softc); g_destroy_geom(gp); switch (error) { case ENOENT: gctl_error(req, "Lock was destroyed"); break; case ESRCH: gctl_error(req, "Lock was nuked"); break; case EINVAL: gctl_error(req, "Could not open lock"); break; case ENOTDIR: gctl_error(req, "Lock not found"); break; default: gctl_error(req, "Could not open lock (%d)", error); break; } return; } static int g_bde_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_consumer *cp; struct g_provider *pp; struct g_bde_softc *sc; g_trace(G_T_TOPOLOGY, "g_bde_destroy_geom(%s, %s)", mp->name, gp->name); g_topology_assert(); /* * Orderly detachment. 
*/ KASSERT(gp != NULL, ("NULL geom")); pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("NULL provider")); if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) return (EBUSY); sc = gp->softc; cp = LIST_FIRST(&gp->consumer); KASSERT(cp != NULL, ("NULL consumer")); sc->dead = 1; wakeup(sc); g_access(cp, -1, -1, -1); g_detach(cp); g_destroy_consumer(cp); while (sc->dead != 2 && !LIST_EMPTY(&pp->consumers)) tsleep(sc, PRIBIO, "g_bdedie", hz); mtx_destroy(&sc->worklist_mutex); bzero(&sc->key, sizeof sc->key); g_free(sc); g_wither_geom(gp, ENXIO); return (0); } static void g_bde_ctlreq(struct gctl_req *req, struct g_class *mp, char const *verb) { struct g_geom *gp; struct g_provider *pp; if (!strcmp(verb, "create geom")) { pp = gctl_get_provider(req, "provider"); if (pp != NULL) g_bde_create_geom(req, mp, pp); } else if (!strcmp(verb, "destroy geom")) { gp = gctl_get_geom(req, mp, "geom"); if (gp != NULL) g_bde_destroy_geom(req, mp, gp); } else { gctl_error(req, "unknown verb"); } } static struct g_class g_bde_class = { .name = BDE_CLASS_NAME, .version = G_VERSION, .destroy_geom = g_bde_destroy_geom, .ctlreq = g_bde_ctlreq, .start = g_bde_start, .orphan = g_bde_orphan, .access = g_bde_access, .spoiled = g_std_spoiled, }; DECLARE_GEOM_CLASS(g_bde_class, g_bde); +MODULE_VERSION(geom_bde, 0); Index: stable/11/sys/geom/cache/g_cache.c =================================================================== --- stable/11/sys/geom/cache/g_cache.c (revision 332639) +++ stable/11/sys/geom/cache/g_cache.c (revision 332640) @@ -1,1016 +1,1017 @@ /*- * Copyright (c) 2006 Ruslan Ermilov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_cache, "GEOM cache module"); static MALLOC_DEFINE(M_GCACHE, "gcache_data", "GEOM_CACHE Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, cache, CTLFLAG_RW, 0, "GEOM_CACHE stuff"); static u_int g_cache_debug = 0; SYSCTL_UINT(_kern_geom_cache, OID_AUTO, debug, CTLFLAG_RW, &g_cache_debug, 0, "Debug level"); static u_int g_cache_enable = 1; SYSCTL_UINT(_kern_geom_cache, OID_AUTO, enable, CTLFLAG_RW, &g_cache_enable, 0, ""); static u_int g_cache_timeout = 10; SYSCTL_UINT(_kern_geom_cache, OID_AUTO, timeout, CTLFLAG_RW, &g_cache_timeout, 0, ""); static u_int g_cache_idletime = 5; SYSCTL_UINT(_kern_geom_cache, OID_AUTO, idletime, CTLFLAG_RW, &g_cache_idletime, 0, ""); static u_int g_cache_used_lo = 5; static u_int g_cache_used_hi = 20; static int sysctl_handle_pct(SYSCTL_HANDLER_ARGS) { u_int val = *(u_int *)arg1; int error; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return (error); if (val > 100) return (EINVAL); if ((arg1 == &g_cache_used_lo && val > g_cache_used_hi) || (arg1 == &g_cache_used_hi && g_cache_used_lo > val)) return (EINVAL); *(u_int *)arg1 = val; return (0); } SYSCTL_PROC(_kern_geom_cache, OID_AUTO, used_lo, CTLTYPE_UINT|CTLFLAG_RW, &g_cache_used_lo, 0, sysctl_handle_pct, "IU", ""); SYSCTL_PROC(_kern_geom_cache, OID_AUTO, used_hi, CTLTYPE_UINT|CTLFLAG_RW, &g_cache_used_hi, 0, sysctl_handle_pct, "IU", ""); static int g_cache_destroy(struct g_cache_softc *sc, boolean_t force); static g_ctl_destroy_geom_t g_cache_destroy_geom; static g_taste_t g_cache_taste; static g_ctl_req_t g_cache_config; static g_dumpconf_t g_cache_dumpconf; struct g_class g_cache_class = { .name = G_CACHE_CLASS_NAME, .version = G_VERSION, .ctlreq = g_cache_config, .taste = g_cache_taste, .destroy_geom = g_cache_destroy_geom }; #define OFF2BNO(off, sc) ((off) >> (sc)->sc_bshift) #define BNO2OFF(bno, sc) ((bno) << (sc)->sc_bshift) static struct g_cache_desc * g_cache_alloc(struct g_cache_softc *sc) { struct g_cache_desc *dp; mtx_assert(&sc->sc_mtx, MA_OWNED); if (!TAILQ_EMPTY(&sc->sc_usedlist)) { dp = TAILQ_FIRST(&sc->sc_usedlist); TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); sc->sc_nused--; dp->d_flags = 0; LIST_REMOVE(dp, d_next); return (dp); } if (sc->sc_nent > sc->sc_maxent) { sc->sc_cachefull++; return (NULL); } dp = malloc(sizeof(*dp), M_GCACHE, M_NOWAIT | M_ZERO); if (dp == NULL) return (NULL); dp->d_data = uma_zalloc(sc->sc_zone, M_NOWAIT); if (dp->d_data == NULL) { free(dp, M_GCACHE); return (NULL); } sc->sc_nent++; return (dp); } static void g_cache_free(struct g_cache_softc *sc, struct g_cache_desc *dp) { mtx_assert(&sc->sc_mtx, MA_OWNED); uma_zfree(sc->sc_zone, dp->d_data); free(dp, M_GCACHE); sc->sc_nent--; } static void g_cache_free_used(struct g_cache_softc *sc) { struct g_cache_desc *dp; u_int n; mtx_assert(&sc->sc_mtx, MA_OWNED); n = g_cache_used_lo * sc->sc_maxent / 100; while (sc->sc_nused > n) { KASSERT(!TAILQ_EMPTY(&sc->sc_usedlist), ("used list empty")); dp = TAILQ_FIRST(&sc->sc_usedlist); TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); sc->sc_nused--; LIST_REMOVE(dp, d_next); g_cache_free(sc, dp); } } static void g_cache_deliver(struct g_cache_softc *sc, struct bio *bp, struct g_cache_desc *dp, int error) { off_t off1, off, len; mtx_assert(&sc->sc_mtx, MA_OWNED); KASSERT(OFF2BNO(bp->bio_offset, sc) <= dp->d_bno, ("wrong entry")); KASSERT(OFF2BNO(bp->bio_offset 
+ bp->bio_length - 1, sc) >= dp->d_bno, ("wrong entry")); off1 = BNO2OFF(dp->d_bno, sc); off = MAX(bp->bio_offset, off1); len = MIN(bp->bio_offset + bp->bio_length, off1 + sc->sc_bsize) - off; if (bp->bio_error == 0) bp->bio_error = error; if (bp->bio_error == 0) { bcopy(dp->d_data + (off - off1), bp->bio_data + (off - bp->bio_offset), len); } bp->bio_completed += len; KASSERT(bp->bio_completed <= bp->bio_length, ("extra data")); if (bp->bio_completed == bp->bio_length) { if (bp->bio_error != 0) bp->bio_completed = 0; g_io_deliver(bp, bp->bio_error); } if (dp->d_flags & D_FLAG_USED) { TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); TAILQ_INSERT_TAIL(&sc->sc_usedlist, dp, d_used); } else if (OFF2BNO(off + len, sc) > dp->d_bno) { TAILQ_INSERT_TAIL(&sc->sc_usedlist, dp, d_used); sc->sc_nused++; dp->d_flags |= D_FLAG_USED; } dp->d_atime = time_uptime; } static void g_cache_done(struct bio *bp) { struct g_cache_softc *sc; struct g_cache_desc *dp; struct bio *bp2, *tmpbp; sc = bp->bio_from->geom->softc; KASSERT(G_CACHE_DESC1(bp) == sc, ("corrupt bio_caller in g_cache_done()")); dp = G_CACHE_DESC2(bp); mtx_lock(&sc->sc_mtx); bp2 = dp->d_biolist; while (bp2 != NULL) { KASSERT(G_CACHE_NEXT_BIO1(bp2) == sc, ("corrupt bio_driver in g_cache_done()")); tmpbp = G_CACHE_NEXT_BIO2(bp2); g_cache_deliver(sc, bp2, dp, bp->bio_error); bp2 = tmpbp; } dp->d_biolist = NULL; if (dp->d_flags & D_FLAG_INVALID) { sc->sc_invalid--; g_cache_free(sc, dp); } else if (bp->bio_error) { LIST_REMOVE(dp, d_next); if (dp->d_flags & D_FLAG_USED) { TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); sc->sc_nused--; } g_cache_free(sc, dp); } mtx_unlock(&sc->sc_mtx); g_destroy_bio(bp); } static struct g_cache_desc * g_cache_lookup(struct g_cache_softc *sc, off_t bno) { struct g_cache_desc *dp; mtx_assert(&sc->sc_mtx, MA_OWNED); LIST_FOREACH(dp, &sc->sc_desclist[G_CACHE_BUCKET(bno)], d_next) if (dp->d_bno == bno) return (dp); return (NULL); } static int g_cache_read(struct g_cache_softc *sc, struct bio *bp) { struct bio *cbp; struct g_cache_desc *dp; mtx_lock(&sc->sc_mtx); dp = g_cache_lookup(sc, OFF2BNO(bp->bio_offset + bp->bio_completed, sc)); if (dp != NULL) { /* Add to waiters list or deliver. */ sc->sc_cachehits++; if (dp->d_biolist != NULL) { G_CACHE_NEXT_BIO1(bp) = sc; G_CACHE_NEXT_BIO2(bp) = dp->d_biolist; dp->d_biolist = bp; } else g_cache_deliver(sc, bp, dp, 0); mtx_unlock(&sc->sc_mtx); return (0); } /* Cache miss. Allocate entry and schedule bio. 
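 * (A worked example, assuming a hypothetical 64 KiB cache block size: sc_bshift
 * is then 16, so a read at byte offset 0x12345 maps to block
 * OFF2BNO(0x12345, sc) = 1, and the entry allocated below caches the range
 * [BNO2OFF(1, sc), BNO2OFF(1, sc) + sc_bsize), i.e. [0x10000, 0x20000).)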
*/ sc->sc_cachemisses++; dp = g_cache_alloc(sc); if (dp == NULL) { mtx_unlock(&sc->sc_mtx); return (ENOMEM); } cbp = g_clone_bio(bp); if (cbp == NULL) { g_cache_free(sc, dp); mtx_unlock(&sc->sc_mtx); return (ENOMEM); } dp->d_bno = OFF2BNO(bp->bio_offset + bp->bio_completed, sc); G_CACHE_NEXT_BIO1(bp) = sc; G_CACHE_NEXT_BIO2(bp) = NULL; dp->d_biolist = bp; LIST_INSERT_HEAD(&sc->sc_desclist[G_CACHE_BUCKET(dp->d_bno)], dp, d_next); mtx_unlock(&sc->sc_mtx); G_CACHE_DESC1(cbp) = sc; G_CACHE_DESC2(cbp) = dp; cbp->bio_done = g_cache_done; cbp->bio_offset = BNO2OFF(dp->d_bno, sc); cbp->bio_data = dp->d_data; cbp->bio_length = sc->sc_bsize; g_io_request(cbp, LIST_FIRST(&bp->bio_to->geom->consumer)); return (0); } static void g_cache_invalidate(struct g_cache_softc *sc, struct bio *bp) { struct g_cache_desc *dp; off_t bno, lim; mtx_lock(&sc->sc_mtx); bno = OFF2BNO(bp->bio_offset, sc); lim = OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc); do { if ((dp = g_cache_lookup(sc, bno)) != NULL) { LIST_REMOVE(dp, d_next); if (dp->d_flags & D_FLAG_USED) { TAILQ_REMOVE(&sc->sc_usedlist, dp, d_used); sc->sc_nused--; } if (dp->d_biolist == NULL) g_cache_free(sc, dp); else { dp->d_flags = D_FLAG_INVALID; sc->sc_invalid++; } } bno++; } while (bno <= lim); mtx_unlock(&sc->sc_mtx); } static void g_cache_start(struct bio *bp) { struct g_cache_softc *sc; struct g_geom *gp; struct g_cache_desc *dp; struct bio *cbp; gp = bp->bio_to->geom; sc = gp->softc; G_CACHE_LOGREQ(bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: sc->sc_reads++; sc->sc_readbytes += bp->bio_length; if (!g_cache_enable) break; if (bp->bio_offset + bp->bio_length > sc->sc_tail) break; if (OFF2BNO(bp->bio_offset, sc) == OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc)) { sc->sc_cachereads++; sc->sc_cachereadbytes += bp->bio_length; if (g_cache_read(sc, bp) == 0) return; sc->sc_cachereads--; sc->sc_cachereadbytes -= bp->bio_length; break; } else if (OFF2BNO(bp->bio_offset, sc) + 1 == OFF2BNO(bp->bio_offset + bp->bio_length - 1, sc)) { mtx_lock(&sc->sc_mtx); dp = g_cache_lookup(sc, OFF2BNO(bp->bio_offset, sc)); if (dp == NULL || dp->d_biolist != NULL) { mtx_unlock(&sc->sc_mtx); break; } sc->sc_cachereads++; sc->sc_cachereadbytes += bp->bio_length; g_cache_deliver(sc, bp, dp, 0); mtx_unlock(&sc->sc_mtx); if (g_cache_read(sc, bp) == 0) return; sc->sc_cachereads--; sc->sc_cachereadbytes -= bp->bio_length; break; } break; case BIO_WRITE: sc->sc_writes++; sc->sc_wrotebytes += bp->bio_length; g_cache_invalidate(sc, bp); break; } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; G_CACHE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, LIST_FIRST(&gp->consumer)); } static void g_cache_go(void *arg) { struct g_cache_softc *sc = arg; struct g_cache_desc *dp; int i; mtx_assert(&sc->sc_mtx, MA_OWNED); /* Forcibly mark idle ready entries as used. */ for (i = 0; i < G_CACHE_BUCKETS; i++) { LIST_FOREACH(dp, &sc->sc_desclist[i], d_next) { if (dp->d_flags & D_FLAG_USED || dp->d_biolist != NULL || time_uptime - dp->d_atime < g_cache_idletime) continue; TAILQ_INSERT_TAIL(&sc->sc_usedlist, dp, d_used); sc->sc_nused++; dp->d_flags |= D_FLAG_USED; } } /* Keep the number of used entries low. 
*/ if (sc->sc_nused > g_cache_used_hi * sc->sc_maxent / 100) g_cache_free_used(sc); callout_reset(&sc->sc_callout, g_cache_timeout * hz, g_cache_go, sc); } static int g_cache_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; int error; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); error = g_access(cp, dr, dw, de); return (error); } static void g_cache_orphan(struct g_consumer *cp) { g_topology_assert(); g_cache_destroy(cp->geom->softc, 1); } static struct g_cache_softc * g_cache_find_device(struct g_class *mp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) return (gp->softc); } return (NULL); } static struct g_geom * g_cache_create(struct g_class *mp, struct g_provider *pp, const struct g_cache_metadata *md, u_int type) { struct g_cache_softc *sc; struct g_geom *gp; struct g_provider *newpp; struct g_consumer *cp; u_int bshift; int i; g_topology_assert(); gp = NULL; newpp = NULL; cp = NULL; G_CACHE_DEBUG(1, "Creating device %s.", md->md_name); /* Cache size is minimum 100. */ if (md->md_size < 100) { G_CACHE_DEBUG(0, "Invalid size for device %s.", md->md_name); return (NULL); } /* Block size restrictions. */ bshift = ffs(md->md_bsize) - 1; if (md->md_bsize == 0 || md->md_bsize > MAXPHYS || md->md_bsize != 1 << bshift || (md->md_bsize % pp->sectorsize) != 0) { G_CACHE_DEBUG(0, "Invalid blocksize for provider %s.", pp->name); return (NULL); } /* Check for duplicate unit. */ if (g_cache_find_device(mp, (const char *)&md->md_name) != NULL) { G_CACHE_DEBUG(0, "Provider %s already exists.", md->md_name); return (NULL); } gp = g_new_geomf(mp, "%s", md->md_name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); sc->sc_type = type; sc->sc_bshift = bshift; sc->sc_bsize = 1 << bshift; sc->sc_zone = uma_zcreate("gcache", sc->sc_bsize, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mtx_init(&sc->sc_mtx, "GEOM CACHE mutex", NULL, MTX_DEF); for (i = 0; i < G_CACHE_BUCKETS; i++) LIST_INIT(&sc->sc_desclist[i]); TAILQ_INIT(&sc->sc_usedlist); sc->sc_maxent = md->md_size; callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0); gp->softc = sc; sc->sc_geom = gp; gp->start = g_cache_start; gp->orphan = g_cache_orphan; gp->access = g_cache_access; gp->dumpconf = g_cache_dumpconf; newpp = g_new_providerf(gp, "cache/%s", gp->name); newpp->sectorsize = pp->sectorsize; newpp->mediasize = pp->mediasize; if (type == G_CACHE_TYPE_AUTOMATIC) newpp->mediasize -= pp->sectorsize; sc->sc_tail = BNO2OFF(OFF2BNO(newpp->mediasize, sc), sc); cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { G_CACHE_DEBUG(0, "Cannot attach to provider %s.", pp->name); g_destroy_consumer(cp); g_destroy_provider(newpp); mtx_destroy(&sc->sc_mtx); g_free(sc); g_destroy_geom(gp); return (NULL); } g_error_provider(newpp, 0); G_CACHE_DEBUG(0, "Device %s created.", gp->name); callout_reset(&sc->sc_callout, g_cache_timeout * hz, g_cache_go, sc); return (gp); } static int g_cache_destroy(struct g_cache_softc *sc, boolean_t force) { struct g_geom *gp; struct g_provider *pp; struct g_cache_desc *dp, *dp2; int i; g_topology_assert(); if (sc == NULL) return (ENXIO); gp = sc->sc_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_CACHE_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_CACHE_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else { G_CACHE_DEBUG(0, "Device %s removed.", 
gp->name); } callout_drain(&sc->sc_callout); mtx_lock(&sc->sc_mtx); for (i = 0; i < G_CACHE_BUCKETS; i++) { dp = LIST_FIRST(&sc->sc_desclist[i]); while (dp != NULL) { dp2 = LIST_NEXT(dp, d_next); g_cache_free(sc, dp); dp = dp2; } } mtx_unlock(&sc->sc_mtx); mtx_destroy(&sc->sc_mtx); uma_zdestroy(sc->sc_zone); g_free(sc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static int g_cache_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_cache_destroy(gp->softc, 0)); } static int g_cache_read_metadata(struct g_consumer *cp, struct g_cache_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ cache_metadata_decode(buf, md); g_free(buf); return (0); } static int g_cache_write_metadata(struct g_consumer *cp, struct g_cache_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 0, 1, 0); if (error != 0) return (error); pp = cp->provider; buf = malloc((size_t)pp->sectorsize, M_GCACHE, M_WAITOK | M_ZERO); cache_metadata_encode(md, buf); g_topology_unlock(); error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, pp->sectorsize); g_topology_lock(); g_access(cp, 0, -1, 0); free(buf, M_GCACHE); return (error); } static struct g_geom * g_cache_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_cache_metadata md; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); G_CACHE_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "cache:taste"); gp->start = g_cache_start; gp->orphan = g_cache_orphan; gp->access = g_cache_access; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_cache_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); if (strcmp(md.md_magic, G_CACHE_MAGIC) != 0) return (NULL); if (md.md_version > G_CACHE_VERSION) { printf("geom_cache.ko module is too old to handle %s.\n", pp->name); return (NULL); } if (md.md_provsize != pp->mediasize) return (NULL); gp = g_cache_create(mp, pp, &md, G_CACHE_TYPE_AUTOMATIC); if (gp == NULL) { G_CACHE_DEBUG(0, "Can't create %s.", md.md_name); return (NULL); } return (gp); } static void g_cache_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_cache_metadata md; struct g_provider *pp; struct g_geom *gp; intmax_t *bsize, *size; const char *name; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs != 2) { gctl_error(req, "Invalid number of arguments."); return; } strlcpy(md.md_magic, G_CACHE_MAGIC, sizeof(md.md_magic)); md.md_version = G_CACHE_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg0' argument"); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); size = gctl_get_paraml(req, "size", sizeof(*size)); if (size == NULL) { gctl_error(req, "No '%s' argument", "size"); return; } if ((u_int)*size < 100) { gctl_error(req, "Invalid '%s' argument", "size"); return; } md.md_size = (u_int)*size; bsize = gctl_get_paraml(req, "blocksize", sizeof(*bsize)); if (bsize == NULL) { gctl_error(req, "No 
'%s' argument", "blocksize"); return; } if (*bsize < 0) { gctl_error(req, "Invalid '%s' argument", "blocksize"); return; } md.md_bsize = (u_int)*bsize; /* This field is not important here. */ md.md_provsize = 0; name = gctl_get_asciiparam(req, "arg1"); if (name == NULL) { gctl_error(req, "No 'arg1' argument"); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_CACHE_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } gp = g_cache_create(mp, pp, &md, G_CACHE_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't create %s.", md.md_name); return; } } static void g_cache_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_cache_metadata md; struct g_cache_softc *sc; struct g_consumer *cp; intmax_t *bsize, *size; const char *name; int error, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs != 1) { gctl_error(req, "Missing device."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg0' argument"); return; } sc = g_cache_find_device(mp, name); if (sc == NULL) { G_CACHE_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } size = gctl_get_paraml(req, "size", sizeof(*size)); if (size == NULL) { gctl_error(req, "No '%s' argument", "size"); return; } if ((u_int)*size != 0 && (u_int)*size < 100) { gctl_error(req, "Invalid '%s' argument", "size"); return; } if ((u_int)*size != 0) sc->sc_maxent = (u_int)*size; bsize = gctl_get_paraml(req, "blocksize", sizeof(*bsize)); if (bsize == NULL) { gctl_error(req, "No '%s' argument", "blocksize"); return; } if (*bsize < 0) { gctl_error(req, "Invalid '%s' argument", "blocksize"); return; } if (sc->sc_type != G_CACHE_TYPE_AUTOMATIC) return; strlcpy(md.md_name, name, sizeof(md.md_name)); strlcpy(md.md_magic, G_CACHE_MAGIC, sizeof(md.md_magic)); md.md_version = G_CACHE_VERSION; if ((u_int)*size != 0) md.md_size = (u_int)*size; else md.md_size = sc->sc_maxent; if ((u_int)*bsize != 0) md.md_bsize = (u_int)*bsize; else md.md_bsize = sc->sc_bsize; cp = LIST_FIRST(&sc->sc_geom->consumer); md.md_provsize = cp->provider->mediasize; error = g_cache_write_metadata(cp, &md); if (error == 0) G_CACHE_DEBUG(2, "Metadata on %s updated.", cp->provider->name); else G_CACHE_DEBUG(0, "Cannot update metadata on %s (error=%d).", cp->provider->name, error); } static void g_cache_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_cache_softc *sc; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } sc = g_cache_find_device(mp, name); if (sc == NULL) { G_CACHE_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } error = g_cache_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, 
error); return; } } } static void g_cache_ctl_reset(struct gctl_req *req, struct g_class *mp) { struct g_cache_softc *sc; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } sc = g_cache_find_device(mp, name); if (sc == NULL) { G_CACHE_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } sc->sc_reads = 0; sc->sc_readbytes = 0; sc->sc_cachereads = 0; sc->sc_cachereadbytes = 0; sc->sc_cachehits = 0; sc->sc_cachemisses = 0; sc->sc_cachefull = 0; sc->sc_writes = 0; sc->sc_wrotebytes = 0; } } static void g_cache_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_CACHE_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_cache_ctl_create(req, mp); return; } else if (strcmp(verb, "configure") == 0) { g_cache_ctl_configure(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_cache_ctl_destroy(req, mp); return; } else if (strcmp(verb, "reset") == 0) { g_cache_ctl_reset(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_cache_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_cache_softc *sc; if (pp != NULL || cp != NULL) return; sc = gp->softc; sbuf_printf(sb, "%s%u\n", indent, sc->sc_maxent); sbuf_printf(sb, "%s%u\n", indent, sc->sc_bsize); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_tail); sbuf_printf(sb, "%s%u\n", indent, sc->sc_nent); sbuf_printf(sb, "%s%u\n", indent, sc->sc_nused); sbuf_printf(sb, "%s%u\n", indent, sc->sc_invalid); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_reads); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_readbytes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachereads); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachereadbytes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachehits); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachemisses); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cachefull); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_writes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_wrotebytes); } DECLARE_GEOM_CLASS(g_cache_class, g_cache); +MODULE_VERSION(geom_cache, 0); Index: stable/11/sys/geom/concat/g_concat.c =================================================================== --- stable/11/sys/geom/concat/g_concat.c (revision 332639) +++ stable/11/sys/geom/concat/g_concat.c (revision 332640) @@ -1,993 +1,994 @@ /*- * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_concat, "GEOM concatenation support"); static MALLOC_DEFINE(M_CONCAT, "concat_data", "GEOM_CONCAT Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, concat, CTLFLAG_RW, 0, "GEOM_CONCAT stuff"); static u_int g_concat_debug = 0; SYSCTL_UINT(_kern_geom_concat, OID_AUTO, debug, CTLFLAG_RWTUN, &g_concat_debug, 0, "Debug level"); static int g_concat_destroy(struct g_concat_softc *sc, boolean_t force); static int g_concat_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_concat_taste; static g_ctl_req_t g_concat_config; static g_dumpconf_t g_concat_dumpconf; struct g_class g_concat_class = { .name = G_CONCAT_CLASS_NAME, .version = G_VERSION, .ctlreq = g_concat_config, .taste = g_concat_taste, .destroy_geom = g_concat_destroy_geom }; /* * Greatest Common Divisor. */ static u_int gcd(u_int a, u_int b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static u_int lcm(u_int a, u_int b) { return ((a * b) / gcd(a, b)); } /* * Return the number of valid disks. */ static u_int g_concat_nvalid(struct g_concat_softc *sc) { u_int i, no; no = 0; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i].d_consumer != NULL) no++; } return (no); } static void g_concat_remove_disk(struct g_concat_disk *disk) { struct g_consumer *cp; struct g_concat_softc *sc; g_topology_assert(); KASSERT(disk->d_consumer != NULL, ("Non-valid disk in %s.", __func__)); sc = disk->d_softc; cp = disk->d_consumer; if (!disk->d_removed) { G_CONCAT_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, sc->sc_name); disk->d_removed = 1; } if (sc->sc_provider != NULL) { G_CONCAT_DEBUG(0, "Device %s deactivated.", sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) return; disk->d_consumer = NULL; g_detach(cp); g_destroy_consumer(cp); /* If there are no valid disks anymore, remove device. */ if (LIST_EMPTY(&sc->sc_geom->consumer)) g_concat_destroy(sc, 1); } static void g_concat_orphan(struct g_consumer *cp) { struct g_concat_softc *sc; struct g_concat_disk *disk; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; disk = cp->private; if (disk == NULL) /* Possible? 
*/ return; g_concat_remove_disk(disk); } static int g_concat_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp1, *cp2, *tmp; struct g_concat_disk *disk; struct g_geom *gp; int error; g_topology_assert(); gp = pp->geom; /* On first open, grab an extra "exclusive" bit */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... and let go of it on last close */ if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) de--; LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { error = g_access(cp1, dr, dw, de); if (error != 0) goto fail; disk = cp1->private; if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && disk->d_removed) { g_concat_remove_disk(disk); /* May destroy geom. */ } } return (0); fail: LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) break; g_access(cp2, -dr, -dw, -de); } return (error); } static void g_concat_kernel_dump(struct bio *bp) { struct g_concat_softc *sc; struct g_concat_disk *disk; struct bio *cbp; struct g_kerneldump *gkd; u_int i; sc = bp->bio_to->geom->softc; gkd = (struct g_kerneldump *)bp->bio_data; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i].d_start <= gkd->offset && sc->sc_disks[i].d_end > gkd->offset) break; } if (i == sc->sc_ndisks) g_io_deliver(bp, EOPNOTSUPP); disk = &sc->sc_disks[i]; gkd->offset -= disk->d_start; if (gkd->length > disk->d_end - disk->d_start - gkd->offset) gkd->length = disk->d_end - disk->d_start - gkd->offset; cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; g_io_request(cbp, disk->d_consumer); G_CONCAT_DEBUG(1, "Kernel dump will go to %s.", disk->d_consumer->provider->name); } static void g_concat_done(struct bio *bp) { struct g_concat_softc *sc; struct bio *pbp; pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; mtx_lock(&sc->sc_lock); if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; pbp->bio_completed += bp->bio_completed; pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { mtx_unlock(&sc->sc_lock); g_io_deliver(pbp, pbp->bio_error); } else mtx_unlock(&sc->sc_lock); g_destroy_bio(bp); } static void g_concat_flush(struct g_concat_softc *sc, struct bio *bp) { struct bio_queue_head queue; struct g_consumer *cp; struct bio *cbp; u_int no; bioq_init(&queue); for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); cbp->bio_done = g_concat_done; cbp->bio_caller1 = sc->sc_disks[no].d_consumer; cbp->bio_to = sc->sc_disks[no].d_consumer->provider; } while ((cbp = bioq_takefirst(&queue)) != NULL) { G_CONCAT_LOGREQ(cbp, "Sending request."); cp = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_io_request(cbp, cp); } } static void g_concat_start(struct bio *bp) { struct bio_queue_head queue; struct g_concat_softc *sc; struct g_concat_disk *disk; struct g_provider *pp; off_t offset, end, length, off, len; struct bio *cbp; char *addr; u_int no; pp = bp->bio_to; sc = pp->geom->softc; /* * If sc == NULL, provider's error should be set and g_concat_start() * should not be called at all. 
*/ KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_CONCAT_LOGREQ(bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_FLUSH: g_concat_flush(sc, bp); return; case BIO_GETATTR: if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) { g_concat_kernel_dump(bp); return; } /* To which provider it should be delivered? */ /* FALLTHROUGH */ default: g_io_deliver(bp, EOPNOTSUPP); return; } offset = bp->bio_offset; length = bp->bio_length; if ((bp->bio_flags & BIO_UNMAPPED) != 0) addr = NULL; else addr = bp->bio_data; end = offset + length; bioq_init(&queue); for (no = 0; no < sc->sc_ndisks; no++) { disk = &sc->sc_disks[no]; if (disk->d_end <= offset) continue; if (disk->d_start >= end) break; off = offset - disk->d_start; len = MIN(length, disk->d_end - offset); length -= len; offset += len; cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); /* * Fill in the component buf structure. */ if (len == bp->bio_length) cbp->bio_done = g_std_done; else cbp->bio_done = g_concat_done; cbp->bio_offset = off; cbp->bio_length = len; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; addr += len; cbp->bio_to = disk->d_consumer->provider; cbp->bio_caller1 = disk; if (length == 0) break; } KASSERT(length == 0, ("Length is still greater than 0 (class=%s, name=%s).", bp->bio_to->geom->class->name, bp->bio_to->geom->name)); while ((cbp = bioq_takefirst(&queue)) != NULL) { G_CONCAT_LOGREQ(cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_io_request(cbp, disk->d_consumer); } } static void g_concat_check_and_run(struct g_concat_softc *sc) { struct g_concat_disk *disk; struct g_provider *dp, *pp; u_int no, sectorsize = 0; off_t start; g_topology_assert(); if (g_concat_nvalid(sc) != sc->sc_ndisks) return; pp = g_new_providerf(sc->sc_geom, "concat/%s", sc->sc_name); pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE | G_PF_ACCEPT_UNMAPPED; start = 0; for (no = 0; no < sc->sc_ndisks; no++) { disk = &sc->sc_disks[no]; dp = disk->d_consumer->provider; disk->d_start = start; disk->d_end = disk->d_start + dp->mediasize; if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC) disk->d_end -= dp->sectorsize; start = disk->d_end; if (no == 0) sectorsize = dp->sectorsize; else sectorsize = lcm(sectorsize, dp->sectorsize); /* A provider underneath us doesn't support unmapped */ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { G_CONCAT_DEBUG(1, "Cancelling unmapped " "because of %s.", dp->name); pp->flags &= ~G_PF_ACCEPT_UNMAPPED; } } pp->sectorsize = sectorsize; /* We have sc->sc_disks[sc->sc_ndisks - 1].d_end in 'start'. 
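 * (For example, with three hypothetical 10 GB automatic-mode components using
 * 512-byte sectors, each d_end drops one metadata sector, so 'start' ends up
 * at 30 GB minus 1536 bytes and becomes the provider's mediasize below, while
 * sectorsize is the least common multiple of the component sector sizes,
 * here simply 512.)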
*/ pp->mediasize = start; pp->stripesize = sc->sc_disks[0].d_consumer->provider->stripesize; pp->stripeoffset = sc->sc_disks[0].d_consumer->provider->stripeoffset; sc->sc_provider = pp; g_error_provider(pp, 0); G_CONCAT_DEBUG(0, "Device %s activated.", sc->sc_provider->name); } static int g_concat_read_metadata(struct g_consumer *cp, struct g_concat_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ concat_metadata_decode(buf, md); g_free(buf); return (0); } /* * Add disk to given device. */ static int g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no) { struct g_concat_disk *disk; struct g_consumer *cp, *fcp; struct g_geom *gp; int error; g_topology_assert(); /* Metadata corrupted? */ if (no >= sc->sc_ndisks) return (EINVAL); disk = &sc->sc_disks[no]; /* Check if disk is not already attached. */ if (disk->d_consumer != NULL) return (EEXIST); gp = sc->sc_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC) { struct g_concat_metadata md; /* Re-read metadata. */ error = g_concat_read_metadata(cp, &md); if (error != 0) goto fail; if (strcmp(md.md_magic, G_CONCAT_MAGIC) != 0 || strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_CONCAT_DEBUG(0, "Metadata on %s changed.", pp->name); goto fail; } } cp->private = disk; disk->d_consumer = cp; disk->d_softc = sc; disk->d_start = 0; /* not yet */ disk->d_end = 0; /* not yet */ disk->d_removed = 0; G_CONCAT_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); g_concat_check_and_run(sc); return (0); fail: if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); g_detach(cp); g_destroy_consumer(cp); return (error); } static struct g_geom * g_concat_create(struct g_class *mp, const struct g_concat_metadata *md, u_int type) { struct g_concat_softc *sc; struct g_geom *gp; u_int no; G_CONCAT_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* One disks is minimum. 
*/ if (md->md_all < 1) return (NULL); /* Check for duplicate unit */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { G_CONCAT_DEBUG(0, "Device %s already configured.", gp->name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_CONCAT, M_WAITOK | M_ZERO); gp->start = g_concat_start; gp->spoiled = g_concat_orphan; gp->orphan = g_concat_orphan; gp->access = g_concat_access; gp->dumpconf = g_concat_dumpconf; sc->sc_id = md->md_id; sc->sc_ndisks = md->md_all; sc->sc_disks = malloc(sizeof(struct g_concat_disk) * sc->sc_ndisks, M_CONCAT, M_WAITOK | M_ZERO); for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no].d_consumer = NULL; sc->sc_type = type; mtx_init(&sc->sc_lock, "gconcat lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; G_CONCAT_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); return (gp); } static int g_concat_destroy(struct g_concat_softc *sc, boolean_t force) { struct g_provider *pp; struct g_consumer *cp, *cp1; struct g_geom *gp; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_CONCAT_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_CONCAT_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } gp = sc->sc_geom; LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { g_concat_remove_disk(cp->private); if (cp1 == NULL) return (0); /* Recursion happened. */ } if (!LIST_EMPTY(&gp->consumer)) return (EINPROGRESS); gp->softc = NULL; KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", gp->name)); free(sc->sc_disks, M_CONCAT); mtx_destroy(&sc->sc_lock); free(sc, M_CONCAT); G_CONCAT_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_concat_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_concat_softc *sc; sc = gp->softc; return (g_concat_destroy(sc, 0)); } static struct g_geom * g_concat_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_concat_metadata md; struct g_concat_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); G_CONCAT_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "concat:taste"); gp->start = g_concat_start; gp->access = g_concat_access; gp->orphan = g_concat_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_concat_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_CONCAT_MAGIC) != 0) return (NULL); if (md.md_version > G_CONCAT_VERSION) { printf("geom_concat.ko module is too old to handle %s.\n", pp->name); return (NULL); } /* * Backward compatibility: */ /* There was no md_provider field in earlier versions of metadata. */ if (md.md_version < 3) bzero(md.md_provider, sizeof(md.md_provider)); /* There was no md_provsize field in earlier versions of metadata. 
*/ if (md.md_version < 4) md.md_provsize = pp->mediasize; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != pp->mediasize) return (NULL); /* * Let's check if device already exists. */ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_type != G_CONCAT_TYPE_AUTOMATIC) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) continue; break; } if (gp != NULL) { G_CONCAT_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_concat_add_disk(sc, pp, md.md_no); if (error != 0) { G_CONCAT_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); return (NULL); } } else { gp = g_concat_create(mp, &md, G_CONCAT_TYPE_AUTOMATIC); if (gp == NULL) { G_CONCAT_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; G_CONCAT_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_concat_add_disk(sc, pp, md.md_no); if (error != 0) { G_CONCAT_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); g_concat_destroy(sc, 1); return (NULL); } } return (gp); } static void g_concat_ctl_create(struct gctl_req *req, struct g_class *mp) { u_int attached, no; struct g_concat_metadata md; struct g_provider *pp; struct g_concat_softc *sc; struct g_geom *gp; struct sbuf *sb; const char *name; char param[16]; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Too few arguments."); return; } strlcpy(md.md_magic, G_CONCAT_MAGIC, sizeof(md.md_magic)); md.md_version = G_CONCAT_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_id = arc4random(); md.md_no = 0; md.md_all = *nargs - 1; bzero(md.md_provider, sizeof(md.md_provider)); /* This field is not important here. 
*/ md.md_provsize = 0; /* Check all providers are valid */ for (no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_CONCAT_DEBUG(1, "Disk %s is invalid.", name); gctl_error(req, "Disk %s is invalid.", name); return; } } gp = g_concat_create(mp, &md, G_CONCAT_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't configure %s.", md.md_name); return; } sc = gp->softc; sb = sbuf_new_auto(); sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); for (attached = 0, no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument.", no); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); KASSERT(pp != NULL, ("Provider %s disappear?!", name)); if (g_concat_add_disk(sc, pp, no - 1) != 0) { G_CONCAT_DEBUG(1, "Disk %u (%s) not attached to %s.", no, pp->name, gp->name); sbuf_printf(sb, " %s", pp->name); continue; } attached++; } sbuf_finish(sb); if (md.md_all != attached) { g_concat_destroy(gp->softc, 1); gctl_error(req, "%s", sbuf_data(sb)); } sbuf_delete(sb); } static struct g_concat_softc * g_concat_find_device(struct g_class *mp, const char *name) { struct g_concat_softc *sc; struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(sc->sc_name, name) == 0) return (sc); } return (NULL); } static void g_concat_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_concat_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_concat_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_concat_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } static void g_concat_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_CONCAT_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_concat_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_concat_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_concat_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_concat_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. 
*/ } else if (cp != NULL) { struct g_concat_disk *disk; disk = cp->private; if (disk == NULL) return; sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)disk->d_end); sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)disk->d_start); } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s", indent); switch (sc->sc_type) { case G_CONCAT_TYPE_AUTOMATIC: sbuf_printf(sb, "AUTOMATIC"); break; case G_CONCAT_TYPE_MANUAL: sbuf_printf(sb, "MANUAL"); break; default: sbuf_printf(sb, "UNKNOWN"); break; } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%sTotal=%u, Online=%u\n", indent, sc->sc_ndisks, g_concat_nvalid(sc)); sbuf_printf(sb, "%s", indent); if (sc->sc_provider != NULL && sc->sc_provider->error == 0) sbuf_printf(sb, "UP"); else sbuf_printf(sb, "DOWN"); sbuf_printf(sb, "\n"); } } DECLARE_GEOM_CLASS(g_concat_class, g_concat); +MODULE_VERSION(geom_concat, 0); Index: stable/11/sys/geom/eli/g_eli.c =================================================================== --- stable/11/sys/geom/eli/g_eli.c (revision 332639) +++ stable/11/sys/geom/eli/g_eli.c (revision 332640) @@ -1,1333 +1,1334 @@ /*- * Copyright (c) 2005-2011 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_eli, "GEOM crypto module"); MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW, 0, "GEOM_ELI stuff"); static int g_eli_version = G_ELI_VERSION; SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0, "GELI version"); int g_eli_debug = 0; SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0, "Debug level"); static u_int g_eli_tries = 3; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0, "Number of tries for entering the passphrase"); static u_int g_eli_visible_passphrase = GETS_NOECHO; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN, &g_eli_visible_passphrase, 0, "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)"); u_int g_eli_overwrites = G_ELI_OVERWRITES; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites, 0, "Number of times on-disk keys should be overwritten when destroying them"); static u_int g_eli_threads = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0, "Number of threads doing crypto work"); u_int g_eli_batch = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0, "Use crypto operations batching"); /* * Passphrase cached during boot, in order to be more user-friendly if * there are multiple providers using the same passphrase. */ static char cached_passphrase[256]; static u_int g_eli_boot_passcache = 1; TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache); SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD, &g_eli_boot_passcache, 0, "Passphrases are cached during boot process for possible reuse"); static void fetch_loader_passphrase(void * dummy) { char * env_passphrase; KASSERT(dynamic_kenv, ("need dynamic kenv")); if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) { /* Extract passphrase from the environment. */ strlcpy(cached_passphrase, env_passphrase, sizeof(cached_passphrase)); freeenv(env_passphrase); /* Wipe the passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); } } SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY, fetch_loader_passphrase, NULL); static void zero_boot_passcache(void) { explicit_bzero(cached_passphrase, sizeof(cached_passphrase)); } static void zero_geli_intake_keys(void) { struct keybuf *keybuf; int i; if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, clear all GELI keys. 
*/ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { explicit_bzero(keybuf->kb_ents[i].ke_data, sizeof(keybuf->kb_ents[i].ke_data)); keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE; } } } } static void zero_intake_passcache(void *dummy) { zero_boot_passcache(); zero_geli_intake_keys(); } EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0); static eventhandler_tag g_eli_pre_sync = NULL; static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_eli_init(struct g_class *mp); static void g_eli_fini(struct g_class *mp); static g_taste_t g_eli_taste; static g_dumpconf_t g_eli_dumpconf; struct g_class g_eli_class = { .name = G_ELI_CLASS_NAME, .version = G_VERSION, .ctlreq = g_eli_config, .taste = g_eli_taste, .destroy_geom = g_eli_destroy_geom, .init = g_eli_init, .fini = g_eli_fini }; /* * Code paths: * BIO_READ: * g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ /* * EAGAIN from crypto(9) means, that we were probably balanced to another crypto * accelerator or something like this. * The function updates the SID and rerun the operation. */ int g_eli_crypto_rerun(struct cryptop *crp) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; bp = (struct bio *)crp->crp_opaque; sc = bp->bio_to->geom->softc; LIST_FOREACH(wr, &sc->sc_workers, w_next) { if (wr->w_number == bp->bio_pflags) break; } KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags)); G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %ju -> %ju).", bp->bio_cmd == BIO_READ ? "READ" : "WRITE", (uintmax_t)wr->w_sid, (uintmax_t)crp->crp_sid); wr->w_sid = crp->crp_sid; crp->crp_etype = 0; error = crypto_dispatch(crp); if (error == 0) return (0); G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error); crp->crp_etype = error; return (error); } static void g_eli_getattr_done(struct bio *bp) { if (bp->bio_error == 0 && !strcmp(bp->bio_attribute, "GEOM::physpath")) { strlcat(bp->bio_data, "/eli", bp->bio_length); } g_std_done(bp); } /* * The function is called afer reading encrypted data from the provider. * * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver */ void g_eli_read_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; sc = pbp->bio_to->geom->softc; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; if (pbp->bio_driver2 != NULL) { free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; } g_io_deliver(pbp, pbp->bio_error); atomic_subtract_int(&sc->sc_inflight, 1); return; } mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, pbp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } /* * The function is called after we encrypt and write data. 
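 * Like g_eli_read_done() above, it completes the parent bio only once every
 * cloned child request has come back; the fan-in idiom shared by both
 * completion handlers is simply (names from this file):
 *
 *	pbp->bio_inbed++;
 *	if (pbp->bio_inbed < pbp->bio_children)
 *		return;			// still waiting for sibling bios
 *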
* * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver */ void g_eli_write_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; } else pbp->bio_completed = pbp->bio_length; /* * Write is finished, send it up. */ sc = pbp->bio_to->geom->softc; g_io_deliver(pbp, pbp->bio_error); atomic_subtract_int(&sc->sc_inflight, 1); } /* * This function should never be called, but GEOM made as it set ->orphan() * method for every geom. */ static void g_eli_orphan_spoil_assert(struct g_consumer *cp) { panic("Function %s() called for %s.", __func__, cp->geom->name); } static void g_eli_orphan(struct g_consumer *cp) { struct g_eli_softc *sc; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; g_eli_destroy(sc, TRUE); } /* * BIO_READ: * G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ static void g_eli_start(struct bio *bp) { struct g_eli_softc *sc; struct g_consumer *cp; struct bio *cbp; sc = bp->bio_to->geom->softc; KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_ELI_LOGREQ(2, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_GETATTR: case BIO_FLUSH: case BIO_ZONE: break; case BIO_DELETE: /* * If the user hasn't set the NODELETE flag, we just pass * it down the stack and let the layers beneath us do (or * not) whatever they do with it. If they have, we * reject it. A possible extension would be an * additional flag to take it as a hint to shred the data * with [multiple?] overwrites. 
*/ if (!(sc->sc_flags & G_ELI_FLAG_NODELETE)) break; default: g_io_deliver(bp, EOPNOTSUPP); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } bp->bio_driver1 = cbp; bp->bio_pflags = G_ELI_NEW_BIO; switch (bp->bio_cmd) { case BIO_READ: if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) { g_eli_crypto_read(sc, bp, 0); break; } /* FALLTHROUGH */ case BIO_WRITE: mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); break; case BIO_GETATTR: case BIO_FLUSH: case BIO_DELETE: case BIO_ZONE: if (bp->bio_cmd == BIO_GETATTR) cbp->bio_done = g_eli_getattr_done; else cbp->bio_done = g_std_done; cp = LIST_FIRST(&sc->sc_geom->consumer); cbp->bio_to = cp->provider; G_ELI_LOGREQ(2, cbp, "Sending request."); g_io_request(cbp, cp); break; } } static int g_eli_newsession(struct g_eli_worker *wr) { struct g_eli_softc *sc; struct cryptoini crie, cria; int error; sc = wr->w_softc; bzero(&crie, sizeof(crie)); crie.cri_alg = sc->sc_ealgo; crie.cri_klen = sc->sc_ekeylen; if (sc->sc_ealgo == CRYPTO_AES_XTS) crie.cri_klen <<= 1; if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) { crie.cri_key = g_eli_key_hold(sc, 0, LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize); } else { crie.cri_key = sc->sc_ekey; } if (sc->sc_flags & G_ELI_FLAG_AUTH) { bzero(&cria, sizeof(cria)); cria.cri_alg = sc->sc_aalgo; cria.cri_klen = sc->sc_akeylen; cria.cri_key = sc->sc_akey; crie.cri_next = &cria; } switch (sc->sc_crypto) { case G_ELI_CRYPTO_SW: error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_SOFTWARE); break; case G_ELI_CRYPTO_HW: error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_HARDWARE); break; case G_ELI_CRYPTO_UNKNOWN: error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_HARDWARE); if (error == 0) { mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) sc->sc_crypto = G_ELI_CRYPTO_HW; mtx_unlock(&sc->sc_queue_mtx); } else { error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_SOFTWARE); mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) sc->sc_crypto = G_ELI_CRYPTO_SW; mtx_unlock(&sc->sc_queue_mtx); } break; default: panic("%s: invalid condition", __func__); } if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) g_eli_key_drop(sc, crie.cri_key); return (error); } static void g_eli_freesession(struct g_eli_worker *wr) { crypto_freesession(wr->w_sid); } static void g_eli_cancel(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) { KASSERT(bp->bio_pflags == G_ELI_NEW_BIO, ("Not new bio when canceling (bp=%p).", bp)); g_io_deliver(bp, ENXIO); } } static struct bio * g_eli_takefirst(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) return (bioq_takefirst(&sc->sc_queue)); /* * Device suspended, so we skip new I/O requests. */ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_pflags != G_ELI_NEW_BIO) break; } if (bp != NULL) bioq_remove(&sc->sc_queue, bp); return (bp); } /* * This is the main function for kernel worker thread when we don't have * hardware acceleration and we have to do cryptography in software. * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM * threads with crypto work. 
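 * The number of workers started per provider comes from the
 * kern.geom.eli.threads sysctl/tunable declared above: 0 (the default)
 * means one worker per CPU, in which case each worker is also bound to its
 * CPU; requests may additionally be coalesced via kern.geom.eli.batch.
 * Both are RWTUN, so they can be set at runtime or from /boot/loader.conf
 * (illustrative values):
 *
 *	kern.geom.eli.threads=2
 *	kern.geom.eli.batch=1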
*/ static void g_eli_worker(void *arg) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; wr = arg; sc = wr->w_softc; #ifdef EARLY_AP_STARTUP MPASS(!sc->sc_cpubind || smp_started); #elif defined(SMP) /* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */ if (sc->sc_cpubind) { while (!smp_started) tsleep(wr, 0, "geli:smp", hz / 4); } #endif thread_lock(curthread); sched_prio(curthread, PUSER); if (sc->sc_cpubind) sched_bind(curthread, wr->w_number % mp_ncpus); thread_unlock(curthread); G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm); for (;;) { mtx_lock(&sc->sc_queue_mtx); again: bp = g_eli_takefirst(sc); if (bp == NULL) { if (sc->sc_flags & G_ELI_FLAG_DESTROY) { g_eli_cancel(sc); LIST_REMOVE(wr, w_next); g_eli_freesession(wr); free(wr, M_ELI); G_ELI_DEBUG(1, "Thread %s exiting.", curthread->td_proc->p_comm); wakeup(&sc->sc_workers); mtx_unlock(&sc->sc_queue_mtx); kproc_exit(0); } while (sc->sc_flags & G_ELI_FLAG_SUSPEND) { if (sc->sc_inflight > 0) { G_ELI_DEBUG(0, "inflight=%d", sc->sc_inflight); /* * We still have inflight BIOs, so * sleep and retry. */ msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:inf", hz / 5); goto again; } /* * Suspend requested, mark the worker as * suspended and go to sleep. */ if (wr->w_active) { g_eli_freesession(wr); wr->w_active = FALSE; } wakeup(&sc->sc_workers); msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:suspend", 0); if (!wr->w_active && !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) { error = g_eli_newsession(wr); KASSERT(error == 0, ("g_eli_newsession() failed on resume (error=%d)", error)); wr->w_active = TRUE; } goto again; } msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0); continue; } if (bp->bio_pflags == G_ELI_NEW_BIO) atomic_add_int(&sc->sc_inflight, 1); mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_pflags == G_ELI_NEW_BIO) { bp->bio_pflags = 0; if (sc->sc_flags & G_ELI_FLAG_AUTH) { if (bp->bio_cmd == BIO_READ) g_eli_auth_read(sc, bp); else g_eli_auth_run(wr, bp); } else { if (bp->bio_cmd == BIO_READ) g_eli_crypto_read(sc, bp, 1); else g_eli_crypto_run(wr, bp); } } else { if (sc->sc_flags & G_ELI_FLAG_AUTH) g_eli_auth_run(wr, bp); else g_eli_crypto_run(wr, bp); } } } int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp, struct g_eli_metadata *md) { struct g_geom *gp; struct g_consumer *cp; u_char *buf = NULL; int error; g_topology_assert(); gp = g_new_geomf(mp, "eli:taste"); gp->start = g_eli_start; gp->access = g_std_access; /* * g_eli_read_metadata() is always called from the event thread. * Our geom is created and destroyed in the same event, so there * could be no orphan nor spoil event in the meantime. */ gp->orphan = g_eli_orphan_spoil_assert; gp->spoiled = g_eli_orphan_spoil_assert; cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error != 0) goto end; error = g_access(cp, 1, 0, 0); if (error != 0) goto end; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); if (buf == NULL) goto end; error = eli_metadata_decode(buf, md); if (error != 0) goto end; /* Metadata was read and decoded successfully. */ end: if (buf != NULL) g_free(buf); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, 0, 0); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); return (error); } /* * The function is called when we had last close on provider and user requested * to close it when this situation occur. 
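 * (This is the detach-on-last-close behaviour driven by g_eli_access()
 * below via the RW-DETACH and W-DETACH flags; from userland it is normally
 * requested with geli(8)'s detach-on-last-close option, e.g. a hedged
 * example with a made-up provider name: "geli attach -d da0p4".)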
*/ static void g_eli_last_close(void *arg, int flags __unused) { struct g_geom *gp; char gpname[64]; int error; g_topology_assert(); gp = arg; strlcpy(gpname, gp->name, sizeof(gpname)); error = g_eli_destroy(gp->softc, TRUE); KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).", gpname, error)); G_ELI_DEBUG(0, "Detached %s on last close.", gpname); } int g_eli_access(struct g_provider *pp, int dr, int dw, int de) { struct g_eli_softc *sc; struct g_geom *gp; gp = pp->geom; sc = gp->softc; if (dw > 0) { if (sc->sc_flags & G_ELI_FLAG_RO) { /* Deny write attempts. */ return (EROFS); } /* Someone is opening us for write, we need to remember that. */ sc->sc_flags |= G_ELI_FLAG_WOPEN; return (0); } /* Is this the last close? */ if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0) return (0); /* * Automatically detach on last close if requested. */ if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) || (sc->sc_flags & G_ELI_FLAG_WOPEN)) { g_post_event(g_eli_last_close, gp, M_WAITOK, NULL); } return (0); } static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif } struct g_geom * g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, const struct g_eli_metadata *md, const u_char *mkey, int nkey) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; u_int i, threads; int error; G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX); gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX); sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO); gp->start = g_eli_start; /* * Spoiling can happen even though we have the provider open * exclusively, e.g. through media change events. */ gp->spoiled = g_eli_orphan; gp->orphan = g_eli_orphan; gp->dumpconf = g_eli_dumpconf; /* * If detach-on-last-close feature is not enabled and we don't operate * on read-only provider, we can simply use g_std_access(). */ if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO)) gp->access = g_eli_access; else gp->access = g_std_access; eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize); sc->sc_nkey = nkey; gp->softc = sc; sc->sc_geom = gp; bioq_init(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF); mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF); pp = NULL; cp = g_new_consumer(gp); error = g_attach(cp, bpp); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot attach to %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).", bpp->name, error); } goto failed; } /* * Keep provider open all the time, so we can run critical tasks, * like Master Keys deletion, without wondering if we can open * provider or not. * We don't open provider for writing only when user requested read-only * access. */ if (sc->sc_flags & G_ELI_FLAG_RO) error = g_access(cp, 1, 0, 1); else error = g_access(cp, 1, 1, 1); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot access %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot access %s (error=%d).", bpp->name, error); } goto failed; } /* * Remember the keys in our softc structure. 
*/ g_eli_mkey_propagate(sc, mkey); LIST_INIT(&sc->sc_workers); threads = g_eli_threads; if (threads == 0) threads = mp_ncpus; sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus); for (i = 0; i < threads; i++) { if (g_eli_cpu_is_disabled(i)) { G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.", bpp->name, i); continue; } wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO); wr->w_softc = sc; wr->w_number = i; wr->w_active = TRUE; error = g_eli_newsession(wr); if (error != 0) { free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } goto failed; } error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0, "g_eli[%u] %s", i, bpp->name); if (error != 0) { g_eli_freesession(wr); free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } goto failed; } LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next); } /* * Create decrypted provider. */ pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX); pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; g_error_provider(pp, 0); G_ELI_DEBUG(0, "Device %s created.", pp->name); G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo), sc->sc_ekeylen); if (sc->sc_flags & G_ELI_FLAG_AUTH) G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo)); G_ELI_DEBUG(0, " Crypto: %s", sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware"); return (gp); failed: mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); /* * Wait for kernel threads self destruction. 
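 * The worker side of this handshake is in g_eli_worker() above: each thread
 * notices G_ELI_FLAG_DESTROY, cancels queued bios, unlinks itself and wakes
 * the destroyer before exiting, roughly:
 *
 *	LIST_REMOVE(wr, w_next);
 *	g_eli_freesession(wr);
 *	free(wr, M_ELI);
 *	wakeup(&sc->sc_workers);	// destroyer msleep()s on this channel
 *	kproc_exit(0);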
*/ while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, -1, -1); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); return (NULL); } int g_eli_destroy(struct g_eli_softc *sc, boolean_t force) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (sc == NULL) return (ENXIO); gp = sc->sc_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_ELI_DEBUG(1, "Device %s is still open, so it " "cannot be definitely removed.", pp->name); sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; g_wither_provider(pp, ENXIO); return (EBUSY); } else { G_ELI_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); gp->softc = NULL; g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) G_ELI_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom_close(gp, ENXIO); return (0); } static int g_eli_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_eli_softc *sc; sc = gp->softc; return (g_eli_destroy(sc, FALSE)); } static int g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider) { u_char *keyfile, *data; char *file, name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL && i == 0) { /* * If there is only one keyfile, allow simpler name. */ snprintf(name, sizeof(name), "%s:geli_keyfile", provider); keyfile = preload_search_by_type(name); } if (keyfile == NULL) return (i); /* Return number of loaded keyfiles. */ data = preload_fetch_addr(keyfile); if (data == NULL) { G_ELI_DEBUG(0, "Cannot find key file data for %s.", name); return (0); } size = preload_fetch_size(keyfile); if (size == 0) { G_ELI_DEBUG(0, "Cannot find key file size for %s.", name); return (0); } file = preload_search_info(keyfile, MODINFO_NAME); if (file == NULL) { G_ELI_DEBUG(0, "Cannot find key file name for %s.", name); return (0); } G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file, provider, name); g_eli_crypto_hmac_update(ctx, data, size); } } static void g_eli_keyfiles_clear(const char *provider) { u_char *keyfile, *data; char name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL) return; data = preload_fetch_addr(keyfile); size = preload_fetch_size(keyfile); if (data != NULL && size != 0) bzero(data, size); } } /* * Tasting is only made on boot. * We detect providers which should be attached before root is mounted. 
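 * Key files for such providers are found as preloaded files whose MODINFO
 * type is "<provider>:geli_keyfile<N>" (see g_eli_keyfiles_load() above).
 * A hedged loader.conf example of supplying one; the variable prefix, the
 * provider name and the path are made up, only the _type string format is
 * fixed by the code above:
 *
 *	geli_da0p4_keyfile0_load="YES"
 *	geli_da0p4_keyfile0_type="da0p4:geli_keyfile0"
 *	geli_da0p4_keyfile0_name="/boot/keys/da0p4.key"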
*/ static struct g_geom * g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_eli_metadata md; struct g_geom *gp; struct hmac_ctx ctx; char passphrase[256]; u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN]; u_int i, nkey, nkeyfiles, tries, showpass; int error; struct keybuf *keybuf; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); if (root_mounted() || g_eli_tries == 0) return (NULL); G_ELI_DEBUG(3, "Tasting %s.", pp->name); error = g_eli_read_metadata(mp, pp, &md); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_ELI_MAGIC) != 0) return (NULL); if (md.md_version > G_ELI_VERSION) { printf("geom_eli.ko module is too old to handle %s.\n", pp->name); return (NULL); } if (md.md_provsize != pp->mediasize) return (NULL); /* Should we attach it on boot? */ if (!(md.md_flags & G_ELI_FLAG_BOOT)) return (NULL); if (md.md_keys == 0x00) { G_ELI_DEBUG(0, "No valid keys on %s.", pp->name); return (NULL); } if (md.md_iterations == -1) { /* If there is no passphrase, we try only once. */ tries = 1; } else { /* Ask for the passphrase no more than g_eli_tries times. */ tries = g_eli_tries; } if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, try all GELI keys. */ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { memcpy(key, keybuf->kb_ents[i].ke_data, sizeof(key)); if (g_eli_mkey_decrypt(&md, key, mkey, &nkey) == 0 ) { explicit_bzero(key, sizeof(key)); goto have_key; } } } } for (i = 0; i <= tries; i++) { g_eli_crypto_hmac_init(&ctx, NULL, 0); /* * Load all key files. */ nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name); if (nkeyfiles == 0 && md.md_iterations == -1) { /* * No key files and no passphrase, something is * definitely wrong here. * geli(8) doesn't allow for such situation, so assume * that there was really no passphrase and in that case * key files are no properly defined in loader.conf. */ G_ELI_DEBUG(0, "Found no key files in loader.conf for %s.", pp->name); return (NULL); } /* Ask for the passphrase if defined. */ if (md.md_iterations >= 0) { /* Try first with cached passphrase. */ if (i == 0) { if (!g_eli_boot_passcache) continue; memcpy(passphrase, cached_passphrase, sizeof(passphrase)); } else { printf("Enter passphrase for %s: ", pp->name); showpass = g_eli_visible_passphrase; if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0) showpass = GETS_ECHOPASS; cngets(passphrase, sizeof(passphrase), showpass); memcpy(cached_passphrase, passphrase, sizeof(passphrase)); } } /* * Prepare Derived-Key from the user passphrase. */ if (md.md_iterations == 0) { g_eli_crypto_hmac_update(&ctx, md.md_salt, sizeof(md.md_salt)); g_eli_crypto_hmac_update(&ctx, passphrase, strlen(passphrase)); explicit_bzero(passphrase, sizeof(passphrase)); } else if (md.md_iterations > 0) { u_char dkey[G_ELI_USERKEYLEN]; pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt, sizeof(md.md_salt), passphrase, md.md_iterations); bzero(passphrase, sizeof(passphrase)); g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey)); explicit_bzero(dkey, sizeof(dkey)); } g_eli_crypto_hmac_final(&ctx, key, 0); /* * Decrypt Master-Key. */ error = g_eli_mkey_decrypt(&md, key, mkey, &nkey); bzero(key, sizeof(key)); if (error == -1) { if (i == tries) { G_ELI_DEBUG(0, "Wrong key for %s. No tries left.", pp->name); g_eli_keyfiles_clear(pp->name); return (NULL); } if (i > 0) { G_ELI_DEBUG(0, "Wrong key for %s. Tries left: %u.", pp->name, tries - i); } /* Try again. 
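 * For reference, the candidate user key that was just rejected is built
 * earlier in this loop roughly as follows (an outline, not literal code):
 *
 *	HMAC_Init(&ctx);
 *	HMAC_Update(&ctx, keyfile data ...);		// if any were preloaded
 *	if (iterations == 0)
 *		HMAC_Update(&ctx, salt), HMAC_Update(&ctx, passphrase);
 *	else if (iterations > 0)
 *		HMAC_Update(&ctx, pkcs5v2(salt, passphrase, iterations));
 *	HMAC_Final(&ctx, key);		// then g_eli_mkey_decrypt(&md, key, ...)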
*/ continue; } else if (error > 0) { G_ELI_DEBUG(0, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); g_eli_keyfiles_clear(pp->name); return (NULL); } g_eli_keyfiles_clear(pp->name); G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); break; } have_key: /* * We have correct key, let's attach provider. */ gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey); bzero(mkey, sizeof(mkey)); bzero(&md, sizeof(md)); if (gp == NULL) { G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name, G_ELI_SUFFIX); return (NULL); } return (gp); } static void g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_eli_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL || cp != NULL) return; /* Nothing here. */ sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_total); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_allocated); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_printf(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if (sc->sc_flags & (flag)) { \ if (!first) \ sbuf_printf(sb, ", "); \ else \ first = 0; \ sbuf_printf(sb, name); \ } \ } while (0) ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND"); ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY"); ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER"); ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME"); ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT"); ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH"); ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH"); ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH"); ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN"); ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY"); ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY"); ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE"); ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT"); ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS"); #undef ADD_FLAG } sbuf_printf(sb, "\n"); if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) { sbuf_printf(sb, "%s%u\n", indent, sc->sc_nkey); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_version); sbuf_printf(sb, "%s", indent); switch (sc->sc_crypto) { case G_ELI_CRYPTO_HW: sbuf_printf(sb, "hardware"); break; case G_ELI_CRYPTO_SW: sbuf_printf(sb, "software"); break; default: sbuf_printf(sb, "UNKNOWN"); break; } sbuf_printf(sb, "\n"); if (sc->sc_flags & G_ELI_FLAG_AUTH) { sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_aalgo)); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_ekeylen); sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_ealgo)); sbuf_printf(sb, "%s%s\n", indent, (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE"); } static void g_eli_shutdown_pre_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp; struct g_eli_softc *sc; int error; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { sc = gp->softc; if (sc == NULL) continue; pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name)); if (pp->acr + pp->acw + pp->ace == 0) error = g_eli_destroy(sc, TRUE); else { sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; } } g_topology_unlock(); } static void g_eli_init(struct g_class *mp) { g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); if (g_eli_pre_sync == NULL) G_ELI_DEBUG(0, "Warning! 
Cannot register shutdown event."); } static void g_eli_fini(struct g_class *mp) { if (g_eli_pre_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync); } DECLARE_GEOM_CLASS(g_eli_class, g_eli); MODULE_DEPEND(g_eli, crypto, 1, 1, 1); +MODULE_VERSION(geom_eli, 0); Index: stable/11/sys/geom/gate/g_gate.c =================================================================== --- stable/11/sys/geom/gate/g_gate.c (revision 332639) +++ stable/11/sys/geom/gate/g_gate.c (revision 332640) @@ -1,964 +1,965 @@ /*- * Copyright (c) 2004-2006 Pawel Jakub Dawidek * Copyright (c) 2009-2010 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Pawel Jakub Dawidek * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_gate, "GEOM Gate module"); static MALLOC_DEFINE(M_GATE, "gg_data", "GEOM Gate Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, gate, CTLFLAG_RW, 0, "GEOM_GATE configuration"); static int g_gate_debug = 0; SYSCTL_INT(_kern_geom_gate, OID_AUTO, debug, CTLFLAG_RWTUN, &g_gate_debug, 0, "Debug level"); static u_int g_gate_maxunits = 256; SYSCTL_UINT(_kern_geom_gate, OID_AUTO, maxunits, CTLFLAG_RDTUN, &g_gate_maxunits, 0, "Maximum number of ggate devices"); struct g_class g_gate_class = { .name = G_GATE_CLASS_NAME, .version = G_VERSION, }; static struct cdev *status_dev; static d_ioctl_t g_gate_ioctl; static struct cdevsw g_gate_cdevsw = { .d_version = D_VERSION, .d_ioctl = g_gate_ioctl, .d_name = G_GATE_CTL_NAME }; static struct g_gate_softc **g_gate_units; static u_int g_gate_nunits; static struct mtx g_gate_units_lock; static int g_gate_destroy(struct g_gate_softc *sc, boolean_t force) { struct bio_queue_head queue; struct g_provider *pp; struct g_consumer *cp; struct g_geom *gp; struct bio *bp; g_topology_assert(); mtx_assert(&g_gate_units_lock, MA_OWNED); pp = sc->sc_provider; if (!force && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { mtx_unlock(&g_gate_units_lock); return (EBUSY); } mtx_unlock(&g_gate_units_lock); mtx_lock(&sc->sc_queue_mtx); if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0) sc->sc_flags |= G_GATE_FLAG_DESTROY; wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); gp = pp->geom; g_wither_provider(pp, ENXIO); callout_drain(&sc->sc_callout); bioq_init(&queue); mtx_lock(&sc->sc_queue_mtx); while ((bp = bioq_takefirst(&sc->sc_inqueue)) != NULL) { sc->sc_queue_count--; bioq_insert_tail(&queue, bp); } while ((bp = bioq_takefirst(&sc->sc_outqueue)) != NULL) { sc->sc_queue_count--; bioq_insert_tail(&queue, bp); } mtx_unlock(&sc->sc_queue_mtx); g_topology_unlock(); while ((bp = bioq_takefirst(&queue)) != NULL) { G_GATE_LOGREQ(1, bp, "Request canceled."); g_io_deliver(bp, ENXIO); } mtx_lock(&g_gate_units_lock); /* One reference is ours. */ sc->sc_ref--; while (sc->sc_ref > 0) msleep(&sc->sc_ref, &g_gate_units_lock, 0, "gg:destroy", 0); g_gate_units[sc->sc_unit] = NULL; KASSERT(g_gate_nunits > 0, ("negative g_gate_nunits?")); g_gate_nunits--; mtx_unlock(&g_gate_units_lock); mtx_destroy(&sc->sc_queue_mtx); g_topology_lock(); if ((cp = sc->sc_readcons) != NULL) { sc->sc_readcons = NULL; (void)g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); } G_GATE_DEBUG(1, "Device %s destroyed.", gp->name); gp->softc = NULL; g_wither_geom(gp, ENXIO); sc->sc_provider = NULL; free(sc, M_GATE); return (0); } static int g_gate_access(struct g_provider *pp, int dr, int dw, int de) { struct g_gate_softc *sc; if (dr <= 0 && dw <= 0 && de <= 0) return (0); sc = pp->geom->softc; if (sc == NULL || (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) return (ENXIO); /* XXX: Hack to allow read-only mounts. 
*/ #if 0 if ((sc->sc_flags & G_GATE_FLAG_READONLY) != 0 && dw > 0) return (EPERM); #endif if ((sc->sc_flags & G_GATE_FLAG_WRITEONLY) != 0 && dr > 0) return (EPERM); return (0); } static void g_gate_queue_io(struct bio *bp) { struct g_gate_softc *sc; sc = bp->bio_to->geom->softc; if (sc == NULL || (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) { g_io_deliver(bp, ENXIO); return; } mtx_lock(&sc->sc_queue_mtx); if (sc->sc_queue_size > 0 && sc->sc_queue_count > sc->sc_queue_size) { mtx_unlock(&sc->sc_queue_mtx); G_GATE_LOGREQ(1, bp, "Queue full, request canceled."); g_io_deliver(bp, ENOMEM); return; } bp->bio_driver1 = (void *)sc->sc_seq; sc->sc_seq++; sc->sc_queue_count++; bioq_insert_tail(&sc->sc_inqueue, bp); wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); } static void g_gate_done(struct bio *cbp) { struct bio *pbp; pbp = cbp->bio_parent; if (cbp->bio_error == 0) { pbp->bio_completed = cbp->bio_completed; g_destroy_bio(cbp); pbp->bio_inbed++; g_io_deliver(pbp, 0); } else { /* If direct read failed, pass it through userland daemon. */ g_destroy_bio(cbp); pbp->bio_children--; g_gate_queue_io(pbp); } } static void g_gate_start(struct bio *pbp) { struct g_gate_softc *sc; sc = pbp->bio_to->geom->softc; if (sc == NULL || (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) { g_io_deliver(pbp, ENXIO); return; } G_GATE_LOGREQ(2, pbp, "Request received."); switch (pbp->bio_cmd) { case BIO_READ: if (sc->sc_readcons != NULL) { struct bio *cbp; cbp = g_clone_bio(pbp); if (cbp == NULL) { g_io_deliver(pbp, ENOMEM); return; } cbp->bio_done = g_gate_done; cbp->bio_offset = pbp->bio_offset + sc->sc_readoffset; cbp->bio_to = sc->sc_readcons->provider; g_io_request(cbp, sc->sc_readcons); return; } break; case BIO_DELETE: case BIO_WRITE: case BIO_FLUSH: /* XXX: Hack to allow read-only mounts. 
*/ if ((sc->sc_flags & G_GATE_FLAG_READONLY) != 0) { g_io_deliver(pbp, EPERM); return; } break; case BIO_GETATTR: default: G_GATE_LOGREQ(2, pbp, "Ignoring request."); g_io_deliver(pbp, EOPNOTSUPP); return; } g_gate_queue_io(pbp); } static struct g_gate_softc * g_gate_hold(int unit, const char *name) { struct g_gate_softc *sc = NULL; mtx_lock(&g_gate_units_lock); if (unit >= 0 && unit < g_gate_maxunits) sc = g_gate_units[unit]; else if (unit == G_GATE_NAME_GIVEN) { KASSERT(name != NULL, ("name is NULL")); for (unit = 0; unit < g_gate_maxunits; unit++) { if (g_gate_units[unit] == NULL) continue; if (strcmp(name, g_gate_units[unit]->sc_provider->name) != 0) { continue; } sc = g_gate_units[unit]; break; } } if (sc != NULL) sc->sc_ref++; mtx_unlock(&g_gate_units_lock); return (sc); } static void g_gate_release(struct g_gate_softc *sc) { g_topology_assert_not(); mtx_lock(&g_gate_units_lock); sc->sc_ref--; KASSERT(sc->sc_ref >= 0, ("Negative sc_ref for %s.", sc->sc_name)); if (sc->sc_ref == 0 && (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) wakeup(&sc->sc_ref); mtx_unlock(&g_gate_units_lock); } static int g_gate_getunit(int unit, int *errorp) { mtx_assert(&g_gate_units_lock, MA_OWNED); if (unit >= 0) { if (unit >= g_gate_maxunits) *errorp = EINVAL; else if (g_gate_units[unit] == NULL) return (unit); else *errorp = EEXIST; } else { for (unit = 0; unit < g_gate_maxunits; unit++) { if (g_gate_units[unit] == NULL) return (unit); } *errorp = ENFILE; } return (-1); } static void g_gate_guard(void *arg) { struct bio_queue_head queue; struct g_gate_softc *sc; struct bintime curtime; struct bio *bp, *bp2; sc = arg; binuptime(&curtime); g_gate_hold(sc->sc_unit, NULL); bioq_init(&queue); mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH_SAFE(bp, &sc->sc_inqueue.queue, bio_queue, bp2) { if (curtime.sec - bp->bio_t0.sec < 5) continue; bioq_remove(&sc->sc_inqueue, bp); sc->sc_queue_count--; bioq_insert_tail(&queue, bp); } TAILQ_FOREACH_SAFE(bp, &sc->sc_outqueue.queue, bio_queue, bp2) { if (curtime.sec - bp->bio_t0.sec < 5) continue; bioq_remove(&sc->sc_outqueue, bp); sc->sc_queue_count--; bioq_insert_tail(&queue, bp); } mtx_unlock(&sc->sc_queue_mtx); while ((bp = bioq_takefirst(&queue)) != NULL) { G_GATE_LOGREQ(1, bp, "Request timeout."); g_io_deliver(bp, EIO); } if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0) { callout_reset(&sc->sc_callout, sc->sc_timeout * hz, g_gate_guard, sc); } g_gate_release(sc); } static void g_gate_orphan(struct g_consumer *cp) { struct g_gate_softc *sc; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; KASSERT(cp == sc->sc_readcons, ("cp=%p sc_readcons=%p", cp, sc->sc_readcons)); sc->sc_readcons = NULL; G_GATE_DEBUG(1, "Destroying read consumer on provider %s orphan.", cp->provider->name); (void)g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); } static void g_gate_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_gate_softc *sc; sc = gp->softc; if (sc == NULL || pp != NULL || cp != NULL) return; sc = g_gate_hold(sc->sc_unit, NULL); if (sc == NULL) return; if ((sc->sc_flags & G_GATE_FLAG_READONLY) != 0) { sbuf_printf(sb, "%s%s\n", indent, "read-only"); } else if ((sc->sc_flags & G_GATE_FLAG_WRITEONLY) != 0) { sbuf_printf(sb, "%s%s\n", indent, "write-only"); } else { sbuf_printf(sb, "%s%s\n", indent, "read-write"); } if (sc->sc_readcons != NULL) { sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)sc->sc_readoffset); sbuf_printf(sb, "%s%s\n", indent, 
sc->sc_readcons->provider->name); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_timeout); sbuf_printf(sb, "%s%s\n", indent, sc->sc_info); sbuf_printf(sb, "%s%u\n", indent, sc->sc_queue_count); sbuf_printf(sb, "%s%u\n", indent, sc->sc_queue_size); sbuf_printf(sb, "%s%u\n", indent, sc->sc_ref); sbuf_printf(sb, "%s%d\n", indent, sc->sc_unit); g_topology_unlock(); g_gate_release(sc); g_topology_lock(); } static int g_gate_create(struct g_gate_ctl_create *ggio) { struct g_gate_softc *sc; struct g_geom *gp; struct g_provider *pp, *ropp; struct g_consumer *cp; char name[NAME_MAX]; int error = 0, unit; if (ggio->gctl_mediasize <= 0) { G_GATE_DEBUG(1, "Invalid media size."); return (EINVAL); } if (ggio->gctl_sectorsize <= 0) { G_GATE_DEBUG(1, "Invalid sector size."); return (EINVAL); } if (!powerof2(ggio->gctl_sectorsize)) { G_GATE_DEBUG(1, "Invalid sector size."); return (EINVAL); } if ((ggio->gctl_mediasize % ggio->gctl_sectorsize) != 0) { G_GATE_DEBUG(1, "Invalid media size."); return (EINVAL); } if ((ggio->gctl_flags & G_GATE_FLAG_READONLY) != 0 && (ggio->gctl_flags & G_GATE_FLAG_WRITEONLY) != 0) { G_GATE_DEBUG(1, "Invalid flags."); return (EINVAL); } if (ggio->gctl_unit != G_GATE_UNIT_AUTO && ggio->gctl_unit != G_GATE_NAME_GIVEN && ggio->gctl_unit < 0) { G_GATE_DEBUG(1, "Invalid unit number."); return (EINVAL); } if (ggio->gctl_unit == G_GATE_NAME_GIVEN && ggio->gctl_name[0] == '\0') { G_GATE_DEBUG(1, "No device name."); return (EINVAL); } sc = malloc(sizeof(*sc), M_GATE, M_WAITOK | M_ZERO); sc->sc_flags = (ggio->gctl_flags & G_GATE_USERFLAGS); strlcpy(sc->sc_info, ggio->gctl_info, sizeof(sc->sc_info)); sc->sc_seq = 1; bioq_init(&sc->sc_inqueue); bioq_init(&sc->sc_outqueue); mtx_init(&sc->sc_queue_mtx, "gg:queue", NULL, MTX_DEF); sc->sc_queue_count = 0; sc->sc_queue_size = ggio->gctl_maxcount; if (sc->sc_queue_size > G_GATE_MAX_QUEUE_SIZE) sc->sc_queue_size = G_GATE_MAX_QUEUE_SIZE; sc->sc_timeout = ggio->gctl_timeout; callout_init(&sc->sc_callout, 1); mtx_lock(&g_gate_units_lock); sc->sc_unit = g_gate_getunit(ggio->gctl_unit, &error); if (sc->sc_unit < 0) goto fail1; if (ggio->gctl_unit == G_GATE_NAME_GIVEN) snprintf(name, sizeof(name), "%s", ggio->gctl_name); else { snprintf(name, sizeof(name), "%s%d", G_GATE_PROVIDER_NAME, sc->sc_unit); } /* Check for name collision. 
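 * The name checked below is either the caller-supplied gctl_name or
 * G_GATE_PROVIDER_NAME plus the unit number.  For reference, a userland
 * creator (ggatel(8)/ggatec(8) or a custom daemon) reaches this function
 * through the control device; a hedged sketch, with the sizes and fd
 * handling made up for illustration and field names as used in this file:
 *
 *	struct g_gate_ctl_create ggio = { .gctl_version = G_GATE_VERSION };
 *	ggio.gctl_mediasize  = 1024 * 1024 * 1024;	// must be a multiple
 *	ggio.gctl_sectorsize = 512;			// of the sector size
 *	ggio.gctl_maxcount   = 64;			// queue size limit
 *	ggio.gctl_timeout    = 30;
 *	ggio.gctl_unit       = G_GATE_UNIT_AUTO;
 *	fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR);
 *	ioctl(fd, G_GATE_CMD_CREATE, &ggio);	// unit comes back in gctl_unit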
*/ for (unit = 0; unit < g_gate_maxunits; unit++) { if (g_gate_units[unit] == NULL) continue; if (strcmp(name, g_gate_units[unit]->sc_name) != 0) continue; error = EEXIST; goto fail1; } sc->sc_name = name; g_gate_units[sc->sc_unit] = sc; g_gate_nunits++; mtx_unlock(&g_gate_units_lock); g_topology_lock(); if (ggio->gctl_readprov[0] == '\0') { ropp = NULL; } else { ropp = g_provider_by_name(ggio->gctl_readprov); if (ropp == NULL) { G_GATE_DEBUG(1, "Provider %s doesn't exist.", ggio->gctl_readprov); error = EINVAL; goto fail2; } if ((ggio->gctl_readoffset % ggio->gctl_sectorsize) != 0) { G_GATE_DEBUG(1, "Invalid read offset."); error = EINVAL; goto fail2; } if (ggio->gctl_mediasize + ggio->gctl_readoffset > ropp->mediasize) { G_GATE_DEBUG(1, "Invalid read offset or media size."); error = EINVAL; goto fail2; } } gp = g_new_geomf(&g_gate_class, "%s", name); gp->start = g_gate_start; gp->access = g_gate_access; gp->orphan = g_gate_orphan; gp->dumpconf = g_gate_dumpconf; gp->softc = sc; if (ropp != NULL) { cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, ropp); if (error != 0) { G_GATE_DEBUG(1, "Unable to attach to %s.", ropp->name); goto fail3; } error = g_access(cp, 1, 0, 0); if (error != 0) { G_GATE_DEBUG(1, "Unable to access %s.", ropp->name); g_detach(cp); goto fail3; } sc->sc_readcons = cp; sc->sc_readoffset = ggio->gctl_readoffset; } ggio->gctl_unit = sc->sc_unit; pp = g_new_providerf(gp, "%s", name); pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; pp->mediasize = ggio->gctl_mediasize; pp->sectorsize = ggio->gctl_sectorsize; sc->sc_provider = pp; g_error_provider(pp, 0); g_topology_unlock(); mtx_lock(&g_gate_units_lock); sc->sc_name = sc->sc_provider->name; mtx_unlock(&g_gate_units_lock); G_GATE_DEBUG(1, "Device %s created.", gp->name); if (sc->sc_timeout > 0) { callout_reset(&sc->sc_callout, sc->sc_timeout * hz, g_gate_guard, sc); } return (0); fail3: g_destroy_consumer(cp); g_destroy_geom(gp); fail2: g_topology_unlock(); mtx_lock(&g_gate_units_lock); g_gate_units[sc->sc_unit] = NULL; KASSERT(g_gate_nunits > 0, ("negative g_gate_nunits?")); g_gate_nunits--; fail1: mtx_unlock(&g_gate_units_lock); mtx_destroy(&sc->sc_queue_mtx); free(sc, M_GATE); return (error); } static int g_gate_modify(struct g_gate_softc *sc, struct g_gate_ctl_modify *ggio) { struct g_provider *pp; struct g_consumer *cp; int error; if ((ggio->gctl_modify & GG_MODIFY_MEDIASIZE) != 0) { if (ggio->gctl_mediasize <= 0) { G_GATE_DEBUG(1, "Invalid media size."); return (EINVAL); } pp = sc->sc_provider; if ((ggio->gctl_mediasize % pp->sectorsize) != 0) { G_GATE_DEBUG(1, "Invalid media size."); return (EINVAL); } /* TODO */ return (EOPNOTSUPP); } if ((ggio->gctl_modify & GG_MODIFY_INFO) != 0) (void)strlcpy(sc->sc_info, ggio->gctl_info, sizeof(sc->sc_info)); cp = NULL; if ((ggio->gctl_modify & GG_MODIFY_READPROV) != 0) { g_topology_lock(); if (sc->sc_readcons != NULL) { cp = sc->sc_readcons; sc->sc_readcons = NULL; (void)g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); } if (ggio->gctl_readprov[0] != '\0') { pp = g_provider_by_name(ggio->gctl_readprov); if (pp == NULL) { g_topology_unlock(); G_GATE_DEBUG(1, "Provider %s doesn't exist.", ggio->gctl_readprov); return (EINVAL); } cp = g_new_consumer(sc->sc_provider->geom); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { G_GATE_DEBUG(1, "Unable to attach to %s.", pp->name); } else { error = g_access(cp, 1, 0, 0); if (error != 0) { G_GATE_DEBUG(1, "Unable to access 
%s.", pp->name); g_detach(cp); } } if (error != 0) { g_destroy_consumer(cp); g_topology_unlock(); return (error); } } } else { cp = sc->sc_readcons; } if ((ggio->gctl_modify & GG_MODIFY_READOFFSET) != 0) { if (cp == NULL) { G_GATE_DEBUG(1, "No read provider."); return (EINVAL); } pp = sc->sc_provider; if ((ggio->gctl_readoffset % pp->sectorsize) != 0) { G_GATE_DEBUG(1, "Invalid read offset."); return (EINVAL); } if (pp->mediasize + ggio->gctl_readoffset > cp->provider->mediasize) { G_GATE_DEBUG(1, "Invalid read offset or media size."); return (EINVAL); } sc->sc_readoffset = ggio->gctl_readoffset; } if ((ggio->gctl_modify & GG_MODIFY_READPROV) != 0) { sc->sc_readcons = cp; g_topology_unlock(); } return (0); } #define G_GATE_CHECK_VERSION(ggio) do { \ if ((ggio)->gctl_version != G_GATE_VERSION) { \ printf("Version mismatch %d != %d.\n", \ ggio->gctl_version, G_GATE_VERSION); \ return (EINVAL); \ } \ } while (0) static int g_gate_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { struct g_gate_softc *sc; struct bio *bp; int error = 0; G_GATE_DEBUG(4, "ioctl(%s, %lx, %p, %x, %p)", devtoname(dev), cmd, addr, flags, td); switch (cmd) { case G_GATE_CMD_CREATE: { struct g_gate_ctl_create *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); error = g_gate_create(ggio); /* * Reset TDP_GEOM flag. * There are pending events for sure, because we just created * new provider and other classes want to taste it, but we * cannot answer on I/O requests until we're here. */ td->td_pflags &= ~TDP_GEOM; return (error); } case G_GATE_CMD_MODIFY: { struct g_gate_ctl_modify *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, NULL); if (sc == NULL) return (ENXIO); error = g_gate_modify(sc, ggio); g_gate_release(sc); return (error); } case G_GATE_CMD_DESTROY: { struct g_gate_ctl_destroy *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, ggio->gctl_name); if (sc == NULL) return (ENXIO); g_topology_lock(); mtx_lock(&g_gate_units_lock); error = g_gate_destroy(sc, ggio->gctl_force); g_topology_unlock(); if (error != 0) g_gate_release(sc); return (error); } case G_GATE_CMD_CANCEL: { struct g_gate_ctl_cancel *ggio = (void *)addr; struct bio *tbp, *lbp; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, ggio->gctl_name); if (sc == NULL) return (ENXIO); lbp = NULL; mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH_SAFE(bp, &sc->sc_outqueue.queue, bio_queue, tbp) { if (ggio->gctl_seq == 0 || ggio->gctl_seq == (uintptr_t)bp->bio_driver1) { G_GATE_LOGREQ(1, bp, "Request canceled."); bioq_remove(&sc->sc_outqueue, bp); /* * Be sure to put requests back onto incoming * queue in the proper order. */ if (lbp == NULL) bioq_insert_head(&sc->sc_inqueue, bp); else { TAILQ_INSERT_AFTER(&sc->sc_inqueue.queue, lbp, bp, bio_queue); } lbp = bp; /* * If only one request was canceled, leave now. 
*/ if (ggio->gctl_seq != 0) break; } } if (ggio->gctl_unit == G_GATE_NAME_GIVEN) ggio->gctl_unit = sc->sc_unit; mtx_unlock(&sc->sc_queue_mtx); g_gate_release(sc); return (error); } case G_GATE_CMD_START: { struct g_gate_ctl_io *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, NULL); if (sc == NULL) return (ENXIO); error = 0; for (;;) { mtx_lock(&sc->sc_queue_mtx); bp = bioq_first(&sc->sc_inqueue); if (bp != NULL) break; if ((sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) { ggio->gctl_error = ECANCELED; mtx_unlock(&sc->sc_queue_mtx); goto start_end; } if (msleep(sc, &sc->sc_queue_mtx, PPAUSE | PDROP | PCATCH, "ggwait", 0) != 0) { ggio->gctl_error = ECANCELED; goto start_end; } } ggio->gctl_cmd = bp->bio_cmd; if (bp->bio_cmd == BIO_WRITE && bp->bio_length > ggio->gctl_length) { mtx_unlock(&sc->sc_queue_mtx); ggio->gctl_length = bp->bio_length; ggio->gctl_error = ENOMEM; goto start_end; } bioq_remove(&sc->sc_inqueue, bp); bioq_insert_tail(&sc->sc_outqueue, bp); mtx_unlock(&sc->sc_queue_mtx); ggio->gctl_seq = (uintptr_t)bp->bio_driver1; ggio->gctl_offset = bp->bio_offset; ggio->gctl_length = bp->bio_length; switch (bp->bio_cmd) { case BIO_READ: case BIO_DELETE: case BIO_FLUSH: break; case BIO_WRITE: error = copyout(bp->bio_data, ggio->gctl_data, bp->bio_length); if (error != 0) { mtx_lock(&sc->sc_queue_mtx); bioq_remove(&sc->sc_outqueue, bp); bioq_insert_head(&sc->sc_inqueue, bp); mtx_unlock(&sc->sc_queue_mtx); goto start_end; } break; } start_end: g_gate_release(sc); return (error); } case G_GATE_CMD_DONE: { struct g_gate_ctl_io *ggio = (void *)addr; G_GATE_CHECK_VERSION(ggio); sc = g_gate_hold(ggio->gctl_unit, NULL); if (sc == NULL) return (ENOENT); error = 0; mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH(bp, &sc->sc_outqueue.queue, bio_queue) { if (ggio->gctl_seq == (uintptr_t)bp->bio_driver1) break; } if (bp != NULL) { bioq_remove(&sc->sc_outqueue, bp); sc->sc_queue_count--; } mtx_unlock(&sc->sc_queue_mtx); if (bp == NULL) { /* * Request was probably canceled. 
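 * For reference, the normal START/DONE round trip handled above is driven
 * from userland roughly as follows (a hedged outline; fd, unit, buf and
 * bufsize are hypothetical, the ioctls, gctl_* fields and BIO_* commands
 * are the ones used in this file):
 *
 *	struct g_gate_ctl_io ggio = { .gctl_version = G_GATE_VERSION };
 *	ggio.gctl_unit = unit;
 *	for (;;) {
 *		ggio.gctl_data = buf;
 *		ggio.gctl_length = bufsize;
 *		ggio.gctl_error = 0;
 *		ioctl(fd, G_GATE_CMD_START, &ggio);	// blocks for a request
 *		// BIO_WRITE data arrives in buf; for BIO_READ fill buf here,
 *		// then set gctl_error (0, EAGAIN to requeue, or an errno)
 *		ioctl(fd, G_GATE_CMD_DONE, &ggio);	// completes the bio
 *	}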
*/ goto done_end; } if (ggio->gctl_error == EAGAIN) { bp->bio_error = 0; G_GATE_LOGREQ(1, bp, "Request desisted."); mtx_lock(&sc->sc_queue_mtx); sc->sc_queue_count++; bioq_insert_head(&sc->sc_inqueue, bp); wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); } else { bp->bio_error = ggio->gctl_error; if (bp->bio_error == 0) { bp->bio_completed = bp->bio_length; switch (bp->bio_cmd) { case BIO_READ: error = copyin(ggio->gctl_data, bp->bio_data, bp->bio_length); if (error != 0) bp->bio_error = error; break; case BIO_DELETE: case BIO_WRITE: case BIO_FLUSH: break; } } G_GATE_LOGREQ(2, bp, "Request done."); g_io_deliver(bp, bp->bio_error); } done_end: g_gate_release(sc); return (error); } } return (ENOIOCTL); } static void g_gate_device(void) { status_dev = make_dev(&g_gate_cdevsw, 0x0, UID_ROOT, GID_WHEEL, 0600, G_GATE_CTL_NAME); } static int g_gate_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: mtx_init(&g_gate_units_lock, "gg_units_lock", NULL, MTX_DEF); g_gate_units = malloc(g_gate_maxunits * sizeof(g_gate_units[0]), M_GATE, M_WAITOK | M_ZERO); g_gate_nunits = 0; g_gate_device(); break; case MOD_UNLOAD: mtx_lock(&g_gate_units_lock); if (g_gate_nunits > 0) { mtx_unlock(&g_gate_units_lock); error = EBUSY; break; } mtx_unlock(&g_gate_units_lock); mtx_destroy(&g_gate_units_lock); if (status_dev != NULL) destroy_dev(status_dev); free(g_gate_units, M_GATE); break; default: return (EOPNOTSUPP); break; } return (error); } static moduledata_t g_gate_module = { G_GATE_MOD_NAME, g_gate_modevent, NULL }; DECLARE_MODULE(geom_gate, g_gate_module, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); DECLARE_GEOM_CLASS(g_gate_class, g_gate); +MODULE_VERSION(geom_gate, 0); Index: stable/11/sys/geom/geom_bsd.c =================================================================== --- stable/11/sys/geom/geom_bsd.c (revision 332639) +++ stable/11/sys/geom/geom_bsd.c (revision 332640) @@ -1,624 +1,625 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This is the method for dealing with BSD disklabels. It has been * extensively (by my standards at least) commented, in the vain hope that * it will serve as the source in future copy&paste operations. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_bsd, "GEOM BSD disklabels support"); #define BSD_CLASS_NAME "BSD" #define ALPHA_LABEL_OFFSET 64 #define HISTORIC_LABEL_OFFSET 512 #define LABELSIZE (148 + 16 * MAXPARTITIONS) static int g_bsd_once; static void g_bsd_hotwrite(void *arg, int flag); /* * Our private data about one instance. All the rest is handled by the * slice code and stored in its softc, so this is just the stuff * specific to BSD disklabels. */ struct g_bsd_softc { off_t labeloffset; off_t mbroffset; off_t rawoffset; struct disklabel ondisk; u_char label[LABELSIZE]; u_char labelsum[16]; }; /* * Modify our slicer to match proposed disklabel, if possible. * This is where we make sure we don't do something stupid. */ static int g_bsd_modify(struct g_geom *gp, u_char *label) { int i, error; struct partition *ppp; struct g_slicer *gsp; struct g_consumer *cp; struct g_bsd_softc *ms; u_int secsize, u; off_t rawoffset, o; struct disklabel dl; MD5_CTX md5sum; g_topology_assert(); gsp = gp->softc; ms = gsp->softc; error = bsd_disklabel_le_dec(label, &dl, MAXPARTITIONS); if (error) { return (error); } /* Get dimensions of our device. */ cp = LIST_FIRST(&gp->consumer); secsize = cp->provider->sectorsize; /* ... or a smaller sector size. */ if (dl.d_secsize < secsize) { return (EINVAL); } /* ... or a non-multiple sector size. */ if (dl.d_secsize % secsize != 0) { return (EINVAL); } /* Historical braindamage... */ rawoffset = (off_t)dl.d_partitions[RAW_PART].p_offset * dl.d_secsize; for (i = 0; i < dl.d_npartitions; i++) { ppp = &dl.d_partitions[i]; if (ppp->p_size == 0) continue; o = (off_t)ppp->p_offset * dl.d_secsize; if (o < rawoffset) rawoffset = 0; } if (rawoffset != 0 && (off_t)rawoffset != ms->mbroffset) printf("WARNING: %s expected rawoffset %jd, found %jd\n", gp->name, (intmax_t)ms->mbroffset/dl.d_secsize, (intmax_t)rawoffset/dl.d_secsize); /* Don't munge open partitions. */ for (i = 0; i < dl.d_npartitions; i++) { ppp = &dl.d_partitions[i]; o = (off_t)ppp->p_offset * dl.d_secsize; if (o == 0) o = rawoffset; error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK, o - rawoffset, (off_t)ppp->p_size * dl.d_secsize, dl.d_secsize, "%s%c", gp->name, 'a' + i); if (error) return (error); } /* Look good, go for it... 
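 * The loop below repeats the G_SLICE_CONFIG_CHECK walk above, this time
 * with G_SLICE_CONFIG_SET; since the CHECK pass returned before anything
 * was modified, the SET pass is not expected to fail.  The general idiom,
 * with hypothetical helper names:
 *
 *	for (each item)
 *		if (validate(item) != 0)
 *			return (error);		// nothing changed yet
 *	for (each item)
 *		apply(item);			// committed only after all checks pass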
*/ for (u = 0; u < gsp->nslice; u++) { ppp = &dl.d_partitions[u]; o = (off_t)ppp->p_offset * dl.d_secsize; if (o == 0) o = rawoffset; g_slice_config(gp, u, G_SLICE_CONFIG_SET, o - rawoffset, (off_t)ppp->p_size * dl.d_secsize, dl.d_secsize, "%s%c", gp->name, 'a' + u); } /* Update our softc */ ms->ondisk = dl; if (label != ms->label) bcopy(label, ms->label, LABELSIZE); ms->rawoffset = rawoffset; /* * In order to avoid recursively attaching to the same * on-disk label (it's usually visible through the 'c' * partition) we calculate an MD5 and ask if other BSD's * below us love that label. If they do, we don't. */ MD5Init(&md5sum); MD5Update(&md5sum, ms->label, sizeof(ms->label)); MD5Final(ms->labelsum, &md5sum); return (0); } /* * This is an internal helper function, called multiple times from the taste * function to try to locate a disklabel on the disk. More civilized formats * will not need this, as there is only one possible place on disk to look * for the magic spot. */ static int g_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset) { int error; u_char *buf; struct disklabel *dl; off_t secoff; /* * We need to read entire aligned sectors, and we assume that the * disklabel does not span sectors, so one sector is enough. */ secoff = offset % secsize; buf = g_read_data(cp, offset - secoff, secsize, NULL); if (buf == NULL) return (ENOENT); /* Decode into our native format. */ dl = &ms->ondisk; error = bsd_disklabel_le_dec(buf + secoff, dl, MAXPARTITIONS); if (!error) bcopy(buf + secoff, ms->label, LABELSIZE); /* Remember to free the buffer g_read_data() gave us. */ g_free(buf); ms->labeloffset = offset; return (error); } /* * This function writes the current label to disk, possibly updating * the alpha SRM checksum. */ static int g_bsd_writelabel(struct g_geom *gp, u_char *bootcode) { off_t secoff; u_int secsize; struct g_consumer *cp; struct g_slicer *gsp; struct g_bsd_softc *ms; u_char *buf; uint64_t sum; int error, i; gsp = gp->softc; ms = gsp->softc; cp = LIST_FIRST(&gp->consumer); /* Get sector size, we need it to read data. */ secsize = cp->provider->sectorsize; secoff = ms->labeloffset % secsize; if (bootcode == NULL) { buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error); if (buf == NULL) return (error); bcopy(ms->label, buf + secoff, sizeof(ms->label)); } else { buf = bootcode; bcopy(ms->label, buf + ms->labeloffset, sizeof(ms->label)); } if (ms->labeloffset == ALPHA_LABEL_OFFSET) { sum = 0; for (i = 0; i < 63; i++) sum += le64dec(buf + i * 8); le64enc(buf + 504, sum); } if (bootcode == NULL) { error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize); g_free(buf); } else { error = g_write_data(cp, 0, bootcode, BBSIZE); } return(error); } /* * If the user tries to overwrite our disklabel through an open partition * or via a magicwrite config call, we end up here and try to prevent * footshooting as best we can. */ static void g_bsd_hotwrite(void *arg, int flag) { struct bio *bp; struct g_geom *gp; struct g_slicer *gsp; struct g_slice *gsl; struct g_bsd_softc *ms; u_char *p; int error; g_topology_assert(); /* * We should never get canceled, because that would amount to a removal * of the geom while there was outstanding I/O requests. 
*/ KASSERT(flag != EV_CANCEL, ("g_bsd_hotwrite cancelled")); bp = arg; gp = bp->bio_to->geom; gsp = gp->softc; ms = gsp->softc; gsl = &gsp->slices[bp->bio_to->index]; p = (u_char*)bp->bio_data + ms->labeloffset - (bp->bio_offset + gsl->offset); error = g_bsd_modify(gp, p); if (error) { g_io_deliver(bp, EPERM); return; } g_slice_finish_hot(bp); } static int g_bsd_start(struct bio *bp) { struct g_geom *gp; struct g_bsd_softc *ms; struct g_slicer *gsp; gp = bp->bio_to->geom; gsp = gp->softc; ms = gsp->softc; if (bp->bio_cmd == BIO_GETATTR) { if (g_handleattr(bp, "BSD::labelsum", ms->labelsum, sizeof(ms->labelsum))) return (1); } return (0); } /* * Dump configuration information in XML format. * Notice that the function is called once for the geom and once for each * consumer and provider. We let g_slice_dumpconf() do most of the work. */ static void g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_bsd_softc *ms; struct g_slicer *gsp; gsp = gp->softc; ms = gsp->softc; g_slice_dumpconf(sb, indent, gp, cp, pp); if (indent != NULL && pp == NULL && cp == NULL) { sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)ms->labeloffset); sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)ms->rawoffset); sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)ms->mbroffset); } else if (pp != NULL) { if (indent == NULL) sbuf_printf(sb, " ty %d", ms->ondisk.d_partitions[pp->index].p_fstype); else sbuf_printf(sb, "%s%d\n", indent, ms->ondisk.d_partitions[pp->index].p_fstype); } } /* * The taste function is called from the event-handler, with the topology * lock already held and a provider to examine. The flags are unused. * * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and * if we find valid, consistent magic on it, build a geom on it. * * There may be cases where the operator would like to put a BSD-geom on * providers which do not meet all of the requirements. This can be done * by instead passing the G_TF_INSIST flag, which will override these * checks. * * The final flags value is G_TF_TRANSPARENT, which instructs the method * to put a geom on top of the provider and configure it to be as transparent * as possible. This is not really relevant to the BSD method and therefore * not implemented here. */ static struct uuid freebsd_slice = GPT_ENT_TYPE_FREEBSD; static struct g_geom * g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags) { struct g_geom *gp; struct g_consumer *cp; int error, i; struct g_bsd_softc *ms; u_int secsize; struct g_slicer *gsp; u_char hash[16]; MD5_CTX md5sum; struct uuid uuid; g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); /* We don't implement transparent inserts. */ if (flags == G_TF_TRANSPARENT) return (NULL); /* * BSD labels are a subclass of the general "slicing" topology so * a lot of the work can be done by the common "slice" code. * Create a geom with space for MAXPARTITIONS providers, one consumer * and a softc structure for us. Specify the provider to attach * the consumer to and our "start" routine for special requests. * The provider is opened with mode (1,0,0) so we can do reads * from it. */ gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms, sizeof(*ms), g_bsd_start); if (gp == NULL) return (NULL); /* Get the geom_slicer softc from the geom. */ gsp = gp->softc; /* * The do...while loop here allows us to have multiple escapes * using a simple "break". 
This improves code clarity without * ending up in deep nesting and without using goto or come from. */ do { /* * If the provider is an MBR we will only auto attach * to type 165 slices in the G_TF_NORMAL case. We will * attach to any other type. */ error = g_getattr("MBR::type", cp, &i); if (!error) { if (i != 165 && flags == G_TF_NORMAL) break; error = g_getattr("MBR::offset", cp, &ms->mbroffset); if (error) break; } /* Same thing if we are inside a PC98 */ error = g_getattr("PC98::type", cp, &i); if (!error) { if (i != 0xc494 && flags == G_TF_NORMAL) break; error = g_getattr("PC98::offset", cp, &ms->mbroffset); if (error) break; } /* Same thing if we are inside a GPT */ error = g_getattr("GPT::type", cp, &uuid); if (!error) { if (memcmp(&uuid, &freebsd_slice, sizeof(uuid)) != 0 && flags == G_TF_NORMAL) break; } /* Get sector size, we need it to read data. */ secsize = cp->provider->sectorsize; if (secsize < 512) break; /* First look for a label at the start of the second sector. */ error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize); /* * If sector size is not 512 the label still can be at * offset 512, not at the start of the second sector. At least * it's true for labels created by the FreeBSD's bsdlabel(8). */ if (error && secsize != HISTORIC_LABEL_OFFSET) error = g_bsd_try(gp, gsp, cp, secsize, ms, HISTORIC_LABEL_OFFSET); /* Next, look for alpha labels */ if (error) error = g_bsd_try(gp, gsp, cp, secsize, ms, ALPHA_LABEL_OFFSET); /* If we didn't find a label, punt. */ if (error) break; /* * In order to avoid recursively attaching to the same * on-disk label (it's usually visible through the 'c' * partition) we calculate an MD5 and ask if other BSD's * below us love that label. If they do, we don't. */ MD5Init(&md5sum); MD5Update(&md5sum, ms->label, sizeof(ms->label)); MD5Final(ms->labelsum, &md5sum); error = g_getattr("BSD::labelsum", cp, &hash); if (!error && !bcmp(ms->labelsum, hash, sizeof(hash))) break; /* * Process the found disklabel, and modify our "slice" * instance to match it, if possible. */ error = g_bsd_modify(gp, ms->label); } while (0); /* Success or failure, we can close our provider now. */ g_access(cp, -1, 0, 0); /* If we have configured any providers, return the new geom. */ if (gsp->nprovider > 0) { g_slice_conf_hot(gp, 0, ms->labeloffset, LABELSIZE, G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL); gsp->hot = g_bsd_hotwrite; if (!g_bsd_once) { g_bsd_once = 1; printf( "WARNING: geom_bsd (geom %s) is deprecated, " "use gpart instead.\n", gp->name); } return (gp); } /* * ...else push the "self-destruct" button, by spoiling our own * consumer. This triggers a call to g_slice_spoiled which will * dismantle what was setup. */ g_slice_spoiled(cp); return (NULL); } struct h0h0 { struct g_geom *gp; struct g_bsd_softc *ms; u_char *label; int error; }; static void g_bsd_callconfig(void *arg, int flag) { struct h0h0 *hp; hp = arg; hp->error = g_bsd_modify(hp->gp, hp->label); if (!hp->error) hp->error = g_bsd_writelabel(hp->gp, NULL); } /* * NB! curthread is user process which GCTL'ed. 
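 * The verbs handled here are "read mbroffset", "write label" and
 * "write bootcode".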
*/ static void g_bsd_config(struct gctl_req *req, struct g_class *mp, char const *verb) { u_char *label; int error; struct h0h0 h0h0; struct g_geom *gp; struct g_slicer *gsp; struct g_consumer *cp; struct g_bsd_softc *ms; g_topology_assert(); gp = gctl_get_geom(req, mp, "geom"); if (gp == NULL) return; cp = LIST_FIRST(&gp->consumer); gsp = gp->softc; ms = gsp->softc; if (!strcmp(verb, "read mbroffset")) { gctl_set_param_err(req, "mbroffset", &ms->mbroffset, sizeof(ms->mbroffset)); return; } else if (!strcmp(verb, "write label")) { label = gctl_get_paraml(req, "label", LABELSIZE); if (label == NULL) return; h0h0.gp = gp; h0h0.ms = gsp->softc; h0h0.label = label; h0h0.error = -1; /* XXX: Does this reference register with our selfdestruct code ? */ error = g_access(cp, 1, 1, 1); if (error) { gctl_error(req, "could not access consumer"); return; } g_bsd_callconfig(&h0h0, 0); error = h0h0.error; g_access(cp, -1, -1, -1); } else if (!strcmp(verb, "write bootcode")) { label = gctl_get_paraml(req, "bootcode", BBSIZE); if (label == NULL) return; /* XXX: Does this reference register with our selfdestruct code ? */ error = g_access(cp, 1, 1, 1); if (error) { gctl_error(req, "could not access consumer"); return; } error = g_bsd_writelabel(gp, label); g_access(cp, -1, -1, -1); } else { gctl_error(req, "Unknown verb parameter"); } return; } /* Finally, register with GEOM infrastructure. */ static struct g_class g_bsd_class = { .name = BSD_CLASS_NAME, .version = G_VERSION, .taste = g_bsd_taste, .ctlreq = g_bsd_config, .dumpconf = g_bsd_dumpconf, }; DECLARE_GEOM_CLASS(g_bsd_class, g_bsd); +MODULE_VERSION(geom_bsd, 0); Index: stable/11/sys/geom/geom_ccd.c =================================================================== --- stable/11/sys/geom/geom_ccd.c (revision 332639) +++ stable/11/sys/geom/geom_ccd.c (revision 332640) @@ -1,908 +1,909 @@ /*- * Copyright (c) 2003 Poul-Henning Kamp. * Copyright (c) 1995 Jason R. Thorpe. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * Copyright (c) 1988 University of Utah. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project * by Jason R. Thorpe. * 4. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Dynamic configuration and disklabel support by: * Jason R. Thorpe * Numerical Aerodynamic Simulation Facility * Mail Stop 258-6 * NASA Ames Research Center * Moffett Field, CA 94035 * * from: Utah $Hdr: cd.c 1.6 90/11/28$ * @(#)cd.c 8.2 (Berkeley) 11/16/93 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* * Number of blocks to untouched in front of a component partition. * This is to avoid violating its disklabel area when it starts at the * beginning of the slice. */ #if !defined(CCD_OFFSET) #define CCD_OFFSET 16 #endif /* sc_flags */ #define CCDF_UNIFORM 0x02 /* use LCCD of sizes for uniform interleave */ #define CCDF_MIRROR 0x04 /* use mirroring */ #define CCDF_NO_OFFSET 0x08 /* do not leave space in front */ #define CCDF_LINUX 0x10 /* use Linux compatibility mode */ /* Mask of user-settable ccd flags. */ #define CCDF_USERMASK (CCDF_UNIFORM|CCDF_MIRROR) /* * Interleave description table. * Computed at boot time to speed irregular-interleave lookups. * The idea is that we interleave in "groups". First we interleave * evenly over all component disks up to the size of the smallest * component (the first group), then we interleave evenly over all * remaining disks up to the size of the next-smallest (second group), * and so on. * * Each table entry describes the interleave characteristics of one * of these groups. For example if a concatenated disk consisted of * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at * DEV_BSIZE (1), the table would have three entries: * * ndisk startblk startoff dev * 3 0 0 0, 1, 2 * 2 9 3 0, 2 * 1 13 5 2 * 0 - - - * * which says that the first nine blocks (0-8) are interleaved over * 3 disks (0, 1, 2) starting at block offset 0 on any component disk, * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting * at component block 3, and the remaining blocks (13-14) are on disk * 2 starting at offset 5. */ struct ccdiinfo { int ii_ndisk; /* # of disks range is interleaved over */ daddr_t ii_startblk; /* starting scaled block # for range */ daddr_t ii_startoff; /* starting component offset (block #) */ int *ii_index; /* ordered list of components in range */ }; /* * Component info table. * Describes a single component of a concatenated disk. */ struct ccdcinfo { daddr_t ci_size; /* size */ struct g_provider *ci_provider; /* provider */ struct g_consumer *ci_consumer; /* consumer */ }; /* * A concatenated disk is described by this structure. 
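 * sc_cinfo describes the individual components and sc_itable holds the
 * interleave table built by ccdinterleave().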
*/ struct ccd_s { LIST_ENTRY(ccd_s) list; int sc_unit; /* logical unit number */ int sc_flags; /* flags */ daddr_t sc_size; /* size of ccd */ int sc_ileave; /* interleave */ u_int sc_ndisks; /* number of components */ struct ccdcinfo *sc_cinfo; /* component info */ struct ccdiinfo *sc_itable; /* interleave table */ u_int32_t sc_secsize; /* # bytes per sector */ int sc_pick; /* side of mirror picked */ daddr_t sc_blk[2]; /* mirror localization */ u_int32_t sc_offset; /* actual offset used */ }; static g_start_t g_ccd_start; static void ccdiodone(struct bio *bp); static void ccdinterleave(struct ccd_s *); static int ccdinit(struct gctl_req *req, struct ccd_s *); static int ccdbuffer(struct bio **ret, struct ccd_s *, struct bio *, daddr_t, caddr_t, long); static void g_ccd_orphan(struct g_consumer *cp) { /* * XXX: We don't do anything here. It is not obvious * XXX: what DTRT would be, so we do what the previous * XXX: code did: ignore it and let the user cope. */ } static int g_ccd_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp1, *cp2; int error; de += dr; de += dw; gp = pp->geom; error = ENXIO; LIST_FOREACH(cp1, &gp->consumer, consumer) { error = g_access(cp1, dr, dw, de); if (error) { LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) break; g_access(cp2, -dr, -dw, -de); } break; } } return (error); } /* * Free the softc and its substructures. */ static void g_ccd_freesc(struct ccd_s *sc) { struct ccdiinfo *ii; g_free(sc->sc_cinfo); if (sc->sc_itable != NULL) { for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++) if (ii->ii_index != NULL) g_free(ii->ii_index); g_free(sc->sc_itable); } g_free(sc); } static int ccdinit(struct gctl_req *req, struct ccd_s *cs) { struct ccdcinfo *ci; daddr_t size; int ix; daddr_t minsize; int maxsecsize; off_t mediasize; u_int sectorsize; cs->sc_size = 0; maxsecsize = 0; minsize = 0; if (cs->sc_flags & CCDF_LINUX) { cs->sc_offset = 0; cs->sc_ileave *= 2; if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2) gctl_error(req, "Mirror mode for Linux raids is " "only supported with 2 devices"); } else { if (cs->sc_flags & CCDF_NO_OFFSET) cs->sc_offset = 0; else cs->sc_offset = CCD_OFFSET; } for (ix = 0; ix < cs->sc_ndisks; ix++) { ci = &cs->sc_cinfo[ix]; mediasize = ci->ci_provider->mediasize; sectorsize = ci->ci_provider->sectorsize; if (sectorsize > maxsecsize) maxsecsize = sectorsize; size = mediasize / DEV_BSIZE - cs->sc_offset; /* Truncate to interleave boundary */ if (cs->sc_ileave > 1) size -= size % cs->sc_ileave; if (size == 0) { gctl_error(req, "Component %s has effective size zero", ci->ci_provider->name); return(ENODEV); } if (minsize == 0 || size < minsize) minsize = size; ci->ci_size = size; cs->sc_size += size; } /* * Don't allow the interleave to be smaller than * the biggest component sector. */ if ((cs->sc_ileave > 0) && (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { gctl_error(req, "Interleave to small for sector size"); return(EINVAL); } /* * If uniform interleave is desired set all sizes to that of * the smallest component. This will guarantee that a single * interleave table is generated. * * Lost space must be taken into account when calculating the * overall size. Half the space is lost when CCDF_MIRROR is * specified. */ if (cs->sc_flags & CCDF_UNIFORM) { for (ix = 0; ix < cs->sc_ndisks; ix++) { ci = &cs->sc_cinfo[ix]; ci->ci_size = minsize; } cs->sc_size = cs->sc_ndisks * minsize; } if (cs->sc_flags & CCDF_MIRROR) { /* * Check to see if an even number of components * have been specified. 
The interleave must also * be non-zero in order for us to be able to * guarantee the topology. */ if (cs->sc_ndisks % 2) { gctl_error(req, "Mirroring requires an even number of disks"); return(EINVAL); } if (cs->sc_ileave == 0) { gctl_error(req, "An interleave must be specified when mirroring"); return(EINVAL); } cs->sc_size = (cs->sc_ndisks/2) * minsize; } /* * Construct the interleave table. */ ccdinterleave(cs); /* * Create pseudo-geometry based on 1MB cylinders. It's * pretty close. */ cs->sc_secsize = maxsecsize; return (0); } static void ccdinterleave(struct ccd_s *cs) { struct ccdcinfo *ci, *smallci; struct ccdiinfo *ii; daddr_t bn, lbn; int ix; daddr_t size; /* * Allocate an interleave table. The worst case occurs when each * of N disks is of a different size, resulting in N interleave * tables. * * Chances are this is too big, but we don't care. */ size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo); cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO); /* * Trivial case: no interleave (actually interleave of disk size). * Each table entry represents a single component in its entirety. * * An interleave of 0 may not be used with a mirror setup. */ if (cs->sc_ileave == 0) { bn = 0; ii = cs->sc_itable; for (ix = 0; ix < cs->sc_ndisks; ix++) { /* Allocate space for ii_index. */ ii->ii_index = g_malloc(sizeof(int), M_WAITOK); ii->ii_ndisk = 1; ii->ii_startblk = bn; ii->ii_startoff = 0; ii->ii_index[0] = ix; bn += cs->sc_cinfo[ix].ci_size; ii++; } ii->ii_ndisk = 0; return; } /* * The following isn't fast or pretty; it doesn't have to be. */ size = 0; bn = lbn = 0; for (ii = cs->sc_itable; ; ii++) { /* * Allocate space for ii_index. We might allocate more then * we use. */ ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks), M_WAITOK); /* * Locate the smallest of the remaining components */ smallci = NULL; for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) { if (ci->ci_size > size && (smallci == NULL || ci->ci_size < smallci->ci_size)) { smallci = ci; } } /* * Nobody left, all done */ if (smallci == NULL) { ii->ii_ndisk = 0; g_free(ii->ii_index); ii->ii_index = NULL; break; } /* * Record starting logical block using an sc_ileave blocksize. */ ii->ii_startblk = bn / cs->sc_ileave; /* * Record starting component block using an sc_ileave * blocksize. This value is relative to the beginning of * a component disk. */ ii->ii_startoff = lbn; /* * Determine how many disks take part in this interleave * and record their indices. */ ix = 0; for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) { if (ci->ci_size >= smallci->ci_size) { ii->ii_index[ix++] = ci - cs->sc_cinfo; } } ii->ii_ndisk = ix; bn += ix * (smallci->ci_size - size); lbn = smallci->ci_size / cs->sc_ileave; size = smallci->ci_size; } } static void g_ccd_start(struct bio *bp) { long bcount, rcount; struct bio *cbp[2]; caddr_t addr; daddr_t bn; int err; struct ccd_s *cs; cs = bp->bio_to->geom->softc; /* * Block all GETATTR requests, we wouldn't know which of our * subdevices we should ship it off to. * XXX: this may not be the right policy. */ if(bp->bio_cmd == BIO_GETATTR) { g_io_deliver(bp, EINVAL); return; } /* * Translate the partition-relative block number to an absolute. 
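 * For example, with a 512 byte sector size a bio_offset of 65536
 * becomes block 128.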
*/ bn = bp->bio_offset / cs->sc_secsize; /* * Allocate component buffers and fire off the requests */ addr = bp->bio_data; for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) { err = ccdbuffer(cbp, cs, bp, bn, addr, bcount); if (err) { bp->bio_completed += bcount; if (bp->bio_error == 0) bp->bio_error = err; if (bp->bio_completed == bp->bio_length) g_io_deliver(bp, bp->bio_error); return; } rcount = cbp[0]->bio_length; if (cs->sc_flags & CCDF_MIRROR) { /* * Mirroring. Writes go to both disks, reads are * taken from whichever disk seems most appropriate. * * We attempt to localize reads to the disk whos arm * is nearest the read request. We ignore seeks due * to writes when making this determination and we * also try to avoid hogging. */ if (cbp[0]->bio_cmd != BIO_READ) { g_io_request(cbp[0], cbp[0]->bio_from); g_io_request(cbp[1], cbp[1]->bio_from); } else { int pick = cs->sc_pick; daddr_t range = cs->sc_size / 16; if (bn < cs->sc_blk[pick] - range || bn > cs->sc_blk[pick] + range ) { cs->sc_pick = pick = 1 - pick; } cs->sc_blk[pick] = bn + btodb(rcount); g_io_request(cbp[pick], cbp[pick]->bio_from); } } else { /* * Not mirroring */ g_io_request(cbp[0], cbp[0]->bio_from); } bn += btodb(rcount); addr += rcount; } } /* * Build a component buffer header. */ static int ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) { struct ccdcinfo *ci, *ci2 = NULL; struct bio *cbp; daddr_t cbn, cboff; off_t cbc; /* * Determine which component bn falls in. */ cbn = bn; cboff = 0; if (cs->sc_ileave == 0) { /* * Serially concatenated and neither a mirror nor a parity * config. This is a special case. */ daddr_t sblk; sblk = 0; for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) sblk += ci->ci_size; cbn -= sblk; } else { struct ccdiinfo *ii; int ccdisk, off; /* * Calculate cbn, the logical superblock (sc_ileave chunks), * and cboff, a normal block offset (DEV_BSIZE chunks) relative * to cbn. */ cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ /* * Figure out which interleave table to use. */ for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { if (ii->ii_startblk > cbn) break; } ii--; /* * off is the logical superblock relative to the beginning * of this interleave block. */ off = cbn - ii->ii_startblk; /* * We must calculate which disk component to use (ccdisk), * and recalculate cbn to be the superblock relative to * the beginning of the component. This is typically done by * adding 'off' and ii->ii_startoff together. However, 'off' * must typically be divided by the number of components in * this interleave array to be properly convert it from a * CCD-relative logical superblock number to a * component-relative superblock number. */ if (ii->ii_ndisk == 1) { /* * When we have just one disk, it can't be a mirror * or a parity config. */ ccdisk = ii->ii_index[0]; cbn = ii->ii_startoff + off; } else { if (cs->sc_flags & CCDF_MIRROR) { /* * We have forced a uniform mapping, resulting * in a single interleave array. We double * up on the first half of the available * components and our mirror is in the second * half. This only works with a single * interleave array because doubling up * doubles the number of sectors, so there * cannot be another interleave array because * the next interleave array's calculations * would be off. 
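 * For example, with four components the data is spread over components
 * 0 and 1 while the mirror copies go to components 2 and 3.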
*/ int ndisk2 = ii->ii_ndisk / 2; ccdisk = ii->ii_index[off % ndisk2]; cbn = ii->ii_startoff + off / ndisk2; ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; } else { ccdisk = ii->ii_index[off % ii->ii_ndisk]; cbn = ii->ii_startoff + off / ii->ii_ndisk; } } ci = &cs->sc_cinfo[ccdisk]; /* * Convert cbn from a superblock to a normal block so it * can be used to calculate (along with cboff) the normal * block index into this particular disk. */ cbn *= cs->sc_ileave; } /* * Fill in the component buf structure. */ cbp = g_clone_bio(bp); if (cbp == NULL) return (ENOMEM); cbp->bio_done = g_std_done; cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset); cbp->bio_data = addr; if (cs->sc_ileave == 0) cbc = dbtob((off_t)(ci->ci_size - cbn)); else cbc = dbtob((off_t)(cs->sc_ileave - cboff)); cbp->bio_length = (cbc < bcount) ? cbc : bcount; cbp->bio_from = ci->ci_consumer; cb[0] = cbp; if (cs->sc_flags & CCDF_MIRROR) { cbp = g_clone_bio(bp); if (cbp == NULL) return (ENOMEM); cbp->bio_done = cb[0]->bio_done = ccdiodone; cbp->bio_offset = cb[0]->bio_offset; cbp->bio_data = cb[0]->bio_data; cbp->bio_length = cb[0]->bio_length; cbp->bio_from = ci2->ci_consumer; cbp->bio_caller1 = cb[0]; cb[0]->bio_caller1 = cbp; cb[1] = cbp; } return (0); } /* * Called only for mirrored operations. */ static void ccdiodone(struct bio *cbp) { struct bio *mbp, *pbp; mbp = cbp->bio_caller1; pbp = cbp->bio_parent; if (pbp->bio_cmd == BIO_READ) { if (cbp->bio_error == 0) { /* We will not be needing the partner bio */ if (mbp != NULL) { pbp->bio_inbed++; g_destroy_bio(mbp); } g_std_done(cbp); return; } if (mbp != NULL) { /* Try partner the bio instead */ mbp->bio_caller1 = NULL; pbp->bio_inbed++; g_destroy_bio(cbp); g_io_request(mbp, mbp->bio_from); /* * XXX: If this comes back OK, we should actually * try to write the good data on the failed mirror */ return; } g_std_done(cbp); return; } if (mbp != NULL) { mbp->bio_caller1 = NULL; pbp->bio_inbed++; if (cbp->bio_error != 0 && pbp->bio_error == 0) pbp->bio_error = cbp->bio_error; g_destroy_bio(cbp); return; } g_std_done(cbp); } static void g_ccd_create(struct gctl_req *req, struct g_class *mp) { int *unit, *ileave, *nprovider; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; struct ccd_s *sc; struct sbuf *sb; char buf[20]; int i, error; g_topology_assert(); unit = gctl_get_paraml(req, "unit", sizeof (*unit)); if (unit == NULL) { gctl_error(req, "unit parameter not given"); return; } ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave)); if (ileave == NULL) { gctl_error(req, "ileave parameter not given"); return; } nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider)); if (nprovider == NULL) { gctl_error(req, "nprovider parameter not given"); return; } /* Check for duplicate unit */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && sc->sc_unit == *unit) { gctl_error(req, "Unit %d already configured", *unit); return; } } if (*nprovider <= 0) { gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider); return; } /* Check all providers are valid */ for (i = 0; i < *nprovider; i++) { sprintf(buf, "provider%d", i); pp = gctl_get_provider(req, buf); if (pp == NULL) return; } gp = g_new_geomf(mp, "ccd%d", *unit); sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO); gp->softc = sc; sc->sc_ndisks = *nprovider; /* Allocate space for the component info. 
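 * One ccdcinfo entry per component; the attach loop below fills in the
 * consumer and provider for each.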
*/ sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo), M_WAITOK | M_ZERO); /* Create consumers and attach to all providers */ for (i = 0; i < *nprovider; i++) { sprintf(buf, "provider%d", i); pp = gctl_get_provider(req, buf); cp = g_new_consumer(gp); error = g_attach(cp, pp); KASSERT(error == 0, ("attach to %s failed", pp->name)); sc->sc_cinfo[i].ci_consumer = cp; sc->sc_cinfo[i].ci_provider = pp; } sc->sc_unit = *unit; sc->sc_ileave = *ileave; if (gctl_get_param(req, "no_offset", NULL)) sc->sc_flags |= CCDF_NO_OFFSET; if (gctl_get_param(req, "linux", NULL)) sc->sc_flags |= CCDF_LINUX; if (gctl_get_param(req, "uniform", NULL)) sc->sc_flags |= CCDF_UNIFORM; if (gctl_get_param(req, "mirror", NULL)) sc->sc_flags |= CCDF_MIRROR; if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) { printf("%s: disabling mirror, interleave is 0\n", gp->name); sc->sc_flags &= ~(CCDF_MIRROR); } if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) { printf("%s: mirror/parity forces uniform flag\n", gp->name); sc->sc_flags |= CCDF_UNIFORM; } error = ccdinit(req, sc); if (error != 0) { g_ccd_freesc(sc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return; } pp = g_new_providerf(gp, "%s", gp->name); pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize; pp->sectorsize = sc->sc_secsize; g_error_provider(pp, 0); sb = sbuf_new_auto(); sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider); for (i = 0; i < *nprovider; i++) { sbuf_printf(sb, "%s%s", i == 0 ? "(" : ", ", sc->sc_cinfo[i].ci_provider->name); } sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE); if (sc->sc_ileave != 0) sbuf_printf(sb, "interleaved at %d blocks\n", sc->sc_ileave); else sbuf_printf(sb, "concatenated\n"); sbuf_finish(sb); gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } static int g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_provider *pp; struct ccd_s *sc; g_topology_assert(); sc = gp->softc; pp = LIST_FIRST(&gp->provider); if (sc == NULL || pp == NULL) return (EBUSY); if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) { gctl_error(req, "%s is open(r%dw%de%d)", gp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } g_ccd_freesc(sc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static void g_ccd_list(struct gctl_req *req, struct g_class *mp) { struct sbuf *sb; struct ccd_s *cs; struct g_geom *gp; int i, unit, *up; up = gctl_get_paraml(req, "unit", sizeof (*up)); if (up == NULL) { gctl_error(req, "unit parameter not given"); return; } unit = *up; sb = sbuf_new_auto(); LIST_FOREACH(gp, &mp->geom, geom) { cs = gp->softc; if (cs == NULL || (unit >= 0 && unit != cs->sc_unit)) continue; sbuf_printf(sb, "ccd%d\t\t%d\t%d\t", cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK); for (i = 0; i < cs->sc_ndisks; ++i) { sbuf_printf(sb, "%s/dev/%s", i == 0 ? 
"" : " ", cs->sc_cinfo[i].ci_provider->name); } sbuf_printf(sb, "\n"); } sbuf_finish(sb); gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } static void g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb) { struct g_geom *gp; g_topology_assert(); if (!strcmp(verb, "create geom")) { g_ccd_create(req, mp); } else if (!strcmp(verb, "destroy geom")) { gp = gctl_get_geom(req, mp, "geom"); if (gp != NULL) g_ccd_destroy_geom(req, mp, gp); } else if (!strcmp(verb, "list")) { g_ccd_list(req, mp); } else { gctl_error(req, "unknown verb"); } } static struct g_class g_ccd_class = { .name = "CCD", .version = G_VERSION, .ctlreq = g_ccd_config, .destroy_geom = g_ccd_destroy_geom, .start = g_ccd_start, .orphan = g_ccd_orphan, .access = g_ccd_access, }; DECLARE_GEOM_CLASS(g_ccd_class, g_ccd); +MODULE_VERSION(geom_ccd, 0); Index: stable/11/sys/geom/geom_fox.c =================================================================== --- stable/11/sys/geom/geom_fox.c (revision 332639) +++ stable/11/sys/geom/geom_fox.c (revision 332640) @@ -1,485 +1,486 @@ /*- * Copyright (c) 2003 Poul-Henning Kamp * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* This is a GEOM module for handling path selection for multi-path * storage devices. It is named "fox" because it, like they, prefer * to have multiple exits to choose from. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define FOX_CLASS_NAME "FOX" #define FOX_MAGIC "GEOM::FOX" static int g_fox_once; FEATURE(geom_fox, "GEOM FOX redundant path mitigation support"); struct g_fox_softc { off_t mediasize; u_int sectorsize; TAILQ_HEAD(, bio) queue; struct mtx lock; u_char magic[16]; struct g_consumer *path; struct g_consumer *opath; int waiting; int cr, cw, ce; }; /* * This function is called whenever we need to select a new path. 
*/ static void g_fox_select_path(void *arg, int flag) { struct g_geom *gp; struct g_fox_softc *sc; struct g_consumer *cp1; struct bio *bp; int error; g_topology_assert(); if (flag == EV_CANCEL) return; gp = arg; sc = gp->softc; if (sc->opath != NULL) { /* * First, close the old path entirely. */ printf("Closing old path (%s) on fox (%s)\n", sc->opath->provider->name, gp->name); cp1 = LIST_NEXT(sc->opath, consumer); g_access(sc->opath, -sc->cr, -sc->cw, -(sc->ce + 1)); /* * The attempt to reopen it with a exclusive count */ error = g_access(sc->opath, 0, 0, 1); if (error) { /* * Ok, ditch this consumer, we can't use it. */ printf("Drop old path (%s) on fox (%s)\n", sc->opath->provider->name, gp->name); g_detach(sc->opath); g_destroy_consumer(sc->opath); if (LIST_EMPTY(&gp->consumer)) { /* No consumers left */ g_wither_geom(gp, ENXIO); for (;;) { bp = TAILQ_FIRST(&sc->queue); if (bp == NULL) break; TAILQ_REMOVE(&sc->queue, bp, bio_queue); bp->bio_error = ENXIO; g_std_done(bp); } return; } } else { printf("Got e-bit on old path (%s) on fox (%s)\n", sc->opath->provider->name, gp->name); } sc->opath = NULL; } else { cp1 = LIST_FIRST(&gp->consumer); } if (cp1 == NULL) cp1 = LIST_FIRST(&gp->consumer); printf("Open new path (%s) on fox (%s)\n", cp1->provider->name, gp->name); error = g_access(cp1, sc->cr, sc->cw, sc->ce); if (error) { /* * If we failed, we take another trip through here */ printf("Open new path (%s) on fox (%s) failed, reselect.\n", cp1->provider->name, gp->name); sc->opath = cp1; g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL); } else { printf("Open new path (%s) on fox (%s) succeeded\n", cp1->provider->name, gp->name); mtx_lock(&sc->lock); sc->path = cp1; sc->waiting = 0; for (;;) { bp = TAILQ_FIRST(&sc->queue); if (bp == NULL) break; TAILQ_REMOVE(&sc->queue, bp, bio_queue); g_io_request(bp, sc->path); } mtx_unlock(&sc->lock); } } static void g_fox_orphan(struct g_consumer *cp) { struct g_geom *gp; struct g_fox_softc *sc; int error, mark; g_topology_assert(); gp = cp->geom; sc = gp->softc; printf("Removing path (%s) from fox (%s)\n", cp->provider->name, gp->name); mtx_lock(&sc->lock); if (cp == sc->path) { sc->opath = NULL; sc->path = NULL; sc->waiting = 1; mark = 1; } else { mark = 0; } mtx_unlock(&sc->lock); g_access(cp, -cp->acr, -cp->acw, -cp->ace); error = cp->provider->error; g_detach(cp); g_destroy_consumer(cp); if (!LIST_EMPTY(&gp->consumer)) { if (mark) g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL); return; } mtx_destroy(&sc->lock); g_free(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); } static void g_fox_done(struct bio *bp) { struct g_geom *gp; struct g_fox_softc *sc; int error; if (bp->bio_error == 0) { g_std_done(bp); return; } gp = bp->bio_from->geom; sc = gp->softc; if (bp->bio_from != sc->path) { g_io_request(bp, sc->path); return; } mtx_lock(&sc->lock); sc->opath = sc->path; sc->path = NULL; error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL); if (error) { bp->bio_error = ENOMEM; g_std_done(bp); } else { sc->waiting = 1; TAILQ_INSERT_TAIL(&sc->queue, bp, bio_queue); } mtx_unlock(&sc->lock); } static void g_fox_start(struct bio *bp) { struct g_geom *gp; struct bio *bp2; struct g_fox_softc *sc; int error; gp = bp->bio_to->geom; sc = gp->softc; if (sc == NULL) { g_io_deliver(bp, ENXIO); return; } switch(bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); break; } bp2->bio_offset += sc->sectorsize; bp2->bio_done = g_fox_done; mtx_lock(&sc->lock); if 
(sc->path == NULL || !TAILQ_EMPTY(&sc->queue)) { if (sc->waiting == 0) { error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL); if (error) { g_destroy_bio(bp2); bp2 = NULL; g_io_deliver(bp, error); } else { sc->waiting = 1; } } if (bp2 != NULL) TAILQ_INSERT_TAIL(&sc->queue, bp2, bio_queue); } else { g_io_request(bp2, sc->path); } mtx_unlock(&sc->lock); break; default: g_io_deliver(bp, EOPNOTSUPP); break; } return; } static int g_fox_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_fox_softc *sc; struct g_consumer *cp1; int error; g_topology_assert(); gp = pp->geom; sc = gp->softc; if (sc == NULL) { if (dr <= 0 && dw <= 0 && de <= 0) return (0); else return (ENXIO); } if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) { /* * First open, open all consumers with an exclusive bit */ error = 0; LIST_FOREACH(cp1, &gp->consumer, consumer) { error = g_access(cp1, 0, 0, 1); if (error) { printf("FOX: access(%s,0,0,1) = %d\n", cp1->provider->name, error); break; } } if (error) { LIST_FOREACH(cp1, &gp->consumer, consumer) { if (cp1->ace) g_access(cp1, 0, 0, -1); } return (error); } } if (sc->path == NULL) g_fox_select_path(gp, 0); if (sc->path == NULL) error = ENXIO; else error = g_access(sc->path, dr, dw, de); if (error == 0) { sc->cr += dr; sc->cw += dw; sc->ce += de; if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) { /* * Last close, remove e-bit on all consumers */ LIST_FOREACH(cp1, &gp->consumer, consumer) g_access(cp1, 0, 0, -1); } } return (error); } static struct g_geom * g_fox_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_geom *gp, *gp2; struct g_provider *pp2; struct g_consumer *cp, *cp2; struct g_fox_softc *sc, *sc2; int error; u_int sectorsize; u_char *buf; g_trace(G_T_TOPOLOGY, "fox_taste(%s, %s)", mp->name, pp->name); g_topology_assert(); if (!strcmp(pp->geom->class->name, mp->name)) return (NULL); gp = g_new_geomf(mp, "%s.fox", pp->name); gp->softc = g_malloc(sizeof(struct g_fox_softc), M_WAITOK | M_ZERO); sc = gp->softc; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_access(cp, 1, 0, 0); if (error) { g_free(sc); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return(NULL); } do { sectorsize = cp->provider->sectorsize; g_topology_unlock(); buf = g_read_data(cp, 0, sectorsize, NULL); g_topology_lock(); if (buf == NULL) break; if (memcmp(buf, FOX_MAGIC, strlen(FOX_MAGIC))) break; /* * First we need to see if this a new path for an existing fox. */ LIST_FOREACH(gp2, &mp->geom, geom) { sc2 = gp2->softc; if (sc2 == NULL) continue; if (memcmp(buf + 16, sc2->magic, sizeof sc2->magic)) continue; break; } if (gp2 != NULL) { /* * It was. Create a new consumer for that fox, * attach it, and if the fox is open, open this * path with an exclusive count of one. */ printf("Adding path (%s) to fox (%s)\n", pp->name, gp2->name); cp2 = g_new_consumer(gp2); g_attach(cp2, pp); pp2 = LIST_FIRST(&gp2->provider); if (pp2->acr > 0 || pp2->acw > 0 || pp2->ace > 0) { error = g_access(cp2, 0, 0, 1); if (error) { /* * This is bad, or more likely, * the user is doing something stupid */ printf( "WARNING: New path (%s) to fox(%s) not added: %s\n%s", cp2->provider->name, gp2->name, "Could not get exclusive bit.", "WARNING: This indicates a risk of data inconsistency." 
); g_detach(cp2); g_destroy_consumer(cp2); } } break; } printf("Creating new fox (%s)\n", pp->name); sc->path = cp; memcpy(sc->magic, buf + 16, sizeof sc->magic); pp2 = g_new_providerf(gp, "%s", gp->name); pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize; pp2->sectorsize = sc->sectorsize = pp->sectorsize; printf("fox %s lock %p\n", gp->name, &sc->lock); mtx_init(&sc->lock, "fox queue", NULL, MTX_DEF); TAILQ_INIT(&sc->queue); g_error_provider(pp2, 0); } while (0); if (buf != NULL) g_free(buf); g_access(cp, -1, 0, 0); if (!LIST_EMPTY(&gp->provider)) { if (!g_fox_once) { g_fox_once = 1; printf( "WARNING: geom_fox (geom %s) is deprecated, " "use gmultipath instead.\n", gp->name); } return (gp); } g_free(gp->softc); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } static int g_fox_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_fox_softc *sc; g_topology_assert(); sc = gp->softc; mtx_destroy(&sc->lock); g_free(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static struct g_class g_fox_class = { .name = FOX_CLASS_NAME, .version = G_VERSION, .taste = g_fox_taste, .destroy_geom = g_fox_destroy_geom, .start = g_fox_start, .spoiled = g_fox_orphan, .orphan = g_fox_orphan, .access= g_fox_access, }; DECLARE_GEOM_CLASS(g_fox_class, g_fox); +MODULE_VERSION(geom_fox, 0); Index: stable/11/sys/geom/geom_map.c =================================================================== --- stable/11/sys/geom/geom_map.c (revision 332639) +++ stable/11/sys/geom/geom_map.c (revision 332640) @@ -1,407 +1,408 @@ /*- * Copyright (c) 2010-2011 Aleksandr Rybalko * based on geom_redboot.c * Copyright (c) 2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAP_CLASS_NAME "MAP" #define MAP_MAXSLICE 64 #define MAP_MAX_MARKER_LEN 64 struct g_map_softc { off_t offset[MAP_MAXSLICE]; /* offset in flash */ off_t size[MAP_MAXSLICE]; /* image size in bytes */ off_t entry[MAP_MAXSLICE]; off_t dsize[MAP_MAXSLICE]; uint8_t readonly[MAP_MAXSLICE]; g_access_t *parent_access; }; static int g_map_access(struct g_provider *pp, int dread, int dwrite, int dexcl) { struct g_geom *gp; struct g_slicer *gsp; struct g_map_softc *sc; gp = pp->geom; gsp = gp->softc; sc = gsp->softc; if (dwrite > 0 && sc->readonly[pp->index]) return (EPERM); return (sc->parent_access(pp, dread, dwrite, dexcl)); } static int g_map_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_map_softc *sc; struct g_slicer *gsp; int idx; pp = bp->bio_to; idx = pp->index; gp = pp->geom; gsp = gp->softc; sc = gsp->softc; if (bp->bio_cmd == BIO_GETATTR) { if (g_handleattr_int(bp, MAP_CLASS_NAME "::entry", sc->entry[idx])) { return (1); } if (g_handleattr_int(bp, MAP_CLASS_NAME "::dsize", sc->dsize[idx])) { return (1); } } return (0); } static void g_map_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct g_map_softc *sc; struct g_slicer *gsp; gsp = gp->softc; sc = gsp->softc; g_slice_dumpconf(sb, indent, gp, cp, pp); if (pp != NULL) { if (indent == NULL) { sbuf_printf(sb, " entry %jd", (intmax_t)sc->entry[pp->index]); sbuf_printf(sb, " dsize %jd", (intmax_t)sc->dsize[pp->index]); } else { sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)sc->entry[pp->index]); sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)sc->dsize[pp->index]); } } } static int find_marker(struct g_consumer *cp, const char *line, off_t *offset) { off_t search_start, search_offset, search_step; size_t sectorsize; uint8_t *buf; char *op, key[MAP_MAX_MARKER_LEN], search_key[MAP_MAX_MARKER_LEN]; int ret, c; /* Try convert to numeric first */ *offset = strtouq(line, &op, 0); if (*op == '\0') return (0); bzero(search_key, MAP_MAX_MARKER_LEN); sectorsize = cp->provider->sectorsize; #ifdef __LP64__ ret = sscanf(line, "search:%li:%li:%63c", &search_start, &search_step, search_key); #else ret = sscanf(line, "search:%qi:%qi:%63c", &search_start, &search_step, search_key); #endif if (ret < 3) return (1); if (bootverbose) { printf("MAP: search %s for key \"%s\" from 0x%jx, step 0x%jx\n", cp->geom->name, search_key, (intmax_t)search_start, (intmax_t)search_step); } /* error if search_key is empty */ if (strlen(search_key) < 1) return (1); /* sscanf successful, and we start marker search */ for (search_offset = search_start; search_offset < cp->provider->mediasize; search_offset += search_step) { g_topology_unlock(); buf = g_read_data(cp, rounddown(search_offset, sectorsize), roundup(strlen(search_key), sectorsize), NULL); g_topology_lock(); /* * Don't bother doing the rest if buf==NULL; eg derefencing * to assemble 'key'. */ if (buf == NULL) continue; /* Wildcard, replace '.' with byte from data */ /* TODO: add support wildcard escape '\.' 
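 * A '.' in search_key matches any byte at that position, because the
 * copy in 'key' has those bytes patched from the data just read.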
*/ strncpy(key, search_key, MAP_MAX_MARKER_LEN); for (c = 0; c < MAP_MAX_MARKER_LEN && key[c]; c++) { if (key[c] == '.') { key[c] = ((char *)(buf + (search_offset % sectorsize)))[c]; } } /* Assume buf != NULL here */ if (memcmp(buf + search_offset % sectorsize, key, strlen(search_key)) == 0) { g_free(buf); /* Marker found, so return their offset */ *offset = search_offset; return (0); } g_free(buf); } /* Marker not found */ return (1); } static int g_map_parse_part(struct g_class *mp, struct g_provider *pp, struct g_consumer *cp, struct g_geom *gp, struct g_map_softc *sc, int i) { const char *value, *name; char *op; off_t start, end, offset, size, dsize; int readonly, ret; /* hint.map.0.at="cfid0" - bind to cfid0 media */ if (resource_string_value("map", i, "at", &value) != 0) return (1); /* Check if this correct provider */ if (strcmp(pp->name, value) != 0) return (1); /* * hint.map.0.name="uboot" - name of partition, will be available * as "/dev/map/uboot" */ if (resource_string_value("map", i, "name", &name) != 0) { if (bootverbose) printf("MAP: hint.map.%d has no name\n", i); return (1); } /* * hint.map.0.start="0x00010000" - partition start at 0x00010000 * or hint.map.0.start="search:0x00010000:0x200:marker text" - * search for text "marker text", begin at 0x10000, step 0x200 * until we found marker or end of media reached */ if (resource_string_value("map", i, "start", &value) != 0) { if (bootverbose) printf("MAP: \"%s\" has no start value\n", name); return (1); } if (find_marker(cp, value, &start) != 0) { if (bootverbose) { printf("MAP: \"%s\" can't parse/use start value\n", name); } return (1); } /* like "start" */ if (resource_string_value("map", i, "end", &value) != 0) { if (bootverbose) printf("MAP: \"%s\" has no end value\n", name); return (1); } if (find_marker(cp, value, &end) != 0) { if (bootverbose) { printf("MAP: \"%s\" can't parse/use end value\n", name); } return (1); } /* variable readonly optional, disable write access */ if (resource_int_value("map", i, "readonly", &readonly) != 0) readonly = 0; /* offset of partition data, from partition begin */ if (resource_string_value("map", i, "offset", &value) == 0) { offset = strtouq(value, &op, 0); if (*op != '\0') { if (bootverbose) { printf("MAP: \"%s\" can't parse offset\n", name); } return (1); } } else { offset = 0; } /* partition data size */ if (resource_string_value("map", i, "dsize", &value) == 0) { dsize = strtouq(value, &op, 0); if (*op != '\0') { if (bootverbose) { printf("MAP: \"%s\" can't parse dsize\n", name); } return (1); } } else { dsize = 0; } size = end - start; if (dsize == 0) dsize = size - offset; /* end is 0 or size is 0, No MAP - so next */ if (end < start) { if (bootverbose) { printf("MAP: \"%s\", \"end\" less than " "\"start\"\n", name); } return (1); } if (offset + dsize > size) { if (bootverbose) { printf("MAP: \"%s\", \"dsize\" bigger than " "partition - offset\n", name); } return (1); } ret = g_slice_config(gp, i, G_SLICE_CONFIG_SET, start + offset, dsize, cp->provider->sectorsize, "map/%s", name); if (ret != 0) { if (bootverbose) { printf("MAP: g_slice_config returns %d for \"%s\"\n", ret, name); } return (1); } if (bootverbose) { printf("MAP: %s: %jxx%jx, data=%jxx%jx " "\"/dev/map/%s\"\n", cp->geom->name, (intmax_t)start, (intmax_t)size, (intmax_t)offset, (intmax_t)dsize, name); } sc->offset[i] = start; sc->size[i] = size; sc->entry[i] = offset; sc->dsize[i] = dsize; sc->readonly[i] = readonly ? 
1 : 0; return (0); } static struct g_geom * g_map_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) { struct g_map_softc *sc; struct g_consumer *cp; struct g_geom *gp; int i; g_trace(G_T_TOPOLOGY, "map_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); if (strcmp(pp->geom->class->name, MAP_CLASS_NAME) == 0) return (NULL); gp = g_slice_new(mp, MAP_MAXSLICE, pp, &cp, &sc, sizeof(*sc), g_map_start); if (gp == NULL) return (NULL); /* interpose our access method */ sc->parent_access = gp->access; gp->access = g_map_access; for (i = 0; i < MAP_MAXSLICE; i++) g_map_parse_part(mp, pp, cp, gp, sc, i); g_access(cp, -1, 0, 0); if (LIST_EMPTY(&gp->provider)) { if (bootverbose) printf("MAP: No valid partition found at %s\n", pp->name); g_slice_spoiled(cp); return (NULL); } return (gp); } static void g_map_config(struct gctl_req *req, struct g_class *mp, const char *verb) { struct g_geom *gp; g_topology_assert(); gp = gctl_get_geom(req, mp, "geom"); if (gp == NULL) return; gctl_error(req, "Unknown verb"); } static struct g_class g_map_class = { .name = MAP_CLASS_NAME, .version = G_VERSION, .taste = g_map_taste, .dumpconf = g_map_dumpconf, .ctlreq = g_map_config, }; DECLARE_GEOM_CLASS(g_map_class, g_map); +MODULE_VERSION(geom_map, 0); Index: stable/11/sys/geom/geom_mbr.c =================================================================== --- stable/11/sys/geom/geom_mbr.c (revision 332639) +++ stable/11/sys/geom/geom_mbr.c (revision 332640) @@ -1,528 +1,529 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_mbr, "GEOM DOS/MBR partitioning support"); #define MBR_CLASS_NAME "MBR" #define MBREXT_CLASS_NAME "MBREXT" static int g_mbr_once = 0; static struct dos_partition historical_bogus_partition_table[NDOSPART] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0x80, 0, 1, 0, DOSPTYP_386BSD, 255, 255, 255, 0, 50000, }, }; static struct dos_partition historical_bogus_partition_table_fixed[NDOSPART] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0x80, 0, 1, 0, DOSPTYP_386BSD, 254, 255, 255, 0, 50000, }, }; static void g_mbr_print(int i, struct dos_partition *dp) { printf("[%d] f:%02x typ:%d", i, dp->dp_flag, dp->dp_typ); printf(" s(CHS):%d/%d/%d", DPCYL(dp->dp_scyl, dp->dp_ssect), dp->dp_shd, DPSECT(dp->dp_ssect)); printf(" e(CHS):%d/%d/%d", DPCYL(dp->dp_ecyl, dp->dp_esect), dp->dp_ehd, DPSECT(dp->dp_esect)); printf(" s:%d l:%d\n", dp->dp_start, dp->dp_size); } struct g_mbr_softc { int type [NDOSPART]; u_int sectorsize; u_char sec0[512]; u_char slicesum[16]; }; /* * XXX: Add gctl_req arg and give good error msgs. * XXX: Check that length argument does not bring boot code inside any slice. */ static int g_mbr_modify(struct g_geom *gp, struct g_mbr_softc *ms, u_char *sec0, int len __unused) { int i, error; off_t l[NDOSPART]; struct dos_partition ndp[NDOSPART], *dp; MD5_CTX md5sum; g_topology_assert(); if (sec0[0x1fe] != 0x55 && sec0[0x1ff] != 0xaa) return (EBUSY); dp = ndp; for (i = 0; i < NDOSPART; i++) { dos_partition_dec( sec0 + DOSPARTOFF + i * sizeof(struct dos_partition), dp + i); } if ((!bcmp(dp, historical_bogus_partition_table, sizeof historical_bogus_partition_table)) || (!bcmp(dp, historical_bogus_partition_table_fixed, sizeof historical_bogus_partition_table_fixed))) { /* * We will not allow people to write these from "the inside", * Since properly selfdestructing takes too much code. If * people really want to do this, they cannot have any * providers of this geom open, and in that case they can just * as easily overwrite the MBR in the parent device. */ return(EBUSY); } for (i = 0; i < NDOSPART; i++) { /* * A Protective MBR (PMBR) has a single partition of * type 0xEE spanning the whole disk. Such a MBR * protects a GPT on the disk from MBR tools that * don't know anything about GPT. We're interpreting * it a bit more loosely: any partition of type 0xEE * is to be skipped as it doesn't contain any data * that we should care about. We still allow other * partitions to be present in the MBR. A PMBR will * be handled correctly anyway. */ if (dp[i].dp_typ == DOSPTYP_PMBR) l[i] = 0; else if (dp[i].dp_flag != 0 && dp[i].dp_flag != 0x80) l[i] = 0; else if (dp[i].dp_typ == 0) l[i] = 0; else l[i] = (off_t)dp[i].dp_size * ms->sectorsize; error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK, (off_t)dp[i].dp_start * ms->sectorsize, l[i], ms->sectorsize, "%ss%d", gp->name, 1 + i); if (error) return (error); } for (i = 0; i < NDOSPART; i++) { ms->type[i] = dp[i].dp_typ; g_slice_config(gp, i, G_SLICE_CONFIG_SET, (off_t)dp[i].dp_start * ms->sectorsize, l[i], ms->sectorsize, "%ss%d", gp->name, 1 + i); } bcopy(sec0, ms->sec0, 512); /* * Calculate MD5 from the first sector and use it for avoiding * recursive slices creation. 
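 * The checksum is exported as the "MBR::slicesum" attribute and checked
 * by g_mbr_taste() so we do not attach to our own slices again.
 */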
*/ MD5Init(&md5sum); MD5Update(&md5sum, ms->sec0, sizeof(ms->sec0)); MD5Final(ms->slicesum, &md5sum); return (0); } static int g_mbr_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { struct g_geom *gp; struct g_mbr_softc *ms; struct g_slicer *gsp; struct g_consumer *cp; int error, opened; gp = pp->geom; gsp = gp->softc; ms = gsp->softc; opened = 0; error = 0; switch(cmd) { case DIOCSMBR: { if (!(fflag & FWRITE)) return (EPERM); g_topology_lock(); cp = LIST_FIRST(&gp->consumer); if (cp->acw == 0) { error = g_access(cp, 0, 1, 0); if (error == 0) opened = 1; } if (!error) error = g_mbr_modify(gp, ms, data, 512); if (!error) error = g_write_data(cp, 0, data, 512); if (opened) g_access(cp, 0, -1 , 0); g_topology_unlock(); return(error); } default: return (ENOIOCTL); } } static int g_mbr_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_mbr_softc *mp; struct g_slicer *gsp; int idx; pp = bp->bio_to; idx = pp->index; gp = pp->geom; gsp = gp->softc; mp = gsp->softc; if (bp->bio_cmd == BIO_GETATTR) { if (g_handleattr_int(bp, "MBR::type", mp->type[idx])) return (1); if (g_handleattr_off_t(bp, "MBR::offset", gsp->slices[idx].offset)) return (1); if (g_handleattr(bp, "MBR::slicesum", mp->slicesum, sizeof(mp->slicesum))) return (1); } return (0); } static void g_mbr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct g_mbr_softc *mp; struct g_slicer *gsp; gsp = gp->softc; mp = gsp->softc; g_slice_dumpconf(sb, indent, gp, cp, pp); if (pp != NULL) { if (indent == NULL) sbuf_printf(sb, " ty %d", mp->type[pp->index]); else sbuf_printf(sb, "%s%d\n", indent, mp->type[pp->index]); } } static struct g_geom * g_mbr_taste(struct g_class *mp, struct g_provider *pp, int insist) { struct g_geom *gp; struct g_consumer *cp; int error; struct g_mbr_softc *ms; u_int fwsectors, sectorsize; u_char *buf; u_char hash[16]; MD5_CTX md5sum; g_trace(G_T_TOPOLOGY, "mbr_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); if (!strcmp(pp->geom->class->name, MBR_CLASS_NAME)) return (NULL); gp = g_slice_new(mp, NDOSPART, pp, &cp, &ms, sizeof *ms, g_mbr_start); if (gp == NULL) return (NULL); g_topology_unlock(); do { error = g_getattr("GEOM::fwsectors", cp, &fwsectors); if (error) fwsectors = 17; sectorsize = cp->provider->sectorsize; if (sectorsize < 512) break; ms->sectorsize = sectorsize; buf = g_read_data(cp, 0, sectorsize, NULL); if (buf == NULL) break; /* * Calculate MD5 from the first sector and use it for avoiding * recursive slices creation. 
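 * If the provider below us already reports an identical "MBR::slicesum"
 * we would be re-tasting the same label, so we bail out below.
 */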
*/ bcopy(buf, ms->sec0, 512); MD5Init(&md5sum); MD5Update(&md5sum, ms->sec0, sizeof(ms->sec0)); MD5Final(ms->slicesum, &md5sum); error = g_getattr("MBR::slicesum", cp, &hash); if (!error && !bcmp(ms->slicesum, hash, sizeof(hash))) { g_free(buf); break; } g_topology_lock(); g_mbr_modify(gp, ms, buf, 512); g_topology_unlock(); g_free(buf); break; } while (0); g_topology_lock(); g_access(cp, -1, 0, 0); if (LIST_EMPTY(&gp->provider)) { g_slice_spoiled(cp); return (NULL); } if (!g_mbr_once) { g_mbr_once = 1; printf( "WARNING: geom_mbr (geom %s) is deprecated, " "use gpart instead.\n", gp->name); } return (gp); } static void g_mbr_config(struct gctl_req *req, struct g_class *mp, const char *verb) { struct g_geom *gp; struct g_consumer *cp; struct g_mbr_softc *ms; struct g_slicer *gsp; int opened = 0, error = 0; void *data; int len; g_topology_assert(); gp = gctl_get_geom(req, mp, "geom"); if (gp == NULL) return; if (strcmp(verb, "write MBR")) { gctl_error(req, "Unknown verb"); return; } gsp = gp->softc; ms = gsp->softc; data = gctl_get_param(req, "data", &len); if (data == NULL) return; if (len < 512 || (len % 512)) { gctl_error(req, "Wrong request length"); return; } cp = LIST_FIRST(&gp->consumer); if (cp->acw == 0) { error = g_access(cp, 0, 1, 0); if (error == 0) opened = 1; } if (!error) error = g_mbr_modify(gp, ms, data, len); if (error) gctl_error(req, "conflict with open slices"); if (!error) error = g_write_data(cp, 0, data, len); if (error) gctl_error(req, "sector zero write failed"); if (opened) g_access(cp, 0, -1 , 0); return; } static struct g_class g_mbr_class = { .name = MBR_CLASS_NAME, .version = G_VERSION, .taste = g_mbr_taste, .dumpconf = g_mbr_dumpconf, .ctlreq = g_mbr_config, .ioctl = g_mbr_ioctl, }; DECLARE_GEOM_CLASS(g_mbr_class, g_mbr); #define NDOSEXTPART 32 struct g_mbrext_softc { int type [NDOSEXTPART]; }; static int g_mbrext_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_mbrext_softc *mp; struct g_slicer *gsp; int idx; pp = bp->bio_to; idx = pp->index; gp = pp->geom; gsp = gp->softc; mp = gsp->softc; if (bp->bio_cmd == BIO_GETATTR) { if (g_handleattr_int(bp, "MBR::type", mp->type[idx])) return (1); } return (0); } static void g_mbrext_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct g_mbrext_softc *mp; struct g_slicer *gsp; g_slice_dumpconf(sb, indent, gp, cp, pp); gsp = gp->softc; mp = gsp->softc; if (pp != NULL) { if (indent == NULL) sbuf_printf(sb, " ty %d", mp->type[pp->index]); else sbuf_printf(sb, "%s%d\n", indent, mp->type[pp->index]); } } static struct g_geom * g_mbrext_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) { struct g_geom *gp; struct g_consumer *cp; int error, i, slice; struct g_mbrext_softc *ms; off_t off; u_char *buf; struct dos_partition dp[4]; u_int fwsectors, sectorsize; g_trace(G_T_TOPOLOGY, "g_mbrext_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); if (strcmp(pp->geom->class->name, MBR_CLASS_NAME)) return (NULL); gp = g_slice_new(mp, NDOSEXTPART, pp, &cp, &ms, sizeof *ms, g_mbrext_start); if (gp == NULL) return (NULL); g_topology_unlock(); off = 0; slice = 0; do { error = g_getattr("MBR::type", cp, &i); if (error || (i != DOSPTYP_EXT && i != DOSPTYP_EXTLBA)) break; error = g_getattr("GEOM::fwsectors", cp, &fwsectors); if (error) fwsectors = 17; sectorsize = cp->provider->sectorsize; if (sectorsize != 512) break; for (;;) { buf = g_read_data(cp, off, sectorsize, NULL); if (buf == NULL) break; if (buf[0x1fe] != 
0x55 && buf[0x1ff] != 0xaa) { g_free(buf); break; } for (i = 0; i < NDOSPART; i++) dos_partition_dec( buf + DOSPARTOFF + i * sizeof(struct dos_partition), dp + i); g_free(buf); if (0 && bootverbose) { printf("MBREXT Slice %d on %s:\n", slice + 5, gp->name); g_mbr_print(0, dp); g_mbr_print(1, dp + 1); } if ((dp[0].dp_flag & 0x7f) == 0 && dp[0].dp_size != 0 && dp[0].dp_typ != 0) { g_topology_lock(); g_slice_config(gp, slice, G_SLICE_CONFIG_SET, (((off_t)dp[0].dp_start) << 9ULL) + off, ((off_t)dp[0].dp_size) << 9ULL, sectorsize, "%*.*s%d", (int)strlen(gp->name) - 1, (int)strlen(gp->name) - 1, gp->name, slice + 5); g_topology_unlock(); ms->type[slice] = dp[0].dp_typ; slice++; } if (dp[1].dp_flag != 0) break; if (dp[1].dp_typ != DOSPTYP_EXT && dp[1].dp_typ != DOSPTYP_EXTLBA) break; if (dp[1].dp_size == 0) break; off = ((off_t)dp[1].dp_start) << 9ULL; } break; } while (0); g_topology_lock(); g_access(cp, -1, 0, 0); if (LIST_EMPTY(&gp->provider)) { g_slice_spoiled(cp); return (NULL); } return (gp); } static struct g_class g_mbrext_class = { .name = MBREXT_CLASS_NAME, .version = G_VERSION, .taste = g_mbrext_taste, .dumpconf = g_mbrext_dumpconf, }; DECLARE_GEOM_CLASS(g_mbrext_class, g_mbrext); +MODULE_VERSION(geom_mbr, 0); Index: stable/11/sys/geom/geom_pc98.c =================================================================== --- stable/11/sys/geom/geom_pc98.c (revision 332639) +++ stable/11/sys/geom/geom_pc98.c (revision 332640) @@ -1,372 +1,373 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_pc98, "GEOM NEC PC9800 partitioning support"); #define PC98_CLASS_NAME "PC98" struct g_pc98_softc { u_int fwsectors, fwheads, sectorsize; int type[PC98_NPARTS]; u_char sec[8192]; }; static void g_pc98_print(int i, struct pc98_partition *dp) { char sname[17]; strncpy(sname, dp->dp_name, 16); sname[16] = '\0'; hexdump(dp, sizeof(dp[0]), NULL, 0); printf("[%d] mid:%d(0x%x) sid:%d(0x%x)", i, dp->dp_mid, dp->dp_mid, dp->dp_sid, dp->dp_sid); printf(" s:%d/%d/%d", dp->dp_scyl, dp->dp_shd, dp->dp_ssect); printf(" e:%d/%d/%d", dp->dp_ecyl, dp->dp_ehd, dp->dp_esect); printf(" sname:%s\n", sname); } /* * XXX: Add gctl_req arg and give good error msgs. * XXX: Check that length argument does not bring boot code inside any slice. */ static int g_pc98_modify(struct g_geom *gp, struct g_pc98_softc *ms, u_char *sec, int len __unused) { int i, error; off_t s[PC98_NPARTS], l[PC98_NPARTS]; struct pc98_partition dp[PC98_NPARTS]; g_topology_assert(); if (sec[0x1fe] != 0x55 || sec[0x1ff] != 0xaa) return (EBUSY); #if 0 /* * By convention, it seems that the ipl program has a jump at location * 0 to the real start of the boot loader. By convention, it appears * that after this jump, there's a string, terminated by at least one, * if not more, zeros, followed by the target of the jump. FreeBSD's * pc98 boot0 uses 'IPL1' followed by 3 zeros here, likely for * compatibility with some older boot loader. Linux98's boot loader * appears to use 'Linux 98' followed by only two. GRUB/98 appears to * use 'GRUB/98 ' followed by none. These last two appear to be * ported from the ia32 versions, but appear to show similar * convention. Grub/98 has an additional NOP after the jmp, which * isn't present in others. * * The following test was inspired by looking only at partitions * with FreeBSD's boot0 (or one that it is compatible with). As * such, it failed when other IPL programs were used.
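 *
 * For reference, the layout the disabled check below expects is,
 * roughly, the one FreeBSD's pc98 boot0 produces (only sec[4..7] is
 * actually tested; the rest is inferred from the description above):
 *
 *	sec[0]		start of the jump to the real loader
 *	sec[4..7]	'I' 'P' 'L' '1'
 *	sec[8..10]	three zero bytes, then the jump target
 *
 * Other IPLs carry different strings, which is why the test stays
 * under #if 0.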
*/ if (sec[4] != 'I' || sec[5] != 'P' || sec[6] != 'L' || sec[7] != '1') return (EBUSY); #endif for (i = 0; i < PC98_NPARTS; i++) pc98_partition_dec( sec + 512 + i * sizeof(struct pc98_partition), &dp[i]); for (i = 0; i < PC98_NPARTS; i++) { /* If start and end are identical it's bogus */ if (dp[i].dp_ssect == dp[i].dp_esect && dp[i].dp_shd == dp[i].dp_ehd && dp[i].dp_scyl == dp[i].dp_ecyl) s[i] = l[i] = 0; else if (dp[i].dp_ecyl == 0) s[i] = l[i] = 0; else { s[i] = (off_t)dp[i].dp_scyl * ms->fwsectors * ms->fwheads * ms->sectorsize; l[i] = (off_t)(dp[i].dp_ecyl - dp[i].dp_scyl + 1) * ms->fwsectors * ms->fwheads * ms->sectorsize; } if (bootverbose) { printf("PC98 Slice %d on %s:\n", i + 1, gp->name); g_pc98_print(i, dp + i); } if (s[i] < 0 || l[i] < 0) error = EBUSY; else error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK, s[i], l[i], ms->sectorsize, "%ss%d", gp->name, i + 1); if (error) return (error); } for (i = 0; i < PC98_NPARTS; i++) { ms->type[i] = (dp[i].dp_sid << 8) | dp[i].dp_mid; g_slice_config(gp, i, G_SLICE_CONFIG_SET, s[i], l[i], ms->sectorsize, "%ss%d", gp->name, i + 1); } bcopy(sec, ms->sec, sizeof (ms->sec)); return (0); } static int g_pc98_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { struct g_geom *gp; struct g_pc98_softc *ms; struct g_slicer *gsp; struct g_consumer *cp; int error, opened; gp = pp->geom; gsp = gp->softc; ms = gsp->softc; opened = 0; error = 0; switch(cmd) { case DIOCSPC98: { if (!(fflag & FWRITE)) return (EPERM); g_topology_lock(); cp = LIST_FIRST(&gp->consumer); if (cp->acw == 0) { error = g_access(cp, 0, 1, 0); if (error == 0) opened = 1; } if (!error) error = g_pc98_modify(gp, ms, data, 8192); if (!error) error = g_write_data(cp, 0, data, 8192); if (opened) g_access(cp, 0, -1 , 0); g_topology_unlock(); return(error); } default: return (ENOIOCTL); } } static int g_pc98_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_pc98_softc *mp; struct g_slicer *gsp; int idx; pp = bp->bio_to; idx = pp->index; gp = pp->geom; gsp = gp->softc; mp = gsp->softc; if (bp->bio_cmd == BIO_GETATTR) { if (g_handleattr_int(bp, "PC98::type", mp->type[idx])) return (1); if (g_handleattr_off_t(bp, "PC98::offset", gsp->slices[idx].offset)) return (1); } return (0); } static void g_pc98_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct g_pc98_softc *mp; struct g_slicer *gsp; struct pc98_partition dp; char sname[17]; gsp = gp->softc; mp = gsp->softc; g_slice_dumpconf(sb, indent, gp, cp, pp); if (pp != NULL) { pc98_partition_dec( mp->sec + 512 + pp->index * sizeof(struct pc98_partition), &dp); strncpy(sname, dp.dp_name, 16); sname[16] = '\0'; if (indent == NULL) { sbuf_printf(sb, " ty %d", mp->type[pp->index]); sbuf_printf(sb, " sn %s", sname); } else { sbuf_printf(sb, "%s%d\n", indent, mp->type[pp->index]); sbuf_printf(sb, "%s%s\n", indent, sname); } } } static struct g_geom * g_pc98_taste(struct g_class *mp, struct g_provider *pp, int flags) { struct g_geom *gp; struct g_consumer *cp; int error; struct g_pc98_softc *ms; u_int fwsectors, fwheads, sectorsize; u_char *buf; g_trace(G_T_TOPOLOGY, "g_pc98_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); if (flags == G_TF_NORMAL && !strcmp(pp->geom->class->name, PC98_CLASS_NAME)) return (NULL); gp = g_slice_new(mp, PC98_NPARTS, pp, &cp, &ms, sizeof *ms, g_pc98_start); if (gp == NULL) return (NULL); g_topology_unlock(); do { if (gp->rank != 2 && flags == G_TF_NORMAL) break; error = 
g_getattr("GEOM::fwsectors", cp, &fwsectors); if (error || fwsectors == 0) { fwsectors = 17; if (bootverbose) printf("g_pc98_taste: guessing %d sectors\n", fwsectors); } error = g_getattr("GEOM::fwheads", cp, &fwheads); if (error || fwheads == 0) { fwheads = 8; if (bootverbose) printf("g_pc98_taste: guessing %d heads\n", fwheads); } sectorsize = cp->provider->sectorsize; if (sectorsize % 512 != 0) break; buf = g_read_data(cp, 0, 8192, NULL); if (buf == NULL) break; ms->fwsectors = fwsectors; ms->fwheads = fwheads; ms->sectorsize = sectorsize; g_topology_lock(); g_pc98_modify(gp, ms, buf, 8192); g_topology_unlock(); g_free(buf); break; } while (0); g_topology_lock(); g_access(cp, -1, 0, 0); if (LIST_EMPTY(&gp->provider)) { g_slice_spoiled(cp); return (NULL); } return (gp); } static void g_pc98_config(struct gctl_req *req, struct g_class *mp, const char *verb) { struct g_geom *gp; struct g_consumer *cp; struct g_pc98_softc *ms; struct g_slicer *gsp; int opened = 0, error = 0; void *data; int len; g_topology_assert(); gp = gctl_get_geom(req, mp, "geom"); if (gp == NULL) return; if (strcmp(verb, "write PC98")) { gctl_error(req, "Unknown verb"); return; } gsp = gp->softc; ms = gsp->softc; data = gctl_get_param(req, "data", &len); if (data == NULL) return; if (len < 8192 || (len % 512)) { gctl_error(req, "Wrong request length"); return; } cp = LIST_FIRST(&gp->consumer); if (cp->acw == 0) { error = g_access(cp, 0, 1, 0); if (error == 0) opened = 1; } if (!error) error = g_pc98_modify(gp, ms, data, len); if (error) gctl_error(req, "conflict with open slices"); if (!error) error = g_write_data(cp, 0, data, len); if (error) gctl_error(req, "sector zero write failed"); if (opened) g_access(cp, 0, -1 , 0); return; } static struct g_class g_pc98_class = { .name = PC98_CLASS_NAME, .version = G_VERSION, .taste = g_pc98_taste, .dumpconf = g_pc98_dumpconf, .ctlreq = g_pc98_config, .ioctl = g_pc98_ioctl, }; DECLARE_GEOM_CLASS(g_pc98_class, g_pc98); +MODULE_VERSION(geom_pc98, 0); Index: stable/11/sys/geom/geom_redboot.c =================================================================== --- stable/11/sys/geom/geom_redboot.c (revision 332639) +++ stable/11/sys/geom/geom_redboot.c (revision 332640) @@ -1,345 +1,346 @@ /*- * Copyright (c) 2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #define REDBOOT_CLASS_NAME "REDBOOT" struct fis_image_desc { uint8_t name[16]; /* null-terminated name */ uint32_t offset; /* offset in flash */ uint32_t addr; /* address in memory */ uint32_t size; /* image size in bytes */ uint32_t entry; /* offset in image for entry point */ uint32_t dsize; /* data size in bytes */ uint8_t pad[256-(16+7*sizeof(uint32_t)+sizeof(void*))]; struct fis_image_desc *next; /* linked list (in memory) */ uint32_t dsum; /* descriptor checksum */ uint32_t fsum; /* checksum over image data */ }; #define FISDIR_NAME "FIS directory" #define REDBCFG_NAME "RedBoot config" #define REDBOOT_NAME "RedBoot" #define REDBOOT_MAXSLICE 64 #define REDBOOT_MAXOFF \ (REDBOOT_MAXSLICE*sizeof(struct fis_image_desc)) struct g_redboot_softc { uint32_t entry[REDBOOT_MAXSLICE]; uint32_t dsize[REDBOOT_MAXSLICE]; uint8_t readonly[REDBOOT_MAXSLICE]; g_access_t *parent_access; }; static void g_redboot_print(int i, struct fis_image_desc *fd) { printf("[%2d] \"%-15.15s\" %08x:%08x", i, fd->name, fd->offset, fd->size); printf(" addr %08x entry %08x\n", fd->addr, fd->entry); printf(" dsize 0x%x dsum 0x%x fsum 0x%x\n", fd->dsize, fd->dsum, fd->fsum); } static int g_redboot_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { return (ENOIOCTL); } static int g_redboot_access(struct g_provider *pp, int dread, int dwrite, int dexcl) { struct g_geom *gp = pp->geom; struct g_slicer *gsp = gp->softc; struct g_redboot_softc *sc = gsp->softc; if (dwrite > 0 && sc->readonly[pp->index]) return (EPERM); return (sc->parent_access(pp, dread, dwrite, dexcl)); } static int g_redboot_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_redboot_softc *sc; struct g_slicer *gsp; int idx; pp = bp->bio_to; idx = pp->index; gp = pp->geom; gsp = gp->softc; sc = gsp->softc; if (bp->bio_cmd == BIO_GETATTR) { if (g_handleattr_int(bp, REDBOOT_CLASS_NAME "::entry", sc->entry[idx])) return (1); if (g_handleattr_int(bp, REDBOOT_CLASS_NAME "::dsize", sc->dsize[idx])) return (1); } return (0); } static void g_redboot_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct g_redboot_softc *sc; struct g_slicer *gsp; gsp = gp->softc; sc = gsp->softc; g_slice_dumpconf(sb, indent, gp, cp, pp); if (pp != NULL) { if (indent == NULL) { sbuf_printf(sb, " entry %d", sc->entry[pp->index]); sbuf_printf(sb, " dsize %d", sc->dsize[pp->index]); } else { sbuf_printf(sb, "%s%d\n", indent, sc->entry[pp->index]); sbuf_printf(sb, "%s%d\n", indent, sc->dsize[pp->index]); } } } #include static int nameok(const char name[16]) { int i; /* descriptor names are null-terminated printable ascii */ for (i = 0; i < 15; i++) if (!isprint(name[i])) break; return (name[i] == '\0'); } static struct fis_image_desc * parse_fis_directory(u_char *buf, size_t bufsize, off_t offset, uint32_t offmask) { 
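	/*
	 * The FIS directory is an array of struct fis_image_desc entries
	 * occupying one flash block; unused slots have name[0] == 0xff.
	 * A rough sketch of a populated slot (values illustrative only):
	 *
	 *	name   = "RedBoot"
	 *	offset = flash offset of the image
	 *	addr   = load address in memory
	 *	size   = bytes reserved in flash, dsize = valid data bytes
	 *
	 * Entries with printable, NUL-terminated names are collected on a
	 * singly linked list through the in-memory next pointer.
	 */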
#define match(a,b) (bcmp(a, b, sizeof(b)-1) == 0) struct fis_image_desc *fd, *efd; struct fis_image_desc *fisdir, *redbcfg; struct fis_image_desc *head, **tail; int i; fd = (struct fis_image_desc *)buf; efd = fd + (bufsize / sizeof(struct fis_image_desc)); #if 0 /* * Find the start of the FIS table. */ while (fd < efd && fd->name[0] != 0xff) fd++; if (fd == efd) return (NULL); if (bootverbose) printf("RedBoot FIS table starts at 0x%jx\n", offset + fd - (struct fis_image_desc *) buf); #endif /* * Scan forward collecting entries in a list. */ fisdir = redbcfg = NULL; *(tail = &head) = NULL; for (i = 0; fd < efd; i++, fd++) { if (fd->name[0] == 0xff) continue; if (match(fd->name, FISDIR_NAME)) fisdir = fd; else if (match(fd->name, REDBCFG_NAME)) redbcfg = fd; if (nameok(fd->name)) { /* * NB: flash address includes platform mapping; * strip it so we have only a flash offset. */ fd->offset &= offmask; if (bootverbose) g_redboot_print(i, fd); *tail = fd; *(tail = &fd->next) = NULL; } } if (fisdir == NULL) { if (bootverbose) printf("No RedBoot FIS table located at %lu\n", (long) offset); return (NULL); } if (redbcfg != NULL && fisdir->offset + fisdir->size == redbcfg->offset) { /* * Merged FIS/RedBoot config directory. */ if (bootverbose) printf("FIS/RedBoot merged at 0x%jx (not yet)\n", offset + fisdir->offset); /* XXX */ } return head; #undef match } static struct g_geom * g_redboot_taste(struct g_class *mp, struct g_provider *pp, int insist) { struct g_geom *gp; struct g_consumer *cp; struct g_redboot_softc *sc; int error, sectorsize, i; struct fis_image_desc *fd, *head; uint32_t offmask; u_int blksize; /* NB: flash block size stored as stripesize */ u_char *buf; off_t offset; g_trace(G_T_TOPOLOGY, "redboot_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); if (!strcmp(pp->geom->class->name, REDBOOT_CLASS_NAME)) return (NULL); /* XXX only taste flash providers */ if (strncmp(pp->name, "cfi", 3) && strncmp(pp->name, "flash/spi", 9)) return (NULL); gp = g_slice_new(mp, REDBOOT_MAXSLICE, pp, &cp, &sc, sizeof(*sc), g_redboot_start); if (gp == NULL) return (NULL); /* interpose our access method */ sc->parent_access = gp->access; gp->access = g_redboot_access; sectorsize = cp->provider->sectorsize; blksize = cp->provider->stripesize; if (powerof2(cp->provider->mediasize)) offmask = cp->provider->mediasize-1; else offmask = 0xffffffff; /* XXX */ if (bootverbose) printf("%s: mediasize %ld secsize %d blksize %d offmask 0x%x\n", __func__, (long) cp->provider->mediasize, sectorsize, blksize, offmask); if (sectorsize < sizeof(struct fis_image_desc) || (sectorsize % sizeof(struct fis_image_desc))) return (NULL); g_topology_unlock(); head = NULL; offset = cp->provider->mediasize - blksize; again: buf = g_read_data(cp, offset, blksize, NULL); if (buf != NULL) head = parse_fis_directory(buf, blksize, offset, offmask); if (head == NULL && offset != 0) { if (buf != NULL) g_free(buf); offset = 0; /* check the front */ goto again; } g_topology_lock(); if (head == NULL) { if (buf != NULL) g_free(buf); return NULL; } /* * Craft a slice for each entry. 
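 * Each collected descriptor becomes a provider named "redboot/<name>"
 * covering fd->offset .. fd->offset + fd->size; the "FIS directory"
 * and "RedBoot" entries are marked read-only so the directory and the
 * loader cannot be overwritten through their slices.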
*/ for (fd = head, i = 0; fd != NULL; fd = fd->next) { if (fd->name[0] == '\0') continue; error = g_slice_config(gp, i, G_SLICE_CONFIG_SET, fd->offset, fd->size, sectorsize, "redboot/%s", fd->name); if (error) printf("%s: g_slice_config returns %d for \"%s\"\n", __func__, error, fd->name); sc->entry[i] = fd->entry; sc->dsize[i] = fd->dsize; /* disallow writing hard-to-recover entries */ sc->readonly[i] = (strcmp(fd->name, FISDIR_NAME) == 0) || (strcmp(fd->name, REDBOOT_NAME) == 0); i++; } g_free(buf); g_access(cp, -1, 0, 0); if (LIST_EMPTY(&gp->provider)) { g_slice_spoiled(cp); return (NULL); } return (gp); } static void g_redboot_config(struct gctl_req *req, struct g_class *mp, const char *verb) { struct g_geom *gp; g_topology_assert(); gp = gctl_get_geom(req, mp, "geom"); if (gp == NULL) return; gctl_error(req, "Unknown verb"); } static struct g_class g_redboot_class = { .name = REDBOOT_CLASS_NAME, .version = G_VERSION, .taste = g_redboot_taste, .dumpconf = g_redboot_dumpconf, .ctlreq = g_redboot_config, .ioctl = g_redboot_ioctl, }; DECLARE_GEOM_CLASS(g_redboot_class, g_redboot); +MODULE_VERSION(geom_redboot, 0); Index: stable/11/sys/geom/geom_sunlabel.c =================================================================== --- stable/11/sys/geom/geom_sunlabel.c (revision 332639) +++ stable/11/sys/geom/geom_sunlabel.c (revision 332640) @@ -1,334 +1,335 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_sunlabel, "GEOM Sun/Solaris partitioning support"); #define SUNLABEL_CLASS_NAME "SUN" struct g_sunlabel_softc { int sectorsize; int nheads; int nsects; int nalt; u_char labelsum[16]; }; static int g_sunlabel_once = 0; static int g_sunlabel_modify(struct g_geom *gp, struct g_sunlabel_softc *ms, u_char *sec0) { int i, error; u_int u, v, csize; struct sun_disklabel sl; MD5_CTX md5sum; error = sunlabel_dec(sec0, &sl); if (error) return (error); csize = sl.sl_ntracks * sl.sl_nsectors; for (i = 0; i < SUN_NPART; i++) { v = sl.sl_part[i].sdkp_cyloffset; u = sl.sl_part[i].sdkp_nsectors; error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK, ((off_t)v * csize) << 9ULL, ((off_t)u) << 9ULL, ms->sectorsize, "%s%c", gp->name, 'a' + i); if (error) return (error); } for (i = 0; i < SUN_NPART; i++) { v = sl.sl_part[i].sdkp_cyloffset; u = sl.sl_part[i].sdkp_nsectors; g_slice_config(gp, i, G_SLICE_CONFIG_SET, ((off_t)v * csize) << 9ULL, ((off_t)u) << 9ULL, ms->sectorsize, "%s%c", gp->name, 'a' + i); } ms->nalt = sl.sl_acylinders; ms->nheads = sl.sl_ntracks; ms->nsects = sl.sl_nsectors; /* * Calculate MD5 from the first sector and use it for avoiding * recursive labels creation. */ MD5Init(&md5sum); MD5Update(&md5sum, sec0, ms->sectorsize); MD5Final(ms->labelsum, &md5sum); return (0); } static void g_sunlabel_hotwrite(void *arg, int flag) { struct bio *bp; struct g_geom *gp; struct g_slicer *gsp; struct g_slice *gsl; struct g_sunlabel_softc *ms; u_char *p; int error; KASSERT(flag != EV_CANCEL, ("g_sunlabel_hotwrite cancelled")); bp = arg; gp = bp->bio_to->geom; gsp = gp->softc; ms = gsp->softc; gsl = &gsp->slices[bp->bio_to->index]; /* * XXX: For all practical purposes, this would be equivalent to * XXX: "p = (u_char *)bp->bio_data;" because the label is always * XXX: in the first sector and we refuse sectors smaller than the * XXX: label. */ p = (u_char *)bp->bio_data - (bp->bio_offset + gsl->offset); error = g_sunlabel_modify(gp, ms, p); if (error) { g_io_deliver(bp, EPERM); return; } g_slice_finish_hot(bp); } static void g_sunlabel_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct g_slicer *gsp; struct g_sunlabel_softc *ms; gsp = gp->softc; ms = gsp->softc; g_slice_dumpconf(sb, indent, gp, cp, pp); if (indent == NULL) { sbuf_printf(sb, " sc %u hd %u alt %u", ms->nsects, ms->nheads, ms->nalt); } } struct g_hh01 { struct g_geom *gp; struct g_sunlabel_softc *ms; u_char *label; int error; }; static void g_sunlabel_callconfig(void *arg, int flag) { struct g_hh01 *hp; hp = arg; hp->error = g_sunlabel_modify(hp->gp, hp->ms, hp->label); if (!hp->error) hp->error = g_write_data(LIST_FIRST(&hp->gp->consumer), 0, hp->label, SUN_SIZE); } /* * NB! curthread is the user process which GCTL'ed.
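 * Two verbs are handled: "write label" takes a SUN_SIZE "label"
 * parameter, runs it through g_sunlabel_modify() and writes it to
 * sector 0; "write bootcode" takes a SUN_BOOTSIZE "bootcode"
 * parameter and copies it onto every slice large enough to hold it,
 * skipping the label area at the front of each slice.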
*/ static void g_sunlabel_config(struct gctl_req *req, struct g_class *mp, const char *verb) { u_char *label; int error, i; struct g_hh01 h0h0; struct g_slicer *gsp; struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); gp = gctl_get_geom(req, mp, "geom"); if (gp == NULL) return; cp = LIST_FIRST(&gp->consumer); gsp = gp->softc; if (!strcmp(verb, "write label")) { label = gctl_get_paraml(req, "label", SUN_SIZE); if (label == NULL) return; h0h0.gp = gp; h0h0.ms = gsp->softc; h0h0.label = label; h0h0.error = -1; /* XXX: Does this reference register with our selfdestruct code ? */ error = g_access(cp, 1, 1, 1); if (error) { gctl_error(req, "could not access consumer"); return; } g_sunlabel_callconfig(&h0h0, 0); g_access(cp, -1, -1, -1); } else if (!strcmp(verb, "write bootcode")) { label = gctl_get_paraml(req, "bootcode", SUN_BOOTSIZE); if (label == NULL) return; /* XXX: Does this reference register with our selfdestruct code ? */ error = g_access(cp, 1, 1, 1); if (error) { gctl_error(req, "could not access consumer"); return; } for (i = 0; i < SUN_NPART; i++) { if (gsp->slices[i].length <= SUN_BOOTSIZE) continue; g_write_data(cp, gsp->slices[i].offset + SUN_SIZE, label + SUN_SIZE, SUN_BOOTSIZE - SUN_SIZE); } g_access(cp, -1, -1, -1); } else { gctl_error(req, "Unknown verb parameter"); } } static int g_sunlabel_start(struct bio *bp) { struct g_sunlabel_softc *mp; struct g_slicer *gsp; gsp = bp->bio_to->geom->softc; mp = gsp->softc; if (bp->bio_cmd == BIO_GETATTR) { if (g_handleattr(bp, "SUN::labelsum", mp->labelsum, sizeof(mp->labelsum))) return (1); } return (0); } static struct g_geom * g_sunlabel_taste(struct g_class *mp, struct g_provider *pp, int flags) { struct g_geom *gp; struct g_consumer *cp; struct g_sunlabel_softc *ms; struct g_slicer *gsp; u_char *buf, hash[16]; MD5_CTX md5sum; int error; g_trace(G_T_TOPOLOGY, "g_sunlabel_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); if (flags == G_TF_NORMAL && !strcmp(pp->geom->class->name, SUNLABEL_CLASS_NAME)) return (NULL); gp = g_slice_new(mp, 8, pp, &cp, &ms, sizeof *ms, g_sunlabel_start); if (gp == NULL) return (NULL); gsp = gp->softc; do { ms->sectorsize = cp->provider->sectorsize; if (ms->sectorsize < 512) break; g_topology_unlock(); buf = g_read_data(cp, 0, ms->sectorsize, NULL); g_topology_lock(); if (buf == NULL) break; /* * Calculate MD5 from the first sector and use it for avoiding * recursive labels creation. 
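 * As in the MBR class, the sum is exported as a GEOM attribute
 * ("SUN::labelsum", served by g_sunlabel_start()); a provider whose
 * first sector hashes to its parent's sum is left alone by taste.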
*/ MD5Init(&md5sum); MD5Update(&md5sum, buf, ms->sectorsize); MD5Final(ms->labelsum, &md5sum); error = g_getattr("SUN::labelsum", cp, &hash); if (!error && !bcmp(ms->labelsum, hash, sizeof(hash))) { g_free(buf); break; } g_sunlabel_modify(gp, ms, buf); g_free(buf); break; } while (0); g_access(cp, -1, 0, 0); if (LIST_EMPTY(&gp->provider)) { g_slice_spoiled(cp); return (NULL); } g_slice_conf_hot(gp, 0, 0, SUN_SIZE, G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL); gsp->hot = g_sunlabel_hotwrite; if (!g_sunlabel_once) { g_sunlabel_once = 1; printf( "WARNING: geom_sunlabel (geom %s) is deprecated, " "use gpart instead.\n", gp->name); } return (gp); } static struct g_class g_sunlabel_class = { .name = SUNLABEL_CLASS_NAME, .version = G_VERSION, .taste = g_sunlabel_taste, .ctlreq = g_sunlabel_config, .dumpconf = g_sunlabel_dumpconf, }; DECLARE_GEOM_CLASS(g_sunlabel_class, g_sunlabel); +MODULE_VERSION(geom_sunlabel, 0); Index: stable/11/sys/geom/geom_vol_ffs.c =================================================================== --- stable/11/sys/geom/geom_vol_ffs.c (revision 332639) +++ stable/11/sys/geom/geom_vol_ffs.c (revision 332640) @@ -1,164 +1,165 @@ /*- * Copyright (c) 2002, 2003 Gordon Tetlow * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_vol, "GEOM support for volume names from UFS superblock"); #define VOL_FFS_CLASS_NAME "VOL_FFS" static int superblocks[] = SBLOCKSEARCH; static int g_vol_ffs_once; struct g_vol_ffs_softc { char * vol; }; static int g_vol_ffs_start(struct bio *bp __unused) { return(0); } static struct g_geom * g_vol_ffs_taste(struct g_class *mp, struct g_provider *pp, int flags) { struct g_geom *gp; struct g_consumer *cp; struct g_vol_ffs_softc *ms; int sb, superblock; struct fs *fs; g_trace(G_T_TOPOLOGY, "vol_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); /* * XXX This is a really weak way to make sure we don't recurse. * Probably ought to use BIO_GETATTR to check for this. 
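 * A sketch of what such a check might look like (the attribute name
 * below is made up, nothing serves it today):
 *
 *	int recursed;
 *
 *	if (g_getattr("VOL_FFS::recursed", cp, &recursed) == 0)
 *		return (NULL);
 *
 * i.e. ask the parent directly instead of relying on the class-name
 * comparison below.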
*/ if (flags == G_TF_NORMAL && !strcmp(pp->geom->class->name, VOL_FFS_CLASS_NAME)) return (NULL); gp = g_slice_new(mp, 1, pp, &cp, &ms, sizeof(*ms), g_vol_ffs_start); if (gp == NULL) return (NULL); g_topology_unlock(); /* * Walk through the standard places that superblocks hide and look * for UFS magic. If we find magic, then check that the size in the * superblock corresponds to the size of the underlying provider. * Finally, look for a volume label and create an appropriate * provider based on that. */ for (sb=0; (superblock = superblocks[sb]) != -1; sb++) { /* * Take care not to issue an invalid I/O request. The * offset and size of the superblock candidate must be * multiples of the provider's sector size, otherwise an * FFS can't exist on the provider anyway. */ if (superblock % cp->provider->sectorsize != 0 || SBLOCKSIZE % cp->provider->sectorsize != 0) continue; fs = (struct fs *) g_read_data(cp, superblock, SBLOCKSIZE, NULL); if (fs == NULL) continue; /* Check for magic and make sure things are the right size */ if (fs->fs_magic == FS_UFS1_MAGIC) { if (fs->fs_old_size * fs->fs_fsize != (int32_t) pp->mediasize) { g_free(fs); continue; } } else if (fs->fs_magic == FS_UFS2_MAGIC) { if (fs->fs_size * fs->fs_fsize != (int64_t) pp->mediasize) { g_free(fs); continue; } } else { g_free(fs); continue; } /* Check for volume label */ if (fs->fs_volname[0] == '\0') { g_free(fs); continue; } /* XXX We need to check for namespace conflicts. */ /* XXX How do you handle a mirror set? */ /* XXX We don't validate the volume name. */ g_topology_lock(); /* Alright, we have a label and a volume name, reconfig. */ g_slice_config(gp, 0, G_SLICE_CONFIG_SET, (off_t) 0, pp->mediasize, pp->sectorsize, "vol/%s", fs->fs_volname); g_free(fs); g_topology_unlock(); break; } g_topology_lock(); g_access(cp, -1, 0, 0); if (LIST_EMPTY(&gp->provider)) { g_slice_spoiled(cp); return (NULL); } if (!g_vol_ffs_once) { g_vol_ffs_once = 1; printf( "WARNING: geom_vol_Ffs (geom %s) is deprecated, " "use glabel instead.\n", gp->name); } return (gp); } static struct g_class g_vol_ffs_class = { .name = VOL_FFS_CLASS_NAME, .version = G_VERSION, .taste = g_vol_ffs_taste, }; DECLARE_GEOM_CLASS(g_vol_ffs_class, g_vol_ffs); +MODULE_VERSION(geom_vol_ffs, 0); Index: stable/11/sys/geom/journal/g_journal_ufs.c =================================================================== --- stable/11/sys/geom/journal/g_journal_ufs.c (revision 332639) +++ stable/11/sys/geom/journal/g_journal_ufs.c (revision 332640) @@ -1,107 +1,108 @@ /*- * Copyright (c) 2005-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include static const int superblocks[] = SBLOCKSEARCH; static int g_journal_ufs_clean(struct mount *mp) { struct ufsmount *ump; struct fs *fs; int flags; ump = VFSTOUFS(mp); fs = ump->um_fs; flags = fs->fs_flags; fs->fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); ffs_sbupdate(ump, MNT_WAIT, 1); fs->fs_flags = flags; return (0); } static void g_journal_ufs_dirty(struct g_consumer *cp) { struct fs *fs; int error, i, sb; if (SBLOCKSIZE % cp->provider->sectorsize != 0) return; for (i = 0; (sb = superblocks[i]) != -1; i++) { if (sb % cp->provider->sectorsize != 0) continue; fs = g_read_data(cp, sb, SBLOCKSIZE, NULL); if (fs == NULL) continue; if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC) { g_free(fs); continue; } GJ_DEBUG(0, "clean=%d flags=0x%x", fs->fs_clean, fs->fs_flags); fs->fs_clean = 0; fs->fs_flags |= FS_NEEDSFSCK | FS_UNCLEAN; error = g_write_data(cp, sb, fs, SBLOCKSIZE); g_free(fs); if (error != 0) { GJ_DEBUG(0, "Cannot mark file system %s as dirty " "(error=%d).", cp->provider->name, error); } else { GJ_DEBUG(0, "File system %s marked as dirty.", cp->provider->name); } } } const struct g_journal_desc g_journal_ufs = { .jd_fstype = "ufs", .jd_clean = g_journal_ufs_clean, .jd_dirty = g_journal_ufs_dirty }; MODULE_DEPEND(g_journal, ufs, 1, 1, 1); +MODULE_VERSION(geom_journal, 0); Index: stable/11/sys/geom/label/g_label.c =================================================================== --- stable/11/sys/geom/label/g_label.c (revision 332639) +++ stable/11/sys/geom/label/g_label.c (revision 332640) @@ -1,552 +1,553 @@ /*- * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_geom.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_label, "GEOM labeling support"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, label, CTLFLAG_RW, 0, "GEOM_LABEL stuff"); u_int g_label_debug = 0; SYSCTL_UINT(_kern_geom_label, OID_AUTO, debug, CTLFLAG_RWTUN, &g_label_debug, 0, "Debug level"); static int g_label_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static int g_label_destroy(struct g_geom *gp, boolean_t force); static struct g_geom *g_label_taste(struct g_class *mp, struct g_provider *pp, int flags __unused); static void g_label_config(struct gctl_req *req, struct g_class *mp, const char *verb); struct g_class g_label_class = { .name = G_LABEL_CLASS_NAME, .version = G_VERSION, .ctlreq = g_label_config, .taste = g_label_taste, .destroy_geom = g_label_destroy_geom }; /* * To add a new file system where you want to look for volume labels, * you have to: * 1. Add a file g_label_.c which implements labels recognition. * 2. Add an 'extern const struct g_label_desc g_label_;' into * g_label.h file. * 3. Add an element to the table below '&g_label_,'. * 4. Add your file to sys/conf/files. * 5. Add your file to sys/modules/geom/geom_label/Makefile. * 6. Add your file system to manual page sbin/geom/class/label/glabel.8. */ const struct g_label_desc *g_labels[] = { &g_label_gpt, &g_label_gpt_uuid, #ifdef GEOM_LABEL &g_label_ufs_id, &g_label_ufs_volume, &g_label_iso9660, &g_label_msdosfs, &g_label_ext2fs, &g_label_reiserfs, &g_label_ntfs, &g_label_disk_ident, #endif NULL }; void g_label_rtrim(char *label, size_t size) { ptrdiff_t i; for (i = size - 1; i >= 0; i--) { if (label[i] == '\0') continue; else if (label[i] == ' ') label[i] = '\0'; else break; } } static int g_label_destroy_geom(struct gctl_req *req __unused, struct g_class *mp, struct g_geom *gp __unused) { /* * XXX: Unloading a class which is using geom_slice:1.56 is currently * XXX: broken, so we deny unloading when we have geoms. 
*/ return (EOPNOTSUPP); } static void g_label_orphan(struct g_consumer *cp) { G_LABEL_DEBUG(1, "Label %s removed.", LIST_FIRST(&cp->geom->provider)->name); g_slice_orphan(cp); } static void g_label_spoiled(struct g_consumer *cp) { G_LABEL_DEBUG(1, "Label %s removed.", LIST_FIRST(&cp->geom->provider)->name); g_slice_spoiled(cp); } static void g_label_resize(struct g_consumer *cp) { G_LABEL_DEBUG(1, "Label %s resized.", LIST_FIRST(&cp->geom->provider)->name); g_slice_config(cp->geom, 0, G_SLICE_CONFIG_FORCE, (off_t)0, cp->provider->mediasize, cp->provider->sectorsize, "notused"); } static int g_label_is_name_ok(const char *label) { const char *s; /* Check if the label starts from ../ */ if (strncmp(label, "../", 3) == 0) return (0); /* Check if the label contains /../ */ if (strstr(label, "/../") != NULL) return (0); /* Check if the label ends at ../ */ if ((s = strstr(label, "/..")) != NULL && s[3] == '\0') return (0); return (1); } static void g_label_mangle_name(char *label, size_t size) { struct sbuf *sb; const u_char *c; sb = sbuf_new(NULL, NULL, size, SBUF_FIXEDLEN); for (c = label; *c != '\0'; c++) { if (!isprint(*c) || isspace(*c) || *c =='"' || *c == '%') sbuf_printf(sb, "%%%02X", *c); else sbuf_putc(sb, *c); } if (sbuf_finish(sb) != 0) label[0] = '\0'; else strlcpy(label, sbuf_data(sb), size); sbuf_delete(sb); } static struct g_geom * g_label_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, const char *label, const char *dir, off_t mediasize) { struct g_geom *gp; struct g_provider *pp2; struct g_consumer *cp; char name[64]; g_topology_assert(); if (!g_label_is_name_ok(label)) { G_LABEL_DEBUG(0, "%s contains suspicious label, skipping.", pp->name); G_LABEL_DEBUG(1, "%s suspicious label is: %s", pp->name, label); if (req != NULL) gctl_error(req, "Label name %s is invalid.", label); return (NULL); } gp = NULL; cp = NULL; snprintf(name, sizeof(name), "%s/%s", dir, label); LIST_FOREACH(gp, &mp->geom, geom) { pp2 = LIST_FIRST(&gp->provider); if (pp2 == NULL) continue; if ((pp2->flags & G_PF_ORPHAN) != 0) continue; if (strcmp(pp2->name, name) == 0) { G_LABEL_DEBUG(1, "Label %s(%s) already exists (%s).", label, name, pp->name); if (req != NULL) { gctl_error(req, "Provider %s already exists.", name); } return (NULL); } } gp = g_slice_new(mp, 1, pp, &cp, NULL, 0, NULL); if (gp == NULL) { G_LABEL_DEBUG(0, "Cannot create slice %s.", label); if (req != NULL) gctl_error(req, "Cannot create slice %s.", label); return (NULL); } gp->orphan = g_label_orphan; gp->spoiled = g_label_spoiled; gp->resize = g_label_resize; g_access(cp, -1, 0, 0); g_slice_config(gp, 0, G_SLICE_CONFIG_SET, (off_t)0, mediasize, pp->sectorsize, "%s", name); G_LABEL_DEBUG(1, "Label for provider %s is %s.", pp->name, name); return (gp); } static int g_label_destroy(struct g_geom *gp, boolean_t force) { struct g_provider *pp; g_topology_assert(); pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_LABEL_DEBUG(0, "Provider %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_LABEL_DEBUG(1, "Provider %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else if (pp != NULL) G_LABEL_DEBUG(1, "Label %s removed.", pp->name); g_slice_spoiled(LIST_FIRST(&gp->consumer)); return (0); } static int g_label_read_metadata(struct g_consumer *cp, struct g_label_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); pp = cp->provider; g_topology_unlock(); buf = 
g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); if (buf == NULL) return (error); /* Decode metadata. */ label_metadata_decode(buf, md); g_free(buf); return (0); } static void g_label_orphan_taste(struct g_consumer *cp __unused) { KASSERT(1 == 0, ("%s called?", __func__)); } static void g_label_start_taste(struct bio *bp __unused) { KASSERT(1 == 0, ("%s called?", __func__)); } static int g_label_access_taste(struct g_provider *pp __unused, int dr __unused, int dw __unused, int de __unused) { KASSERT(1 == 0, ("%s called", __func__)); return (EOPNOTSUPP); } static struct g_geom * g_label_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_label_metadata md; struct g_consumer *cp; struct g_geom *gp; int i; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); G_LABEL_DEBUG(2, "Tasting %s.", pp->name); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); if (strcmp(pp->geom->class->name, mp->name) == 0) return (NULL); gp = g_new_geomf(mp, "label:taste"); gp->start = g_label_start_taste; gp->access = g_label_access_taste; gp->orphan = g_label_orphan_taste; cp = g_new_consumer(gp); g_attach(cp, pp); if (g_access(cp, 1, 0, 0) != 0) goto end; do { if (g_label_read_metadata(cp, &md) != 0) break; if (strcmp(md.md_magic, G_LABEL_MAGIC) != 0) break; if (md.md_version > G_LABEL_VERSION) { printf("geom_label.ko module is too old to handle %s.\n", pp->name); break; } /* * Backward compatibility: */ /* * There was no md_provsize field in earlier versions of * metadata. */ if (md.md_version < 2) md.md_provsize = pp->mediasize; if (md.md_provsize != pp->mediasize) break; g_label_create(NULL, mp, pp, md.md_label, G_LABEL_DIR, pp->mediasize - pp->sectorsize); } while (0); for (i = 0; g_labels[i] != NULL; i++) { char label[128]; if (g_labels[i]->ld_enabled == 0) continue; g_topology_unlock(); g_labels[i]->ld_taste(cp, label, sizeof(label)); g_label_mangle_name(label, sizeof(label)); g_topology_lock(); if (label[0] == '\0') continue; g_label_create(NULL, mp, pp, label, g_labels[i]->ld_dir, pp->mediasize); } g_access(cp, -1, 0, 0); end: g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } static void g_label_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; const char *name; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs != 2) { gctl_error(req, "Invalid number of arguments."); return; } /* * arg1 is the name of provider. */ name = gctl_get_asciiparam(req, "arg1"); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", 1); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_LABEL_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } /* * arg0 is the label. 
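 * For example, "glabel label foo da0" should arrive here with
 * arg1 = "da0" (a leading "/dev/" is stripped above) and arg0 = "foo",
 * and ends up creating a provider under G_LABEL_DIR, typically
 * label/foo, on top of da0.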
*/ name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", 0); return; } g_label_create(req, mp, pp, name, G_LABEL_DIR, pp->mediasize); } static const char * g_label_skip_dir(const char *name) { char path[64]; u_int i; if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); if (strncmp(name, G_LABEL_DIR "/", strlen(G_LABEL_DIR "/")) == 0) name += strlen(G_LABEL_DIR "/"); for (i = 0; g_labels[i] != NULL; i++) { snprintf(path, sizeof(path), "%s/", g_labels[i]->ld_dir); if (strncmp(name, path, strlen(path)) == 0) { name += strlen(path); break; } } return (name); } static struct g_geom * g_label_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; struct g_provider *pp; const char *pname; name = g_label_skip_dir(name); LIST_FOREACH(gp, &mp->geom, geom) { pp = LIST_FIRST(&gp->provider); pname = g_label_skip_dir(pp->name); if (strcmp(pname, name) == 0) return (gp); } return (NULL); } static void g_label_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_geom *gp; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } gp = g_label_find_geom(mp, name); if (gp == NULL) { G_LABEL_DEBUG(1, "Label %s is invalid.", name); gctl_error(req, "Label %s is invalid.", name); return; } error = g_label_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy label %s (error=%d).", LIST_FIRST(&gp->provider)->name, error); return; } } } static void g_label_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_LABEL_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_label_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_label_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } DECLARE_GEOM_CLASS(g_label_class, g_label); +MODULE_VERSION(geom_label, 0); Index: stable/11/sys/geom/linux_lvm/g_linux_lvm.c =================================================================== --- stable/11/sys/geom/linux_lvm/g_linux_lvm.c (revision 332639) +++ stable/11/sys/geom/linux_lvm/g_linux_lvm.c (revision 332640) @@ -1,1190 +1,1191 @@ /*- * Copyright (c) 2008 Andrew Thompson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_linux_lvm, "GEOM Linux LVM partitioning support"); /* Declare malloc(9) label */ static MALLOC_DEFINE(M_GLLVM, "gllvm", "GEOM_LINUX_LVM Data"); /* GEOM class methods */ static g_access_t g_llvm_access; static g_init_t g_llvm_init; static g_orphan_t g_llvm_orphan; static g_orphan_t g_llvm_taste_orphan; static g_start_t g_llvm_start; static g_taste_t g_llvm_taste; static g_ctl_destroy_geom_t g_llvm_destroy_geom; static void g_llvm_done(struct bio *); static void g_llvm_remove_disk(struct g_llvm_vg *, struct g_consumer *); static int g_llvm_activate_lv(struct g_llvm_vg *, struct g_llvm_lv *); static int g_llvm_add_disk(struct g_llvm_vg *, struct g_provider *, char *); static void g_llvm_free_vg(struct g_llvm_vg *); static int g_llvm_destroy(struct g_llvm_vg *, int); static int g_llvm_read_label(struct g_consumer *, struct g_llvm_label *); static int g_llvm_read_md(struct g_consumer *, struct g_llvm_metadata *, struct g_llvm_label *); static int llvm_label_decode(const u_char *, struct g_llvm_label *, int); static int llvm_md_decode(const u_char *, struct g_llvm_metadata *, struct g_llvm_label *); static int llvm_textconf_decode(u_char *, int, struct g_llvm_metadata *); static int llvm_textconf_decode_pv(char **, char *, struct g_llvm_vg *); static int llvm_textconf_decode_lv(char **, char *, struct g_llvm_vg *); static int llvm_textconf_decode_sg(char **, char *, struct g_llvm_lv *); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, linux_lvm, CTLFLAG_RW, 0, "GEOM_LINUX_LVM stuff"); static u_int g_llvm_debug = 0; SYSCTL_UINT(_kern_geom_linux_lvm, OID_AUTO, debug, CTLFLAG_RWTUN, &g_llvm_debug, 0, "Debug level"); LIST_HEAD(, g_llvm_vg) vg_list; /* * Called to notify geom when it's been opened, and for what intent */ static int g_llvm_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *c; struct g_llvm_vg *vg; struct g_geom *gp; int error; KASSERT(pp != NULL, ("%s: NULL provider", __func__)); gp = pp->geom; KASSERT(gp != NULL, ("%s: NULL geom", __func__)); vg = gp->softc; if (vg == NULL) { /* It seems that .access can be called with negative dr,dw,dx * in this case but I want to check for myself */ G_LLVM_DEBUG(0, "access(%d, %d, %d) for %s", dr, dw, de, pp->name); /* This should only happen when geom is withered so * allow only negative requests */ KASSERT(dr <= 0 && dw <= 0 && de <= 0, ("%s: Positive access for %s", __func__, pp->name)); if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) G_LLVM_DEBUG(0, "Device %s definitely destroyed", pp->name); return (0); } /* Grab an exclusive bit to propagate on our consumers on first open */ if (pp->acr == 
0 && pp->acw == 0 && pp->ace == 0) de++; /* ... drop it on close */ if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) de--; error = ENXIO; LIST_FOREACH(c, &gp->consumer, consumer) { KASSERT(c != NULL, ("%s: consumer is NULL", __func__)); error = g_access(c, dr, dw, de); if (error != 0) { struct g_consumer *c2; /* Backout earlier changes */ LIST_FOREACH(c2, &gp->consumer, consumer) { if (c2 == c) /* all eariler components fixed */ return (error); g_access(c2, -dr, -dw, -de); } } } return (error); } /* * Dismantle bio_queue and destroy its components */ static void bioq_dismantle(struct bio_queue_head *bq) { struct bio *b; for (b = bioq_first(bq); b != NULL; b = bioq_first(bq)) { bioq_remove(bq, b); g_destroy_bio(b); } } /* * GEOM .done handler * Can't use standard handler because one requested IO may * fork into additional data IOs */ static void g_llvm_done(struct bio *b) { struct bio *parent_b; parent_b = b->bio_parent; if (b->bio_error != 0) { G_LLVM_DEBUG(0, "Error %d for offset=%ju, length=%ju on %s", b->bio_error, b->bio_offset, b->bio_length, b->bio_to->name); if (parent_b->bio_error == 0) parent_b->bio_error = b->bio_error; } parent_b->bio_inbed++; parent_b->bio_completed += b->bio_completed; if (parent_b->bio_children == parent_b->bio_inbed) { parent_b->bio_completed = parent_b->bio_length; g_io_deliver(parent_b, parent_b->bio_error); } g_destroy_bio(b); } static void g_llvm_start(struct bio *bp) { struct g_provider *pp; struct g_llvm_vg *vg; struct g_llvm_pv *pv; struct g_llvm_lv *lv; struct g_llvm_segment *sg; struct bio *cb; struct bio_queue_head bq; size_t chunk_size; off_t offset, length; char *addr; u_int count; pp = bp->bio_to; lv = pp->private; vg = pp->geom->softc; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: /* XXX BIO_GETATTR allowed? */ break; default: g_io_deliver(bp, EOPNOTSUPP); return; } bioq_init(&bq); chunk_size = vg->vg_extentsize; addr = bp->bio_data; offset = bp->bio_offset; /* virtual offset and length */ length = bp->bio_length; while (length > 0) { size_t chunk_index, in_chunk_offset, in_chunk_length; pv = NULL; cb = g_clone_bio(bp); if (cb == NULL) { bioq_dismantle(&bq); if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } /* get the segment and the pv */ if (lv->lv_sgcount == 1) { /* skip much of the calculations for a single sg */ chunk_index = 0; in_chunk_offset = 0; in_chunk_length = length; sg = lv->lv_firstsg; pv = sg->sg_pv; cb->bio_offset = offset + sg->sg_pvoffset; } else { chunk_index = offset / chunk_size; /* round downwards */ in_chunk_offset = offset % chunk_size; in_chunk_length = min(length, chunk_size - in_chunk_offset); /* XXX could be faster */ LIST_FOREACH(sg, &lv->lv_segs, sg_next) { if (chunk_index >= sg->sg_start && chunk_index <= sg->sg_end) { /* adjust chunk index for sg start */ chunk_index -= sg->sg_start; pv = sg->sg_pv; break; } } cb->bio_offset = (off_t)chunk_index * (off_t)chunk_size + in_chunk_offset + sg->sg_pvoffset; } KASSERT(pv != NULL, ("Can't find PV for chunk %zu", chunk_index)); cb->bio_to = pv->pv_gprov; cb->bio_done = g_llvm_done; cb->bio_length = in_chunk_length; cb->bio_data = addr; cb->bio_caller1 = pv; bioq_disksort(&bq, cb); G_LLVM_DEBUG(5, "Mapped %s(%ju, %ju) on %s to %zu(%zu,%zu) @ %s:%ju", bp->bio_cmd == BIO_READ ? 
"R" : "W", offset, length, lv->lv_name, chunk_index, in_chunk_offset, in_chunk_length, pv->pv_name, cb->bio_offset); addr += in_chunk_length; length -= in_chunk_length; offset += in_chunk_length; } /* Fire off bio's here */ count = 0; for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) { bioq_remove(&bq, cb); pv = cb->bio_caller1; cb->bio_caller1 = NULL; G_LLVM_DEBUG(6, "firing bio to %s, offset=%ju, length=%ju", cb->bio_to->name, cb->bio_offset, cb->bio_length); g_io_request(cb, pv->pv_gcons); count++; } if (count == 0) { /* We handled everything locally */ bp->bio_completed = bp->bio_length; g_io_deliver(bp, 0); } } static void g_llvm_remove_disk(struct g_llvm_vg *vg, struct g_consumer *cp) { struct g_llvm_pv *pv; struct g_llvm_lv *lv; struct g_llvm_segment *sg; int found; KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); pv = (struct g_llvm_pv *)cp->private; G_LLVM_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, pv->pv_name); LIST_FOREACH(lv, &vg->vg_lvs, lv_next) { /* Find segments that map to this disk */ found = 0; LIST_FOREACH(sg, &lv->lv_segs, sg_next) { if (sg->sg_pv == pv) { sg->sg_pv = NULL; lv->lv_sgactive--; found = 1; break; } } if (found) { G_LLVM_DEBUG(0, "Device %s removed.", lv->lv_gprov->name); g_wither_provider(lv->lv_gprov, ENXIO); lv->lv_gprov = NULL; } } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); g_destroy_consumer(cp); } static void g_llvm_orphan(struct g_consumer *cp) { struct g_llvm_vg *vg; struct g_geom *gp; g_topology_assert(); gp = cp->geom; vg = gp->softc; if (vg == NULL) return; g_llvm_remove_disk(vg, cp); g_llvm_destroy(vg, 1); } static int g_llvm_activate_lv(struct g_llvm_vg *vg, struct g_llvm_lv *lv) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); KASSERT(lv->lv_sgactive == lv->lv_sgcount, ("segment missing")); gp = vg->vg_geom; pp = g_new_providerf(gp, "linux_lvm/%s-%s", vg->vg_name, lv->lv_name); pp->mediasize = vg->vg_extentsize * (off_t)lv->lv_extentcount; pp->sectorsize = vg->vg_sectorsize; g_error_provider(pp, 0); lv->lv_gprov = pp; pp->private = lv; G_LLVM_DEBUG(1, "Created %s, %juM", pp->name, pp->mediasize / (1024*1024)); return (0); } static int g_llvm_add_disk(struct g_llvm_vg *vg, struct g_provider *pp, char *uuid) { struct g_geom *gp; struct g_consumer *cp, *fcp; struct g_llvm_pv *pv; struct g_llvm_lv *lv; struct g_llvm_segment *sg; int error; g_topology_assert(); LIST_FOREACH(pv, &vg->vg_pvs, pv_next) { if (strcmp(pv->pv_uuid, uuid) == 0) break; /* found it */ } if (pv == NULL) { G_LLVM_DEBUG(3, "uuid %s not found in pv list", uuid); return (ENOENT); } if (pv->pv_gprov != NULL) { G_LLVM_DEBUG(0, "disk %s already initialised in %s", pv->pv_name, vg->vg_name); return (EEXIST); } pv->pv_start *= vg->vg_sectorsize; gp = vg->vg_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); error = g_attach(cp, pp); G_LLVM_DEBUG(1, "Attached %s to %s at offset %ju", pp->name, pv->pv_name, pv->pv_start); if (error != 0) { G_LLVM_DEBUG(0, "cannot attach %s to %s", pp->name, vg->vg_name); g_destroy_consumer(cp); return (error); } if (fcp != NULL) { if (fcp->provider->sectorsize != pp->sectorsize) { G_LLVM_DEBUG(0, "Provider %s of %s has invalid " "sector size (%d)", pp->name, vg->vg_name, pp->sectorsize); return (EINVAL); } if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) { /* Replicate access permissions from first "live" * consumer to the new one */ error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); 
g_destroy_consumer(cp); return (error); } } } cp->private = pv; pv->pv_gcons = cp; pv->pv_gprov = pp; LIST_FOREACH(lv, &vg->vg_lvs, lv_next) { /* Find segments that map to this disk */ LIST_FOREACH(sg, &lv->lv_segs, sg_next) { if (strcmp(sg->sg_pvname, pv->pv_name) == 0) { /* avtivate the segment */ KASSERT(sg->sg_pv == NULL, ("segment already mapped")); sg->sg_pvoffset = (off_t)sg->sg_pvstart * vg->vg_extentsize + pv->pv_start; sg->sg_pv = pv; lv->lv_sgactive++; G_LLVM_DEBUG(2, "%s: %d to %d @ %s:%d" " offset %ju sector %ju", lv->lv_name, sg->sg_start, sg->sg_end, sg->sg_pvname, sg->sg_pvstart, sg->sg_pvoffset, sg->sg_pvoffset / vg->vg_sectorsize); } } /* Activate any lvs waiting on this disk */ if (lv->lv_gprov == NULL && lv->lv_sgactive == lv->lv_sgcount) { error = g_llvm_activate_lv(vg, lv); if (error) break; } } return (error); } static void g_llvm_init(struct g_class *mp) { LIST_INIT(&vg_list); } static void g_llvm_free_vg(struct g_llvm_vg *vg) { struct g_llvm_pv *pv; struct g_llvm_lv *lv; struct g_llvm_segment *sg; /* Free all the structures */ while ((pv = LIST_FIRST(&vg->vg_pvs)) != NULL) { LIST_REMOVE(pv, pv_next); free(pv, M_GLLVM); } while ((lv = LIST_FIRST(&vg->vg_lvs)) != NULL) { while ((sg = LIST_FIRST(&lv->lv_segs)) != NULL) { LIST_REMOVE(sg, sg_next); free(sg, M_GLLVM); } LIST_REMOVE(lv, lv_next); free(lv, M_GLLVM); } LIST_REMOVE(vg, vg_next); free(vg, M_GLLVM); } static void g_llvm_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static struct g_geom * g_llvm_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_consumer *cp; struct g_geom *gp; struct g_llvm_label ll; struct g_llvm_metadata md; struct g_llvm_vg *vg; int error; bzero(&md, sizeof(md)); g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); gp = g_new_geomf(mp, "linux_lvm:taste"); /* This orphan function should be never called. 
*/ gp->orphan = g_llvm_taste_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_llvm_read_label(cp, &ll); if (!error) error = g_llvm_read_md(cp, &md, &ll); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); vg = md.md_vg; if (vg->vg_geom == NULL) { /* new volume group */ gp = g_new_geomf(mp, "%s", vg->vg_name); gp->start = g_llvm_start; gp->spoiled = g_llvm_orphan; gp->orphan = g_llvm_orphan; gp->access = g_llvm_access; vg->vg_sectorsize = pp->sectorsize; vg->vg_extentsize *= vg->vg_sectorsize; vg->vg_geom = gp; gp->softc = vg; G_LLVM_DEBUG(1, "Created volume %s, extent size %zuK", vg->vg_name, vg->vg_extentsize / 1024); } /* initialise this disk in the volume group */ g_llvm_add_disk(vg, pp, ll.ll_uuid); return (vg->vg_geom); } static int g_llvm_destroy(struct g_llvm_vg *vg, int force) { struct g_provider *pp; struct g_geom *gp; g_topology_assert(); if (vg == NULL) return (ENXIO); gp = vg->vg_geom; LIST_FOREACH(pp, &gp->provider, provider) { if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) { G_LLVM_DEBUG(1, "Device %s is still open (r%dw%de%d)", pp->name, pp->acr, pp->acw, pp->ace); if (!force) return (EBUSY); } } g_llvm_free_vg(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static int g_llvm_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_llvm_vg *vg; vg = gp->softc; return (g_llvm_destroy(vg, 0)); } int g_llvm_read_label(struct g_consumer *cp, struct g_llvm_label *ll) { struct g_provider *pp; u_char *buf; int i, error = 0; g_topology_assert(); /* The LVM label is stored on the first four sectors */ error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, 0, pp->sectorsize * 4, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_LLVM_DEBUG(1, "Cannot read metadata from %s (error=%d)", pp->name, error); return (error); } /* Search the four sectors for the LVM label. 
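An LVM2 label may be stored in any of the first four sectors of the physical volume (the userland tools normally write it to the second sector), so each candidate sector is tried until llvm_label_decode() accepts one.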
*/ for (i = 0; i < 4; i++) { error = llvm_label_decode(&buf[i * pp->sectorsize], ll, i); if (error == 0) break; /* found it */ } g_free(buf); return (error); } int g_llvm_read_md(struct g_consumer *cp, struct g_llvm_metadata *md, struct g_llvm_label *ll) { struct g_provider *pp; u_char *buf; int error; int size; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, ll->ll_md_offset, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_LLVM_DEBUG(0, "Cannot read metadata from %s (error=%d)", cp->provider->name, error); return (error); } error = llvm_md_decode(buf, md, ll); g_free(buf); if (error != 0) { return (error); } G_LLVM_DEBUG(1, "reading LVM2 config @ %s:%ju", pp->name, ll->ll_md_offset + md->md_reloffset); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); /* round up to the nearest sector */ size = md->md_relsize + (pp->sectorsize - md->md_relsize % pp->sectorsize); buf = g_read_data(cp, ll->ll_md_offset + md->md_reloffset, size, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_LLVM_DEBUG(0, "Cannot read LVM2 config from %s (error=%d)", pp->name, error); return (error); } buf[md->md_relsize] = '\0'; G_LLVM_DEBUG(10, "LVM config:\n%s\n", buf); error = llvm_textconf_decode(buf, md->md_relsize, md); g_free(buf); return (error); } static int llvm_label_decode(const u_char *data, struct g_llvm_label *ll, int sector) { uint64_t off; char *uuid; /* Magic string */ if (bcmp("LABELONE", data , 8) != 0) return (EINVAL); /* We only support LVM2 text format */ if (bcmp("LVM2 001", data + 24, 8) != 0) { G_LLVM_DEBUG(0, "Unsupported LVM format"); return (EINVAL); } ll->ll_sector = le64dec(data + 8); ll->ll_crc = le32dec(data + 16); ll->ll_offset = le32dec(data + 20); if (ll->ll_sector != sector) { G_LLVM_DEBUG(0, "Expected sector %ju, found at %d", ll->ll_sector, sector); return (EINVAL); } off = ll->ll_offset; /* * convert the binary uuid to string format, the format is * xxxxxx-xxxx-xxxx-xxxx-xxxx-xxxx-xxxxxx (6-4-4-4-4-4-6) */ uuid = ll->ll_uuid; bcopy(data + off, uuid, 6); off += 6; uuid += 6; *uuid++ = '-'; for (int i = 0; i < 5; i++) { bcopy(data + off, uuid, 4); off += 4; uuid += 4; *uuid++ = '-'; } bcopy(data + off, uuid, 6); off += 6; uuid += 6; *uuid++ = '\0'; ll->ll_size = le64dec(data + off); off += 8; ll->ll_pestart = le64dec(data + off); off += 16; /* Only one data section is supported */ if (le64dec(data + off) != 0) { G_LLVM_DEBUG(0, "Only one data section supported"); return (EINVAL); } off += 16; ll->ll_md_offset = le64dec(data + off); off += 8; ll->ll_md_size = le64dec(data + off); off += 8; G_LLVM_DEBUG(1, "LVM metadata: offset=%ju, size=%ju", ll->ll_md_offset, ll->ll_md_size); /* Only one data section is supported */ if (le64dec(data + off) != 0) { G_LLVM_DEBUG(0, "Only one metadata section supported"); return (EINVAL); } G_LLVM_DEBUG(2, "label uuid=%s", ll->ll_uuid); G_LLVM_DEBUG(2, "sector=%ju, crc=%u, offset=%u, size=%ju, pestart=%ju", ll->ll_sector, ll->ll_crc, ll->ll_offset, ll->ll_size, ll->ll_pestart); return (0); } static int llvm_md_decode(const u_char *data, struct g_llvm_metadata *md, struct g_llvm_label *ll) { uint64_t off; char magic[16]; off = 0; md->md_csum = le32dec(data + off); off += 4; bcopy(data + off, magic, 16); off += 16; md->md_version = le32dec(data + off); off += 4; md->md_start = le64dec(data + off); off += 8; md->md_size = le64dec(data + off); 
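/* Decoded above is the fixed metadata-area header: 32-bit CRC, 16-byte magic, 32-bit format version, then the 64-bit start offset and size of the metadata area, all little-endian; the fields are validated below. */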
off += 8; if (bcmp(G_LLVM_MAGIC, magic, 16) != 0) { G_LLVM_DEBUG(0, "Incorrect md magic number"); return (EINVAL); } if (md->md_version != 1) { G_LLVM_DEBUG(0, "Incorrect md version number (%u)", md->md_version); return (EINVAL); } if (md->md_start != ll->ll_md_offset) { G_LLVM_DEBUG(0, "Incorrect md offset (%ju)", md->md_start); return (EINVAL); } /* Aparently only one is ever returned */ md->md_reloffset = le64dec(data + off); off += 8; md->md_relsize = le64dec(data + off); off += 16; /* XXX skipped checksum */ if (le64dec(data + off) != 0) { G_LLVM_DEBUG(0, "Only one reloc supported"); return (EINVAL); } G_LLVM_DEBUG(3, "reloc: offset=%ju, size=%ju", md->md_reloffset, md->md_relsize); G_LLVM_DEBUG(3, "md: version=%u, start=%ju, size=%ju", md->md_version, md->md_start, md->md_size); return (0); } #define GRAB_INT(key, tok1, tok2, v) \ if (tok1 && tok2 && strncmp(tok1, key, sizeof(key)) == 0) { \ v = strtol(tok2, &tok1, 10); \ if (tok1 == tok2) \ /* strtol did not eat any of the buffer */ \ goto bad; \ continue; \ } #define GRAB_STR(key, tok1, tok2, v, len) \ if (tok1 && tok2 && strncmp(tok1, key, sizeof(key)) == 0) { \ strsep(&tok2, "\""); \ if (tok2 == NULL) \ continue; \ tok1 = strsep(&tok2, "\""); \ if (tok2 == NULL) \ continue; \ strncpy(v, tok1, len); \ continue; \ } #define SPLIT(key, value, str) \ key = strsep(&value, str); \ /* strip trailing whitespace on the key */ \ for (char *t = key; *t != '\0'; t++) \ if (isspace(*t)) { \ *t = '\0'; \ break; \ } static size_t llvm_grab_name(char *name, const char *tok) { size_t len; len = 0; if (tok == NULL) return (0); if (tok[0] == '-') return (0); if (strcmp(tok, ".") == 0 || strcmp(tok, "..") == 0) return (0); while (tok[len] && (isalpha(tok[len]) || isdigit(tok[len]) || tok[len] == '.' || tok[len] == '_' || tok[len] == '-' || tok[len] == '+') && len < G_LLVM_NAMELEN - 1) len++; bcopy(tok, name, len); name[len] = '\0'; return (len); } static int llvm_textconf_decode(u_char *data, int buflen, struct g_llvm_metadata *md) { struct g_llvm_vg *vg; char *buf = data; char *tok, *v; char name[G_LLVM_NAMELEN]; char uuid[G_LLVM_UUIDLEN]; size_t len; if (buf == NULL || *buf == '\0') return (EINVAL); tok = strsep(&buf, "\n"); if (tok == NULL) return (EINVAL); len = llvm_grab_name(name, tok); if (len == 0) return (EINVAL); /* check too see if the vg has already been loaded off another disk */ LIST_FOREACH(vg, &vg_list, vg_next) { if (strcmp(vg->vg_name, name) == 0) { uuid[0] = '\0'; /* grab the volume group uuid */ while ((tok = strsep(&buf, "\n")) != NULL) { if (strstr(tok, "{")) break; if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_STR("id", v, tok, uuid, sizeof(uuid)); } } if (strcmp(vg->vg_uuid, uuid) == 0) { /* existing vg */ md->md_vg = vg; return (0); } /* XXX different volume group with name clash! 
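The name matches a volume group that is already loaded but the uuid does not, so this is an unrelated group that happens to reuse the name; refuse to load it rather than merge two distinct volume groups.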
*/ G_LLVM_DEBUG(0, "%s already exists, volume group not loaded", name); return (EINVAL); } } vg = malloc(sizeof(*vg), M_GLLVM, M_NOWAIT|M_ZERO); if (vg == NULL) return (ENOMEM); strncpy(vg->vg_name, name, sizeof(vg->vg_name)); LIST_INIT(&vg->vg_pvs); LIST_INIT(&vg->vg_lvs); #define VOL_FOREACH(func, tok, buf, p) \ while ((tok = strsep(buf, "\n")) != NULL) { \ if (strstr(tok, "{")) { \ func(buf, tok, p); \ continue; \ } \ if (strstr(tok, "}")) \ break; \ } while ((tok = strsep(&buf, "\n")) != NULL) { if (strcmp(tok, "physical_volumes {") == 0) { VOL_FOREACH(llvm_textconf_decode_pv, tok, &buf, vg); continue; } if (strcmp(tok, "logical_volumes {") == 0) { VOL_FOREACH(llvm_textconf_decode_lv, tok, &buf, vg); continue; } if (strstr(tok, "{")) { G_LLVM_DEBUG(2, "unknown section %s", tok); continue; } /* parse 'key = value' lines */ if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_STR("id", v, tok, vg->vg_uuid, sizeof(vg->vg_uuid)); GRAB_INT("extent_size", v, tok, vg->vg_extentsize); continue; } } /* basic checking */ if (vg->vg_extentsize == 0) goto bad; md->md_vg = vg; LIST_INSERT_HEAD(&vg_list, vg, vg_next); G_LLVM_DEBUG(3, "vg: name=%s uuid=%s", vg->vg_name, vg->vg_uuid); return(0); bad: g_llvm_free_vg(vg); return (-1); } #undef VOL_FOREACH static int llvm_textconf_decode_pv(char **buf, char *tok, struct g_llvm_vg *vg) { struct g_llvm_pv *pv; char *v; size_t len; if (*buf == NULL || **buf == '\0') return (EINVAL); pv = malloc(sizeof(*pv), M_GLLVM, M_NOWAIT|M_ZERO); if (pv == NULL) return (ENOMEM); pv->pv_vg = vg; len = 0; if (tok == NULL) goto bad; len = llvm_grab_name(pv->pv_name, tok); if (len == 0) goto bad; while ((tok = strsep(buf, "\n")) != NULL) { if (strstr(tok, "{")) goto bad; if (strstr(tok, "}")) break; /* parse 'key = value' lines */ if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_STR("id", v, tok, pv->pv_uuid, sizeof(pv->pv_uuid)); GRAB_INT("pe_start", v, tok, pv->pv_start); GRAB_INT("pe_count", v, tok, pv->pv_count); continue; } } if (tok == NULL) goto bad; /* basic checking */ if (pv->pv_count == 0) goto bad; LIST_INSERT_HEAD(&vg->vg_pvs, pv, pv_next); G_LLVM_DEBUG(3, "pv: name=%s uuid=%s", pv->pv_name, pv->pv_uuid); return (0); bad: free(pv, M_GLLVM); return (-1); } static int llvm_textconf_decode_lv(char **buf, char *tok, struct g_llvm_vg *vg) { struct g_llvm_lv *lv; struct g_llvm_segment *sg; char *v; size_t len; if (*buf == NULL || **buf == '\0') return (EINVAL); lv = malloc(sizeof(*lv), M_GLLVM, M_NOWAIT|M_ZERO); if (lv == NULL) return (ENOMEM); lv->lv_vg = vg; LIST_INIT(&lv->lv_segs); if (tok == NULL) goto bad; len = llvm_grab_name(lv->lv_name, tok); if (len == 0) goto bad; while ((tok = strsep(buf, "\n")) != NULL) { if (strstr(tok, "{")) { if (strstr(tok, "segment")) { llvm_textconf_decode_sg(buf, tok, lv); continue; } else /* unexpected section */ goto bad; } if (strstr(tok, "}")) break; /* parse 'key = value' lines */ if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_STR("id", v, tok, lv->lv_uuid, sizeof(lv->lv_uuid)); GRAB_INT("segment_count", v, tok, lv->lv_sgcount); continue; } } if (tok == NULL) goto bad; if (lv->lv_sgcount == 0 || lv->lv_sgcount != lv->lv_numsegs) /* zero or incomplete segment list */ goto bad; /* Optimize for only one segment on the pv */ lv->lv_firstsg = LIST_FIRST(&lv->lv_segs); LIST_INSERT_HEAD(&vg->vg_lvs, lv, lv_next); G_LLVM_DEBUG(3, "lv: name=%s uuid=%s", lv->lv_name, lv->lv_uuid); return (0); bad: while ((sg = LIST_FIRST(&lv->lv_segs)) != NULL) { LIST_REMOVE(sg, sg_next); free(sg, M_GLLVM); } free(lv, M_GLLVM); return (-1); } static int 
llvm_textconf_decode_sg(char **buf, char *tok, struct g_llvm_lv *lv) { struct g_llvm_segment *sg; char *v; int count = 0; if (*buf == NULL || **buf == '\0') return (EINVAL); sg = malloc(sizeof(*sg), M_GLLVM, M_NOWAIT|M_ZERO); if (sg == NULL) return (ENOMEM); while ((tok = strsep(buf, "\n")) != NULL) { /* only a single linear stripe is supported */ if (strstr(tok, "stripe_count")) { SPLIT(v, tok, "="); GRAB_INT("stripe_count", v, tok, count); if (count != 1) goto bad; } if (strstr(tok, "{")) goto bad; if (strstr(tok, "}")) break; if (strcmp(tok, "stripes = [") == 0) { tok = strsep(buf, "\n"); if (tok == NULL) goto bad; strsep(&tok, "\""); if (tok == NULL) goto bad; /* missing open quotes */ v = strsep(&tok, "\""); if (tok == NULL) goto bad; /* missing close quotes */ strncpy(sg->sg_pvname, v, sizeof(sg->sg_pvname)); if (*tok != ',') goto bad; /* missing comma for stripe */ tok++; sg->sg_pvstart = strtol(tok, &v, 10); if (v == tok) /* strtol did not eat any of the buffer */ goto bad; continue; } /* parse 'key = value' lines */ if (strstr(tok, "=")) { SPLIT(v, tok, "="); GRAB_INT("start_extent", v, tok, sg->sg_start); GRAB_INT("extent_count", v, tok, sg->sg_count); continue; } } if (tok == NULL) goto bad; /* basic checking */ if (count != 1 || sg->sg_count == 0) goto bad; sg->sg_end = sg->sg_start + sg->sg_count - 1; lv->lv_numsegs++; lv->lv_extentcount += sg->sg_count; LIST_INSERT_HEAD(&lv->lv_segs, sg, sg_next); return (0); bad: free(sg, M_GLLVM); return (-1); } #undef GRAB_INT #undef GRAB_STR #undef SPLIT static struct g_class g_llvm_class = { .name = G_LLVM_CLASS_NAME, .version = G_VERSION, .init = g_llvm_init, .taste = g_llvm_taste, .destroy_geom = g_llvm_destroy_geom }; DECLARE_GEOM_CLASS(g_llvm_class, g_linux_lvm); +MODULE_VERSION(geom_linux_lvm, 0); Index: stable/11/sys/geom/mirror/g_mirror.c =================================================================== --- stable/11/sys/geom/mirror/g_mirror.c (revision 332639) +++ stable/11/sys/geom/mirror/g_mirror.c (revision 332640) @@ -1,3492 +1,3493 @@ /*- * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_mirror, "GEOM mirroring support"); static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff"); int g_mirror_debug = 0; SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0, "Debug level"); static u_int g_mirror_timeout = 4; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout, 0, "Time to wait on all mirror components"); static u_int g_mirror_idletime = 5; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN, &g_mirror_idletime, 0, "Mark components as clean when idling"); static u_int g_mirror_disconnect_on_failure = 1; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN, &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure."); static u_int g_mirror_syncreqs = 2; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN, &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests."); static u_int g_mirror_sync_period = 5; SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN, &g_mirror_sync_period, 0, "Metadata update period during synchronization, in seconds"); #define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ } while (0) static eventhandler_tag g_mirror_post_sync = NULL; static int g_mirror_shutdown = 0; static g_ctl_destroy_geom_t g_mirror_destroy_geom; static g_taste_t g_mirror_taste; static g_init_t g_mirror_init; static g_fini_t g_mirror_fini; static g_provgone_t g_mirror_providergone; static g_resize_t g_mirror_resize; struct g_class g_mirror_class = { .name = G_MIRROR_CLASS_NAME, .version = G_VERSION, .ctlreq = g_mirror_config, .taste = g_mirror_taste, .destroy_geom = g_mirror_destroy_geom, .init = g_mirror_init, .fini = g_mirror_fini, .providergone = g_mirror_providergone, .resize = g_mirror_resize }; static void g_mirror_destroy_provider(struct g_mirror_softc *sc); static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state); static void g_mirror_update_device(struct g_mirror_softc *sc, bool force); static void g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); static void g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp, off_t offset); static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type); static void g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp); static void g_mirror_sync_release(struct g_mirror_softc *sc); static const char * g_mirror_disk_state2str(int state) { switch (state) { case G_MIRROR_DISK_STATE_NONE: return ("NONE"); case G_MIRROR_DISK_STATE_NEW: return ("NEW"); case G_MIRROR_DISK_STATE_ACTIVE: return ("ACTIVE"); case G_MIRROR_DISK_STATE_STALE: return ("STALE"); case G_MIRROR_DISK_STATE_SYNCHRONIZING: return ("SYNCHRONIZING"); case G_MIRROR_DISK_STATE_DISCONNECTED: return ("DISCONNECTED"); case G_MIRROR_DISK_STATE_DESTROY: return ("DESTROY"); default: return ("INVALID"); } } static const char * g_mirror_device_state2str(int state) { switch (state) { case 
G_MIRROR_DEVICE_STATE_STARTING: return ("STARTING"); case G_MIRROR_DEVICE_STATE_RUNNING: return ("RUNNING"); default: return ("INVALID"); } } static const char * g_mirror_get_diskname(struct g_mirror_disk *disk) { if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) return ("[unknown]"); return (disk->d_name); } /* * --- Events handling functions --- * Events in geom_mirror are used to maintain disks and device status * from one thread to simplify locking. */ static void g_mirror_event_free(struct g_mirror_event *ep) { free(ep, M_MIRROR); } int g_mirror_event_send(void *arg, int state, int flags) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; struct g_mirror_event *ep; int error; ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK); G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep); if ((flags & G_MIRROR_EVENT_DEVICE) != 0) { disk = NULL; sc = arg; } else { disk = arg; sc = disk->d_softc; } ep->e_disk = disk; ep->e_state = state; ep->e_flags = flags; ep->e_error = 0; mtx_lock(&sc->sc_events_mtx); TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_events_mtx); G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); mtx_lock(&sc->sc_queue_mtx); wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0) return (0); sx_assert(&sc->sc_lock, SX_XLOCKED); G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep); sx_xunlock(&sc->sc_lock); while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) { mtx_lock(&sc->sc_events_mtx); MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event", hz * 5); } error = ep->e_error; g_mirror_event_free(ep); sx_xlock(&sc->sc_lock); return (error); } static struct g_mirror_event * g_mirror_event_first(struct g_mirror_softc *sc) { struct g_mirror_event *ep; mtx_lock(&sc->sc_events_mtx); ep = TAILQ_FIRST(&sc->sc_events); mtx_unlock(&sc->sc_events_mtx); return (ep); } static void g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep) { mtx_lock(&sc->sc_events_mtx); TAILQ_REMOVE(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_events_mtx); } static void g_mirror_event_cancel(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; struct g_mirror_event *ep, *tmpep; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); mtx_lock(&sc->sc_events_mtx); TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) continue; if (ep->e_disk != disk) continue; TAILQ_REMOVE(&sc->sc_events, ep, e_next); if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) g_mirror_event_free(ep); else { ep->e_error = ECANCELED; wakeup(ep); } } mtx_unlock(&sc->sc_events_mtx); } /* * Return the number of disks in given state. * If state is equal to -1, count all connected disks. */ u_int g_mirror_ndisks(struct g_mirror_softc *sc, int state) { struct g_mirror_disk *disk; u_int n = 0; sx_assert(&sc->sc_lock, SX_LOCKED); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (state == -1 || disk->d_state == state) n++; } return (n); } /* * Find a disk in mirror by its disk ID. 
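 * Returns NULL when no connected disk carries the given ID; the caller must hold sc_lock exclusively.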
*/ static struct g_mirror_disk * g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id) { struct g_mirror_disk *disk; sx_assert(&sc->sc_lock, SX_XLOCKED); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_id == id) return (disk); } return (NULL); } static u_int g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp) { struct bio *bp; u_int nreqs = 0; mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) { if (bp->bio_from == cp) nreqs++; } mtx_unlock(&sc->sc_queue_mtx); return (nreqs); } static int g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp) { if (cp->index > 0) { G_MIRROR_DEBUG(2, "I/O requests for %s exist, can't destroy it now.", cp->provider->name); return (1); } if (g_mirror_nrequests(sc, cp) > 0) { G_MIRROR_DEBUG(2, "I/O requests for %s in queue, can't destroy it now.", cp->provider->name); return (1); } return (0); } static void g_mirror_destroy_consumer(void *arg, int flags __unused) { struct g_consumer *cp; g_topology_assert(); cp = arg; G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); g_detach(cp); g_destroy_consumer(cp); } static void g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) { struct g_provider *pp; int retaste_wait; g_topology_assert(); cp->private = NULL; if (g_mirror_is_busy(sc, cp)) return; pp = cp->provider; retaste_wait = 0; if (cp->acw == 1) { if ((pp->geom->flags & G_GEOM_WITHER) == 0) retaste_wait = 1; } G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, -cp->acw, -cp->ace, 0); if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); if (retaste_wait) { /* * After retaste event was send (inside g_access()), we can send * event to detach and destroy consumer. * A class, which has consumer to the given provider connected * will not receive retaste event for the provider. * This is the way how I ignore retaste events when I close * consumers opened for write: I detach and destroy consumer * after retaste event is sent. */ g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL); return; } G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name); g_detach(cp); g_destroy_consumer(cp); } static int g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp) { struct g_consumer *cp; int error; g_topology_assert_not(); KASSERT(disk->d_consumer == NULL, ("Disk already connected (device %s).", disk->d_softc->sc_name)); g_topology_lock(); cp = g_new_consumer(disk->d_softc->sc_geom); cp->flags |= G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); g_topology_unlock(); return (error); } error = g_access(cp, 1, 1, 1); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).", pp->name, error); return (error); } g_topology_unlock(); disk->d_consumer = cp; disk->d_consumer->private = disk; disk->d_consumer->index = 0; G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk)); return (0); } static void g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) { g_topology_assert(); if (cp == NULL) return; if (cp->provider != NULL) g_mirror_kill_consumer(sc, cp); else g_destroy_consumer(cp); } /* * Initialize disk. This means allocate memory, create consumer, attach it * to the provider and open access (r1w1e1) to it. 
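 * On failure NULL is returned, *errorp (when provided) is set to the error, and any partially initialized disk structure is freed.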
*/ static struct g_mirror_disk * g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp, struct g_mirror_metadata *md, int *errorp) { struct g_mirror_disk *disk; int i, error; disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO); if (disk == NULL) { error = ENOMEM; goto fail; } disk->d_softc = sc; error = g_mirror_connect_disk(disk, pp); if (error != 0) goto fail; disk->d_id = md->md_did; disk->d_state = G_MIRROR_DISK_STATE_NONE; disk->d_priority = md->md_priority; disk->d_flags = md->md_dflags; error = g_getattr("GEOM::candelete", disk->d_consumer, &i); if (error == 0 && i != 0) disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE; if (md->md_provider[0] != '\0') disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED; disk->d_sync.ds_consumer = NULL; disk->d_sync.ds_offset = md->md_sync_offset; disk->d_sync.ds_offset_done = md->md_sync_offset; disk->d_sync.ds_update_ts = time_uptime; disk->d_genid = md->md_genid; disk->d_sync.ds_syncid = md->md_syncid; if (errorp != NULL) *errorp = 0; return (disk); fail: if (errorp != NULL) *errorp = error; if (disk != NULL) free(disk, M_MIRROR); return (NULL); } static void g_mirror_destroy_disk(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); LIST_REMOVE(disk, d_next); g_mirror_event_cancel(disk); if (sc->sc_hint == disk) sc->sc_hint = NULL; switch (disk->d_state) { case G_MIRROR_DISK_STATE_SYNCHRONIZING: g_mirror_sync_stop(disk, 1); /* FALLTHROUGH */ case G_MIRROR_DISK_STATE_NEW: case G_MIRROR_DISK_STATE_STALE: case G_MIRROR_DISK_STATE_ACTIVE: g_topology_lock(); g_mirror_disconnect_consumer(sc, disk->d_consumer); g_topology_unlock(); free(disk, M_MIRROR); break; default: KASSERT(0 == 1, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); } } static void g_mirror_free_device(struct g_mirror_softc *sc) { mtx_destroy(&sc->sc_queue_mtx); mtx_destroy(&sc->sc_events_mtx); mtx_destroy(&sc->sc_done_mtx); sx_destroy(&sc->sc_lock); free(sc, M_MIRROR); } static void g_mirror_providergone(struct g_provider *pp) { struct g_mirror_softc *sc = pp->private; if ((--sc->sc_refcnt) == 0) g_mirror_free_device(sc); } static void g_mirror_destroy_device(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; struct g_mirror_event *ep; struct g_geom *gp; struct g_consumer *cp, *tmpcp; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); gp = sc->sc_geom; if (sc->sc_provider != NULL) g_mirror_destroy_provider(sc); for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL; disk = LIST_FIRST(&sc->sc_disks)) { disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; g_mirror_update_metadata(disk); g_mirror_destroy_disk(disk); } while ((ep = g_mirror_event_first(sc)) != NULL) { g_mirror_event_remove(sc, ep); if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) g_mirror_event_free(ep); else { ep->e_error = ECANCELED; ep->e_flags |= G_MIRROR_EVENT_DONE; G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep); mtx_lock(&sc->sc_events_mtx); wakeup(ep); mtx_unlock(&sc->sc_events_mtx); } } callout_drain(&sc->sc_callout); g_topology_lock(); LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) { g_mirror_disconnect_consumer(sc, cp); } g_wither_geom(sc->sc_sync.ds_geom, ENXIO); G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); sx_xunlock(&sc->sc_lock); if ((--sc->sc_refcnt) == 0) g_mirror_free_device(sc); g_topology_unlock(); } static void g_mirror_orphan(struct g_consumer *cp) { struct g_mirror_disk *disk; 
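/* The underlying provider disappeared. Request a syncid bump and queue a DISCONNECTED event so the worker thread detaches this component from the mirror. */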
g_topology_assert(); disk = cp->private; if (disk == NULL) return; disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); } /* * Function should return the next active disk on the list. * It is possible that it will be the same disk as given. * If there are no active disks on list, NULL is returned. */ static __inline struct g_mirror_disk * g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk) { struct g_mirror_disk *dp; for (dp = LIST_NEXT(disk, d_next); dp != disk; dp = LIST_NEXT(dp, d_next)) { if (dp == NULL) dp = LIST_FIRST(&sc->sc_disks); if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) break; } if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) return (NULL); return (dp); } static struct g_mirror_disk * g_mirror_get_disk(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; if (sc->sc_hint == NULL) { sc->sc_hint = LIST_FIRST(&sc->sc_disks); if (sc->sc_hint == NULL) return (NULL); } disk = sc->sc_hint; if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) { disk = g_mirror_find_next(sc, disk); if (disk == NULL) return (NULL); } sc->sc_hint = g_mirror_find_next(sc, disk); return (disk); } static int g_mirror_write_metadata(struct g_mirror_disk *disk, struct g_mirror_metadata *md) { struct g_mirror_softc *sc; struct g_consumer *cp; off_t offset, length; u_char *sector; int error = 0; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); cp = disk->d_consumer; KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); length = cp->provider->sectorsize; offset = cp->provider->mediasize - length; sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO); if (md != NULL && (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) { /* * Handle the case, when the size of parent provider reduced. 
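 * The metadata is kept in the last sector of the component; if that sector now lies inside the recorded media size, writing it would clobber user data, so fail with ENOSPC instead.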
*/ if (offset < md->md_mediasize) error = ENOSPC; else mirror_metadata_encode(md, sector); } KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error); if (error == 0) error = g_write_data(cp, offset, sector, length); free(sector, M_MIRROR); if (error != 0) { if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; G_MIRROR_DEBUG(0, "Cannot write metadata on %s " "(device=%s, error=%d).", g_mirror_get_diskname(disk), sc->sc_name, error); } else { G_MIRROR_DEBUG(1, "Cannot write metadata on %s " "(device=%s, error=%d).", g_mirror_get_diskname(disk), sc->sc_name, error); } if (g_mirror_disconnect_on_failure && g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) { sc->sc_bump_id |= G_MIRROR_BUMP_GENID; g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); } } return (error); } static int g_mirror_clear_metadata(struct g_mirror_disk *disk) { int error; g_topology_assert_not(); sx_assert(&disk->d_softc->sc_lock, SX_LOCKED); if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC) return (0); error = g_mirror_write_metadata(disk, NULL); if (error == 0) { G_MIRROR_DEBUG(2, "Metadata on %s cleared.", g_mirror_get_diskname(disk)); } else { G_MIRROR_DEBUG(0, "Cannot clear metadata on disk %s (error=%d).", g_mirror_get_diskname(disk), error); } return (error); } void g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk, struct g_mirror_metadata *md) { strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic)); md->md_version = G_MIRROR_VERSION; strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); md->md_mid = sc->sc_id; md->md_all = sc->sc_ndisks; md->md_slice = sc->sc_slice; md->md_balance = sc->sc_balance; md->md_genid = sc->sc_genid; md->md_mediasize = sc->sc_mediasize; md->md_sectorsize = sc->sc_sectorsize; md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK); bzero(md->md_provider, sizeof(md->md_provider)); if (disk == NULL) { md->md_did = arc4random(); md->md_priority = 0; md->md_syncid = 0; md->md_dflags = 0; md->md_sync_offset = 0; md->md_provsize = 0; } else { md->md_did = disk->d_id; md->md_priority = disk->d_priority; md->md_syncid = disk->d_sync.ds_syncid; md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK); if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) md->md_sync_offset = disk->d_sync.ds_offset_done; else md->md_sync_offset = 0; if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) { strlcpy(md->md_provider, disk->d_consumer->provider->name, sizeof(md->md_provider)); } md->md_provsize = disk->d_consumer->provider->mediasize; } } void g_mirror_update_metadata(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; struct g_mirror_metadata md; int error; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC) return; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) g_mirror_fill_metadata(sc, disk, &md); error = g_mirror_write_metadata(disk, &md); if (error == 0) { G_MIRROR_DEBUG(2, "Metadata on %s updated.", g_mirror_get_diskname(disk)); } else { G_MIRROR_DEBUG(0, "Cannot update metadata on disk %s (error=%d).", g_mirror_get_diskname(disk), error); } } static void g_mirror_bump_syncid(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, ("%s called with no active disks (device=%s).", __func__, sc->sc_name)); sc->sc_syncid++; G_MIRROR_DEBUG(1, "Device %s: syncid 
bumped to %u.", sc->sc_name, sc->sc_syncid); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { disk->d_sync.ds_syncid = sc->sc_syncid; g_mirror_update_metadata(disk); } } } static void g_mirror_bump_genid(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, ("%s called with no active disks (device=%s).", __func__, sc->sc_name)); sc->sc_genid++; G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, sc->sc_genid); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { disk->d_genid = sc->sc_genid; g_mirror_update_metadata(disk); } } } static int g_mirror_idle(struct g_mirror_softc *sc, int acw) { struct g_mirror_disk *disk; int timeout; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if (sc->sc_provider == NULL) return (0); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) return (0); if (sc->sc_idle) return (0); if (sc->sc_writes > 0) return (0); if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) { timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write); if (!g_mirror_shutdown && timeout > 0) return (timeout); } sc->sc_idle = 1; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.", g_mirror_get_diskname(disk), sc->sc_name); disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; g_mirror_update_metadata(disk); } return (0); } static void g_mirror_unidle(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) return; sc->sc_idle = 0; sc->sc_last_write = time_uptime; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.", g_mirror_get_diskname(disk), sc->sc_name); disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; g_mirror_update_metadata(disk); } } static void g_mirror_done(struct bio *bp) { struct g_mirror_softc *sc; sc = bp->bio_from->geom->softc; bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR; mtx_lock(&sc->sc_queue_mtx); TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } static void g_mirror_regular_request_error(struct g_mirror_softc *sc, struct g_mirror_disk *disk, struct bio *bp) { if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == EOPNOTSUPP) return; if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).", bp->bio_error); } else { G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).", bp->bio_error); } if (g_mirror_disconnect_on_failure && g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) { if (bp->bio_error == ENXIO && bp->bio_cmd == BIO_READ) sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; else if (bp->bio_error == ENXIO) sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW; else sc->sc_bump_id |= G_MIRROR_BUMP_GENID; g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); } } static void g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct bio *pbp; g_topology_assert_not(); KASSERT(sc->sc_provider == 
bp->bio_parent->bio_to, ("regular request %p with unexpected origin", bp)); pbp = bp->bio_parent; bp->bio_from->index--; if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) sc->sc_writes--; disk = bp->bio_from->private; if (disk == NULL) { g_topology_lock(); g_mirror_kill_consumer(sc, bp->bio_from); g_topology_unlock(); } switch (bp->bio_cmd) { case BIO_READ: KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read, bp->bio_error); break; case BIO_WRITE: KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write, bp->bio_error); break; case BIO_DELETE: KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete, bp->bio_error); break; case BIO_FLUSH: KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush, bp->bio_error); break; } pbp->bio_inbed++; KASSERT(pbp->bio_inbed <= pbp->bio_children, ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, pbp->bio_children)); if (bp->bio_error == 0 && pbp->bio_error == 0) { G_MIRROR_LOGREQ(3, bp, "Request delivered."); g_destroy_bio(bp); if (pbp->bio_children == pbp->bio_inbed) { G_MIRROR_LOGREQ(3, pbp, "Request delivered."); pbp->bio_completed = pbp->bio_length; if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) { TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue); /* Release delayed sync requests if possible. */ g_mirror_sync_release(sc); } g_io_deliver(pbp, pbp->bio_error); } return; } else if (bp->bio_error != 0) { if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; if (disk != NULL) g_mirror_regular_request_error(sc, disk, bp); switch (pbp->bio_cmd) { case BIO_DELETE: case BIO_WRITE: case BIO_FLUSH: pbp->bio_inbed--; pbp->bio_children--; break; } } g_destroy_bio(bp); switch (pbp->bio_cmd) { case BIO_READ: if (pbp->bio_inbed < pbp->bio_children) break; if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1) g_io_deliver(pbp, pbp->bio_error); else { pbp->bio_error = 0; mtx_lock(&sc->sc_queue_mtx); TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); wakeup(sc); } break; case BIO_DELETE: case BIO_WRITE: case BIO_FLUSH: if (pbp->bio_children == 0) { /* * All requests failed. */ } else if (pbp->bio_inbed < pbp->bio_children) { /* Do nothing. */ break; } else if (pbp->bio_children == pbp->bio_inbed) { /* Some requests succeeded. */ pbp->bio_error = 0; pbp->bio_completed = pbp->bio_length; } if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) { TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue); /* Release delayed sync requests if possible. 
*/ g_mirror_sync_release(sc); } g_io_deliver(pbp, pbp->bio_error); break; default: KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd)); break; } } static void g_mirror_sync_done(struct bio *bp) { struct g_mirror_softc *sc; G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered."); sc = bp->bio_from->geom->softc; bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC; mtx_lock(&sc->sc_queue_mtx); TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } static void g_mirror_candelete(struct bio *bp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; int *val; sc = bp->bio_to->private; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) break; } val = (int *)bp->bio_data; *val = (disk != NULL); g_io_deliver(bp, 0); } static void g_mirror_kernel_dump(struct bio *bp) { struct g_mirror_softc *sc; struct g_mirror_disk *disk; struct bio *cbp; struct g_kerneldump *gkd; /* * We configure dumping to the first component, because this component * will be used for reading with 'prefer' balance algorithm. * If the component with the highest priority is currently disconnected * we will not be able to read the dump after the reboot if it will be * connected and synchronized later. Can we do something better? */ sc = bp->bio_to->private; disk = LIST_FIRST(&sc->sc_disks); gkd = (struct g_kerneldump *)bp->bio_data; if (gkd->length > bp->bio_to->mediasize) gkd->length = bp->bio_to->mediasize; cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; g_io_request(cbp, disk->d_consumer); G_MIRROR_DEBUG(1, "Kernel dump will go to %s.", g_mirror_get_diskname(disk)); } static void g_mirror_start(struct bio *bp) { struct g_mirror_softc *sc; sc = bp->bio_to->private; /* * If sc == NULL or there are no valid disks, provider's error * should be set and g_mirror_start() should not be called at all. */ KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Provider's error should be set (error=%d)(mirror=%s).", bp->bio_to->error, bp->bio_to->name)); G_MIRROR_LOGREQ(3, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: break; case BIO_GETATTR: if (!strcmp(bp->bio_attribute, "GEOM::candelete")) { g_mirror_candelete(bp); return; } else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) { g_mirror_kernel_dump(bp); return; } /* FALLTHROUGH */ default: g_io_deliver(bp, EOPNOTSUPP); return; } mtx_lock(&sc->sc_queue_mtx); if (bp->bio_to->error != 0) { mtx_unlock(&sc->sc_queue_mtx); g_io_deliver(bp, bp->bio_to->error); return; } TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); wakeup(sc); } /* * Return TRUE if the given request is colliding with a in-progress * synchronization request. 
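 * A collision means the byte range of the regular request overlaps the range of any synchronization bio still outstanding on a disk that is synchronizing.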
*/ static bool g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct bio *sbp; off_t rstart, rend, sstart, send; u_int i; if (sc->sc_sync.ds_ndisks == 0) return (false); rstart = bp->bio_offset; rend = bp->bio_offset + bp->bio_length; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING) continue; for (i = 0; i < g_mirror_syncreqs; i++) { sbp = disk->d_sync.ds_bios[i]; if (sbp == NULL) continue; sstart = sbp->bio_offset; send = sbp->bio_offset + sbp->bio_length; if (rend > sstart && rstart < send) return (true); } } return (false); } /* * Return TRUE if the given sync request is colliding with a in-progress regular * request. */ static bool g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp) { off_t rstart, rend, sstart, send; struct bio *bp; if (sc->sc_sync.ds_ndisks == 0) return (false); sstart = sbp->bio_offset; send = sbp->bio_offset + sbp->bio_length; TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) { rstart = bp->bio_offset; rend = bp->bio_offset + bp->bio_length; if (rend > sstart && rstart < send) return (true); } return (false); } /* * Puts regular request onto delayed queue. */ static void g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp) { G_MIRROR_LOGREQ(2, bp, "Delaying request."); TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue); } /* * Puts synchronization request onto delayed queue. */ static void g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp) { G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request."); TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue); } /* * Requeue delayed regular requests. */ static void g_mirror_regular_release(struct g_mirror_softc *sc) { struct bio *bp; if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL) return; if (g_mirror_sync_collision(sc, bp)) return; G_MIRROR_DEBUG(2, "Requeuing regular requests after collision."); mtx_lock(&sc->sc_queue_mtx); TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue); TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue); mtx_unlock(&sc->sc_queue_mtx); } /* * Releases delayed sync requests which don't collide anymore with regular * requests. */ static void g_mirror_sync_release(struct g_mirror_softc *sc) { struct bio *bp, *bp2; TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) { if (g_mirror_regular_collision(sc, bp)) continue; TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue); G_MIRROR_LOGREQ(2, bp, "Releasing delayed synchronization request."); g_io_request(bp, bp->bio_from); } } /* * Free a synchronization request and clear its slot in the array. */ static void g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp) { int idx; if (disk != NULL && disk->d_sync.ds_bios != NULL) { idx = (int)(uintptr_t)bp->bio_caller1; KASSERT(disk->d_sync.ds_bios[idx] == bp, ("unexpected sync BIO at %p:%d", disk, idx)); disk->d_sync.ds_bios[idx] = NULL; } free(bp->bio_data, M_MIRROR); g_destroy_bio(bp); } /* * Handle synchronization requests. * Every synchronization request is a two-step process: first, a read request is * sent to the mirror provider via the sync consumer. If that request completes * successfully, it is converted to a write and sent to the disk being * synchronized. If the write also completes successfully, the synchronization * offset is advanced and a new read request is submitted. 
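 * An error in either step frees the request and disconnects the disk being synchronized.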
*/ static void g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct g_mirror_disk_sync *sync; KASSERT((bp->bio_cmd == BIO_READ && bp->bio_from->geom == sc->sc_sync.ds_geom) || (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom), ("Sync BIO %p with unexpected origin", bp)); bp->bio_from->index--; disk = bp->bio_from->private; if (disk == NULL) { sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ g_topology_lock(); g_mirror_kill_consumer(sc, bp->bio_from); g_topology_unlock(); g_mirror_sync_request_free(NULL, bp); sx_xlock(&sc->sc_lock); return; } sync = &disk->d_sync; /* * Synchronization request. */ switch (bp->bio_cmd) { case BIO_READ: { struct g_consumer *cp; KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read, bp->bio_error); if (bp->bio_error != 0) { G_MIRROR_LOGREQ(0, bp, "Synchronization request failed (error=%d).", bp->bio_error); /* * The read error will trigger a syncid bump, so there's * no need to do that here. * * The read error handling for regular requests will * retry the read from all active mirrors before passing * the error back up, so there's no need to retry here. */ g_mirror_sync_request_free(disk, bp); g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); return; } G_MIRROR_LOGREQ(3, bp, "Synchronization request half-finished."); bp->bio_cmd = BIO_WRITE; bp->bio_cflags = 0; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(bp, cp); return; } case BIO_WRITE: { off_t offset; int i; KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write, bp->bio_error); if (bp->bio_error != 0) { G_MIRROR_LOGREQ(0, bp, "Synchronization request failed (error=%d).", bp->bio_error); g_mirror_sync_request_free(disk, bp); sc->sc_bump_id |= G_MIRROR_BUMP_GENID; g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT); return; } G_MIRROR_LOGREQ(3, bp, "Synchronization request finished."); if (sync->ds_offset >= sc->sc_mediasize || sync->ds_consumer == NULL || (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { /* Don't send more synchronization requests. */ sync->ds_inflight--; g_mirror_sync_request_free(disk, bp); if (sync->ds_inflight > 0) return; if (sync->ds_consumer == NULL || (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { return; } /* Disk up-to-date, activate it. */ g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE, G_MIRROR_EVENT_DONTWAIT); return; } /* Send next synchronization request. */ g_mirror_sync_reinit(disk, bp, sync->ds_offset); sync->ds_offset += bp->bio_length; G_MIRROR_LOGREQ(3, bp, "Sending synchronization request."); sync->ds_consumer->index++; /* * Delay the request if it is colliding with a regular request. */ if (g_mirror_regular_collision(sc, bp)) g_mirror_sync_delay(sc, bp); else g_io_request(bp, sync->ds_consumer); /* Requeue delayed requests if possible. 
*/ g_mirror_regular_release(sc); /* Find the smallest offset */ offset = sc->sc_mediasize; for (i = 0; i < g_mirror_syncreqs; i++) { bp = sync->ds_bios[i]; if (bp != NULL && bp->bio_offset < offset) offset = bp->bio_offset; } if (g_mirror_sync_period > 0 && time_uptime - sync->ds_update_ts > g_mirror_sync_period) { sync->ds_offset_done = offset; g_mirror_update_metadata(disk); sync->ds_update_ts = time_uptime; } return; } default: panic("Invalid I/O request %p", bp); } } static void g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct g_consumer *cp; struct bio *cbp; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) break; } if (disk == NULL) { if (bp->bio_error == 0) bp->bio_error = ENXIO; g_io_deliver(bp, bp->bio_error); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } /* * Fill in the component buf structure. */ cp = disk->d_consumer; cbp->bio_done = g_mirror_done; cbp->bio_to = cp->provider; G_MIRROR_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(cbp, cp); } static void g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk; struct g_consumer *cp; struct bio *cbp; disk = g_mirror_get_disk(sc); if (disk == NULL) { if (bp->bio_error == 0) bp->bio_error = ENXIO; g_io_deliver(bp, bp->bio_error); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } /* * Fill in the component buf structure. */ cp = disk->d_consumer; cbp->bio_done = g_mirror_done; cbp->bio_to = cp->provider; G_MIRROR_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(cbp, cp); } #define TRACK_SIZE (1 * 1024 * 1024) #define LOAD_SCALE 256 #define ABS(x) (((x) >= 0) ? (x) : (-(x))) static void g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp) { struct g_mirror_disk *disk, *dp; struct g_consumer *cp; struct bio *cbp; int prio, best; /* Find a disk with the smallest load. */ disk = NULL; best = INT_MAX; LIST_FOREACH(dp, &sc->sc_disks, d_next) { if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; prio = dp->load; /* If disk head is precisely in position - highly prefer it. */ if (dp->d_last_offset == bp->bio_offset) prio -= 2 * LOAD_SCALE; else /* If disk head is close to position - prefer it. */ if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE) prio -= 1 * LOAD_SCALE; if (prio <= best) { disk = dp; best = prio; } } KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name)); cbp = g_clone_bio(bp); if (cbp == NULL) { if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } /* * Fill in the component buf structure. */ cp = disk->d_consumer; cbp->bio_done = g_mirror_done; cbp->bio_to = cp->provider; G_MIRROR_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; /* Remember last head position */ disk->d_last_offset = bp->bio_offset + bp->bio_length; /* Update loads. 
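Each disk's load is an exponential moving average of its outstanding request count: the new sample (index * LOAD_SCALE) contributes 1/8 and the previous value 7/8, so, for example, a disk holding a steady two outstanding requests converges to a load of 2 * LOAD_SCALE.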
*/ LIST_FOREACH(dp, &sc->sc_disks, d_next) { dp->load = (dp->d_consumer->index * LOAD_SCALE + dp->load * 7) / 8; } g_io_request(cbp, cp); } static void g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) { struct bio_queue queue; struct g_mirror_disk *disk; struct g_consumer *cp; struct bio *cbp; off_t left, mod, offset, slice; u_char *data; u_int ndisks; if (bp->bio_length <= sc->sc_slice) { g_mirror_request_round_robin(sc, bp); return; } ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE); slice = bp->bio_length / ndisks; mod = slice % sc->sc_provider->sectorsize; if (mod != 0) slice += sc->sc_provider->sectorsize - mod; /* * Allocate all bios before sending any request, so we can * return ENOMEM in nice and clean way. */ left = bp->bio_length; offset = bp->bio_offset; data = bp->bio_data; TAILQ_INIT(&queue); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); cbp->bio_done = g_mirror_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; cbp->bio_offset = offset; cbp->bio_data = data; cbp->bio_length = MIN(left, slice); left -= cbp->bio_length; if (left == 0) break; offset += cbp->bio_length; data += cbp->bio_length; } while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); G_MIRROR_LOGREQ(3, cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); disk->d_consumer->index++; g_io_request(cbp, disk->d_consumer); } } static void g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp) { struct bio_queue queue; struct bio *cbp; struct g_consumer *cp; struct g_mirror_disk *disk; sx_assert(&sc->sc_lock, SA_XLOCKED); /* * To avoid ordering issues, if a write is deferred because of a * collision with a sync request, all I/O is deferred until that * write is initiated. */ if (bp->bio_from->geom != sc->sc_sync.ds_geom && !TAILQ_EMPTY(&sc->sc_regular_delayed)) { g_mirror_regular_delay(sc, bp); return; } switch (bp->bio_cmd) { case BIO_READ: switch (sc->sc_balance) { case G_MIRROR_BALANCE_LOAD: g_mirror_request_load(sc, bp); break; case G_MIRROR_BALANCE_PREFER: g_mirror_request_prefer(sc, bp); break; case G_MIRROR_BALANCE_ROUND_ROBIN: g_mirror_request_round_robin(sc, bp); break; case G_MIRROR_BALANCE_SPLIT: g_mirror_request_split(sc, bp); break; } return; case BIO_WRITE: case BIO_DELETE: /* * Delay the request if it is colliding with a synchronization * request. */ if (g_mirror_sync_collision(sc, bp)) { g_mirror_regular_delay(sc, bp); return; } if (sc->sc_idle) g_mirror_unidle(sc); else sc->sc_last_write = time_uptime; /* * Bump syncid on first write. */ if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) { sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; g_mirror_bump_syncid(sc); } /* * Allocate all bios before sending any request, so we can * return ENOMEM in nice and clean way. 
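	 * The request is then cloned to every ACTIVE disk.  A disk that is
	 * still SYNCHRONIZING only receives a clone when the write falls
	 * below its ds_offset, i.e. into the region that has already been
	 * copied, and a BIO_DELETE clone is only sent to components flagged
	 * CANDELETE.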
*/ TAILQ_INIT(&queue); LIST_FOREACH(disk, &sc->sc_disks, d_next) { switch (disk->d_state) { case G_MIRROR_DISK_STATE_ACTIVE: break; case G_MIRROR_DISK_STATE_SYNCHRONIZING: if (bp->bio_offset >= disk->d_sync.ds_offset) continue; break; default: continue; } if (bp->bio_cmd == BIO_DELETE && (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); cbp->bio_done = g_mirror_done; cp = disk->d_consumer; cbp->bio_caller1 = cp; cbp->bio_to = cp->provider; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); } if (TAILQ_EMPTY(&queue)) { KASSERT(bp->bio_cmd == BIO_DELETE, ("No consumers for regular request %p", bp)); g_io_deliver(bp, EOPNOTSUPP); return; } while ((cbp = TAILQ_FIRST(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller1; cbp->bio_caller1 = NULL; cp->index++; sc->sc_writes++; g_io_request(cbp, cp); } /* * Put request onto inflight queue, so we can check if new * synchronization requests don't collide with it. */ TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue); return; case BIO_FLUSH: TAILQ_INIT(&queue); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); cbp->bio_done = g_mirror_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; } KASSERT(!TAILQ_EMPTY(&queue), ("No consumers for regular request %p", bp)); while ((cbp = TAILQ_FIRST(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); TAILQ_REMOVE(&queue, cbp, bio_queue); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(cbp, cp); } break; default: KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", bp->bio_cmd, sc->sc_name)); break; } } static int g_mirror_can_destroy(struct g_mirror_softc *sc) { struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); gp = sc->sc_geom; if (gp->softc == NULL) return (1); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0) return (0); LIST_FOREACH(cp, &gp->consumer, consumer) { if (g_mirror_is_busy(sc, cp)) return (0); } gp = sc->sc_sync.ds_geom; LIST_FOREACH(cp, &gp->consumer, consumer) { if (g_mirror_is_busy(sc, cp)) return (0); } G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.", sc->sc_name); return (1); } static int g_mirror_try_destroy(struct g_mirror_softc *sc) { if (sc->sc_rootmount != NULL) { G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } g_topology_lock(); if (!g_mirror_can_destroy(sc)) { g_topology_unlock(); return (0); } sc->sc_geom->softc = NULL; sc->sc_sync.ds_geom->softc = NULL; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) { g_topology_unlock(); G_MIRROR_DEBUG(4, "%s: Waking 
up %p.", __func__, &sc->sc_worker); /* Unlock sc_lock here, as it can be destroyed after wakeup. */ sx_xunlock(&sc->sc_lock); wakeup(&sc->sc_worker); sc->sc_worker = NULL; } else { g_topology_unlock(); g_mirror_destroy_device(sc); } return (1); } /* * Worker thread. */ static void g_mirror_worker(void *arg) { struct g_mirror_softc *sc; struct g_mirror_event *ep; struct bio *bp; int timeout; sc = arg; thread_lock(curthread); sched_prio(curthread, PRIBIO); thread_unlock(curthread); sx_xlock(&sc->sc_lock); for (;;) { G_MIRROR_DEBUG(5, "%s: Let's see...", __func__); /* * First take a look at events. * This is important to handle events before any I/O requests. */ ep = g_mirror_event_first(sc); if (ep != NULL) { g_mirror_event_remove(sc, ep); if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) { /* Update only device status. */ G_MIRROR_DEBUG(3, "Running event for device %s.", sc->sc_name); ep->e_error = 0; g_mirror_update_device(sc, true); } else { /* Update disk status. */ G_MIRROR_DEBUG(3, "Running event for disk %s.", g_mirror_get_diskname(ep->e_disk)); ep->e_error = g_mirror_update_disk(ep->e_disk, ep->e_state); if (ep->e_error == 0) g_mirror_update_device(sc, false); } if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) { KASSERT(ep->e_error == 0, ("Error cannot be handled.")); g_mirror_event_free(ep); } else { ep->e_flags |= G_MIRROR_EVENT_DONE; G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep); mtx_lock(&sc->sc_events_mtx); wakeup(ep); mtx_unlock(&sc->sc_events_mtx); } if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { if (g_mirror_try_destroy(sc)) { curthread->td_pflags &= ~TDP_GEOM; G_MIRROR_DEBUG(1, "Thread exiting."); kproc_exit(0); } } G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__); continue; } /* * Check if we can mark array as CLEAN and if we can't take * how much seconds should we wait. */ timeout = g_mirror_idle(sc, -1); /* * Handle I/O requests. */ mtx_lock(&sc->sc_queue_mtx); bp = TAILQ_FIRST(&sc->sc_queue); if (bp != NULL) TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); else { if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { mtx_unlock(&sc->sc_queue_mtx); if (g_mirror_try_destroy(sc)) { curthread->td_pflags &= ~TDP_GEOM; G_MIRROR_DEBUG(1, "Thread exiting."); kproc_exit(0); } mtx_lock(&sc->sc_queue_mtx); if (!TAILQ_EMPTY(&sc->sc_queue)) { mtx_unlock(&sc->sc_queue_mtx); continue; } } if (g_mirror_event_first(sc) != NULL) { mtx_unlock(&sc->sc_queue_mtx); continue; } sx_xunlock(&sc->sc_lock); MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", timeout * hz); sx_xlock(&sc->sc_lock); G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__); continue; } mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_from->geom == sc->sc_sync.ds_geom && (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) { /* * Handle completion of the first half (the read) of a * block synchronization operation. */ g_mirror_sync_request(sc, bp); } else if (bp->bio_to != sc->sc_provider) { if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) /* * Handle completion of a regular I/O request. */ g_mirror_regular_request(sc, bp); else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) /* * Handle completion of the second half (the * write) of a block synchronization operation. */ g_mirror_sync_request(sc, bp); else { KASSERT(0, ("Invalid request cflags=0x%hx to=%s.", bp->bio_cflags, bp->bio_to->name)); } } else { /* * Initiate an I/O request. 
*/ g_mirror_register_request(sc, bp); } G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__); } } static void g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk) { sx_assert(&sc->sc_lock, SX_LOCKED); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) return; if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) { G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.", g_mirror_get_diskname(disk), sc->sc_name); disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; } else if (sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.", g_mirror_get_diskname(disk), sc->sc_name); disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; } } static void g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp, off_t offset) { void *data; int idx; data = bp->bio_data; idx = (int)(uintptr_t)bp->bio_caller1; g_reset_bio(bp); bp->bio_cmd = BIO_READ; bp->bio_data = data; bp->bio_done = g_mirror_sync_done; bp->bio_from = disk->d_sync.ds_consumer; bp->bio_to = disk->d_softc->sc_provider; bp->bio_caller1 = (void *)(uintptr_t)idx; bp->bio_offset = offset; bp->bio_length = MIN(MAXPHYS, disk->d_softc->sc_mediasize - bp->bio_offset); } static void g_mirror_sync_start(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; struct g_mirror_disk_sync *sync; struct g_consumer *cp; struct bio *bp; int error, i; g_topology_assert_not(); sc = disk->d_softc; sync = &disk->d_sync; sx_assert(&sc->sc_lock, SX_LOCKED); KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, ("Disk %s is not marked for synchronization.", g_mirror_get_diskname(disk))); KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Device not in RUNNING state (%s, %u).", sc->sc_name, sc->sc_state)); sx_xunlock(&sc->sc_lock); g_topology_lock(); cp = g_new_consumer(sc->sc_sync.ds_geom); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, sc->sc_provider); KASSERT(error == 0, ("Cannot attach to %s (error=%d).", sc->sc_name, error)); error = g_access(cp, 1, 0, 0); KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error)); g_topology_unlock(); sx_xlock(&sc->sc_lock); G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, g_mirror_get_diskname(disk)); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0) disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; KASSERT(sync->ds_consumer == NULL, ("Sync consumer already exists (device=%s, disk=%s).", sc->sc_name, g_mirror_get_diskname(disk))); sync->ds_consumer = cp; sync->ds_consumer->private = disk; sync->ds_consumer->index = 0; /* * Allocate memory for synchronization bios and initialize them. */ sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs, M_MIRROR, M_WAITOK); for (i = 0; i < g_mirror_syncreqs; i++) { bp = g_alloc_bio(); sync->ds_bios[i] = bp; bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK); bp->bio_caller1 = (void *)(uintptr_t)i; g_mirror_sync_reinit(disk, bp, sync->ds_offset); sync->ds_offset += bp->bio_length; } /* Increase the number of disks in SYNCHRONIZING state. */ sc->sc_sync.ds_ndisks++; /* Set the number of in-flight synchronization requests. */ sync->ds_inflight = g_mirror_syncreqs; /* * Fire off first synchronization requests. */ for (i = 0; i < g_mirror_syncreqs; i++) { bp = sync->ds_bios[i]; G_MIRROR_LOGREQ(3, bp, "Sending synchronization request."); sync->ds_consumer->index++; /* * Delay the request if it is colliding with a regular request. 
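	 * Regular writes are tracked on the sc_inflight queue precisely so
	 * this check can be made: a synchronization read that overlaps one
	 * of them is handed to g_mirror_sync_delay() and issued later
	 * instead of racing the in-flight write.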
*/ if (g_mirror_regular_collision(sc, bp)) g_mirror_sync_delay(sc, bp); else g_io_request(bp, sync->ds_consumer); } } /* * Stop synchronization process. * type: 0 - synchronization finished * 1 - synchronization stopped */ static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type) { struct g_mirror_softc *sc; struct g_consumer *cp; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); if (disk->d_sync.ds_consumer == NULL) return; if (type == 0) { G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.", sc->sc_name, g_mirror_get_diskname(disk)); } else /* if (type == 1) */ { G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.", sc->sc_name, g_mirror_get_diskname(disk)); } g_mirror_regular_release(sc); free(disk->d_sync.ds_bios, M_MIRROR); disk->d_sync.ds_bios = NULL; cp = disk->d_sync.ds_consumer; disk->d_sync.ds_consumer = NULL; disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; sc->sc_sync.ds_ndisks--; sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ g_topology_lock(); g_mirror_kill_consumer(sc, cp); g_topology_unlock(); sx_xlock(&sc->sc_lock); } static void g_mirror_launch_provider(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; struct g_provider *pp, *dp; sx_assert(&sc->sc_lock, SX_LOCKED); g_topology_lock(); pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name); pp->flags |= G_PF_DIRECT_RECEIVE; pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; pp->stripesize = 0; pp->stripeoffset = 0; /* Splitting of unmapped BIO's could work but isn't implemented now */ if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT) pp->flags |= G_PF_ACCEPT_UNMAPPED; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_consumer && disk->d_consumer->provider) { dp = disk->d_consumer->provider; if (dp->stripesize > pp->stripesize) { pp->stripesize = dp->stripesize; pp->stripeoffset = dp->stripeoffset; } /* A provider underneath us doesn't support unmapped */ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { G_MIRROR_DEBUG(0, "Cancelling unmapped " "because of %s.", dp->name); pp->flags &= ~G_PF_ACCEPT_UNMAPPED; } } } pp->private = sc; sc->sc_refcnt++; sc->sc_provider = pp; g_error_provider(pp, 0); g_topology_unlock(); G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name, g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) g_mirror_sync_start(disk); } } static void g_mirror_destroy_provider(struct g_mirror_softc *sc) { struct g_mirror_disk *disk; struct bio *bp; g_topology_assert_not(); KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", sc->sc_name)); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) g_mirror_sync_stop(disk, 1); } g_topology_lock(); g_error_provider(sc->sc_provider, ENXIO); mtx_lock(&sc->sc_queue_mtx); while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) { TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); /* * Abort any pending I/O that wasn't generated by us. * Synchronization requests and requests destined for individual * mirror components can be destroyed immediately. 
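	 * Requests addressed to our provider by outside consumers are
	 * completed with ENXIO so their owners see the failure; bios we
	 * generated ourselves are simply destroyed, and synchronization
	 * bios also free the data buffer that was allocated for them.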
*/ if (bp->bio_to == sc->sc_provider && bp->bio_from->geom != sc->sc_sync.ds_geom) { g_io_deliver(bp, ENXIO); } else { if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) free(bp->bio_data, M_MIRROR); g_destroy_bio(bp); } } mtx_unlock(&sc->sc_queue_mtx); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name); g_topology_unlock(); } static void g_mirror_go(void *arg) { struct g_mirror_softc *sc; sc = arg; G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); g_mirror_event_send(sc, 0, G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE); } static u_int g_mirror_determine_state(struct g_mirror_disk *disk) { struct g_mirror_softc *sc; u_int state; sc = disk->d_softc; if (sc->sc_syncid == disk->d_sync.ds_syncid) { if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 && (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 || (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) { /* Disk does not need synchronization. */ state = G_MIRROR_DISK_STATE_ACTIVE; } else { if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { /* * We can start synchronization from * the stored offset. */ state = G_MIRROR_DISK_STATE_SYNCHRONIZING; } else { state = G_MIRROR_DISK_STATE_STALE; } } } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { /* * Reset all synchronization data for this disk, * because if it even was synchronized, it was * synchronized to disks with different syncid. */ disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING; disk->d_sync.ds_offset = 0; disk->d_sync.ds_offset_done = 0; disk->d_sync.ds_syncid = sc->sc_syncid; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { state = G_MIRROR_DISK_STATE_SYNCHRONIZING; } else { state = G_MIRROR_DISK_STATE_STALE; } } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { /* * Not good, NOT GOOD! * It means that mirror was started on stale disks * and more fresh disk just arrive. * If there were writes, mirror is broken, sorry. * I think the best choice here is don't touch * this disk and inform the user loudly. */ G_MIRROR_DEBUG(0, "Device %s was started before the freshest " "disk (%s) arrives!! It will not be connected to the " "running device.", sc->sc_name, g_mirror_get_diskname(disk)); g_mirror_destroy_disk(disk); state = G_MIRROR_DISK_STATE_NONE; /* Return immediately, because disk was destroyed. */ return (state); } G_MIRROR_DEBUG(3, "State for %s disk: %s.", g_mirror_get_diskname(disk), g_mirror_disk_state2str(state)); return (state); } /* * Update device state. */ static void g_mirror_update_device(struct g_mirror_softc *sc, bool force) { struct g_mirror_disk *disk; u_int state; sx_assert(&sc->sc_lock, SX_XLOCKED); switch (sc->sc_state) { case G_MIRROR_DEVICE_STATE_STARTING: { struct g_mirror_disk *pdisk, *tdisk; u_int dirty, ndisks, genid, syncid; bool broken; KASSERT(sc->sc_provider == NULL, ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); /* * Are we ready? We are, if all disks are connected or * if we have any disks and 'force' is true. */ ndisks = g_mirror_ndisks(sc, -1); if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) { ; } else if (ndisks == 0) { /* * Disks went down in starting phase, so destroy * device. 
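	 * Every component disappeared before the device could start: cancel
	 * the startup timeout, drop the root mount hold so booting is not
	 * held up, and mark the device for destruction.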
*/ callout_drain(&sc->sc_callout); sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; return; } else { return; } /* * Activate all disks with the biggest syncid. */ if (force) { /* * If 'force' is true, we have been called due to * timeout, so don't bother canceling timeout. */ ndisks = 0; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) { ndisks++; } } if (ndisks == 0) { /* No valid disks found, destroy device. */ sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; return; } } else { /* Cancel timeout. */ callout_drain(&sc->sc_callout); } /* * Find the biggest genid. */ genid = 0; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_genid > genid) genid = disk->d_genid; } sc->sc_genid = genid; /* * Remove all disks without the biggest genid. */ broken = false; LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { if (disk->d_genid < genid) { G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.", g_mirror_get_diskname(disk), sc->sc_name); g_mirror_destroy_disk(disk); /* * Bump the syncid in case we discover a healthy * replacement disk after starting the mirror. */ broken = true; } } /* * Find the biggest syncid. */ syncid = 0; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_sync.ds_syncid > syncid) syncid = disk->d_sync.ds_syncid; } /* * Here we need to look for dirty disks and if all disks * with the biggest syncid are dirty, we have to choose * one with the biggest priority and rebuild the rest. */ /* * Find the number of dirty disks with the biggest syncid. * Find the number of disks with the biggest syncid. * While here, find a disk with the biggest priority. */ dirty = ndisks = 0; pdisk = NULL; LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_sync.ds_syncid != syncid) continue; if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { continue; } ndisks++; if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { dirty++; if (pdisk == NULL || pdisk->d_priority < disk->d_priority) { pdisk = disk; } } } if (dirty == 0) { /* No dirty disks at all, great. */ } else if (dirty == ndisks) { /* * Force synchronization for all dirty disks except one * with the biggest priority. */ KASSERT(pdisk != NULL, ("pdisk == NULL")); G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a " "master disk for synchronization.", g_mirror_get_diskname(pdisk), sc->sc_name); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_sync.ds_syncid != syncid) continue; if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { continue; } KASSERT((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0, ("Disk %s isn't marked as dirty.", g_mirror_get_diskname(disk))); /* Skip the disk with the biggest priority. */ if (disk == pdisk) continue; disk->d_sync.ds_syncid = 0; } } else if (dirty < ndisks) { /* * Force synchronization for all dirty disks. * We have some non-dirty disks. */ LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_sync.ds_syncid != syncid) continue; if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { continue; } if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) { continue; } disk->d_sync.ds_syncid = 0; } } /* Reset hint. */ sc->sc_hint = NULL; sc->sc_syncid = syncid; if (force || broken) { /* Remember to bump syncid on first write. 
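	 * The bump is deliberately deferred: G_MIRROR_BUMP_SYNCID only
	 * records the intent, and g_mirror_register_request() performs the
	 * actual bump when the first write arrives, so a mirror that is
	 * only read keeps its on-disk syncid unchanged.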
*/ sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; } state = G_MIRROR_DEVICE_STATE_RUNNING; G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_device_state2str(state)); sc->sc_state = state; LIST_FOREACH(disk, &sc->sc_disks, d_next) { state = g_mirror_determine_state(disk); g_mirror_event_send(disk, state, G_MIRROR_EVENT_DONTWAIT); if (state == G_MIRROR_DISK_STATE_STALE) sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; } break; } case G_MIRROR_DEVICE_STATE_RUNNING: if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 && g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { /* * No usable disks, so destroy the device. */ sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; break; } else if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0 && g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { /* * We have active disks, launch provider if it doesn't * exist. */ if (sc->sc_provider == NULL) g_mirror_launch_provider(sc); if (sc->sc_rootmount != NULL) { G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } } /* * Genid should be bumped immediately, so do it here. */ if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) { sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID; g_mirror_bump_genid(sc); } if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) { sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW; g_mirror_bump_syncid(sc); } break; default: KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state))); break; } } /* * Update disk state and device state if needed. */ #define DISK_STATE_CHANGED() G_MIRROR_DEBUG(1, \ "Disk %s state changed from %s to %s (device %s).", \ g_mirror_get_diskname(disk), \ g_mirror_disk_state2str(disk->d_state), \ g_mirror_disk_state2str(state), sc->sc_name) static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state) { struct g_mirror_softc *sc; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); again: G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state), g_mirror_disk_state2str(state)); switch (state) { case G_MIRROR_DISK_STATE_NEW: /* * Possible scenarios: * 1. New disk arrive. */ /* Previous state should be NONE. */ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); disk->d_state = state; if (LIST_EMPTY(&sc->sc_disks)) LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next); else { struct g_mirror_disk *dp; LIST_FOREACH(dp, &sc->sc_disks, d_next) { if (disk->d_priority >= dp->d_priority) { LIST_INSERT_BEFORE(dp, disk, d_next); dp = NULL; break; } if (LIST_NEXT(dp, d_next) == NULL) break; } if (dp != NULL) LIST_INSERT_AFTER(dp, disk, d_next); } G_MIRROR_DEBUG(1, "Device %s: provider %s detected.", sc->sc_name, g_mirror_get_diskname(disk)); if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) break; KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); state = g_mirror_determine_state(disk); if (state != G_MIRROR_DISK_STATE_NONE) goto again; break; case G_MIRROR_DISK_STATE_ACTIVE: /* * Possible scenarios: * 1. New disk does not need synchronization. * 2. Synchronization process finished successfully. 
*/ KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); /* Previous state should be NEW or SYNCHRONIZING. */ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW || disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING; disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC; g_mirror_sync_stop(disk, 0); } disk->d_state = state; disk->d_sync.ds_offset = 0; disk->d_sync.ds_offset_done = 0; g_mirror_update_idle(sc, disk); g_mirror_update_metadata(disk); G_MIRROR_DEBUG(1, "Device %s: provider %s activated.", sc->sc_name, g_mirror_get_diskname(disk)); break; case G_MIRROR_DISK_STATE_STALE: /* * Possible scenarios: * 1. Stale disk was connected. */ /* Previous state should be NEW. */ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); /* * STALE state is only possible if device is marked * NOAUTOSYNC. */ KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; disk->d_state = state; g_mirror_update_metadata(disk); G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.", sc->sc_name, g_mirror_get_diskname(disk)); break; case G_MIRROR_DISK_STATE_SYNCHRONIZING: /* * Possible scenarios: * 1. Disk which needs synchronization was connected. */ /* Previous state should be NEW. */ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); if (disk->d_state == G_MIRROR_DISK_STATE_NEW) disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; disk->d_state = state; if (sc->sc_provider != NULL) { g_mirror_sync_start(disk); g_mirror_update_metadata(disk); } break; case G_MIRROR_DISK_STATE_DISCONNECTED: /* * Possible scenarios: * 1. Device wasn't running yet, but disk disappear. * 2. Disk was active and disapppear. * 3. Disk disappear during synchronization process. */ if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) { /* * Previous state should be ACTIVE, STALE or * SYNCHRONIZING. */ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || disk->d_state == G_MIRROR_DISK_STATE_STALE || disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); } else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) { /* Previous state should be NEW. 
*/ KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); /* * Reset bumping syncid if disk disappeared in STARTING * state. */ if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; #ifdef INVARIANTS } else { KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_mirror_device_state2str(sc->sc_state), g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state))); #endif } DISK_STATE_CHANGED(); G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.", sc->sc_name, g_mirror_get_diskname(disk)); g_mirror_destroy_disk(disk); break; case G_MIRROR_DISK_STATE_DESTROY: { int error; error = g_mirror_clear_metadata(disk); if (error != 0) { G_MIRROR_DEBUG(0, "Device %s: failed to clear metadata on %s: %d.", sc->sc_name, g_mirror_get_diskname(disk), error); break; } DISK_STATE_CHANGED(); G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, g_mirror_get_diskname(disk)); g_mirror_destroy_disk(disk); sc->sc_ndisks--; LIST_FOREACH(disk, &sc->sc_disks, d_next) { g_mirror_update_metadata(disk); } break; } default: KASSERT(1 == 0, ("Unknown state (%u).", state)); break; } return (0); } #undef DISK_STATE_CHANGED int g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); /* Metadata are stored on last sector. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).", cp->provider->name, error); return (error); } /* Decode metadata. 
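	 * The magic and version checks run before the decode error is
	 * examined, so a sector that does not carry gmirror metadata, or
	 * carries a newer format than this kernel understands, is rejected
	 * with EINVAL rather than being reported as an MD5 hash mismatch.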
*/ error = mirror_metadata_decode(buf, md); g_free(buf); if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0) return (EINVAL); if (md->md_version > G_MIRROR_VERSION) { G_MIRROR_DEBUG(0, "Kernel module is too old to handle metadata from %s.", cp->provider->name); return (EINVAL); } if (error != 0) { G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", cp->provider->name); return (error); } return (0); } static int g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp, struct g_mirror_metadata *md) { if (g_mirror_id2disk(sc, md->md_did) != NULL) { G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.", pp->name, md->md_did); return (EEXIST); } if (md->md_all != sc->sc_ndisks) { G_MIRROR_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_all", pp->name, sc->sc_name); return (EINVAL); } if (md->md_slice != sc->sc_slice) { G_MIRROR_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_slice", pp->name, sc->sc_name); return (EINVAL); } if (md->md_balance != sc->sc_balance) { G_MIRROR_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_balance", pp->name, sc->sc_name); return (EINVAL); } #if 0 if (md->md_mediasize != sc->sc_mediasize) { G_MIRROR_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_mediasize", pp->name, sc->sc_name); return (EINVAL); } #endif if (sc->sc_mediasize > pp->mediasize) { G_MIRROR_DEBUG(1, "Invalid size of disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if (md->md_sectorsize != sc->sc_sectorsize) { G_MIRROR_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_sectorsize", pp->name, sc->sc_name); return (EINVAL); } if ((sc->sc_sectorsize % pp->sectorsize) != 0) { G_MIRROR_DEBUG(1, "Invalid sector size of disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) { G_MIRROR_DEBUG(1, "Invalid device flags on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) { G_MIRROR_DEBUG(1, "Invalid disk flags on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } return (0); } int g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp, struct g_mirror_metadata *md) { struct g_mirror_disk *disk; int error; g_topology_assert_not(); G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name); error = g_mirror_check_metadata(sc, pp, md); if (error != 0) return (error); if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING && md->md_genid < sc->sc_genid) { G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.", pp->name, sc->sc_name); return (EINVAL); } disk = g_mirror_init_disk(sc, pp, md, &error); if (disk == NULL) return (error); error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW, G_MIRROR_EVENT_WAIT); if (error != 0) return (error); if (md->md_version < G_MIRROR_VERSION) { G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", pp->name, md->md_version, G_MIRROR_VERSION); g_mirror_update_metadata(disk); } return (0); } static void g_mirror_destroy_delayed(void *arg, int flag) { struct g_mirror_softc *sc; int error; if (flag == EV_CANCEL) { G_MIRROR_DEBUG(1, "Destroying canceled."); return; } sc = arg; g_topology_unlock(); sx_xlock(&sc->sc_lock); KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0, ("DESTROY flag set on %s.", sc->sc_name)); KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0, ("CLOSEWAIT flag not set on %s.", sc->sc_name)); 
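	/*
	 * This runs from a GEOM event posted by g_mirror_access() once the
	 * last close of a CLOSEWAIT device has dropped sc_provider_open to
	 * zero.
	 */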
G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name); error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT); if (error != 0) { G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).", sc->sc_name, error); sx_xunlock(&sc->sc_lock); } g_topology_lock(); } static int g_mirror_access(struct g_provider *pp, int acr, int acw, int ace) { struct g_mirror_softc *sc; int error = 0; g_topology_assert(); G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, acw, ace); sc = pp->private; KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); g_topology_unlock(); sx_xlock(&sc->sc_lock); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 || (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 || LIST_EMPTY(&sc->sc_disks)) { if (acr > 0 || acw > 0 || ace > 0) error = ENXIO; goto end; } sc->sc_provider_open += acr + acw + ace; if (pp->acw + acw == 0) g_mirror_idle(sc, 0); if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 && sc->sc_provider_open == 0) g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL); end: sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } struct g_geom * g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, u_int type) { struct g_mirror_softc *sc; struct g_geom *gp; int error, timeout; g_topology_assert(); G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_mid); /* One disk is minimum. */ if (md->md_all < 1) return (NULL); /* * Action geom. */ gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO); gp->start = g_mirror_start; gp->orphan = g_mirror_orphan; gp->access = g_mirror_access; gp->dumpconf = g_mirror_dumpconf; sc->sc_type = type; sc->sc_id = md->md_mid; sc->sc_slice = md->md_slice; sc->sc_balance = md->md_balance; sc->sc_mediasize = md->md_mediasize; sc->sc_sectorsize = md->md_sectorsize; sc->sc_ndisks = md->md_all; sc->sc_flags = md->md_mflags; sc->sc_bump_id = 0; sc->sc_idle = 1; sc->sc_last_write = time_uptime; sc->sc_writes = 0; sc->sc_refcnt = 1; sx_init(&sc->sc_lock, "gmirror:lock"); TAILQ_INIT(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF); TAILQ_INIT(&sc->sc_regular_delayed); TAILQ_INIT(&sc->sc_inflight); TAILQ_INIT(&sc->sc_sync_delayed); LIST_INIT(&sc->sc_disks); TAILQ_INIT(&sc->sc_events); mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); callout_init(&sc->sc_callout, 1); mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF); sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING; gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; sc->sc_provider_open = 0; /* * Synchronization geom. */ gp = g_new_geomf(mp, "%s.sync", md->md_name); gp->softc = sc; gp->orphan = g_mirror_orphan; sc->sc_sync.ds_geom = gp; sc->sc_sync.ds_ndisks = 0; error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0, "g_mirror %s", md->md_name); if (error != 0) { G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.", sc->sc_name); g_destroy_geom(sc->sc_sync.ds_geom); g_destroy_geom(sc->sc_geom); g_mirror_free_device(sc); return (NULL); } G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).", sc->sc_name, sc->sc_ndisks, sc->sc_id); sc->sc_rootmount = root_mount_hold("GMIRROR"); G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); /* * Run timeout. 
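	 * If some configured components never show up, g_mirror_go() fires
	 * after g_mirror_timeout seconds and forces the device to start
	 * with whatever disks have been tasted by then.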
*/ timeout = g_mirror_timeout * hz; callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc); return (sc->sc_geom); } int g_mirror_destroy(struct g_mirror_softc *sc, int how) { struct g_mirror_disk *disk; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if (sc->sc_provider_open != 0) { switch (how) { case G_MIRROR_DESTROY_SOFT: G_MIRROR_DEBUG(1, "Device %s is still open (%d).", sc->sc_name, sc->sc_provider_open); return (EBUSY); case G_MIRROR_DESTROY_DELAYED: G_MIRROR_DEBUG(1, "Device %s will be destroyed on last close.", sc->sc_name); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { g_mirror_sync_stop(disk, 1); } } sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT; return (EBUSY); case G_MIRROR_DESTROY_HARD: G_MIRROR_DEBUG(1, "Device %s is still open, so it " "can't be definitely removed.", sc->sc_name); } } if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { sx_xunlock(&sc->sc_lock); return (0); } sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN; G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); sx_xunlock(&sc->sc_lock); mtx_lock(&sc->sc_queue_mtx); wakeup(sc); mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); while (sc->sc_worker != NULL) tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5); G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); sx_xlock(&sc->sc_lock); g_mirror_destroy_device(sc); return (0); } static void g_mirror_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static struct g_geom * g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_mirror_metadata md; struct g_mirror_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); G_MIRROR_DEBUG(2, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "mirror:taste"); /* * This orphan function should be never called. */ gp->orphan = g_mirror_taste_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_mirror_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) return (NULL); if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) { G_MIRROR_DEBUG(0, "Device %s: provider %s marked as inactive, skipping.", md.md_name, pp->name); return (NULL); } if (g_mirror_debug >= 2) mirror_metadata_dump(&md); /* * Let's check if device already exists. 
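	 * A tasted component is attached to an existing automatic geom
	 * whose name matches the metadata; a matching name with a different
	 * mirror id means a conflicting device is already configured and
	 * the component is ignored.  Only when no geom matches is a new
	 * device created.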
*/ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC) continue; if (sc->sc_sync.ds_geom == gp) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_mid != sc->sc_id) { G_MIRROR_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } break; } if (gp == NULL) { gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC); if (gp == NULL) { G_MIRROR_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; } G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); g_topology_unlock(); sx_xlock(&sc->sc_lock); sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING; error = g_mirror_add_disk(sc, pp, &md); if (error != 0) { G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); if (LIST_EMPTY(&sc->sc_disks)) { g_cancel_event(sc); g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD); g_topology_lock(); return (NULL); } gp = NULL; } sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING; if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD); g_topology_lock(); return (NULL); } sx_xunlock(&sc->sc_lock); g_topology_lock(); return (gp); } static void g_mirror_resize(struct g_consumer *cp) { struct g_mirror_disk *disk; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name); disk = cp->private; if (disk == NULL) return; g_topology_unlock(); g_mirror_update_metadata(disk); g_topology_lock(); } static int g_mirror_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_mirror_softc *sc; int error; g_topology_unlock(); sc = gp->softc; sx_xlock(&sc->sc_lock); g_cancel_event(sc); error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT); if (error != 0) sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } static void g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_mirror_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; /* Skip synchronization geom. */ if (gp == sc->sc_sync.ds_geom) return; if (pp != NULL) { /* Nothing here. 
*/ } else if (cp != NULL) { struct g_mirror_disk *disk; disk = cp->private; if (disk == NULL) return; g_topology_unlock(); sx_xlock(&sc->sc_lock); sbuf_printf(sb, "%s%u\n", indent, (u_int)disk->d_id); if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { sbuf_printf(sb, "%s", indent); if (disk->d_sync.ds_offset == 0) sbuf_printf(sb, "0%%"); else { sbuf_printf(sb, "%u%%", (u_int)((disk->d_sync.ds_offset * 100) / sc->sc_provider->mediasize)); } sbuf_printf(sb, "\n"); if (disk->d_sync.ds_offset > 0) { sbuf_printf(sb, "%s%jd" "\n", indent, (intmax_t)disk->d_sync.ds_offset); } } sbuf_printf(sb, "%s%u\n", indent, disk->d_sync.ds_syncid); sbuf_printf(sb, "%s%u\n", indent, disk->d_genid); sbuf_printf(sb, "%s", indent); if (disk->d_flags == 0) sbuf_printf(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if ((disk->d_flags & (flag)) != 0) { \ if (!first) \ sbuf_printf(sb, ", "); \ else \ first = 0; \ sbuf_printf(sb, name); \ } \ } while (0) ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY"); ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED"); ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE"); ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING, "SYNCHRONIZING"); ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN"); #undef ADD_FLAG } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, disk->d_priority); sbuf_printf(sb, "%s%s\n", indent, g_mirror_disk_state2str(disk->d_state)); sx_xunlock(&sc->sc_lock); g_topology_lock(); } else { g_topology_unlock(); sx_xlock(&sc->sc_lock); sbuf_printf(sb, "%s", indent); switch (sc->sc_type) { case G_MIRROR_TYPE_AUTOMATIC: sbuf_printf(sb, "AUTOMATIC"); break; case G_MIRROR_TYPE_MANUAL: sbuf_printf(sb, "MANUAL"); break; default: sbuf_printf(sb, "UNKNOWN"); break; } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s%u\n", indent, sc->sc_syncid); sbuf_printf(sb, "%s%u\n", indent, sc->sc_genid); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_printf(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if ((sc->sc_flags & (flag)) != 0) { \ if (!first) \ sbuf_printf(sb, ", "); \ else \ first = 0; \ sbuf_printf(sb, name); \ } \ } while (0) ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); #undef ADD_FLAG } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_slice); sbuf_printf(sb, "%s%s\n", indent, balance_name(sc->sc_balance)); sbuf_printf(sb, "%s%u\n", indent, sc->sc_ndisks); sbuf_printf(sb, "%s", indent); if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) sbuf_printf(sb, "%s", "STARTING"); else if (sc->sc_ndisks == g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE)) sbuf_printf(sb, "%s", "COMPLETE"); else sbuf_printf(sb, "%s", "DEGRADED"); sbuf_printf(sb, "\n"); sx_xunlock(&sc->sc_lock); g_topology_lock(); } } static void g_mirror_shutdown_post_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_mirror_softc *sc; int error; if (panicstr != NULL) return; mp = arg; g_topology_lock(); g_mirror_shutdown = 1; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if ((sc = gp->softc) == NULL) continue; /* Skip synchronization geom. 
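	 * Each mirror owns two geoms that share the same softc; handling
	 * the synchronization geom here as well would tear the same device
	 * down twice.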
*/ if (gp == sc->sc_sync.ds_geom) continue; g_topology_unlock(); sx_xlock(&sc->sc_lock); g_mirror_idle(sc, -1); g_cancel_event(sc); error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED); if (error != 0) sx_xunlock(&sc->sc_lock); g_topology_lock(); } g_topology_unlock(); } static void g_mirror_init(struct g_class *mp) { g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); if (g_mirror_post_sync == NULL) G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event."); } static void g_mirror_fini(struct g_class *mp) { if (g_mirror_post_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync); } DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); +MODULE_VERSION(geom_mirror, 0); Index: stable/11/sys/geom/mountver/g_mountver.c =================================================================== --- stable/11/sys/geom/mountver/g_mountver.c (revision 332639) +++ stable/11/sys/geom/mountver/g_mountver.c (revision 332640) @@ -1,660 +1,661 @@ /*- * Copyright (c) 2010 Edward Tomasz Napierala * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, mountver, CTLFLAG_RW, 0, "GEOM_MOUNTVER stuff"); static u_int g_mountver_debug = 0; static u_int g_mountver_check_ident = 1; SYSCTL_UINT(_kern_geom_mountver, OID_AUTO, debug, CTLFLAG_RW, &g_mountver_debug, 0, "Debug level"); SYSCTL_UINT(_kern_geom_mountver, OID_AUTO, check_ident, CTLFLAG_RW, &g_mountver_check_ident, 0, "Check disk ident when reattaching"); static eventhandler_tag g_mountver_pre_sync = NULL; static void g_mountver_queue(struct bio *bp); static void g_mountver_orphan(struct g_consumer *cp); static void g_mountver_resize(struct g_consumer *cp); static int g_mountver_destroy(struct g_geom *gp, boolean_t force); static g_taste_t g_mountver_taste; static int g_mountver_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_mountver_config(struct gctl_req *req, struct g_class *mp, const char *verb); static void g_mountver_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); static void g_mountver_init(struct g_class *mp); static void g_mountver_fini(struct g_class *mp); struct g_class g_mountver_class = { .name = G_MOUNTVER_CLASS_NAME, .version = G_VERSION, .ctlreq = g_mountver_config, .taste = g_mountver_taste, .destroy_geom = g_mountver_destroy_geom, .init = g_mountver_init, .fini = g_mountver_fini }; static void g_mountver_done(struct bio *bp) { struct g_geom *gp; struct bio *pbp; if (bp->bio_error != ENXIO) { g_std_done(bp); return; } /* * When the device goes away, it's possible that few requests * will be completed with ENXIO before g_mountver_orphan() * gets called. To work around that, we have to queue requests * that failed with ENXIO, in order to send them later. 
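	 * Note that it is the parent request that gets queued: the failed
	 * clone is destroyed here, and g_mountver_send_queued() will clone
	 * the parent again once the provider reappears.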
*/ gp = bp->bio_from->geom; pbp = bp->bio_parent; KASSERT(pbp->bio_to == LIST_FIRST(&gp->provider), ("parent request was for someone else")); g_destroy_bio(bp); pbp->bio_inbed++; g_mountver_queue(pbp); } static void g_mountver_send(struct bio *bp) { struct g_geom *gp; struct bio *cbp; gp = bp->bio_to->geom; cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_mountver_done; g_io_request(cbp, LIST_FIRST(&gp->consumer)); } static void g_mountver_queue(struct bio *bp) { struct g_mountver_softc *sc; struct g_geom *gp; gp = bp->bio_to->geom; sc = gp->softc; mtx_lock(&sc->sc_mtx); TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_mtx); } static void g_mountver_send_queued(struct g_geom *gp) { struct g_mountver_softc *sc; struct bio *bp; sc = gp->softc; mtx_lock(&sc->sc_mtx); while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) { TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); G_MOUNTVER_LOGREQ(bp, "Sending queued request."); g_mountver_send(bp); } mtx_unlock(&sc->sc_mtx); } static void g_mountver_discard_queued(struct g_geom *gp) { struct g_mountver_softc *sc; struct bio *bp; sc = gp->softc; mtx_lock(&sc->sc_mtx); while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) { TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); G_MOUNTVER_LOGREQ(bp, "Discarding queued request."); g_io_deliver(bp, ENXIO); } mtx_unlock(&sc->sc_mtx); } static void g_mountver_start(struct bio *bp) { struct g_mountver_softc *sc; struct g_geom *gp; gp = bp->bio_to->geom; sc = gp->softc; G_MOUNTVER_LOGREQ(bp, "Request received."); /* * It is possible that some bios were returned with ENXIO, even though * orphaning didn't happen yet. In that case, queue all subsequent * requests in order to maintain ordering. */ if (sc->sc_orphaned || !TAILQ_EMPTY(&sc->sc_queue)) { if (sc->sc_shutting_down) { G_MOUNTVER_LOGREQ(bp, "Discarding request due to shutdown."); g_io_deliver(bp, ENXIO); return; } G_MOUNTVER_LOGREQ(bp, "Queueing request."); g_mountver_queue(bp); if (!sc->sc_orphaned) g_mountver_send_queued(gp); } else { G_MOUNTVER_LOGREQ(bp, "Sending request."); g_mountver_send(bp); } } static int g_mountver_access(struct g_provider *pp, int dr, int dw, int de) { struct g_mountver_softc *sc; struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); gp = pp->geom; cp = LIST_FIRST(&gp->consumer); sc = gp->softc; if (sc == NULL && dr <= 0 && dw <= 0 && de <= 0) return (0); KASSERT(sc != NULL, ("Trying to access withered provider \"%s\".", pp->name)); sc->sc_access_r += dr; sc->sc_access_w += dw; sc->sc_access_e += de; if (sc->sc_orphaned) return (0); return (g_access(cp, dr, dw, de)); } static int g_mountver_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp) { struct g_mountver_softc *sc; struct g_geom *gp; struct g_provider *newpp; struct g_consumer *cp; char name[64]; int error; int identsize = DISK_IDENT_SIZE; g_topology_assert(); gp = NULL; newpp = NULL; cp = NULL; snprintf(name, sizeof(name), "%s%s", pp->name, G_MOUNTVER_SUFFIX); LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) { gctl_error(req, "Provider %s already exists.", name); return (EEXIST); } } gp = g_new_geomf(mp, "%s", name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "gmountver", NULL, MTX_DEF | MTX_RECURSE); TAILQ_INIT(&sc->sc_queue); sc->sc_provider_name = strdup(pp->name, M_GEOM); gp->softc = sc; gp->start = g_mountver_start; gp->orphan = g_mountver_orphan; gp->resize = g_mountver_resize; gp->access = g_mountver_access; gp->dumpconf = g_mountver_dumpconf; 
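	/*
	 * The pass-through provider mirrors the geometry of the provider
	 * below and only advertises unmapped I/O when that provider
	 * supports it.
	 */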
newpp = g_new_providerf(gp, "%s", gp->name); newpp->mediasize = pp->mediasize; newpp->sectorsize = pp->sectorsize; newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0) { G_MOUNTVER_DEBUG(0, "Unmapped supported for %s.", gp->name); newpp->flags |= G_PF_ACCEPT_UNMAPPED; } else { G_MOUNTVER_DEBUG(0, "Unmapped unsupported for %s.", gp->name); newpp->flags &= ~G_PF_ACCEPT_UNMAPPED; } cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach to provider %s.", pp->name); goto fail; } error = g_access(cp, 1, 0, 0); if (error != 0) { gctl_error(req, "Cannot access provider %s.", pp->name); goto fail; } error = g_io_getattr("GEOM::ident", cp, &identsize, sc->sc_ident); g_access(cp, -1, 0, 0); if (error != 0) { if (g_mountver_check_ident) { gctl_error(req, "Cannot get disk ident from %s; error = %d.", pp->name, error); goto fail; } G_MOUNTVER_DEBUG(0, "Cannot get disk ident from %s; error = %d.", pp->name, error); sc->sc_ident[0] = '\0'; } g_error_provider(newpp, 0); G_MOUNTVER_DEBUG(0, "Device %s created.", gp->name); return (0); fail: g_free(sc->sc_provider_name); if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); g_destroy_provider(newpp); g_free(gp->softc); g_destroy_geom(gp); return (error); } static int g_mountver_destroy(struct g_geom *gp, boolean_t force) { struct g_mountver_softc *sc; struct g_provider *pp; g_topology_assert(); if (gp->softc == NULL) return (ENXIO); sc = gp->softc; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_MOUNTVER_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_MOUNTVER_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else { G_MOUNTVER_DEBUG(0, "Device %s removed.", gp->name); } if (pp != NULL) g_wither_provider(pp, ENXIO); g_mountver_discard_queued(gp); g_free(sc->sc_provider_name); g_free(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static int g_mountver_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_mountver_destroy(gp, 0)); } static void g_mountver_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_MOUNTVER_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } if (g_mountver_create(req, mp, pp) != 0) return; } } static struct g_geom * g_mountver_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) return (gp); } return (NULL); } static void g_mountver_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_geom *gp; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", 
sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); gp = g_mountver_find_geom(mp, name); if (gp == NULL) { G_MOUNTVER_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } error = g_mountver_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", gp->name, error); return; } } } static void g_mountver_orphan(struct g_consumer *cp) { struct g_mountver_softc *sc; g_topology_assert(); sc = cp->geom->softc; sc->sc_orphaned = 1; if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); G_MOUNTVER_DEBUG(0, "%s is offline. Mount verification in progress.", sc->sc_provider_name); } static void g_mountver_resize(struct g_consumer *cp) { struct g_geom *gp; struct g_provider *pp; gp = cp->geom; LIST_FOREACH(pp, &gp->provider, provider) g_resize_provider(pp, cp->provider->mediasize); } static int g_mountver_ident_matches(struct g_geom *gp) { struct g_consumer *cp; struct g_mountver_softc *sc; char ident[DISK_IDENT_SIZE]; int error, identsize = DISK_IDENT_SIZE; sc = gp->softc; cp = LIST_FIRST(&gp->consumer); if (g_mountver_check_ident == 0) return (0); error = g_access(cp, 1, 0, 0); if (error != 0) { G_MOUNTVER_DEBUG(0, "Cannot access %s; " "not attaching; error = %d.", gp->name, error); return (1); } error = g_io_getattr("GEOM::ident", cp, &identsize, ident); g_access(cp, -1, 0, 0); if (error != 0) { G_MOUNTVER_DEBUG(0, "Cannot get disk ident for %s; " "not attaching; error = %d.", gp->name, error); return (1); } if (strcmp(ident, sc->sc_ident) != 0) { G_MOUNTVER_DEBUG(1, "Disk ident for %s (\"%s\") is different " "from expected \"%s\", not attaching.", gp->name, ident, sc->sc_ident); return (1); } return (0); } static struct g_geom * g_mountver_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_mountver_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); G_MOUNTVER_DEBUG(2, "Tasting %s.", pp->name); /* * Let's check if device already exists. */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; /* Already attached? 
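	 * If so, the tasted provider is the one this geom itself exposes,
	 * so leave it alone; otherwise look for an orphaned geom that is
	 * still waiting for a provider with this name.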
*/ if (pp == LIST_FIRST(&gp->provider)) return (NULL); if (sc->sc_orphaned && strcmp(pp->name, sc->sc_provider_name) == 0) break; } if (gp == NULL) return (NULL); cp = LIST_FIRST(&gp->consumer); g_attach(cp, pp); error = g_mountver_ident_matches(gp); if (error != 0) { g_detach(cp); return (NULL); } if (sc->sc_access_r > 0 || sc->sc_access_w > 0 || sc->sc_access_e > 0) { error = g_access(cp, sc->sc_access_r, sc->sc_access_w, sc->sc_access_e); if (error != 0) { G_MOUNTVER_DEBUG(0, "Cannot access %s; error = %d.", pp->name, error); g_detach(cp); return (NULL); } } g_mountver_send_queued(gp); sc->sc_orphaned = 0; G_MOUNTVER_DEBUG(0, "%s has completed mount verification.", sc->sc_provider_name); return (gp); } static void g_mountver_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_MOUNTVER_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_mountver_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0) { g_mountver_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_mountver_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_mountver_softc *sc; if (pp != NULL || cp != NULL) return; sc = gp->softc; sbuf_printf(sb, "%s%s\n", indent, sc->sc_orphaned ? "OFFLINE" : "ONLINE"); sbuf_printf(sb, "%s%s\n", indent, sc->sc_provider_name); sbuf_printf(sb, "%s%s\n", indent, sc->sc_ident); } static void g_mountver_shutdown_pre_sync(void *arg, int howto) { struct g_mountver_softc *sc; struct g_class *mp; struct g_geom *gp, *gp2; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if (gp->softc == NULL) continue; sc = gp->softc; sc->sc_shutting_down = 1; if (sc->sc_orphaned) g_mountver_destroy(gp, 1); } g_topology_unlock(); } static void g_mountver_init(struct g_class *mp) { g_mountver_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, g_mountver_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); if (g_mountver_pre_sync == NULL) G_MOUNTVER_DEBUG(0, "Warning! Cannot register shutdown event."); } static void g_mountver_fini(struct g_class *mp) { if (g_mountver_pre_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_mountver_pre_sync); } DECLARE_GEOM_CLASS(g_mountver_class, g_mountver); +MODULE_VERSION(geom_mountver, 0); Index: stable/11/sys/geom/multipath/g_multipath.c =================================================================== --- stable/11/sys/geom/multipath/g_multipath.c (revision 332639) +++ stable/11/sys/geom/multipath/g_multipath.c (revision 332640) @@ -1,1532 +1,1533 @@ /*- * Copyright (c) 2011-2013 Alexander Motin * Copyright (c) 2006-2007 Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Based upon work by Pawel Jakub Dawidek for all of the * fine geom examples, and by Poul Henning Kamp for GEOM * itself, all of which is most gratefully acknowledged. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_multipath, "GEOM multipath support"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, multipath, CTLFLAG_RW, 0, "GEOM_MULTIPATH tunables"); static u_int g_multipath_debug = 0; SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, debug, CTLFLAG_RW, &g_multipath_debug, 0, "Debug level"); static u_int g_multipath_exclusive = 1; SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, exclusive, CTLFLAG_RW, &g_multipath_exclusive, 0, "Exclusively open providers"); static enum { GKT_NIL, GKT_RUN, GKT_DIE } g_multipath_kt_state; static struct bio_queue_head gmtbq; static struct mtx gmtbq_mtx; static int g_multipath_read_metadata(struct g_consumer *cp, struct g_multipath_metadata *md); static int g_multipath_write_metadata(struct g_consumer *cp, struct g_multipath_metadata *md); static void g_multipath_orphan(struct g_consumer *); static void g_multipath_resize(struct g_consumer *); static void g_multipath_start(struct bio *); static void g_multipath_done(struct bio *); static void g_multipath_done_error(struct bio *); static void g_multipath_kt(void *); static int g_multipath_destroy(struct g_geom *); static int g_multipath_destroy_geom(struct gctl_req *, struct g_class *, struct g_geom *); static struct g_geom *g_multipath_find_geom(struct g_class *, const char *); static int g_multipath_rotate(struct g_geom *); static g_taste_t g_multipath_taste; static g_ctl_req_t g_multipath_config; static g_init_t g_multipath_init; static g_fini_t g_multipath_fini; static g_dumpconf_t g_multipath_dumpconf; struct g_class g_multipath_class = { .name = G_MULTIPATH_CLASS_NAME, .version = G_VERSION, .ctlreq = g_multipath_config, .taste = g_multipath_taste, .destroy_geom = g_multipath_destroy_geom, .init = g_multipath_init, .fini = g_multipath_fini }; #define MP_FAIL 0x00000001 #define MP_LOST 0x00000002 #define MP_NEW 0x00000004 #define MP_POSTED 0x00000008 #define MP_BAD (MP_FAIL | MP_LOST | MP_NEW) #define MP_WITHER 0x00000010 #define MP_IDLE 0x00000020 #define MP_IDLE_MASK 0xffffffe0 static int g_multipath_good(struct g_geom *gp) { struct g_consumer *cp; int n = 0; LIST_FOREACH(cp, &gp->consumer, consumer) { if ((cp->index & MP_BAD) == 0) n++; } return (n); } static void g_multipath_fault(struct g_consumer *cp, int cause) { struct g_multipath_softc *sc; struct g_consumer *lcp; struct g_geom *gp; gp = cp->geom; sc = gp->softc; cp->index |= cause; if (g_multipath_good(gp) == 0 && sc->sc_ndisks > 0) { LIST_FOREACH(lcp, 
&gp->consumer, consumer) { if (lcp->provider == NULL || (lcp->index & (MP_LOST | MP_NEW))) continue; if (sc->sc_ndisks > 1 && lcp == cp) continue; printf("GEOM_MULTIPATH: " "all paths in %s were marked FAIL, restore %s\n", sc->sc_name, lcp->provider->name); lcp->index &= ~MP_FAIL; } } if (cp != sc->sc_active) return; sc->sc_active = NULL; LIST_FOREACH(lcp, &gp->consumer, consumer) { if ((lcp->index & MP_BAD) == 0) { sc->sc_active = lcp; break; } } if (sc->sc_active == NULL) { printf("GEOM_MULTIPATH: out of providers for %s\n", sc->sc_name); } else if (sc->sc_active_active != 1) { printf("GEOM_MULTIPATH: %s is now active path in %s\n", sc->sc_active->provider->name, sc->sc_name); } } static struct g_consumer * g_multipath_choose(struct g_geom *gp, struct bio *bp) { struct g_multipath_softc *sc; struct g_consumer *best, *cp; sc = gp->softc; if (sc->sc_active_active == 0 || (sc->sc_active_active == 2 && bp->bio_cmd != BIO_READ)) return (sc->sc_active); best = NULL; LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->index & MP_BAD) continue; cp->index += MP_IDLE; if (best == NULL || cp->private < best->private || (cp->private == best->private && cp->index > best->index)) best = cp; } if (best != NULL) best->index &= ~MP_IDLE_MASK; return (best); } static void g_mpd(void *arg, int flags __unused) { struct g_geom *gp; struct g_multipath_softc *sc; struct g_consumer *cp; int w; g_topology_assert(); cp = arg; gp = cp->geom; if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) { w = cp->acw; g_access(cp, -cp->acr, -cp->acw, -cp->ace); if (w > 0 && cp->provider != NULL && (cp->provider->geom->flags & G_GEOM_WITHER) == 0) { cp->index |= MP_WITHER; g_post_event(g_mpd, cp, M_WAITOK, NULL); return; } } sc = gp->softc; mtx_lock(&sc->sc_mtx); if (cp->provider) { printf("GEOM_MULTIPATH: %s removed from %s\n", cp->provider->name, gp->name); g_detach(cp); } g_destroy_consumer(cp); mtx_unlock(&sc->sc_mtx); if (LIST_EMPTY(&gp->consumer)) g_multipath_destroy(gp); } static void g_multipath_orphan(struct g_consumer *cp) { struct g_multipath_softc *sc; uintptr_t *cnt; g_topology_assert(); printf("GEOM_MULTIPATH: %s in %s was disconnected\n", cp->provider->name, cp->geom->name); sc = cp->geom->softc; cnt = (uintptr_t *)&cp->private; mtx_lock(&sc->sc_mtx); sc->sc_ndisks--; g_multipath_fault(cp, MP_LOST); if (*cnt == 0 && (cp->index & MP_POSTED) == 0) { cp->index |= MP_POSTED; mtx_unlock(&sc->sc_mtx); g_mpd(cp, 0); } else mtx_unlock(&sc->sc_mtx); } static void g_multipath_resize(struct g_consumer *cp) { struct g_multipath_softc *sc; struct g_geom *gp; struct g_consumer *cp1; struct g_provider *pp; struct g_multipath_metadata md; off_t size, psize, ssize; int error; g_topology_assert(); gp = cp->geom; pp = cp->provider; sc = gp->softc; if (sc->sc_stopping) return; if (pp->mediasize < sc->sc_size) { size = pp->mediasize; ssize = pp->sectorsize; } else { size = ssize = OFF_MAX; mtx_lock(&sc->sc_mtx); LIST_FOREACH(cp1, &gp->consumer, consumer) { pp = cp1->provider; if (pp == NULL) continue; if (pp->mediasize < size) { size = pp->mediasize; ssize = pp->sectorsize; } } mtx_unlock(&sc->sc_mtx); if (size == OFF_MAX || size == sc->sc_size) return; } psize = size - ((sc->sc_uuid[0] != 0) ? 
ssize : 0); printf("GEOM_MULTIPATH: %s size changed from %jd to %jd\n", sc->sc_name, sc->sc_pp->mediasize, psize); if (sc->sc_uuid[0] != 0 && size < sc->sc_size) { error = g_multipath_read_metadata(cp, &md); if (error || (strcmp(md.md_magic, G_MULTIPATH_MAGIC) != 0) || (memcmp(md.md_uuid, sc->sc_uuid, sizeof(sc->sc_uuid)) != 0) || (strcmp(md.md_name, sc->sc_name) != 0) || (md.md_size != 0 && md.md_size != size) || (md.md_sectorsize != 0 && md.md_sectorsize != ssize)) { g_multipath_destroy(gp); return; } } sc->sc_size = size; g_resize_provider(sc->sc_pp, psize); if (sc->sc_uuid[0] != 0) { pp = cp->provider; strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic)); memcpy(md.md_uuid, sc->sc_uuid, sizeof (sc->sc_uuid)); strlcpy(md.md_name, sc->sc_name, sizeof(md.md_name)); md.md_version = G_MULTIPATH_VERSION; md.md_size = size; md.md_sectorsize = ssize; md.md_active_active = sc->sc_active_active; error = g_multipath_write_metadata(cp, &md); if (error != 0) printf("GEOM_MULTIPATH: Can't update metadata on %s " "(%d)\n", pp->name, error); } } static void g_multipath_start(struct bio *bp) { struct g_multipath_softc *sc; struct g_geom *gp; struct g_consumer *cp; struct bio *cbp; uintptr_t *cnt; gp = bp->bio_to->geom; sc = gp->softc; KASSERT(sc != NULL, ("NULL sc")); cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } mtx_lock(&sc->sc_mtx); cp = g_multipath_choose(gp, bp); if (cp == NULL) { mtx_unlock(&sc->sc_mtx); g_destroy_bio(cbp); g_io_deliver(bp, ENXIO); return; } if ((uintptr_t)bp->bio_driver1 < sc->sc_ndisks) bp->bio_driver1 = (void *)(uintptr_t)sc->sc_ndisks; cnt = (uintptr_t *)&cp->private; (*cnt)++; mtx_unlock(&sc->sc_mtx); cbp->bio_done = g_multipath_done; g_io_request(cbp, cp); } static void g_multipath_done(struct bio *bp) { struct g_multipath_softc *sc; struct g_consumer *cp; uintptr_t *cnt; if (bp->bio_error == ENXIO || bp->bio_error == EIO) { mtx_lock(&gmtbq_mtx); bioq_insert_tail(&gmtbq, bp); mtx_unlock(&gmtbq_mtx); wakeup(&g_multipath_kt_state); } else { cp = bp->bio_from; sc = cp->geom->softc; cnt = (uintptr_t *)&cp->private; mtx_lock(&sc->sc_mtx); (*cnt)--; if (*cnt == 0 && (cp->index & MP_LOST)) { if (g_post_event(g_mpd, cp, M_NOWAIT, NULL) == 0) cp->index |= MP_POSTED; mtx_unlock(&sc->sc_mtx); } else mtx_unlock(&sc->sc_mtx); g_std_done(bp); } } static void g_multipath_done_error(struct bio *bp) { struct bio *pbp; struct g_geom *gp; struct g_multipath_softc *sc; struct g_consumer *cp; struct g_provider *pp; uintptr_t *cnt; /* * If we had a failure, we have to check first to see * whether the consumer it failed on was the currently * active consumer (i.e., this is the first in perhaps * a number of failures). If so, we then switch consumers * to the next available consumer. */ pbp = bp->bio_parent; gp = pbp->bio_to->geom; sc = gp->softc; cp = bp->bio_from; pp = cp->provider; cnt = (uintptr_t *)&cp->private; mtx_lock(&sc->sc_mtx); if ((cp->index & MP_FAIL) == 0) { printf("GEOM_MULTIPATH: Error %d, %s in %s marked FAIL\n", bp->bio_error, pp->name, sc->sc_name); g_multipath_fault(cp, MP_FAIL); } (*cnt)--; if (*cnt == 0 && (cp->index & (MP_LOST | MP_POSTED)) == MP_LOST) { cp->index |= MP_POSTED; mtx_unlock(&sc->sc_mtx); g_post_event(g_mpd, cp, M_WAITOK, NULL); } else mtx_unlock(&sc->sc_mtx); /* * If we can fruitfully restart the I/O, do so. 
*/ if (pbp->bio_children < (uintptr_t)pbp->bio_driver1) { pbp->bio_inbed++; g_destroy_bio(bp); g_multipath_start(pbp); } else { g_std_done(bp); } } static void g_multipath_kt(void *arg) { g_multipath_kt_state = GKT_RUN; mtx_lock(&gmtbq_mtx); while (g_multipath_kt_state == GKT_RUN) { for (;;) { struct bio *bp; bp = bioq_takefirst(&gmtbq); if (bp == NULL) break; mtx_unlock(&gmtbq_mtx); g_multipath_done_error(bp); mtx_lock(&gmtbq_mtx); } if (g_multipath_kt_state != GKT_RUN) break; msleep(&g_multipath_kt_state, &gmtbq_mtx, PRIBIO, "gkt:wait", 0); } mtx_unlock(&gmtbq_mtx); wakeup(&g_multipath_kt_state); kproc_exit(0); } static int g_multipath_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp, *badcp = NULL; struct g_multipath_softc *sc; int error; gp = pp->geom; /* Error used if we have no valid consumers. */ error = (dr > 0 || dw > 0 || de > 0) ? ENXIO : 0; LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->index & MP_WITHER) continue; error = g_access(cp, dr, dw, de); if (error) { badcp = cp; goto fail; } } if (error != 0) return (error); sc = gp->softc; sc->sc_opened += dr + dw + de; if (sc->sc_stopping && sc->sc_opened == 0) g_multipath_destroy(gp); return (0); fail: LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp == badcp) break; if (cp->index & MP_WITHER) continue; (void) g_access(cp, -dr, -dw, -de); } return (error); } static struct g_geom * g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md) { struct g_multipath_softc *sc; struct g_geom *gp; struct g_provider *pp; g_topology_assert(); LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL || sc->sc_stopping) continue; if (strcmp(gp->name, md->md_name) == 0) { printf("GEOM_MULTIPATH: name %s already exists\n", md->md_name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "multipath", NULL, MTX_DEF); memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid)); memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name)); sc->sc_active_active = md->md_active_active; sc->sc_size = md->md_size; gp->softc = sc; gp->start = g_multipath_start; gp->orphan = g_multipath_orphan; gp->resize = g_multipath_resize; gp->access = g_multipath_access; gp->dumpconf = g_multipath_dumpconf; pp = g_new_providerf(gp, "multipath/%s", md->md_name); pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (md->md_size != 0) { pp->mediasize = md->md_size - ((md->md_uuid[0] != 0) ? 
md->md_sectorsize : 0); pp->sectorsize = md->md_sectorsize; } sc->sc_pp = pp; g_error_provider(pp, 0); printf("GEOM_MULTIPATH: %s created\n", gp->name); return (gp); } static int g_multipath_add_disk(struct g_geom *gp, struct g_provider *pp) { struct g_multipath_softc *sc; struct g_consumer *cp, *nxtcp; int error, acr, acw, ace; g_topology_assert(); sc = gp->softc; KASSERT(sc, ("no softc")); /* * Make sure that the passed provider isn't already attached */ LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->provider == pp) break; } if (cp) { printf("GEOM_MULTIPATH: provider %s already attached to %s\n", pp->name, gp->name); return (EEXIST); } nxtcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; cp->private = NULL; cp->index = MP_NEW; error = g_attach(cp, pp); if (error != 0) { printf("GEOM_MULTIPATH: cannot attach %s to %s", pp->name, sc->sc_name); g_destroy_consumer(cp); return (error); } /* * Set access permissions on new consumer to match other consumers */ if (sc->sc_pp) { acr = sc->sc_pp->acr; acw = sc->sc_pp->acw; ace = sc->sc_pp->ace; } else acr = acw = ace = 0; if (g_multipath_exclusive) { acr++; acw++; ace++; } error = g_access(cp, acr, acw, ace); if (error) { printf("GEOM_MULTIPATH: cannot set access in " "attaching %s to %s (%d)\n", pp->name, sc->sc_name, error); g_detach(cp); g_destroy_consumer(cp); return (error); } if (sc->sc_size == 0) { sc->sc_size = pp->mediasize - ((sc->sc_uuid[0] != 0) ? pp->sectorsize : 0); sc->sc_pp->mediasize = sc->sc_size; sc->sc_pp->sectorsize = pp->sectorsize; } if (sc->sc_pp->stripesize == 0 && sc->sc_pp->stripeoffset == 0) { sc->sc_pp->stripesize = pp->stripesize; sc->sc_pp->stripeoffset = pp->stripeoffset; } sc->sc_pp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED; mtx_lock(&sc->sc_mtx); cp->index = 0; sc->sc_ndisks++; mtx_unlock(&sc->sc_mtx); printf("GEOM_MULTIPATH: %s added to %s\n", pp->name, sc->sc_name); if (sc->sc_active == NULL) { sc->sc_active = cp; if (sc->sc_active_active != 1) printf("GEOM_MULTIPATH: %s is now active path in %s\n", pp->name, sc->sc_name); } return (0); } static int g_multipath_destroy(struct g_geom *gp) { struct g_multipath_softc *sc; struct g_consumer *cp, *cp1; g_topology_assert(); if (gp->softc == NULL) return (ENXIO); sc = gp->softc; if (!sc->sc_stopping) { printf("GEOM_MULTIPATH: destroying %s\n", gp->name); sc->sc_stopping = 1; } if (sc->sc_opened != 0) { g_wither_provider(sc->sc_pp, ENXIO); sc->sc_pp = NULL; return (EINPROGRESS); } LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { mtx_lock(&sc->sc_mtx); if ((cp->index & MP_POSTED) == 0) { cp->index |= MP_POSTED; mtx_unlock(&sc->sc_mtx); g_mpd(cp, 0); if (cp1 == NULL) return(0); /* Recursion happened. 
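				 * g_mpd() on the last consumer may re-enter
				 * g_multipath_destroy() and free the softc,
				 * so return without touching it again.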
*/ } else mtx_unlock(&sc->sc_mtx); } if (!LIST_EMPTY(&gp->consumer)) return (EINPROGRESS); mtx_destroy(&sc->sc_mtx); g_free(gp->softc); gp->softc = NULL; printf("GEOM_MULTIPATH: %s destroyed\n", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_multipath_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_multipath_destroy(gp)); } static int g_multipath_rotate(struct g_geom *gp) { struct g_consumer *lcp, *first_good_cp = NULL; struct g_multipath_softc *sc = gp->softc; int active_cp_seen = 0; g_topology_assert(); if (sc == NULL) return (ENXIO); LIST_FOREACH(lcp, &gp->consumer, consumer) { if ((lcp->index & MP_BAD) == 0) { if (first_good_cp == NULL) first_good_cp = lcp; if (active_cp_seen) break; } if (sc->sc_active == lcp) active_cp_seen = 1; } if (lcp == NULL) lcp = first_good_cp; if (lcp && lcp != sc->sc_active) { sc->sc_active = lcp; if (sc->sc_active_active != 1) printf("GEOM_MULTIPATH: %s is now active path in %s\n", lcp->provider->name, sc->sc_name); } return (0); } static void g_multipath_init(struct g_class *mp) { bioq_init(&gmtbq); mtx_init(&gmtbq_mtx, "gmtbq", NULL, MTX_DEF); kproc_create(g_multipath_kt, mp, NULL, 0, 0, "g_mp_kt"); } static void g_multipath_fini(struct g_class *mp) { if (g_multipath_kt_state == GKT_RUN) { mtx_lock(&gmtbq_mtx); g_multipath_kt_state = GKT_DIE; wakeup(&g_multipath_kt_state); msleep(&g_multipath_kt_state, &gmtbq_mtx, PRIBIO, "gmp:fini", 0); mtx_unlock(&gmtbq_mtx); } } static int g_multipath_read_metadata(struct g_consumer *cp, struct g_multipath_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); multipath_metadata_decode(buf, md); g_free(buf); return (0); } static int g_multipath_write_metadata(struct g_consumer *cp, struct g_multipath_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 1, 1); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); multipath_metadata_encode(md, buf); error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, pp->sectorsize); g_topology_lock(); g_access(cp, -1, -1, -1); g_free(buf); return (error); } static struct g_geom * g_multipath_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_multipath_metadata md; struct g_multipath_softc *sc; struct g_consumer *cp; struct g_geom *gp, *gp1; int error, isnew; g_topology_assert(); gp = g_new_geomf(mp, "multipath:taste"); gp->start = g_multipath_start; gp->access = g_multipath_access; gp->orphan = g_multipath_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_multipath_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_MULTIPATH_MAGIC) != 0) { if (g_multipath_debug) printf("%s is not MULTIPATH\n", pp->name); return (NULL); } if (md.md_version != G_MULTIPATH_VERSION) { printf("%s has version %d multipath id- this module is version " " %d: rejecting\n", pp->name, md.md_version, G_MULTIPATH_VERSION); return (NULL); } if (md.md_size != 0 && md.md_size != pp->mediasize) return (NULL); if (md.md_sectorsize != 0 && md.md_sectorsize != pp->sectorsize) return (NULL); if (g_multipath_debug) 
printf("MULTIPATH: %s/%s\n", md.md_name, md.md_uuid); /* * Let's check if such a device already is present. We check against * uuid alone first because that's the true distinguishor. If that * passes, then we check for name conflicts. If there are conflicts, * modify the name. * * The whole purpose of this is to solve the problem that people don't * pick good unique names, but good unique names (like uuids) are a * pain to use. So, we allow people to build GEOMs with friendly names * and uuids, and modify the names in case there's a collision. */ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL || sc->sc_stopping) continue; if (strncmp(md.md_uuid, sc->sc_uuid, sizeof(md.md_uuid)) == 0) break; } LIST_FOREACH(gp1, &mp->geom, geom) { if (gp1 == gp) continue; sc = gp1->softc; if (sc == NULL || sc->sc_stopping) continue; if (strncmp(md.md_name, sc->sc_name, sizeof(md.md_name)) == 0) break; } /* * If gp is NULL, we had no extant MULTIPATH geom with this uuid. * * If gp1 is *not* NULL, that means we have a MULTIPATH geom extant * with the same name (but a different UUID). * * If gp is NULL, then modify the name with a random number and * complain, but allow the creation of the geom to continue. * * If gp is *not* NULL, just use the geom's name as we're attaching * this disk to the (previously generated) name. */ if (gp1) { sc = gp1->softc; if (gp == NULL) { char buf[16]; u_long rand = random(); snprintf(buf, sizeof (buf), "%s-%lu", md.md_name, rand); printf("GEOM_MULTIPATH: geom %s/%s exists already\n", sc->sc_name, sc->sc_uuid); printf("GEOM_MULTIPATH: %s will be (temporarily) %s\n", md.md_uuid, buf); strlcpy(md.md_name, buf, sizeof(md.md_name)); } else { strlcpy(md.md_name, sc->sc_name, sizeof(md.md_name)); } } if (gp == NULL) { gp = g_multipath_create(mp, &md); if (gp == NULL) { printf("GEOM_MULTIPATH: cannot create geom %s/%s\n", md.md_name, md.md_uuid); return (NULL); } isnew = 1; } else { isnew = 0; } sc = gp->softc; KASSERT(sc != NULL, ("sc is NULL")); error = g_multipath_add_disk(gp, pp); if (error != 0) { if (isnew) g_multipath_destroy(gp); return (NULL); } return (gp); } static void g_multipath_ctl_add_name(struct gctl_req *req, struct g_class *mp, const char *name) { struct g_multipath_softc *sc; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; const char *mpname; static const char devpf[6] = "/dev/"; int error; g_topology_assert(); mpname = gctl_get_asciiparam(req, "arg0"); if (mpname == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, mpname); if (gp == NULL) { gctl_error(req, "Device %s is invalid", mpname); return; } sc = gp->softc; if (strncmp(name, devpf, 5) == 0) name += 5; pp = g_provider_by_name(name); if (pp == NULL) { gctl_error(req, "Provider %s is invalid", name); return; } /* * Check to make sure parameters match. */ LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->provider == pp) { gctl_error(req, "provider %s is already there", pp->name); return; } } if (sc->sc_pp->mediasize != 0 && sc->sc_pp->mediasize + (sc->sc_uuid[0] != 0 ? pp->sectorsize : 0) != pp->mediasize) { gctl_error(req, "Providers size mismatch %jd != %jd", (intmax_t) sc->sc_pp->mediasize + (sc->sc_uuid[0] != 0 ? 
pp->sectorsize : 0), (intmax_t) pp->mediasize); return; } if (sc->sc_pp->sectorsize != 0 && sc->sc_pp->sectorsize != pp->sectorsize) { gctl_error(req, "Providers sectorsize mismatch %u != %u", sc->sc_pp->sectorsize, pp->sectorsize); return; } error = g_multipath_add_disk(gp, pp); if (error != 0) gctl_error(req, "Provider addition error: %d", error); } static void g_multipath_ctl_prefer(struct gctl_req *req, struct g_class *mp) { struct g_geom *gp; struct g_multipath_softc *sc; struct g_consumer *cp; const char *name, *mpname; static const char devpf[6] = "/dev/"; int *nargs; g_topology_assert(); mpname = gctl_get_asciiparam(req, "arg0"); if (mpname == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, mpname); if (gp == NULL) { gctl_error(req, "Device %s is invalid", mpname); return; } sc = gp->softc; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No 'nargs' argument"); return; } if (*nargs != 2) { gctl_error(req, "missing device"); return; } name = gctl_get_asciiparam(req, "arg1"); if (name == NULL) { gctl_error(req, "No 'arg1' argument"); return; } if (strncmp(name, devpf, 5) == 0) { name += 5; } LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->provider != NULL && strcmp(cp->provider->name, name) == 0) break; } if (cp == NULL) { gctl_error(req, "Provider %s not found", name); return; } mtx_lock(&sc->sc_mtx); if (cp->index & MP_BAD) { gctl_error(req, "Consumer %s is invalid", name); mtx_unlock(&sc->sc_mtx); return; } /* Here when the consumer is present and in good shape */ sc->sc_active = cp; if (!sc->sc_active_active) printf("GEOM_MULTIPATH: %s now active path in %s\n", sc->sc_active->provider->name, sc->sc_name); mtx_unlock(&sc->sc_mtx); } static void g_multipath_ctl_add(struct gctl_req *req, struct g_class *mp) { struct g_multipath_softc *sc; struct g_geom *gp; const char *mpname, *name; mpname = gctl_get_asciiparam(req, "arg0"); if (mpname == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, mpname); if (gp == NULL) { gctl_error(req, "Device %s not found", mpname); return; } sc = gp->softc; name = gctl_get_asciiparam(req, "arg1"); if (name == NULL) { gctl_error(req, "No 'arg1' argument"); return; } g_multipath_ctl_add_name(req, mp, name); } static void g_multipath_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_multipath_metadata md; struct g_multipath_softc *sc; struct g_geom *gp; const char *mpname, *name; char param[16]; int *nargs, i, *val; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (*nargs < 2) { gctl_error(req, "wrong number of arguments."); return; } mpname = gctl_get_asciiparam(req, "arg0"); if (mpname == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, mpname); if (gp != NULL) { gctl_error(req, "Device %s already exist", mpname); return; } memset(&md, 0, sizeof(md)); strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic)); md.md_version = G_MULTIPATH_VERSION; strlcpy(md.md_name, mpname, sizeof(md.md_name)); md.md_size = 0; md.md_sectorsize = 0; md.md_uuid[0] = 0; md.md_active_active = 0; val = gctl_get_paraml(req, "active_active", sizeof(*val)); if (val != NULL && *val != 0) md.md_active_active = 1; val = gctl_get_paraml(req, "active_read", sizeof(*val)); if (val != NULL && *val != 0) md.md_active_active = 2; gp = g_multipath_create(mp, &md); if (gp == NULL) { gctl_error(req, "GEOM_MULTIPATH: cannot create geom %s/%s\n", md.md_name, md.md_uuid); return; } sc 
= gp->softc; for (i = 1; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); g_multipath_ctl_add_name(req, mp, name); } if (sc->sc_ndisks != (*nargs - 1)) g_multipath_destroy(gp); } static void g_multipath_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_multipath_softc *sc; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; struct g_multipath_metadata md; const char *name; int error, *val; g_topology_assert(); name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, name); if (gp == NULL) { gctl_error(req, "Device %s is invalid", name); return; } sc = gp->softc; val = gctl_get_paraml(req, "active_active", sizeof(*val)); if (val != NULL && *val != 0) sc->sc_active_active = 1; val = gctl_get_paraml(req, "active_read", sizeof(*val)); if (val != NULL && *val != 0) sc->sc_active_active = 2; val = gctl_get_paraml(req, "active_passive", sizeof(*val)); if (val != NULL && *val != 0) sc->sc_active_active = 0; if (sc->sc_uuid[0] != 0 && sc->sc_active != NULL) { cp = sc->sc_active; pp = cp->provider; strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic)); memcpy(md.md_uuid, sc->sc_uuid, sizeof (sc->sc_uuid)); strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_version = G_MULTIPATH_VERSION; md.md_size = pp->mediasize; md.md_sectorsize = pp->sectorsize; md.md_active_active = sc->sc_active_active; error = g_multipath_write_metadata(cp, &md); if (error != 0) gctl_error(req, "Can't update metadata on %s (%d)", pp->name, error); } } static void g_multipath_ctl_fail(struct gctl_req *req, struct g_class *mp, int fail) { struct g_multipath_softc *sc; struct g_geom *gp; struct g_consumer *cp; const char *mpname, *name; int found; mpname = gctl_get_asciiparam(req, "arg0"); if (mpname == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, mpname); if (gp == NULL) { gctl_error(req, "Device %s not found", mpname); return; } sc = gp->softc; name = gctl_get_asciiparam(req, "arg1"); if (name == NULL) { gctl_error(req, "No 'arg1' argument"); return; } found = 0; mtx_lock(&sc->sc_mtx); LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->provider != NULL && strcmp(cp->provider->name, name) == 0 && (cp->index & MP_LOST) == 0) { found = 1; if (!fail == !(cp->index & MP_FAIL)) continue; printf("GEOM_MULTIPATH: %s in %s is marked %s.\n", name, sc->sc_name, fail ? 
"FAIL" : "OK"); if (fail) { g_multipath_fault(cp, MP_FAIL); } else { cp->index &= ~MP_FAIL; } } } mtx_unlock(&sc->sc_mtx); if (found == 0) gctl_error(req, "Provider %s not found", name); } static void g_multipath_ctl_remove(struct gctl_req *req, struct g_class *mp) { struct g_multipath_softc *sc; struct g_geom *gp; struct g_consumer *cp, *cp1; const char *mpname, *name; uintptr_t *cnt; int found; mpname = gctl_get_asciiparam(req, "arg0"); if (mpname == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, mpname); if (gp == NULL) { gctl_error(req, "Device %s not found", mpname); return; } sc = gp->softc; name = gctl_get_asciiparam(req, "arg1"); if (name == NULL) { gctl_error(req, "No 'arg1' argument"); return; } found = 0; mtx_lock(&sc->sc_mtx); LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { if (cp->provider != NULL && strcmp(cp->provider->name, name) == 0 && (cp->index & MP_LOST) == 0) { found = 1; printf("GEOM_MULTIPATH: removing %s from %s\n", cp->provider->name, cp->geom->name); sc->sc_ndisks--; g_multipath_fault(cp, MP_LOST); cnt = (uintptr_t *)&cp->private; if (*cnt == 0 && (cp->index & MP_POSTED) == 0) { cp->index |= MP_POSTED; mtx_unlock(&sc->sc_mtx); g_mpd(cp, 0); if (cp1 == NULL) return; /* Recursion happened. */ mtx_lock(&sc->sc_mtx); } } } mtx_unlock(&sc->sc_mtx); if (found == 0) gctl_error(req, "Provider %s not found", name); } static struct g_geom * g_multipath_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; struct g_multipath_softc *sc; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL || sc->sc_stopping) continue; if (strcmp(gp->name, name) == 0) return (gp); } return (NULL); } static void g_multipath_ctl_stop(struct gctl_req *req, struct g_class *mp) { struct g_geom *gp; const char *name; int error; g_topology_assert(); name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, name); if (gp == NULL) { gctl_error(req, "Device %s is invalid", name); return; } error = g_multipath_destroy(gp); if (error != 0 && error != EINPROGRESS) gctl_error(req, "failed to stop %s (err=%d)", name, error); } static void g_multipath_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_geom *gp; struct g_multipath_softc *sc; struct g_consumer *cp; struct g_provider *pp; const char *name; uint8_t *buf; int error; g_topology_assert(); name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, name); if (gp == NULL) { gctl_error(req, "Device %s is invalid", name); return; } sc = gp->softc; if (sc->sc_uuid[0] != 0 && sc->sc_active != NULL) { cp = sc->sc_active; pp = cp->provider; error = g_access(cp, 1, 1, 1); if (error != 0) { gctl_error(req, "Can't open %s (%d)", pp->name, error); goto destroy; } g_topology_unlock(); buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, pp->sectorsize); g_topology_lock(); g_access(cp, -1, -1, -1); if (error != 0) gctl_error(req, "Can't erase metadata on %s (%d)", pp->name, error); } destroy: error = g_multipath_destroy(gp); if (error != 0 && error != EINPROGRESS) gctl_error(req, "failed to destroy %s (err=%d)", name, error); } static void g_multipath_ctl_rotate(struct gctl_req *req, struct g_class *mp) { struct g_geom *gp; const char *name; int error; g_topology_assert(); name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 
'arg0' argument"); return; } gp = g_multipath_find_geom(mp, name); if (gp == NULL) { gctl_error(req, "Device %s is invalid", name); return; } error = g_multipath_rotate(gp); if (error != 0) { gctl_error(req, "failed to rotate %s (err=%d)", name, error); } } static void g_multipath_ctl_getactive(struct gctl_req *req, struct g_class *mp) { struct sbuf *sb; struct g_geom *gp; struct g_multipath_softc *sc; struct g_consumer *cp; const char *name; int empty; sb = sbuf_new_auto(); g_topology_assert(); name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg0' argument"); return; } gp = g_multipath_find_geom(mp, name); if (gp == NULL) { gctl_error(req, "Device %s is invalid", name); return; } sc = gp->softc; if (sc->sc_active_active == 1) { empty = 1; LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->index & MP_BAD) continue; if (!empty) sbuf_cat(sb, " "); sbuf_cat(sb, cp->provider->name); empty = 0; } if (empty) sbuf_cat(sb, "none"); sbuf_cat(sb, "\n"); } else if (sc->sc_active && sc->sc_active->provider) { sbuf_printf(sb, "%s\n", sc->sc_active->provider->name); } else { sbuf_printf(sb, "none\n"); } sbuf_finish(sb); gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } static void g_multipath_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No 'version' argument"); } else if (*version != G_MULTIPATH_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync"); } else if (strcmp(verb, "add") == 0) { g_multipath_ctl_add(req, mp); } else if (strcmp(verb, "prefer") == 0) { g_multipath_ctl_prefer(req, mp); } else if (strcmp(verb, "create") == 0) { g_multipath_ctl_create(req, mp); } else if (strcmp(verb, "configure") == 0) { g_multipath_ctl_configure(req, mp); } else if (strcmp(verb, "stop") == 0) { g_multipath_ctl_stop(req, mp); } else if (strcmp(verb, "destroy") == 0) { g_multipath_ctl_destroy(req, mp); } else if (strcmp(verb, "fail") == 0) { g_multipath_ctl_fail(req, mp, 1); } else if (strcmp(verb, "restore") == 0) { g_multipath_ctl_fail(req, mp, 0); } else if (strcmp(verb, "remove") == 0) { g_multipath_ctl_remove(req, mp); } else if (strcmp(verb, "rotate") == 0) { g_multipath_ctl_rotate(req, mp); } else if (strcmp(verb, "getactive") == 0) { g_multipath_ctl_getactive(req, mp); } else { gctl_error(req, "Unknown verb %s", verb); } } static void g_multipath_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_multipath_softc *sc; int good; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (cp != NULL) { sbuf_printf(sb, "%s%s\n", indent, (cp->index & MP_NEW) ? "NEW" : (cp->index & MP_LOST) ? "LOST" : (cp->index & MP_FAIL) ? "FAIL" : (sc->sc_active_active == 1 || sc->sc_active == cp) ? "ACTIVE" : sc->sc_active_active == 2 ? "READ" : "PASSIVE"); } else { good = g_multipath_good(gp); sbuf_printf(sb, "%s%s\n", indent, good == 0 ? "BROKEN" : (good != sc->sc_ndisks || sc->sc_ndisks == 1) ? "DEGRADED" : "OPTIMAL"); } if (cp == NULL && pp == NULL) { sbuf_printf(sb, "%s%s\n", indent, sc->sc_uuid); sbuf_printf(sb, "%sActive/%s\n", indent, sc->sc_active_active == 2 ? "Read" : sc->sc_active_active == 1 ? "Active" : "Passive"); sbuf_printf(sb, "%s%s\n", indent, sc->sc_uuid[0] == 0 ? 
"MANUAL" : "AUTOMATIC"); } } DECLARE_GEOM_CLASS(g_multipath_class, g_multipath); +MODULE_VERSION(geom_multipath, 0); Index: stable/11/sys/geom/nop/g_nop.c =================================================================== --- stable/11/sys/geom/nop/g_nop.c (revision 332639) +++ stable/11/sys/geom/nop/g_nop.c (revision 332640) @@ -1,717 +1,718 @@ /*- * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, nop, CTLFLAG_RW, 0, "GEOM_NOP stuff"); static u_int g_nop_debug = 0; SYSCTL_UINT(_kern_geom_nop, OID_AUTO, debug, CTLFLAG_RW, &g_nop_debug, 0, "Debug level"); static int g_nop_destroy(struct g_geom *gp, boolean_t force); static int g_nop_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_nop_config(struct gctl_req *req, struct g_class *mp, const char *verb); static void g_nop_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); struct g_class g_nop_class = { .name = G_NOP_CLASS_NAME, .version = G_VERSION, .ctlreq = g_nop_config, .destroy_geom = g_nop_destroy_geom }; static void g_nop_orphan(struct g_consumer *cp) { g_topology_assert(); g_nop_destroy(cp->geom, 1); } static void g_nop_resize(struct g_consumer *cp) { struct g_nop_softc *sc; struct g_geom *gp; struct g_provider *pp; off_t size; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc->sc_explicitsize != 0) return; if (cp->provider->mediasize < sc->sc_offset) { g_nop_destroy(gp, 1); return; } size = cp->provider->mediasize - sc->sc_offset; LIST_FOREACH(pp, &gp->provider, provider) g_resize_provider(pp, size); } static void g_nop_start(struct bio *bp) { struct g_nop_softc *sc; struct g_geom *gp; struct g_provider *pp; struct bio *cbp; u_int failprob = 0; gp = bp->bio_to->geom; sc = gp->softc; G_NOP_LOGREQ(bp, "Request received."); mtx_lock(&sc->sc_lock); switch (bp->bio_cmd) { case BIO_READ: sc->sc_reads++; sc->sc_readbytes += bp->bio_length; failprob = sc->sc_rfailprob; break; case BIO_WRITE: sc->sc_writes++; sc->sc_wrotebytes += bp->bio_length; failprob = sc->sc_wfailprob; 
break; case BIO_DELETE: sc->sc_deletes++; break; case BIO_GETATTR: sc->sc_getattrs++; if (sc->sc_physpath && g_handleattr_str(bp, "GEOM::physpath", sc->sc_physpath)) { mtx_unlock(&sc->sc_lock); return; } break; case BIO_FLUSH: sc->sc_flushes++; break; case BIO_CMD0: sc->sc_cmd0s++; break; case BIO_CMD1: sc->sc_cmd1s++; break; case BIO_CMD2: sc->sc_cmd2s++; break; } mtx_unlock(&sc->sc_lock); if (failprob > 0) { u_int rval; rval = arc4random() % 100; if (rval < failprob) { G_NOP_LOGREQLVL(1, bp, "Returning error=%d.", sc->sc_error); g_io_deliver(bp, sc->sc_error); return; } } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; cbp->bio_offset = bp->bio_offset + sc->sc_offset; pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("NULL pp")); cbp->bio_to = pp; G_NOP_LOGREQ(cbp, "Sending request."); g_io_request(cbp, LIST_FIRST(&gp->consumer)); } static int g_nop_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; int error; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); error = g_access(cp, dr, dw, de); return (error); } static int g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, int ioerror, u_int rfailprob, u_int wfailprob, off_t offset, off_t size, u_int secsize, u_int stripesize, u_int stripeoffset, const char *physpath) { struct g_nop_softc *sc; struct g_geom *gp; struct g_provider *newpp; struct g_consumer *cp; char name[64]; int error; off_t explicitsize; g_topology_assert(); gp = NULL; newpp = NULL; cp = NULL; if ((offset % pp->sectorsize) != 0) { gctl_error(req, "Invalid offset for provider %s.", pp->name); return (EINVAL); } if ((size % pp->sectorsize) != 0) { gctl_error(req, "Invalid size for provider %s.", pp->name); return (EINVAL); } if (offset >= pp->mediasize) { gctl_error(req, "Invalid offset for provider %s.", pp->name); return (EINVAL); } explicitsize = size; if (size == 0) size = pp->mediasize - offset; if (offset + size > pp->mediasize) { gctl_error(req, "Invalid size for provider %s.", pp->name); return (EINVAL); } if (secsize == 0) secsize = pp->sectorsize; else if ((secsize % pp->sectorsize) != 0) { gctl_error(req, "Invalid secsize for provider %s.", pp->name); return (EINVAL); } if (secsize > MAXPHYS) { gctl_error(req, "secsize is too big."); return (EINVAL); } size -= size % secsize; if ((stripesize % pp->sectorsize) != 0) { gctl_error(req, "Invalid stripesize for provider %s.", pp->name); return (EINVAL); } if ((stripeoffset % pp->sectorsize) != 0) { gctl_error(req, "Invalid stripeoffset for provider %s.", pp->name); return (EINVAL); } if (stripesize != 0 && stripeoffset >= stripesize) { gctl_error(req, "stripeoffset is too big."); return (EINVAL); } snprintf(name, sizeof(name), "%s%s", pp->name, G_NOP_SUFFIX); LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) { gctl_error(req, "Provider %s already exists.", name); return (EEXIST); } } gp = g_new_geomf(mp, "%s", name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); sc->sc_offset = offset; sc->sc_explicitsize = explicitsize; sc->sc_stripesize = stripesize; sc->sc_stripeoffset = stripeoffset; if (physpath && strcmp(physpath, G_NOP_PHYSPATH_PASSTHROUGH)) { sc->sc_physpath = strndup(physpath, MAXPATHLEN, M_GEOM); } else sc->sc_physpath = NULL; sc->sc_error = ioerror; sc->sc_rfailprob = rfailprob; sc->sc_wfailprob = wfailprob; sc->sc_reads = 0; sc->sc_writes = 0; sc->sc_deletes = 0; sc->sc_getattrs = 0; sc->sc_flushes = 0; sc->sc_cmd0s = 0; sc->sc_cmd1s = 0; sc->sc_cmd2s = 0; 
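	/*
	 * All statistics start at zero; they are exported through
	 * g_nop_dumpconf() and can be cleared again with the "reset" verb.
	 */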
sc->sc_readbytes = 0; sc->sc_wrotebytes = 0; mtx_init(&sc->sc_lock, "gnop lock", NULL, MTX_DEF); gp->softc = sc; gp->start = g_nop_start; gp->orphan = g_nop_orphan; gp->resize = g_nop_resize; gp->access = g_nop_access; gp->dumpconf = g_nop_dumpconf; newpp = g_new_providerf(gp, "%s", gp->name); newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; newpp->mediasize = size; newpp->sectorsize = secsize; newpp->stripesize = stripesize; newpp->stripeoffset = stripeoffset; cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach to provider %s.", pp->name); goto fail; } newpp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED; g_error_provider(newpp, 0); G_NOP_DEBUG(0, "Device %s created.", gp->name); return (0); fail: if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); g_destroy_provider(newpp); mtx_destroy(&sc->sc_lock); free(sc->sc_physpath, M_GEOM); g_free(gp->softc); g_destroy_geom(gp); return (error); } static int g_nop_destroy(struct g_geom *gp, boolean_t force) { struct g_nop_softc *sc; struct g_provider *pp; g_topology_assert(); sc = gp->softc; if (sc == NULL) return (ENXIO); free(sc->sc_physpath, M_GEOM); pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_NOP_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_NOP_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else { G_NOP_DEBUG(0, "Device %s removed.", gp->name); } gp->softc = NULL; mtx_destroy(&sc->sc_lock); g_free(sc); g_wither_geom(gp, ENXIO); return (0); } static int g_nop_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_nop_destroy(gp, 0)); } static void g_nop_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; intmax_t *error, *rfailprob, *wfailprob, *offset, *secsize, *size, *stripesize, *stripeoffset; const char *name, *physpath; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } error = gctl_get_paraml(req, "error", sizeof(*error)); if (error == NULL) { gctl_error(req, "No '%s' argument", "error"); return; } rfailprob = gctl_get_paraml(req, "rfailprob", sizeof(*rfailprob)); if (rfailprob == NULL) { gctl_error(req, "No '%s' argument", "rfailprob"); return; } if (*rfailprob < -1 || *rfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "rfailprob"); return; } wfailprob = gctl_get_paraml(req, "wfailprob", sizeof(*wfailprob)); if (wfailprob == NULL) { gctl_error(req, "No '%s' argument", "wfailprob"); return; } if (*wfailprob < -1 || *wfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "wfailprob"); return; } offset = gctl_get_paraml(req, "offset", sizeof(*offset)); if (offset == NULL) { gctl_error(req, "No '%s' argument", "offset"); return; } if (*offset < 0) { gctl_error(req, "Invalid '%s' argument", "offset"); return; } size = gctl_get_paraml(req, "size", sizeof(*size)); if (size == NULL) { gctl_error(req, "No '%s' argument", "size"); return; } if (*size < 0) { gctl_error(req, "Invalid '%s' argument", "size"); return; } secsize = gctl_get_paraml(req, "secsize", sizeof(*secsize)); if (secsize == NULL) { gctl_error(req, "No '%s' argument", "secsize"); return; } if (*secsize < 0) { 
gctl_error(req, "Invalid '%s' argument", "secsize"); return; } stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "No '%s' argument", "stripesize"); return; } if (*stripesize < 0) { gctl_error(req, "Invalid '%s' argument", "stripesize"); return; } stripeoffset = gctl_get_paraml(req, "stripeoffset", sizeof(*stripeoffset)); if (stripeoffset == NULL) { gctl_error(req, "No '%s' argument", "stripeoffset"); return; } if (*stripeoffset < 0) { gctl_error(req, "Invalid '%s' argument", "stripeoffset"); return; } physpath = gctl_get_asciiparam(req, "physpath"); for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_NOP_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } if (g_nop_create(req, mp, pp, *error == -1 ? EIO : (int)*error, *rfailprob == -1 ? 0 : (u_int)*rfailprob, *wfailprob == -1 ? 0 : (u_int)*wfailprob, (off_t)*offset, (off_t)*size, (u_int)*secsize, (u_int)*stripesize, (u_int)*stripeoffset, physpath) != 0) { return; } } } static void g_nop_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_nop_softc *sc; struct g_provider *pp; intmax_t *error, *rfailprob, *wfailprob; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } error = gctl_get_paraml(req, "error", sizeof(*error)); if (error == NULL) { gctl_error(req, "No '%s' argument", "error"); return; } rfailprob = gctl_get_paraml(req, "rfailprob", sizeof(*rfailprob)); if (rfailprob == NULL) { gctl_error(req, "No '%s' argument", "rfailprob"); return; } if (*rfailprob < -1 || *rfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "rfailprob"); return; } wfailprob = gctl_get_paraml(req, "wfailprob", sizeof(*wfailprob)); if (wfailprob == NULL) { gctl_error(req, "No '%s' argument", "wfailprob"); return; } if (*wfailprob < -1 || *wfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "wfailprob"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL || pp->geom->class != mp) { G_NOP_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } sc = pp->geom->softc; if (*error != -1) sc->sc_error = (int)*error; if (*rfailprob != -1) sc->sc_rfailprob = (u_int)*rfailprob; if (*wfailprob != -1) sc->sc_wfailprob = (u_int)*wfailprob; } } static struct g_geom * g_nop_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) return (gp); } return (NULL); } static void g_nop_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_geom *gp; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 
0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); gp = g_nop_find_geom(mp, name); if (gp == NULL) { G_NOP_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } error = g_nop_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", gp->name, error); return; } } } static void g_nop_ctl_reset(struct gctl_req *req, struct g_class *mp) { struct g_nop_softc *sc; struct g_provider *pp; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL || pp->geom->class != mp) { G_NOP_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } sc = pp->geom->softc; sc->sc_reads = 0; sc->sc_writes = 0; sc->sc_deletes = 0; sc->sc_getattrs = 0; sc->sc_flushes = 0; sc->sc_cmd0s = 0; sc->sc_cmd1s = 0; sc->sc_cmd2s = 0; sc->sc_readbytes = 0; sc->sc_wrotebytes = 0; } } static void g_nop_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_NOP_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_nop_ctl_create(req, mp); return; } else if (strcmp(verb, "configure") == 0) { g_nop_ctl_configure(req, mp); return; } else if (strcmp(verb, "destroy") == 0) { g_nop_ctl_destroy(req, mp); return; } else if (strcmp(verb, "reset") == 0) { g_nop_ctl_reset(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_nop_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_nop_softc *sc; if (pp != NULL || cp != NULL) return; sc = gp->softc; sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)sc->sc_offset); sbuf_printf(sb, "%s%u\n", indent, sc->sc_rfailprob); sbuf_printf(sb, "%s%u\n", indent, sc->sc_wfailprob); sbuf_printf(sb, "%s%d\n", indent, sc->sc_error); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_reads); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_writes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_deletes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_getattrs); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_flushes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd0s); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd1s); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd2s); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_readbytes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_wrotebytes); } DECLARE_GEOM_CLASS(g_nop_class, g_nop); +MODULE_VERSION(geom_nop, 0); Index: 
stable/11/sys/geom/part/g_part_apm.c =================================================================== --- stable/11/sys/geom/part/g_part_apm.c (revision 332639) +++ stable/11/sys/geom/part/g_part_apm.c (revision 332640) @@ -1,594 +1,595 @@ /*- * Copyright (c) 2006-2008 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_apm, "GEOM partitioning class for Apple-style partitions"); struct g_part_apm_table { struct g_part_table base; struct apm_ddr ddr; struct apm_ent self; int tivo_series1; }; struct g_part_apm_entry { struct g_part_entry base; struct apm_ent ent; }; static int g_part_apm_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_apm_create(struct g_part_table *, struct g_part_parms *); static int g_part_apm_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_apm_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_apm_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_apm_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_apm_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_apm_probe(struct g_part_table *, struct g_consumer *); static int g_part_apm_read(struct g_part_table *, struct g_consumer *); static const char *g_part_apm_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_apm_write(struct g_part_table *, struct g_consumer *); static int g_part_apm_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static kobj_method_t g_part_apm_methods[] = { KOBJMETHOD(g_part_add, g_part_apm_add), KOBJMETHOD(g_part_create, g_part_apm_create), KOBJMETHOD(g_part_destroy, g_part_apm_destroy), KOBJMETHOD(g_part_dumpconf, g_part_apm_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_apm_dumpto), KOBJMETHOD(g_part_modify, g_part_apm_modify), KOBJMETHOD(g_part_resize, g_part_apm_resize), KOBJMETHOD(g_part_name, g_part_apm_name), KOBJMETHOD(g_part_probe, g_part_apm_probe), KOBJMETHOD(g_part_read, g_part_apm_read), 
KOBJMETHOD(g_part_type, g_part_apm_type), KOBJMETHOD(g_part_write, g_part_apm_write), { 0, 0 } }; static struct g_part_scheme g_part_apm_scheme = { "APM", g_part_apm_methods, sizeof(struct g_part_apm_table), .gps_entrysz = sizeof(struct g_part_apm_entry), .gps_minent = 16, .gps_maxent = 4096, }; G_PART_SCHEME_DECLARE(g_part_apm); +MODULE_VERSION(geom_part_apm, 0); static void swab(char *buf, size_t bufsz) { int i; char ch; for (i = 0; i < bufsz; i += 2) { ch = buf[i]; buf[i] = buf[i + 1]; buf[i + 1] = ch; } } static int apm_parse_type(const char *type, char *buf, size_t bufsz) { const char *alias; if (type[0] == '!') { type++; if (strlen(type) > bufsz) return (EINVAL); if (!strcmp(type, APM_ENT_TYPE_SELF) || !strcmp(type, APM_ENT_TYPE_UNUSED)) return (EINVAL); strncpy(buf, type, bufsz); return (0); } alias = g_part_alias_name(G_PART_ALIAS_APPLE_BOOT); if (!strcasecmp(type, alias)) { strcpy(buf, APM_ENT_TYPE_APPLE_BOOT); return (0); } alias = g_part_alias_name(G_PART_ALIAS_APPLE_HFS); if (!strcasecmp(type, alias)) { strcpy(buf, APM_ENT_TYPE_APPLE_HFS); return (0); } alias = g_part_alias_name(G_PART_ALIAS_APPLE_UFS); if (!strcasecmp(type, alias)) { strcpy(buf, APM_ENT_TYPE_APPLE_UFS); return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD); if (!strcasecmp(type, alias)) { strcpy(buf, APM_ENT_TYPE_FREEBSD); return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_NANDFS); if (!strcasecmp(type, alias)) { strcpy(buf, APM_ENT_TYPE_FREEBSD_NANDFS); return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_SWAP); if (!strcasecmp(type, alias)) { strcpy(buf, APM_ENT_TYPE_FREEBSD_SWAP); return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_UFS); if (!strcasecmp(type, alias)) { strcpy(buf, APM_ENT_TYPE_FREEBSD_UFS); return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_VINUM); if (!strcasecmp(type, alias)) { strcpy(buf, APM_ENT_TYPE_FREEBSD_VINUM); return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_ZFS); if (!strcasecmp(type, alias)) { strcpy(buf, APM_ENT_TYPE_FREEBSD_ZFS); return (0); } return (EINVAL); } static int apm_read_ent(struct g_consumer *cp, uint32_t blk, struct apm_ent *ent, int tivo_series1) { struct g_provider *pp; char *buf; int error; pp = cp->provider; buf = g_read_data(cp, pp->sectorsize * blk, pp->sectorsize, &error); if (buf == NULL) return (error); if (tivo_series1) swab(buf, pp->sectorsize); ent->ent_sig = be16dec(buf); ent->ent_pmblkcnt = be32dec(buf + 4); ent->ent_start = be32dec(buf + 8); ent->ent_size = be32dec(buf + 12); bcopy(buf + 16, ent->ent_name, sizeof(ent->ent_name)); bcopy(buf + 48, ent->ent_type, sizeof(ent->ent_type)); g_free(buf); return (0); } static int g_part_apm_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_apm_entry *entry; struct g_part_apm_table *table; int error; entry = (struct g_part_apm_entry *)baseentry; table = (struct g_part_apm_table *)basetable; entry->ent.ent_sig = APM_ENT_SIG; entry->ent.ent_pmblkcnt = table->self.ent_pmblkcnt; entry->ent.ent_start = gpp->gpp_start; entry->ent.ent_size = gpp->gpp_size; if (baseentry->gpe_deleted) { bzero(entry->ent.ent_type, sizeof(entry->ent.ent_type)); bzero(entry->ent.ent_name, sizeof(entry->ent.ent_name)); } error = apm_parse_type(gpp->gpp_type, entry->ent.ent_type, sizeof(entry->ent.ent_type)); if (error) return (error); if (gpp->gpp_parms & G_PART_PARM_LABEL) { if (strlen(gpp->gpp_label) > sizeof(entry->ent.ent_name)) return (EINVAL); strncpy(entry->ent.ent_name, gpp->gpp_label, sizeof(entry->ent.ent_name)); 
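		/*
		 * ent_name is a fixed-width on-disk field: a label exactly
		 * sizeof(ent_name) bytes long is stored without a terminating
		 * NUL, which is why the check above rejects only labels that
		 * are strictly longer than the field.
		 */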
} if (baseentry->gpe_index >= table->self.ent_pmblkcnt) table->self.ent_pmblkcnt = baseentry->gpe_index + 1; KASSERT(table->self.ent_size >= table->self.ent_pmblkcnt, ("%s", __func__)); KASSERT(table->self.ent_size > baseentry->gpe_index, ("%s", __func__)); return (0); } static int g_part_apm_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_apm_table *table; uint32_t last; /* We don't nest, which means that our depth should be 0. */ if (basetable->gpt_depth != 0) return (ENXIO); table = (struct g_part_apm_table *)basetable; pp = gpp->gpp_provider; if (pp->sectorsize != 512 || pp->mediasize < (2 + 2 * basetable->gpt_entries) * pp->sectorsize) return (ENOSPC); /* APM uses 32-bit LBAs. */ last = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX) - 1; basetable->gpt_first = 2 + basetable->gpt_entries; basetable->gpt_last = last; table->ddr.ddr_sig = APM_DDR_SIG; table->ddr.ddr_blksize = pp->sectorsize; table->ddr.ddr_blkcount = last + 1; table->self.ent_sig = APM_ENT_SIG; table->self.ent_pmblkcnt = basetable->gpt_entries + 1; table->self.ent_start = 1; table->self.ent_size = table->self.ent_pmblkcnt; strcpy(table->self.ent_name, "Apple"); strcpy(table->self.ent_type, APM_ENT_TYPE_SELF); return (0); } static int g_part_apm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { /* Wipe the first 2 sectors to clear the partitioning. */ basetable->gpt_smhead |= 3; return (0); } static void g_part_apm_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { union { char name[APM_ENT_NAMELEN + 1]; char type[APM_ENT_TYPELEN + 1]; } u; struct g_part_apm_entry *entry; entry = (struct g_part_apm_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs APPLE xt %s", entry->ent.ent_type); } else if (entry != NULL) { /* confxml: partition entry information */ strncpy(u.name, entry->ent.ent_name, APM_ENT_NAMELEN); u.name[APM_ENT_NAMELEN] = '\0'; sbuf_printf(sb, "%s\n"); strncpy(u.type, entry->ent.ent_type, APM_ENT_TYPELEN); u.type[APM_ENT_TYPELEN] = '\0'; sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", u.type); sbuf_printf(sb, "\n"); } else { /* confxml: scheme information */ } } static int g_part_apm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_apm_entry *entry; entry = (struct g_part_apm_entry *)baseentry; return ((!strcmp(entry->ent.ent_type, APM_ENT_TYPE_FREEBSD_SWAP)) ? 
1 : 0); } static int g_part_apm_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_apm_entry *entry; int error; entry = (struct g_part_apm_entry *)baseentry; if (gpp->gpp_parms & G_PART_PARM_LABEL) { if (strlen(gpp->gpp_label) > sizeof(entry->ent.ent_name)) return (EINVAL); } if (gpp->gpp_parms & G_PART_PARM_TYPE) { error = apm_parse_type(gpp->gpp_type, entry->ent.ent_type, sizeof(entry->ent.ent_type)); if (error) return (error); } if (gpp->gpp_parms & G_PART_PARM_LABEL) { strncpy(entry->ent.ent_name, gpp->gpp_label, sizeof(entry->ent.ent_name)); } return (0); } static int g_part_apm_resize(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_apm_entry *entry; struct g_provider *pp; if (baseentry == NULL) { pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; basetable->gpt_last = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX) - 1; return (0); } entry = (struct g_part_apm_entry *)baseentry; baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1; entry->ent.ent_size = gpp->gpp_size; return (0); } static const char * g_part_apm_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { snprintf(buf, bufsz, "s%d", baseentry->gpe_index + 1); return (buf); } static int g_part_apm_probe(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; struct g_part_apm_table *table; char *buf; int error; /* We don't nest, which means that our depth should be 0. */ if (basetable->gpt_depth != 0) return (ENXIO); table = (struct g_part_apm_table *)basetable; table->tivo_series1 = 0; pp = cp->provider; /* Sanity-check the provider. */ if (pp->mediasize < 4 * pp->sectorsize) return (ENOSPC); /* Check that there's a Driver Descriptor Record (DDR). */ buf = g_read_data(cp, 0L, pp->sectorsize, &error); if (buf == NULL) return (error); if (be16dec(buf) == APM_DDR_SIG) { /* Normal Apple DDR */ table->ddr.ddr_sig = be16dec(buf); table->ddr.ddr_blksize = be16dec(buf + 2); table->ddr.ddr_blkcount = be32dec(buf + 4); g_free(buf); if (table->ddr.ddr_blksize != pp->sectorsize) return (ENXIO); if (table->ddr.ddr_blkcount > pp->mediasize / pp->sectorsize) return (ENXIO); } else { /* * Check for Tivo drives, which have no DDR and a different * signature. Those whose first two bytes are 14 92 are * Series 2 drives, and aren't supported. Those that start * with 92 14 are series 1 drives and are supported. */ if (be16dec(buf) != 0x9214) { /* If this is 0x1492 it could be a series 2 drive */ g_free(buf); return (ENXIO); } table->ddr.ddr_sig = APM_DDR_SIG; /* XXX */ table->ddr.ddr_blksize = pp->sectorsize; /* XXX */ table->ddr.ddr_blkcount = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); table->tivo_series1 = 1; g_free(buf); } /* Check that there's a Partition Map. 
*/ error = apm_read_ent(cp, 1, &table->self, table->tivo_series1); if (error) return (error); if (table->self.ent_sig != APM_ENT_SIG) return (ENXIO); if (strcmp(table->self.ent_type, APM_ENT_TYPE_SELF)) return (ENXIO); if (table->self.ent_pmblkcnt >= table->ddr.ddr_blkcount) return (ENXIO); return (G_PART_PROBE_PRI_NORM); } static int g_part_apm_read(struct g_part_table *basetable, struct g_consumer *cp) { struct apm_ent ent; struct g_part_apm_entry *entry; struct g_part_apm_table *table; int error, index; table = (struct g_part_apm_table *)basetable; basetable->gpt_first = table->self.ent_size + 1; basetable->gpt_last = table->ddr.ddr_blkcount - 1; basetable->gpt_entries = table->self.ent_size - 1; for (index = table->self.ent_pmblkcnt - 1; index > 0; index--) { error = apm_read_ent(cp, index + 1, &ent, table->tivo_series1); if (error) continue; if (!strcmp(ent.ent_type, APM_ENT_TYPE_UNUSED)) continue; entry = (struct g_part_apm_entry *)g_part_new_entry(basetable, index, ent.ent_start, ent.ent_start + ent.ent_size - 1); entry->ent = ent; } return (0); } static const char * g_part_apm_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_apm_entry *entry; const char *type; size_t len; entry = (struct g_part_apm_entry *)baseentry; type = entry->ent.ent_type; if (!strcmp(type, APM_ENT_TYPE_APPLE_BOOT)) return (g_part_alias_name(G_PART_ALIAS_APPLE_BOOT)); if (!strcmp(type, APM_ENT_TYPE_APPLE_HFS)) return (g_part_alias_name(G_PART_ALIAS_APPLE_HFS)); if (!strcmp(type, APM_ENT_TYPE_APPLE_UFS)) return (g_part_alias_name(G_PART_ALIAS_APPLE_UFS)); if (!strcmp(type, APM_ENT_TYPE_FREEBSD)) return (g_part_alias_name(G_PART_ALIAS_FREEBSD)); if (!strcmp(type, APM_ENT_TYPE_FREEBSD_NANDFS)) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_NANDFS)); if (!strcmp(type, APM_ENT_TYPE_FREEBSD_SWAP)) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_SWAP)); if (!strcmp(type, APM_ENT_TYPE_FREEBSD_UFS)) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_UFS)); if (!strcmp(type, APM_ENT_TYPE_FREEBSD_VINUM)) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_VINUM)); if (!strcmp(type, APM_ENT_TYPE_FREEBSD_ZFS)) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_ZFS)); buf[0] = '!'; len = MIN(sizeof(entry->ent.ent_type), bufsz - 2); bcopy(type, buf + 1, len); buf[len + 1] = '\0'; return (buf); } static int g_part_apm_write(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; struct g_part_entry *baseentry; struct g_part_apm_entry *entry; struct g_part_apm_table *table; char *buf, *ptr; uint32_t index; int error; size_t tblsz; pp = cp->provider; table = (struct g_part_apm_table *)basetable; /* * Tivo Series 1 disk partitions are currently read-only. */ if (table->tivo_series1) return (EOPNOTSUPP); /* Write the DDR only when we're newly created. 
*/ if (basetable->gpt_created) { buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); be16enc(buf, table->ddr.ddr_sig); be16enc(buf + 2, table->ddr.ddr_blksize); be32enc(buf + 4, table->ddr.ddr_blkcount); error = g_write_data(cp, 0, buf, pp->sectorsize); g_free(buf); if (error) return (error); } /* Allocate the buffer for all entries */ tblsz = table->self.ent_pmblkcnt; buf = g_malloc(tblsz * pp->sectorsize, M_WAITOK | M_ZERO); /* Fill the self entry */ be16enc(buf, APM_ENT_SIG); be32enc(buf + 4, table->self.ent_pmblkcnt); be32enc(buf + 8, table->self.ent_start); be32enc(buf + 12, table->self.ent_size); bcopy(table->self.ent_name, buf + 16, sizeof(table->self.ent_name)); bcopy(table->self.ent_type, buf + 48, sizeof(table->self.ent_type)); baseentry = LIST_FIRST(&basetable->gpt_entry); for (index = 1; index < tblsz; index++) { entry = (baseentry != NULL && index == baseentry->gpe_index) ? (struct g_part_apm_entry *)baseentry : NULL; ptr = buf + index * pp->sectorsize; be16enc(ptr, APM_ENT_SIG); be32enc(ptr + 4, table->self.ent_pmblkcnt); if (entry != NULL && !baseentry->gpe_deleted) { be32enc(ptr + 8, entry->ent.ent_start); be32enc(ptr + 12, entry->ent.ent_size); bcopy(entry->ent.ent_name, ptr + 16, sizeof(entry->ent.ent_name)); bcopy(entry->ent.ent_type, ptr + 48, sizeof(entry->ent.ent_type)); } else { strcpy(ptr + 48, APM_ENT_TYPE_UNUSED); } if (entry != NULL) baseentry = LIST_NEXT(baseentry, gpe_entry); } for (index = 0; index < tblsz; index += MAXPHYS / pp->sectorsize) { error = g_write_data(cp, (1 + index) * pp->sectorsize, buf + index * pp->sectorsize, (tblsz - index > MAXPHYS / pp->sectorsize) ? MAXPHYS: (tblsz - index) * pp->sectorsize); if (error) { g_free(buf); return (error); } } g_free(buf); return (0); } Index: stable/11/sys/geom/part/g_part_bsd.c =================================================================== --- stable/11/sys/geom/part/g_part_bsd.c (revision 332639) +++ stable/11/sys/geom/part/g_part_bsd.c (revision 332640) @@ -1,539 +1,540 @@ /*- * Copyright (c) 2007 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" #define BOOT1_SIZE 512 #define LABEL_SIZE 512 #define BOOT2_OFF (BOOT1_SIZE + LABEL_SIZE) #define BOOT2_SIZE (BBSIZE - BOOT2_OFF) FEATURE(geom_part_bsd, "GEOM partitioning class for BSD disklabels"); struct g_part_bsd_table { struct g_part_table base; u_char *bbarea; uint32_t offset; }; struct g_part_bsd_entry { struct g_part_entry base; struct partition part; }; static int g_part_bsd_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_bsd_bootcode(struct g_part_table *, struct g_part_parms *); static int g_part_bsd_create(struct g_part_table *, struct g_part_parms *); static int g_part_bsd_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_bsd_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_bsd_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_bsd_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_bsd_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_bsd_probe(struct g_part_table *, struct g_consumer *); static int g_part_bsd_read(struct g_part_table *, struct g_consumer *); static const char *g_part_bsd_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_bsd_write(struct g_part_table *, struct g_consumer *); static int g_part_bsd_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static kobj_method_t g_part_bsd_methods[] = { KOBJMETHOD(g_part_add, g_part_bsd_add), KOBJMETHOD(g_part_bootcode, g_part_bsd_bootcode), KOBJMETHOD(g_part_create, g_part_bsd_create), KOBJMETHOD(g_part_destroy, g_part_bsd_destroy), KOBJMETHOD(g_part_dumpconf, g_part_bsd_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_bsd_dumpto), KOBJMETHOD(g_part_modify, g_part_bsd_modify), KOBJMETHOD(g_part_resize, g_part_bsd_resize), KOBJMETHOD(g_part_name, g_part_bsd_name), KOBJMETHOD(g_part_probe, g_part_bsd_probe), KOBJMETHOD(g_part_read, g_part_bsd_read), KOBJMETHOD(g_part_type, g_part_bsd_type), KOBJMETHOD(g_part_write, g_part_bsd_write), { 0, 0 } }; static struct g_part_scheme g_part_bsd_scheme = { "BSD", g_part_bsd_methods, sizeof(struct g_part_bsd_table), .gps_entrysz = sizeof(struct g_part_bsd_entry), .gps_minent = 8, .gps_maxent = 20, /* Only 22 entries fit in 512 byte sectors */ .gps_bootcodesz = BBSIZE, }; G_PART_SCHEME_DECLARE(g_part_bsd); +MODULE_VERSION(geom_part_bsd, 0); static struct g_part_bsd_alias { uint8_t type; int alias; } bsd_alias_match[] = { { FS_BSDFFS, G_PART_ALIAS_FREEBSD_UFS }, { FS_SWAP, G_PART_ALIAS_FREEBSD_SWAP }, { FS_ZFS, G_PART_ALIAS_FREEBSD_ZFS }, { FS_VINUM, G_PART_ALIAS_FREEBSD_VINUM }, { FS_NANDFS, G_PART_ALIAS_FREEBSD_NANDFS }, { FS_HAMMER, G_PART_ALIAS_DFBSD_HAMMER }, { FS_HAMMER2, G_PART_ALIAS_DFBSD_HAMMER2 }, }; static int bsd_parse_type(const char *type, uint8_t *fstype) { const char *alias; char *endp; long lt; int i; if (type[0] == '!') { lt = strtol(type + 1, &endp, 0); if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 256) return (EINVAL); *fstype = (u_int)lt; return (0); } for (i = 0; i < nitems(bsd_alias_match); i++) { alias = g_part_alias_name(bsd_alias_match[i].alias); if (strcasecmp(type, alias) == 0) { *fstype = bsd_alias_match[i].type; return (0); } } return (EINVAL); } static int 
g_part_bsd_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_bsd_entry *entry; struct g_part_bsd_table *table; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); entry = (struct g_part_bsd_entry *)baseentry; table = (struct g_part_bsd_table *)basetable; entry->part.p_size = gpp->gpp_size; entry->part.p_offset = gpp->gpp_start + table->offset; entry->part.p_fsize = 0; entry->part.p_frag = 0; entry->part.p_cpg = 0; return (bsd_parse_type(gpp->gpp_type, &entry->part.p_fstype)); } static int g_part_bsd_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_bsd_table *table; const u_char *codeptr; if (gpp->gpp_codesize != BOOT1_SIZE && gpp->gpp_codesize != BBSIZE) return (ENODEV); table = (struct g_part_bsd_table *)basetable; codeptr = gpp->gpp_codeptr; bcopy(codeptr, table->bbarea, BOOT1_SIZE); if (gpp->gpp_codesize == BBSIZE) bcopy(codeptr + BOOT2_OFF, table->bbarea + BOOT2_OFF, BOOT2_SIZE); return (0); } static int g_part_bsd_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_entry *baseentry; struct g_part_bsd_entry *entry; struct g_part_bsd_table *table; u_char *ptr; uint32_t msize, ncyls, secpercyl; pp = gpp->gpp_provider; if (pp->sectorsize < sizeof(struct disklabel)) return (ENOSPC); if (BBSIZE % pp->sectorsize) return (ENOTBLK); msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); secpercyl = basetable->gpt_sectors * basetable->gpt_heads; ncyls = msize / secpercyl; table = (struct g_part_bsd_table *)basetable; table->bbarea = g_malloc(BBSIZE, M_WAITOK | M_ZERO); ptr = table->bbarea + pp->sectorsize; le32enc(ptr + 0, DISKMAGIC); /* d_magic */ le32enc(ptr + 40, pp->sectorsize); /* d_secsize */ le32enc(ptr + 44, basetable->gpt_sectors); /* d_nsectors */ le32enc(ptr + 48, basetable->gpt_heads); /* d_ntracks */ le32enc(ptr + 52, ncyls); /* d_ncylinders */ le32enc(ptr + 56, secpercyl); /* d_secpercyl */ le32enc(ptr + 60, msize); /* d_secperunit */ le16enc(ptr + 72, 3600); /* d_rpm */ le32enc(ptr + 132, DISKMAGIC); /* d_magic2 */ le16enc(ptr + 138, basetable->gpt_entries); /* d_npartitions */ le32enc(ptr + 140, BBSIZE); /* d_bbsize */ basetable->gpt_first = 0; basetable->gpt_last = msize - 1; basetable->gpt_isleaf = 1; baseentry = g_part_new_entry(basetable, RAW_PART + 1, basetable->gpt_first, basetable->gpt_last); baseentry->gpe_internal = 1; entry = (struct g_part_bsd_entry *)baseentry; entry->part.p_size = basetable->gpt_last + 1; entry->part.p_offset = table->offset; return (0); } static int g_part_bsd_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_bsd_table *table; table = (struct g_part_bsd_table *)basetable; if (table->bbarea != NULL) g_free(table->bbarea); table->bbarea = NULL; /* Wipe the second sector to clear the partitioning. 
*/ basetable->gpt_smhead |= 2; return (0); } static void g_part_bsd_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_bsd_entry *entry; entry = (struct g_part_bsd_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs BSD xt %u", entry->part.p_fstype); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s%u\n", indent, entry->part.p_fstype); } else { /* confxml: scheme information */ } } static int g_part_bsd_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_bsd_entry *entry; /* Allow dumping to a swap partition or an unused partition. */ entry = (struct g_part_bsd_entry *)baseentry; return ((entry->part.p_fstype == FS_UNUSED || entry->part.p_fstype == FS_SWAP) ? 1 : 0); } static int g_part_bsd_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_bsd_entry *entry; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); entry = (struct g_part_bsd_entry *)baseentry; if (gpp->gpp_parms & G_PART_PARM_TYPE) return (bsd_parse_type(gpp->gpp_type, &entry->part.p_fstype)); return (0); } static void bsd_set_rawsize(struct g_part_table *basetable, struct g_provider *pp) { struct g_part_bsd_table *table; struct g_part_bsd_entry *entry; struct g_part_entry *baseentry; uint32_t msize; table = (struct g_part_bsd_table *)basetable; msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); le32enc(table->bbarea + pp->sectorsize + 60, msize); /* d_secperunit */ basetable->gpt_last = msize - 1; LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_index != RAW_PART + 1) continue; baseentry->gpe_end = basetable->gpt_last; entry = (struct g_part_bsd_entry *)baseentry; entry->part.p_size = msize; return; } } static int g_part_bsd_resize(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_bsd_entry *entry; struct g_provider *pp; if (baseentry == NULL) { pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; bsd_set_rawsize(basetable, pp); return (0); } entry = (struct g_part_bsd_entry *)baseentry; baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1; entry->part.p_size = gpp->gpp_size; return (0); } static const char * g_part_bsd_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { snprintf(buf, bufsz, "%c", 'a' + baseentry->gpe_index - 1); return (buf); } static int g_part_bsd_probe(struct g_part_table *table, struct g_consumer *cp) { struct g_provider *pp; u_char *buf; uint32_t magic1, magic2; int error; pp = cp->provider; /* Sanity-check the provider. */ if (pp->sectorsize < sizeof(struct disklabel) || pp->mediasize < BBSIZE) return (ENOSPC); if (BBSIZE % pp->sectorsize) return (ENOTBLK); /* Check that there's a disklabel. */ buf = g_read_data(cp, pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) return (error); magic1 = le32dec(buf + 0); magic2 = le32dec(buf + 132); g_free(buf); return ((magic1 == DISKMAGIC && magic2 == DISKMAGIC) ? 
G_PART_PROBE_PRI_HIGH : ENXIO); } static int g_part_bsd_read(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; struct g_part_bsd_table *table; struct g_part_entry *baseentry; struct g_part_bsd_entry *entry; struct partition part; u_char *buf, *p; off_t chs, msize; u_int sectors, heads; int error, index; pp = cp->provider; table = (struct g_part_bsd_table *)basetable; msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); table->bbarea = g_read_data(cp, 0, BBSIZE, &error); if (table->bbarea == NULL) return (error); buf = table->bbarea + pp->sectorsize; if (le32dec(buf + 40) != pp->sectorsize) goto invalid_label; sectors = le32dec(buf + 44); if (sectors < 1 || sectors > 255) goto invalid_label; if (sectors != basetable->gpt_sectors && !basetable->gpt_fixgeom) { g_part_geometry_heads(msize, sectors, &chs, &heads); if (chs != 0) { basetable->gpt_sectors = sectors; basetable->gpt_heads = heads; } } heads = le32dec(buf + 48); if (heads < 1 || heads > 255) goto invalid_label; if (heads != basetable->gpt_heads && !basetable->gpt_fixgeom) basetable->gpt_heads = heads; chs = le32dec(buf + 60); if (chs < 1) goto invalid_label; /* Fix-up a sysinstall bug. */ if (chs > msize) { chs = msize; le32enc(buf + 60, msize); } basetable->gpt_first = 0; basetable->gpt_last = msize - 1; basetable->gpt_isleaf = 1; basetable->gpt_entries = le16dec(buf + 138); if (basetable->gpt_entries < g_part_bsd_scheme.gps_minent || basetable->gpt_entries > g_part_bsd_scheme.gps_maxent) goto invalid_label; table->offset = le32dec(buf + 148 + RAW_PART * 16 + 4); for (index = basetable->gpt_entries - 1; index >= 0; index--) { p = buf + 148 + index * 16; part.p_size = le32dec(p + 0); part.p_offset = le32dec(p + 4); part.p_fsize = le32dec(p + 8); part.p_fstype = p[12]; part.p_frag = p[13]; part.p_cpg = le16dec(p + 14); if (part.p_size == 0) continue; if (part.p_offset < table->offset) continue; if (part.p_offset - table->offset > basetable->gpt_last) goto invalid_label; baseentry = g_part_new_entry(basetable, index + 1, part.p_offset - table->offset, part.p_offset - table->offset + part.p_size - 1); entry = (struct g_part_bsd_entry *)baseentry; entry->part = part; if (index == RAW_PART) baseentry->gpe_internal = 1; } return (0); invalid_label: printf("GEOM: %s: invalid disklabel.\n", pp->name); g_free(table->bbarea); table->bbarea = NULL; return (EINVAL); } static const char * g_part_bsd_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_bsd_entry *entry; int type; entry = (struct g_part_bsd_entry *)baseentry; type = entry->part.p_fstype; if (type == FS_NANDFS) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_NANDFS)); if (type == FS_SWAP) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_SWAP)); if (type == FS_BSDFFS) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_UFS)); if (type == FS_VINUM) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_VINUM)); if (type == FS_ZFS) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_ZFS)); snprintf(buf, bufsz, "!%d", type); return (buf); } static int g_part_bsd_write(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; struct g_part_entry *baseentry; struct g_part_bsd_entry *entry; struct g_part_bsd_table *table; uint16_t sum; u_char *label, *p, *pe; int error, index; pp = cp->provider; table = (struct g_part_bsd_table *)basetable; baseentry = LIST_FIRST(&basetable->gpt_entry); label = table->bbarea + pp->sectorsize; for (index = 1; index <= basetable->gpt_entries; index++) { p = label + 
148 + (index - 1) * 16; entry = (baseentry != NULL && index == baseentry->gpe_index) ? (struct g_part_bsd_entry *)baseentry : NULL; if (entry != NULL && !baseentry->gpe_deleted) { le32enc(p + 0, entry->part.p_size); le32enc(p + 4, entry->part.p_offset); le32enc(p + 8, entry->part.p_fsize); p[12] = entry->part.p_fstype; p[13] = entry->part.p_frag; le16enc(p + 14, entry->part.p_cpg); } else bzero(p, 16); if (entry != NULL) baseentry = LIST_NEXT(baseentry, gpe_entry); } /* Calculate checksum. */ le16enc(label + 136, 0); pe = label + 148 + basetable->gpt_entries * 16; sum = 0; for (p = label; p < pe; p += 2) sum ^= le16dec(p); le16enc(label + 136, sum); error = g_write_data(cp, 0, table->bbarea, BBSIZE); return (error); } Index: stable/11/sys/geom/part/g_part_bsd64.c =================================================================== --- stable/11/sys/geom/part/g_part_bsd64.c (revision 332639) +++ stable/11/sys/geom/part/g_part_bsd64.c (revision 332640) @@ -1,664 +1,665 @@ /*- * Copyright (c) 2014 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_bsd64, "GEOM partitioning class for 64-bit BSD disklabels"); /* XXX: move this to sys/disklabel64.h */ #define DISKMAGIC64 ((uint32_t)0xc4464c59) #define MAXPARTITIONS64 16 #define RESPARTITIONS64 32 struct disklabel64 { char d_reserved0[512]; /* reserved or unused */ u_int32_t d_magic; /* the magic number */ u_int32_t d_crc; /* crc32() d_magic through last part */ u_int32_t d_align; /* partition alignment requirement */ u_int32_t d_npartitions; /* number of partitions */ struct uuid d_stor_uuid; /* unique uuid for label */ u_int64_t d_total_size; /* total size incl everything (bytes) */ u_int64_t d_bbase; /* boot area base offset (bytes) */ /* boot area is pbase - bbase */ u_int64_t d_pbase; /* first allocatable offset (bytes) */ u_int64_t d_pstop; /* last allocatable offset+1 (bytes) */ u_int64_t d_abase; /* location of backup copy if not 0 */ u_char d_packname[64]; u_char d_reserved[64]; /* * Note: offsets are relative to the base of the slice, NOT to * d_pbase. 
Unlike 32 bit disklabels the on-disk format for * a 64 bit disklabel remains slice-relative. * * An uninitialized partition has a p_boffset and p_bsize of 0. * * If p_fstype is not supported for a live partition it is set * to FS_OTHER. This is typically the case when the filesystem * is identified by its uuid. */ struct partition64 { /* the partition table */ u_int64_t p_boffset; /* slice relative offset, in bytes */ u_int64_t p_bsize; /* size of partition, in bytes */ u_int8_t p_fstype; u_int8_t p_unused01; /* reserved, must be 0 */ u_int8_t p_unused02; /* reserved, must be 0 */ u_int8_t p_unused03; /* reserved, must be 0 */ u_int32_t p_unused04; /* reserved, must be 0 */ u_int32_t p_unused05; /* reserved, must be 0 */ u_int32_t p_unused06; /* reserved, must be 0 */ struct uuid p_type_uuid;/* mount type as UUID */ struct uuid p_stor_uuid;/* unique uuid for storage */ } d_partitions[MAXPARTITIONS64];/* actually may be more */ }; struct g_part_bsd64_table { struct g_part_table base; uint32_t d_align; uint64_t d_bbase; uint64_t d_abase; struct uuid d_stor_uuid; char d_reserved0[512]; u_char d_packname[64]; u_char d_reserved[64]; }; struct g_part_bsd64_entry { struct g_part_entry base; uint8_t fstype; struct uuid type_uuid; struct uuid stor_uuid; }; static int g_part_bsd64_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_bsd64_bootcode(struct g_part_table *, struct g_part_parms *); static int g_part_bsd64_create(struct g_part_table *, struct g_part_parms *); static int g_part_bsd64_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_bsd64_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_bsd64_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_bsd64_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_bsd64_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_bsd64_probe(struct g_part_table *, struct g_consumer *); static int g_part_bsd64_read(struct g_part_table *, struct g_consumer *); static const char *g_part_bsd64_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_bsd64_write(struct g_part_table *, struct g_consumer *); static int g_part_bsd64_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static kobj_method_t g_part_bsd64_methods[] = { KOBJMETHOD(g_part_add, g_part_bsd64_add), KOBJMETHOD(g_part_bootcode, g_part_bsd64_bootcode), KOBJMETHOD(g_part_create, g_part_bsd64_create), KOBJMETHOD(g_part_destroy, g_part_bsd64_destroy), KOBJMETHOD(g_part_dumpconf, g_part_bsd64_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_bsd64_dumpto), KOBJMETHOD(g_part_modify, g_part_bsd64_modify), KOBJMETHOD(g_part_resize, g_part_bsd64_resize), KOBJMETHOD(g_part_name, g_part_bsd64_name), KOBJMETHOD(g_part_probe, g_part_bsd64_probe), KOBJMETHOD(g_part_read, g_part_bsd64_read), KOBJMETHOD(g_part_type, g_part_bsd64_type), KOBJMETHOD(g_part_write, g_part_bsd64_write), { 0, 0 } }; static struct g_part_scheme g_part_bsd64_scheme = { "BSD64", g_part_bsd64_methods, sizeof(struct g_part_bsd64_table), .gps_entrysz = sizeof(struct g_part_bsd64_entry), .gps_minent = MAXPARTITIONS64, .gps_maxent = MAXPARTITIONS64 }; G_PART_SCHEME_DECLARE(g_part_bsd64); +MODULE_VERSION(geom_part_bsd64, 0); #define EQUUID(a, b) (memcmp(a, b, sizeof(struct uuid)) == 0) static struct uuid bsd64_uuid_unused = GPT_ENT_TYPE_UNUSED; static struct uuid 
bsd64_uuid_dfbsd_swap = GPT_ENT_TYPE_DRAGONFLY_SWAP; static struct uuid bsd64_uuid_dfbsd_ufs1 = GPT_ENT_TYPE_DRAGONFLY_UFS1; static struct uuid bsd64_uuid_dfbsd_vinum = GPT_ENT_TYPE_DRAGONFLY_VINUM; static struct uuid bsd64_uuid_dfbsd_ccd = GPT_ENT_TYPE_DRAGONFLY_CCD; static struct uuid bsd64_uuid_dfbsd_legacy = GPT_ENT_TYPE_DRAGONFLY_LEGACY; static struct uuid bsd64_uuid_dfbsd_hammer = GPT_ENT_TYPE_DRAGONFLY_HAMMER; static struct uuid bsd64_uuid_dfbsd_hammer2 = GPT_ENT_TYPE_DRAGONFLY_HAMMER2; static struct uuid bsd64_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT; static struct uuid bsd64_uuid_freebsd_nandfs = GPT_ENT_TYPE_FREEBSD_NANDFS; static struct uuid bsd64_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP; static struct uuid bsd64_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS; static struct uuid bsd64_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM; static struct uuid bsd64_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS; struct bsd64_uuid_alias { struct uuid *uuid; uint8_t fstype; int alias; }; static struct bsd64_uuid_alias dfbsd_alias_match[] = { { &bsd64_uuid_dfbsd_swap, FS_SWAP, G_PART_ALIAS_DFBSD_SWAP }, { &bsd64_uuid_dfbsd_ufs1, FS_BSDFFS, G_PART_ALIAS_DFBSD_UFS }, { &bsd64_uuid_dfbsd_vinum, FS_VINUM, G_PART_ALIAS_DFBSD_VINUM }, { &bsd64_uuid_dfbsd_ccd, FS_CCD, G_PART_ALIAS_DFBSD_CCD }, { &bsd64_uuid_dfbsd_legacy, FS_OTHER, G_PART_ALIAS_DFBSD_LEGACY }, { &bsd64_uuid_dfbsd_hammer, FS_HAMMER, G_PART_ALIAS_DFBSD_HAMMER }, { &bsd64_uuid_dfbsd_hammer2, FS_HAMMER2, G_PART_ALIAS_DFBSD_HAMMER2 }, { NULL, 0, 0} }; static struct bsd64_uuid_alias fbsd_alias_match[] = { { &bsd64_uuid_freebsd_boot, FS_OTHER, G_PART_ALIAS_FREEBSD_BOOT }, { &bsd64_uuid_freebsd_swap, FS_OTHER, G_PART_ALIAS_FREEBSD_SWAP }, { &bsd64_uuid_freebsd_ufs, FS_OTHER, G_PART_ALIAS_FREEBSD_UFS }, { &bsd64_uuid_freebsd_zfs, FS_OTHER, G_PART_ALIAS_FREEBSD_ZFS }, { &bsd64_uuid_freebsd_vinum, FS_OTHER, G_PART_ALIAS_FREEBSD_VINUM }, { &bsd64_uuid_freebsd_nandfs, FS_OTHER, G_PART_ALIAS_FREEBSD_NANDFS }, { NULL, 0, 0} }; static int bsd64_parse_type(const char *type, struct g_part_bsd64_entry *entry) { struct uuid tmp; const struct bsd64_uuid_alias *uap; const char *alias; char *p; long lt; int error; if (type[0] == '!') { if (type[1] == '\0') return (EINVAL); lt = strtol(type + 1, &p, 0); /* The type specified as number */ if (*p == '\0') { if (lt <= 0 || lt > 255) return (EINVAL); entry->fstype = lt; entry->type_uuid = bsd64_uuid_unused; return (0); } /* The type specified as uuid */ error = parse_uuid(type + 1, &tmp); if (error != 0) return (error); if (EQUUID(&tmp, &bsd64_uuid_unused)) return (EINVAL); for (uap = &dfbsd_alias_match[0]; uap->uuid != NULL; uap++) { if (EQUUID(&tmp, uap->uuid)) { /* Prefer fstype for known uuids */ entry->type_uuid = bsd64_uuid_unused; entry->fstype = uap->fstype; return (0); } } entry->type_uuid = tmp; entry->fstype = FS_OTHER; return (0); } /* The type specified as symbolic alias name */ for (uap = &fbsd_alias_match[0]; uap->uuid != NULL; uap++) { alias = g_part_alias_name(uap->alias); if (!strcasecmp(type, alias)) { entry->type_uuid = *uap->uuid; entry->fstype = uap->fstype; return (0); } } for (uap = &dfbsd_alias_match[0]; uap->uuid != NULL; uap++) { alias = g_part_alias_name(uap->alias); if (!strcasecmp(type, alias)) { entry->type_uuid = bsd64_uuid_unused; entry->fstype = uap->fstype; return (0); } } return (EINVAL); } static int g_part_bsd64_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_bsd64_entry *entry; if (gpp->gpp_parms & 
G_PART_PARM_LABEL) return (EINVAL); entry = (struct g_part_bsd64_entry *)baseentry; if (bsd64_parse_type(gpp->gpp_type, entry) != 0) return (EINVAL); kern_uuidgen(&entry->stor_uuid, 1); return (0); } static int g_part_bsd64_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { return (EOPNOTSUPP); } #define PALIGN_SIZE (1024 * 1024) #define PALIGN_MASK (PALIGN_SIZE - 1) #define BLKSIZE (4 * 1024) #define BOOTSIZE (32 * 1024) #define DALIGN_SIZE (32 * 1024) static int g_part_bsd64_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_bsd64_table *table; struct g_part_entry *baseentry; struct g_provider *pp; uint64_t blkmask, pbase; uint32_t blksize, ressize; pp = gpp->gpp_provider; if (pp->mediasize < 2* PALIGN_SIZE) return (ENOSPC); /* * Use at least 4KB block size. Blksize is stored in the d_align. * XXX: Actually it is used just for calculate d_bbase and used * for better alignment in bsdlabel64(8). */ blksize = pp->sectorsize < BLKSIZE ? BLKSIZE: pp->sectorsize; blkmask = blksize - 1; /* Reserve enough space for RESPARTITIONS64 partitions. */ ressize = offsetof(struct disklabel64, d_partitions[RESPARTITIONS64]); ressize = (ressize + blkmask) & ~blkmask; /* * Reserve enough space for bootcode and align first allocatable * offset to PALIGN_SIZE. * XXX: Currently DragonFlyBSD has 32KB bootcode, but the size could * be bigger, because it is possible change it (it is equal pbase-bbase) * in the bsdlabel64(8). */ pbase = ressize + ((BOOTSIZE + blkmask) & ~blkmask); pbase = (pbase + PALIGN_MASK) & ~PALIGN_MASK; /* * Take physical offset into account and make first allocatable * offset 32KB aligned to the start of the physical disk. * XXX: Actually there are no such restrictions, this is how * DragonFlyBSD behaves. */ pbase += DALIGN_SIZE - pp->stripeoffset % DALIGN_SIZE; table = (struct g_part_bsd64_table *)basetable; table->d_align = blksize; table->d_bbase = ressize / pp->sectorsize; table->d_abase = ((pp->mediasize - ressize) & ~blkmask) / pp->sectorsize; kern_uuidgen(&table->d_stor_uuid, 1); basetable->gpt_first = pbase / pp->sectorsize; basetable->gpt_last = table->d_abase - 1; /* XXX */ /* * Create 'c' partition and make it internal, so user will not be * able use it. 
*/ baseentry = g_part_new_entry(basetable, RAW_PART + 1, 0, 0); baseentry->gpe_internal = 1; return (0); } static int g_part_bsd64_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; if (pp->sectorsize > offsetof(struct disklabel64, d_magic)) basetable->gpt_smhead |= 1; else basetable->gpt_smhead |= 3; return (0); } static void g_part_bsd64_dumpconf(struct g_part_table *basetable, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_bsd64_table *table; struct g_part_bsd64_entry *entry; char buf[sizeof(table->d_packname)]; entry = (struct g_part_bsd64_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs BSD64 xt %u", entry->fstype); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s%u\n", indent, entry->fstype); if (!EQUUID(&bsd64_uuid_unused, &entry->type_uuid)) { sbuf_printf(sb, "%s", indent); sbuf_printf_uuid(sb, &entry->type_uuid); sbuf_printf(sb, "\n"); } sbuf_printf(sb, "%s", indent); sbuf_printf_uuid(sb, &entry->stor_uuid); sbuf_printf(sb, "\n"); } else { /* confxml: scheme information */ table = (struct g_part_bsd64_table *)basetable; sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)table->d_bbase); if (table->d_abase) sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)table->d_abase); sbuf_printf(sb, "%s", indent); sbuf_printf_uuid(sb, &table->d_stor_uuid); sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s\n"); } } static int g_part_bsd64_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_bsd64_entry *entry; /* Allow dumping to a swap partition. */ entry = (struct g_part_bsd64_entry *)baseentry; if (entry->fstype == FS_SWAP || EQUUID(&entry->type_uuid, &bsd64_uuid_dfbsd_swap) || EQUUID(&entry->type_uuid, &bsd64_uuid_freebsd_swap)) return (1); return (0); } static int g_part_bsd64_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_bsd64_entry *entry; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); entry = (struct g_part_bsd64_entry *)baseentry; if (gpp->gpp_parms & G_PART_PARM_TYPE) return (bsd64_parse_type(gpp->gpp_type, entry)); return (0); } static int g_part_bsd64_resize(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_bsd64_table *table; struct g_provider *pp; if (baseentry == NULL) { pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; table = (struct g_part_bsd64_table *)basetable; table->d_abase = rounddown2(pp->mediasize - table->d_bbase * pp->sectorsize, table->d_align) / pp->sectorsize; basetable->gpt_last = table->d_abase - 1; return (0); } baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1; return (0); } static const char * g_part_bsd64_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { snprintf(buf, bufsz, "%c", 'a' + baseentry->gpe_index - 1); return (buf); } static int g_part_bsd64_probe(struct g_part_table *table, struct g_consumer *cp) { struct g_provider *pp; uint32_t v; int error; u_char *buf; pp = cp->provider; if (pp->mediasize < 2 * PALIGN_SIZE) return (ENOSPC); v = rounddown2(pp->sectorsize + offsetof(struct disklabel64, d_magic), pp->sectorsize); buf = g_read_data(cp, 0, v, &error); if (buf == NULL) return (error); v = le32dec(buf + offsetof(struct disklabel64, d_magic)); g_free(buf); return (v == DISKMAGIC64 ? 
G_PART_PROBE_PRI_HIGH: ENXIO); } static int g_part_bsd64_read(struct g_part_table *basetable, struct g_consumer *cp) { struct g_part_bsd64_table *table; struct g_part_bsd64_entry *entry; struct g_part_entry *baseentry; struct g_provider *pp; struct disklabel64 *dlp; uint64_t v64, sz; uint32_t v32; int error, index; u_char *buf; pp = cp->provider; table = (struct g_part_bsd64_table *)basetable; v32 = roundup2(sizeof(struct disklabel64), pp->sectorsize); buf = g_read_data(cp, 0, v32, &error); if (buf == NULL) return (error); dlp = (struct disklabel64 *)buf; basetable->gpt_entries = le32toh(dlp->d_npartitions); if (basetable->gpt_entries > MAXPARTITIONS64 || basetable->gpt_entries < 1) goto invalid_label; v32 = le32toh(dlp->d_crc); dlp->d_crc = 0; if (crc32(&dlp->d_magic, offsetof(struct disklabel64, d_partitions[basetable->gpt_entries]) - offsetof(struct disklabel64, d_magic)) != v32) goto invalid_label; table->d_align = le32toh(dlp->d_align); if (table->d_align == 0 || (table->d_align & (pp->sectorsize - 1))) goto invalid_label; if (le64toh(dlp->d_total_size) > pp->mediasize) goto invalid_label; v64 = le64toh(dlp->d_pbase); if (v64 % pp->sectorsize) goto invalid_label; basetable->gpt_first = v64 / pp->sectorsize; v64 = le64toh(dlp->d_pstop); if (v64 % pp->sectorsize) goto invalid_label; basetable->gpt_last = v64 / pp->sectorsize; basetable->gpt_isleaf = 1; v64 = le64toh(dlp->d_bbase); if (v64 % pp->sectorsize) goto invalid_label; table->d_bbase = v64 / pp->sectorsize; v64 = le64toh(dlp->d_abase); if (v64 % pp->sectorsize) goto invalid_label; table->d_abase = v64 / pp->sectorsize; le_uuid_dec(&dlp->d_stor_uuid, &table->d_stor_uuid); for (index = basetable->gpt_entries - 1; index >= 0; index--) { if (index == RAW_PART) { /* Skip 'c' partition. */ baseentry = g_part_new_entry(basetable, index + 1, 0, 0); baseentry->gpe_internal = 1; continue; } v64 = le64toh(dlp->d_partitions[index].p_boffset); sz = le64toh(dlp->d_partitions[index].p_bsize); if (sz == 0 && v64 == 0) continue; if (sz == 0 || (v64 % pp->sectorsize) || (sz % pp->sectorsize)) goto invalid_label; baseentry = g_part_new_entry(basetable, index + 1, v64 / pp->sectorsize, (v64 + sz) / pp->sectorsize - 1); entry = (struct g_part_bsd64_entry *)baseentry; le_uuid_dec(&dlp->d_partitions[index].p_type_uuid, &entry->type_uuid); le_uuid_dec(&dlp->d_partitions[index].p_stor_uuid, &entry->stor_uuid); entry->fstype = dlp->d_partitions[index].p_fstype; } bcopy(dlp->d_reserved0, table->d_reserved0, sizeof(table->d_reserved0)); bcopy(dlp->d_packname, table->d_packname, sizeof(table->d_packname)); bcopy(dlp->d_reserved, table->d_reserved, sizeof(table->d_reserved)); g_free(buf); return (0); invalid_label: g_free(buf); return (EINVAL); } static const char * g_part_bsd64_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_bsd64_entry *entry; struct bsd64_uuid_alias *uap; entry = (struct g_part_bsd64_entry *)baseentry; if (entry->fstype != FS_OTHER) { for (uap = &dfbsd_alias_match[0]; uap->uuid != NULL; uap++) if (uap->fstype == entry->fstype) return (g_part_alias_name(uap->alias)); } else { for (uap = &fbsd_alias_match[0]; uap->uuid != NULL; uap++) if (EQUUID(uap->uuid, &entry->type_uuid)) return (g_part_alias_name(uap->alias)); for (uap = &dfbsd_alias_match[0]; uap->uuid != NULL; uap++) if (EQUUID(uap->uuid, &entry->type_uuid)) return (g_part_alias_name(uap->alias)); } if (EQUUID(&bsd64_uuid_unused, &entry->type_uuid)) snprintf(buf, bufsz, "!%d", entry->fstype); else { buf[0] = '!'; 
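		/*
		 * Unknown types are printed with a leading '!', either as a
		 * raw fstype number (above) or as a type UUID (below), which
		 * is the same syntax bsd64_parse_type() accepts, so the
		 * printed value can be passed back in unchanged.
		 */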
snprintf_uuid(buf + 1, bufsz - 1, &entry->type_uuid); } return (buf); } static int g_part_bsd64_write(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; struct g_part_entry *baseentry; struct g_part_bsd64_entry *entry; struct g_part_bsd64_table *table; struct disklabel64 *dlp; uint32_t v, sz; int error, index; pp = cp->provider; table = (struct g_part_bsd64_table *)basetable; sz = roundup2(sizeof(struct disklabel64), pp->sectorsize); dlp = g_malloc(sz, M_WAITOK | M_ZERO); memcpy(dlp->d_reserved0, table->d_reserved0, sizeof(table->d_reserved0)); memcpy(dlp->d_packname, table->d_packname, sizeof(table->d_packname)); memcpy(dlp->d_reserved, table->d_reserved, sizeof(table->d_reserved)); le32enc(&dlp->d_magic, DISKMAGIC64); le32enc(&dlp->d_align, table->d_align); le32enc(&dlp->d_npartitions, basetable->gpt_entries); le_uuid_enc(&dlp->d_stor_uuid, &table->d_stor_uuid); le64enc(&dlp->d_total_size, pp->mediasize); le64enc(&dlp->d_bbase, table->d_bbase * pp->sectorsize); le64enc(&dlp->d_pbase, basetable->gpt_first * pp->sectorsize); le64enc(&dlp->d_pstop, basetable->gpt_last * pp->sectorsize); le64enc(&dlp->d_abase, table->d_abase * pp->sectorsize); LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; index = baseentry->gpe_index - 1; entry = (struct g_part_bsd64_entry *)baseentry; if (index == RAW_PART) continue; le64enc(&dlp->d_partitions[index].p_boffset, baseentry->gpe_start * pp->sectorsize); le64enc(&dlp->d_partitions[index].p_bsize, pp->sectorsize * (baseentry->gpe_end - baseentry->gpe_start + 1)); dlp->d_partitions[index].p_fstype = entry->fstype; le_uuid_enc(&dlp->d_partitions[index].p_type_uuid, &entry->type_uuid); le_uuid_enc(&dlp->d_partitions[index].p_stor_uuid, &entry->stor_uuid); } /* Calculate checksum. */ v = offsetof(struct disklabel64, d_partitions[basetable->gpt_entries]) - offsetof(struct disklabel64, d_magic); le32enc(&dlp->d_crc, crc32(&dlp->d_magic, v)); error = g_write_data(cp, 0, dlp, sz); g_free(dlp); return (error); } Index: stable/11/sys/geom/part/g_part_ebr.c =================================================================== --- stable/11/sys/geom/part/g_part_ebr.c (revision 332639) +++ stable/11/sys/geom/part/g_part_ebr.c (revision 332640) @@ -1,694 +1,695 @@ /*- * Copyright (c) 2007-2009 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_geom.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_ebr, "GEOM partitioning class for extended boot records support"); #if defined(GEOM_PART_EBR_COMPAT) FEATURE(geom_part_ebr_compat, "GEOM EBR partitioning class: backward-compatible partition names"); #endif #define EBRSIZE 512 struct g_part_ebr_table { struct g_part_table base; #ifndef GEOM_PART_EBR_COMPAT u_char ebr[EBRSIZE]; #endif }; struct g_part_ebr_entry { struct g_part_entry base; struct dos_partition ent; }; static int g_part_ebr_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_ebr_create(struct g_part_table *, struct g_part_parms *); static int g_part_ebr_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_ebr_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_ebr_dumpto(struct g_part_table *, struct g_part_entry *); #if defined(GEOM_PART_EBR_COMPAT) static void g_part_ebr_fullname(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); #endif static int g_part_ebr_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_ebr_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_ebr_precheck(struct g_part_table *, enum g_part_ctl, struct g_part_parms *); static int g_part_ebr_probe(struct g_part_table *, struct g_consumer *); static int g_part_ebr_read(struct g_part_table *, struct g_consumer *); static int g_part_ebr_setunset(struct g_part_table *, struct g_part_entry *, const char *, unsigned int); static const char *g_part_ebr_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_ebr_write(struct g_part_table *, struct g_consumer *); static int g_part_ebr_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static kobj_method_t g_part_ebr_methods[] = { KOBJMETHOD(g_part_add, g_part_ebr_add), KOBJMETHOD(g_part_create, g_part_ebr_create), KOBJMETHOD(g_part_destroy, g_part_ebr_destroy), KOBJMETHOD(g_part_dumpconf, g_part_ebr_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_ebr_dumpto), #if defined(GEOM_PART_EBR_COMPAT) KOBJMETHOD(g_part_fullname, g_part_ebr_fullname), #endif KOBJMETHOD(g_part_modify, g_part_ebr_modify), KOBJMETHOD(g_part_name, g_part_ebr_name), KOBJMETHOD(g_part_precheck, g_part_ebr_precheck), KOBJMETHOD(g_part_probe, g_part_ebr_probe), KOBJMETHOD(g_part_read, g_part_ebr_read), KOBJMETHOD(g_part_resize, g_part_ebr_resize), KOBJMETHOD(g_part_setunset, g_part_ebr_setunset), KOBJMETHOD(g_part_type, g_part_ebr_type), KOBJMETHOD(g_part_write, g_part_ebr_write), { 0, 0 } }; static struct g_part_scheme g_part_ebr_scheme = { "EBR", g_part_ebr_methods, sizeof(struct g_part_ebr_table), .gps_entrysz = sizeof(struct g_part_ebr_entry), .gps_minent = 1, .gps_maxent = INT_MAX, }; 
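/*
 * Illustrative sketch, not part of the original change: every EBR logical
 * partition is preceded by its own boot-record sector, and the entry's
 * dp_start is relative to that sector rather than to the disk, so the
 * partition data begins one "track" (gpt_sectors sectors) past the start
 * of its container.  The hypothetical helper below merely restates the
 * arithmetic g_part_ebr_add() uses when it computes gpe_offset; e.g. with
 * 63 sectors per track and 512-byte sectors, a container at LBA 6300 puts
 * the data at byte offset (6300 + 63) * 512 = 3257856.
 */
static __inline off_t
ebr_example_abs_offset(uint32_t container_start, uint32_t track_sectors,
    u_int sectorsize)
{

	return ((off_t)(container_start + track_sectors) * sectorsize);
}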
G_PART_SCHEME_DECLARE(g_part_ebr); +MODULE_VERSION(geom_part_ebr, 0); static struct g_part_ebr_alias { u_char typ; int alias; } ebr_alias_match[] = { { DOSPTYP_386BSD, G_PART_ALIAS_FREEBSD }, { DOSPTYP_NTFS, G_PART_ALIAS_MS_NTFS }, { DOSPTYP_FAT32, G_PART_ALIAS_MS_FAT32 }, { DOSPTYP_LINSWP, G_PART_ALIAS_LINUX_SWAP }, { DOSPTYP_LINUX, G_PART_ALIAS_LINUX_DATA }, { DOSPTYP_LINLVM, G_PART_ALIAS_LINUX_LVM }, { DOSPTYP_LINRAID, G_PART_ALIAS_LINUX_RAID }, }; static void ebr_set_chs(struct g_part_table *, uint32_t, u_char *, u_char *, u_char *); static void ebr_entry_decode(const char *p, struct dos_partition *ent) { ent->dp_flag = p[0]; ent->dp_shd = p[1]; ent->dp_ssect = p[2]; ent->dp_scyl = p[3]; ent->dp_typ = p[4]; ent->dp_ehd = p[5]; ent->dp_esect = p[6]; ent->dp_ecyl = p[7]; ent->dp_start = le32dec(p + 8); ent->dp_size = le32dec(p + 12); } static void ebr_entry_link(struct g_part_table *table, uint32_t start, uint32_t end, u_char *buf) { buf[0] = 0 /* dp_flag */; ebr_set_chs(table, start, &buf[3] /* dp_scyl */, &buf[1] /* dp_shd */, &buf[2] /* dp_ssect */); buf[4] = 5 /* dp_typ */; ebr_set_chs(table, end, &buf[7] /* dp_ecyl */, &buf[5] /* dp_ehd */, &buf[6] /* dp_esect */); le32enc(buf + 8, start); le32enc(buf + 12, end - start + 1); } static int ebr_parse_type(const char *type, u_char *dp_typ) { const char *alias; char *endp; long lt; int i; if (type[0] == '!') { lt = strtol(type + 1, &endp, 0); if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 256) return (EINVAL); *dp_typ = (u_char)lt; return (0); } for (i = 0; i < nitems(ebr_alias_match); i++) { alias = g_part_alias_name(ebr_alias_match[i].alias); if (strcasecmp(type, alias) == 0) { *dp_typ = ebr_alias_match[i].typ; return (0); } } return (EINVAL); } static void ebr_set_chs(struct g_part_table *table, uint32_t lba, u_char *cylp, u_char *hdp, u_char *secp) { uint32_t cyl, hd, sec; sec = lba % table->gpt_sectors + 1; lba /= table->gpt_sectors; hd = lba % table->gpt_heads; lba /= table->gpt_heads; cyl = lba; if (cyl > 1023) sec = hd = cyl = ~0; *cylp = cyl & 0xff; *hdp = hd & 0xff; *secp = (sec & 0x3f) | ((cyl >> 2) & 0xc0); } static int ebr_align(struct g_part_table *basetable, uint32_t *start, uint32_t *size) { uint32_t sectors; sectors = basetable->gpt_sectors; if (*size < 2 * sectors) return (EINVAL); if (*start % sectors) { *size += (*start % sectors) - sectors; *start -= (*start % sectors) - sectors; } if (*size % sectors) *size -= (*size % sectors); if (*size < 2 * sectors) return (EINVAL); return (0); } static int g_part_ebr_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_ebr_entry *entry; uint32_t start, size; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; entry = (struct g_part_ebr_entry *)baseentry; start = gpp->gpp_start; size = gpp->gpp_size; if (ebr_align(basetable, &start, &size) != 0) return (EINVAL); if (baseentry->gpe_deleted) bzero(&entry->ent, sizeof(entry->ent)); KASSERT(baseentry->gpe_start <= start, ("%s", __func__)); KASSERT(baseentry->gpe_end >= start + size - 1, ("%s", __func__)); baseentry->gpe_index = (start / basetable->gpt_sectors) + 1; baseentry->gpe_offset = (off_t)(start + basetable->gpt_sectors) * pp->sectorsize; baseentry->gpe_start = start; baseentry->gpe_end = start + size - 1; entry->ent.dp_start = basetable->gpt_sectors; entry->ent.dp_size = size - basetable->gpt_sectors; ebr_set_chs(basetable, entry->ent.dp_start, &entry->ent.dp_scyl, 
&entry->ent.dp_shd, &entry->ent.dp_ssect); ebr_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl, &entry->ent.dp_ehd, &entry->ent.dp_esect); return (ebr_parse_type(gpp->gpp_type, &entry->ent.dp_typ)); } static int g_part_ebr_create(struct g_part_table *basetable, struct g_part_parms *gpp) { char type[64]; struct g_consumer *cp; struct g_provider *pp; uint32_t msize; int error; pp = gpp->gpp_provider; if (pp->sectorsize < EBRSIZE) return (ENOSPC); if (pp->sectorsize > 4096) return (ENXIO); /* Check that we have a parent and that it's a MBR. */ if (basetable->gpt_depth == 0) return (ENXIO); cp = LIST_FIRST(&pp->consumers); error = g_getattr("PART::scheme", cp, &type); if (error != 0) return (error); if (strcmp(type, "MBR") != 0) return (ENXIO); error = g_getattr("PART::type", cp, &type); if (error != 0) return (error); if (strcmp(type, "ebr") != 0) return (ENXIO); msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); basetable->gpt_first = 0; basetable->gpt_last = msize - 1; basetable->gpt_entries = msize / basetable->gpt_sectors; return (0); } static int g_part_ebr_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { /* Wipe the first sector to clear the partitioning. */ basetable->gpt_smhead |= 1; return (0); } static void g_part_ebr_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_ebr_entry *entry; entry = (struct g_part_ebr_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs MBREXT xt %u", entry->ent.dp_typ); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s%u\n", indent, entry->ent.dp_typ); if (entry->ent.dp_flag & 0x80) sbuf_printf(sb, "%sactive\n", indent); } else { /* confxml: scheme information */ } } static int g_part_ebr_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_ebr_entry *entry; /* Allow dumping to a FreeBSD partition or Linux swap partition only. */ entry = (struct g_part_ebr_entry *)baseentry; return ((entry->ent.dp_typ == DOSPTYP_386BSD || entry->ent.dp_typ == DOSPTYP_LINSWP) ? 
1 : 0); } #if defined(GEOM_PART_EBR_COMPAT) static void g_part_ebr_fullname(struct g_part_table *table, struct g_part_entry *entry, struct sbuf *sb, const char *pfx) { struct g_part_entry *iter; u_int idx; idx = 5; LIST_FOREACH(iter, &table->gpt_entry, gpe_entry) { if (iter == entry) break; idx++; } sbuf_printf(sb, "%.*s%u", (int)strlen(pfx) - 1, pfx, idx); } #endif static int g_part_ebr_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_ebr_entry *entry; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); entry = (struct g_part_ebr_entry *)baseentry; if (gpp->gpp_parms & G_PART_PARM_TYPE) return (ebr_parse_type(gpp->gpp_type, &entry->ent.dp_typ)); return (0); } static int g_part_ebr_resize(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_provider *pp; if (baseentry != NULL) return (EOPNOTSUPP); pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; basetable->gpt_last = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX) - 1; return (0); } static const char * g_part_ebr_name(struct g_part_table *table, struct g_part_entry *entry, char *buf, size_t bufsz) { snprintf(buf, bufsz, "+%08u", entry->gpe_index); return (buf); } static int g_part_ebr_precheck(struct g_part_table *table, enum g_part_ctl req, struct g_part_parms *gpp) { #if defined(GEOM_PART_EBR_COMPAT) if (req == G_PART_CTL_DESTROY) return (0); return (ECANCELED); #else /* * The index is a function of the start of the partition. * This is not something the user can override, nor is it * something the common code will do right. We can set the * index now so that we get what we need. */ if (req == G_PART_CTL_ADD) gpp->gpp_index = (gpp->gpp_start / table->gpt_sectors) + 1; return (0); #endif } static int g_part_ebr_probe(struct g_part_table *table, struct g_consumer *cp) { char type[64]; struct g_provider *pp; u_char *buf, *p; int error, index, res; uint16_t magic; pp = cp->provider; /* Sanity-check the provider. */ if (pp->sectorsize < EBRSIZE || pp->mediasize < pp->sectorsize) return (ENOSPC); if (pp->sectorsize > 4096) return (ENXIO); /* Check that we have a parent and that it's a MBR. */ if (table->gpt_depth == 0) return (ENXIO); error = g_getattr("PART::scheme", cp, &type); if (error != 0) return (error); if (strcmp(type, "MBR") != 0) return (ENXIO); /* Check that partition has type DOSPTYP_EBR. */ error = g_getattr("PART::type", cp, &type); if (error != 0) return (error); if (strcmp(type, "ebr") != 0) return (ENXIO); /* Check that there's a EBR. */ buf = g_read_data(cp, 0L, pp->sectorsize, &error); if (buf == NULL) return (error); /* We goto out on mismatch. 
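 * A plausible EBR carries the 0xAA55 signature at the end of the
 * sector and the status byte of each of its two slots is either
 * 0x00 or 0x80; anything else makes us refuse to probe.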
*/ res = ENXIO; magic = le16dec(buf + DOSMAGICOFFSET); if (magic != DOSMAGIC) goto out; for (index = 0; index < 2; index++) { p = buf + DOSPARTOFF + index * DOSPARTSIZE; if (p[0] != 0 && p[0] != 0x80) goto out; } res = G_PART_PROBE_PRI_NORM; out: g_free(buf); return (res); } static int g_part_ebr_read(struct g_part_table *basetable, struct g_consumer *cp) { struct dos_partition ent[2]; struct g_provider *pp; struct g_part_entry *baseentry; struct g_part_ebr_table *table; struct g_part_ebr_entry *entry; u_char *buf; off_t ofs, msize; u_int lba; int error, index; pp = cp->provider; table = (struct g_part_ebr_table *)basetable; msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); lba = 0; while (1) { ofs = (off_t)lba * pp->sectorsize; buf = g_read_data(cp, ofs, pp->sectorsize, &error); if (buf == NULL) return (error); ebr_entry_decode(buf + DOSPARTOFF + 0 * DOSPARTSIZE, ent + 0); ebr_entry_decode(buf + DOSPARTOFF + 1 * DOSPARTSIZE, ent + 1); /* The 3rd & 4th entries should be zeroes. */ if (le64dec(buf + DOSPARTOFF + 2 * DOSPARTSIZE) + le64dec(buf + DOSPARTOFF + 3 * DOSPARTSIZE) != 0) { basetable->gpt_corrupt = 1; printf("GEOM: %s: invalid entries in the EBR ignored.\n", pp->name); } #ifndef GEOM_PART_EBR_COMPAT /* Save the first EBR, it can contain a boot code */ if (lba == 0) bcopy(buf, table->ebr, sizeof(table->ebr)); #endif g_free(buf); if (ent[0].dp_typ == 0) break; if (ent[0].dp_typ == 5 && ent[1].dp_typ == 0) { lba = ent[0].dp_start; continue; } index = (lba / basetable->gpt_sectors) + 1; baseentry = (struct g_part_entry *)g_part_new_entry(basetable, index, lba, lba + ent[0].dp_start + ent[0].dp_size - 1); baseentry->gpe_offset = (off_t)(lba + ent[0].dp_start) * pp->sectorsize; entry = (struct g_part_ebr_entry *)baseentry; entry->ent = ent[0]; if (ent[1].dp_typ == 0) break; lba = ent[1].dp_start; } basetable->gpt_entries = msize / basetable->gpt_sectors; basetable->gpt_first = 0; basetable->gpt_last = msize - 1; return (0); } static int g_part_ebr_setunset(struct g_part_table *table, struct g_part_entry *baseentry, const char *attrib, unsigned int set) { struct g_part_entry *iter; struct g_part_ebr_entry *entry; int changed; if (baseentry == NULL) return (ENODEV); if (strcasecmp(attrib, "active") != 0) return (EINVAL); /* Only one entry can have the active attribute. 
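 * When setting it on one entry we therefore walk all entries and
 * clear the flag from every other one; unsetting only clears it on
 * the given entry.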
*/ LIST_FOREACH(iter, &table->gpt_entry, gpe_entry) { if (iter->gpe_deleted) continue; changed = 0; entry = (struct g_part_ebr_entry *)iter; if (iter == baseentry) { if (set && (entry->ent.dp_flag & 0x80) == 0) { entry->ent.dp_flag |= 0x80; changed = 1; } else if (!set && (entry->ent.dp_flag & 0x80)) { entry->ent.dp_flag &= ~0x80; changed = 1; } } else { if (set && (entry->ent.dp_flag & 0x80)) { entry->ent.dp_flag &= ~0x80; changed = 1; } } if (changed && !iter->gpe_created) iter->gpe_modified = 1; } return (0); } static const char * g_part_ebr_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_ebr_entry *entry; int i; entry = (struct g_part_ebr_entry *)baseentry; for (i = 0; i < nitems(ebr_alias_match); i++) { if (ebr_alias_match[i].typ == entry->ent.dp_typ) return (g_part_alias_name(ebr_alias_match[i].alias)); } snprintf(buf, bufsz, "!%d", entry->ent.dp_typ); return (buf); } static int g_part_ebr_write(struct g_part_table *basetable, struct g_consumer *cp) { #ifndef GEOM_PART_EBR_COMPAT struct g_part_ebr_table *table; #endif struct g_provider *pp; struct g_part_entry *baseentry, *next; struct g_part_ebr_entry *entry; u_char *buf; u_char *p; int error; pp = cp->provider; buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); #ifndef GEOM_PART_EBR_COMPAT table = (struct g_part_ebr_table *)basetable; bcopy(table->ebr, buf, DOSPARTOFF); #endif le16enc(buf + DOSMAGICOFFSET, DOSMAGIC); baseentry = LIST_FIRST(&basetable->gpt_entry); while (baseentry != NULL && baseentry->gpe_deleted) baseentry = LIST_NEXT(baseentry, gpe_entry); /* Wipe-out the first EBR when there are no slices. */ if (baseentry == NULL) { error = g_write_data(cp, 0, buf, pp->sectorsize); goto out; } /* * If the first partition is not in LBA 0, we need to * put a "link" EBR in LBA 0. */ if (baseentry->gpe_start != 0) { ebr_entry_link(basetable, (uint32_t)baseentry->gpe_start, (uint32_t)baseentry->gpe_end, buf + DOSPARTOFF); error = g_write_data(cp, 0, buf, pp->sectorsize); if (error) goto out; } do { entry = (struct g_part_ebr_entry *)baseentry; p = buf + DOSPARTOFF; p[0] = entry->ent.dp_flag; p[1] = entry->ent.dp_shd; p[2] = entry->ent.dp_ssect; p[3] = entry->ent.dp_scyl; p[4] = entry->ent.dp_typ; p[5] = entry->ent.dp_ehd; p[6] = entry->ent.dp_esect; p[7] = entry->ent.dp_ecyl; le32enc(p + 8, entry->ent.dp_start); le32enc(p + 12, entry->ent.dp_size); next = LIST_NEXT(baseentry, gpe_entry); while (next != NULL && next->gpe_deleted) next = LIST_NEXT(next, gpe_entry); p += DOSPARTSIZE; if (next != NULL) ebr_entry_link(basetable, (uint32_t)next->gpe_start, (uint32_t)next->gpe_end, p); else bzero(p, DOSPARTSIZE); error = g_write_data(cp, baseentry->gpe_start * pp->sectorsize, buf, pp->sectorsize); #ifndef GEOM_PART_EBR_COMPAT if (baseentry->gpe_start == 0) bzero(buf, DOSPARTOFF); #endif baseentry = next; } while (!error && baseentry != NULL); out: g_free(buf); return (error); } Index: stable/11/sys/geom/part/g_part_gpt.c =================================================================== --- stable/11/sys/geom/part/g_part_gpt.c (revision 332639) +++ stable/11/sys/geom/part/g_part_gpt.c (revision 332640) @@ -1,1398 +1,1399 @@ /*- * Copyright (c) 2002, 2005-2007, 2011 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_gpt, "GEOM partitioning class for GPT partitions support"); CTASSERT(offsetof(struct gpt_hdr, padding) == 92); CTASSERT(sizeof(struct gpt_ent) == 128); #define EQUUID(a,b) (memcmp(a, b, sizeof(struct uuid)) == 0) #define MBRSIZE 512 enum gpt_elt { GPT_ELT_PRIHDR, GPT_ELT_PRITBL, GPT_ELT_SECHDR, GPT_ELT_SECTBL, GPT_ELT_COUNT }; enum gpt_state { GPT_STATE_UNKNOWN, /* Not determined. */ GPT_STATE_MISSING, /* No signature found. */ GPT_STATE_CORRUPT, /* Checksum mismatch. */ GPT_STATE_INVALID, /* Nonconformant/invalid. */ GPT_STATE_OK /* Perfectly fine. 
*/ }; struct g_part_gpt_table { struct g_part_table base; u_char mbr[MBRSIZE]; struct gpt_hdr *hdr; quad_t lba[GPT_ELT_COUNT]; enum gpt_state state[GPT_ELT_COUNT]; int bootcamp; }; struct g_part_gpt_entry { struct g_part_entry base; struct gpt_ent ent; }; static void g_gpt_printf_utf16(struct sbuf *, uint16_t *, size_t); static void g_gpt_utf8_to_utf16(const uint8_t *, uint16_t *, size_t); static void g_gpt_set_defaults(struct g_part_table *, struct g_provider *); static int g_part_gpt_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_gpt_bootcode(struct g_part_table *, struct g_part_parms *); static int g_part_gpt_create(struct g_part_table *, struct g_part_parms *); static int g_part_gpt_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_gpt_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_gpt_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_gpt_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_gpt_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_gpt_probe(struct g_part_table *, struct g_consumer *); static int g_part_gpt_read(struct g_part_table *, struct g_consumer *); static int g_part_gpt_setunset(struct g_part_table *table, struct g_part_entry *baseentry, const char *attrib, unsigned int set); static const char *g_part_gpt_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_gpt_write(struct g_part_table *, struct g_consumer *); static int g_part_gpt_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_gpt_recover(struct g_part_table *); static kobj_method_t g_part_gpt_methods[] = { KOBJMETHOD(g_part_add, g_part_gpt_add), KOBJMETHOD(g_part_bootcode, g_part_gpt_bootcode), KOBJMETHOD(g_part_create, g_part_gpt_create), KOBJMETHOD(g_part_destroy, g_part_gpt_destroy), KOBJMETHOD(g_part_dumpconf, g_part_gpt_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_gpt_dumpto), KOBJMETHOD(g_part_modify, g_part_gpt_modify), KOBJMETHOD(g_part_resize, g_part_gpt_resize), KOBJMETHOD(g_part_name, g_part_gpt_name), KOBJMETHOD(g_part_probe, g_part_gpt_probe), KOBJMETHOD(g_part_read, g_part_gpt_read), KOBJMETHOD(g_part_recover, g_part_gpt_recover), KOBJMETHOD(g_part_setunset, g_part_gpt_setunset), KOBJMETHOD(g_part_type, g_part_gpt_type), KOBJMETHOD(g_part_write, g_part_gpt_write), { 0, 0 } }; static struct g_part_scheme g_part_gpt_scheme = { "GPT", g_part_gpt_methods, sizeof(struct g_part_gpt_table), .gps_entrysz = sizeof(struct g_part_gpt_entry), .gps_minent = 128, .gps_maxent = 4096, .gps_bootcodesz = MBRSIZE, }; G_PART_SCHEME_DECLARE(g_part_gpt); +MODULE_VERSION(geom_part_gpt, 0); static struct uuid gpt_uuid_apple_boot = GPT_ENT_TYPE_APPLE_BOOT; static struct uuid gpt_uuid_apple_core_storage = GPT_ENT_TYPE_APPLE_CORE_STORAGE; static struct uuid gpt_uuid_apple_hfs = GPT_ENT_TYPE_APPLE_HFS; static struct uuid gpt_uuid_apple_label = GPT_ENT_TYPE_APPLE_LABEL; static struct uuid gpt_uuid_apple_raid = GPT_ENT_TYPE_APPLE_RAID; static struct uuid gpt_uuid_apple_raid_offline = GPT_ENT_TYPE_APPLE_RAID_OFFLINE; static struct uuid gpt_uuid_apple_tv_recovery = GPT_ENT_TYPE_APPLE_TV_RECOVERY; static struct uuid gpt_uuid_apple_ufs = GPT_ENT_TYPE_APPLE_UFS; static struct uuid gpt_uuid_bios_boot = GPT_ENT_TYPE_BIOS_BOOT; static struct uuid gpt_uuid_chromeos_firmware = GPT_ENT_TYPE_CHROMEOS_FIRMWARE; static struct uuid 
gpt_uuid_chromeos_kernel = GPT_ENT_TYPE_CHROMEOS_KERNEL; static struct uuid gpt_uuid_chromeos_reserved = GPT_ENT_TYPE_CHROMEOS_RESERVED; static struct uuid gpt_uuid_chromeos_root = GPT_ENT_TYPE_CHROMEOS_ROOT; static struct uuid gpt_uuid_dfbsd_ccd = GPT_ENT_TYPE_DRAGONFLY_CCD; static struct uuid gpt_uuid_dfbsd_hammer = GPT_ENT_TYPE_DRAGONFLY_HAMMER; static struct uuid gpt_uuid_dfbsd_hammer2 = GPT_ENT_TYPE_DRAGONFLY_HAMMER2; static struct uuid gpt_uuid_dfbsd_label32 = GPT_ENT_TYPE_DRAGONFLY_LABEL32; static struct uuid gpt_uuid_dfbsd_label64 = GPT_ENT_TYPE_DRAGONFLY_LABEL64; static struct uuid gpt_uuid_dfbsd_legacy = GPT_ENT_TYPE_DRAGONFLY_LEGACY; static struct uuid gpt_uuid_dfbsd_swap = GPT_ENT_TYPE_DRAGONFLY_SWAP; static struct uuid gpt_uuid_dfbsd_ufs1 = GPT_ENT_TYPE_DRAGONFLY_UFS1; static struct uuid gpt_uuid_dfbsd_vinum = GPT_ENT_TYPE_DRAGONFLY_VINUM; static struct uuid gpt_uuid_efi = GPT_ENT_TYPE_EFI; static struct uuid gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD; static struct uuid gpt_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT; static struct uuid gpt_uuid_freebsd_nandfs = GPT_ENT_TYPE_FREEBSD_NANDFS; static struct uuid gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP; static struct uuid gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS; static struct uuid gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM; static struct uuid gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS; static struct uuid gpt_uuid_linux_data = GPT_ENT_TYPE_LINUX_DATA; static struct uuid gpt_uuid_linux_lvm = GPT_ENT_TYPE_LINUX_LVM; static struct uuid gpt_uuid_linux_raid = GPT_ENT_TYPE_LINUX_RAID; static struct uuid gpt_uuid_linux_swap = GPT_ENT_TYPE_LINUX_SWAP; static struct uuid gpt_uuid_mbr = GPT_ENT_TYPE_MBR; static struct uuid gpt_uuid_ms_basic_data = GPT_ENT_TYPE_MS_BASIC_DATA; static struct uuid gpt_uuid_ms_ldm_data = GPT_ENT_TYPE_MS_LDM_DATA; static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA; static struct uuid gpt_uuid_ms_recovery = GPT_ENT_TYPE_MS_RECOVERY; static struct uuid gpt_uuid_ms_reserved = GPT_ENT_TYPE_MS_RESERVED; static struct uuid gpt_uuid_ms_spaces = GPT_ENT_TYPE_MS_SPACES; static struct uuid gpt_uuid_netbsd_ccd = GPT_ENT_TYPE_NETBSD_CCD; static struct uuid gpt_uuid_netbsd_cgd = GPT_ENT_TYPE_NETBSD_CGD; static struct uuid gpt_uuid_netbsd_ffs = GPT_ENT_TYPE_NETBSD_FFS; static struct uuid gpt_uuid_netbsd_lfs = GPT_ENT_TYPE_NETBSD_LFS; static struct uuid gpt_uuid_netbsd_raid = GPT_ENT_TYPE_NETBSD_RAID; static struct uuid gpt_uuid_netbsd_swap = GPT_ENT_TYPE_NETBSD_SWAP; static struct uuid gpt_uuid_openbsd_data = GPT_ENT_TYPE_OPENBSD_DATA; static struct uuid gpt_uuid_prep_boot = GPT_ENT_TYPE_PREP_BOOT; static struct uuid gpt_uuid_unused = GPT_ENT_TYPE_UNUSED; static struct uuid gpt_uuid_vmfs = GPT_ENT_TYPE_VMFS; static struct uuid gpt_uuid_vmkdiag = GPT_ENT_TYPE_VMKDIAG; static struct uuid gpt_uuid_vmreserved = GPT_ENT_TYPE_VMRESERVED; static struct uuid gpt_uuid_vmvsanhdr = GPT_ENT_TYPE_VMVSANHDR; static struct g_part_uuid_alias { struct uuid *uuid; int alias; int mbrtype; } gpt_uuid_alias_match[] = { { &gpt_uuid_apple_boot, G_PART_ALIAS_APPLE_BOOT, 0xab }, { &gpt_uuid_apple_core_storage, G_PART_ALIAS_APPLE_CORE_STORAGE, 0 }, { &gpt_uuid_apple_hfs, G_PART_ALIAS_APPLE_HFS, 0xaf }, { &gpt_uuid_apple_label, G_PART_ALIAS_APPLE_LABEL, 0 }, { &gpt_uuid_apple_raid, G_PART_ALIAS_APPLE_RAID, 0 }, { &gpt_uuid_apple_raid_offline, G_PART_ALIAS_APPLE_RAID_OFFLINE, 0 }, { &gpt_uuid_apple_tv_recovery, G_PART_ALIAS_APPLE_TV_RECOVERY, 0 }, { &gpt_uuid_apple_ufs, G_PART_ALIAS_APPLE_UFS, 0 }, { 
&gpt_uuid_bios_boot, G_PART_ALIAS_BIOS_BOOT, 0 }, { &gpt_uuid_chromeos_firmware, G_PART_ALIAS_CHROMEOS_FIRMWARE, 0 }, { &gpt_uuid_chromeos_kernel, G_PART_ALIAS_CHROMEOS_KERNEL, 0 }, { &gpt_uuid_chromeos_reserved, G_PART_ALIAS_CHROMEOS_RESERVED, 0 }, { &gpt_uuid_chromeos_root, G_PART_ALIAS_CHROMEOS_ROOT, 0 }, { &gpt_uuid_dfbsd_ccd, G_PART_ALIAS_DFBSD_CCD, 0 }, { &gpt_uuid_dfbsd_hammer, G_PART_ALIAS_DFBSD_HAMMER, 0 }, { &gpt_uuid_dfbsd_hammer2, G_PART_ALIAS_DFBSD_HAMMER2, 0 }, { &gpt_uuid_dfbsd_label32, G_PART_ALIAS_DFBSD, 0xa5 }, { &gpt_uuid_dfbsd_label64, G_PART_ALIAS_DFBSD64, 0xa5 }, { &gpt_uuid_dfbsd_legacy, G_PART_ALIAS_DFBSD_LEGACY, 0 }, { &gpt_uuid_dfbsd_swap, G_PART_ALIAS_DFBSD_SWAP, 0 }, { &gpt_uuid_dfbsd_ufs1, G_PART_ALIAS_DFBSD_UFS, 0 }, { &gpt_uuid_dfbsd_vinum, G_PART_ALIAS_DFBSD_VINUM, 0 }, { &gpt_uuid_efi, G_PART_ALIAS_EFI, 0xee }, { &gpt_uuid_freebsd, G_PART_ALIAS_FREEBSD, 0xa5 }, { &gpt_uuid_freebsd_boot, G_PART_ALIAS_FREEBSD_BOOT, 0 }, { &gpt_uuid_freebsd_nandfs, G_PART_ALIAS_FREEBSD_NANDFS, 0 }, { &gpt_uuid_freebsd_swap, G_PART_ALIAS_FREEBSD_SWAP, 0 }, { &gpt_uuid_freebsd_ufs, G_PART_ALIAS_FREEBSD_UFS, 0 }, { &gpt_uuid_freebsd_vinum, G_PART_ALIAS_FREEBSD_VINUM, 0 }, { &gpt_uuid_freebsd_zfs, G_PART_ALIAS_FREEBSD_ZFS, 0 }, { &gpt_uuid_linux_data, G_PART_ALIAS_LINUX_DATA, 0x0b }, { &gpt_uuid_linux_lvm, G_PART_ALIAS_LINUX_LVM, 0 }, { &gpt_uuid_linux_raid, G_PART_ALIAS_LINUX_RAID, 0 }, { &gpt_uuid_linux_swap, G_PART_ALIAS_LINUX_SWAP, 0 }, { &gpt_uuid_mbr, G_PART_ALIAS_MBR, 0 }, { &gpt_uuid_ms_basic_data, G_PART_ALIAS_MS_BASIC_DATA, 0x0b }, { &gpt_uuid_ms_ldm_data, G_PART_ALIAS_MS_LDM_DATA, 0 }, { &gpt_uuid_ms_ldm_metadata, G_PART_ALIAS_MS_LDM_METADATA, 0 }, { &gpt_uuid_ms_recovery, G_PART_ALIAS_MS_RECOVERY, 0 }, { &gpt_uuid_ms_reserved, G_PART_ALIAS_MS_RESERVED, 0 }, { &gpt_uuid_ms_spaces, G_PART_ALIAS_MS_SPACES, 0 }, { &gpt_uuid_netbsd_ccd, G_PART_ALIAS_NETBSD_CCD, 0 }, { &gpt_uuid_netbsd_cgd, G_PART_ALIAS_NETBSD_CGD, 0 }, { &gpt_uuid_netbsd_ffs, G_PART_ALIAS_NETBSD_FFS, 0 }, { &gpt_uuid_netbsd_lfs, G_PART_ALIAS_NETBSD_LFS, 0 }, { &gpt_uuid_netbsd_raid, G_PART_ALIAS_NETBSD_RAID, 0 }, { &gpt_uuid_netbsd_swap, G_PART_ALIAS_NETBSD_SWAP, 0 }, { &gpt_uuid_openbsd_data, G_PART_ALIAS_OPENBSD_DATA, 0 }, { &gpt_uuid_prep_boot, G_PART_ALIAS_PREP_BOOT, 0x41 }, { &gpt_uuid_vmfs, G_PART_ALIAS_VMFS, 0 }, { &gpt_uuid_vmkdiag, G_PART_ALIAS_VMKDIAG, 0 }, { &gpt_uuid_vmreserved, G_PART_ALIAS_VMRESERVED, 0 }, { &gpt_uuid_vmvsanhdr, G_PART_ALIAS_VMVSANHDR, 0 }, { NULL, 0, 0 } }; static int gpt_write_mbr_entry(u_char *mbr, int idx, int typ, quad_t start, quad_t end) { if (typ == 0 || start > UINT32_MAX || end > UINT32_MAX) return (EINVAL); mbr += DOSPARTOFF + idx * DOSPARTSIZE; mbr[0] = 0; if (start == 1) { /* * Treat the PMBR partition specially to maximize * interoperability with BIOSes. 
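 * LBA 1 is cylinder 0, head 0, sector 2 in any geometry, so the
 * starting CHS fields are encoded as 0/0/2 rather than the usual
 * 0xff fill; some BIOSes sanity-check those fields before booting.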
*/ mbr[1] = mbr[3] = 0; mbr[2] = 2; } else mbr[1] = mbr[2] = mbr[3] = 0xff; mbr[4] = typ; mbr[5] = mbr[6] = mbr[7] = 0xff; le32enc(mbr + 8, (uint32_t)start); le32enc(mbr + 12, (uint32_t)(end - start + 1)); return (0); } static int gpt_map_type(struct uuid *t) { struct g_part_uuid_alias *uap; for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) { if (EQUUID(t, uap->uuid)) return (uap->mbrtype); } return (0); } static void gpt_create_pmbr(struct g_part_gpt_table *table, struct g_provider *pp) { bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART); gpt_write_mbr_entry(table->mbr, 0, 0xee, 1, MIN(pp->mediasize / pp->sectorsize - 1, UINT32_MAX)); le16enc(table->mbr + DOSMAGICOFFSET, DOSMAGIC); } /* * Under Boot Camp the PMBR partition (type 0xEE) doesn't cover the * whole disk anymore. Rather, it covers the GPT table and the EFI * system partition only. This way the HFS+ partition and any FAT * partitions can be added to the MBR without creating an overlap. */ static int gpt_is_bootcamp(struct g_part_gpt_table *table, const char *provname) { uint8_t *p; p = table->mbr + DOSPARTOFF; if (p[4] != 0xee || le32dec(p + 8) != 1) return (0); p += DOSPARTSIZE; if (p[4] != 0xaf) return (0); printf("GEOM: %s: enabling Boot Camp\n", provname); return (1); } static void gpt_update_bootcamp(struct g_part_table *basetable, struct g_provider *pp) { struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; int bootable, error, index, slices, typ; table = (struct g_part_gpt_table *)basetable; bootable = -1; for (index = 0; index < NDOSPART; index++) { if (table->mbr[DOSPARTOFF + DOSPARTSIZE * index]) bootable = index; } bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART); slices = 0; LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; index = baseentry->gpe_index - 1; if (index >= NDOSPART) continue; entry = (struct g_part_gpt_entry *)baseentry; switch (index) { case 0: /* This must be the EFI system partition. */ if (!EQUUID(&entry->ent.ent_type, &gpt_uuid_efi)) goto disable; error = gpt_write_mbr_entry(table->mbr, index, 0xee, 1ull, entry->ent.ent_lba_end); break; case 1: /* This must be the HFS+ partition. */ if (!EQUUID(&entry->ent.ent_type, &gpt_uuid_apple_hfs)) goto disable; error = gpt_write_mbr_entry(table->mbr, index, 0xaf, entry->ent.ent_lba_start, entry->ent.ent_lba_end); break; default: typ = gpt_map_type(&entry->ent.ent_type); error = gpt_write_mbr_entry(table->mbr, index, typ, entry->ent.ent_lba_start, entry->ent.ent_lba_end); break; } if (error) continue; if (index == bootable) table->mbr[DOSPARTOFF + DOSPARTSIZE * index] = 0x80; slices |= 1 << index; } if ((slices & 3) == 3) return; disable: table->bootcamp = 0; gpt_create_pmbr(table, pp); } static struct gpt_hdr * gpt_read_hdr(struct g_part_gpt_table *table, struct g_consumer *cp, enum gpt_elt elt) { struct gpt_hdr *buf, *hdr; struct g_provider *pp; quad_t lba, last; int error; uint32_t crc, sz; pp = cp->provider; last = (pp->mediasize / pp->sectorsize) - 1; table->state[elt] = GPT_STATE_MISSING; /* * If the primary header is valid look for secondary * header in AlternateLBA, otherwise in the last medium's LBA. 
*/ if (elt == GPT_ELT_SECHDR) { if (table->state[GPT_ELT_PRIHDR] != GPT_STATE_OK) table->lba[elt] = last; } else table->lba[elt] = 1; buf = g_read_data(cp, table->lba[elt] * pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) return (NULL); hdr = NULL; if (memcmp(buf->hdr_sig, GPT_HDR_SIG, sizeof(buf->hdr_sig)) != 0) goto fail; table->state[elt] = GPT_STATE_CORRUPT; sz = le32toh(buf->hdr_size); if (sz < 92 || sz > pp->sectorsize) goto fail; hdr = g_malloc(sz, M_WAITOK | M_ZERO); bcopy(buf, hdr, sz); hdr->hdr_size = sz; crc = le32toh(buf->hdr_crc_self); buf->hdr_crc_self = 0; if (crc32(buf, sz) != crc) goto fail; hdr->hdr_crc_self = crc; table->state[elt] = GPT_STATE_INVALID; hdr->hdr_revision = le32toh(buf->hdr_revision); if (hdr->hdr_revision < GPT_HDR_REVISION) goto fail; hdr->hdr_lba_self = le64toh(buf->hdr_lba_self); if (hdr->hdr_lba_self != table->lba[elt]) goto fail; hdr->hdr_lba_alt = le64toh(buf->hdr_lba_alt); if (hdr->hdr_lba_alt == hdr->hdr_lba_self || hdr->hdr_lba_alt > last) goto fail; /* Check the managed area. */ hdr->hdr_lba_start = le64toh(buf->hdr_lba_start); if (hdr->hdr_lba_start < 2 || hdr->hdr_lba_start >= last) goto fail; hdr->hdr_lba_end = le64toh(buf->hdr_lba_end); if (hdr->hdr_lba_end < hdr->hdr_lba_start || hdr->hdr_lba_end >= last) goto fail; /* Check the table location and size of the table. */ hdr->hdr_entries = le32toh(buf->hdr_entries); hdr->hdr_entsz = le32toh(buf->hdr_entsz); if (hdr->hdr_entries == 0 || hdr->hdr_entsz < 128 || (hdr->hdr_entsz & 7) != 0) goto fail; hdr->hdr_lba_table = le64toh(buf->hdr_lba_table); if (hdr->hdr_lba_table < 2 || hdr->hdr_lba_table >= last) goto fail; if (hdr->hdr_lba_table >= hdr->hdr_lba_start && hdr->hdr_lba_table <= hdr->hdr_lba_end) goto fail; lba = hdr->hdr_lba_table + howmany(hdr->hdr_entries * hdr->hdr_entsz, pp->sectorsize) - 1; if (lba >= last) goto fail; if (lba >= hdr->hdr_lba_start && lba <= hdr->hdr_lba_end) goto fail; table->state[elt] = GPT_STATE_OK; le_uuid_dec(&buf->hdr_uuid, &hdr->hdr_uuid); hdr->hdr_crc_table = le32toh(buf->hdr_crc_table); /* save LBA for secondary header */ if (elt == GPT_ELT_PRIHDR) table->lba[GPT_ELT_SECHDR] = hdr->hdr_lba_alt; g_free(buf); return (hdr); fail: if (hdr != NULL) g_free(hdr); g_free(buf); return (NULL); } static struct gpt_ent * gpt_read_tbl(struct g_part_gpt_table *table, struct g_consumer *cp, enum gpt_elt elt, struct gpt_hdr *hdr) { struct g_provider *pp; struct gpt_ent *ent, *tbl; char *buf, *p; unsigned int idx, sectors, tblsz, size; int error; if (hdr == NULL) return (NULL); pp = cp->provider; table->lba[elt] = hdr->hdr_lba_table; table->state[elt] = GPT_STATE_MISSING; tblsz = hdr->hdr_entries * hdr->hdr_entsz; sectors = howmany(tblsz, pp->sectorsize); buf = g_malloc(sectors * pp->sectorsize, M_WAITOK | M_ZERO); for (idx = 0; idx < sectors; idx += MAXPHYS / pp->sectorsize) { size = (sectors - idx > MAXPHYS / pp->sectorsize) ? 
MAXPHYS: (sectors - idx) * pp->sectorsize; p = g_read_data(cp, (table->lba[elt] + idx) * pp->sectorsize, size, &error); if (p == NULL) { g_free(buf); return (NULL); } bcopy(p, buf + idx * pp->sectorsize, size); g_free(p); } table->state[elt] = GPT_STATE_CORRUPT; if (crc32(buf, tblsz) != hdr->hdr_crc_table) { g_free(buf); return (NULL); } table->state[elt] = GPT_STATE_OK; tbl = g_malloc(hdr->hdr_entries * sizeof(struct gpt_ent), M_WAITOK | M_ZERO); for (idx = 0, ent = tbl, p = buf; idx < hdr->hdr_entries; idx++, ent++, p += hdr->hdr_entsz) { le_uuid_dec(p, &ent->ent_type); le_uuid_dec(p + 16, &ent->ent_uuid); ent->ent_lba_start = le64dec(p + 32); ent->ent_lba_end = le64dec(p + 40); ent->ent_attr = le64dec(p + 48); /* Keep UTF-16 in little-endian. */ bcopy(p + 56, ent->ent_name, sizeof(ent->ent_name)); } g_free(buf); return (tbl); } static int gpt_matched_hdrs(struct gpt_hdr *pri, struct gpt_hdr *sec) { if (pri == NULL || sec == NULL) return (0); if (!EQUUID(&pri->hdr_uuid, &sec->hdr_uuid)) return (0); return ((pri->hdr_revision == sec->hdr_revision && pri->hdr_size == sec->hdr_size && pri->hdr_lba_start == sec->hdr_lba_start && pri->hdr_lba_end == sec->hdr_lba_end && pri->hdr_entries == sec->hdr_entries && pri->hdr_entsz == sec->hdr_entsz && pri->hdr_crc_table == sec->hdr_crc_table) ? 1 : 0); } static int gpt_parse_type(const char *type, struct uuid *uuid) { struct uuid tmp; const char *alias; int error; struct g_part_uuid_alias *uap; if (type[0] == '!') { error = parse_uuid(type + 1, &tmp); if (error) return (error); if (EQUUID(&tmp, &gpt_uuid_unused)) return (EINVAL); *uuid = tmp; return (0); } for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) { alias = g_part_alias_name(uap->alias); if (!strcasecmp(type, alias)) { *uuid = *uap->uuid; return (0); } } return (EINVAL); } static int g_part_gpt_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_gpt_entry *entry; int error; entry = (struct g_part_gpt_entry *)baseentry; error = gpt_parse_type(gpp->gpp_type, &entry->ent.ent_type); if (error) return (error); kern_uuidgen(&entry->ent.ent_uuid, 1); entry->ent.ent_lba_start = baseentry->gpe_start; entry->ent.ent_lba_end = baseentry->gpe_end; if (baseentry->gpe_deleted) { entry->ent.ent_attr = 0; bzero(entry->ent.ent_name, sizeof(entry->ent.ent_name)); } if (gpp->gpp_parms & G_PART_PARM_LABEL) g_gpt_utf8_to_utf16(gpp->gpp_label, entry->ent.ent_name, sizeof(entry->ent.ent_name) / sizeof(entry->ent.ent_name[0])); return (0); } static int g_part_gpt_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_gpt_table *table; size_t codesz; codesz = DOSPARTOFF; table = (struct g_part_gpt_table *)basetable; bzero(table->mbr, codesz); codesz = MIN(codesz, gpp->gpp_codesize); if (codesz > 0) bcopy(gpp->gpp_codeptr, table->mbr, codesz); return (0); } static int g_part_gpt_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_gpt_table *table; size_t tblsz; /* We don't nest, which means that our depth should be 0. 
*/ if (basetable->gpt_depth != 0) return (ENXIO); table = (struct g_part_gpt_table *)basetable; pp = gpp->gpp_provider; tblsz = howmany(basetable->gpt_entries * sizeof(struct gpt_ent), pp->sectorsize); if (pp->sectorsize < MBRSIZE || pp->mediasize < (3 + 2 * tblsz + basetable->gpt_entries) * pp->sectorsize) return (ENOSPC); gpt_create_pmbr(table, pp); /* Allocate space for the header */ table->hdr = g_malloc(sizeof(struct gpt_hdr), M_WAITOK | M_ZERO); bcopy(GPT_HDR_SIG, table->hdr->hdr_sig, sizeof(table->hdr->hdr_sig)); table->hdr->hdr_revision = GPT_HDR_REVISION; table->hdr->hdr_size = offsetof(struct gpt_hdr, padding); kern_uuidgen(&table->hdr->hdr_uuid, 1); table->hdr->hdr_entries = basetable->gpt_entries; table->hdr->hdr_entsz = sizeof(struct gpt_ent); g_gpt_set_defaults(basetable, pp); return (0); } static int g_part_gpt_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_gpt_table *table; struct g_provider *pp; table = (struct g_part_gpt_table *)basetable; pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; g_free(table->hdr); table->hdr = NULL; /* * Wipe the first 2 sectors and last one to clear the partitioning. * Wipe sectors only if they have valid metadata. */ if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK) basetable->gpt_smhead |= 3; if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK && table->lba[GPT_ELT_SECHDR] == pp->mediasize / pp->sectorsize - 1) basetable->gpt_smtail |= 1; return (0); } static void g_part_gpt_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_gpt_entry *entry; entry = (struct g_part_gpt_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs GPT xt "); sbuf_printf_uuid(sb, &entry->ent.ent_type); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s\n"); if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTME) sbuf_printf(sb, "%sbootme\n", indent); if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTONCE) { sbuf_printf(sb, "%sbootonce\n", indent); } if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTFAILED) { sbuf_printf(sb, "%sbootfailed\n", indent); } sbuf_printf(sb, "%s", indent); sbuf_printf_uuid(sb, &entry->ent.ent_type); sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s", indent); sbuf_printf_uuid(sb, &entry->ent.ent_uuid); sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s", indent); sbuf_printf(sb, "HD(%d,GPT,", entry->base.gpe_index); sbuf_printf_uuid(sb, &entry->ent.ent_uuid); sbuf_printf(sb, ",%#jx,%#jx)", (intmax_t)entry->base.gpe_start, (intmax_t)(entry->base.gpe_end - entry->base.gpe_start + 1)); sbuf_printf(sb, "\n"); } else { /* confxml: scheme information */ } } static int g_part_gpt_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_gpt_entry *entry; entry = (struct g_part_gpt_entry *)baseentry; return ((EQUUID(&entry->ent.ent_type, &gpt_uuid_freebsd_swap) || EQUUID(&entry->ent.ent_type, &gpt_uuid_linux_swap) || EQUUID(&entry->ent.ent_type, &gpt_uuid_dfbsd_swap)) ? 
1 : 0); } static int g_part_gpt_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_gpt_entry *entry; int error; entry = (struct g_part_gpt_entry *)baseentry; if (gpp->gpp_parms & G_PART_PARM_TYPE) { error = gpt_parse_type(gpp->gpp_type, &entry->ent.ent_type); if (error) return (error); } if (gpp->gpp_parms & G_PART_PARM_LABEL) g_gpt_utf8_to_utf16(gpp->gpp_label, entry->ent.ent_name, sizeof(entry->ent.ent_name) / sizeof(entry->ent.ent_name[0])); return (0); } static int g_part_gpt_resize(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_gpt_entry *entry; if (baseentry == NULL) return (g_part_gpt_recover(basetable)); entry = (struct g_part_gpt_entry *)baseentry; baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1; entry->ent.ent_lba_end = baseentry->gpe_end; return (0); } static const char * g_part_gpt_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_gpt_entry *entry; char c; entry = (struct g_part_gpt_entry *)baseentry; c = (EQUUID(&entry->ent.ent_type, &gpt_uuid_freebsd)) ? 's' : 'p'; snprintf(buf, bufsz, "%c%d", c, baseentry->gpe_index); return (buf); } static int g_part_gpt_probe(struct g_part_table *table, struct g_consumer *cp) { struct g_provider *pp; u_char *buf; int error, index, pri, res; /* We don't nest, which means that our depth should be 0. */ if (table->gpt_depth != 0) return (ENXIO); pp = cp->provider; /* * Sanity-check the provider. Since the first sector on the provider * must be a PMBR and a PMBR is 512 bytes large, the sector size * must be at least 512 bytes. Also, since the theoretical minimum * number of sectors needed by GPT is 6, any medium that has less * than 6 sectors is never going to be able to hold a GPT. The * number 6 comes from: * 1 sector for the PMBR * 2 sectors for the GPT headers (each 1 sector) * 2 sectors for the GPT tables (each 1 sector) * 1 sector for an actual partition * It's better to catch this pathological case early than behaving * pathologically later on... */ if (pp->sectorsize < MBRSIZE || pp->mediasize < 6 * pp->sectorsize) return (ENOSPC); /* * Check that there's a MBR or a PMBR. If it's a PMBR, we return * as the highest priority on a match, otherwise we assume some * GPT-unaware tool has destroyed the GPT by recreating a MBR and * we really want the MBR scheme to take precedence. */ buf = g_read_data(cp, 0L, pp->sectorsize, &error); if (buf == NULL) return (error); res = le16dec(buf + DOSMAGICOFFSET); pri = G_PART_PROBE_PRI_LOW; if (res == DOSMAGIC) { for (index = 0; index < NDOSPART; index++) { if (buf[DOSPARTOFF + DOSPARTSIZE * index + 4] == 0xee) pri = G_PART_PROBE_PRI_HIGH; } g_free(buf); /* Check that there's a primary header. */ buf = g_read_data(cp, pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) return (error); res = memcmp(buf, GPT_HDR_SIG, 8); g_free(buf); if (res == 0) return (pri); } else g_free(buf); /* No primary? Check that there's a secondary. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) return (error); res = memcmp(buf, GPT_HDR_SIG, 8); g_free(buf); return ((res == 0) ? 
pri : ENXIO); } static int g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp) { struct gpt_hdr *prihdr, *sechdr; struct gpt_ent *tbl, *pritbl, *sectbl; struct g_provider *pp; struct g_part_gpt_table *table; struct g_part_gpt_entry *entry; u_char *buf; uint64_t last; int error, index; table = (struct g_part_gpt_table *)basetable; pp = cp->provider; last = (pp->mediasize / pp->sectorsize) - 1; /* Read the PMBR */ buf = g_read_data(cp, 0, pp->sectorsize, &error); if (buf == NULL) return (error); bcopy(buf, table->mbr, MBRSIZE); g_free(buf); /* Read the primary header and table. */ prihdr = gpt_read_hdr(table, cp, GPT_ELT_PRIHDR); if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK) { pritbl = gpt_read_tbl(table, cp, GPT_ELT_PRITBL, prihdr); } else { table->state[GPT_ELT_PRITBL] = GPT_STATE_MISSING; pritbl = NULL; } /* Read the secondary header and table. */ sechdr = gpt_read_hdr(table, cp, GPT_ELT_SECHDR); if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK) { sectbl = gpt_read_tbl(table, cp, GPT_ELT_SECTBL, sechdr); } else { table->state[GPT_ELT_SECTBL] = GPT_STATE_MISSING; sectbl = NULL; } /* Fail if we haven't got any good tables at all. */ if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK && table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) { printf("GEOM: %s: corrupt or invalid GPT detected.\n", pp->name); printf("GEOM: %s: GPT rejected -- may not be recoverable.\n", pp->name); return (EINVAL); } /* * If both headers are good but they disagree with each other, * then invalidate one. We prefer to keep the primary header, * unless the primary table is corrupt. */ if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK && table->state[GPT_ELT_SECHDR] == GPT_STATE_OK && !gpt_matched_hdrs(prihdr, sechdr)) { if (table->state[GPT_ELT_PRITBL] == GPT_STATE_OK) { table->state[GPT_ELT_SECHDR] = GPT_STATE_INVALID; table->state[GPT_ELT_SECTBL] = GPT_STATE_MISSING; g_free(sechdr); sechdr = NULL; } else { table->state[GPT_ELT_PRIHDR] = GPT_STATE_INVALID; table->state[GPT_ELT_PRITBL] = GPT_STATE_MISSING; g_free(prihdr); prihdr = NULL; } } if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK) { printf("GEOM: %s: the primary GPT table is corrupt or " "invalid.\n", pp->name); printf("GEOM: %s: using the secondary instead -- recovery " "strongly advised.\n", pp->name); table->hdr = sechdr; basetable->gpt_corrupt = 1; if (prihdr != NULL) g_free(prihdr); tbl = sectbl; if (pritbl != NULL) g_free(pritbl); } else { if (table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) { printf("GEOM: %s: the secondary GPT table is corrupt " "or invalid.\n", pp->name); printf("GEOM: %s: using the primary only -- recovery " "suggested.\n", pp->name); basetable->gpt_corrupt = 1; } else if (table->lba[GPT_ELT_SECHDR] != last) { printf( "GEOM: %s: the secondary GPT header is not in " "the last LBA.\n", pp->name); basetable->gpt_corrupt = 1; } table->hdr = prihdr; if (sechdr != NULL) g_free(sechdr); tbl = pritbl; if (sectbl != NULL) g_free(sectbl); } basetable->gpt_first = table->hdr->hdr_lba_start; basetable->gpt_last = table->hdr->hdr_lba_end; basetable->gpt_entries = (table->hdr->hdr_lba_start - 2) * pp->sectorsize / table->hdr->hdr_entsz; for (index = table->hdr->hdr_entries - 1; index >= 0; index--) { if (EQUUID(&tbl[index].ent_type, &gpt_uuid_unused)) continue; entry = (struct g_part_gpt_entry *)g_part_new_entry( basetable, index + 1, tbl[index].ent_lba_start, tbl[index].ent_lba_end); entry->ent = tbl[index]; } g_free(tbl); /* * Under Mac OS X, the MBR mirrors the first 4 GPT partitions * if (and only if) any FAT32 or FAT16 partitions 
have been * created. This happens irrespective of whether Boot Camp is * used/enabled, though it's generally understood to be done * to support legacy Windows under Boot Camp. We refer to this * mirroring simply as Boot Camp. We try to detect Boot Camp * so that we can update the MBR if and when GPT changes have * been made. Note that we do not enable Boot Camp if not * previously enabled because we can't assume that we're on a * Mac alongside Mac OS X. */ table->bootcamp = gpt_is_bootcamp(table, pp->name); return (0); } static int g_part_gpt_recover(struct g_part_table *basetable) { struct g_part_gpt_table *table; struct g_provider *pp; table = (struct g_part_gpt_table *)basetable; pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; gpt_create_pmbr(table, pp); g_gpt_set_defaults(basetable, pp); basetable->gpt_corrupt = 0; return (0); } static int g_part_gpt_setunset(struct g_part_table *basetable, struct g_part_entry *baseentry, const char *attrib, unsigned int set) { struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; struct g_provider *pp; uint8_t *p; uint64_t attr; int i; table = (struct g_part_gpt_table *)basetable; entry = (struct g_part_gpt_entry *)baseentry; if (strcasecmp(attrib, "active") == 0) { if (table->bootcamp) { /* The active flag must be set on a valid entry. */ if (entry == NULL) return (ENXIO); if (baseentry->gpe_index > NDOSPART) return (EINVAL); for (i = 0; i < NDOSPART; i++) { p = &table->mbr[DOSPARTOFF + i * DOSPARTSIZE]; p[0] = (i == baseentry->gpe_index - 1) ? ((set) ? 0x80 : 0) : 0; } } else { /* The PMBR is marked as active without an entry. */ if (entry != NULL) return (ENXIO); for (i = 0; i < NDOSPART; i++) { p = &table->mbr[DOSPARTOFF + i * DOSPARTSIZE]; p[0] = (p[4] == 0xee) ? ((set) ? 0x80 : 0) : 0; } } return (0); } else if (strcasecmp(attrib, "lenovofix") == 0) { /* * Write the 0xee GPT entry to slot #1 (2nd slot) in the pMBR. * This workaround allows Lenovo X220, T420, T520, etc to boot * from GPT Partitions in BIOS mode. */ if (entry != NULL) return (ENXIO); pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART); gpt_write_mbr_entry(table->mbr, ((set) ? 1 : 0), 0xee, 1, MIN(pp->mediasize / pp->sectorsize - 1, UINT32_MAX)); return (0); } if (entry == NULL) return (ENODEV); attr = 0; if (strcasecmp(attrib, "bootme") == 0) { attr |= GPT_ENT_ATTR_BOOTME; } else if (strcasecmp(attrib, "bootonce") == 0) { attr |= GPT_ENT_ATTR_BOOTONCE; if (set) attr |= GPT_ENT_ATTR_BOOTME; } else if (strcasecmp(attrib, "bootfailed") == 0) { /* * It should only be possible to unset BOOTFAILED, but it might * be useful for test purposes to also be able to set it. 
*/ attr |= GPT_ENT_ATTR_BOOTFAILED; } if (attr == 0) return (EINVAL); if (set) attr = entry->ent.ent_attr | attr; else attr = entry->ent.ent_attr & ~attr; if (attr != entry->ent.ent_attr) { entry->ent.ent_attr = attr; if (!baseentry->gpe_created) baseentry->gpe_modified = 1; } return (0); } static const char * g_part_gpt_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_gpt_entry *entry; struct uuid *type; struct g_part_uuid_alias *uap; entry = (struct g_part_gpt_entry *)baseentry; type = &entry->ent.ent_type; for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) if (EQUUID(type, uap->uuid)) return (g_part_alias_name(uap->alias)); buf[0] = '!'; snprintf_uuid(buf + 1, bufsz - 1, type); return (buf); } static int g_part_gpt_write(struct g_part_table *basetable, struct g_consumer *cp) { unsigned char *buf, *bp; struct g_provider *pp; struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; size_t tblsz; uint32_t crc; int error, index; pp = cp->provider; table = (struct g_part_gpt_table *)basetable; tblsz = howmany(table->hdr->hdr_entries * table->hdr->hdr_entsz, pp->sectorsize); /* Reconstruct the MBR from the GPT if under Boot Camp. */ if (table->bootcamp) gpt_update_bootcamp(basetable, pp); /* Write the PMBR */ buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); bcopy(table->mbr, buf, MBRSIZE); error = g_write_data(cp, 0, buf, pp->sectorsize); g_free(buf); if (error) return (error); /* Allocate space for the header and entries. */ buf = g_malloc((tblsz + 1) * pp->sectorsize, M_WAITOK | M_ZERO); memcpy(buf, table->hdr->hdr_sig, sizeof(table->hdr->hdr_sig)); le32enc(buf + 8, table->hdr->hdr_revision); le32enc(buf + 12, table->hdr->hdr_size); le64enc(buf + 40, table->hdr->hdr_lba_start); le64enc(buf + 48, table->hdr->hdr_lba_end); le_uuid_enc(buf + 56, &table->hdr->hdr_uuid); le32enc(buf + 80, table->hdr->hdr_entries); le32enc(buf + 84, table->hdr->hdr_entsz); LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; entry = (struct g_part_gpt_entry *)baseentry; index = baseentry->gpe_index - 1; bp = buf + pp->sectorsize + table->hdr->hdr_entsz * index; le_uuid_enc(bp, &entry->ent.ent_type); le_uuid_enc(bp + 16, &entry->ent.ent_uuid); le64enc(bp + 32, entry->ent.ent_lba_start); le64enc(bp + 40, entry->ent.ent_lba_end); le64enc(bp + 48, entry->ent.ent_attr); memcpy(bp + 56, entry->ent.ent_name, sizeof(entry->ent.ent_name)); } crc = crc32(buf + pp->sectorsize, table->hdr->hdr_entries * table->hdr->hdr_entsz); le32enc(buf + 88, crc); /* Write primary meta-data. */ le32enc(buf + 16, 0); /* hdr_crc_self. */ le64enc(buf + 24, table->lba[GPT_ELT_PRIHDR]); /* hdr_lba_self. */ le64enc(buf + 32, table->lba[GPT_ELT_SECHDR]); /* hdr_lba_alt. */ le64enc(buf + 72, table->lba[GPT_ELT_PRITBL]); /* hdr_lba_table. */ crc = crc32(buf, table->hdr->hdr_size); le32enc(buf + 16, crc); for (index = 0; index < tblsz; index += MAXPHYS / pp->sectorsize) { error = g_write_data(cp, (table->lba[GPT_ELT_PRITBL] + index) * pp->sectorsize, buf + (index + 1) * pp->sectorsize, (tblsz - index > MAXPHYS / pp->sectorsize) ? MAXPHYS: (tblsz - index) * pp->sectorsize); if (error) goto out; } error = g_write_data(cp, table->lba[GPT_ELT_PRIHDR] * pp->sectorsize, buf, pp->sectorsize); if (error) goto out; /* Write secondary meta-data. */ le32enc(buf + 16, 0); /* hdr_crc_self. */ le64enc(buf + 24, table->lba[GPT_ELT_SECHDR]); /* hdr_lba_self. 
*/ le64enc(buf + 32, table->lba[GPT_ELT_PRIHDR]); /* hdr_lba_alt. */ le64enc(buf + 72, table->lba[GPT_ELT_SECTBL]); /* hdr_lba_table. */ crc = crc32(buf, table->hdr->hdr_size); le32enc(buf + 16, crc); for (index = 0; index < tblsz; index += MAXPHYS / pp->sectorsize) { error = g_write_data(cp, (table->lba[GPT_ELT_SECTBL] + index) * pp->sectorsize, buf + (index + 1) * pp->sectorsize, (tblsz - index > MAXPHYS / pp->sectorsize) ? MAXPHYS: (tblsz - index) * pp->sectorsize); if (error) goto out; } error = g_write_data(cp, table->lba[GPT_ELT_SECHDR] * pp->sectorsize, buf, pp->sectorsize); out: g_free(buf); return (error); } static void g_gpt_set_defaults(struct g_part_table *basetable, struct g_provider *pp) { struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; quad_t start, end, min, max; quad_t lba, last; size_t spb, tblsz; table = (struct g_part_gpt_table *)basetable; last = pp->mediasize / pp->sectorsize - 1; tblsz = howmany(basetable->gpt_entries * sizeof(struct gpt_ent), pp->sectorsize); table->lba[GPT_ELT_PRIHDR] = 1; table->lba[GPT_ELT_PRITBL] = 2; table->lba[GPT_ELT_SECHDR] = last; table->lba[GPT_ELT_SECTBL] = last - tblsz; table->state[GPT_ELT_PRIHDR] = GPT_STATE_OK; table->state[GPT_ELT_PRITBL] = GPT_STATE_OK; table->state[GPT_ELT_SECHDR] = GPT_STATE_OK; table->state[GPT_ELT_SECTBL] = GPT_STATE_OK; max = start = 2 + tblsz; min = end = last - tblsz - 1; LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; entry = (struct g_part_gpt_entry *)baseentry; if (entry->ent.ent_lba_start < min) min = entry->ent.ent_lba_start; if (entry->ent.ent_lba_end > max) max = entry->ent.ent_lba_end; } spb = 4096 / pp->sectorsize; if (spb > 1) { lba = start + ((start % spb) ? spb - start % spb : 0); if (lba <= min) start = lba; lba = end - (end + 1) % spb; if (max <= lba) end = lba; } table->hdr->hdr_lba_start = start; table->hdr->hdr_lba_end = end; basetable->gpt_first = start; basetable->gpt_last = end; } static void g_gpt_printf_utf16(struct sbuf *sb, uint16_t *str, size_t len) { u_int bo; uint32_t ch; uint16_t c; bo = LITTLE_ENDIAN; /* GPT is little-endian */ while (len > 0 && *str != 0) { ch = (bo == BIG_ENDIAN) ? be16toh(*str) : le16toh(*str); str++, len--; if ((ch & 0xf800) == 0xd800) { if (len > 0) { c = (bo == BIG_ENDIAN) ? be16toh(*str) : le16toh(*str); str++, len--; } else c = 0xfffd; if ((ch & 0x400) == 0 && (c & 0xfc00) == 0xdc00) { ch = ((ch & 0x3ff) << 10) + (c & 0x3ff); ch += 0x10000; } else ch = 0xfffd; } else if (ch == 0xfffe) { /* BOM (U+FEFF) swapped. */ bo = (bo == BIG_ENDIAN) ? LITTLE_ENDIAN : BIG_ENDIAN; continue; } else if (ch == 0xfeff) /* BOM (U+FEFF) unswapped. */ continue; /* Write the Unicode character in UTF-8 */ if (ch < 0x80) g_conf_printf_escaped(sb, "%c", ch); else if (ch < 0x800) g_conf_printf_escaped(sb, "%c%c", 0xc0 | (ch >> 6), 0x80 | (ch & 0x3f)); else if (ch < 0x10000) g_conf_printf_escaped(sb, "%c%c%c", 0xe0 | (ch >> 12), 0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f)); else if (ch < 0x200000) g_conf_printf_escaped(sb, "%c%c%c%c", 0xf0 | (ch >> 18), 0x80 | ((ch >> 12) & 0x3f), 0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f)); } } static void g_gpt_utf8_to_utf16(const uint8_t *s8, uint16_t *s16, size_t s16len) { size_t s16idx, s8idx; uint32_t utfchar; unsigned int c, utfbytes; s8idx = s16idx = 0; utfchar = 0; utfbytes = 0; bzero(s16, s16len << 1); while (s8[s8idx] != 0 && s16idx < s16len) { c = s8[s8idx++]; if ((c & 0xc0) != 0x80) { /* Initial characters. 
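 * (Any byte whose top two bits are not "10" begins a new UTF-8
 * sequence: 0xxxxxxx is ASCII, and 110xxxxx, 1110xxxx, 11110xxx
 * start two-, three- and four-byte sequences respectively.)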
*/ if (utfbytes != 0) { /* Incomplete encoding of previous char. */ s16[s16idx++] = htole16(0xfffd); } if ((c & 0xf8) == 0xf0) { utfchar = c & 0x07; utfbytes = 3; } else if ((c & 0xf0) == 0xe0) { utfchar = c & 0x0f; utfbytes = 2; } else if ((c & 0xe0) == 0xc0) { utfchar = c & 0x1f; utfbytes = 1; } else { utfchar = c & 0x7f; utfbytes = 0; } } else { /* Followup characters. */ if (utfbytes > 0) { utfchar = (utfchar << 6) + (c & 0x3f); utfbytes--; } else if (utfbytes == 0) utfbytes = ~0; } /* * Write the complete Unicode character as UTF-16 when we * have all the UTF-8 charactars collected. */ if (utfbytes == 0) { /* * If we need to write 2 UTF-16 characters, but * we only have room for 1, then we truncate the * string by writing a 0 instead. */ if (utfchar >= 0x10000 && s16idx < s16len - 1) { s16[s16idx++] = htole16(0xd800 | ((utfchar >> 10) - 0x40)); s16[s16idx++] = htole16(0xdc00 | (utfchar & 0x3ff)); } else s16[s16idx++] = (utfchar >= 0x10000) ? 0 : htole16(utfchar); } } /* * If our input string was truncated, append an invalid encoding * character to the output string. */ if (utfbytes != 0 && s16idx < s16len) s16[s16idx++] = htole16(0xfffd); } Index: stable/11/sys/geom/part/g_part_ldm.c =================================================================== --- stable/11/sys/geom/part/g_part_ldm.c (revision 332639) +++ stable/11/sys/geom/part/g_part_ldm.c (revision 332640) @@ -1,1482 +1,1483 @@ /*- * Copyright (c) 2012 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support"); SYSCTL_DECL(_kern_geom_part); static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm, CTLFLAG_RW, 0, "GEOM_PART_LDM Logical Disk Manager"); static u_int ldm_debug = 0; SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug, CTLFLAG_RWTUN, &ldm_debug, 0, "Debug level"); /* * This allows access to mirrored LDM volumes. Since we do not * doing mirroring here, it is not enabled by default. */ static u_int show_mirrors = 0; SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors, CTLFLAG_RWTUN, &show_mirrors, 0, "Show mirrored volumes"); #define LDM_DEBUG(lvl, fmt, ...) 
do { \ if (ldm_debug >= (lvl)) { \ printf("GEOM_PART: " fmt "\n", __VA_ARGS__); \ } \ } while (0) #define LDM_DUMP(buf, size) do { \ if (ldm_debug > 1) { \ hexdump(buf, size, NULL, 0); \ } \ } while (0) /* * There are internal representations of LDM structures. * * We do not keep all fields of on-disk structures, only most useful. * All numbers in an on-disk structures are in big-endian format. */ /* * Private header is 512 bytes long. There are three copies on each disk. * Offset and sizes are in sectors. Location of each copy: * - the first offset is relative to the disk start; * - the second and third offset are relative to the LDM database start. * * On a disk partitioned with GPT, the LDM has not first private header. */ #define LDM_PH_MBRINDEX 0 #define LDM_PH_GPTINDEX 2 static const uint64_t ldm_ph_off[] = {6, 1856, 2047}; #define LDM_VERSION_2K 0x2000b #define LDM_VERSION_VISTA 0x2000c #define LDM_PH_VERSION_OFF 0x00c #define LDM_PH_DISKGUID_OFF 0x030 #define LDM_PH_DGGUID_OFF 0x0b0 #define LDM_PH_DGNAME_OFF 0x0f0 #define LDM_PH_START_OFF 0x11b #define LDM_PH_SIZE_OFF 0x123 #define LDM_PH_DB_OFF 0x12b #define LDM_PH_DBSIZE_OFF 0x133 #define LDM_PH_TH1_OFF 0x13b #define LDM_PH_TH2_OFF 0x143 #define LDM_PH_CONFSIZE_OFF 0x153 #define LDM_PH_LOGSIZE_OFF 0x15b #define LDM_PH_SIGN "PRIVHEAD" struct ldm_privhdr { struct uuid disk_guid; struct uuid dg_guid; u_char dg_name[32]; uint64_t start; /* logical disk start */ uint64_t size; /* logical disk size */ uint64_t db_offset; /* LDM database start */ #define LDM_DB_SIZE 2048 uint64_t db_size; /* LDM database size */ #define LDM_TH_COUNT 2 uint64_t th_offset[LDM_TH_COUNT]; /* TOC header offsets */ uint64_t conf_size; /* configuration size */ uint64_t log_size; /* size of log */ }; /* * Table of contents header is 512 bytes long. * There are two identical copies at offsets from the private header. * Offsets are relative to the LDM database start. */ #define LDM_TH_SIGN "TOCBLOCK" #define LDM_TH_NAME1 "config" #define LDM_TH_NAME2 "log" #define LDM_TH_NAME1_OFF 0x024 #define LDM_TH_CONF_OFF 0x02e #define LDM_TH_CONFSIZE_OFF 0x036 #define LDM_TH_NAME2_OFF 0x046 #define LDM_TH_LOG_OFF 0x050 #define LDM_TH_LOGSIZE_OFF 0x058 struct ldm_tochdr { uint64_t conf_offset; /* configuration offset */ uint64_t log_offset; /* log offset */ }; /* * LDM database header is 512 bytes long. */ #define LDM_VMDB_SIGN "VMDB" #define LDM_DB_LASTSEQ_OFF 0x004 #define LDM_DB_SIZE_OFF 0x008 #define LDM_DB_STATUS_OFF 0x010 #define LDM_DB_VERSION_OFF 0x012 #define LDM_DB_DGNAME_OFF 0x016 #define LDM_DB_DGGUID_OFF 0x035 struct ldm_vmdbhdr { uint32_t last_seq; /* sequence number of last VBLK */ uint32_t size; /* size of VBLK */ }; /* * The LDM database configuration section contains VMDB header and * many VBLKs. Each VBLK represents a disk group, disk partition, * component or volume. * * The most interesting for us are volumes, they are represents * partitions in the GEOM_PART meaning. But volume VBLK does not * contain all information needed to create GEOM provider. And we * should get this information from the related VBLK. This is how * VBLK releated: * Volumes <- Components <- Partitions -> Disks * * One volume can contain several components. In this case LDM * does mirroring of volume data to each component. * * Also each component can contain several partitions (spanned or * striped volumes). 
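 *
 * A hypothetical example (object ids invented for illustration):
 * a simple volume is described by a volume VBLK (id 5), one
 * component VBLK (id 6, vol_id 5) and one partition VBLK
 * (id 7, comp_id 6, disk_id 1) that points at a disk VBLK (id 1).
 * A mirrored volume has one component per copy, while a spanned or
 * striped volume has one component with several partitions.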
*/ struct ldm_component { uint64_t id; /* object id */ uint64_t vol_id; /* parent volume object id */ int count; LIST_HEAD(, ldm_partition) partitions; LIST_ENTRY(ldm_component) entry; }; struct ldm_volume { uint64_t id; /* object id */ uint64_t size; /* volume size */ uint8_t number; /* used for ordering */ uint8_t part_type; /* partition type */ int count; LIST_HEAD(, ldm_component) components; LIST_ENTRY(ldm_volume) entry; }; struct ldm_disk { uint64_t id; /* object id */ struct uuid guid; /* disk guid */ LIST_ENTRY(ldm_disk) entry; }; #if 0 struct ldm_disk_group { uint64_t id; /* object id */ struct uuid guid; /* disk group guid */ u_char name[32]; /* disk group name */ LIST_ENTRY(ldm_disk_group) entry; }; #endif struct ldm_partition { uint64_t id; /* object id */ uint64_t disk_id; /* disk object id */ uint64_t comp_id; /* parent component object id */ uint64_t start; /* offset relative to disk start */ uint64_t offset; /* offset for spanned volumes */ uint64_t size; /* partition size */ LIST_ENTRY(ldm_partition) entry; }; /* * Each VBLK is 128 bytes long and has standard 16 bytes header. * Some of VBLK's fields are fixed size, but others has variable size. * Fields with variable size are prefixed with one byte length marker. * Some fields are strings and also can have fixed size and variable. * Strings with fixed size are NULL-terminated, others are not. * All VBLKs have same several first fields: * Offset Size Description * ---------------+---------------+-------------------------- * 0x00 16 standard VBLK header * 0x10 2 update status * 0x13 1 VBLK type * 0x18 PS object id * 0x18+ PN object name * * o Offset 0x18+ means '0x18 + length of all variable-width fields' * o 'P' in size column means 'prefixed' (variable-width), * 'S' - string, 'N' - number. */ #define LDM_VBLK_SIGN "VBLK" #define LDM_VBLK_SEQ_OFF 0x04 #define LDM_VBLK_GROUP_OFF 0x08 #define LDM_VBLK_INDEX_OFF 0x0c #define LDM_VBLK_COUNT_OFF 0x0e #define LDM_VBLK_TYPE_OFF 0x13 #define LDM_VBLK_OID_OFF 0x18 struct ldm_vblkhdr { uint32_t seq; /* sequence number */ uint32_t group; /* group number */ uint16_t index; /* index in the group */ uint16_t count; /* number of entries in the group */ }; #define LDM_VBLK_T_COMPONENT 0x32 #define LDM_VBLK_T_PARTITION 0x33 #define LDM_VBLK_T_DISK 0x34 #define LDM_VBLK_T_DISKGROUP 0x35 #define LDM_VBLK_T_DISK4 0x44 #define LDM_VBLK_T_DISKGROUP4 0x45 #define LDM_VBLK_T_VOLUME 0x51 struct ldm_vblk { uint8_t type; /* VBLK type */ union { uint64_t id; struct ldm_volume vol; struct ldm_component comp; struct ldm_disk disk; struct ldm_partition part; #if 0 struct ldm_disk_group disk_group; #endif } u; LIST_ENTRY(ldm_vblk) entry; }; /* * Some VBLKs contains a bit more data than can fit into 128 bytes. These * VBLKs are called eXtended VBLK. Before parsing, the data from these VBLK * should be placed into continuous memory buffer. We can determine xVBLK * by the count field in the standard VBLK header (count > 1). */ struct ldm_xvblk { uint32_t group; /* xVBLK group number */ uint32_t size; /* the total size of xVBLK */ uint8_t map; /* bitmask of currently saved VBLKs */ u_char *data; /* xVBLK data */ LIST_ENTRY(ldm_xvblk) entry; }; /* The internal representation of LDM database. 
*/ struct ldm_db { struct ldm_privhdr ph; /* private header */ struct ldm_tochdr th; /* TOC header */ struct ldm_vmdbhdr dh; /* VMDB header */ LIST_HEAD(, ldm_volume) volumes; LIST_HEAD(, ldm_disk) disks; LIST_HEAD(, ldm_vblk) vblks; LIST_HEAD(, ldm_xvblk) xvblks; }; static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA; struct g_part_ldm_table { struct g_part_table base; uint64_t db_offset; int is_gpt; }; struct g_part_ldm_entry { struct g_part_entry base; uint8_t type; }; static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *); static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *); static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_ldm_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *); static int g_part_ldm_read(struct g_part_table *, struct g_consumer *); static const char *g_part_ldm_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_ldm_write(struct g_part_table *, struct g_consumer *); static kobj_method_t g_part_ldm_methods[] = { KOBJMETHOD(g_part_add, g_part_ldm_add), KOBJMETHOD(g_part_bootcode, g_part_ldm_bootcode), KOBJMETHOD(g_part_create, g_part_ldm_create), KOBJMETHOD(g_part_destroy, g_part_ldm_destroy), KOBJMETHOD(g_part_dumpconf, g_part_ldm_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_ldm_dumpto), KOBJMETHOD(g_part_modify, g_part_ldm_modify), KOBJMETHOD(g_part_name, g_part_ldm_name), KOBJMETHOD(g_part_probe, g_part_ldm_probe), KOBJMETHOD(g_part_read, g_part_ldm_read), KOBJMETHOD(g_part_type, g_part_ldm_type), KOBJMETHOD(g_part_write, g_part_ldm_write), { 0, 0 } }; static struct g_part_scheme g_part_ldm_scheme = { "LDM", g_part_ldm_methods, sizeof(struct g_part_ldm_table), .gps_entrysz = sizeof(struct g_part_ldm_entry) }; G_PART_SCHEME_DECLARE(g_part_ldm); +MODULE_VERSION(geom_part_ldm, 0); static struct g_part_ldm_alias { u_char typ; int alias; } ldm_alias_match[] = { { DOSPTYP_NTFS, G_PART_ALIAS_MS_NTFS }, { DOSPTYP_FAT32, G_PART_ALIAS_MS_FAT32 }, { DOSPTYP_386BSD, G_PART_ALIAS_FREEBSD }, { DOSPTYP_LDM, G_PART_ALIAS_MS_LDM_DATA }, { DOSPTYP_LINSWP, G_PART_ALIAS_LINUX_SWAP }, { DOSPTYP_LINUX, G_PART_ALIAS_LINUX_DATA }, { DOSPTYP_LINLVM, G_PART_ALIAS_LINUX_LVM }, { DOSPTYP_LINRAID, G_PART_ALIAS_LINUX_RAID }, }; static u_char* ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error) { struct g_provider *pp; u_char *buf; pp = cp->provider; buf = g_read_data(cp, off, pp->sectorsize, error); if (buf == NULL) return (NULL); if (memcmp(buf, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) != 0) { LDM_DEBUG(1, "%s: invalid LDM private header signature", pp->name); g_free(buf); buf = NULL; *error = EINVAL; } return (buf); } static int ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr, const u_char *buf) { uint32_t version; int error; memset(hdr, 0, sizeof(*hdr)); version = be32dec(buf + LDM_PH_VERSION_OFF); if (version != LDM_VERSION_2K && version != LDM_VERSION_VISTA) { LDM_DEBUG(0, "%s: unsupported LDM version %u.%u", cp->provider->name, version >> 16, version & 
0xFFFF); return (ENXIO); } error = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid); if (error != 0) return (error); error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid); if (error != 0) return (error); strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF, sizeof(hdr->dg_name)); hdr->start = be64dec(buf + LDM_PH_START_OFF); hdr->size = be64dec(buf + LDM_PH_SIZE_OFF); hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF); hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF); hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF); hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF); hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF); hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF); return (0); } static int ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt) { struct g_consumer *cp2; struct g_provider *pp; struct ldm_privhdr hdr; uint64_t offset, last; int error, found, i; u_char *buf; pp = cp->provider; if (is_gpt) { /* * The last LBA is used in several checks below, for the * GPT case it should be calculated relative to the whole * disk. */ cp2 = LIST_FIRST(&pp->geom->consumer); last = cp2->provider->mediasize / cp2->provider->sectorsize - 1; } else last = pp->mediasize / pp->sectorsize - 1; for (found = 0, i = is_gpt; i < nitems(ldm_ph_off); i++) { offset = ldm_ph_off[i]; /* * In the GPT case consumer is attached to the LDM metadata * partition and we don't need add db_offset. */ if (!is_gpt) offset += db->ph.db_offset; if (i == LDM_PH_MBRINDEX) { /* * Prepare to errors and setup new base offset * to read backup private headers. Assume that LDM * database is in the last 1Mbyte area. */ db->ph.db_offset = last - LDM_DB_SIZE; } buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error); if (buf == NULL) { LDM_DEBUG(1, "%s: failed to read private header " "%d at LBA %ju", pp->name, i, (uintmax_t)offset); continue; } error = ldm_privhdr_parse(cp, &hdr, buf); if (error != 0) { LDM_DEBUG(1, "%s: failed to parse private " "header %d", pp->name, i); LDM_DUMP(buf, pp->sectorsize); g_free(buf); continue; } g_free(buf); if (hdr.start > last || hdr.start + hdr.size - 1 > last || (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) || hdr.db_size != LDM_DB_SIZE || hdr.db_offset + LDM_DB_SIZE - 1 > last || hdr.th_offset[0] >= LDM_DB_SIZE || hdr.th_offset[1] >= LDM_DB_SIZE || hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) { LDM_DEBUG(1, "%s: invalid values in the " "private header %d", pp->name, i); LDM_DEBUG(2, "%s: start: %jd, size: %jd, " "db_offset: %jd, db_size: %jd, th_offset0: %jd, " "th_offset1: %jd, conf_size: %jd, log_size: %jd, " "last: %jd", pp->name, hdr.start, hdr.size, hdr.db_offset, hdr.db_size, hdr.th_offset[0], hdr.th_offset[1], hdr.conf_size, hdr.log_size, last); continue; } if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) { LDM_DEBUG(0, "%s: private headers are not equal", pp->name); if (i > 1) { /* * We have different headers in the LDM. * We can not trust this metadata. */ LDM_DEBUG(0, "%s: refuse LDM metadata", pp->name); return (EINVAL); } /* * We already have read primary private header * and it differs from this backup one. * Prefer the backup header and save it. 
*/ found = 0; } if (found == 0) memcpy(&db->ph, &hdr, sizeof(hdr)); found = 1; } if (found == 0) { LDM_DEBUG(1, "%s: valid LDM private header not found", pp->name); return (ENXIO); } return (0); } static int ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp) { struct g_part_table *gpt; struct g_part_entry *e; struct g_consumer *cp2; int error; cp2 = LIST_NEXT(cp, consumer); g_topology_lock(); gpt = cp->provider->geom->softc; error = 0; LIST_FOREACH(e, &gpt->gpt_entry, gpe_entry) { if (cp->provider == e->gpe_pp) { /* ms-ldm-metadata partition */ if (e->gpe_start != db->ph.db_offset || e->gpe_end != db->ph.db_offset + LDM_DB_SIZE - 1) error++; } else if (cp2->provider == e->gpe_pp) { /* ms-ldm-data partition */ if (e->gpe_start != db->ph.start || e->gpe_end != db->ph.start + db->ph.size - 1) error++; } if (error != 0) { LDM_DEBUG(0, "%s: GPT partition %d boundaries " "do not match with the LDM metadata", e->gpe_pp->name, e->gpe_index); error = ENXIO; break; } } g_topology_unlock(); return (error); } static int ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp) { struct g_provider *pp; struct ldm_tochdr hdr; uint64_t offset, conf_size, log_size; int error, found, i; u_char *buf; pp = cp->provider; for (i = 0, found = 0; i < LDM_TH_COUNT; i++) { offset = db->ph.db_offset + db->ph.th_offset[i]; buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) { LDM_DEBUG(1, "%s: failed to read TOC header " "at LBA %ju", pp->name, (uintmax_t)offset); continue; } if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 || memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1, strlen(LDM_TH_NAME1)) != 0 || memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2, strlen(LDM_TH_NAME2)) != 0) { LDM_DEBUG(1, "%s: failed to parse TOC header " "at LBA %ju", pp->name, (uintmax_t)offset); LDM_DUMP(buf, pp->sectorsize); g_free(buf); continue; } hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF); hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF); conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF); log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF); if (conf_size != db->ph.conf_size || hdr.conf_offset + conf_size >= LDM_DB_SIZE || log_size != db->ph.log_size || hdr.log_offset + log_size >= LDM_DB_SIZE) { LDM_DEBUG(1, "%s: invalid values in the " "TOC header at LBA %ju", pp->name, (uintmax_t)offset); LDM_DUMP(buf, pp->sectorsize); g_free(buf); continue; } g_free(buf); if (found == 0) memcpy(&db->th, &hdr, sizeof(hdr)); found = 1; } if (found == 0) { LDM_DEBUG(0, "%s: valid LDM TOC header not found.", pp->name); return (ENXIO); } return (0); } static int ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp) { struct g_provider *pp; struct uuid dg_guid; uint64_t offset; uint32_t version; int error; u_char *buf; pp = cp->provider; offset = db->ph.db_offset + db->th.conf_offset; buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) { LDM_DEBUG(0, "%s: failed to read VMDB header at " "LBA %ju", pp->name, (uintmax_t)offset); return (error); } if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) { g_free(buf); LDM_DEBUG(0, "%s: failed to parse VMDB header at " "LBA %ju", pp->name, (uintmax_t)offset); return (ENXIO); } /* Check version. 
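 * The version word packs the major number in the upper 16 bits and
 * the minor number in the lower 16 bits, so the only accepted value
 * 0x0004000a corresponds to VMDB version 4.10; the PRIVHEAD versions
 * are encoded the same way (LDM_VERSION_2K 0x0002000b is 2.11,
 * LDM_VERSION_VISTA 0x0002000c is 2.12).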
*/ version = be32dec(buf + LDM_DB_VERSION_OFF); if (version != 0x4000A) { g_free(buf); LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u", pp->name, version >> 16, version & 0xFFFF); return (ENXIO); } /* * Check VMDB update status: * 1 - in a consistent state; * 2 - in a creation phase; * 3 - in a deletion phase; */ if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) { g_free(buf); LDM_DEBUG(0, "%s: VMDB is not in a consistent state", pp->name); return (ENXIO); } db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF); db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF); error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid); /* Compare disk group name and guid from VMDB and private headers */ if (error != 0 || db->dh.size == 0 || pp->sectorsize % db->dh.size != 0 || strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 || memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 || db->dh.size * db->dh.last_seq > db->ph.conf_size * pp->sectorsize) { LDM_DEBUG(0, "%s: invalid values in the VMDB header", pp->name); LDM_DUMP(buf, pp->sectorsize); g_free(buf); return (EINVAL); } g_free(buf); return (0); } static int ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p) { struct ldm_xvblk *blk; size_t size; size = db->dh.size - 16; LIST_FOREACH(blk, &db->xvblks, entry) if (blk->group == vh->group) break; if (blk == NULL) { blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO); blk->group = vh->group; blk->size = size * vh->count + 16; blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO); blk->map = 0xFF << vh->count; LIST_INSERT_HEAD(&db->xvblks, blk, entry); } if ((blk->map & (1 << vh->index)) != 0) { /* Block with given index has been already saved. */ return (EINVAL); } /* Copy the data block to the place related to index. */ memcpy(blk->data + size * vh->index + 16, p + 16, size); blk->map |= 1 << vh->index; return (0); } /* Read the variable-width numeric field and return new offset */ static int ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range) { uint64_t num; uint8_t len; len = buf[offset++]; if (len > sizeof(uint64_t) || len + offset >= range) return (-1); for (num = 0; len > 0; len--) num = (num << 8) | buf[offset++]; *result = num; return (offset); } /* Read the variable-width string and return new offset */ static int ldm_vstr_get(const u_char *buf, int offset, u_char *result, size_t maxlen, size_t range) { uint8_t len; len = buf[offset++]; if (len >= maxlen || len + offset >= range) return (-1); memcpy(result, buf + offset, len); result[len] = '\0'; return (offset + len); } /* Just skip the variable-width variable and return new offset */ static int ldm_vparm_skip(const u_char *buf, int offset, size_t range) { uint8_t len; len = buf[offset++]; if (offset + len >= range) return (-1); return (offset + len); } static int ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size) { struct ldm_vblk *blk; struct ldm_volume *volume, *last; const char *errstr; u_char vstr[64]; int error, offset; blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO); blk->type = p[LDM_VBLK_TYPE_OFF]; offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size); if (offset < 0) { errstr = "object id"; goto fail; } offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size); if (offset < 0) { errstr = "object name"; goto fail; } switch (blk->type) { /* * Component VBLK fields: * Offset Size Description * ------------+-------+------------------------ * 0x18+ PS volume state * 0x18+5 PN component children count * 0x1D+16 PN parent's volume object id * 0x2D+1 PN stripe size */ case 
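	/*
	 * Illustrative example of the prefixed encoding handled by
	 * ldm_vnum_get() above (the bytes are invented): a field stored
	 * as 02 17 40 has a one-byte length marker of 2 followed by two
	 * big-endian payload bytes, so the decoded value is 0x1740 and
	 * the offset advances by three; length markers larger than
	 * sizeof(uint64_t) are rejected.
	 */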
LDM_VBLK_T_COMPONENT: offset = ldm_vparm_skip(p, offset, size); if (offset < 0) { errstr = "volume state"; goto fail; } offset = ldm_vparm_skip(p, offset + 5, size); if (offset < 0) { errstr = "children count"; goto fail; } offset = ldm_vnum_get(p, offset + 16, &blk->u.comp.vol_id, size); if (offset < 0) { errstr = "volume id"; goto fail; } break; /* * Partition VBLK fields: * Offset Size Description * ------------+-------+------------------------ * 0x18+12 8 partition start offset * 0x18+20 8 volume offset * 0x18+28 PN partition size * 0x34+ PN parent's component object id * 0x34+ PN disk's object id */ case LDM_VBLK_T_PARTITION: if (offset + 28 >= size) { errstr = "too small buffer"; goto fail; } blk->u.part.start = be64dec(p + offset + 12); blk->u.part.offset = be64dec(p + offset + 20); offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size); if (offset < 0) { errstr = "partition size"; goto fail; } offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size); if (offset < 0) { errstr = "component id"; goto fail; } offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size); if (offset < 0) { errstr = "disk id"; goto fail; } break; /* * Disk VBLK fields: * Offset Size Description * ------------+-------+------------------------ * 0x18+ PS disk GUID */ case LDM_VBLK_T_DISK: errstr = "disk guid"; offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size); if (offset < 0) goto fail; error = parse_uuid(vstr, &blk->u.disk.guid); if (error != 0) goto fail; LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry); break; /* * Disk group VBLK fields: * Offset Size Description * ------------+-------+------------------------ * 0x18+ PS disk group GUID */ case LDM_VBLK_T_DISKGROUP: #if 0 strncpy(blk->u.disk_group.name, vstr, sizeof(blk->u.disk_group.name)); offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size); if (offset < 0) { errstr = "disk group guid"; goto fail; } error = parse_uuid(name, &blk->u.disk_group.guid); if (error != 0) { errstr = "disk group guid"; goto fail; } LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry); #endif break; /* * Disk VBLK fields: * Offset Size Description * ------------+-------+------------------------ * 0x18+ 16 disk GUID */ case LDM_VBLK_T_DISK4: be_uuid_dec(p + offset, &blk->u.disk.guid); LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry); break; /* * Disk group VBLK fields: * Offset Size Description * ------------+-------+------------------------ * 0x18+ 16 disk GUID */ case LDM_VBLK_T_DISKGROUP4: #if 0 strncpy(blk->u.disk_group.name, vstr, sizeof(blk->u.disk_group.name)); be_uuid_dec(p + offset, &blk->u.disk.guid); LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry); #endif break; /* * Volume VBLK fields: * Offset Size Description * ------------+-------+------------------------ * 0x18+ PS volume type * 0x18+ PS unknown * 0x18+ 14(S) volume state * 0x18+16 1 volume number * 0x18+21 PN volume children count * 0x2D+16 PN volume size * 0x3D+4 1 partition type */ case LDM_VBLK_T_VOLUME: offset = ldm_vparm_skip(p, offset, size); if (offset < 0) { errstr = "volume type"; goto fail; } offset = ldm_vparm_skip(p, offset, size); if (offset < 0) { errstr = "unknown param"; goto fail; } if (offset + 21 >= size) { errstr = "too small buffer"; goto fail; } blk->u.vol.number = p[offset + 16]; offset = ldm_vparm_skip(p, offset + 21, size); if (offset < 0) { errstr = "children count"; goto fail; } offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size); if (offset < 0) { errstr = "volume size"; goto fail; } if (offset + 4 >= size) { errstr = "too small 
buffer"; goto fail; } blk->u.vol.part_type = p[offset + 4]; /* keep volumes ordered by volume number */ last = NULL; LIST_FOREACH(volume, &db->volumes, entry) { if (volume->number > blk->u.vol.number) break; last = volume; } if (last != NULL) LIST_INSERT_AFTER(last, &blk->u.vol, entry); else LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry); break; default: LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type); LDM_DUMP(p, size); } LIST_INSERT_HEAD(&db->vblks, blk, entry); return (0); fail: LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n", errstr, blk->type); LDM_DUMP(p, size); g_free(blk); return (EINVAL); } static void ldm_vmdb_free(struct ldm_db *db) { struct ldm_vblk *vblk; struct ldm_xvblk *xvblk; while (!LIST_EMPTY(&db->xvblks)) { xvblk = LIST_FIRST(&db->xvblks); LIST_REMOVE(xvblk, entry); g_free(xvblk->data); g_free(xvblk); } while (!LIST_EMPTY(&db->vblks)) { vblk = LIST_FIRST(&db->vblks); LIST_REMOVE(vblk, entry); g_free(vblk); } } static int ldm_vmdb_parse(struct ldm_db *db, struct g_consumer *cp) { struct g_provider *pp; struct ldm_vblk *vblk; struct ldm_xvblk *xvblk; struct ldm_volume *volume; struct ldm_component *comp; struct ldm_vblkhdr vh; u_char *buf, *p; size_t size, n, sectors; uint64_t offset; int error; pp = cp->provider; size = howmany(db->dh.last_seq * db->dh.size, pp->sectorsize); size -= 1; /* one sector takes vmdb header */ for (n = 0; n < size; n += MAXPHYS / pp->sectorsize) { offset = db->ph.db_offset + db->th.conf_offset + n + 1; sectors = (size - n) > (MAXPHYS / pp->sectorsize) ? MAXPHYS / pp->sectorsize: size - n; /* read VBLKs */ buf = g_read_data(cp, offset * pp->sectorsize, sectors * pp->sectorsize, &error); if (buf == NULL) { LDM_DEBUG(0, "%s: failed to read VBLK\n", pp->name); goto fail; } for (p = buf; p < buf + sectors * pp->sectorsize; p += db->dh.size) { if (memcmp(p, LDM_VBLK_SIGN, strlen(LDM_VBLK_SIGN)) != 0) { LDM_DEBUG(0, "%s: no VBLK signature\n", pp->name); LDM_DUMP(p, db->dh.size); goto fail; } vh.seq = be32dec(p + LDM_VBLK_SEQ_OFF); vh.group = be32dec(p + LDM_VBLK_GROUP_OFF); /* skip empty blocks */ if (vh.seq == 0 || vh.group == 0) continue; vh.index = be16dec(p + LDM_VBLK_INDEX_OFF); vh.count = be16dec(p + LDM_VBLK_COUNT_OFF); if (vh.count == 0 || vh.count > 4 || vh.seq > db->dh.last_seq) { LDM_DEBUG(0, "%s: invalid values " "in the VBLK header\n", pp->name); LDM_DUMP(p, db->dh.size); goto fail; } if (vh.count > 1) { error = ldm_xvblk_handle(db, &vh, p); if (error != 0) { LDM_DEBUG(0, "%s: xVBLK " "is corrupted\n", pp->name); LDM_DUMP(p, db->dh.size); goto fail; } continue; } if (be16dec(p + 16) != 0) LDM_DEBUG(1, "%s: VBLK update" " status is %u\n", pp->name, be16dec(p + 16)); error = ldm_vblk_handle(db, p, db->dh.size); if (error != 0) goto fail; } g_free(buf); buf = NULL; } /* Parse xVBLKs */ while (!LIST_EMPTY(&db->xvblks)) { xvblk = LIST_FIRST(&db->xvblks); if (xvblk->map == 0xFF) { error = ldm_vblk_handle(db, xvblk->data, xvblk->size); if (error != 0) goto fail; } else { LDM_DEBUG(0, "%s: incomplete or corrupt " "xVBLK found\n", pp->name); goto fail; } LIST_REMOVE(xvblk, entry); g_free(xvblk->data); g_free(xvblk); } /* construct all VBLKs relations */ LIST_FOREACH(volume, &db->volumes, entry) { LIST_FOREACH(vblk, &db->vblks, entry) if (vblk->type == LDM_VBLK_T_COMPONENT && vblk->u.comp.vol_id == volume->id) { LIST_INSERT_HEAD(&volume->components, &vblk->u.comp, entry); volume->count++; } LIST_FOREACH(comp, &volume->components, entry) LIST_FOREACH(vblk, &db->vblks, entry) if (vblk->type == LDM_VBLK_T_PARTITION && 
vblk->u.part.comp_id == comp->id) { LIST_INSERT_HEAD(&comp->partitions, &vblk->u.part, entry); comp->count++; } } return (0); fail: ldm_vmdb_free(db); g_free(buf); return (ENXIO); } static int g_part_ldm_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { return (ENOSYS); } static int g_part_ldm_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { return (ENOSYS); } static int g_part_ldm_create(struct g_part_table *basetable, struct g_part_parms *gpp) { return (ENOSYS); } static int g_part_ldm_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_ldm_table *table; struct g_provider *pp; table = (struct g_part_ldm_table *)basetable; /* * To destroy LDM on a disk partitioned with GPT we should delete * ms-ldm-metadata partition, but we can't do this via standard * GEOM_PART method. */ if (table->is_gpt) return (ENOSYS); pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; /* * To destroy LDM we should wipe MBR, first private header and * backup private headers. */ basetable->gpt_smhead = (1 << ldm_ph_off[0]) | 1; /* * Don't touch last backup private header when LDM database is * not located in the last 1MByte area. * XXX: can't remove all blocks. */ if (table->db_offset + LDM_DB_SIZE == pp->mediasize / pp->sectorsize) basetable->gpt_smtail = 1; return (0); } static void g_part_ldm_dumpconf(struct g_part_table *basetable, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_ldm_entry *entry; entry = (struct g_part_ldm_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs LDM xt %u", entry->type); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s%u\n", indent, entry->type); } else { /* confxml: scheme information */ } } static int g_part_ldm_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { return (0); } static int g_part_ldm_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { return (ENOSYS); } static const char * g_part_ldm_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { snprintf(buf, bufsz, "s%d", baseentry->gpe_index); return (buf); } static int ldm_gpt_probe(struct g_part_table *basetable, struct g_consumer *cp) { struct g_part_ldm_table *table; struct g_part_table *gpt; struct g_part_entry *entry; struct g_consumer *cp2; struct gpt_ent *part; u_char *buf; int error; /* * XXX: We use some knowledge about GEOM_PART_GPT internal * structures, but it is easier than parse GPT by himself. 
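 * On such a disk the LDM database lives in a dedicated
 * ms-ldm-metadata partition of LDM_DB_SIZE (2048) sectors, and the
 * private header is read from sector ldm_ph_off[LDM_PH_GPTINDEX]
 * (2047) of that partition, its last sector, which is what the probe
 * below attaches a temporary consumer to and reads.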
*/ g_topology_lock(); gpt = cp->provider->geom->softc; LIST_FOREACH(entry, &gpt->gpt_entry, gpe_entry) { part = (struct gpt_ent *)(entry + 1); /* Search ms-ldm-metadata partition */ if (memcmp(&part->ent_type, &gpt_uuid_ms_ldm_metadata, sizeof(struct uuid)) != 0 || entry->gpe_end - entry->gpe_start < LDM_DB_SIZE - 1) continue; /* Create new consumer and attach it to metadata partition */ cp2 = g_new_consumer(cp->geom); error = g_attach(cp2, entry->gpe_pp); if (error != 0) { g_destroy_consumer(cp2); g_topology_unlock(); return (ENXIO); } error = g_access(cp2, 1, 0, 0); if (error != 0) { g_detach(cp2); g_destroy_consumer(cp2); g_topology_unlock(); return (ENXIO); } g_topology_unlock(); LDM_DEBUG(2, "%s: LDM metadata partition %s found in the GPT", cp->provider->name, cp2->provider->name); /* Read the LDM private header */ buf = ldm_privhdr_read(cp2, ldm_ph_off[LDM_PH_GPTINDEX] * cp2->provider->sectorsize, &error); if (buf != NULL) { table = (struct g_part_ldm_table *)basetable; table->is_gpt = 1; g_free(buf); return (G_PART_PROBE_PRI_HIGH); } /* second consumer is no longer needed. */ g_topology_lock(); g_access(cp2, -1, 0, 0); g_detach(cp2); g_destroy_consumer(cp2); break; } g_topology_unlock(); return (ENXIO); } static int g_part_ldm_probe(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; u_char *buf, type[64]; int error, idx; pp = cp->provider; if (pp->sectorsize != 512) return (ENXIO); error = g_getattr("PART::scheme", cp, &type); if (error == 0 && strcmp(type, "GPT") == 0) { if (g_getattr("PART::type", cp, &type) != 0 || strcmp(type, "ms-ldm-data") != 0) return (ENXIO); error = ldm_gpt_probe(basetable, cp); return (error); } if (basetable->gpt_depth != 0) return (ENXIO); /* LDM has 1M metadata area */ if (pp->mediasize <= 1024 * 1024) return (ENOSPC); /* Check that there's a MBR */ buf = g_read_data(cp, 0, pp->sectorsize, &error); if (buf == NULL) return (error); if (le16dec(buf + DOSMAGICOFFSET) != DOSMAGIC) { g_free(buf); return (ENXIO); } error = ENXIO; /* Check that we have LDM partitions in the MBR */ for (idx = 0; idx < NDOSPART && error != 0; idx++) { if (buf[DOSPARTOFF + idx * DOSPARTSIZE + 4] == DOSPTYP_LDM) error = 0; } g_free(buf); if (error == 0) { LDM_DEBUG(2, "%s: LDM data partitions found in MBR", pp->name); /* Read the LDM private header */ buf = ldm_privhdr_read(cp, ldm_ph_off[LDM_PH_MBRINDEX] * pp->sectorsize, &error); if (buf == NULL) return (error); g_free(buf); return (G_PART_PROBE_PRI_HIGH); } return (error); } static int g_part_ldm_read(struct g_part_table *basetable, struct g_consumer *cp) { struct g_part_ldm_table *table; struct g_part_ldm_entry *entry; struct g_consumer *cp2; struct ldm_component *comp; struct ldm_partition *part; struct ldm_volume *vol; struct ldm_disk *disk; struct ldm_db db; int error, index, skipped; table = (struct g_part_ldm_table *)basetable; memset(&db, 0, sizeof(db)); cp2 = cp; /* ms-ldm-data */ if (table->is_gpt) cp = LIST_FIRST(&cp->geom->consumer); /* ms-ldm-metadata */ /* Read and parse LDM private headers. */ error = ldm_privhdr_check(&db, cp, table->is_gpt); if (error != 0) goto gpt_cleanup; basetable->gpt_first = table->is_gpt ? 
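	    /*
	     * In the GPT case the table sits on the ms-ldm-data
	     * partition itself, so entries start at LBA 0 of that
	     * provider; in the MBR case they are addressed from
	     * db.ph.start, the start of the logical disk recorded in
	     * the private header.
	     */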
0: db.ph.start; basetable->gpt_last = basetable->gpt_first + db.ph.size - 1; table->db_offset = db.ph.db_offset; /* Make additional checks for GPT */ if (table->is_gpt) { error = ldm_gpt_check(&db, cp); if (error != 0) goto gpt_cleanup; /* * Now we should reset database offset to zero, because our * consumer cp is attached to the ms-ldm-metadata partition * and we don't need add db_offset to read from it. */ db.ph.db_offset = 0; } /* Read and parse LDM TOC headers. */ error = ldm_tochdr_check(&db, cp); if (error != 0) goto gpt_cleanup; /* Read and parse LDM VMDB header. */ error = ldm_vmdbhdr_check(&db, cp); if (error != 0) goto gpt_cleanup; error = ldm_vmdb_parse(&db, cp); /* * For the GPT case we must detach and destroy * second consumer before return. */ gpt_cleanup: if (table->is_gpt) { g_topology_lock(); g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); cp = cp2; } if (error != 0) return (error); /* Search current disk in the disk list. */ LIST_FOREACH(disk, &db.disks, entry) if (memcmp(&disk->guid, &db.ph.disk_guid, sizeof(struct uuid)) == 0) break; if (disk == NULL) { LDM_DEBUG(1, "%s: no LDM volumes on this disk", cp->provider->name); ldm_vmdb_free(&db); return (ENXIO); } index = 1; LIST_FOREACH(vol, &db.volumes, entry) { LIST_FOREACH(comp, &vol->components, entry) { /* Skip volumes from different disks. */ part = LIST_FIRST(&comp->partitions); if (part->disk_id != disk->id) continue; skipped = 0; /* We don't support spanned and striped volumes. */ if (comp->count > 1 || part->offset != 0) { LDM_DEBUG(1, "%s: LDM volume component " "%ju has %u partitions. Skipped", cp->provider->name, (uintmax_t)comp->id, comp->count); skipped = 1; } /* * Allow mirrored volumes only when they are explicitly * allowed with kern.geom.part.ldm.show_mirrors=1. */ if (vol->count > 1 && show_mirrors == 0) { LDM_DEBUG(1, "%s: LDM volume %ju has %u " "components. Skipped", cp->provider->name, (uintmax_t)vol->id, vol->count); skipped = 1; } entry = (struct g_part_ldm_entry *)g_part_new_entry( basetable, index++, basetable->gpt_first + part->start, basetable->gpt_first + part->start + part->size - 1); /* * Mark skipped partition as ms-ldm-data partition. * We do not support them, but it is better to show * that we have something there, than just show * free space. */ if (skipped == 0) entry->type = vol->part_type; else entry->type = DOSPTYP_LDM; LDM_DEBUG(1, "%s: new volume id: %ju, start: %ju," " end: %ju, type: 0x%02x\n", cp->provider->name, (uintmax_t)part->id,(uintmax_t)part->start + basetable->gpt_first, (uintmax_t)part->start + part->size + basetable->gpt_first - 1, vol->part_type); } } ldm_vmdb_free(&db); return (error); } static const char * g_part_ldm_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_ldm_entry *entry; int i; entry = (struct g_part_ldm_entry *)baseentry; for (i = 0; i < nitems(ldm_alias_match); i++) { if (ldm_alias_match[i].typ == entry->type) return (g_part_alias_name(ldm_alias_match[i].alias)); } snprintf(buf, bufsz, "!%d", entry->type); return (buf); } static int g_part_ldm_write(struct g_part_table *basetable, struct g_consumer *cp) { return (ENOSYS); } Index: stable/11/sys/geom/part/g_part_mbr.c =================================================================== --- stable/11/sys/geom/part/g_part_mbr.c (revision 332639) +++ stable/11/sys/geom/part/g_part_mbr.c (revision 332640) @@ -1,613 +1,614 @@ /*- * Copyright (c) 2007, 2008 Marcel Moolenaar * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_mbr, "GEOM partitioning class for MBR support"); SYSCTL_DECL(_kern_geom_part); static SYSCTL_NODE(_kern_geom_part, OID_AUTO, mbr, CTLFLAG_RW, 0, "GEOM_PART_MBR Master Boot Record"); static u_int enforce_chs = 0; SYSCTL_UINT(_kern_geom_part_mbr, OID_AUTO, enforce_chs, CTLFLAG_RWTUN, &enforce_chs, 0, "Enforce alignment to CHS addressing"); #define MBRSIZE 512 struct g_part_mbr_table { struct g_part_table base; u_char mbr[MBRSIZE]; }; struct g_part_mbr_entry { struct g_part_entry base; struct dos_partition ent; }; static int g_part_mbr_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_mbr_bootcode(struct g_part_table *, struct g_part_parms *); static int g_part_mbr_create(struct g_part_table *, struct g_part_parms *); static int g_part_mbr_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_mbr_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_mbr_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_mbr_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_mbr_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_mbr_probe(struct g_part_table *, struct g_consumer *); static int g_part_mbr_read(struct g_part_table *, struct g_consumer *); static int g_part_mbr_setunset(struct g_part_table *, struct g_part_entry *, const char *, unsigned int); static const char *g_part_mbr_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_mbr_write(struct g_part_table *, struct g_consumer *); static int g_part_mbr_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static kobj_method_t g_part_mbr_methods[] = { KOBJMETHOD(g_part_add, g_part_mbr_add), KOBJMETHOD(g_part_bootcode, g_part_mbr_bootcode), KOBJMETHOD(g_part_create, g_part_mbr_create), KOBJMETHOD(g_part_destroy, g_part_mbr_destroy), KOBJMETHOD(g_part_dumpconf, g_part_mbr_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_mbr_dumpto), KOBJMETHOD(g_part_modify, 
g_part_mbr_modify), KOBJMETHOD(g_part_resize, g_part_mbr_resize), KOBJMETHOD(g_part_name, g_part_mbr_name), KOBJMETHOD(g_part_probe, g_part_mbr_probe), KOBJMETHOD(g_part_read, g_part_mbr_read), KOBJMETHOD(g_part_setunset, g_part_mbr_setunset), KOBJMETHOD(g_part_type, g_part_mbr_type), KOBJMETHOD(g_part_write, g_part_mbr_write), { 0, 0 } }; static struct g_part_scheme g_part_mbr_scheme = { "MBR", g_part_mbr_methods, sizeof(struct g_part_mbr_table), .gps_entrysz = sizeof(struct g_part_mbr_entry), .gps_minent = NDOSPART, .gps_maxent = NDOSPART, .gps_bootcodesz = MBRSIZE, }; G_PART_SCHEME_DECLARE(g_part_mbr); +MODULE_VERSION(geom_part_mbr, 0); static struct g_part_mbr_alias { u_char typ; int alias; } mbr_alias_match[] = { { DOSPTYP_386BSD, G_PART_ALIAS_FREEBSD }, { DOSPTYP_EXT, G_PART_ALIAS_EBR }, { DOSPTYP_NTFS, G_PART_ALIAS_MS_NTFS }, { DOSPTYP_FAT16, G_PART_ALIAS_MS_FAT16 }, { DOSPTYP_FAT32, G_PART_ALIAS_MS_FAT32 }, { DOSPTYP_EXTLBA, G_PART_ALIAS_EBR }, { DOSPTYP_LDM, G_PART_ALIAS_MS_LDM_DATA }, { DOSPTYP_LINSWP, G_PART_ALIAS_LINUX_SWAP }, { DOSPTYP_LINUX, G_PART_ALIAS_LINUX_DATA }, { DOSPTYP_LINLVM, G_PART_ALIAS_LINUX_LVM }, { DOSPTYP_LINRAID, G_PART_ALIAS_LINUX_RAID }, { DOSPTYP_PPCBOOT, G_PART_ALIAS_PREP_BOOT }, { DOSPTYP_VMFS, G_PART_ALIAS_VMFS }, { DOSPTYP_VMKDIAG, G_PART_ALIAS_VMKDIAG }, { DOSPTYP_APPLE_UFS, G_PART_ALIAS_APPLE_UFS }, { DOSPTYP_APPLE_BOOT, G_PART_ALIAS_APPLE_BOOT }, { DOSPTYP_HFS, G_PART_ALIAS_APPLE_HFS }, }; static int mbr_parse_type(const char *type, u_char *dp_typ) { const char *alias; char *endp; long lt; int i; if (type[0] == '!') { lt = strtol(type + 1, &endp, 0); if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 256) return (EINVAL); *dp_typ = (u_char)lt; return (0); } for (i = 0; i < nitems(mbr_alias_match); i++) { alias = g_part_alias_name(mbr_alias_match[i].alias); if (strcasecmp(type, alias) == 0) { *dp_typ = mbr_alias_match[i].typ; return (0); } } return (EINVAL); } static int mbr_probe_bpb(u_char *bpb) { uint16_t secsz; uint8_t clstsz; #define PO2(x) ((x & (x - 1)) == 0) secsz = le16dec(bpb); if (secsz < 512 || secsz > 4096 || !PO2(secsz)) return (0); clstsz = bpb[2]; if (clstsz < 1 || clstsz > 128 || !PO2(clstsz)) return (0); #undef PO2 return (1); } static void mbr_set_chs(struct g_part_table *table, uint32_t lba, u_char *cylp, u_char *hdp, u_char *secp) { uint32_t cyl, hd, sec; sec = lba % table->gpt_sectors + 1; lba /= table->gpt_sectors; hd = lba % table->gpt_heads; lba /= table->gpt_heads; cyl = lba; if (cyl > 1023) sec = hd = cyl = ~0; *cylp = cyl & 0xff; *hdp = hd & 0xff; *secp = (sec & 0x3f) | ((cyl >> 2) & 0xc0); } static int mbr_align(struct g_part_table *basetable, uint32_t *start, uint32_t *size) { uint32_t sectors; if (enforce_chs == 0) return (0); sectors = basetable->gpt_sectors; if (*size < sectors) return (EINVAL); if (start != NULL && (*start % sectors)) { *size += (*start % sectors) - sectors; *start -= (*start % sectors) - sectors; } if (*size % sectors) *size -= (*size % sectors); if (*size < sectors) return (EINVAL); return (0); } static int g_part_mbr_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_mbr_entry *entry; uint32_t start, size; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); entry = (struct g_part_mbr_entry *)baseentry; start = gpp->gpp_start; size = gpp->gpp_size; if (mbr_align(basetable, &start, &size) != 0) return (EINVAL); if (baseentry->gpe_deleted) bzero(&entry->ent, sizeof(entry->ent)); KASSERT(baseentry->gpe_start <= start, ("%s", 
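	/*
	 * mbr_set_chs() below packs the classic CHS encoding.  With an
	 * assumed 255-head/63-sector geometry (purely illustrative; the
	 * real values come from gpt_heads and gpt_sectors), LBA 2048
	 * becomes cylinder 0, head 32, sector 33, stored as bytes 0x00,
	 * 0x20 and 0x21 (the top two cylinder bits live in the high
	 * bits of the sector byte); cylinders above 1023 saturate to
	 * all-ones.
	 */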
__func__)); KASSERT(baseentry->gpe_end >= start + size - 1, ("%s", __func__)); baseentry->gpe_start = start; baseentry->gpe_end = start + size - 1; entry->ent.dp_start = start; entry->ent.dp_size = size; mbr_set_chs(basetable, baseentry->gpe_start, &entry->ent.dp_scyl, &entry->ent.dp_shd, &entry->ent.dp_ssect); mbr_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl, &entry->ent.dp_ehd, &entry->ent.dp_esect); return (mbr_parse_type(gpp->gpp_type, &entry->ent.dp_typ)); } static int g_part_mbr_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_mbr_table *table; uint32_t dsn; if (gpp->gpp_codesize != MBRSIZE) return (ENODEV); table = (struct g_part_mbr_table *)basetable; dsn = *(uint32_t *)(table->mbr + DOSDSNOFF); bcopy(gpp->gpp_codeptr, table->mbr, DOSPARTOFF); if (dsn != 0) *(uint32_t *)(table->mbr + DOSDSNOFF) = dsn; return (0); } static int g_part_mbr_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_mbr_table *table; pp = gpp->gpp_provider; if (pp->sectorsize < MBRSIZE) return (ENOSPC); basetable->gpt_first = basetable->gpt_sectors; basetable->gpt_last = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX) - 1; table = (struct g_part_mbr_table *)basetable; le16enc(table->mbr + DOSMAGICOFFSET, DOSMAGIC); return (0); } static int g_part_mbr_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { /* Wipe the first sector to clear the partitioning. */ basetable->gpt_smhead |= 1; return (0); } static void g_part_mbr_dumpconf(struct g_part_table *basetable, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_mbr_entry *entry; struct g_part_mbr_table *table; uint32_t dsn; table = (struct g_part_mbr_table *)basetable; entry = (struct g_part_mbr_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs MBR xt %u", entry->ent.dp_typ); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s%u\n", indent, entry->ent.dp_typ); if (entry->ent.dp_flag & 0x80) sbuf_printf(sb, "%sactive\n", indent); dsn = le32dec(table->mbr + DOSDSNOFF); sbuf_printf(sb, "%sHD(%d,MBR,%#08x,%#jx,%#jx)", indent, entry->base.gpe_index, dsn, (intmax_t)entry->base.gpe_start, (intmax_t)(entry->base.gpe_end - entry->base.gpe_start + 1)); sbuf_printf(sb, "\n"); } else { /* confxml: scheme information */ } } static int g_part_mbr_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_mbr_entry *entry; /* Allow dumping to a FreeBSD partition or Linux swap partition only. */ entry = (struct g_part_mbr_entry *)baseentry; return ((entry->ent.dp_typ == DOSPTYP_386BSD || entry->ent.dp_typ == DOSPTYP_LINSWP) ? 
1 : 0); } static int g_part_mbr_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_mbr_entry *entry; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); entry = (struct g_part_mbr_entry *)baseentry; if (gpp->gpp_parms & G_PART_PARM_TYPE) return (mbr_parse_type(gpp->gpp_type, &entry->ent.dp_typ)); return (0); } static int g_part_mbr_resize(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_mbr_entry *entry; struct g_provider *pp; uint32_t size; if (baseentry == NULL) { pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; basetable->gpt_last = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX) - 1; return (0); } size = gpp->gpp_size; if (mbr_align(basetable, NULL, &size) != 0) return (EINVAL); /* XXX: prevent unexpected shrinking. */ pp = baseentry->gpe_pp; if ((g_debugflags & 0x10) == 0 && size < gpp->gpp_size && pp->mediasize / pp->sectorsize > size) return (EBUSY); entry = (struct g_part_mbr_entry *)baseentry; baseentry->gpe_end = baseentry->gpe_start + size - 1; entry->ent.dp_size = size; mbr_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl, &entry->ent.dp_ehd, &entry->ent.dp_esect); return (0); } static const char * g_part_mbr_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { snprintf(buf, bufsz, "s%d", baseentry->gpe_index); return (buf); } static int g_part_mbr_probe(struct g_part_table *table, struct g_consumer *cp) { char psn[8]; struct g_provider *pp; u_char *buf, *p; int error, index, res, sum; uint16_t magic; pp = cp->provider; /* Sanity-check the provider. */ if (pp->sectorsize < MBRSIZE || pp->mediasize < pp->sectorsize) return (ENOSPC); if (pp->sectorsize > 4096) return (ENXIO); /* We don't nest under an MBR (see EBR instead). */ error = g_getattr("PART::scheme", cp, &psn); if (error == 0 && strcmp(psn, g_part_mbr_scheme.name) == 0) return (ELOOP); /* Check that there's a MBR. */ buf = g_read_data(cp, 0L, pp->sectorsize, &error); if (buf == NULL) return (error); /* We goto out on mismatch. */ res = ENXIO; magic = le16dec(buf + DOSMAGICOFFSET); if (magic != DOSMAGIC) goto out; for (index = 0; index < NDOSPART; index++) { p = buf + DOSPARTOFF + index * DOSPARTSIZE; if (p[0] != 0 && p[0] != 0x80) goto out; } /* * If the partition table does not consist of all zeroes, * assume we have a MBR. If it's all zeroes, we could have * a boot sector. For example, a boot sector that doesn't * have boot code -- common on non-i386 hardware. In that * case we check if we have a possible BPB. If so, then we * assume we have a boot sector instead. 
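 * For example, a sector whose partition-table area is all zeroes but
 * which carries a FAT-style BPB (a power-of-two bytes-per-sector
 * between 512 and 4096 at offset 0x0b, and a power-of-two
 * sectors-per-cluster of at most 128 at offset 0x0d) is treated as a
 * boot sector rather than as an MBR.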
*/ sum = 0; for (index = 0; index < NDOSPART * DOSPARTSIZE; index++) sum += buf[DOSPARTOFF + index]; if (sum != 0 || !mbr_probe_bpb(buf + 0x0b)) res = G_PART_PROBE_PRI_NORM; out: g_free(buf); return (res); } static int g_part_mbr_read(struct g_part_table *basetable, struct g_consumer *cp) { struct dos_partition ent; struct g_provider *pp; struct g_part_mbr_table *table; struct g_part_mbr_entry *entry; u_char *buf, *p; off_t chs, msize, first; u_int sectors, heads; int error, index; pp = cp->provider; table = (struct g_part_mbr_table *)basetable; first = basetable->gpt_sectors; msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); buf = g_read_data(cp, 0L, pp->sectorsize, &error); if (buf == NULL) return (error); bcopy(buf, table->mbr, sizeof(table->mbr)); for (index = NDOSPART - 1; index >= 0; index--) { p = buf + DOSPARTOFF + index * DOSPARTSIZE; ent.dp_flag = p[0]; ent.dp_shd = p[1]; ent.dp_ssect = p[2]; ent.dp_scyl = p[3]; ent.dp_typ = p[4]; ent.dp_ehd = p[5]; ent.dp_esect = p[6]; ent.dp_ecyl = p[7]; ent.dp_start = le32dec(p + 8); ent.dp_size = le32dec(p + 12); if (ent.dp_typ == 0 || ent.dp_typ == DOSPTYP_PMBR) continue; if (ent.dp_start == 0 || ent.dp_size == 0) continue; sectors = ent.dp_esect & 0x3f; if (sectors > basetable->gpt_sectors && !basetable->gpt_fixgeom) { g_part_geometry_heads(msize, sectors, &chs, &heads); if (chs != 0) { basetable->gpt_sectors = sectors; basetable->gpt_heads = heads; } } if (ent.dp_start < first) first = ent.dp_start; entry = (struct g_part_mbr_entry *)g_part_new_entry(basetable, index + 1, ent.dp_start, ent.dp_start + ent.dp_size - 1); entry->ent = ent; } basetable->gpt_entries = NDOSPART; basetable->gpt_first = basetable->gpt_sectors; basetable->gpt_last = msize - 1; if (first < basetable->gpt_first) basetable->gpt_first = 1; g_free(buf); return (0); } static int g_part_mbr_setunset(struct g_part_table *table, struct g_part_entry *baseentry, const char *attrib, unsigned int set) { struct g_part_entry *iter; struct g_part_mbr_entry *entry; int changed; if (baseentry == NULL) return (ENODEV); if (strcasecmp(attrib, "active") != 0) return (EINVAL); /* Only one entry can have the active attribute. 
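 * Setting it on one slice therefore clears the 0x80 flag byte on all
 * other slices; from userland this is normally requested with
 * something like "gpart set -a active -i 1 da0".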
*/ LIST_FOREACH(iter, &table->gpt_entry, gpe_entry) { if (iter->gpe_deleted) continue; changed = 0; entry = (struct g_part_mbr_entry *)iter; if (iter == baseentry) { if (set && (entry->ent.dp_flag & 0x80) == 0) { entry->ent.dp_flag |= 0x80; changed = 1; } else if (!set && (entry->ent.dp_flag & 0x80)) { entry->ent.dp_flag &= ~0x80; changed = 1; } } else { if (set && (entry->ent.dp_flag & 0x80)) { entry->ent.dp_flag &= ~0x80; changed = 1; } } if (changed && !iter->gpe_created) iter->gpe_modified = 1; } return (0); } static const char * g_part_mbr_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_mbr_entry *entry; int i; entry = (struct g_part_mbr_entry *)baseentry; for (i = 0; i < nitems(mbr_alias_match); i++) { if (mbr_alias_match[i].typ == entry->ent.dp_typ) return (g_part_alias_name(mbr_alias_match[i].alias)); } snprintf(buf, bufsz, "!%d", entry->ent.dp_typ); return (buf); } static int g_part_mbr_write(struct g_part_table *basetable, struct g_consumer *cp) { struct g_part_entry *baseentry; struct g_part_mbr_entry *entry; struct g_part_mbr_table *table; u_char *p; int error, index; table = (struct g_part_mbr_table *)basetable; baseentry = LIST_FIRST(&basetable->gpt_entry); for (index = 1; index <= basetable->gpt_entries; index++) { p = table->mbr + DOSPARTOFF + (index - 1) * DOSPARTSIZE; entry = (baseentry != NULL && index == baseentry->gpe_index) ? (struct g_part_mbr_entry *)baseentry : NULL; if (entry != NULL && !baseentry->gpe_deleted) { p[0] = entry->ent.dp_flag; p[1] = entry->ent.dp_shd; p[2] = entry->ent.dp_ssect; p[3] = entry->ent.dp_scyl; p[4] = entry->ent.dp_typ; p[5] = entry->ent.dp_ehd; p[6] = entry->ent.dp_esect; p[7] = entry->ent.dp_ecyl; le32enc(p + 8, entry->ent.dp_start); le32enc(p + 12, entry->ent.dp_size); } else bzero(p, DOSPARTSIZE); if (entry != NULL) baseentry = LIST_NEXT(baseentry, gpe_entry); } error = g_write_data(cp, 0, table->mbr, cp->provider->sectorsize); return (error); } Index: stable/11/sys/geom/part/g_part_pc98.c =================================================================== --- stable/11/sys/geom/part/g_part_pc98.c (revision 332639) +++ stable/11/sys/geom/part/g_part_pc98.c (revision 332640) @@ -1,617 +1,618 @@ /*- * Copyright (c) 2008 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_pc98, "GEOM partitioning class for PC-9800 disk partitions"); #define SECSIZE 512 #define MENUSIZE 7168 #define BOOTSIZE 8192 struct g_part_pc98_table { struct g_part_table base; u_char boot[SECSIZE]; u_char table[SECSIZE]; u_char menu[MENUSIZE]; }; struct g_part_pc98_entry { struct g_part_entry base; struct pc98_partition ent; }; static int g_part_pc98_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_pc98_bootcode(struct g_part_table *, struct g_part_parms *); static int g_part_pc98_create(struct g_part_table *, struct g_part_parms *); static int g_part_pc98_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_pc98_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_pc98_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_pc98_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_pc98_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_pc98_probe(struct g_part_table *, struct g_consumer *); static int g_part_pc98_read(struct g_part_table *, struct g_consumer *); static int g_part_pc98_setunset(struct g_part_table *, struct g_part_entry *, const char *, unsigned int); static const char *g_part_pc98_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_pc98_write(struct g_part_table *, struct g_consumer *); static int g_part_pc98_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static kobj_method_t g_part_pc98_methods[] = { KOBJMETHOD(g_part_add, g_part_pc98_add), KOBJMETHOD(g_part_bootcode, g_part_pc98_bootcode), KOBJMETHOD(g_part_create, g_part_pc98_create), KOBJMETHOD(g_part_destroy, g_part_pc98_destroy), KOBJMETHOD(g_part_dumpconf, g_part_pc98_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_pc98_dumpto), KOBJMETHOD(g_part_modify, g_part_pc98_modify), KOBJMETHOD(g_part_resize, g_part_pc98_resize), KOBJMETHOD(g_part_name, g_part_pc98_name), KOBJMETHOD(g_part_probe, g_part_pc98_probe), KOBJMETHOD(g_part_read, g_part_pc98_read), KOBJMETHOD(g_part_setunset, g_part_pc98_setunset), KOBJMETHOD(g_part_type, g_part_pc98_type), KOBJMETHOD(g_part_write, g_part_pc98_write), { 0, 0 } }; static struct g_part_scheme g_part_pc98_scheme = { "PC98", g_part_pc98_methods, sizeof(struct g_part_pc98_table), .gps_entrysz = sizeof(struct g_part_pc98_entry), .gps_minent = PC98_NPARTS, .gps_maxent = PC98_NPARTS, .gps_bootcodesz = BOOTSIZE, }; G_PART_SCHEME_DECLARE(g_part_pc98); +MODULE_VERSION(geom_part_pc98, 0); static int pc98_parse_type(const char *type, u_char *dp_mid, u_char *dp_sid) { const char *alias; char *endp; long lt; if (type[0] == '!') { lt = strtol(type + 1, &endp, 0); if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 65536) return (EINVAL); /* Make sure the active and bootable flags aren't set. 
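 * A '!number' type supplies dp_mid in its low byte and dp_sid in its
 * high byte; the PC98_MID_BOOTABLE and PC98_SID_ACTIVE bits carry the
 * bootable and active flags, which are managed separately as
 * attributes, so a value with either bit set is rejected with
 * ENOATTR.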
*/ if (lt & ((PC98_SID_ACTIVE << 8) | PC98_MID_BOOTABLE)) return (ENOATTR); *dp_mid = (*dp_mid & PC98_MID_BOOTABLE) | (u_char)lt; *dp_sid = (*dp_sid & PC98_SID_ACTIVE) | (u_char)(lt >> 8); return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD); if (!strcasecmp(type, alias)) { *dp_mid = (*dp_mid & PC98_MID_BOOTABLE) | PC98_MID_386BSD; *dp_sid = (*dp_sid & PC98_SID_ACTIVE) | PC98_SID_386BSD; return (0); } return (EINVAL); } static int pc98_set_slicename(const char *label, u_char *dp_name) { int len; len = strlen(label); if (len > sizeof(((struct pc98_partition *)NULL)->dp_name)) return (EINVAL); bzero(dp_name, sizeof(((struct pc98_partition *)NULL)->dp_name)); strncpy(dp_name, label, len); return (0); } static void pc98_set_chs(struct g_part_table *table, uint32_t lba, u_short *cylp, u_char *hdp, u_char *secp) { uint32_t cyl, hd, sec; sec = lba % table->gpt_sectors + 1; lba /= table->gpt_sectors; hd = lba % table->gpt_heads; lba /= table->gpt_heads; cyl = lba; *cylp = htole16(cyl); *hdp = hd; *secp = sec; } static int pc98_align(struct g_part_table *basetable, uint32_t *start, uint32_t *size) { uint32_t cyl; cyl = basetable->gpt_heads * basetable->gpt_sectors; if (*size < cyl) return (EINVAL); if (start != NULL && (*start % cyl)) { *size += (*start % cyl) - cyl; *start -= (*start % cyl) - cyl; } if (*size % cyl) *size -= (*size % cyl); if (*size < cyl) return (EINVAL); return (0); } static int g_part_pc98_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_pc98_entry *entry; uint32_t start, size; int error; entry = (struct g_part_pc98_entry *)baseentry; start = gpp->gpp_start; size = gpp->gpp_size; if (pc98_align(basetable, &start, &size) != 0) return (EINVAL); if (baseentry->gpe_deleted) bzero(&entry->ent, sizeof(entry->ent)); else entry->ent.dp_mid = entry->ent.dp_sid = 0; KASSERT(baseentry->gpe_start <= start, (__func__)); KASSERT(baseentry->gpe_end >= start + size - 1, (__func__)); baseentry->gpe_start = start; baseentry->gpe_end = start + size - 1; pc98_set_chs(basetable, baseentry->gpe_start, &entry->ent.dp_scyl, &entry->ent.dp_shd, &entry->ent.dp_ssect); pc98_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl, &entry->ent.dp_ehd, &entry->ent.dp_esect); error = pc98_parse_type(gpp->gpp_type, &entry->ent.dp_mid, &entry->ent.dp_sid); if (error) return (error); if (gpp->gpp_parms & G_PART_PARM_LABEL) return (pc98_set_slicename(gpp->gpp_label, entry->ent.dp_name)); return (0); } static int g_part_pc98_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_pc98_table *table; const u_char *codeptr; if (gpp->gpp_codesize != BOOTSIZE) return (EINVAL); table = (struct g_part_pc98_table *)basetable; codeptr = gpp->gpp_codeptr; bcopy(codeptr, table->boot, SECSIZE); bcopy(codeptr + SECSIZE*2, table->menu, MENUSIZE); return (0); } static int g_part_pc98_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_pc98_table *table; pp = gpp->gpp_provider; if (pp->sectorsize < SECSIZE || pp->mediasize < BOOTSIZE) return (ENOSPC); if (pp->sectorsize > SECSIZE) return (ENXIO); basetable->gpt_first = basetable->gpt_heads * basetable->gpt_sectors; basetable->gpt_last = MIN(pp->mediasize / SECSIZE, UINT32_MAX) - 1; table = (struct g_part_pc98_table *)basetable; le16enc(table->boot + PC98_MAGICOFS, PC98_MAGIC); return (0); } static int g_part_pc98_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { /* Wipe the first two sectors to clear the 
partitioning. */ basetable->gpt_smhead |= 3; return (0); } static void g_part_pc98_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_pc98_entry *entry; char name[sizeof(entry->ent.dp_name) + 1]; u_int type; entry = (struct g_part_pc98_entry *)baseentry; if (entry == NULL) { /* confxml: scheme information */ return; } type = entry->ent.dp_mid + (entry->ent.dp_sid << 8); strncpy(name, entry->ent.dp_name, sizeof(name) - 1); name[sizeof(name) - 1] = '\0'; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs PC98 xt %u sn %s", type, name); } else { /* confxml: partition entry information */ sbuf_printf(sb, "%s\n"); if (entry->ent.dp_mid & PC98_MID_BOOTABLE) sbuf_printf(sb, "%sbootable\n", indent); if (entry->ent.dp_sid & PC98_SID_ACTIVE) sbuf_printf(sb, "%sactive\n", indent); sbuf_printf(sb, "%s%u\n", indent, type & 0x7f7f); } } static int g_part_pc98_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_pc98_entry *entry; /* Allow dumping to a FreeBSD partition only. */ entry = (struct g_part_pc98_entry *)baseentry; return (((entry->ent.dp_mid & PC98_MID_MASK) == PC98_MID_386BSD && (entry->ent.dp_sid & PC98_SID_MASK) == PC98_SID_386BSD) ? 1 : 0); } static int g_part_pc98_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_pc98_entry *entry; int error; entry = (struct g_part_pc98_entry *)baseentry; if (gpp->gpp_parms & G_PART_PARM_TYPE) { error = pc98_parse_type(gpp->gpp_type, &entry->ent.dp_mid, &entry->ent.dp_sid); if (error) return (error); } if (gpp->gpp_parms & G_PART_PARM_LABEL) return (pc98_set_slicename(gpp->gpp_label, entry->ent.dp_name)); return (0); } static int g_part_pc98_resize(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_pc98_entry *entry; struct g_provider *pp; uint32_t size; if (baseentry == NULL) { pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; basetable->gpt_last = MIN(pp->mediasize / SECSIZE, UINT32_MAX) - 1; return (0); } size = gpp->gpp_size; if (pc98_align(basetable, NULL, &size) != 0) return (EINVAL); /* XXX: prevent unexpected shrinking. */ pp = baseentry->gpe_pp; if ((g_debugflags & 0x10) == 0 && size < gpp->gpp_size && pp->mediasize / pp->sectorsize > size) return (EBUSY); entry = (struct g_part_pc98_entry *)baseentry; baseentry->gpe_end = baseentry->gpe_start + size - 1; pc98_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl, &entry->ent.dp_ehd, &entry->ent.dp_esect); return (0); } static const char * g_part_pc98_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { snprintf(buf, bufsz, "s%d", baseentry->gpe_index); return (buf); } static int g_part_pc98_probe(struct g_part_table *table, struct g_consumer *cp) { struct g_provider *pp; u_char *buf, *p; int error, index, res, sum; uint16_t magic, ecyl, scyl; pp = cp->provider; /* Sanity-check the provider. */ if (pp->sectorsize < SECSIZE || pp->mediasize < BOOTSIZE) return (ENOSPC); if (pp->sectorsize > SECSIZE) return (ENXIO); /* Check that there's a PC98 partition table. */ buf = g_read_data(cp, 0L, 2 * SECSIZE, &error); if (buf == NULL) return (error); /* We goto out on mismatch. 
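pc98_set_chs() above converts an absolute LBA into the cylinder/head/sector triple stored in the slice table (cylinder stored little-endian, sectors 1-based). A worked userland sketch of the same arithmetic; the 16-head, 63-sector geometry is made up for the example.

#include <stdint.h>
#include <stdio.h>

static void
lba_to_chs(uint32_t lba, uint32_t heads, uint32_t sectors,
    uint32_t *cyl, uint32_t *hd, uint32_t *sec)
{
	*sec = lba % sectors + 1;	/* sectors are 1-based */
	lba /= sectors;
	*hd = lba % heads;
	lba /= heads;
	*cyl = lba;
}

int
main(void)
{
	uint32_t c, h, s;

	/* 1008 = 16 * 63, i.e. exactly one cylinder in. */
	lba_to_chs(1008, 16, 63, &c, &h, &s);
	printf("cyl %u head %u sec %u\n", c, h, s);	/* cyl 1 head 0 sec 1 */
	return (0);
}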
*/ res = ENXIO; magic = le16dec(buf + PC98_MAGICOFS); if (magic != PC98_MAGIC) goto out; sum = 0; for (index = SECSIZE; index < 2 * SECSIZE; index++) sum += buf[index]; if (sum == 0) { res = G_PART_PROBE_PRI_LOW; goto out; } for (index = 0; index < PC98_NPARTS; index++) { p = buf + SECSIZE + index * PC98_PARTSIZE; if (p[0] == 0 || p[1] == 0) /* !dp_mid || !dp_sid */ continue; scyl = le16dec(p + 10); ecyl = le16dec(p + 14); if (scyl == 0 || ecyl == 0) goto out; if (p[8] == p[12] && /* dp_ssect == dp_esect */ p[9] == p[13] && /* dp_shd == dp_ehd */ scyl == ecyl) goto out; } res = G_PART_PROBE_PRI_HIGH; out: g_free(buf); return (res); } static int g_part_pc98_read(struct g_part_table *basetable, struct g_consumer *cp) { struct pc98_partition ent; struct g_provider *pp; struct g_part_pc98_table *table; struct g_part_pc98_entry *entry; u_char *buf, *p; off_t msize; off_t start, end; u_int cyl; int error, index; pp = cp->provider; table = (struct g_part_pc98_table *)basetable; msize = MIN(pp->mediasize / SECSIZE, UINT32_MAX); buf = g_read_data(cp, 0L, BOOTSIZE, &error); if (buf == NULL) return (error); cyl = basetable->gpt_heads * basetable->gpt_sectors; bcopy(buf, table->boot, sizeof(table->boot)); bcopy(buf + SECSIZE, table->table, sizeof(table->table)); bcopy(buf + SECSIZE*2, table->menu, sizeof(table->menu)); for (index = PC98_NPARTS - 1; index >= 0; index--) { p = buf + SECSIZE + index * PC98_PARTSIZE; ent.dp_mid = p[0]; ent.dp_sid = p[1]; ent.dp_dum1 = p[2]; ent.dp_dum2 = p[3]; ent.dp_ipl_sct = p[4]; ent.dp_ipl_head = p[5]; ent.dp_ipl_cyl = le16dec(p + 6); ent.dp_ssect = p[8]; ent.dp_shd = p[9]; ent.dp_scyl = le16dec(p + 10); ent.dp_esect = p[12]; ent.dp_ehd = p[13]; ent.dp_ecyl = le16dec(p + 14); bcopy(p + 16, ent.dp_name, sizeof(ent.dp_name)); if (ent.dp_sid == 0) continue; start = ent.dp_scyl * cyl; end = (ent.dp_ecyl + 1) * cyl - 1; entry = (struct g_part_pc98_entry *)g_part_new_entry(basetable, index + 1, start, end); entry->ent = ent; } basetable->gpt_entries = PC98_NPARTS; basetable->gpt_first = cyl; basetable->gpt_last = msize - 1; g_free(buf); return (0); } static int g_part_pc98_setunset(struct g_part_table *table, struct g_part_entry *baseentry, const char *attrib, unsigned int set) { struct g_part_entry *iter; struct g_part_pc98_entry *entry; int changed, mid, sid; if (baseentry == NULL) return (ENODEV); mid = sid = 0; if (strcasecmp(attrib, "active") == 0) sid = 1; else if (strcasecmp(attrib, "bootable") == 0) mid = 1; if (mid == 0 && sid == 0) return (EINVAL); LIST_FOREACH(iter, &table->gpt_entry, gpe_entry) { if (iter->gpe_deleted) continue; if (iter != baseentry) continue; changed = 0; entry = (struct g_part_pc98_entry *)iter; if (set) { if (mid && !(entry->ent.dp_mid & PC98_MID_BOOTABLE)) { entry->ent.dp_mid |= PC98_MID_BOOTABLE; changed = 1; } if (sid && !(entry->ent.dp_sid & PC98_SID_ACTIVE)) { entry->ent.dp_sid |= PC98_SID_ACTIVE; changed = 1; } } else { if (mid && (entry->ent.dp_mid & PC98_MID_BOOTABLE)) { entry->ent.dp_mid &= ~PC98_MID_BOOTABLE; changed = 1; } if (sid && (entry->ent.dp_sid & PC98_SID_ACTIVE)) { entry->ent.dp_sid &= ~PC98_SID_ACTIVE; changed = 1; } } if (changed && !iter->gpe_created) iter->gpe_modified = 1; } return (0); } static const char * g_part_pc98_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_pc98_entry *entry; u_int type; entry = (struct g_part_pc98_entry *)baseentry; type = (entry->ent.dp_mid & PC98_MID_MASK) | ((entry->ent.dp_sid & PC98_SID_MASK) << 8); if (type == 
(PC98_MID_386BSD | (PC98_SID_386BSD << 8))) return (g_part_alias_name(G_PART_ALIAS_FREEBSD)); snprintf(buf, bufsz, "!%d", type); return (buf); } static int g_part_pc98_write(struct g_part_table *basetable, struct g_consumer *cp) { struct g_part_entry *baseentry; struct g_part_pc98_entry *entry; struct g_part_pc98_table *table; u_char *p; int error, index; table = (struct g_part_pc98_table *)basetable; baseentry = LIST_FIRST(&basetable->gpt_entry); for (index = 1; index <= basetable->gpt_entries; index++) { p = table->table + (index - 1) * PC98_PARTSIZE; entry = (baseentry != NULL && index == baseentry->gpe_index) ? (struct g_part_pc98_entry *)baseentry : NULL; if (entry != NULL && !baseentry->gpe_deleted) { p[0] = entry->ent.dp_mid; p[1] = entry->ent.dp_sid; p[2] = entry->ent.dp_dum1; p[3] = entry->ent.dp_dum2; p[4] = entry->ent.dp_ipl_sct; p[5] = entry->ent.dp_ipl_head; le16enc(p + 6, entry->ent.dp_ipl_cyl); p[8] = entry->ent.dp_ssect; p[9] = entry->ent.dp_shd; le16enc(p + 10, entry->ent.dp_scyl); p[12] = entry->ent.dp_esect; p[13] = entry->ent.dp_ehd; le16enc(p + 14, entry->ent.dp_ecyl); bcopy(entry->ent.dp_name, p + 16, sizeof(entry->ent.dp_name)); } else bzero(p, PC98_PARTSIZE); if (entry != NULL) baseentry = LIST_NEXT(baseentry, gpe_entry); } error = g_write_data(cp, 0, table->boot, SECSIZE); if (!error) error = g_write_data(cp, SECSIZE, table->table, SECSIZE); if (!error) error = g_write_data(cp, SECSIZE*2, table->menu, MENUSIZE); return (error); } Index: stable/11/sys/geom/part/g_part_vtoc8.c =================================================================== --- stable/11/sys/geom/part/g_part_vtoc8.c (revision 332639) +++ stable/11/sys/geom/part/g_part_vtoc8.c (revision 332640) @@ -1,599 +1,600 @@ /*- * Copyright (c) 2008 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_vtoc8, "GEOM partitioning class for SMI VTOC8 disk labels"); struct g_part_vtoc8_table { struct g_part_table base; struct vtoc8 vtoc; uint32_t secpercyl; }; static int g_part_vtoc8_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_vtoc8_create(struct g_part_table *, struct g_part_parms *); static int g_part_vtoc8_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_vtoc8_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_vtoc8_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_vtoc8_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_vtoc8_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_vtoc8_probe(struct g_part_table *, struct g_consumer *); static int g_part_vtoc8_read(struct g_part_table *, struct g_consumer *); static const char *g_part_vtoc8_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_vtoc8_write(struct g_part_table *, struct g_consumer *); static int g_part_vtoc8_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static kobj_method_t g_part_vtoc8_methods[] = { KOBJMETHOD(g_part_add, g_part_vtoc8_add), KOBJMETHOD(g_part_create, g_part_vtoc8_create), KOBJMETHOD(g_part_destroy, g_part_vtoc8_destroy), KOBJMETHOD(g_part_dumpconf, g_part_vtoc8_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_vtoc8_dumpto), KOBJMETHOD(g_part_modify, g_part_vtoc8_modify), KOBJMETHOD(g_part_resize, g_part_vtoc8_resize), KOBJMETHOD(g_part_name, g_part_vtoc8_name), KOBJMETHOD(g_part_probe, g_part_vtoc8_probe), KOBJMETHOD(g_part_read, g_part_vtoc8_read), KOBJMETHOD(g_part_type, g_part_vtoc8_type), KOBJMETHOD(g_part_write, g_part_vtoc8_write), { 0, 0 } }; static struct g_part_scheme g_part_vtoc8_scheme = { "VTOC8", g_part_vtoc8_methods, sizeof(struct g_part_vtoc8_table), .gps_entrysz = sizeof(struct g_part_entry), .gps_minent = VTOC8_NPARTS, .gps_maxent = VTOC8_NPARTS, }; G_PART_SCHEME_DECLARE(g_part_vtoc8); +MODULE_VERSION(geom_part_vtoc8, 0); static int vtoc8_parse_type(const char *type, uint16_t *tag) { const char *alias; char *endp; long lt; if (type[0] == '!') { lt = strtol(type + 1, &endp, 0); if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 65536) return (EINVAL); *tag = (uint16_t)lt; return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_NANDFS); if (!strcasecmp(type, alias)) { *tag = VTOC_TAG_FREEBSD_NANDFS; return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_SWAP); if (!strcasecmp(type, alias)) { *tag = VTOC_TAG_FREEBSD_SWAP; return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_UFS); if (!strcasecmp(type, alias)) { *tag = VTOC_TAG_FREEBSD_UFS; return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_VINUM); if (!strcasecmp(type, alias)) { *tag = VTOC_TAG_FREEBSD_VINUM; return (0); } alias = g_part_alias_name(G_PART_ALIAS_FREEBSD_ZFS); if (!strcasecmp(type, alias)) { *tag = VTOC_TAG_FREEBSD_ZFS; return (0); } return (EINVAL); } static int vtoc8_align(struct g_part_vtoc8_table *table, uint64_t *start, uint64_t *size) { if (*size < table->secpercyl) return (EINVAL); if (start != NULL && (*start % table->secpercyl)) { *size += (*start % table->secpercyl) - 
table->secpercyl; *start -= (*start % table->secpercyl) - table->secpercyl; } if (*size % table->secpercyl) *size -= (*size % table->secpercyl); if (*size < table->secpercyl) return (EINVAL); return (0); } static int g_part_vtoc8_add(struct g_part_table *basetable, struct g_part_entry *entry, struct g_part_parms *gpp) { struct g_part_vtoc8_table *table; int error, index; uint64_t start, size; uint16_t tag; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); error = vtoc8_parse_type(gpp->gpp_type, &tag); if (error) return (error); table = (struct g_part_vtoc8_table *)basetable; index = entry->gpe_index - 1; start = gpp->gpp_start; size = gpp->gpp_size; if (vtoc8_align(table, &start, &size) != 0) return (EINVAL); KASSERT(entry->gpe_start <= start, (__func__)); KASSERT(entry->gpe_end >= start + size - 1, (__func__)); entry->gpe_start = start; entry->gpe_end = start + size - 1; be16enc(&table->vtoc.part[index].tag, tag); be16enc(&table->vtoc.part[index].flag, 0); be32enc(&table->vtoc.timestamp[index], 0); be32enc(&table->vtoc.map[index].cyl, start / table->secpercyl); be32enc(&table->vtoc.map[index].nblks, size); return (0); } static int g_part_vtoc8_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_entry *entry; struct g_part_vtoc8_table *table; uint64_t msize; uint32_t acyls, ncyls, pcyls; pp = gpp->gpp_provider; if (pp->sectorsize < sizeof(struct vtoc8)) return (ENOSPC); if (pp->sectorsize > sizeof(struct vtoc8)) return (ENXIO); table = (struct g_part_vtoc8_table *)basetable; msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); table->secpercyl = basetable->gpt_sectors * basetable->gpt_heads; pcyls = msize / table->secpercyl; acyls = 2; ncyls = pcyls - acyls; msize = ncyls * table->secpercyl; sprintf(table->vtoc.ascii, "FreeBSD%lldM cyl %u alt %u hd %u sec %u", (long long)(msize / 2048), ncyls, acyls, basetable->gpt_heads, basetable->gpt_sectors); be32enc(&table->vtoc.version, VTOC_VERSION); be16enc(&table->vtoc.nparts, VTOC8_NPARTS); be32enc(&table->vtoc.sanity, VTOC_SANITY); be16enc(&table->vtoc.rpm, 3600); be16enc(&table->vtoc.physcyls, pcyls); be16enc(&table->vtoc.ncyls, ncyls); be16enc(&table->vtoc.altcyls, acyls); be16enc(&table->vtoc.nheads, basetable->gpt_heads); be16enc(&table->vtoc.nsecs, basetable->gpt_sectors); be16enc(&table->vtoc.magic, VTOC_MAGIC); basetable->gpt_first = 0; basetable->gpt_last = msize - 1; basetable->gpt_isleaf = 1; entry = g_part_new_entry(basetable, VTOC_RAW_PART + 1, basetable->gpt_first, basetable->gpt_last); entry->gpe_internal = 1; be16enc(&table->vtoc.part[VTOC_RAW_PART].tag, VTOC_TAG_BACKUP); be32enc(&table->vtoc.map[VTOC_RAW_PART].nblks, msize); return (0); } static int g_part_vtoc8_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { /* Wipe the first sector to clear the partitioning. 
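pc98_align() and vtoc8_align() above perform the same rounding: the start is pushed up to the next cylinder boundary, the size is trimmed by the same amount so the end stays put, the result is then cut down to whole cylinders, and anything smaller than one cylinder is rejected. A minimal sketch of that rounding, assuming a non-NULL start (the kernel versions also handle a size-only call):

#include <stdint.h>

static int
align_to_cylinder(uint64_t secpercyl, uint64_t *start, uint64_t *size)
{
	uint64_t rem;

	if (*size < secpercyl)
		return (-1);
	rem = *start % secpercyl;
	if (rem != 0) {
		*start += secpercyl - rem;	/* round start up */
		*size -= secpercyl - rem;	/* keep the end in place */
	}
	*size -= *size % secpercyl;		/* whole cylinders only */
	return (*size < secpercyl ? -1 : 0);
}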
*/ basetable->gpt_smhead |= 1; return (0); } static void g_part_vtoc8_dumpconf(struct g_part_table *basetable, struct g_part_entry *entry, struct sbuf *sb, const char *indent) { struct g_part_vtoc8_table *table; table = (struct g_part_vtoc8_table *)basetable; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs SUN sc %u hd %u alt %u", be16dec(&table->vtoc.nsecs), be16dec(&table->vtoc.nheads), be16dec(&table->vtoc.altcyls)); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s%u\n", indent, be16dec(&table->vtoc.part[entry->gpe_index - 1].tag)); } else { /* confxml: scheme information */ } } static int g_part_vtoc8_dumpto(struct g_part_table *basetable, struct g_part_entry *entry) { struct g_part_vtoc8_table *table; uint16_t tag; /* * Allow dumping to a swap partition or a partition that * has no type. */ table = (struct g_part_vtoc8_table *)basetable; tag = be16dec(&table->vtoc.part[entry->gpe_index - 1].tag); return ((tag == 0 || tag == VTOC_TAG_FREEBSD_SWAP || tag == VTOC_TAG_SWAP) ? 1 : 0); } static int g_part_vtoc8_modify(struct g_part_table *basetable, struct g_part_entry *entry, struct g_part_parms *gpp) { struct g_part_vtoc8_table *table; int error; uint16_t tag; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); table = (struct g_part_vtoc8_table *)basetable; if (gpp->gpp_parms & G_PART_PARM_TYPE) { error = vtoc8_parse_type(gpp->gpp_type, &tag); if (error) return(error); be16enc(&table->vtoc.part[entry->gpe_index - 1].tag, tag); } return (0); } static int vtoc8_set_rawsize(struct g_part_table *basetable, struct g_provider *pp) { struct g_part_vtoc8_table *table; struct g_part_entry *baseentry; off_t msize; uint32_t acyls, ncyls, pcyls; table = (struct g_part_vtoc8_table *)basetable; msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); pcyls = msize / table->secpercyl; if (pcyls > UINT16_MAX) return (ERANGE); acyls = be16dec(&table->vtoc.altcyls); ncyls = pcyls - acyls; msize = ncyls * table->secpercyl; basetable->gpt_last = msize - 1; bzero(table->vtoc.ascii, sizeof(table->vtoc.ascii)); sprintf(table->vtoc.ascii, "FreeBSD%lldM cyl %u alt %u hd %u sec %u", (long long)(msize / 2048), ncyls, acyls, basetable->gpt_heads, basetable->gpt_sectors); be16enc(&table->vtoc.physcyls, pcyls); be16enc(&table->vtoc.ncyls, ncyls); be32enc(&table->vtoc.map[VTOC_RAW_PART].nblks, msize); if (be32dec(&table->vtoc.sanity) == VTOC_SANITY) be16enc(&table->vtoc.part[VTOC_RAW_PART].tag, VTOC_TAG_BACKUP); LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_index == VTOC_RAW_PART + 1) { baseentry->gpe_end = basetable->gpt_last; return (0); } } return (ENXIO); } static int g_part_vtoc8_resize(struct g_part_table *basetable, struct g_part_entry *entry, struct g_part_parms *gpp) { struct g_part_vtoc8_table *table; struct g_provider *pp; uint64_t size; if (entry == NULL) { pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; return (vtoc8_set_rawsize(basetable, pp)); } table = (struct g_part_vtoc8_table *)basetable; size = gpp->gpp_size; if (vtoc8_align(table, NULL, &size) != 0) return (EINVAL); /* XXX: prevent unexpected shrinking. 
*/ pp = entry->gpe_pp; if ((g_debugflags & 0x10) == 0 && size < gpp->gpp_size && pp->mediasize / pp->sectorsize > size) return (EBUSY); entry->gpe_end = entry->gpe_start + size - 1; be32enc(&table->vtoc.map[entry->gpe_index - 1].nblks, size); return (0); } static const char * g_part_vtoc8_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { snprintf(buf, bufsz, "%c", 'a' + baseentry->gpe_index - 1); return (buf); } static int g_part_vtoc8_probe(struct g_part_table *table, struct g_consumer *cp) { struct g_provider *pp; u_char *buf; int error, ofs, res; uint16_t cksum, magic; pp = cp->provider; /* Sanity-check the provider. */ if (pp->sectorsize != sizeof(struct vtoc8)) return (ENOSPC); /* Check that there's a disklabel. */ buf = g_read_data(cp, 0, pp->sectorsize, &error); if (buf == NULL) return (error); res = ENXIO; /* Assume mismatch */ /* Check the magic */ magic = be16dec(buf + offsetof(struct vtoc8, magic)); if (magic != VTOC_MAGIC) goto out; /* Check the sum */ cksum = 0; for (ofs = 0; ofs < sizeof(struct vtoc8); ofs += 2) cksum ^= be16dec(buf + ofs); if (cksum != 0) goto out; res = G_PART_PROBE_PRI_NORM; out: g_free(buf); return (res); } static int g_part_vtoc8_read(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; struct g_part_vtoc8_table *table; struct g_part_entry *entry; u_char *buf; off_t chs, msize; uint64_t offset, size; u_int cyls, heads, sectors; int error, index, withtags; uint16_t tag; pp = cp->provider; buf = g_read_data(cp, 0, pp->sectorsize, &error); if (buf == NULL) return (error); table = (struct g_part_vtoc8_table *)basetable; bcopy(buf, &table->vtoc, sizeof(table->vtoc)); g_free(buf); msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); sectors = be16dec(&table->vtoc.nsecs); if (sectors < 1) goto invalid_label; if (sectors != basetable->gpt_sectors && !basetable->gpt_fixgeom) { g_part_geometry_heads(msize, sectors, &chs, &heads); if (chs != 0) { basetable->gpt_sectors = sectors; basetable->gpt_heads = heads; } } heads = be16dec(&table->vtoc.nheads); if (heads < 1) goto invalid_label; if (heads != basetable->gpt_heads && !basetable->gpt_fixgeom) basetable->gpt_heads = heads; /* * Except for ATA disks > 32GB, Solaris uses the native geometry * as reported by the target for the labels while da(4) typically * uses a synthetic one so we don't complain too loudly if these * geometries don't match. */ if (bootverbose && (sectors != basetable->gpt_sectors || heads != basetable->gpt_heads)) printf("GEOM: %s: geometry does not match VTOC8 label " "(label: %uh,%us GEOM: %uh,%us).\n", pp->name, heads, sectors, basetable->gpt_heads, basetable->gpt_sectors); table->secpercyl = heads * sectors; cyls = be16dec(&table->vtoc.ncyls); chs = cyls * table->secpercyl; if (chs < 1 || chs > msize) goto invalid_label; basetable->gpt_first = 0; basetable->gpt_last = chs - 1; basetable->gpt_isleaf = 1; withtags = (be32dec(&table->vtoc.sanity) == VTOC_SANITY) ? 
1 : 0; if (!withtags) { printf("GEOM: %s: adding VTOC8 information.\n", pp->name); be32enc(&table->vtoc.version, VTOC_VERSION); bzero(&table->vtoc.volume, VTOC_VOLUME_LEN); be16enc(&table->vtoc.nparts, VTOC8_NPARTS); bzero(&table->vtoc.part, sizeof(table->vtoc.part)); be32enc(&table->vtoc.sanity, VTOC_SANITY); } basetable->gpt_entries = be16dec(&table->vtoc.nparts); if (basetable->gpt_entries < g_part_vtoc8_scheme.gps_minent || basetable->gpt_entries > g_part_vtoc8_scheme.gps_maxent) goto invalid_label; for (index = basetable->gpt_entries - 1; index >= 0; index--) { offset = be32dec(&table->vtoc.map[index].cyl) * table->secpercyl; size = be32dec(&table->vtoc.map[index].nblks); if (size == 0) continue; if (withtags) tag = be16dec(&table->vtoc.part[index].tag); else tag = (index == VTOC_RAW_PART) ? VTOC_TAG_BACKUP : VTOC_TAG_UNASSIGNED; if (index == VTOC_RAW_PART && tag != VTOC_TAG_BACKUP) continue; if (index != VTOC_RAW_PART && tag == VTOC_TAG_BACKUP) continue; entry = g_part_new_entry(basetable, index + 1, offset, offset + size - 1); if (tag == VTOC_TAG_BACKUP) entry->gpe_internal = 1; if (!withtags) be16enc(&table->vtoc.part[index].tag, tag); } return (0); invalid_label: printf("GEOM: %s: invalid VTOC8 label.\n", pp->name); return (EINVAL); } static const char * g_part_vtoc8_type(struct g_part_table *basetable, struct g_part_entry *entry, char *buf, size_t bufsz) { struct g_part_vtoc8_table *table; uint16_t tag; table = (struct g_part_vtoc8_table *)basetable; tag = be16dec(&table->vtoc.part[entry->gpe_index - 1].tag); if (tag == VTOC_TAG_FREEBSD_NANDFS) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_NANDFS)); if (tag == VTOC_TAG_FREEBSD_SWAP) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_SWAP)); if (tag == VTOC_TAG_FREEBSD_UFS) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_UFS)); if (tag == VTOC_TAG_FREEBSD_VINUM) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_VINUM)); if (tag == VTOC_TAG_FREEBSD_ZFS) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_ZFS)); snprintf(buf, bufsz, "!%d", tag); return (buf); } static int g_part_vtoc8_write(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; struct g_part_entry *entry; struct g_part_vtoc8_table *table; uint16_t sum; u_char *p; int error, index, match, offset; pp = cp->provider; table = (struct g_part_vtoc8_table *)basetable; entry = LIST_FIRST(&basetable->gpt_entry); for (index = 0; index < basetable->gpt_entries; index++) { match = (entry != NULL && index == entry->gpe_index - 1) ? 1 : 0; if (match) { if (entry->gpe_deleted) { be16enc(&table->vtoc.part[index].tag, 0); be16enc(&table->vtoc.part[index].flag, 0); be32enc(&table->vtoc.map[index].cyl, 0); be32enc(&table->vtoc.map[index].nblks, 0); } entry = LIST_NEXT(entry, gpe_entry); } } /* Calculate checksum. */ sum = 0; p = (void *)&table->vtoc; for (offset = 0; offset < sizeof(table->vtoc) - 2; offset += 2) sum ^= be16dec(p + offset); be16enc(&table->vtoc.cksum, sum); error = g_write_data(cp, 0, p, pp->sectorsize); return (error); } Index: stable/11/sys/geom/raid3/g_raid3.c =================================================================== --- stable/11/sys/geom/raid3/g_raid3.c (revision 332639) +++ stable/11/sys/geom/raid3/g_raid3.c (revision 332640) @@ -1,3583 +1,3584 @@ /*- * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_raid3, "GEOM RAID-3 functionality"); static MALLOC_DEFINE(M_RAID3, "raid3_data", "GEOM_RAID3 Data"); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW, 0, "GEOM_RAID3 stuff"); u_int g_raid3_debug = 0; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RWTUN, &g_raid3_debug, 0, "Debug level"); static u_int g_raid3_timeout = 4; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_raid3_timeout, 0, "Time to wait on all raid3 components"); static u_int g_raid3_idletime = 5; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RWTUN, &g_raid3_idletime, 0, "Mark components as clean when idling"); static u_int g_raid3_disconnect_on_failure = 1; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN, &g_raid3_disconnect_on_failure, 0, "Disconnect component on I/O failure."); static u_int g_raid3_syncreqs = 2; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, sync_requests, CTLFLAG_RDTUN, &g_raid3_syncreqs, 0, "Parallel synchronization I/O requests."); static u_int g_raid3_use_malloc = 0; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, use_malloc, CTLFLAG_RDTUN, &g_raid3_use_malloc, 0, "Use malloc(9) instead of uma(9)."); static u_int g_raid3_n64k = 50; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RDTUN, &g_raid3_n64k, 0, "Maximum number of 64kB allocations"); static u_int g_raid3_n16k = 200; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n16k, CTLFLAG_RDTUN, &g_raid3_n16k, 0, "Maximum number of 16kB allocations"); static u_int g_raid3_n4k = 1200; SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RDTUN, &g_raid3_n4k, 0, "Maximum number of 4kB allocations"); static SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0, "GEOM_RAID3 statistics"); static u_int g_raid3_parity_mismatch = 0; SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD, &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode"); #define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ } while (0) static eventhandler_tag g_raid3_post_sync = NULL; static int 
g_raid3_shutdown = 0; static int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_raid3_taste; static void g_raid3_init(struct g_class *mp); static void g_raid3_fini(struct g_class *mp); struct g_class g_raid3_class = { .name = G_RAID3_CLASS_NAME, .version = G_VERSION, .ctlreq = g_raid3_config, .taste = g_raid3_taste, .destroy_geom = g_raid3_destroy_geom, .init = g_raid3_init, .fini = g_raid3_fini }; static void g_raid3_destroy_provider(struct g_raid3_softc *sc); static int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state); static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force); static void g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type); static int g_raid3_register_request(struct bio *pbp); static void g_raid3_sync_release(struct g_raid3_softc *sc); static const char * g_raid3_disk_state2str(int state) { switch (state) { case G_RAID3_DISK_STATE_NODISK: return ("NODISK"); case G_RAID3_DISK_STATE_NONE: return ("NONE"); case G_RAID3_DISK_STATE_NEW: return ("NEW"); case G_RAID3_DISK_STATE_ACTIVE: return ("ACTIVE"); case G_RAID3_DISK_STATE_STALE: return ("STALE"); case G_RAID3_DISK_STATE_SYNCHRONIZING: return ("SYNCHRONIZING"); case G_RAID3_DISK_STATE_DISCONNECTED: return ("DISCONNECTED"); default: return ("INVALID"); } } static const char * g_raid3_device_state2str(int state) { switch (state) { case G_RAID3_DEVICE_STATE_STARTING: return ("STARTING"); case G_RAID3_DEVICE_STATE_DEGRADED: return ("DEGRADED"); case G_RAID3_DEVICE_STATE_COMPLETE: return ("COMPLETE"); default: return ("INVALID"); } } const char * g_raid3_get_diskname(struct g_raid3_disk *disk) { if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) return ("[unknown]"); return (disk->d_name); } static void * g_raid3_alloc(struct g_raid3_softc *sc, size_t size, int flags) { void *ptr; enum g_raid3_zones zone; if (g_raid3_use_malloc || (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES) ptr = malloc(size, M_RAID3, flags); else { ptr = uma_zalloc_arg(sc->sc_zones[zone].sz_zone, &sc->sc_zones[zone], flags); sc->sc_zones[zone].sz_requested++; if (ptr == NULL) sc->sc_zones[zone].sz_failed++; } return (ptr); } static void g_raid3_free(struct g_raid3_softc *sc, void *ptr, size_t size) { enum g_raid3_zones zone; if (g_raid3_use_malloc || (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES) free(ptr, M_RAID3); else { uma_zfree_arg(sc->sc_zones[zone].sz_zone, ptr, &sc->sc_zones[zone]); } } static int g_raid3_uma_ctor(void *mem, int size, void *arg, int flags) { struct g_raid3_zone *sz = arg; if (sz->sz_max > 0 && sz->sz_inuse == sz->sz_max) return (ENOMEM); sz->sz_inuse++; return (0); } static void g_raid3_uma_dtor(void *mem, int size, void *arg) { struct g_raid3_zone *sz = arg; sz->sz_inuse--; } #define g_raid3_xor(src, dst, size) \ _g_raid3_xor((uint64_t *)(src), \ (uint64_t *)(dst), (size_t)size) static void _g_raid3_xor(uint64_t *src, uint64_t *dst, size_t size) { KASSERT((size % 128) == 0, ("Invalid size: %zu.", size)); for (; size > 0; size -= 128) { *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); *dst++ ^= (*src++); } } static int g_raid3_is_zero(struct bio 
*bp) { static const uint64_t zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; u_char *addr; ssize_t size; size = bp->bio_length; addr = (u_char *)bp->bio_data; for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) { if (bcmp(addr, zeros, sizeof(zeros)) != 0) return (0); } return (1); } /* * --- Events handling functions --- * Events in geom_raid3 are used to maintain disks and device status * from one thread to simplify locking. */ static void g_raid3_event_free(struct g_raid3_event *ep) { free(ep, M_RAID3); } int g_raid3_event_send(void *arg, int state, int flags) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct g_raid3_event *ep; int error; ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK); G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep); if ((flags & G_RAID3_EVENT_DEVICE) != 0) { disk = NULL; sc = arg; } else { disk = arg; sc = disk->d_softc; } ep->e_disk = disk; ep->e_state = state; ep->e_flags = flags; ep->e_error = 0; mtx_lock(&sc->sc_events_mtx); TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_events_mtx); G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); mtx_lock(&sc->sc_queue_mtx); wakeup(sc); wakeup(&sc->sc_queue); mtx_unlock(&sc->sc_queue_mtx); if ((flags & G_RAID3_EVENT_DONTWAIT) != 0) return (0); sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep); sx_xunlock(&sc->sc_lock); while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) { mtx_lock(&sc->sc_events_mtx); MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event", hz * 5); } error = ep->e_error; g_raid3_event_free(ep); sx_xlock(&sc->sc_lock); return (error); } static struct g_raid3_event * g_raid3_event_get(struct g_raid3_softc *sc) { struct g_raid3_event *ep; mtx_lock(&sc->sc_events_mtx); ep = TAILQ_FIRST(&sc->sc_events); mtx_unlock(&sc->sc_events_mtx); return (ep); } static void g_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep) { mtx_lock(&sc->sc_events_mtx); TAILQ_REMOVE(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_events_mtx); } static void g_raid3_event_cancel(struct g_raid3_disk *disk) { struct g_raid3_softc *sc; struct g_raid3_event *ep, *tmpep; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); mtx_lock(&sc->sc_events_mtx); TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) continue; if (ep->e_disk != disk) continue; TAILQ_REMOVE(&sc->sc_events, ep, e_next); if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) g_raid3_event_free(ep); else { ep->e_error = ECANCELED; wakeup(ep); } } mtx_unlock(&sc->sc_events_mtx); } /* * Return the number of disks in the given state. * If state is equal to -1, count all connected disks. 
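_g_raid3_xor() above is an unrolled 64-bit XOR over 128-byte chunks, and g_raid3_is_zero() checks a buffer for all zeroes (used by the VERIFY read path further down). The parity rule itself is simple: the parity column is the XOR of all data columns, so XOR-ing the parity with all but one column regenerates the missing one. A byte-wise userland sketch, with no unrolling and illustrative buffers only:

#include <assert.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>

static void
xor_buf(const uint8_t *src, uint8_t *dst, size_t len)
{
	while (len-- > 0)
		*dst++ ^= *src++;
}

int
main(void)
{
	uint8_t d0[8] = "column0", d1[8] = "column1", parity[8], rebuilt[8];

	memcpy(parity, d0, sizeof(parity));	/* parity = d0 ^ d1 */
	xor_buf(d1, parity, sizeof(parity));

	memcpy(rebuilt, parity, sizeof(rebuilt));	/* pretend d0 is lost */
	xor_buf(d1, rebuilt, sizeof(rebuilt));
	assert(memcmp(rebuilt, d0, sizeof(d0)) == 0);	/* d0 recovered */
	return (0);
}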
*/ u_int g_raid3_ndisks(struct g_raid3_softc *sc, int state) { struct g_raid3_disk *disk; u_int n, ndisks; sx_assert(&sc->sc_lock, SX_LOCKED); for (n = ndisks = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; if (state == -1 || disk->d_state == state) ndisks++; } return (ndisks); } static u_int g_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp) { struct bio *bp; u_int nreqs = 0; mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_from == cp) nreqs++; } mtx_unlock(&sc->sc_queue_mtx); return (nreqs); } static int g_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp) { if (cp->index > 0) { G_RAID3_DEBUG(2, "I/O requests for %s exist, can't destroy it now.", cp->provider->name); return (1); } if (g_raid3_nrequests(sc, cp) > 0) { G_RAID3_DEBUG(2, "I/O requests for %s in queue, can't destroy it now.", cp->provider->name); return (1); } return (0); } static void g_raid3_destroy_consumer(void *arg, int flags __unused) { struct g_consumer *cp; g_topology_assert(); cp = arg; G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); g_detach(cp); g_destroy_consumer(cp); } static void g_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) { struct g_provider *pp; int retaste_wait; g_topology_assert(); cp->private = NULL; if (g_raid3_is_busy(sc, cp)) return; G_RAID3_DEBUG(2, "Consumer %s destroyed.", cp->provider->name); pp = cp->provider; retaste_wait = 0; if (cp->acw == 1) { if ((pp->geom->flags & G_GEOM_WITHER) == 0) retaste_wait = 1; } G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, -cp->acw, -cp->ace, 0); if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); if (retaste_wait) { /* * After retaste event was send (inside g_access()), we can send * event to detach and destroy consumer. * A class, which has consumer to the given provider connected * will not receive retaste event for the provider. * This is the way how I ignore retaste events when I close * consumers opened for write: I detach and destroy consumer * after retaste event is sent. */ g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL); return; } G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name); g_detach(cp); g_destroy_consumer(cp); } static int g_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp) { struct g_consumer *cp; int error; g_topology_assert_not(); KASSERT(disk->d_consumer == NULL, ("Disk already connected (device %s).", disk->d_softc->sc_name)); g_topology_lock(); cp = g_new_consumer(disk->d_softc->sc_geom); error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); g_topology_unlock(); return (error); } error = g_access(cp, 1, 1, 1); g_topology_unlock(); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).", pp->name, error); return (error); } disk->d_consumer = cp; disk->d_consumer->private = disk; disk->d_consumer->index = 0; G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk)); return (0); } static void g_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) { g_topology_assert(); if (cp == NULL) return; if (cp->provider != NULL) g_raid3_kill_consumer(sc, cp); else g_destroy_consumer(cp); } /* * Initialize disk. This means allocate memory, create consumer, attach it * to the provider and open access (r1w1e1) to it. 
*/ static struct g_raid3_disk * g_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp, struct g_raid3_metadata *md, int *errorp) { struct g_raid3_disk *disk; int error; disk = &sc->sc_disks[md->md_no]; error = g_raid3_connect_disk(disk, pp); if (error != 0) { if (errorp != NULL) *errorp = error; return (NULL); } disk->d_state = G_RAID3_DISK_STATE_NONE; disk->d_flags = md->md_dflags; if (md->md_provider[0] != '\0') disk->d_flags |= G_RAID3_DISK_FLAG_HARDCODED; disk->d_sync.ds_consumer = NULL; disk->d_sync.ds_offset = md->md_sync_offset; disk->d_sync.ds_offset_done = md->md_sync_offset; disk->d_genid = md->md_genid; disk->d_sync.ds_syncid = md->md_syncid; if (errorp != NULL) *errorp = 0; return (disk); } static void g_raid3_destroy_disk(struct g_raid3_disk *disk) { struct g_raid3_softc *sc; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); if (disk->d_state == G_RAID3_DISK_STATE_NODISK) return; g_raid3_event_cancel(disk); switch (disk->d_state) { case G_RAID3_DISK_STATE_SYNCHRONIZING: if (sc->sc_syncdisk != NULL) g_raid3_sync_stop(sc, 1); /* FALLTHROUGH */ case G_RAID3_DISK_STATE_NEW: case G_RAID3_DISK_STATE_STALE: case G_RAID3_DISK_STATE_ACTIVE: g_topology_lock(); g_raid3_disconnect_consumer(sc, disk->d_consumer); g_topology_unlock(); disk->d_consumer = NULL; break; default: KASSERT(0 == 1, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); } disk->d_state = G_RAID3_DISK_STATE_NODISK; } static void g_raid3_destroy_device(struct g_raid3_softc *sc) { struct g_raid3_event *ep; struct g_raid3_disk *disk; struct g_geom *gp; struct g_consumer *cp; u_int n; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); gp = sc->sc_geom; if (sc->sc_provider != NULL) g_raid3_destroy_provider(sc); for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state != G_RAID3_DISK_STATE_NODISK) { disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; g_raid3_update_metadata(disk); g_raid3_destroy_disk(disk); } } while ((ep = g_raid3_event_get(sc)) != NULL) { g_raid3_event_remove(sc, ep); if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) g_raid3_event_free(ep); else { ep->e_error = ECANCELED; ep->e_flags |= G_RAID3_EVENT_DONE; G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep); mtx_lock(&sc->sc_events_mtx); wakeup(ep); mtx_unlock(&sc->sc_events_mtx); } } callout_drain(&sc->sc_callout); cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer); g_topology_lock(); if (cp != NULL) g_raid3_disconnect_consumer(sc, cp); g_wither_geom(sc->sc_sync.ds_geom, ENXIO); G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); g_topology_unlock(); if (!g_raid3_use_malloc) { uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); } mtx_destroy(&sc->sc_queue_mtx); mtx_destroy(&sc->sc_events_mtx); sx_xunlock(&sc->sc_lock); sx_destroy(&sc->sc_lock); } static void g_raid3_orphan(struct g_consumer *cp) { struct g_raid3_disk *disk; g_topology_assert(); disk = cp->private; if (disk == NULL) return; disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); } static int g_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) { struct g_raid3_softc *sc; struct g_consumer *cp; off_t offset, length; u_char *sector; int error = 0; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); cp = 
disk->d_consumer; KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); length = cp->provider->sectorsize; offset = cp->provider->mediasize - length; sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO); if (md != NULL) raid3_metadata_encode(md, sector); error = g_write_data(cp, offset, sector, length); free(sector, M_RAID3); if (error != 0) { if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { G_RAID3_DEBUG(0, "Cannot write metadata on %s " "(device=%s, error=%d).", g_raid3_get_diskname(disk), sc->sc_name, error); disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; } else { G_RAID3_DEBUG(1, "Cannot write metadata on %s " "(device=%s, error=%d).", g_raid3_get_diskname(disk), sc->sc_name, error); } if (g_raid3_disconnect_on_failure && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { sc->sc_bump_id |= G_RAID3_BUMP_GENID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); } } return (error); } int g_raid3_clear_metadata(struct g_raid3_disk *disk) { int error; g_topology_assert_not(); sx_assert(&disk->d_softc->sc_lock, SX_LOCKED); error = g_raid3_write_metadata(disk, NULL); if (error == 0) { G_RAID3_DEBUG(2, "Metadata on %s cleared.", g_raid3_get_diskname(disk)); } else { G_RAID3_DEBUG(0, "Cannot clear metadata on disk %s (error=%d).", g_raid3_get_diskname(disk), error); } return (error); } void g_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) { struct g_raid3_softc *sc; struct g_provider *pp; sc = disk->d_softc; strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic)); md->md_version = G_RAID3_VERSION; strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); md->md_id = sc->sc_id; md->md_all = sc->sc_ndisks; md->md_genid = sc->sc_genid; md->md_mediasize = sc->sc_mediasize; md->md_sectorsize = sc->sc_sectorsize; md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK); md->md_no = disk->d_no; md->md_syncid = disk->d_sync.ds_syncid; md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK); if (disk->d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) md->md_sync_offset = 0; else { md->md_sync_offset = disk->d_sync.ds_offset_done / (sc->sc_ndisks - 1); } if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL) pp = disk->d_consumer->provider; else pp = NULL; if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0 && pp != NULL) strlcpy(md->md_provider, pp->name, sizeof(md->md_provider)); else bzero(md->md_provider, sizeof(md->md_provider)); if (pp != NULL) md->md_provsize = pp->mediasize; else md->md_provsize = 0; } void g_raid3_update_metadata(struct g_raid3_disk *disk) { struct g_raid3_softc *sc; struct g_raid3_metadata md; int error; g_topology_assert_not(); sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_LOCKED); g_raid3_fill_metadata(disk, &md); error = g_raid3_write_metadata(disk, &md); if (error == 0) { G_RAID3_DEBUG(2, "Metadata on %s updated.", g_raid3_get_diskname(disk)); } else { G_RAID3_DEBUG(0, "Cannot update metadata on disk %s (error=%d).", g_raid3_get_diskname(disk), error); } } static void g_raid3_bump_syncid(struct g_raid3_softc *sc) { struct g_raid3_disk *disk; u_int n; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, ("%s called with no active disks (device=%s).", __func__, sc->sc_name)); sc->sc_syncid++; G_RAID3_DEBUG(1, "Device 
%s: syncid bumped to %u.", sc->sc_name, sc->sc_syncid); for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { disk->d_sync.ds_syncid = sc->sc_syncid; g_raid3_update_metadata(disk); } } } static void g_raid3_bump_genid(struct g_raid3_softc *sc) { struct g_raid3_disk *disk; u_int n; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, ("%s called with no active disks (device=%s).", __func__, sc->sc_name)); sc->sc_genid++; G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, sc->sc_genid); for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { disk->d_genid = sc->sc_genid; g_raid3_update_metadata(disk); } } } static int g_raid3_idle(struct g_raid3_softc *sc, int acw) { struct g_raid3_disk *disk; u_int i; int timeout; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if (sc->sc_provider == NULL) return (0); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) return (0); if (sc->sc_idle) return (0); if (sc->sc_writes > 0) return (0); if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) { timeout = g_raid3_idletime - (time_uptime - sc->sc_last_write); if (!g_raid3_shutdown && timeout > 0) return (timeout); } sc->sc_idle = 1; for (i = 0; i < sc->sc_ndisks; i++) { disk = &sc->sc_disks[i]; if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) continue; G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", g_raid3_get_diskname(disk), sc->sc_name); disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; g_raid3_update_metadata(disk); } return (0); } static void g_raid3_unidle(struct g_raid3_softc *sc) { struct g_raid3_disk *disk; u_int i; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) return; sc->sc_idle = 0; sc->sc_last_write = time_uptime; for (i = 0; i < sc->sc_ndisks; i++) { disk = &sc->sc_disks[i]; if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) continue; G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", g_raid3_get_diskname(disk), sc->sc_name); disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; g_raid3_update_metadata(disk); } } /* * Treat bio_driver1 field in parent bio as list head and field bio_caller1 * in child bio as pointer to the next element on the list. 
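The comment above and the macros that follow describe an intrusive list built from fields the bios already carry: the parent's bio_driver1 is the list head and each child's bio_caller1 is the next pointer, so chaining the per-disk clones costs no extra allocation. A stand-alone sketch of the tail append that g_raid3_clone_bio() performs further down; struct fakebio is a stand-in for struct bio.

#include <stddef.h>

struct fakebio {
	void *bio_driver1;	/* parent: head of the child list */
	void *bio_caller1;	/* child: next sibling */
};

static void
append_child(struct fakebio *parent, struct fakebio *child)
{
	struct fakebio *bp;

	child->bio_caller1 = NULL;
	if (parent->bio_driver1 == NULL) {
		parent->bio_driver1 = child;
		return;
	}
	for (bp = parent->bio_driver1; bp->bio_caller1 != NULL;
	    bp = bp->bio_caller1)
		;
	bp->bio_caller1 = child;
}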
*/ #define G_RAID3_HEAD_BIO(pbp) (pbp)->bio_driver1 #define G_RAID3_NEXT_BIO(cbp) (cbp)->bio_caller1 #define G_RAID3_FOREACH_BIO(pbp, bp) \ for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL; \ (bp) = G_RAID3_NEXT_BIO(bp)) #define G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp) \ for ((bp) = G_RAID3_HEAD_BIO(pbp); \ (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1); \ (bp) = (tmpbp)) static void g_raid3_init_bio(struct bio *pbp) { G_RAID3_HEAD_BIO(pbp) = NULL; } static void g_raid3_remove_bio(struct bio *cbp) { struct bio *pbp, *bp; pbp = cbp->bio_parent; if (G_RAID3_HEAD_BIO(pbp) == cbp) G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); else { G_RAID3_FOREACH_BIO(pbp, bp) { if (G_RAID3_NEXT_BIO(bp) == cbp) { G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); break; } } } G_RAID3_NEXT_BIO(cbp) = NULL; } static void g_raid3_replace_bio(struct bio *sbp, struct bio *dbp) { struct bio *pbp, *bp; g_raid3_remove_bio(sbp); pbp = dbp->bio_parent; G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp); if (G_RAID3_HEAD_BIO(pbp) == dbp) G_RAID3_HEAD_BIO(pbp) = sbp; else { G_RAID3_FOREACH_BIO(pbp, bp) { if (G_RAID3_NEXT_BIO(bp) == dbp) { G_RAID3_NEXT_BIO(bp) = sbp; break; } } } G_RAID3_NEXT_BIO(dbp) = NULL; } static void g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp) { struct bio *bp, *pbp; size_t size; pbp = cbp->bio_parent; pbp->bio_children--; KASSERT(cbp->bio_data != NULL, ("NULL bio_data")); size = pbp->bio_length / (sc->sc_ndisks - 1); g_raid3_free(sc, cbp->bio_data, size); if (G_RAID3_HEAD_BIO(pbp) == cbp) { G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); G_RAID3_NEXT_BIO(cbp) = NULL; g_destroy_bio(cbp); } else { G_RAID3_FOREACH_BIO(pbp, bp) { if (G_RAID3_NEXT_BIO(bp) == cbp) break; } if (bp != NULL) { KASSERT(G_RAID3_NEXT_BIO(bp) != NULL, ("NULL bp->bio_driver1")); G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); G_RAID3_NEXT_BIO(cbp) = NULL; } g_destroy_bio(cbp); } } static struct bio * g_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp) { struct bio *bp, *cbp; size_t size; int memflag; cbp = g_clone_bio(pbp); if (cbp == NULL) return (NULL); size = pbp->bio_length / (sc->sc_ndisks - 1); if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) memflag = M_WAITOK; else memflag = M_NOWAIT; cbp->bio_data = g_raid3_alloc(sc, size, memflag); if (cbp->bio_data == NULL) { pbp->bio_children--; g_destroy_bio(cbp); return (NULL); } G_RAID3_NEXT_BIO(cbp) = NULL; if (G_RAID3_HEAD_BIO(pbp) == NULL) G_RAID3_HEAD_BIO(pbp) = cbp; else { G_RAID3_FOREACH_BIO(pbp, bp) { if (G_RAID3_NEXT_BIO(bp) == NULL) { G_RAID3_NEXT_BIO(bp) = cbp; break; } } } return (cbp); } static void g_raid3_scatter(struct bio *pbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct bio *bp, *cbp, *tmpbp; off_t atom, cadd, padd, left; int first; sc = pbp->bio_to->geom->softc; bp = NULL; if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { /* * Find bio for which we should calculate data. */ G_RAID3_FOREACH_BIO(pbp, cbp) { if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { bp = cbp; break; } } KASSERT(bp != NULL, ("NULL parity bio.")); } atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); cadd = padd = 0; for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { G_RAID3_FOREACH_BIO(pbp, cbp) { if (cbp == bp) continue; bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom); padd += atom; } cadd += atom; } if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { /* * Calculate parity. 
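Just above, g_raid3_scatter() has already split each logical sector of the parent request into (ndisks - 1) equal "atoms", one per data component, before the parity is computed here. A sketch of that split for a single sector; the geometry and buffer sizes are illustrative only.

#include <stdint.h>
#include <string.h>

#define SECTORSIZE	512
#define NDISKS		3			/* 2 data + 1 parity */
#define ATOM		(SECTORSIZE / (NDISKS - 1))

/* One parent sector becomes one ATOM-sized piece per data child. */
static void
scatter_sector(const uint8_t *parent, uint8_t *child[NDISKS - 1])
{
	size_t i;

	for (i = 0; i < NDISKS - 1; i++)
		memcpy(child[i], parent + i * ATOM, ATOM);
}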
*/ first = 1; G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { if (cbp == bp) continue; if (first) { bcopy(cbp->bio_data, bp->bio_data, bp->bio_length); first = 0; } else { g_raid3_xor(cbp->bio_data, bp->bio_data, bp->bio_length); } if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0) g_raid3_destroy_bio(sc, cbp); } } G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { struct g_consumer *cp; disk = cbp->bio_caller2; cp = disk->d_consumer; cbp->bio_to = cp->provider; G_RAID3_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; sc->sc_writes++; g_io_request(cbp, cp); } } static void g_raid3_gather(struct bio *pbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct bio *xbp, *fbp, *cbp; off_t atom, cadd, padd, left; sc = pbp->bio_to->geom->softc; /* * Find bio for which we have to calculate data. * While going through this path, check if all requests * succeeded, if not, deny whole request. * If we're in COMPLETE mode, we allow one request to fail, * so if we find one, we're sending it to the parity consumer. * If there are more failed requests, we deny whole request. */ xbp = fbp = NULL; G_RAID3_FOREACH_BIO(pbp, cbp) { if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { KASSERT(xbp == NULL, ("More than one parity bio.")); xbp = cbp; } if (cbp->bio_error == 0) continue; /* * Found failed request. */ if (fbp == NULL) { if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { /* * We are already in degraded mode, so we can't * accept any failures. */ if (pbp->bio_error == 0) pbp->bio_error = cbp->bio_error; } else { fbp = cbp; } } else { /* * Next failed request, that's too many. */ if (pbp->bio_error == 0) pbp->bio_error = fbp->bio_error; } disk = cbp->bio_caller2; if (disk == NULL) continue; if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; G_RAID3_LOGREQ(0, cbp, "Request failed (error=%d).", cbp->bio_error); } else { G_RAID3_LOGREQ(1, cbp, "Request failed (error=%d).", cbp->bio_error); } if (g_raid3_disconnect_on_failure && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { sc->sc_bump_id |= G_RAID3_BUMP_GENID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); } } if (pbp->bio_error != 0) goto finish; if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY; if (xbp != fbp) g_raid3_replace_bio(xbp, fbp); g_raid3_destroy_bio(sc, fbp); } else if (fbp != NULL) { struct g_consumer *cp; /* * One request failed, so send the same request to * the parity consumer. */ disk = pbp->bio_driver2; if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { pbp->bio_error = fbp->bio_error; goto finish; } pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; pbp->bio_inbed--; fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR); if (disk->d_no == sc->sc_ndisks - 1) fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; fbp->bio_error = 0; fbp->bio_completed = 0; fbp->bio_children = 0; fbp->bio_inbed = 0; cp = disk->d_consumer; fbp->bio_caller2 = disk; fbp->bio_to = cp->provider; G_RAID3_LOGREQ(3, fbp, "Sending request (recover)."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(fbp, cp); return; } if (xbp != NULL) { /* * Calculate parity. 
*/ G_RAID3_FOREACH_BIO(pbp, cbp) { if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) continue; g_raid3_xor(cbp->bio_data, xbp->bio_data, xbp->bio_length); } xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { if (!g_raid3_is_zero(xbp)) { g_raid3_parity_mismatch++; pbp->bio_error = EIO; goto finish; } g_raid3_destroy_bio(sc, xbp); } } atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); cadd = padd = 0; for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { G_RAID3_FOREACH_BIO(pbp, cbp) { bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom); pbp->bio_completed += atom; padd += atom; } cadd += atom; } finish: if (pbp->bio_error == 0) G_RAID3_LOGREQ(3, pbp, "Request finished."); else { if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) G_RAID3_LOGREQ(1, pbp, "Verification error."); else G_RAID3_LOGREQ(0, pbp, "Request failed."); } pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK; while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) g_raid3_destroy_bio(sc, cbp); g_io_deliver(pbp, pbp->bio_error); } static void g_raid3_done(struct bio *bp) { struct g_raid3_softc *sc; sc = bp->bio_from->geom->softc; bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR; G_RAID3_LOGREQ(3, bp, "Regular request done (error=%d).", bp->bio_error); mtx_lock(&sc->sc_queue_mtx); bioq_insert_head(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); wakeup(&sc->sc_queue); } static void g_raid3_regular_request(struct bio *cbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct bio *pbp; g_topology_assert_not(); pbp = cbp->bio_parent; sc = pbp->bio_to->geom->softc; cbp->bio_from->index--; if (cbp->bio_cmd == BIO_WRITE) sc->sc_writes--; disk = cbp->bio_from->private; if (disk == NULL) { g_topology_lock(); g_raid3_kill_consumer(sc, cbp->bio_from); g_topology_unlock(); } G_RAID3_LOGREQ(3, cbp, "Request finished."); pbp->bio_inbed++; KASSERT(pbp->bio_inbed <= pbp->bio_children, ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, pbp->bio_children)); if (pbp->bio_inbed != pbp->bio_children) return; switch (pbp->bio_cmd) { case BIO_READ: g_raid3_gather(pbp); break; case BIO_WRITE: case BIO_DELETE: { int error = 0; pbp->bio_completed = pbp->bio_length; while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) { if (cbp->bio_error == 0) { g_raid3_destroy_bio(sc, cbp); continue; } if (error == 0) error = cbp->bio_error; else if (pbp->bio_error == 0) { /* * Next failed request, that's too many. */ pbp->bio_error = error; } disk = cbp->bio_caller2; if (disk == NULL) { g_raid3_destroy_bio(sc, cbp); continue; } if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; G_RAID3_LOGREQ(0, cbp, "Request failed (error=%d).", cbp->bio_error); } else { G_RAID3_LOGREQ(1, cbp, "Request failed (error=%d).", cbp->bio_error); } if (g_raid3_disconnect_on_failure && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { sc->sc_bump_id |= G_RAID3_BUMP_GENID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); } g_raid3_destroy_bio(sc, cbp); } if (pbp->bio_error == 0) G_RAID3_LOGREQ(3, pbp, "Request finished."); else G_RAID3_LOGREQ(0, pbp, "Request failed."); pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED; pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY; bioq_remove(&sc->sc_inflight, pbp); /* Release delayed sync requests if possible. 
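 *
 * A synchronization request that overlapped this write (for example a
 * sync request covering provider range [2M, 2M + 256k) while this
 * request wrote [2M + 64k, 2M + 128k)) was parked on sc_sync_delayed;
 * now that the write has left sc_inflight it may be re-issued.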
*/ g_raid3_sync_release(sc); g_io_deliver(pbp, pbp->bio_error); break; } } } static void g_raid3_sync_done(struct bio *bp) { struct g_raid3_softc *sc; G_RAID3_LOGREQ(3, bp, "Synchronization request delivered."); sc = bp->bio_from->geom->softc; bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC; mtx_lock(&sc->sc_queue_mtx); bioq_insert_head(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); wakeup(&sc->sc_queue); } static void g_raid3_flush(struct g_raid3_softc *sc, struct bio *bp) { struct bio_queue_head queue; struct g_raid3_disk *disk; struct g_consumer *cp; struct bio *cbp; u_int i; bioq_init(&queue); for (i = 0; i < sc->sc_ndisks; i++) { disk = &sc->sc_disks[i]; if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); cbp->bio_done = g_std_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; } for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); G_RAID3_LOGREQ(3, cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); g_io_request(cbp, disk->d_consumer); } } static void g_raid3_start(struct bio *bp) { struct g_raid3_softc *sc; sc = bp->bio_to->geom->softc; /* * If sc == NULL or there are no valid disks, provider's error * should be set and g_raid3_start() should not be called at all. */ KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE), ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_RAID3_LOGREQ(3, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_FLUSH: g_raid3_flush(sc, bp); return; case BIO_GETATTR: default: g_io_deliver(bp, EOPNOTSUPP); return; } mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); wakeup(sc); } /* * Return TRUE if the given request is colliding with a in-progress * synchronization request. */ static int g_raid3_sync_collision(struct g_raid3_softc *sc, struct bio *bp) { struct g_raid3_disk *disk; struct bio *sbp; off_t rstart, rend, sstart, send; int i; disk = sc->sc_syncdisk; if (disk == NULL) return (0); rstart = bp->bio_offset; rend = bp->bio_offset + bp->bio_length; for (i = 0; i < g_raid3_syncreqs; i++) { sbp = disk->d_sync.ds_bios[i]; if (sbp == NULL) continue; sstart = sbp->bio_offset; send = sbp->bio_length; if (sbp->bio_cmd == BIO_WRITE) { sstart *= sc->sc_ndisks - 1; send *= sc->sc_ndisks - 1; } send += sstart; if (rend > sstart && rstart < send) return (1); } return (0); } /* * Return TRUE if the given sync request is colliding with a in-progress regular * request. 
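 *
 * The test below is a plain byte-range overlap check: a sync request
 * covering [sstart, send) collides with a regular request covering
 * [rstart, rend) iff rend > sstart && rstart < send.  E.g. a sync
 * read of [2M, 2M + 128k) collides with a write to
 * [2M + 64k, 2M + 96k), but not with one to [2M + 128k, 2M + 192k).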
*/ static int g_raid3_regular_collision(struct g_raid3_softc *sc, struct bio *sbp) { off_t rstart, rend, sstart, send; struct bio *bp; if (sc->sc_syncdisk == NULL) return (0); sstart = sbp->bio_offset; send = sstart + sbp->bio_length; TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) { rstart = bp->bio_offset; rend = bp->bio_offset + bp->bio_length; if (rend > sstart && rstart < send) return (1); } return (0); } /* * Puts request onto delayed queue. */ static void g_raid3_regular_delay(struct g_raid3_softc *sc, struct bio *bp) { G_RAID3_LOGREQ(2, bp, "Delaying request."); bioq_insert_head(&sc->sc_regular_delayed, bp); } /* * Puts synchronization request onto delayed queue. */ static void g_raid3_sync_delay(struct g_raid3_softc *sc, struct bio *bp) { G_RAID3_LOGREQ(2, bp, "Delaying synchronization request."); bioq_insert_tail(&sc->sc_sync_delayed, bp); } /* * Releases delayed regular requests which don't collide anymore with sync * requests. */ static void g_raid3_regular_release(struct g_raid3_softc *sc) { struct bio *bp, *bp2; TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) { if (g_raid3_sync_collision(sc, bp)) continue; bioq_remove(&sc->sc_regular_delayed, bp); G_RAID3_LOGREQ(2, bp, "Releasing delayed request (%p).", bp); mtx_lock(&sc->sc_queue_mtx); bioq_insert_head(&sc->sc_queue, bp); #if 0 /* * wakeup() is not needed, because this function is called from * the worker thread. */ wakeup(&sc->sc_queue); #endif mtx_unlock(&sc->sc_queue_mtx); } } /* * Releases delayed sync requests which don't collide anymore with regular * requests. */ static void g_raid3_sync_release(struct g_raid3_softc *sc) { struct bio *bp, *bp2; TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) { if (g_raid3_regular_collision(sc, bp)) continue; bioq_remove(&sc->sc_sync_delayed, bp); G_RAID3_LOGREQ(2, bp, "Releasing delayed synchronization request."); g_io_request(bp, bp->bio_from); } } /* * Handle synchronization requests. * Every synchronization request is two-steps process: first, READ request is * send to active provider and then WRITE request (with read data) to the provider * being synchronized. When WRITE is finished, new synchronization request is * send. */ static void g_raid3_sync_request(struct bio *bp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; bp->bio_from->index--; sc = bp->bio_from->geom->softc; disk = bp->bio_from->private; if (disk == NULL) { sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ g_topology_lock(); g_raid3_kill_consumer(sc, bp->bio_from); g_topology_unlock(); free(bp->bio_data, M_RAID3); g_destroy_bio(bp); sx_xlock(&sc->sc_lock); return; } /* * Synchronization request. */ switch (bp->bio_cmd) { case BIO_READ: { struct g_consumer *cp; u_char *dst, *src; off_t left; u_int atom; if (bp->bio_error != 0) { G_RAID3_LOGREQ(0, bp, "Synchronization request failed (error=%d).", bp->bio_error); g_destroy_bio(bp); return; } G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); dst = src = bp->bio_data; if (disk->d_no == sc->sc_ndisks - 1) { u_int n; /* Parity component. */ for (left = bp->bio_length; left > 0; left -= sc->sc_sectorsize) { bcopy(src, dst, atom); src += atom; for (n = 1; n < sc->sc_ndisks - 1; n++) { g_raid3_xor(src, dst, atom); src += atom; } dst += atom; } } else { /* Regular component. 
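 *
 * The READ above fetched whole array sectors; each sector consists of
 * (sc_ndisks - 1) interleaved atoms, one per data component.  For
 * illustration, with a hypothetical array sectorsize of 1024 and
 * 3 disks (atom = 512), component 0 owns bytes [0, 512) of every
 * sector and component 1 owns bytes [512, 1024), so the loop below
 * compacts just this component's atoms to the front of the buffer
 * before it is written back as a component-sized WRITE.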
*/ src += atom * disk->d_no; for (left = bp->bio_length; left > 0; left -= sc->sc_sectorsize) { bcopy(src, dst, atom); src += sc->sc_sectorsize; dst += atom; } } bp->bio_driver1 = bp->bio_driver2 = NULL; bp->bio_pflags = 0; bp->bio_offset /= sc->sc_ndisks - 1; bp->bio_length /= sc->sc_ndisks - 1; bp->bio_cmd = BIO_WRITE; bp->bio_cflags = 0; bp->bio_children = bp->bio_inbed = 0; cp = disk->d_consumer; KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(bp, cp); return; } case BIO_WRITE: { struct g_raid3_disk_sync *sync; off_t boffset, moffset; void *data; int i; if (bp->bio_error != 0) { G_RAID3_LOGREQ(0, bp, "Synchronization request failed (error=%d).", bp->bio_error); g_destroy_bio(bp); sc->sc_bump_id |= G_RAID3_BUMP_GENID; g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, G_RAID3_EVENT_DONTWAIT); return; } G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); sync = &disk->d_sync; if (sync->ds_offset == sc->sc_mediasize / (sc->sc_ndisks - 1) || sync->ds_consumer == NULL || (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { /* Don't send more synchronization requests. */ sync->ds_inflight--; if (sync->ds_bios != NULL) { i = (int)(uintptr_t)bp->bio_caller1; sync->ds_bios[i] = NULL; } free(bp->bio_data, M_RAID3); g_destroy_bio(bp); if (sync->ds_inflight > 0) return; if (sync->ds_consumer == NULL || (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { return; } /* * Disk up-to-date, activate it. */ g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE, G_RAID3_EVENT_DONTWAIT); return; } /* Send next synchronization request. */ data = bp->bio_data; g_reset_bio(bp); bp->bio_cmd = BIO_READ; bp->bio_offset = sync->ds_offset * (sc->sc_ndisks - 1); bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); sync->ds_offset += bp->bio_length / (sc->sc_ndisks - 1); bp->bio_done = g_raid3_sync_done; bp->bio_data = data; bp->bio_from = sync->ds_consumer; bp->bio_to = sc->sc_provider; G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); sync->ds_consumer->index++; /* * Delay the request if it is colliding with a regular request. */ if (g_raid3_regular_collision(sc, bp)) g_raid3_sync_delay(sc, bp); else g_io_request(bp, sync->ds_consumer); /* Release delayed requests if possible. */ g_raid3_regular_release(sc); /* Find the smallest offset. */ moffset = sc->sc_mediasize; for (i = 0; i < g_raid3_syncreqs; i++) { bp = sync->ds_bios[i]; boffset = bp->bio_offset; if (bp->bio_cmd == BIO_WRITE) boffset *= sc->sc_ndisks - 1; if (boffset < moffset) moffset = boffset; } if (sync->ds_offset_done + (MAXPHYS * 100) < moffset) { /* Update offset_done on every 100 blocks. 
*/ sync->ds_offset_done = moffset; g_raid3_update_metadata(disk); } return; } default: KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", bp->bio_cmd, sc->sc_name)); break; } } static int g_raid3_register_request(struct bio *pbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; struct g_consumer *cp; struct bio *cbp, *tmpbp; off_t offset, length; u_int n, ndisks; int round_robin, verify; ndisks = 0; sc = pbp->bio_to->geom->softc; if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 && sc->sc_syncdisk == NULL) { g_io_deliver(pbp, EIO); return (0); } g_raid3_init_bio(pbp); length = pbp->bio_length / (sc->sc_ndisks - 1); offset = pbp->bio_offset / (sc->sc_ndisks - 1); round_robin = verify = 0; switch (pbp->bio_cmd) { case BIO_READ: if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY; verify = 1; ndisks = sc->sc_ndisks; } else { verify = 0; ndisks = sc->sc_ndisks - 1; } if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 && sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { round_robin = 1; } else { round_robin = 0; } KASSERT(!round_robin || !verify, ("ROUND-ROBIN and VERIFY are mutually exclusive.")); pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1]; break; case BIO_WRITE: case BIO_DELETE: /* * Delay the request if it is colliding with a synchronization * request. */ if (g_raid3_sync_collision(sc, pbp)) { g_raid3_regular_delay(sc, pbp); return (0); } if (sc->sc_idle) g_raid3_unidle(sc); else sc->sc_last_write = time_uptime; ndisks = sc->sc_ndisks; break; } for (n = 0; n < ndisks; n++) { disk = &sc->sc_disks[n]; cbp = g_raid3_clone_bio(sc, pbp); if (cbp == NULL) { while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) g_raid3_destroy_bio(sc, cbp); /* * To prevent deadlock, we must run back up * with the ENOMEM for failed requests of any * of our consumers. Our own sync requests * can stick around, as they are finite. */ if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) { g_io_deliver(pbp, ENOMEM); return (0); } return (ENOMEM); } cbp->bio_offset = offset; cbp->bio_length = length; cbp->bio_done = g_raid3_done; switch (pbp->bio_cmd) { case BIO_READ: if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { /* * Replace invalid component with the parity * component. */ disk = &sc->sc_disks[sc->sc_ndisks - 1]; cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; } else if (round_robin && disk->d_no == sc->sc_round_robin) { /* * In round-robin mode skip one data component * and use parity component when reading. */ pbp->bio_driver2 = disk; disk = &sc->sc_disks[sc->sc_ndisks - 1]; cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; sc->sc_round_robin++; round_robin = 0; } else if (verify && disk->d_no == sc->sc_ndisks - 1) { cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; } break; case BIO_WRITE: case BIO_DELETE: if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { if (n == ndisks - 1) { /* * Active parity component, mark it as such. */ cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; } } else { pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; if (n == ndisks - 1) { /* * Parity component is not connected, * so destroy its request. 
*/ pbp->bio_pflags |= G_RAID3_BIO_PFLAG_NOPARITY; g_raid3_destroy_bio(sc, cbp); cbp = NULL; } else { cbp->bio_cflags |= G_RAID3_BIO_CFLAG_NODISK; disk = NULL; } } break; } if (cbp != NULL) cbp->bio_caller2 = disk; } switch (pbp->bio_cmd) { case BIO_READ: if (round_robin) { /* * If we are in round-robin mode and 'round_robin' is * still 1, it means, that we skipped parity component * for this read and must reset sc_round_robin field. */ sc->sc_round_robin = 0; } G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { disk = cbp->bio_caller2; cp = disk->d_consumer; cbp->bio_to = cp->provider; G_RAID3_LOGREQ(3, cbp, "Sending request."); KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); cp->index++; g_io_request(cbp, cp); } break; case BIO_WRITE: case BIO_DELETE: /* * Put request onto inflight queue, so we can check if new * synchronization requests don't collide with it. */ bioq_insert_tail(&sc->sc_inflight, pbp); /* * Bump syncid on first write. */ if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) { sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; g_raid3_bump_syncid(sc); } g_raid3_scatter(pbp); break; } return (0); } static int g_raid3_can_destroy(struct g_raid3_softc *sc) { struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); gp = sc->sc_geom; if (gp->softc == NULL) return (1); LIST_FOREACH(cp, &gp->consumer, consumer) { if (g_raid3_is_busy(sc, cp)) return (0); } gp = sc->sc_sync.ds_geom; LIST_FOREACH(cp, &gp->consumer, consumer) { if (g_raid3_is_busy(sc, cp)) return (0); } G_RAID3_DEBUG(2, "No I/O requests for %s, it can be destroyed.", sc->sc_name); return (1); } static int g_raid3_try_destroy(struct g_raid3_softc *sc) { g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); if (sc->sc_rootmount != NULL) { G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } g_topology_lock(); if (!g_raid3_can_destroy(sc)) { g_topology_unlock(); return (0); } sc->sc_geom->softc = NULL; sc->sc_sync.ds_geom->softc = NULL; if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) != 0) { g_topology_unlock(); G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, &sc->sc_worker); /* Unlock sc_lock here, as it can be destroyed after wakeup. */ sx_xunlock(&sc->sc_lock); wakeup(&sc->sc_worker); sc->sc_worker = NULL; } else { g_topology_unlock(); g_raid3_destroy_device(sc); free(sc->sc_disks, M_RAID3); free(sc, M_RAID3); } return (1); } /* * Worker thread. */ static void g_raid3_worker(void *arg) { struct g_raid3_softc *sc; struct g_raid3_event *ep; struct bio *bp; int timeout; sc = arg; thread_lock(curthread); sched_prio(curthread, PRIBIO); thread_unlock(curthread); sx_xlock(&sc->sc_lock); for (;;) { G_RAID3_DEBUG(5, "%s: Let's see...", __func__); /* * First take a look at events. * This is important to handle events before any I/O requests. */ ep = g_raid3_event_get(sc); if (ep != NULL) { g_raid3_event_remove(sc, ep); if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) { /* Update only device status. */ G_RAID3_DEBUG(3, "Running event for device %s.", sc->sc_name); ep->e_error = 0; g_raid3_update_device(sc, 1); } else { /* Update disk status. 
*/ G_RAID3_DEBUG(3, "Running event for disk %s.", g_raid3_get_diskname(ep->e_disk)); ep->e_error = g_raid3_update_disk(ep->e_disk, ep->e_state); if (ep->e_error == 0) g_raid3_update_device(sc, 0); } if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) { KASSERT(ep->e_error == 0, ("Error cannot be handled.")); g_raid3_event_free(ep); } else { ep->e_flags |= G_RAID3_EVENT_DONE; G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep); mtx_lock(&sc->sc_events_mtx); wakeup(ep); mtx_unlock(&sc->sc_events_mtx); } if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { if (g_raid3_try_destroy(sc)) { curthread->td_pflags &= ~TDP_GEOM; G_RAID3_DEBUG(1, "Thread exiting."); kproc_exit(0); } } G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__); continue; } /* * Check if we can mark array as CLEAN and if we can't take * how much seconds should we wait. */ timeout = g_raid3_idle(sc, -1); /* * Now I/O requests. */ /* Get first request from the queue. */ mtx_lock(&sc->sc_queue_mtx); bp = bioq_first(&sc->sc_queue); if (bp == NULL) { if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { mtx_unlock(&sc->sc_queue_mtx); if (g_raid3_try_destroy(sc)) { curthread->td_pflags &= ~TDP_GEOM; G_RAID3_DEBUG(1, "Thread exiting."); kproc_exit(0); } mtx_lock(&sc->sc_queue_mtx); } sx_xunlock(&sc->sc_lock); /* * XXX: We can miss an event here, because an event * can be added without sx-device-lock and without * mtx-queue-lock. Maybe I should just stop using * dedicated mutex for events synchronization and * stick with the queue lock? * The event will hang here until next I/O request * or next event is received. */ MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w1", timeout * hz); sx_xlock(&sc->sc_lock); G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__); continue; } process: bioq_remove(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_from->geom == sc->sc_sync.ds_geom && (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) { g_raid3_sync_request(bp); /* READ */ } else if (bp->bio_to != sc->sc_provider) { if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) g_raid3_regular_request(bp); else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) g_raid3_sync_request(bp); /* WRITE */ else { KASSERT(0, ("Invalid request cflags=0x%hx to=%s.", bp->bio_cflags, bp->bio_to->name)); } } else if (g_raid3_register_request(bp) != 0) { mtx_lock(&sc->sc_queue_mtx); bioq_insert_head(&sc->sc_queue, bp); /* * We are short in memory, let see if there are finished * request we can free. */ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) goto process; } /* * No finished regular request, so at least keep * synchronization running. 
*/ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) goto process; } sx_xunlock(&sc->sc_lock); MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:lowmem", hz / 10); sx_xlock(&sc->sc_lock); } G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__); } } static void g_raid3_update_idle(struct g_raid3_softc *sc, struct g_raid3_disk *disk) { sx_assert(&sc->sc_lock, SX_LOCKED); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) return; if (!sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) { G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", g_raid3_get_diskname(disk), sc->sc_name); disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; } else if (sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) { G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", g_raid3_get_diskname(disk), sc->sc_name); disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; } } static void g_raid3_sync_start(struct g_raid3_softc *sc) { struct g_raid3_disk *disk; struct g_consumer *cp; struct bio *bp; int error; u_int n; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_XLOCKED); KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, ("Device not in DEGRADED state (%s, %u).", sc->sc_name, sc->sc_state)); KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).", sc->sc_name, sc->sc_state)); disk = NULL; for (n = 0; n < sc->sc_ndisks; n++) { if (sc->sc_disks[n].d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) continue; disk = &sc->sc_disks[n]; break; } if (disk == NULL) return; sx_xunlock(&sc->sc_lock); g_topology_lock(); cp = g_new_consumer(sc->sc_sync.ds_geom); error = g_attach(cp, sc->sc_provider); KASSERT(error == 0, ("Cannot attach to %s (error=%d).", sc->sc_name, error)); error = g_access(cp, 1, 0, 0); KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error)); g_topology_unlock(); sx_xlock(&sc->sc_lock); G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, g_raid3_get_diskname(disk)); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) == 0) disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; KASSERT(disk->d_sync.ds_consumer == NULL, ("Sync consumer already exists (device=%s, disk=%s).", sc->sc_name, g_raid3_get_diskname(disk))); disk->d_sync.ds_consumer = cp; disk->d_sync.ds_consumer->private = disk; disk->d_sync.ds_consumer->index = 0; sc->sc_syncdisk = disk; /* * Allocate memory for synchronization bios and initialize them. */ disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_raid3_syncreqs, M_RAID3, M_WAITOK); for (n = 0; n < g_raid3_syncreqs; n++) { bp = g_alloc_bio(); disk->d_sync.ds_bios[n] = bp; bp->bio_parent = NULL; bp->bio_cmd = BIO_READ; bp->bio_data = malloc(MAXPHYS, M_RAID3, M_WAITOK); bp->bio_cflags = 0; bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1); bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1); bp->bio_done = g_raid3_sync_done; bp->bio_from = disk->d_sync.ds_consumer; bp->bio_to = sc->sc_provider; bp->bio_caller1 = (void *)(uintptr_t)n; } /* Set the number of in-flight synchronization requests. */ disk->d_sync.ds_inflight = g_raid3_syncreqs; /* * Fire off first synchronization requests. */ for (n = 0; n < g_raid3_syncreqs; n++) { bp = disk->d_sync.ds_bios[n]; G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); disk->d_sync.ds_consumer->index++; /* * Delay the request if it is colliding with a regular request. 
*/ if (g_raid3_regular_collision(sc, bp)) g_raid3_sync_delay(sc, bp); else g_io_request(bp, disk->d_sync.ds_consumer); } } /* * Stop synchronization process. * type: 0 - synchronization finished * 1 - synchronization stopped */ static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type) { struct g_raid3_disk *disk; struct g_consumer *cp; g_topology_assert_not(); sx_assert(&sc->sc_lock, SX_LOCKED); KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, ("Device not in DEGRADED state (%s, %u).", sc->sc_name, sc->sc_state)); disk = sc->sc_syncdisk; sc->sc_syncdisk = NULL; KASSERT(disk != NULL, ("No disk was synchronized (%s).", sc->sc_name)); KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); if (disk->d_sync.ds_consumer == NULL) return; if (type == 0) { G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.", sc->sc_name, g_raid3_get_diskname(disk)); } else /* if (type == 1) */ { G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.", sc->sc_name, g_raid3_get_diskname(disk)); } free(disk->d_sync.ds_bios, M_RAID3); disk->d_sync.ds_bios = NULL; cp = disk->d_sync.ds_consumer; disk->d_sync.ds_consumer = NULL; disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ g_topology_lock(); g_raid3_kill_consumer(sc, cp); g_topology_unlock(); sx_xlock(&sc->sc_lock); } static void g_raid3_launch_provider(struct g_raid3_softc *sc) { struct g_provider *pp; struct g_raid3_disk *disk; int n; sx_assert(&sc->sc_lock, SX_LOCKED); g_topology_lock(); pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name); pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; pp->stripesize = 0; pp->stripeoffset = 0; for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_consumer && disk->d_consumer->provider && disk->d_consumer->provider->stripesize > pp->stripesize) { pp->stripesize = disk->d_consumer->provider->stripesize; pp->stripeoffset = disk->d_consumer->provider->stripeoffset; } } pp->stripesize *= sc->sc_ndisks - 1; pp->stripeoffset *= sc->sc_ndisks - 1; sc->sc_provider = pp; g_error_provider(pp, 0); g_topology_unlock(); G_RAID3_DEBUG(0, "Device %s launched (%u/%u).", pp->name, g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE), sc->sc_ndisks); if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED) g_raid3_sync_start(sc); } static void g_raid3_destroy_provider(struct g_raid3_softc *sc) { struct bio *bp; g_topology_assert_not(); KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", sc->sc_name)); g_topology_lock(); g_error_provider(sc->sc_provider, ENXIO); mtx_lock(&sc->sc_queue_mtx); while ((bp = bioq_first(&sc->sc_queue)) != NULL) { bioq_remove(&sc->sc_queue, bp); g_io_deliver(bp, ENXIO); } mtx_unlock(&sc->sc_queue_mtx); G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); g_topology_unlock(); sc->sc_provider = NULL; if (sc->sc_syncdisk != NULL) g_raid3_sync_stop(sc, 1); } static void g_raid3_go(void *arg) { struct g_raid3_softc *sc; sc = arg; G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); g_raid3_event_send(sc, 0, G_RAID3_EVENT_DONTWAIT | G_RAID3_EVENT_DEVICE); } static u_int g_raid3_determine_state(struct g_raid3_disk *disk) { struct g_raid3_softc *sc; u_int state; sc = disk->d_softc; if (sc->sc_syncid == disk->d_sync.ds_syncid) { if ((disk->d_flags & G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) { /* Disk 
does not need synchronization. */ state = G_RAID3_DISK_STATE_ACTIVE; } else { if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { /* * We can start synchronization from * the stored offset. */ state = G_RAID3_DISK_STATE_SYNCHRONIZING; } else { state = G_RAID3_DISK_STATE_STALE; } } } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { /* * Reset all synchronization data for this disk, * because if it even was synchronized, it was * synchronized to disks with different syncid. */ disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; disk->d_sync.ds_offset = 0; disk->d_sync.ds_offset_done = 0; disk->d_sync.ds_syncid = sc->sc_syncid; if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { state = G_RAID3_DISK_STATE_SYNCHRONIZING; } else { state = G_RAID3_DISK_STATE_STALE; } } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { /* * Not good, NOT GOOD! * It means that device was started on stale disks * and more fresh disk just arrive. * If there were writes, device is broken, sorry. * I think the best choice here is don't touch * this disk and inform the user loudly. */ G_RAID3_DEBUG(0, "Device %s was started before the freshest " "disk (%s) arrives!! It will not be connected to the " "running device.", sc->sc_name, g_raid3_get_diskname(disk)); g_raid3_destroy_disk(disk); state = G_RAID3_DISK_STATE_NONE; /* Return immediately, because disk was destroyed. */ return (state); } G_RAID3_DEBUG(3, "State for %s disk: %s.", g_raid3_get_diskname(disk), g_raid3_disk_state2str(state)); return (state); } /* * Update device state. */ static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force) { struct g_raid3_disk *disk; u_int state; sx_assert(&sc->sc_lock, SX_XLOCKED); switch (sc->sc_state) { case G_RAID3_DEVICE_STATE_STARTING: { u_int n, ndirty, ndisks, genid, syncid; KASSERT(sc->sc_provider == NULL, ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); /* * Are we ready? We are, if all disks are connected or * one disk is missing and 'force' is true. */ if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) { if (!force) callout_drain(&sc->sc_callout); } else { if (force) { /* * Timeout expired, so destroy device. */ sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } return; } /* * Find the biggest genid. */ genid = 0; for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; if (disk->d_genid > genid) genid = disk->d_genid; } sc->sc_genid = genid; /* * Remove all disks without the biggest genid. */ for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; if (disk->d_genid < genid) { G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.", g_raid3_get_diskname(disk), sc->sc_name); g_raid3_destroy_disk(disk); } } /* * There must be at least 'sc->sc_ndisks - 1' components * with the same syncid and without SYNCHRONIZING flag. */ /* * Find the biggest syncid, number of valid components and * number of dirty components. 
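 *
 * For illustration, a hypothetical 3-component device whose members
 * carry syncids {5, 5, 4} yields syncid = 5 and ndisks = 2 (the
 * component still at 4 does not count as valid and will normally be
 * resynchronized); since ndisks + 1 == sc_ndisks the device can still
 * start, in DEGRADED state.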
*/ ndirty = ndisks = syncid = 0; for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) ndirty++; if (disk->d_sync.ds_syncid > syncid) { syncid = disk->d_sync.ds_syncid; ndisks = 0; } else if (disk->d_sync.ds_syncid < syncid) { continue; } if ((disk->d_flags & G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) { continue; } ndisks++; } /* * Do we have enough valid components? */ if (ndisks + 1 < sc->sc_ndisks) { G_RAID3_DEBUG(0, "Device %s is broken, too few valid components.", sc->sc_name); sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; return; } /* * If there is one DIRTY component and all disks are present, * mark it for synchronization. If there is more than one DIRTY * component, mark parity component for synchronization. */ if (ndisks == sc->sc_ndisks && ndirty == 1) { for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) { continue; } disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; } } else if (ndisks == sc->sc_ndisks && ndirty > 1) { disk = &sc->sc_disks[sc->sc_ndisks - 1]; disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; } sc->sc_syncid = syncid; if (force) { /* Remember to bump syncid on first write. */ sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; } if (ndisks == sc->sc_ndisks) state = G_RAID3_DEVICE_STATE_COMPLETE; else /* if (ndisks == sc->sc_ndisks - 1) */ state = G_RAID3_DEVICE_STATE_DEGRADED; G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_device_state2str(state)); sc->sc_state = state; for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (disk->d_state == G_RAID3_DISK_STATE_NODISK) continue; state = g_raid3_determine_state(disk); g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT); if (state == G_RAID3_DISK_STATE_STALE) sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; } break; } case G_RAID3_DEVICE_STATE_DEGRADED: /* * Genid need to be bumped immediately, so do it here. */ if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; g_raid3_bump_genid(sc); } if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) return; if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) { if (sc->sc_provider != NULL) g_raid3_destroy_provider(sc); sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; return; } if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == sc->sc_ndisks) { state = G_RAID3_DEVICE_STATE_COMPLETE; G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_device_state2str(state)); sc->sc_state = state; } if (sc->sc_provider == NULL) g_raid3_launch_provider(sc); if (sc->sc_rootmount != NULL) { G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } break; case G_RAID3_DEVICE_STATE_COMPLETE: /* * Genid need to be bumped immediately, so do it here. 
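 *
 * Unlike the syncid, which is only bumped lazily on the first write,
 * the genid marks components that were dropped after I/O errors: a
 * component left behind at, say, genid 4 while the rest of the array
 * moved to genid 5 is refused by g_raid3_add_disk() when it shows up
 * again, instead of being silently resynchronized.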
*/ if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; g_raid3_bump_genid(sc); } if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) return; KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >= sc->sc_ndisks - 1, ("Too few ACTIVE components in COMPLETE state (device %s).", sc->sc_name)); if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == sc->sc_ndisks - 1) { state = G_RAID3_DEVICE_STATE_DEGRADED; G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_device_state2str(state)); sc->sc_state = state; } if (sc->sc_provider == NULL) g_raid3_launch_provider(sc); if (sc->sc_rootmount != NULL) { G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, sc->sc_rootmount); root_mount_rel(sc->sc_rootmount); sc->sc_rootmount = NULL; } break; default: KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state))); break; } } /* * Update disk state and device state if needed. */ #define DISK_STATE_CHANGED() G_RAID3_DEBUG(1, \ "Disk %s state changed from %s to %s (device %s).", \ g_raid3_get_diskname(disk), \ g_raid3_disk_state2str(disk->d_state), \ g_raid3_disk_state2str(state), sc->sc_name) static int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state) { struct g_raid3_softc *sc; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); again: G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state), g_raid3_disk_state2str(state)); switch (state) { case G_RAID3_DISK_STATE_NEW: /* * Possible scenarios: * 1. New disk arrive. */ /* Previous state should be NONE. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); disk->d_state = state; G_RAID3_DEBUG(1, "Device %s: provider %s detected.", sc->sc_name, g_raid3_get_diskname(disk)); if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) break; KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); state = g_raid3_determine_state(disk); if (state != G_RAID3_DISK_STATE_NONE) goto again; break; case G_RAID3_DISK_STATE_ACTIVE: /* * Possible scenarios: * 1. New disk does not need synchronization. * 2. Synchronization process finished successfully. */ KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); /* Previous state should be NEW or SYNCHRONIZING. 
*/ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING; disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC; g_raid3_sync_stop(sc, 0); } disk->d_state = state; disk->d_sync.ds_offset = 0; disk->d_sync.ds_offset_done = 0; g_raid3_update_idle(sc, disk); g_raid3_update_metadata(disk); G_RAID3_DEBUG(1, "Device %s: provider %s activated.", sc->sc_name, g_raid3_get_diskname(disk)); break; case G_RAID3_DISK_STATE_STALE: /* * Possible scenarios: * 1. Stale disk was connected. */ /* Previous state should be NEW. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); /* * STALE state is only possible if device is marked * NOAUTOSYNC. */ KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; disk->d_state = state; g_raid3_update_metadata(disk); G_RAID3_DEBUG(0, "Device %s: provider %s is stale.", sc->sc_name, g_raid3_get_diskname(disk)); break; case G_RAID3_DISK_STATE_SYNCHRONIZING: /* * Possible scenarios: * 1. Disk which needs synchronization was connected. */ /* Previous state should be NEW. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); DISK_STATE_CHANGED(); if (disk->d_state == G_RAID3_DISK_STATE_NEW) disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; disk->d_state = state; if (sc->sc_provider != NULL) { g_raid3_sync_start(sc); g_raid3_update_metadata(disk); } break; case G_RAID3_DISK_STATE_DISCONNECTED: /* * Possible scenarios: * 1. Device wasn't running yet, but disk disappear. * 2. Disk was active and disapppear. * 3. Disk disappear during synchronization process. */ if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { /* * Previous state should be ACTIVE, STALE or * SYNCHRONIZING. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE || disk->d_state == G_RAID3_DISK_STATE_STALE || disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); } else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) { /* Previous state should be NEW. */ KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); /* * Reset bumping syncid if disk disappeared in STARTING * state. 
*/ if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; #ifdef INVARIANTS } else { KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, g_raid3_device_state2str(sc->sc_state), g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state))); #endif } DISK_STATE_CHANGED(); G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.", sc->sc_name, g_raid3_get_diskname(disk)); g_raid3_destroy_disk(disk); break; default: KASSERT(1 == 0, ("Unknown state (%u).", state)); break; } return (0); } #undef DISK_STATE_CHANGED int g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); /* Metadata are stored on last sector. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) { G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).", cp->provider->name, error); return (error); } /* Decode metadata. */ error = raid3_metadata_decode(buf, md); g_free(buf); if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0) return (EINVAL); if (md->md_version > G_RAID3_VERSION) { G_RAID3_DEBUG(0, "Kernel module is too old to handle metadata from %s.", cp->provider->name); return (EINVAL); } if (error != 0) { G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", cp->provider->name); return (error); } if (md->md_sectorsize > MAXPHYS) { G_RAID3_DEBUG(0, "The blocksize is too big."); return (EINVAL); } return (0); } static int g_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp, struct g_raid3_metadata *md) { if (md->md_no >= sc->sc_ndisks) { G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.", pp->name, md->md_no); return (EINVAL); } if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) { G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.", pp->name, md->md_no); return (EEXIST); } if (md->md_all != sc->sc_ndisks) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_all", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mediasize % md->md_sectorsize) != 0) { G_RAID3_DEBUG(1, "Invalid metadata (mediasize %% sectorsize != " "0) on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if (md->md_mediasize != sc->sc_mediasize) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_mediasize", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_mediasize", pp->name, sc->sc_name); return (EINVAL); } if ((sc->sc_mediasize / (sc->sc_ndisks - 1)) > pp->mediasize) { G_RAID3_DEBUG(1, "Invalid size of disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_sectorsize", pp->name, sc->sc_name); return (EINVAL); } if (md->md_sectorsize != sc->sc_sectorsize) { G_RAID3_DEBUG(1, "Invalid '%s' field on disk %s (device %s), skipping.", "md_sectorsize", pp->name, sc->sc_name); return (EINVAL); } if ((sc->sc_sectorsize % pp->sectorsize) != 0) { G_RAID3_DEBUG(1, "Invalid sector size of disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) { 
G_RAID3_DEBUG(1, "Invalid device flags on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { /* * VERIFY and ROUND-ROBIN options are mutally exclusive. */ G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on " "disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) { G_RAID3_DEBUG(1, "Invalid disk flags on disk %s (device %s), skipping.", pp->name, sc->sc_name); return (EINVAL); } return (0); } int g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp, struct g_raid3_metadata *md) { struct g_raid3_disk *disk; int error; g_topology_assert_not(); G_RAID3_DEBUG(2, "Adding disk %s.", pp->name); error = g_raid3_check_metadata(sc, pp, md); if (error != 0) return (error); if (sc->sc_state != G_RAID3_DEVICE_STATE_STARTING && md->md_genid < sc->sc_genid) { G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.", pp->name, sc->sc_name); return (EINVAL); } disk = g_raid3_init_disk(sc, pp, md, &error); if (disk == NULL) return (error); error = g_raid3_event_send(disk, G_RAID3_DISK_STATE_NEW, G_RAID3_EVENT_WAIT); if (error != 0) return (error); if (md->md_version < G_RAID3_VERSION) { G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", pp->name, md->md_version, G_RAID3_VERSION); g_raid3_update_metadata(disk); } return (0); } static void g_raid3_destroy_delayed(void *arg, int flag) { struct g_raid3_softc *sc; int error; if (flag == EV_CANCEL) { G_RAID3_DEBUG(1, "Destroying canceled."); return; } sc = arg; g_topology_unlock(); sx_xlock(&sc->sc_lock); KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) == 0, ("DESTROY flag set on %s.", sc->sc_name)); KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0, ("DESTROYING flag not set on %s.", sc->sc_name)); G_RAID3_DEBUG(0, "Destroying %s (delayed).", sc->sc_name); error = g_raid3_destroy(sc, G_RAID3_DESTROY_SOFT); if (error != 0) { G_RAID3_DEBUG(0, "Cannot destroy %s.", sc->sc_name); sx_xunlock(&sc->sc_lock); } g_topology_lock(); } static int g_raid3_access(struct g_provider *pp, int acr, int acw, int ace) { struct g_raid3_softc *sc; int dcr, dcw, dce, error = 0; g_topology_assert(); G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, acw, ace); sc = pp->geom->softc; if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0) return (0); KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); dcr = pp->acr + acr; dcw = pp->acw + acw; dce = pp->ace + ace; g_topology_unlock(); sx_xlock(&sc->sc_lock); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0 || g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) { if (acr > 0 || acw > 0 || ace > 0) error = ENXIO; goto end; } if (dcw == 0) g_raid3_idle(sc, dcw); if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0) { if (acr > 0 || acw > 0 || ace > 0) { error = ENXIO; goto end; } if (dcr == 0 && dcw == 0 && dce == 0) { g_post_event(g_raid3_destroy_delayed, sc, M_WAITOK, sc, NULL); } } end: sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } static struct g_geom * g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) { struct g_raid3_softc *sc; struct g_geom *gp; int error, timeout; u_int n; g_topology_assert(); G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* One disk is minimum. */ if (md->md_all < 1) return (NULL); /* * Action geom. 
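 *
 * Each device is backed by two geoms: this "action" geom, which owns
 * the consumers for the components and exposes the raid3/<name>
 * provider, and a separate "<name>.sync" geom created below, whose
 * consumers are only used for reading back data during
 * synchronization.  For a hypothetical device named db0 that means
 * geoms db0 and db0.sync, plus provider raid3/db0.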
*/ gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO); sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3, M_WAITOK | M_ZERO); gp->start = g_raid3_start; gp->orphan = g_raid3_orphan; gp->access = g_raid3_access; gp->dumpconf = g_raid3_dumpconf; sc->sc_id = md->md_id; sc->sc_mediasize = md->md_mediasize; sc->sc_sectorsize = md->md_sectorsize; sc->sc_ndisks = md->md_all; sc->sc_round_robin = 0; sc->sc_flags = md->md_mflags; sc->sc_bump_id = 0; sc->sc_idle = 1; sc->sc_last_write = time_uptime; sc->sc_writes = 0; for (n = 0; n < sc->sc_ndisks; n++) { sc->sc_disks[n].d_softc = sc; sc->sc_disks[n].d_no = n; sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK; } sx_init(&sc->sc_lock, "graid3:lock"); bioq_init(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF); bioq_init(&sc->sc_regular_delayed); bioq_init(&sc->sc_inflight); bioq_init(&sc->sc_sync_delayed); TAILQ_INIT(&sc->sc_events); mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF); callout_init(&sc->sc_callout, 1); sc->sc_state = G_RAID3_DEVICE_STATE_STARTING; gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; /* * Synchronization geom. */ gp = g_new_geomf(mp, "%s.sync", md->md_name); gp->softc = sc; gp->orphan = g_raid3_orphan; sc->sc_sync.ds_geom = gp; if (!g_raid3_use_malloc) { sc->sc_zones[G_RAID3_ZONE_64K].sz_zone = uma_zcreate("gr3:64k", 65536, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); sc->sc_zones[G_RAID3_ZONE_64K].sz_inuse = 0; sc->sc_zones[G_RAID3_ZONE_64K].sz_max = g_raid3_n64k; sc->sc_zones[G_RAID3_ZONE_64K].sz_requested = sc->sc_zones[G_RAID3_ZONE_64K].sz_failed = 0; sc->sc_zones[G_RAID3_ZONE_16K].sz_zone = uma_zcreate("gr3:16k", 16384, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); sc->sc_zones[G_RAID3_ZONE_16K].sz_inuse = 0; sc->sc_zones[G_RAID3_ZONE_16K].sz_max = g_raid3_n16k; sc->sc_zones[G_RAID3_ZONE_16K].sz_requested = sc->sc_zones[G_RAID3_ZONE_16K].sz_failed = 0; sc->sc_zones[G_RAID3_ZONE_4K].sz_zone = uma_zcreate("gr3:4k", 4096, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); sc->sc_zones[G_RAID3_ZONE_4K].sz_inuse = 0; sc->sc_zones[G_RAID3_ZONE_4K].sz_max = g_raid3_n4k; sc->sc_zones[G_RAID3_ZONE_4K].sz_requested = sc->sc_zones[G_RAID3_ZONE_4K].sz_failed = 0; } error = kproc_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0, "g_raid3 %s", md->md_name); if (error != 0) { G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.", sc->sc_name); if (!g_raid3_use_malloc) { uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); } g_destroy_geom(sc->sc_sync.ds_geom); mtx_destroy(&sc->sc_events_mtx); mtx_destroy(&sc->sc_queue_mtx); sx_destroy(&sc->sc_lock); g_destroy_geom(sc->sc_geom); free(sc->sc_disks, M_RAID3); free(sc, M_RAID3); return (NULL); } G_RAID3_DEBUG(1, "Device %s created (%u components, id=%u).", sc->sc_name, sc->sc_ndisks, sc->sc_id); sc->sc_rootmount = root_mount_hold("GRAID3"); G_RAID3_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); /* * Run timeout. 
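 *
 * The callout gives missing components g_raid3_timeout seconds to
 * show up.  For illustration, when booting with one disk unplugged
 * the timer fires g_raid3_go(), which sends a forced DEVICE event so
 * that g_raid3_update_device() lets the array start DEGRADED on
 * sc_ndisks - 1 components instead of waiting forever.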
*/ timeout = atomic_load_acq_int(&g_raid3_timeout); callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc); return (sc->sc_geom); } int g_raid3_destroy(struct g_raid3_softc *sc, int how) { struct g_provider *pp; g_topology_assert_not(); if (sc == NULL) return (ENXIO); sx_assert(&sc->sc_lock, SX_XLOCKED); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { switch (how) { case G_RAID3_DESTROY_SOFT: G_RAID3_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); case G_RAID3_DESTROY_DELAYED: G_RAID3_DEBUG(1, "Device %s will be destroyed on last close.", pp->name); if (sc->sc_syncdisk != NULL) g_raid3_sync_stop(sc, 1); sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROYING; return (EBUSY); case G_RAID3_DESTROY_HARD: G_RAID3_DEBUG(1, "Device %s is still open, so it " "can't be definitely removed.", pp->name); break; } } g_topology_lock(); if (sc->sc_geom->softc == NULL) { g_topology_unlock(); return (0); } sc->sc_geom->softc = NULL; sc->sc_sync.ds_geom->softc = NULL; g_topology_unlock(); sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT; G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); sx_xunlock(&sc->sc_lock); mtx_lock(&sc->sc_queue_mtx); wakeup(sc); wakeup(&sc->sc_queue); mtx_unlock(&sc->sc_queue_mtx); G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); while (sc->sc_worker != NULL) tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5); G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); sx_xlock(&sc->sc_lock); g_raid3_destroy_device(sc); free(sc->sc_disks, M_RAID3); free(sc, M_RAID3); return (0); } static void g_raid3_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static struct g_geom * g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_raid3_metadata md; struct g_raid3_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); G_RAID3_DEBUG(2, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "raid3:taste"); /* This orphan function should be never called. */ gp->orphan = g_raid3_taste_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_raid3_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) return (NULL); if (g_raid3_debug >= 2) raid3_metadata_dump(&md); /* * Let's check if device already exists. 
*/ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_sync.ds_geom == gp) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) { G_RAID3_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } break; } if (gp == NULL) { gp = g_raid3_create(mp, &md); if (gp == NULL) { G_RAID3_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; } G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); g_topology_unlock(); sx_xlock(&sc->sc_lock); error = g_raid3_add_disk(sc, pp, &md); if (error != 0) { G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) == sc->sc_ndisks) { g_cancel_event(sc); g_raid3_destroy(sc, G_RAID3_DESTROY_HARD); g_topology_lock(); return (NULL); } gp = NULL; } sx_xunlock(&sc->sc_lock); g_topology_lock(); return (gp); } static int g_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_raid3_softc *sc; int error; g_topology_unlock(); sc = gp->softc; sx_xlock(&sc->sc_lock); g_cancel_event(sc); error = g_raid3_destroy(gp->softc, G_RAID3_DESTROY_SOFT); if (error != 0) sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } static void g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_raid3_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; /* Skip synchronization geom. */ if (gp == sc->sc_sync.ds_geom) return; if (pp != NULL) { /* Nothing here. */ } else if (cp != NULL) { struct g_raid3_disk *disk; disk = cp->private; if (disk == NULL) return; g_topology_unlock(); sx_xlock(&sc->sc_lock); sbuf_printf(sb, "%s", indent); if (disk->d_no == sc->sc_ndisks - 1) sbuf_printf(sb, "PARITY"); else sbuf_printf(sb, "DATA"); sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, (u_int)disk->d_no); if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { sbuf_printf(sb, "%s", indent); if (disk->d_sync.ds_offset == 0) sbuf_printf(sb, "0%%"); else { sbuf_printf(sb, "%u%%", (u_int)((disk->d_sync.ds_offset * 100) / (sc->sc_mediasize / (sc->sc_ndisks - 1)))); } sbuf_printf(sb, "\n"); if (disk->d_sync.ds_offset > 0) { sbuf_printf(sb, "%s%jd" "\n", indent, (intmax_t)disk->d_sync.ds_offset); } } sbuf_printf(sb, "%s%u\n", indent, disk->d_sync.ds_syncid); sbuf_printf(sb, "%s%u\n", indent, disk->d_genid); sbuf_printf(sb, "%s", indent); if (disk->d_flags == 0) sbuf_printf(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if ((disk->d_flags & (flag)) != 0) { \ if (!first) \ sbuf_printf(sb, ", "); \ else \ first = 0; \ sbuf_printf(sb, name); \ } \ } while (0) ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY"); ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED"); ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING, "SYNCHRONIZING"); ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); ADD_FLAG(G_RAID3_DISK_FLAG_BROKEN, "BROKEN"); #undef ADD_FLAG } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s%s\n", indent, g_raid3_disk_state2str(disk->d_state)); sx_xunlock(&sc->sc_lock); g_topology_lock(); } else { g_topology_unlock(); sx_xlock(&sc->sc_lock); if (!g_raid3_use_malloc) { sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_4K].sz_requested); sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_4K].sz_failed); sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_16K].sz_requested); sbuf_printf(sb, "%s%u\n", indent, 
sc->sc_zones[G_RAID3_ZONE_16K].sz_failed); sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_64K].sz_requested); sbuf_printf(sb, "%s%u\n", indent, sc->sc_zones[G_RAID3_ZONE_64K].sz_failed); } sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s%u\n", indent, sc->sc_syncid); sbuf_printf(sb, "%s%u\n", indent, sc->sc_genid); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_printf(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if ((sc->sc_flags & (flag)) != 0) { \ if (!first) \ sbuf_printf(sb, ", "); \ else \ first = 0; \ sbuf_printf(sb, name); \ } \ } while (0) ADD_FLAG(G_RAID3_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, "ROUND-ROBIN"); ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY"); #undef ADD_FLAG } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s%u\n", indent, sc->sc_ndisks); sbuf_printf(sb, "%s%s\n", indent, g_raid3_device_state2str(sc->sc_state)); sx_xunlock(&sc->sc_lock); g_topology_lock(); } } static void g_raid3_shutdown_post_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_raid3_softc *sc; int error; mp = arg; g_topology_lock(); g_raid3_shutdown = 1; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if ((sc = gp->softc) == NULL) continue; /* Skip synchronization geom. */ if (gp == sc->sc_sync.ds_geom) continue; g_topology_unlock(); sx_xlock(&sc->sc_lock); g_raid3_idle(sc, -1); g_cancel_event(sc); error = g_raid3_destroy(sc, G_RAID3_DESTROY_DELAYED); if (error != 0) sx_xunlock(&sc->sc_lock); g_topology_lock(); } g_topology_unlock(); } static void g_raid3_init(struct g_class *mp) { g_raid3_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, g_raid3_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); if (g_raid3_post_sync == NULL) G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event."); } static void g_raid3_fini(struct g_class *mp) { if (g_raid3_post_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_post_sync); } DECLARE_GEOM_CLASS(g_raid3_class, g_raid3); +MODULE_VERSION(geom_raid3, 0); Index: stable/11/sys/geom/shsec/g_shsec.c =================================================================== --- stable/11/sys/geom/shsec/g_shsec.c (revision 332639) +++ stable/11/sys/geom/shsec/g_shsec.c (revision 332640) @@ -1,836 +1,837 @@ /*- * Copyright (c) 2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_shsec, "GEOM shared secret device support"); static MALLOC_DEFINE(M_SHSEC, "shsec_data", "GEOM_SHSEC Data"); static uma_zone_t g_shsec_zone; static int g_shsec_destroy(struct g_shsec_softc *sc, boolean_t force); static int g_shsec_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_shsec_taste; static g_ctl_req_t g_shsec_config; static g_dumpconf_t g_shsec_dumpconf; static g_init_t g_shsec_init; static g_fini_t g_shsec_fini; struct g_class g_shsec_class = { .name = G_SHSEC_CLASS_NAME, .version = G_VERSION, .ctlreq = g_shsec_config, .taste = g_shsec_taste, .destroy_geom = g_shsec_destroy_geom, .init = g_shsec_init, .fini = g_shsec_fini }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, shsec, CTLFLAG_RW, 0, "GEOM_SHSEC stuff"); static u_int g_shsec_debug = 0; SYSCTL_UINT(_kern_geom_shsec, OID_AUTO, debug, CTLFLAG_RWTUN, &g_shsec_debug, 0, "Debug level"); static u_int g_shsec_maxmem = MAXPHYS * 100; SYSCTL_UINT(_kern_geom_shsec, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_shsec_maxmem, 0, "Maximum memory that can be allocated for I/O (in bytes)"); static u_int g_shsec_alloc_failed = 0; SYSCTL_UINT(_kern_geom_shsec, OID_AUTO, alloc_failed, CTLFLAG_RD, &g_shsec_alloc_failed, 0, "How many times I/O allocation failed"); /* * Greatest Common Divisor. */ static u_int gcd(u_int a, u_int b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static u_int lcm(u_int a, u_int b) { return ((a * b) / gcd(a, b)); } static void g_shsec_init(struct g_class *mp __unused) { g_shsec_zone = uma_zcreate("g_shsec_zone", MAXPHYS, NULL, NULL, NULL, NULL, 0, 0); g_shsec_maxmem -= g_shsec_maxmem % MAXPHYS; uma_zone_set_max(g_shsec_zone, g_shsec_maxmem / MAXPHYS); } static void g_shsec_fini(struct g_class *mp __unused) { uma_zdestroy(g_shsec_zone); } /* * Return the number of valid disks. 
*/ static u_int g_shsec_nvalid(struct g_shsec_softc *sc) { u_int i, no; no = 0; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i] != NULL) no++; } return (no); } static void g_shsec_remove_disk(struct g_consumer *cp) { struct g_shsec_softc *sc; u_int no; KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); sc = (struct g_shsec_softc *)cp->private; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); no = cp->index; G_SHSEC_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, sc->sc_name); sc->sc_disks[no] = NULL; if (sc->sc_provider != NULL) { g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; G_SHSEC_DEBUG(0, "Device %s removed.", sc->sc_name); } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); g_destroy_consumer(cp); } static void g_shsec_orphan(struct g_consumer *cp) { struct g_shsec_softc *sc; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; g_shsec_remove_disk(cp); /* If there are no valid disks anymore, remove device. */ if (g_shsec_nvalid(sc) == 0) g_shsec_destroy(sc, 1); } static int g_shsec_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp1, *cp2; struct g_shsec_softc *sc; struct g_geom *gp; int error; gp = pp->geom; sc = gp->softc; if (sc == NULL) { /* * It looks like geom is being withered. * In that case we allow only negative requests. */ KASSERT(dr <= 0 && dw <= 0 && de <= 0, ("Positive access request (device=%s).", pp->name)); if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) { G_SHSEC_DEBUG(0, "Device %s definitely destroyed.", gp->name); } return (0); } /* On first open, grab an extra "exclusive" bit */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... and let go of it on last close */ if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) de--; error = ENXIO; LIST_FOREACH(cp1, &gp->consumer, consumer) { error = g_access(cp1, dr, dw, de); if (error == 0) continue; /* * If we fail here, backout all previous changes. 
*/ LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) return (error); g_access(cp2, -dr, -dw, -de); } /* NOTREACHED */ } return (error); } static void g_shsec_xor1(uint32_t *src, uint32_t *dst, ssize_t len) { for (; len > 0; len -= sizeof(uint32_t), dst++) *dst = *dst ^ *src++; KASSERT(len == 0, ("len != 0 (len=%zd)", len)); } static void g_shsec_done(struct bio *bp) { struct g_shsec_softc *sc; struct bio *pbp; pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; if (bp->bio_error == 0) G_SHSEC_LOGREQ(2, bp, "Request done."); else { G_SHSEC_LOGREQ(0, bp, "Request failed (error=%d).", bp->bio_error); if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; } if (pbp->bio_cmd == BIO_READ) { if ((pbp->bio_pflags & G_SHSEC_BFLAG_FIRST) != 0) { bcopy(bp->bio_data, pbp->bio_data, pbp->bio_length); pbp->bio_pflags = 0; } else { g_shsec_xor1((uint32_t *)bp->bio_data, (uint32_t *)pbp->bio_data, (ssize_t)pbp->bio_length); } } bzero(bp->bio_data, bp->bio_length); uma_zfree(g_shsec_zone, bp->bio_data); g_destroy_bio(bp); pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { pbp->bio_completed = pbp->bio_length; g_io_deliver(pbp, pbp->bio_error); } } static void g_shsec_xor2(uint32_t *rand, uint32_t *dst, ssize_t len) { for (; len > 0; len -= sizeof(uint32_t), dst++) { *rand = arc4random(); *dst = *dst ^ *rand++; } KASSERT(len == 0, ("len != 0 (len=%zd)", len)); } static void g_shsec_start(struct bio *bp) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_shsec_softc *sc; struct bio *cbp; uint32_t *dst; ssize_t len; u_int no; int error; sc = bp->bio_to->geom->softc; /* * If sc == NULL, provider's error should be set and g_shsec_start() * should not be called at all. */ KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_SHSEC_LOGREQ(2, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_FLUSH: /* * Only those requests are supported. */ break; case BIO_DELETE: case BIO_GETATTR: /* To which provider it should be delivered? */ default: g_io_deliver(bp, EOPNOTSUPP); return; } /* * Allocate all bios first and calculate XOR. */ dst = NULL; len = bp->bio_length; if (bp->bio_cmd == BIO_READ) bp->bio_pflags = G_SHSEC_BFLAG_FIRST; for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ cbp->bio_done = g_shsec_done; cbp->bio_data = uma_zalloc(g_shsec_zone, M_NOWAIT); if (cbp->bio_data == NULL) { g_shsec_alloc_failed++; error = ENOMEM; goto failure; } cbp->bio_caller2 = sc->sc_disks[no]; if (bp->bio_cmd == BIO_WRITE) { if (no == 0) { dst = (uint32_t *)cbp->bio_data; bcopy(bp->bio_data, dst, len); } else { g_shsec_xor2((uint32_t *)cbp->bio_data, dst, len); } } } /* * Fire off all allocated requests! 
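[Editorial aside, not part of the patch: the write path above is plain XOR secret splitting. g_shsec_xor2() fills every share after the first with arc4random() words and folds each one into share 0, so share 0 ends up as plaintext XOR all random shares; on read, the first completed child is copied into the parent buffer and g_shsec_xor1() XORs the remaining shares back in, recovering the data only when every component answers. A minimal userland sketch of the same arithmetic, with invented helper names and a fixed 3-disk case, assuming caller-allocated buffers:]

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    #define NDISKS 3

    /* Split 'plain' into NDISKS shares; share 0 absorbs every random share. */
    static void
    split_shares(const uint8_t *plain, uint8_t *share[NDISKS], size_t len)
    {
            size_t i, no;

            memcpy(share[0], plain, len);           /* share 0 starts as plaintext */
            for (no = 1; no < NDISKS; no++) {
                    for (i = 0; i < len; i++) {
                            share[no][i] = arc4random() & 0xff;   /* random share byte */
                            share[0][i] ^= share[no][i];          /* fold into share 0 */
                    }
            }
    }

    /* XOR all shares together; order does not matter, just as in g_shsec_done(). */
    static void
    join_shares(uint8_t *share[NDISKS], uint8_t *plain, size_t len)
    {
            size_t i, no;

            memcpy(plain, share[0], len);
            for (no = 1; no < NDISKS; no++)
                    for (i = 0; i < len; i++)
                            plain[i] ^= share[no][i];
    }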
*/ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; G_SHSEC_LOGREQ(2, cbp, "Sending request."); g_io_request(cbp, cp); } return; failure: while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); bp->bio_children--; if (cbp->bio_data != NULL) { bzero(cbp->bio_data, cbp->bio_length); uma_zfree(g_shsec_zone, cbp->bio_data); } g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = error; g_io_deliver(bp, bp->bio_error); } static void g_shsec_check_and_run(struct g_shsec_softc *sc) { off_t mediasize, ms; u_int no, sectorsize = 0; if (g_shsec_nvalid(sc) != sc->sc_ndisks) return; sc->sc_provider = g_new_providerf(sc->sc_geom, "shsec/%s", sc->sc_name); /* * Find the smallest disk. */ mediasize = sc->sc_disks[0]->provider->mediasize; mediasize -= sc->sc_disks[0]->provider->sectorsize; sectorsize = sc->sc_disks[0]->provider->sectorsize; for (no = 1; no < sc->sc_ndisks; no++) { ms = sc->sc_disks[no]->provider->mediasize; ms -= sc->sc_disks[no]->provider->sectorsize; if (ms < mediasize) mediasize = ms; sectorsize = lcm(sectorsize, sc->sc_disks[no]->provider->sectorsize); } sc->sc_provider->sectorsize = sectorsize; sc->sc_provider->mediasize = mediasize; g_error_provider(sc->sc_provider, 0); G_SHSEC_DEBUG(0, "Device %s activated.", sc->sc_name); } static int g_shsec_read_metadata(struct g_consumer *cp, struct g_shsec_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ shsec_metadata_decode(buf, md); g_free(buf); return (0); } /* * Add disk to given device. */ static int g_shsec_add_disk(struct g_shsec_softc *sc, struct g_provider *pp, u_int no) { struct g_consumer *cp, *fcp; struct g_geom *gp; struct g_shsec_metadata md; int error; /* Metadata corrupted? */ if (no >= sc->sc_ndisks) return (EINVAL); /* Check if disk is not already attached. */ if (sc->sc_disks[no] != NULL) return (EEXIST); gp = sc->sc_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } /* Reread metadata. */ error = g_shsec_read_metadata(cp, &md); if (error != 0) goto fail; if (strcmp(md.md_magic, G_SHSEC_MAGIC) != 0 || strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_SHSEC_DEBUG(0, "Metadata on %s changed.", pp->name); goto fail; } cp->private = sc; cp->index = no; sc->sc_disks[no] = cp; G_SHSEC_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); g_shsec_check_and_run(sc); return (0); fail: if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); g_detach(cp); g_destroy_consumer(cp); return (error); } static struct g_geom * g_shsec_create(struct g_class *mp, const struct g_shsec_metadata *md) { struct g_shsec_softc *sc; struct g_geom *gp; u_int no; G_SHSEC_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* Two disks is minimum. 
*/ if (md->md_all < 2) { G_SHSEC_DEBUG(0, "Too few disks defined for %s.", md->md_name); return (NULL); } /* Check for duplicate unit */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { G_SHSEC_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_SHSEC, M_WAITOK | M_ZERO); gp->start = g_shsec_start; gp->spoiled = g_shsec_orphan; gp->orphan = g_shsec_orphan; gp->access = g_shsec_access; gp->dumpconf = g_shsec_dumpconf; sc->sc_id = md->md_id; sc->sc_ndisks = md->md_all; sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, M_SHSEC, M_WAITOK | M_ZERO); for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no] = NULL; gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; G_SHSEC_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); return (gp); } static int g_shsec_destroy(struct g_shsec_softc *sc, boolean_t force) { struct g_provider *pp; struct g_geom *gp; u_int no; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_SHSEC_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_SHSEC_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } for (no = 0; no < sc->sc_ndisks; no++) { if (sc->sc_disks[no] != NULL) g_shsec_remove_disk(sc->sc_disks[no]); } gp = sc->sc_geom; gp->softc = NULL; KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", gp->name)); free(sc->sc_disks, M_SHSEC); free(sc, M_SHSEC); pp = LIST_FIRST(&gp->provider); if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) G_SHSEC_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_shsec_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_shsec_softc *sc; sc = gp->softc; return (g_shsec_destroy(sc, 0)); } static struct g_geom * g_shsec_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_shsec_metadata md; struct g_shsec_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); G_SHSEC_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "shsec:taste"); gp->start = g_shsec_start; gp->access = g_shsec_access; gp->orphan = g_shsec_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_shsec_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_SHSEC_MAGIC) != 0) return (NULL); if (md.md_version > G_SHSEC_VERSION) { G_SHSEC_DEBUG(0, "Kernel module is too old to handle %s.\n", pp->name); return (NULL); } /* * Backward compatibility: */ /* There was no md_provsize field in earlier versions of metadata. */ if (md.md_version < 1) md.md_provsize = pp->mediasize; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != pp->mediasize) return (NULL); /* * Let's check if device already exists. 
*/ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) continue; break; } if (gp != NULL) { G_SHSEC_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_shsec_add_disk(sc, pp, md.md_no); if (error != 0) { G_SHSEC_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); return (NULL); } } else { gp = g_shsec_create(mp, &md); if (gp == NULL) { G_SHSEC_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; G_SHSEC_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_shsec_add_disk(sc, pp, md.md_no); if (error != 0) { G_SHSEC_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); g_shsec_destroy(sc, 1); return (NULL); } } return (gp); } static struct g_shsec_softc * g_shsec_find_device(struct g_class *mp, const char *name) { struct g_shsec_softc *sc; struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(sc->sc_name, name) == 0) return (sc); } return (NULL); } static void g_shsec_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_shsec_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_shsec_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_shsec_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } static void g_shsec_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_SHSEC_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "stop") == 0) { g_shsec_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_shsec_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_shsec_softc *sc; sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. 
*/ } else if (cp != NULL) { sbuf_printf(sb, "%s%u\n", indent, (u_int)cp->index); } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%sTotal=%u, Online=%u\n", indent, sc->sc_ndisks, g_shsec_nvalid(sc)); sbuf_printf(sb, "%s", indent); if (sc->sc_provider != NULL && sc->sc_provider->error == 0) sbuf_printf(sb, "UP"); else sbuf_printf(sb, "DOWN"); sbuf_printf(sb, "\n"); } } DECLARE_GEOM_CLASS(g_shsec_class, g_shsec); +MODULE_VERSION(geom_shsec, 0); Index: stable/11/sys/geom/stripe/g_stripe.c =================================================================== --- stable/11/sys/geom/stripe/g_stripe.c (revision 332639) +++ stable/11/sys/geom/stripe/g_stripe.c (revision 332640) @@ -1,1270 +1,1271 @@ /*- * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
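[Editorial aside, not part of the patch: the functional change in each of these files is the added MODULE_VERSION() line, which registers a version for the kernel module so that other modules can declare a dependency on it. As an illustration only (the module name geom_example and its event handler are invented, not anything in this tree), a hypothetical module stacked on top of geom_shsec could now express that dependency like this:]

    #include <sys/param.h>
    #include <sys/kernel.h>
    #include <sys/module.h>

    /* Hypothetical consumer module; names are made up for illustration. */
    static int
    geom_example_modevent(module_t mod, int type, void *data)
    {
            return (0);
    }

    static moduledata_t geom_example_mod = {
            "geom_example",
            geom_example_modevent,
            NULL
    };

    DECLARE_MODULE(geom_example, geom_example_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
    /* Resolvable now that g_shsec.c carries MODULE_VERSION(geom_shsec, 0). */
    MODULE_DEPEND(geom_example, geom_shsec, 0, 0, 0);
    MODULE_VERSION(geom_example, 1);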
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_stripe, "GEOM striping support"); static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); static uma_zone_t g_stripe_zone; static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_stripe_taste; static g_ctl_req_t g_stripe_config; static g_dumpconf_t g_stripe_dumpconf; static g_init_t g_stripe_init; static g_fini_t g_stripe_fini; struct g_class g_stripe_class = { .name = G_STRIPE_CLASS_NAME, .version = G_VERSION, .ctlreq = g_stripe_config, .taste = g_stripe_taste, .destroy_geom = g_stripe_destroy_geom, .init = g_stripe_init, .fini = g_stripe_fini }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff"); static u_int g_stripe_debug = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, "Debug level"); static int g_stripe_fast = 0; static int g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) { int error, fast; fast = g_stripe_fast; error = sysctl_handle_int(oidp, &fast, 0, req); if (error == 0 && req->newptr != NULL) g_stripe_fast = fast; return (error); } SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RWTUN, NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode"); static u_int g_stripe_maxmem = MAXPHYS * 100; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_stripe_maxmem, 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); static u_int g_stripe_fast_failed = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); /* * Greatest Common Divisor. */ static u_int gcd(u_int a, u_int b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static u_int lcm(u_int a, u_int b) { return ((a * b) / gcd(a, b)); } static void g_stripe_init(struct g_class *mp __unused) { g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL, NULL, NULL, 0, 0); g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS; uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS); } static void g_stripe_fini(struct g_class *mp __unused) { uma_zdestroy(g_stripe_zone); } /* * Return the number of valid disks. */ static u_int g_stripe_nvalid(struct g_stripe_softc *sc) { u_int i, no; no = 0; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i] != NULL) no++; } return (no); } static void g_stripe_remove_disk(struct g_consumer *cp) { struct g_stripe_softc *sc; g_topology_assert(); KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); sc = (struct g_stripe_softc *)cp->geom->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); if (cp->private == NULL) { G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, sc->sc_name); cp->private = (void *)(uintptr_t)-1; } if (sc->sc_provider != NULL) { G_STRIPE_DEBUG(0, "Device %s deactivated.", sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) return; sc->sc_disks[cp->index] = NULL; cp->index = 0; g_detach(cp); g_destroy_consumer(cp); /* If there are no valid disks anymore, remove device. 
*/ if (LIST_EMPTY(&sc->sc_geom->consumer)) g_stripe_destroy(sc, 1); } static void g_stripe_orphan(struct g_consumer *cp) { struct g_stripe_softc *sc; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; g_stripe_remove_disk(cp); } static int g_stripe_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp1, *cp2, *tmp; struct g_stripe_softc *sc; struct g_geom *gp; int error; g_topology_assert(); gp = pp->geom; sc = gp->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); /* On first open, grab an extra "exclusive" bit */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... and let go of it on last close */ if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) de--; LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { error = g_access(cp1, dr, dw, de); if (error != 0) goto fail; if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && cp1->private != NULL) { g_stripe_remove_disk(cp1); /* May destroy geom. */ } } return (0); fail: LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) break; g_access(cp2, -dr, -dw, -de); } return (error); } static void g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, off_t length, int mode) { u_int stripesize; size_t len; stripesize = sc->sc_stripesize; len = (size_t)(stripesize - (offset & (stripesize - 1))); do { bcopy(src, dst, len); if (mode) { dst += len + stripesize * (sc->sc_ndisks - 1); src += len; } else { dst += len; src += len + stripesize * (sc->sc_ndisks - 1); } length -= len; KASSERT(length >= 0, ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).", (size_t)stripesize, (intmax_t)offset, (intmax_t)length)); if (length > stripesize) len = stripesize; else len = length; } while (length > 0); } static void g_stripe_done(struct bio *bp) { struct g_stripe_softc *sc; struct bio *pbp; pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, bp->bio_length, 1); bp->bio_data = bp->bio_caller1; bp->bio_caller1 = NULL; } mtx_lock(&sc->sc_lock); if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; pbp->bio_completed += bp->bio_completed; pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { mtx_unlock(&sc->sc_lock); if (pbp->bio_driver1 != NULL) uma_zfree(g_stripe_zone, pbp->bio_driver1); g_io_deliver(pbp, pbp->bio_error); } else mtx_unlock(&sc->sc_lock); g_destroy_bio(bp); } static int g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); u_int nparts = 0, stripesize; struct g_stripe_softc *sc; char *addr, *data = NULL; struct bio *cbp; int error; sc = bp->bio_to->geom->softc; addr = bp->bio_data; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. 
*/ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; cbp->bio_length = length; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } if (nparts >= sc->sc_ndisks) { cbp = TAILQ_NEXT(cbp, bio_queue); if (cbp == NULL) cbp = TAILQ_FIRST(&queue); nparts++; /* * Update bio structure. */ /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length += MIN(stripesize, length); if (cbp->bio_caller1 == NULL) { cbp->bio_caller1 = cbp->bio_data; cbp->bio_data = NULL; if (data == NULL) { data = uma_zalloc(g_stripe_zone, M_NOWAIT); if (data == NULL) { error = ENOMEM; goto failure; } } } } else { cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); cbp->bio_caller2 = sc->sc_disks[no]; } } if (data != NULL) bp->bio_driver1 = data; /* * Fire off all allocated requests! */ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; if (cbp->bio_caller1 != NULL) { cbp->bio_data = data; if (bp->bio_cmd == BIO_WRITE) { g_stripe_copy(sc, cbp->bio_caller1, data, cbp->bio_offset, cbp->bio_length, 0); } data += cbp->bio_length; } G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: if (data != NULL) uma_zfree(g_stripe_zone, data); while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); if (cbp->bio_caller1 != NULL) { cbp->bio_data = cbp->bio_caller1; cbp->bio_caller1 = NULL; } bp->bio_children--; g_destroy_bio(cbp); } return (error); } static int g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_stripe_softc *sc; uint32_t stripesize; struct bio *cbp; char *addr; int error; sc = bp->bio_to->geom->softc; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ if (bp->bio_length == length) cbp->bio_done = g_std_done; /* Optimized lockless case. */ else cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_length = length; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE; addr = NULL; } else addr = bp->bio_data; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); if (bp->bio_cmd != BIO_DELETE) addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. 
*/ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); if ((bp->bio_flags & BIO_UNMAPPED) != 0) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; cbp->bio_caller2 = sc->sc_disks[no]; if (bp->bio_cmd != BIO_DELETE) addr += stripesize; } /* * Fire off all allocated requests! */ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); bp->bio_children--; g_destroy_bio(cbp); } return (error); } static void g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp) { struct bio_queue_head queue; struct g_consumer *cp; struct bio *cbp; u_int no; bioq_init(&queue); for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); cbp->bio_done = g_stripe_done; cbp->bio_caller2 = sc->sc_disks[no]; cbp->bio_to = sc->sc_disks[no]->provider; } for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); G_STRIPE_LOGREQ(cbp, "Sending request."); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; g_io_request(cbp, cp); } } static void g_stripe_start(struct bio *bp) { off_t offset, start, length, nstripe; struct g_stripe_softc *sc; u_int no, stripesize; int error, fast = 0; sc = bp->bio_to->geom->softc; /* * If sc == NULL, provider's error should be set and g_stripe_start() * should not be called at all. */ KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_STRIPE_LOGREQ(bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_FLUSH: g_stripe_flush(sc, bp); return; case BIO_GETATTR: /* To which provider it should be delivered? */ default: g_io_deliver(bp, EOPNOTSUPP); return; } stripesize = sc->sc_stripesize; /* * Calculations are quite messy, but fast I hope. */ /* Stripe number. */ /* nstripe = bp->bio_offset / stripesize; */ nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; /* Disk number. */ no = nstripe % sc->sc_ndisks; /* Start position in stripe. */ /* start = bp->bio_offset % stripesize; */ start = bp->bio_offset & (stripesize - 1); /* Start position in disk. */ /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; /* Length of data to operate. */ length = MIN(bp->bio_length, stripesize - start); /* * Do use "fast" mode when: * 1. "Fast" mode is ON. * and * 2. Request size is less than or equal to MAXPHYS, * which should always be true. * and * 3. Request size is bigger than stripesize * ndisks. If it isn't, * there will be no need to send more than one I/O request to * a provider, so there is nothing to optmize. * and * 4. Request is not unmapped. * and * 5. It is not a BIO_DELETE. 
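[Editorial aside, not part of the patch: the shift/mask arithmetic in g_stripe_start() above is easier to follow with numbers plugged in. A hedged, userland-only sketch with invented values (3 disks, 64 KiB stripes, so stripebits = 16) showing how a logical offset maps to a disk number, an on-disk offset, and the length of the first chunk:]

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
            const uint64_t stripesize = 64 * 1024;
            const unsigned stripebits = 16;         /* log2(stripesize) */
            const unsigned ndisks = 3;
            uint64_t bio_offset = 200 * 1024;       /* logical offset of the request */

            uint64_t nstripe = bio_offset >> stripebits;          /* stripe number: 3 */
            unsigned no = nstripe % ndisks;                       /* disk number: 0 */
            uint64_t start = bio_offset & (stripesize - 1);       /* 8 KiB into the stripe */
            uint64_t offset = ((nstripe / ndisks) << stripebits) + start;  /* 72 KiB on disk 0 */

            printf("disk %u, on-disk offset %ju, first chunk at most %ju bytes\n",
                no, (uintmax_t)offset, (uintmax_t)(stripesize - start));
            return (0);
    }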
*/ if (g_stripe_fast && bp->bio_length <= MAXPHYS && bp->bio_length >= stripesize * sc->sc_ndisks && (bp->bio_flags & BIO_UNMAPPED) == 0 && bp->bio_cmd != BIO_DELETE) { fast = 1; } error = 0; if (fast) { error = g_stripe_start_fast(bp, no, offset, length); if (error != 0) g_stripe_fast_failed++; } /* * Do use "economic" when: * 1. "Economic" mode is ON. * or * 2. "Fast" mode failed. It can only fail if there is no memory. */ if (!fast || error != 0) error = g_stripe_start_economic(bp, no, offset, length); if (error != 0) { if (bp->bio_error == 0) bp->bio_error = error; g_io_deliver(bp, bp->bio_error); } } static void g_stripe_check_and_run(struct g_stripe_softc *sc) { struct g_provider *dp; off_t mediasize, ms; u_int no, sectorsize = 0; g_topology_assert(); if (g_stripe_nvalid(sc) != sc->sc_ndisks) return; sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", sc->sc_name); sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (g_stripe_fast == 0) sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; /* * Find the smallest disk. */ mediasize = sc->sc_disks[0]->provider->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) mediasize -= sc->sc_disks[0]->provider->sectorsize; mediasize -= mediasize % sc->sc_stripesize; sectorsize = sc->sc_disks[0]->provider->sectorsize; for (no = 1; no < sc->sc_ndisks; no++) { dp = sc->sc_disks[no]->provider; ms = dp->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) ms -= dp->sectorsize; ms -= ms % sc->sc_stripesize; if (ms < mediasize) mediasize = ms; sectorsize = lcm(sectorsize, dp->sectorsize); /* A provider underneath us doesn't support unmapped */ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { G_STRIPE_DEBUG(1, "Cancelling unmapped " "because of %s.", dp->name); sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; } } sc->sc_provider->sectorsize = sectorsize; sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; sc->sc_provider->stripesize = sc->sc_stripesize; sc->sc_provider->stripeoffset = 0; g_error_provider(sc->sc_provider, 0); G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); } static int g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ stripe_metadata_decode(buf, md); g_free(buf); return (0); } /* * Add disk to given device. */ static int g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) { struct g_consumer *cp, *fcp; struct g_geom *gp; int error; g_topology_assert(); /* Metadata corrupted? */ if (no >= sc->sc_ndisks) return (EINVAL); /* Check if disk is not already attached. */ if (sc->sc_disks[no] != NULL) return (EEXIST); gp = sc->sc_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; cp->private = NULL; cp->index = no; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { struct g_stripe_metadata md; /* Reread metadata. 
*/ error = g_stripe_read_metadata(cp, &md); if (error != 0) goto fail; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); goto fail; } } sc->sc_disks[no] = cp; G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); g_stripe_check_and_run(sc); return (0); fail: if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); g_detach(cp); g_destroy_consumer(cp); return (error); } static struct g_geom * g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, u_int type) { struct g_stripe_softc *sc; struct g_geom *gp; u_int no; g_topology_assert(); G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* Two disks is minimum. */ if (md->md_all < 2) { G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); return (NULL); } #if 0 /* Stripe size have to be grater than or equal to sector size. */ if (md->md_stripesize < sectorsize) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } #endif /* Stripe size have to be power of 2. */ if (!powerof2(md->md_stripesize)) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } /* Check for duplicate unit */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { G_STRIPE_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); gp->start = g_stripe_start; gp->spoiled = g_stripe_orphan; gp->orphan = g_stripe_orphan; gp->access = g_stripe_access; gp->dumpconf = g_stripe_dumpconf; sc->sc_id = md->md_id; sc->sc_stripesize = md->md_stripesize; sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); sc->sc_ndisks = md->md_all; sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, M_STRIPE, M_WAITOK | M_ZERO); for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no] = NULL; sc->sc_type = type; mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); return (gp); } static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) { struct g_provider *pp; struct g_consumer *cp, *cp1; struct g_geom *gp; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_STRIPE_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_STRIPE_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } gp = sc->sc_geom; LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { g_stripe_remove_disk(cp); if (cp1 == NULL) return (0); /* Recursion happened. */ } if (!LIST_EMPTY(&gp->consumer)) return (EINPROGRESS); gp->softc = NULL; KASSERT(sc->sc_provider == NULL, ("Provider still exists? 
(device=%s)", gp->name)); free(sc->sc_disks, M_STRIPE); mtx_destroy(&sc->sc_lock); free(sc, M_STRIPE); G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_stripe_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_stripe_softc *sc; sc = gp->softc; return (g_stripe_destroy(sc, 0)); } static struct g_geom * g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_stripe_metadata md; struct g_stripe_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "stripe:taste"); gp->start = g_stripe_start; gp->access = g_stripe_access; gp->orphan = g_stripe_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_stripe_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) return (NULL); if (md.md_version > G_STRIPE_VERSION) { printf("geom_stripe.ko module is too old to handle %s.\n", pp->name); return (NULL); } /* * Backward compatibility: */ /* There was no md_provider field in earlier versions of metadata. */ if (md.md_version < 2) bzero(md.md_provider, sizeof(md.md_provider)); /* There was no md_provsize field in earlier versions of metadata. */ if (md.md_version < 3) md.md_provsize = pp->mediasize; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != pp->mediasize) return (NULL); /* * Let's check if device already exists. 
*/ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) continue; break; } if (gp != NULL) { G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); return (NULL); } } else { gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); if (gp == NULL) { G_STRIPE_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); g_stripe_destroy(sc, 1); return (NULL); } } return (gp); } static void g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) { u_int attached, no; struct g_stripe_metadata md; struct g_provider *pp; struct g_stripe_softc *sc; struct g_geom *gp; struct sbuf *sb; intmax_t *stripesize; const char *name; char param[16]; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 2) { gctl_error(req, "Too few arguments."); return; } strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); md.md_version = G_STRIPE_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_id = arc4random(); md.md_no = 0; md.md_all = *nargs - 1; stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "No '%s' argument.", "stripesize"); return; } md.md_stripesize = *stripesize; bzero(md.md_provider, sizeof(md.md_provider)); /* This field is not important here. 
*/ md.md_provsize = 0; /* Check all providers are valid */ for (no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); gctl_error(req, "Disk %s is invalid.", name); return; } } gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't configure %s.", md.md_name); return; } sc = gp->softc; sb = sbuf_new_auto(); sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); for (attached = 0, no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); continue; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); KASSERT(pp != NULL, ("Provider %s disappear?!", name)); if (g_stripe_add_disk(sc, pp, no - 1) != 0) { G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", no, pp->name, gp->name); sbuf_printf(sb, " %s", pp->name); continue; } attached++; } sbuf_finish(sb); if (md.md_all != attached) { g_stripe_destroy(gp->softc, 1); gctl_error(req, "%s", sbuf_data(sb)); } sbuf_delete(sb); } static struct g_stripe_softc * g_stripe_find_device(struct g_class *mp, const char *name) { struct g_stripe_softc *sc; struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(sc->sc_name, name) == 0) return (sc); } return (NULL); } static void g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_stripe_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_stripe_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_stripe_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } static void g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_STRIPE_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_stripe_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_stripe_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_stripe_softc *sc; sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. 
*/ } else if (cp != NULL) { sbuf_printf(sb, "%s%u\n", indent, (u_int)cp->index); } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_stripesize); sbuf_printf(sb, "%s", indent); switch (sc->sc_type) { case G_STRIPE_TYPE_AUTOMATIC: sbuf_printf(sb, "AUTOMATIC"); break; case G_STRIPE_TYPE_MANUAL: sbuf_printf(sb, "MANUAL"); break; default: sbuf_printf(sb, "UNKNOWN"); break; } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%sTotal=%u, Online=%u\n", indent, sc->sc_ndisks, g_stripe_nvalid(sc)); sbuf_printf(sb, "%s", indent); if (sc->sc_provider != NULL && sc->sc_provider->error == 0) sbuf_printf(sb, "UP"); else sbuf_printf(sb, "DOWN"); sbuf_printf(sb, "\n"); } } DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); +MODULE_VERSION(geom_stripe, 0); Index: stable/11/sys/geom/uzip/g_uzip.c =================================================================== --- stable/11/sys/geom/uzip/g_uzip.c (revision 332639) +++ stable/11/sys/geom/uzip/g_uzip.c (revision 332640) @@ -1,901 +1,902 @@ /*- * Copyright (c) 2004 Max Khon * Copyright (c) 2014 Juniper Networks, Inc. * Copyright (c) 2006-2016 Maxim Sobolev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_geom.h" MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures"); FEATURE(geom_uzip, "GEOM read-only compressed disks support"); struct g_uzip_blk { uint64_t offset; uint32_t blen; unsigned char last:1; unsigned char padded:1; #define BLEN_UNDEF UINT32_MAX }; #ifndef ABS #define ABS(a) ((a) < 0 ? 
-(a) : (a)) #endif #define BLK_IN_RANGE(mcn, bcn, ilen) \ (((bcn) != BLEN_UNDEF) && ( \ ((ilen) >= 0 && (mcn >= bcn) && (mcn <= ((intmax_t)(bcn) + (ilen)))) || \ ((ilen) < 0 && (mcn <= bcn) && (mcn >= ((intmax_t)(bcn) + (ilen)))) \ )) #ifdef GEOM_UZIP_DEBUG # define GEOM_UZIP_DBG_DEFAULT 3 #else # define GEOM_UZIP_DBG_DEFAULT 0 #endif #define GUZ_DBG_ERR 1 #define GUZ_DBG_INFO 2 #define GUZ_DBG_IO 3 #define GUZ_DBG_TOC 4 #define GUZ_DEV_SUFX ".uzip" #define GUZ_DEV_NAME(p) (p GUZ_DEV_SUFX) static char g_uzip_attach_to[MAXPATHLEN] = {"*"}; static char g_uzip_noattach_to[MAXPATHLEN] = {GUZ_DEV_NAME("*")}; TUNABLE_STR("kern.geom.uzip.attach_to", g_uzip_attach_to, sizeof(g_uzip_attach_to)); TUNABLE_STR("kern.geom.uzip.noattach_to", g_uzip_noattach_to, sizeof(g_uzip_noattach_to)); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, uzip, CTLFLAG_RW, 0, "GEOM_UZIP stuff"); static u_int g_uzip_debug = GEOM_UZIP_DBG_DEFAULT; SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug, CTLFLAG_RWTUN, &g_uzip_debug, 0, "Debug level (0-4)"); static u_int g_uzip_debug_block = BLEN_UNDEF; SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug_block, CTLFLAG_RWTUN, &g_uzip_debug_block, 0, "Debug operations around specific cluster#"); #define DPRINTF(lvl, a) \ if ((lvl) <= g_uzip_debug) { \ printf a; \ } #define DPRINTF_BLK(lvl, cn, a) \ if ((lvl) <= g_uzip_debug || \ BLK_IN_RANGE(cn, g_uzip_debug_block, 8) || \ BLK_IN_RANGE(cn, g_uzip_debug_block, -8)) { \ printf a; \ } #define DPRINTF_BRNG(lvl, bcn, ecn, a) \ KASSERT(bcn < ecn, ("DPRINTF_BRNG: invalid range (%ju, %ju)", \ (uintmax_t)bcn, (uintmax_t)ecn)); \ if (((lvl) <= g_uzip_debug) || \ BLK_IN_RANGE(g_uzip_debug_block, bcn, \ (intmax_t)ecn - (intmax_t)bcn)) { \ printf a; \ } #define UZIP_CLASS_NAME "UZIP" /* * Maximum allowed valid block size (to prevent foot-shooting) */ #define MAX_BLKSZ (MAXPHYS) static char CLOOP_MAGIC_START[] = "#!/bin/sh\n"; static void g_uzip_read_done(struct bio *bp); static void g_uzip_do(struct g_uzip_softc *, struct bio *bp); static void g_uzip_softc_free(struct g_uzip_softc *sc, struct g_geom *gp) { if (gp != NULL) { DPRINTF(GUZ_DBG_INFO, ("%s: %d requests, %d cached\n", gp->name, sc->req_total, sc->req_cached)); } mtx_lock(&sc->queue_mtx); sc->wrkthr_flags |= GUZ_SHUTDOWN; wakeup(sc); while (!(sc->wrkthr_flags & GUZ_EXITING)) { msleep(sc->procp, &sc->queue_mtx, PRIBIO, "guzfree", hz / 10); } mtx_unlock(&sc->queue_mtx); sc->dcp->free(sc->dcp); free(sc->toc, M_GEOM_UZIP); mtx_destroy(&sc->queue_mtx); mtx_destroy(&sc->last_mtx); free(sc->last_buf, M_GEOM_UZIP); free(sc, M_GEOM_UZIP); } static int g_uzip_cached(struct g_geom *gp, struct bio *bp) { struct g_uzip_softc *sc; off_t ofs; size_t blk, blkofs, usz; sc = gp->softc; ofs = bp->bio_offset + bp->bio_completed; blk = ofs / sc->blksz; mtx_lock(&sc->last_mtx); if (blk == sc->last_blk) { blkofs = ofs % sc->blksz; usz = sc->blksz - blkofs; if (bp->bio_resid < usz) usz = bp->bio_resid; memcpy(bp->bio_data + bp->bio_completed, sc->last_buf + blkofs, usz); sc->req_cached++; mtx_unlock(&sc->last_mtx); DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: offset=%jd: got %jd bytes " "from cache\n", __func__, gp->name, bp, (intmax_t)ofs, (intmax_t)usz)); bp->bio_completed += usz; bp->bio_resid -= usz; if (bp->bio_resid == 0) { g_io_deliver(bp, 0); return (1); } } else mtx_unlock(&sc->last_mtx); return (0); } #define BLK_ENDS(sc, bi) ((sc)->toc[(bi)].offset + \ (sc)->toc[(bi)].blen) #define BLK_IS_CONT(sc, bi) (BLK_ENDS((sc), (bi) - 1) == \ (sc)->toc[(bi)].offset) #define BLK_IS_NIL(sc, bi) ((sc)->toc[(bi)].blen == 0) 
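[Editorial aside, not part of the patch: the BLK_ENDS()/BLK_IS_CONT() macros above are what let g_uzip_request() coalesce physically adjacent compressed clusters into a single backing read. A self-contained sketch of the same idea, with an invented TOC type and function name:]

    #include <stdint.h>
    #include <stddef.h>

    struct toc_entry {
            uint64_t offset;        /* start of compressed cluster on media */
            uint32_t blen;          /* compressed length, 0 = all-zero cluster */
    };

    /*
     * Illustration only, mirroring BLK_IS_CONT(): returns the exclusive end
     * of the run of clusters stored back to back starting at 'start', so a
     * single media read can cover the whole run.
     */
    static size_t
    contiguous_run(const struct toc_entry *toc, size_t start, size_t nblocks)
    {
            size_t i;

            for (i = start + 1; i < nblocks; i++) {
                    if (toc[i - 1].offset + toc[i - 1].blen != toc[i].offset)
                            break;  /* gap or backreference: stop the run here */
            }
            return (i);
    }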
#define TOFF_2_BOFF(sc, pp, bi) ((sc)->toc[(bi)].offset - \ (sc)->toc[(bi)].offset % (pp)->sectorsize) #define TLEN_2_BLEN(sc, pp, bp, ei) roundup(BLK_ENDS((sc), (ei)) - \ (bp)->bio_offset, (pp)->sectorsize) static int g_uzip_request(struct g_geom *gp, struct bio *bp) { struct g_uzip_softc *sc; struct bio *bp2; struct g_consumer *cp; struct g_provider *pp; off_t ofs, start_blk_ofs; size_t i, start_blk, end_blk, zsize; if (g_uzip_cached(gp, bp) != 0) return (1); sc = gp->softc; cp = LIST_FIRST(&gp->consumer); pp = cp->provider; ofs = bp->bio_offset + bp->bio_completed; start_blk = ofs / sc->blksz; KASSERT(start_blk < sc->nblocks, ("start_blk out of range")); end_blk = howmany(ofs + bp->bio_resid, sc->blksz); KASSERT(end_blk <= sc->nblocks, ("end_blk out of range")); for (; BLK_IS_NIL(sc, start_blk) && start_blk < end_blk; start_blk++) { /* Fill in any leading Nil blocks */ start_blk_ofs = ofs % sc->blksz; zsize = MIN(sc->blksz - start_blk_ofs, bp->bio_resid); DPRINTF_BLK(GUZ_DBG_IO, start_blk, ("%s/%s: %p/%ju: " "filling %ju zero bytes\n", __func__, gp->name, gp, (uintmax_t)bp->bio_completed, (uintmax_t)zsize)); bzero(bp->bio_data + bp->bio_completed, zsize); bp->bio_completed += zsize; bp->bio_resid -= zsize; ofs += zsize; } if (start_blk == end_blk) { KASSERT(bp->bio_resid == 0, ("bp->bio_resid is invalid")); /* * No non-Nil data is left, complete request immediately. */ DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: all done returning %ju " "bytes\n", __func__, gp->name, gp, (uintmax_t)bp->bio_completed)); g_io_deliver(bp, 0); return (1); } for (i = start_blk + 1; i < end_blk; i++) { /* Trim discontinuous areas if any */ if (!BLK_IS_CONT(sc, i)) { end_blk = i; break; } } DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: " "start=%u (%ju[%jd]), end=%u (%ju)\n", __func__, gp->name, bp, (u_int)start_blk, (uintmax_t)sc->toc[start_blk].offset, (intmax_t)sc->toc[start_blk].blen, (u_int)end_blk, (uintmax_t)BLK_ENDS(sc, end_blk - 1))); bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return (1); } bp2->bio_done = g_uzip_read_done; bp2->bio_offset = TOFF_2_BOFF(sc, pp, start_blk); while (1) { bp2->bio_length = TLEN_2_BLEN(sc, pp, bp2, end_blk - 1); if (bp2->bio_length <= MAXPHYS) { break; } if (end_blk == (start_blk + 1)) { break; } end_blk--; } DPRINTF(GUZ_DBG_IO, ("%s/%s: bp2->bio_length = %jd, " "bp2->bio_offset = %jd\n", __func__, gp->name, (intmax_t)bp2->bio_length, (intmax_t)bp2->bio_offset)); bp2->bio_data = malloc(bp2->bio_length, M_GEOM_UZIP, M_NOWAIT); if (bp2->bio_data == NULL) { g_destroy_bio(bp2); g_io_deliver(bp, ENOMEM); return (1); } DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: " "reading %jd bytes from offset %jd\n", __func__, gp->name, bp, (intmax_t)bp2->bio_length, (intmax_t)bp2->bio_offset)); g_io_request(bp2, cp); return (0); } static void g_uzip_read_done(struct bio *bp) { struct bio *bp2; struct g_geom *gp; struct g_uzip_softc *sc; bp2 = bp->bio_parent; gp = bp2->bio_to->geom; sc = gp->softc; mtx_lock(&sc->queue_mtx); bioq_disksort(&sc->bio_queue, bp); mtx_unlock(&sc->queue_mtx); wakeup(sc); } static int g_uzip_memvcmp(const void *memory, unsigned char val, size_t size) { const u_char *mm; mm = (const u_char *)memory; return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; } static void g_uzip_do(struct g_uzip_softc *sc, struct bio *bp) { struct bio *bp2; struct g_provider *pp; struct g_consumer *cp; struct g_geom *gp; char *data, *data2; off_t ofs; size_t blk, blkofs, len, ulen, firstblk; int err; bp2 = bp->bio_parent; gp = bp2->bio_to->geom; cp 
= LIST_FIRST(&gp->consumer); pp = cp->provider; bp2->bio_error = bp->bio_error; if (bp2->bio_error != 0) goto done; /* Make sure there's forward progress. */ if (bp->bio_completed == 0) { bp2->bio_error = ECANCELED; goto done; } ofs = bp2->bio_offset + bp2->bio_completed; firstblk = blk = ofs / sc->blksz; blkofs = ofs % sc->blksz; data = bp->bio_data + sc->toc[blk].offset % pp->sectorsize; data2 = bp2->bio_data + bp2->bio_completed; while (bp->bio_completed && bp2->bio_resid) { if (blk > firstblk && !BLK_IS_CONT(sc, blk)) { DPRINTF_BLK(GUZ_DBG_IO, blk, ("%s/%s: %p: backref'ed " "cluster #%u requested, looping around\n", __func__, gp->name, bp2, (u_int)blk)); goto done; } ulen = MIN(sc->blksz - blkofs, bp2->bio_resid); len = sc->toc[blk].blen; DPRINTF(GUZ_DBG_IO, ("%s/%s: %p/%ju: data2=%p, ulen=%u, " "data=%p, len=%u\n", __func__, gp->name, gp, bp->bio_completed, data2, (u_int)ulen, data, (u_int)len)); if (len == 0) { /* All zero block: no cache update */ zero_block: bzero(data2, ulen); } else if (len <= bp->bio_completed) { mtx_lock(&sc->last_mtx); err = sc->dcp->decompress(sc->dcp, gp->name, data, len, sc->last_buf); if (err != 0 && sc->toc[blk].last != 0) { /* * Last block decompression has failed, check * if it's just zero padding. */ if (g_uzip_memvcmp(data, '\0', len) == 0) { sc->toc[blk].blen = 0; sc->last_blk = -1; mtx_unlock(&sc->last_mtx); len = 0; goto zero_block; } } if (err != 0) { sc->last_blk = -1; mtx_unlock(&sc->last_mtx); bp2->bio_error = EILSEQ; DPRINTF(GUZ_DBG_ERR, ("%s/%s: decompress" "(%p, %ju, %ju) failed\n", __func__, gp->name, sc->dcp, (uintmax_t)blk, (uintmax_t)len)); goto done; } sc->last_blk = blk; memcpy(data2, sc->last_buf + blkofs, ulen); mtx_unlock(&sc->last_mtx); err = sc->dcp->rewind(sc->dcp, gp->name); if (err != 0) { bp2->bio_error = EILSEQ; DPRINTF(GUZ_DBG_ERR, ("%s/%s: rewind(%p) " "failed\n", __func__, gp->name, sc->dcp)); goto done; } data += len; } else break; data2 += ulen; bp2->bio_completed += ulen; bp2->bio_resid -= ulen; bp->bio_completed -= len; blkofs = 0; blk++; } done: /* Finish processing the request. 
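Stepping back from the request and decompress loops above: both rely on the same mapping from a byte offset in the uncompressed image to a cluster index, an intra-cluster offset, and the number of bytes that can be taken from that cluster. A stand-alone user-space sketch of that arithmetic follows; the cluster size and the sample offsets are invented for illustration and do not come from any particular image.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        uint64_t blksz = 65536;         // assumed cluster size
        uint64_t ofs = 200000;          // offset into the uncompressed image
        uint64_t resid = 100000;        // bytes still to deliver

        uint64_t start_blk = ofs / blksz;                       // first cluster touched
        uint64_t end_blk = (ofs + resid + blksz - 1) / blksz;   // one past the last
        uint64_t blkofs = ofs % blksz;                          // offset inside the first cluster
        uint64_t ulen = blksz - blkofs;                         // bytes usable from it
        if (ulen > resid)
                ulen = resid;

        printf("clusters %ju..%ju, first copy: %ju bytes at intra-offset %ju\n",
            (uintmax_t)start_blk, (uintmax_t)(end_blk - 1),
            (uintmax_t)ulen, (uintmax_t)blkofs);
        return (0);
}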
*/ free(bp->bio_data, M_GEOM_UZIP); g_destroy_bio(bp); if (bp2->bio_error != 0 || bp2->bio_resid == 0) g_io_deliver(bp2, bp2->bio_error); else g_uzip_request(gp, bp2); } static void g_uzip_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_uzip_softc *sc; pp = bp->bio_to; gp = pp->geom; DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: cmd=%d, offset=%jd, length=%jd, " "buffer=%p\n", __func__, gp->name, bp, bp->bio_cmd, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length, bp->bio_data)); sc = gp->softc; sc->req_total++; if (bp->bio_cmd != BIO_READ) { g_io_deliver(bp, EOPNOTSUPP); return; } bp->bio_resid = bp->bio_length; bp->bio_completed = 0; g_uzip_request(gp, bp); } static void g_uzip_orphan(struct g_consumer *cp) { struct g_geom *gp; g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, cp->provider->name); g_topology_assert(); gp = cp->geom; g_uzip_softc_free(gp->softc, gp); gp->softc = NULL; g_wither_geom(gp, ENXIO); } static int g_uzip_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); KASSERT (cp != NULL, ("g_uzip_access but no consumer")); if (cp->acw + dw > 0) return (EROFS); return (g_access(cp, dr, dw, de)); } static void g_uzip_spoiled(struct g_consumer *cp) { struct g_geom *gp; G_VALID_CONSUMER(cp); gp = cp->geom; g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, gp->name); g_topology_assert(); g_uzip_softc_free(gp->softc, gp); gp->softc = NULL; g_wither_geom(gp, ENXIO); } static int g_uzip_parse_toc(struct g_uzip_softc *sc, struct g_provider *pp, struct g_geom *gp) { uint32_t i, j, backref_to; uint64_t max_offset, min_offset; struct g_uzip_blk *last_blk; min_offset = sizeof(struct cloop_header) + (sc->nblocks + 1) * sizeof(uint64_t); max_offset = sc->toc[0].offset - 1; last_blk = &sc->toc[0]; for (i = 0; i < sc->nblocks; i++) { /* First do some bounds checking */ if ((sc->toc[i].offset < min_offset) || (sc->toc[i].offset > pp->mediasize)) { goto error_offset; } DPRINTF_BLK(GUZ_DBG_IO, i, ("%s: cluster #%u " "offset=%ju max_offset=%ju\n", gp->name, (u_int)i, (uintmax_t)sc->toc[i].offset, (uintmax_t)max_offset)); backref_to = BLEN_UNDEF; if (sc->toc[i].offset < max_offset) { /* * For the backref'ed blocks search already parsed * TOC entries for the matching offset and copy the * size from matched entry. */ for (j = 0; j <= i; j++) { if (sc->toc[j].offset == sc->toc[i].offset && !BLK_IS_NIL(sc, j)) { break; } if (j != i) { continue; } DPRINTF(GUZ_DBG_ERR, ("%s: cannot match " "backref'ed offset at cluster #%u\n", gp->name, i)); return (-1); } sc->toc[i].blen = sc->toc[j].blen; backref_to = j; } else { last_blk = &sc->toc[i]; /* * For the "normal blocks" seek forward until we hit * block whose offset is larger than ours and assume * it's going to be the next one. 
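The loop that follows implements this; for reference, here is a stand-alone sketch of the whole length derivation with invented offsets. Cluster lengths are never stored: a repeated offset is a back-reference to an earlier cluster, any other cluster's length is the distance to the next larger offset, and one extra final entry (set to the media size) keeps the last cluster from being a special case.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

int
main(void)
{
        // nblocks real entries plus one extra final entry at the media size.
        uint64_t toc[] = { 4096, 20000, 20000, 36000, 50000 };
        size_t nblocks = 4;

        for (size_t i = 0; i < nblocks; i++) {
                size_t j;

                // A repeated offset is a back-reference to an earlier cluster.
                for (j = 0; j < i; j++)
                        if (toc[j] == toc[i])
                                break;
                if (j < i) {
                        printf("cluster %zu backrefs cluster %zu\n", i, j);
                        continue;
                }
                // Otherwise the compressed length is the distance to the next
                // larger offset, possibly the extra final entry.
                for (j = i + 1; j < nblocks && toc[j] <= toc[i]; j++)
                        ;
                printf("cluster %zu: offset %ju, %ju compressed bytes\n",
                    i, (uintmax_t)toc[i], (uintmax_t)(toc[j] - toc[i]));
        }
        return (0);
}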
*/ for (j = i + 1; j < sc->nblocks; j++) { if (sc->toc[j].offset > max_offset) { break; } } sc->toc[i].blen = sc->toc[j].offset - sc->toc[i].offset; if (BLK_ENDS(sc, i) > pp->mediasize) { DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u " "extends past media boundary (%ju > %ju)\n", gp->name, (u_int)i, (uintmax_t)BLK_ENDS(sc, i), (intmax_t)pp->mediasize)); return (-1); } KASSERT(max_offset <= sc->toc[i].offset, ( "%s: max_offset is incorrect: %ju", gp->name, (uintmax_t)max_offset)); max_offset = BLK_ENDS(sc, i) - 1; } DPRINTF_BLK(GUZ_DBG_TOC, i, ("%s: cluster #%u, original %u " "bytes, in %u bytes", gp->name, i, sc->blksz, sc->toc[i].blen)); if (backref_to != BLEN_UNDEF) { DPRINTF_BLK(GUZ_DBG_TOC, i, (" (->#%u)", (u_int)backref_to)); } DPRINTF_BLK(GUZ_DBG_TOC, i, ("\n")); } last_blk->last = 1; /* Do a second pass to validate block lengths */ for (i = 0; i < sc->nblocks; i++) { if (sc->toc[i].blen > sc->dcp->max_blen) { if (sc->toc[i].last == 0) { DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u " "length (%ju) exceeds " "max_blen (%ju)\n", gp->name, i, (uintmax_t)sc->toc[i].blen, (uintmax_t)sc->dcp->max_blen)); return (-1); } DPRINTF(GUZ_DBG_INFO, ("%s: cluster #%u extra " "padding is detected, trimmed to %ju\n", gp->name, i, (uintmax_t)sc->dcp->max_blen)); sc->toc[i].blen = sc->dcp->max_blen; sc->toc[i].padded = 1; } } return (0); error_offset: DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u: invalid offset %ju, " "min_offset=%ju mediasize=%jd\n", gp->name, (u_int)i, sc->toc[i].offset, min_offset, pp->mediasize)); return (-1); } static struct g_geom * g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) { int error; uint32_t i, total_offsets, offsets_read, blk; void *buf; struct cloop_header *header; struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp2; struct g_uzip_softc *sc; enum { G_UZIP = 1, G_ULZMA } type; g_trace(G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); if ((fnmatch(g_uzip_attach_to, pp->name, 0) != 0) || (fnmatch(g_uzip_noattach_to, pp->name, 0) == 0)) { DPRINTF(GUZ_DBG_INFO, ("%s(%s,%s), ignoring\n", __func__, mp->name, pp->name)); return (NULL); } buf = NULL; /* * Create geom instance. */ gp = g_new_geomf(mp, GUZ_DEV_NAME("%s"), pp->name); cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); if (error) { goto e1; } g_topology_unlock(); /* * Read cloop header, look for CLOOP magic, perform * other validity checks. 
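Two of those validity checks are plain arithmetic and easy to show stand-alone: the cluster size must be a multiple of 512, and the header plus the nblocks + 1 offset entries must fit on the provider. The header size and geometry below are invented for the example.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        uint64_t mediasize = 8 * 1024 * 1024;   // assumed provider size
        uint32_t hdr_size = 132;                // assumed header size
        uint32_t blksz = 65536;
        uint32_t nblocks = 100;
        uint64_t toc_bytes = (uint64_t)(nblocks + 1) * sizeof(uint64_t);

        if (blksz % 512 != 0)
                printf("block size %u is not a multiple of 512\n", blksz);
        else if (hdr_size + toc_bytes > mediasize)
                printf("media too small for %u clusters\n", nblocks);
        else
                printf("header plus %ju TOC bytes fit; uncompressed size %ju\n",
                    (uintmax_t)toc_bytes, (uintmax_t)blksz * nblocks);
        return (0);
}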
*/ DPRINTF(GUZ_DBG_INFO, ("%s: media sectorsize %u, mediasize %jd\n", gp->name, pp->sectorsize, (intmax_t)pp->mediasize)); buf = g_read_data(cp, 0, pp->sectorsize, NULL); if (buf == NULL) goto e2; header = (struct cloop_header *) buf; if (strncmp(header->magic, CLOOP_MAGIC_START, sizeof(CLOOP_MAGIC_START) - 1) != 0) { DPRINTF(GUZ_DBG_ERR, ("%s: no CLOOP magic\n", gp->name)); goto e3; } switch (header->magic[CLOOP_OFS_COMPR]) { case CLOOP_COMP_LZMA: case CLOOP_COMP_LZMA_DDP: type = G_ULZMA; if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_LZMA) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; } DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_LZMA image found\n", gp->name)); break; case CLOOP_COMP_LIBZ: case CLOOP_COMP_LIBZ_DDP: type = G_UZIP; if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_ZLIB) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; } DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZLIB image found\n", gp->name)); break; default: DPRINTF(GUZ_DBG_ERR, ("%s: unsupported image type\n", gp->name)); goto e3; } /* * Initialize softc and read offsets. */ sc = malloc(sizeof(*sc), M_GEOM_UZIP, M_WAITOK | M_ZERO); gp->softc = sc; sc->blksz = ntohl(header->blksz); sc->nblocks = ntohl(header->nblocks); if (sc->blksz % 512 != 0) { printf("%s: block size (%u) should be multiple of 512.\n", gp->name, sc->blksz); goto e4; } if (sc->blksz > MAX_BLKSZ) { printf("%s: block size (%u) should not be larger than %d.\n", gp->name, sc->blksz, MAX_BLKSZ); } total_offsets = sc->nblocks + 1; if (sizeof(struct cloop_header) + total_offsets * sizeof(uint64_t) > pp->mediasize) { printf("%s: media too small for %u blocks\n", gp->name, sc->nblocks); goto e4; } sc->toc = malloc(total_offsets * sizeof(struct g_uzip_blk), M_GEOM_UZIP, M_WAITOK | M_ZERO); offsets_read = MIN(total_offsets, (pp->sectorsize - sizeof(*header)) / sizeof(uint64_t)); for (i = 0; i < offsets_read; i++) { sc->toc[i].offset = be64toh(((uint64_t *) (header + 1))[i]); sc->toc[i].blen = BLEN_UNDEF; } DPRINTF(GUZ_DBG_INFO, ("%s: %u offsets in the first sector\n", gp->name, offsets_read)); for (blk = 1; offsets_read < total_offsets; blk++) { uint32_t nread; free(buf, M_GEOM); buf = g_read_data( cp, blk * pp->sectorsize, pp->sectorsize, NULL); if (buf == NULL) goto e5; nread = MIN(total_offsets - offsets_read, pp->sectorsize / sizeof(uint64_t)); DPRINTF(GUZ_DBG_TOC, ("%s: %u offsets read from sector %d\n", gp->name, nread, blk)); for (i = 0; i < nread; i++) { sc->toc[offsets_read + i].offset = be64toh(((uint64_t *) buf)[i]); sc->toc[offsets_read + i].blen = BLEN_UNDEF; } offsets_read += nread; } free(buf, M_GEOM); buf = NULL; offsets_read -= 1; DPRINTF(GUZ_DBG_INFO, ("%s: done reading %u block offsets from %u " "sectors\n", gp->name, offsets_read, blk)); if (sc->nblocks != offsets_read) { DPRINTF(GUZ_DBG_ERR, ("%s: read %s offsets than expected " "blocks\n", gp->name, sc->nblocks < offsets_read ? "more" : "less")); goto e5; } if (type == G_UZIP) { sc->dcp = g_uzip_zlib_ctor(sc->blksz); } else { sc->dcp = g_uzip_lzma_ctor(sc->blksz); } if (sc->dcp == NULL) { goto e5; } /* * "Fake" last+1 block, to make it easier for the TOC parser to * iterate without making the last element a special case. 
*/ sc->toc[sc->nblocks].offset = pp->mediasize; /* Massage TOC (table of contents), make sure it is sound */ if (g_uzip_parse_toc(sc, pp, gp) != 0) { DPRINTF(GUZ_DBG_ERR, ("%s: TOC error\n", gp->name)); goto e6; } mtx_init(&sc->last_mtx, "geom_uzip cache", NULL, MTX_DEF); mtx_init(&sc->queue_mtx, "geom_uzip wrkthread", NULL, MTX_DEF); bioq_init(&sc->bio_queue); sc->last_blk = -1; sc->last_buf = malloc(sc->blksz, M_GEOM_UZIP, M_WAITOK); sc->req_total = 0; sc->req_cached = 0; sc->uzip_do = &g_uzip_do; error = kproc_create(g_uzip_wrkthr, sc, &sc->procp, 0, 0, "%s", gp->name); if (error != 0) { goto e7; } g_topology_lock(); pp2 = g_new_providerf(gp, "%s", gp->name); pp2->sectorsize = 512; pp2->mediasize = (off_t)sc->nblocks * sc->blksz; pp2->stripesize = pp->stripesize; pp2->stripeoffset = pp->stripeoffset; g_error_provider(pp2, 0); g_access(cp, -1, 0, 0); DPRINTF(GUZ_DBG_INFO, ("%s: taste ok (%d, %jd), (%d, %d), %x\n", gp->name, pp2->sectorsize, (intmax_t)pp2->mediasize, pp2->stripeoffset, pp2->stripesize, pp2->flags)); DPRINTF(GUZ_DBG_INFO, ("%s: %u x %u blocks\n", gp->name, sc->nblocks, sc->blksz)); return (gp); e7: free(sc->last_buf, M_GEOM); mtx_destroy(&sc->queue_mtx); mtx_destroy(&sc->last_mtx); e6: sc->dcp->free(sc->dcp); e5: free(sc->toc, M_GEOM); e4: free(gp->softc, M_GEOM_UZIP); e3: if (buf != NULL) { free(buf, M_GEOM); } e2: g_topology_lock(); g_access(cp, -1, 0, 0); e1: g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } static int g_uzip_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_provider *pp; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, gp->name); g_topology_assert(); if (gp->softc == NULL) { DPRINTF(GUZ_DBG_ERR, ("%s(%s): gp->softc == NULL\n", __func__, gp->name)); return (ENXIO); } KASSERT(gp != NULL, ("NULL geom")); pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("NULL provider")); if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) return (EBUSY); g_uzip_softc_free(gp->softc, gp); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static struct g_class g_uzip_class = { .name = UZIP_CLASS_NAME, .version = G_VERSION, .taste = g_uzip_taste, .destroy_geom = g_uzip_destroy_geom, .start = g_uzip_start, .orphan = g_uzip_orphan, .access = g_uzip_access, .spoiled = g_uzip_spoiled, }; DECLARE_GEOM_CLASS(g_uzip_class, g_uzip); MODULE_DEPEND(g_uzip, zlib, 1, 1, 1); +MODULE_VERSION(geom_uzip, 0); Index: stable/11/sys/geom/vinum/geom_vinum.c =================================================================== --- stable/11/sys/geom/vinum/geom_vinum.c (revision 332639) +++ stable/11/sys/geom/vinum/geom_vinum.c (revision 332640) @@ -1,1048 +1,1049 @@ /*- * Copyright (c) 2004, 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, vinum, CTLFLAG_RW, 0, "GEOM_VINUM stuff"); u_int g_vinum_debug = 0; SYSCTL_UINT(_kern_geom_vinum, OID_AUTO, debug, CTLFLAG_RWTUN, &g_vinum_debug, 0, "Debug level"); static int gv_create(struct g_geom *, struct gctl_req *); static void gv_attach(struct gv_softc *, struct gctl_req *); static void gv_detach(struct gv_softc *, struct gctl_req *); static void gv_parityop(struct gv_softc *, struct gctl_req *); static void gv_orphan(struct g_consumer *cp) { struct g_geom *gp; struct gv_softc *sc; struct gv_drive *d; g_topology_assert(); KASSERT(cp != NULL, ("gv_orphan: null cp")); gp = cp->geom; KASSERT(gp != NULL, ("gv_orphan: null gp")); sc = gp->softc; KASSERT(sc != NULL, ("gv_orphan: null sc")); d = cp->private; KASSERT(d != NULL, ("gv_orphan: null d")); g_trace(G_T_TOPOLOGY, "gv_orphan(%s)", gp->name); gv_post_event(sc, GV_EVENT_DRIVE_LOST, d, NULL, 0, 0); } void gv_start(struct bio *bp) { struct g_geom *gp; struct gv_softc *sc; gp = bp->bio_to->geom; sc = gp->softc; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_GETATTR: default: g_io_deliver(bp, EOPNOTSUPP); return; } mtx_lock(&sc->bqueue_mtx); bioq_disksort(sc->bqueue_down, bp); wakeup(sc); mtx_unlock(&sc->bqueue_mtx); } void gv_done(struct bio *bp) { struct g_geom *gp; struct gv_softc *sc; KASSERT(bp != NULL, ("NULL bp")); gp = bp->bio_from->geom; sc = gp->softc; mtx_lock(&sc->bqueue_mtx); bioq_disksort(sc->bqueue_up, bp); wakeup(sc); mtx_unlock(&sc->bqueue_mtx); } int gv_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct gv_softc *sc; struct gv_drive *d, *d2; int error; gp = pp->geom; sc = gp->softc; /* * We want to modify the read count with the write count in case we have * plexes in a RAID-5 organization. 
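The dr += dw below is that adjustment: a write to a RAID-5 plex also reads old data and parity, so the write delta is folded into the read delta before it is propagated. A stand-alone sketch of the propagate-then-roll-back pattern used in the loop, with invented names and a component that refuses the request:

#include <stdio.h>

#define NDRIVES 3

static int
drive_access(int idx, int dr, int dw, int de)
{
        // Pretend the last drive rejects the request, whatever the deltas.
        return (idx == 2 ? -1 : 0);
}

static int
vinum_access(int dr, int dw, int de)
{
        int i, j, error;

        // Writes imply reads for RAID-5 parity read-modify-write.
        dr += dw;
        for (i = 0; i < NDRIVES; i++) {
                error = drive_access(i, dr, dw, de);
                if (error != 0) {
                        // Undo what was already granted.
                        for (j = 0; j < i; j++)
                                drive_access(j, -dr, -dw, -de);
                        return (error);
                }
        }
        return (0);
}

int
main(void)
{
        printf("access -> %d\n", vinum_access(0, 1, 0));
        return (0);
}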
*/ dr += dw; LIST_FOREACH(d, &sc->drives, drive) { if (d->consumer == NULL) continue; error = g_access(d->consumer, dr, dw, de); if (error) { LIST_FOREACH(d2, &sc->drives, drive) { if (d == d2) break; g_access(d2->consumer, -dr, -dw, -de); } G_VINUM_DEBUG(0, "g_access '%s' failed: %d", d->name, error); return (error); } } return (0); } static void gv_init(struct g_class *mp) { struct g_geom *gp; struct gv_softc *sc; g_trace(G_T_TOPOLOGY, "gv_init(%p)", mp); gp = g_new_geomf(mp, "VINUM"); gp->spoiled = gv_orphan; gp->orphan = gv_orphan; gp->access = gv_access; gp->start = gv_start; gp->softc = g_malloc(sizeof(struct gv_softc), M_WAITOK | M_ZERO); sc = gp->softc; sc->geom = gp; sc->bqueue_down = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); sc->bqueue_up = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(sc->bqueue_down); bioq_init(sc->bqueue_up); LIST_INIT(&sc->drives); LIST_INIT(&sc->subdisks); LIST_INIT(&sc->plexes); LIST_INIT(&sc->volumes); TAILQ_INIT(&sc->equeue); mtx_init(&sc->config_mtx, "gv_config", NULL, MTX_DEF); mtx_init(&sc->equeue_mtx, "gv_equeue", NULL, MTX_DEF); mtx_init(&sc->bqueue_mtx, "gv_bqueue", NULL, MTX_DEF); kproc_create(gv_worker, sc, &sc->worker, 0, 0, "gv_worker"); } static int gv_unload(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct gv_softc *sc; g_trace(G_T_TOPOLOGY, "gv_unload(%p)", mp); g_topology_assert(); sc = gp->softc; if (sc != NULL) { gv_worker_exit(sc); gp->softc = NULL; g_wither_geom(gp, ENXIO); } return (0); } /* Handle userland request of attaching object. */ static void gv_attach(struct gv_softc *sc, struct gctl_req *req) { struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; off_t *offset; int *rename, type_child, type_parent; char *child, *parent; child = gctl_get_param(req, "child", NULL); if (child == NULL) { gctl_error(req, "no child given"); return; } parent = gctl_get_param(req, "parent", NULL); if (parent == NULL) { gctl_error(req, "no parent given"); return; } offset = gctl_get_paraml(req, "offset", sizeof(*offset)); if (offset == NULL) { gctl_error(req, "no offset given"); return; } rename = gctl_get_paraml(req, "rename", sizeof(*rename)); if (rename == NULL) { gctl_error(req, "no rename flag given"); return; } type_child = gv_object_type(sc, child); type_parent = gv_object_type(sc, parent); switch (type_child) { case GV_TYPE_PLEX: if (type_parent != GV_TYPE_VOL) { gctl_error(req, "no such volume to attach to"); return; } v = gv_find_vol(sc, parent); p = gv_find_plex(sc, child); gv_post_event(sc, GV_EVENT_ATTACH_PLEX, p, v, *offset, *rename); break; case GV_TYPE_SD: if (type_parent != GV_TYPE_PLEX) { gctl_error(req, "no such plex to attach to"); return; } p = gv_find_plex(sc, parent); s = gv_find_sd(sc, child); gv_post_event(sc, GV_EVENT_ATTACH_SD, s, p, *offset, *rename); break; default: gctl_error(req, "invalid child type"); break; } } /* Handle userland request of detaching object. 
*/ static void gv_detach(struct gv_softc *sc, struct gctl_req *req) { struct gv_plex *p; struct gv_sd *s; int *flags, type; char *object; object = gctl_get_param(req, "object", NULL); if (object == NULL) { gctl_error(req, "no argument given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); type = gv_object_type(sc, object); switch (type) { case GV_TYPE_PLEX: p = gv_find_plex(sc, object); gv_post_event(sc, GV_EVENT_DETACH_PLEX, p, NULL, *flags, 0); break; case GV_TYPE_SD: s = gv_find_sd(sc, object); gv_post_event(sc, GV_EVENT_DETACH_SD, s, NULL, *flags, 0); break; default: gctl_error(req, "invalid object type"); break; } } /* Handle userland requests for creating new objects. */ static int gv_create(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_drive *d, *d2; struct gv_plex *p, *p2; struct gv_sd *s, *s2; struct gv_volume *v, *v2; struct g_provider *pp; int error, i, *drives, *flags, *plexes, *subdisks, *volumes; char buf[20]; g_topology_assert(); sc = gp->softc; /* Find out how many of each object have been passed in. */ volumes = gctl_get_paraml(req, "volumes", sizeof(*volumes)); plexes = gctl_get_paraml(req, "plexes", sizeof(*plexes)); subdisks = gctl_get_paraml(req, "subdisks", sizeof(*subdisks)); drives = gctl_get_paraml(req, "drives", sizeof(*drives)); if (volumes == NULL || plexes == NULL || subdisks == NULL || drives == NULL) { gctl_error(req, "number of objects not given"); return (-1); } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "flags not given"); return (-1); } /* First, handle drive definitions ... */ for (i = 0; i < *drives; i++) { snprintf(buf, sizeof(buf), "drive%d", i); d2 = gctl_get_paraml(req, buf, sizeof(*d2)); if (d2 == NULL) { gctl_error(req, "no drive definition given"); return (-1); } /* * Make sure that the device specified in the drive config is * an active GEOM provider. */ pp = g_provider_by_name(d2->device); if (pp == NULL) { gctl_error(req, "%s: device not found", d2->device); goto error; } if (gv_find_drive(sc, d2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "drive '%s' already exists", d2->name); goto error; } if (gv_find_drive_device(sc, d2->device) != NULL) { gctl_error(req, "device '%s' already configured in " "gvinum", d2->device); goto error; } d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); bcopy(d2, d, sizeof(*d)); gv_post_event(sc, GV_EVENT_CREATE_DRIVE, d, NULL, 0, 0); } /* ... then volume definitions ... */ for (i = 0; i < *volumes; i++) { error = 0; snprintf(buf, sizeof(buf), "volume%d", i); v2 = gctl_get_paraml(req, buf, sizeof(*v2)); if (v2 == NULL) { gctl_error(req, "no volume definition given"); return (-1); } if (gv_find_vol(sc, v2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "volume '%s' already exists", v2->name); goto error; } v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); bcopy(v2, v, sizeof(*v)); gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); } /* ... then plex definitions ... */ for (i = 0; i < *plexes; i++) { error = 0; snprintf(buf, sizeof(buf), "plex%d", i); p2 = gctl_get_paraml(req, buf, sizeof(*p2)); if (p2 == NULL) { gctl_error(req, "no plex definition given"); return (-1); } if (gv_find_plex(sc, p2->name) != NULL) { /* Ignore error. 
*/ if (*flags & GV_FLAG_F) continue; gctl_error(req, "plex '%s' already exists", p2->name); goto error; } p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); bcopy(p2, p, sizeof(*p)); gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); } /* ... and, finally, subdisk definitions. */ for (i = 0; i < *subdisks; i++) { error = 0; snprintf(buf, sizeof(buf), "sd%d", i); s2 = gctl_get_paraml(req, buf, sizeof(*s2)); if (s2 == NULL) { gctl_error(req, "no subdisk definition given"); return (-1); } if (gv_find_sd(sc, s2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "sd '%s' already exists", s2->name); goto error; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); bcopy(s2, s, sizeof(*s)); gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); } error: gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); return (0); } static void gv_config(struct gctl_req *req, struct g_class *mp, char const *verb) { struct g_geom *gp; struct gv_softc *sc; struct sbuf *sb; char *comment; g_topology_assert(); gp = LIST_FIRST(&mp->geom); sc = gp->softc; if (!strcmp(verb, "attach")) { gv_attach(sc, req); } else if (!strcmp(verb, "concat")) { gv_concat(gp, req); } else if (!strcmp(verb, "detach")) { gv_detach(sc, req); } else if (!strcmp(verb, "list")) { gv_list(gp, req); /* Save our configuration back to disk. */ } else if (!strcmp(verb, "saveconfig")) { gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); /* Return configuration in string form. */ } else if (!strcmp(verb, "getconfig")) { comment = gctl_get_param(req, "comment", NULL); if (comment == NULL) { gctl_error(req, "no comment parameter given"); return; } sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); gv_format_config(sc, sb, 0, comment); sbuf_finish(sb); gctl_set_param(req, "config", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } else if (!strcmp(verb, "create")) { gv_create(gp, req); } else if (!strcmp(verb, "mirror")) { gv_mirror(gp, req); } else if (!strcmp(verb, "move")) { gv_move(gp, req); } else if (!strcmp(verb, "raid5")) { gv_raid5(gp, req); } else if (!strcmp(verb, "rebuildparity") || !strcmp(verb, "checkparity")) { gv_parityop(sc, req); } else if (!strcmp(verb, "remove")) { gv_remove(gp, req); } else if (!strcmp(verb, "rename")) { gv_rename(gp, req); } else if (!strcmp(verb, "resetconfig")) { gv_post_event(sc, GV_EVENT_RESET_CONFIG, sc, NULL, 0, 0); } else if (!strcmp(verb, "start")) { gv_start_obj(gp, req); } else if (!strcmp(verb, "stripe")) { gv_stripe(gp, req); } else if (!strcmp(verb, "setstate")) { gv_setstate(gp, req); } else gctl_error(req, "Unknown verb parameter"); } static void gv_parityop(struct gv_softc *sc, struct gctl_req *req) { struct gv_plex *p; int *flags, *rebuild, type; char *plex; plex = gctl_get_param(req, "plex", NULL); if (plex == NULL) { gctl_error(req, "no plex given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "no flags given"); return; } rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild)); if (rebuild == NULL) { gctl_error(req, "no operation given"); return; } type = gv_object_type(sc, plex); if (type != GV_TYPE_PLEX) { gctl_error(req, "'%s' is not a plex", plex); return; } p = gv_find_plex(sc, plex); if (p->state != GV_PLEX_UP) { gctl_error(req, "plex %s is not completely accessible", p->name); return; } if (p->org != GV_PLEX_RAID5) { gctl_error(req, "plex %s is not a RAID5 plex", p->name); return; } /* Put it in the event queue. 
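As the XXX note below points out, the request only posts an event; the actual parity work runs later in the worker, which drains configuration events in order and so never races another one. A minimal user-space analog of that post-and-drain pattern, with invented types and without the locking and wakeup a real driver needs:

#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

enum ev_type { EV_PARITY_CHECK, EV_PARITY_REBUILD };

struct event {
        enum ev_type type;
        TAILQ_ENTRY(event) link;
};
TAILQ_HEAD(evq, event);

static void
post_event(struct evq *q, enum ev_type type)
{
        struct event *ev = malloc(sizeof(*ev));

        if (ev == NULL)
                abort();
        ev->type = type;
        TAILQ_INSERT_TAIL(q, ev, link);  // a real driver would also wakeup()
}

int
main(void)
{
        struct evq q = TAILQ_HEAD_INITIALIZER(q);
        struct event *ev;

        post_event(&q, EV_PARITY_CHECK);
        post_event(&q, EV_PARITY_REBUILD);
        while ((ev = TAILQ_FIRST(&q)) != NULL) {        // the "worker" loop
                TAILQ_REMOVE(&q, ev, link);
                switch (ev->type) {
                case EV_PARITY_CHECK:
                        printf("event: parity check\n");
                        break;
                case EV_PARITY_REBUILD:
                        printf("event: parity rebuild\n");
                        break;
                }
                free(ev);
        }
        return (0);
}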
*/ /* XXX: The state of the plex might have changed when this event is * picked up ... We should perhaps check this afterwards. */ if (*rebuild) gv_post_event(sc, GV_EVENT_PARITY_REBUILD, p, NULL, 0, 0); else gv_post_event(sc, GV_EVENT_PARITY_CHECK, p, NULL, 0, 0); } static struct g_geom * gv_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_geom *gp; struct g_consumer *cp; struct gv_softc *sc; struct gv_hdr vhdr; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "gv_taste(%s, %s)", mp->name, pp->name); gp = LIST_FIRST(&mp->geom); if (gp == NULL) { G_VINUM_DEBUG(0, "error: tasting, but not initialized?"); return (NULL); } sc = gp->softc; cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); return (NULL); } if (g_access(cp, 1, 0, 0) != 0) { g_detach(cp); g_destroy_consumer(cp); return (NULL); } g_topology_unlock(); error = gv_read_header(cp, &vhdr); g_topology_lock(); g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); /* Check if what we've been given is a valid vinum drive. */ if (!error) gv_post_event(sc, GV_EVENT_DRIVE_TASTED, pp, NULL, 0, 0); return (NULL); } void gv_worker(void *arg) { struct g_provider *pp; struct gv_softc *sc; struct gv_event *ev; struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; struct gv_drive *d; struct bio *bp; int newstate, flags, err, rename; char *newname; off_t offset; sc = arg; KASSERT(sc != NULL, ("NULL sc")); for (;;) { /* Look at the events first... */ ev = gv_get_event(sc); if (ev != NULL) { gv_remove_event(sc, ev); switch (ev->type) { case GV_EVENT_DRIVE_TASTED: G_VINUM_DEBUG(2, "event 'drive tasted'"); pp = ev->arg1; gv_drive_tasted(sc, pp); break; case GV_EVENT_DRIVE_LOST: G_VINUM_DEBUG(2, "event 'drive lost'"); d = ev->arg1; gv_drive_lost(sc, d); break; case GV_EVENT_CREATE_DRIVE: G_VINUM_DEBUG(2, "event 'create drive'"); d = ev->arg1; gv_create_drive(sc, d); break; case GV_EVENT_CREATE_VOLUME: G_VINUM_DEBUG(2, "event 'create volume'"); v = ev->arg1; gv_create_volume(sc, v); break; case GV_EVENT_CREATE_PLEX: G_VINUM_DEBUG(2, "event 'create plex'"); p = ev->arg1; gv_create_plex(sc, p); break; case GV_EVENT_CREATE_SD: G_VINUM_DEBUG(2, "event 'create sd'"); s = ev->arg1; gv_create_sd(sc, s); break; case GV_EVENT_RM_DRIVE: G_VINUM_DEBUG(2, "event 'remove drive'"); d = ev->arg1; flags = ev->arg3; gv_rm_drive(sc, d, flags); /*gv_setup_objects(sc);*/ break; case GV_EVENT_RM_VOLUME: G_VINUM_DEBUG(2, "event 'remove volume'"); v = ev->arg1; gv_rm_vol(sc, v); /*gv_setup_objects(sc);*/ break; case GV_EVENT_RM_PLEX: G_VINUM_DEBUG(2, "event 'remove plex'"); p = ev->arg1; gv_rm_plex(sc, p); /*gv_setup_objects(sc);*/ break; case GV_EVENT_RM_SD: G_VINUM_DEBUG(2, "event 'remove sd'"); s = ev->arg1; gv_rm_sd(sc, s); /*gv_setup_objects(sc);*/ break; case GV_EVENT_SAVE_CONFIG: G_VINUM_DEBUG(2, "event 'save config'"); gv_save_config(sc); break; case GV_EVENT_SET_SD_STATE: G_VINUM_DEBUG(2, "event 'setstate sd'"); s = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_sd_state(s, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting subdisk" " state: error code %d", err); break; case GV_EVENT_SET_DRIVE_STATE: G_VINUM_DEBUG(2, "event 'setstate drive'"); d = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_drive_state(d, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting drive " "state: error code %d", err); break; case GV_EVENT_SET_VOL_STATE: G_VINUM_DEBUG(2, "event 'setstate volume'"); v = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = 
gv_set_vol_state(v, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting volume " "state: error code %d", err); break; case GV_EVENT_SET_PLEX_STATE: G_VINUM_DEBUG(2, "event 'setstate plex'"); p = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_plex_state(p, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting plex " "state: error code %d", err); break; case GV_EVENT_SETUP_OBJECTS: G_VINUM_DEBUG(2, "event 'setup objects'"); gv_setup_objects(sc); break; case GV_EVENT_RESET_CONFIG: G_VINUM_DEBUG(2, "event 'resetconfig'"); err = gv_resetconfig(sc); if (err) G_VINUM_DEBUG(0, "error resetting " "config: error code %d", err); break; case GV_EVENT_PARITY_REBUILD: /* * Start the rebuild. The gv_plex_done will * handle issuing of the remaining rebuild bio's * until it's finished. */ G_VINUM_DEBUG(2, "event 'rebuild'"); p = ev->arg1; if (p->state != GV_PLEX_UP) { G_VINUM_DEBUG(0, "plex %s is not " "completely accessible", p->name); break; } if (p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING || p->flags & GV_PLEX_GROWING) { G_VINUM_DEBUG(0, "plex %s is busy with " "syncing or parity build", p->name); break; } p->synced = 0; p->flags |= GV_PLEX_REBUILDING; g_topology_assert_not(); g_topology_lock(); err = gv_access(p->vol_sc->provider, 1, 1, 0); if (err) { G_VINUM_DEBUG(0, "unable to access " "provider"); break; } g_topology_unlock(); gv_parity_request(p, GV_BIO_CHECK | GV_BIO_PARITY, 0); break; case GV_EVENT_PARITY_CHECK: /* Start parity check. */ G_VINUM_DEBUG(2, "event 'check'"); p = ev->arg1; if (p->state != GV_PLEX_UP) { G_VINUM_DEBUG(0, "plex %s is not " "completely accessible", p->name); break; } if (p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING || p->flags & GV_PLEX_GROWING) { G_VINUM_DEBUG(0, "plex %s is busy with " "syncing or parity build", p->name); break; } p->synced = 0; g_topology_assert_not(); g_topology_lock(); err = gv_access(p->vol_sc->provider, 1, 1, 0); if (err) { G_VINUM_DEBUG(0, "unable to access " "provider"); break; } g_topology_unlock(); gv_parity_request(p, GV_BIO_CHECK, 0); break; case GV_EVENT_START_PLEX: G_VINUM_DEBUG(2, "event 'start' plex"); p = ev->arg1; gv_start_plex(p); break; case GV_EVENT_START_VOLUME: G_VINUM_DEBUG(2, "event 'start' volume"); v = ev->arg1; gv_start_vol(v); break; case GV_EVENT_ATTACH_PLEX: G_VINUM_DEBUG(2, "event 'attach' plex"); p = ev->arg1; v = ev->arg2; rename = ev->arg4; err = gv_attach_plex(p, v, rename); if (err) G_VINUM_DEBUG(0, "error attaching %s to" " %s: error code %d", p->name, v->name, err); break; case GV_EVENT_ATTACH_SD: G_VINUM_DEBUG(2, "event 'attach' sd"); s = ev->arg1; p = ev->arg2; offset = ev->arg3; rename = ev->arg4; err = gv_attach_sd(s, p, offset, rename); if (err) G_VINUM_DEBUG(0, "error attaching %s to" " %s: error code %d", s->name, p->name, err); break; case GV_EVENT_DETACH_PLEX: G_VINUM_DEBUG(2, "event 'detach' plex"); p = ev->arg1; flags = ev->arg3; err = gv_detach_plex(p, flags); if (err) G_VINUM_DEBUG(0, "error detaching %s: " "error code %d", p->name, err); break; case GV_EVENT_DETACH_SD: G_VINUM_DEBUG(2, "event 'detach' sd"); s = ev->arg1; flags = ev->arg3; err = gv_detach_sd(s, flags); if (err) G_VINUM_DEBUG(0, "error detaching %s: " "error code %d", s->name, err); break; case GV_EVENT_RENAME_VOL: G_VINUM_DEBUG(2, "event 'rename' volume"); v = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_vol(sc, v, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", v->name, newname, err); g_free(newname); /* Destroy and 
recreate the provider if we can. */ if (gv_provider_is_open(v->provider)) { G_VINUM_DEBUG(0, "unable to rename " "provider to %s: provider in use", v->name); break; } g_topology_lock(); g_wither_provider(v->provider, ENOENT); g_topology_unlock(); v->provider = NULL; gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); break; case GV_EVENT_RENAME_PLEX: G_VINUM_DEBUG(2, "event 'rename' plex"); p = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_plex(sc, p, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", p->name, newname, err); g_free(newname); break; case GV_EVENT_RENAME_SD: G_VINUM_DEBUG(2, "event 'rename' sd"); s = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_sd(sc, s, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", s->name, newname, err); g_free(newname); break; case GV_EVENT_RENAME_DRIVE: G_VINUM_DEBUG(2, "event 'rename' drive"); d = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_drive(sc, d, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", d->name, newname, err); g_free(newname); break; case GV_EVENT_MOVE_SD: G_VINUM_DEBUG(2, "event 'move' sd"); s = ev->arg1; d = ev->arg2; flags = ev->arg3; err = gv_move_sd(sc, s, d, flags); if (err) G_VINUM_DEBUG(0, "error moving %s to " "%s: error code %d", s->name, d->name, err); break; case GV_EVENT_THREAD_EXIT: G_VINUM_DEBUG(2, "event 'thread exit'"); g_free(ev); mtx_lock(&sc->equeue_mtx); mtx_lock(&sc->bqueue_mtx); gv_cleanup(sc); mtx_destroy(&sc->bqueue_mtx); mtx_destroy(&sc->equeue_mtx); g_free(sc->bqueue_down); g_free(sc->bqueue_up); g_free(sc); kproc_exit(0); /* NOTREACHED */ default: G_VINUM_DEBUG(1, "unknown event %d", ev->type); } g_free(ev); continue; } /* ... then do I/O processing. */ mtx_lock(&sc->bqueue_mtx); /* First do new requests. */ bp = bioq_takefirst(sc->bqueue_down); if (bp != NULL) { mtx_unlock(&sc->bqueue_mtx); /* A bio that interfered with another bio. */ if (bp->bio_pflags & GV_BIO_ONHOLD) { s = bp->bio_caller1; p = s->plex_sc; /* Is it still locked out? */ if (gv_stripe_active(p, bp)) { /* Park the bio on the waiting queue. */ bioq_disksort(p->wqueue, bp); } else { bp->bio_pflags &= ~GV_BIO_ONHOLD; g_io_request(bp, s->drive_sc->consumer); } /* A special request requireing special handling. */ } else if (bp->bio_pflags & GV_BIO_INTERNAL) { p = bp->bio_caller1; gv_plex_start(p, bp); } else { gv_volume_start(sc, bp); } mtx_lock(&sc->bqueue_mtx); } /* Then do completed requests. */ bp = bioq_takefirst(sc->bqueue_up); if (bp == NULL) { msleep(sc, &sc->bqueue_mtx, PRIBIO, "-", hz/10); mtx_unlock(&sc->bqueue_mtx); continue; } mtx_unlock(&sc->bqueue_mtx); gv_bio_done(sc, bp); } } #define VINUM_CLASS_NAME "VINUM" static struct g_class g_vinum_class = { .name = VINUM_CLASS_NAME, .version = G_VERSION, .init = gv_init, .taste = gv_taste, .ctlreq = gv_config, .destroy_geom = gv_unload, }; DECLARE_GEOM_CLASS(g_vinum_class, g_vinum); +MODULE_VERSION(geom_vinum, 0); Index: stable/11/sys/geom/virstor/g_virstor.c =================================================================== --- stable/11/sys/geom/virstor/g_virstor.c (revision 332639) +++ stable/11/sys/geom/virstor/g_virstor.c (revision 332640) @@ -1,1891 +1,1892 @@ /*- * Copyright (c) 2006-2007 Ivan Voras * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Implementation notes: * - "Components" are wrappers around providers that make up the * virtual storage (i.e. a virstor has "physical" components) */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(g_virstor, "GEOM virtual storage support"); /* Declare malloc(9) label */ static MALLOC_DEFINE(M_GVIRSTOR, "gvirstor", "GEOM_VIRSTOR Data"); /* GEOM class methods */ static g_init_t g_virstor_init; static g_fini_t g_virstor_fini; static g_taste_t g_virstor_taste; static g_ctl_req_t g_virstor_config; static g_ctl_destroy_geom_t g_virstor_destroy_geom; /* Declare & initialize class structure ("geom class") */ struct g_class g_virstor_class = { .name = G_VIRSTOR_CLASS_NAME, .version = G_VERSION, .init = g_virstor_init, .fini = g_virstor_fini, .taste = g_virstor_taste, .ctlreq = g_virstor_config, .destroy_geom = g_virstor_destroy_geom /* The .dumpconf and the rest are only usable for a geom instance, so * they will be set when such instance is created. 
*/ }; /* Declare sysctl's and loader tunables */ SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, virstor, CTLFLAG_RW, 0, "GEOM_GVIRSTOR information"); static u_int g_virstor_debug = 2; /* XXX: lower to 2 when released to public */ SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, debug, CTLFLAG_RWTUN, &g_virstor_debug, 0, "Debug level (2=production, 5=normal, 15=excessive)"); static u_int g_virstor_chunk_watermark = 100; SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, chunk_watermark, CTLFLAG_RWTUN, &g_virstor_chunk_watermark, 0, "Minimum number of free chunks before issuing administrative warning"); static u_int g_virstor_component_watermark = 1; SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, component_watermark, CTLFLAG_RWTUN, &g_virstor_component_watermark, 0, "Minimum number of free components before issuing administrative warning"); static int read_metadata(struct g_consumer *, struct g_virstor_metadata *); static void write_metadata(struct g_consumer *, struct g_virstor_metadata *); static int clear_metadata(struct g_virstor_component *); static int add_provider_to_geom(struct g_virstor_softc *, struct g_provider *, struct g_virstor_metadata *); static struct g_geom *create_virstor_geom(struct g_class *, struct g_virstor_metadata *); static void virstor_check_and_run(struct g_virstor_softc *); static u_int virstor_valid_components(struct g_virstor_softc *); static int virstor_geom_destroy(struct g_virstor_softc *, boolean_t, boolean_t); static void remove_component(struct g_virstor_softc *, struct g_virstor_component *, boolean_t); static void bioq_dismantle(struct bio_queue_head *); static int allocate_chunk(struct g_virstor_softc *, struct g_virstor_component **, u_int *, u_int *); static void delay_destroy_consumer(void *, int); static void dump_component(struct g_virstor_component *comp); #if 0 static void dump_me(struct virstor_map_entry *me, unsigned int nr); #endif static void virstor_ctl_stop(struct gctl_req *, struct g_class *); static void virstor_ctl_add(struct gctl_req *, struct g_class *); static void virstor_ctl_remove(struct gctl_req *, struct g_class *); static struct g_virstor_softc * virstor_find_geom(const struct g_class *, const char *); static void update_metadata(struct g_virstor_softc *); static void fill_metadata(struct g_virstor_softc *, struct g_virstor_metadata *, u_int, u_int); static void g_virstor_orphan(struct g_consumer *); static int g_virstor_access(struct g_provider *, int, int, int); static void g_virstor_start(struct bio *); static void g_virstor_dumpconf(struct sbuf *, const char *, struct g_geom *, struct g_consumer *, struct g_provider *); static void g_virstor_done(struct bio *); static void invalid_call(void); /* * Initialise GEOM class (per-class callback) */ static void g_virstor_init(struct g_class *mp __unused) { /* Catch map struct size mismatch at compile time; Map entries must * fit into MAXPHYS exactly, with no wasted space. 
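The CTASSERT that follows encodes exactly that requirement. In stand-alone C11 the same check looks like this, with invented sizes standing in for VIRSTOR_MAP_ENTRY_SIZE and MAXPHYS; change the entry size to something that does not divide the transfer size and the build fails:

#define EXAMPLE_MAXPHYS         (128 * 1024)    // assumed largest I/O size
#define EXAMPLE_ENTRY_SIZE      8               // assumed map entry size
#define EXAMPLE_BLOCK_ENTRIES   (EXAMPLE_MAXPHYS / EXAMPLE_ENTRY_SIZE)

_Static_assert(EXAMPLE_BLOCK_ENTRIES * EXAMPLE_ENTRY_SIZE == EXAMPLE_MAXPHYS,
    "map entries must tile MAXPHYS exactly, with no slack");

int
main(void)
{
        return (0);
}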
*/ CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS); /* Init UMA zones, TAILQ's, other global vars */ } /* * Finalise GEOM class (per-class callback) */ static void g_virstor_fini(struct g_class *mp __unused) { /* Deinit UMA zones & global vars */ } /* * Config (per-class callback) */ static void g_virstor_config(struct gctl_req *req, struct g_class *cp, char const *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "Failed to get 'version' argument"); return; } if (*version != G_VIRSTOR_VERSION) { gctl_error(req, "Userland and kernel versions out of sync"); return; } g_topology_unlock(); if (strcmp(verb, "add") == 0) virstor_ctl_add(req, cp); else if (strcmp(verb, "stop") == 0 || strcmp(verb, "destroy") == 0) virstor_ctl_stop(req, cp); else if (strcmp(verb, "remove") == 0) virstor_ctl_remove(req, cp); else gctl_error(req, "unknown verb: '%s'", verb); g_topology_lock(); } /* * "stop" verb from userland */ static void virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) { int *force, *nargs; int i; nargs = gctl_get_paraml(req, "nargs", sizeof *nargs); if (nargs == NULL) { gctl_error(req, "Error fetching argument '%s'", "nargs"); return; } if (*nargs < 1) { gctl_error(req, "Invalid number of arguments"); return; } force = gctl_get_paraml(req, "force", sizeof *force); if (force == NULL) { gctl_error(req, "Error fetching argument '%s'", "force"); return; } g_topology_lock(); for (i = 0; i < *nargs; i++) { char param[8]; const char *name; struct g_virstor_softc *sc; int error; sprintf(param, "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); g_topology_unlock(); return; } sc = virstor_find_geom(cp, name); if (sc == NULL) { gctl_error(req, "Don't know anything about '%s'", name); g_topology_unlock(); return; } LOG_MSG(LVL_INFO, "Stopping %s by the userland command", sc->geom->name); update_metadata(sc); if ((error = virstor_geom_destroy(sc, TRUE, TRUE)) != 0) { LOG_MSG(LVL_ERROR, "Cannot destroy %s: %d", sc->geom->name, error); } } g_topology_unlock(); } /* * "add" verb from userland - add new component(s) to the structure. * This will be done all at once in here, without going through the * .taste function for new components. */ static void virstor_ctl_add(struct gctl_req *req, struct g_class *cp) { /* Note: while this is going on, I/O is being done on * the g_up and g_down threads. The idea is to make changes * to softc members in a way that can atomically activate * them all at once. 
*/ struct g_virstor_softc *sc; int *hardcode, *nargs; const char *geom_name; /* geom to add a component to */ struct g_consumer *fcp; struct g_virstor_bio_q *bq; u_int added; int error; int i; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "Error fetching argument '%s'", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Invalid number of arguments"); return; } hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); if (hardcode == NULL) { gctl_error(req, "Error fetching argument '%s'", "hardcode"); return; } /* Find "our" geom */ geom_name = gctl_get_asciiparam(req, "arg0"); if (geom_name == NULL) { gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)"); return; } sc = virstor_find_geom(cp, geom_name); if (sc == NULL) { gctl_error(req, "Don't know anything about '%s'", geom_name); return; } if (virstor_valid_components(sc) != sc->n_components) { LOG_MSG(LVL_ERROR, "Cannot add components to incomplete " "virstor %s", sc->geom->name); gctl_error(req, "Virstor %s is incomplete", sc->geom->name); return; } fcp = sc->components[0].gcons; added = 0; g_topology_lock(); for (i = 1; i < *nargs; i++) { struct g_virstor_metadata md; char aname[8]; const char *prov_name; struct g_provider *pp; struct g_consumer *cp; u_int nc; u_int j; snprintf(aname, sizeof aname, "arg%d", i); prov_name = gctl_get_asciiparam(req, aname); if (prov_name == NULL) { gctl_error(req, "Error fetching argument '%s'", aname); g_topology_unlock(); return; } if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) prov_name += sizeof(_PATH_DEV) - 1; pp = g_provider_by_name(prov_name); if (pp == NULL) { /* This is the most common error so be verbose about it */ if (added != 0) { gctl_error(req, "Invalid provider: '%s' (added" " %u components)", prov_name, added); update_metadata(sc); } else { gctl_error(req, "Invalid provider: '%s'", prov_name); } g_topology_unlock(); return; } cp = g_new_consumer(sc->geom); if (cp == NULL) { gctl_error(req, "Cannot create consumer"); g_topology_unlock(); return; } error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach a consumer to %s", pp->name); g_destroy_consumer(cp); g_topology_unlock(); return; } if (fcp->acr != 0 || fcp->acw != 0 || fcp->ace != 0) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { gctl_error(req, "Access request failed for %s", pp->name); g_destroy_consumer(cp); g_topology_unlock(); return; } } if (fcp->provider->sectorsize != pp->sectorsize) { gctl_error(req, "Sector size doesn't fit for %s", pp->name); g_destroy_consumer(cp); g_topology_unlock(); return; } for (j = 0; j < sc->n_components; j++) { if (strcmp(sc->components[j].gcons->provider->name, pp->name) == 0) { gctl_error(req, "Component %s already in %s", pp->name, sc->geom->name); g_destroy_consumer(cp); g_topology_unlock(); return; } } sc->components = realloc(sc->components, sizeof(*sc->components) * (sc->n_components + 1), M_GVIRSTOR, M_WAITOK); nc = sc->n_components; sc->components[nc].gcons = cp; sc->components[nc].sc = sc; sc->components[nc].index = nc; sc->components[nc].chunk_count = cp->provider->mediasize / sc->chunk_size; sc->components[nc].chunk_next = 0; sc->components[nc].chunk_reserved = 0; if (sc->components[nc].chunk_count < 4) { gctl_error(req, "Provider too small: %s", cp->provider->name); g_destroy_consumer(cp); g_topology_unlock(); return; } fill_metadata(sc, &md, nc, *hardcode); write_metadata(cp, &md); /* The new component becomes visible when n_components is * incremented */ 
sc->n_components++; added++; } /* This call to update_metadata() is critical. In case there's a * power failure in the middle of it and some components are updated * while others are not, there will be trouble on next .taste() iff * a non-updated component is detected first */ update_metadata(sc); g_topology_unlock(); LOG_MSG(LVL_INFO, "Added %d component(s) to %s", added, sc->geom->name); /* Fire off BIOs previously queued because there wasn't any * physical space left. If the BIOs still can't be satisfied * they will again be added to the end of the queue (during * which the mutex will be recursed) */ bq = malloc(sizeof(*bq), M_GVIRSTOR, M_WAITOK); bq->bio = NULL; mtx_lock(&sc->delayed_bio_q_mtx); /* First, insert a sentinel to the queue end, so we don't * end up in an infinite loop if there's still no free * space available. */ STAILQ_INSERT_TAIL(&sc->delayed_bio_q, bq, linkage); while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { bq = STAILQ_FIRST(&sc->delayed_bio_q); if (bq->bio != NULL) { g_virstor_start(bq->bio); STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); free(bq, M_GVIRSTOR); } else { STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); free(bq, M_GVIRSTOR); break; } } mtx_unlock(&sc->delayed_bio_q_mtx); } /* * Find a geom handled by the class */ static struct g_virstor_softc * virstor_find_geom(const struct g_class *cp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &cp->geom, geom) { if (strcmp(name, gp->name) == 0) return (gp->softc); } return (NULL); } /* * Update metadata on all components to reflect the current state * of these fields: * - chunk_next * - flags * - md_count * Expects things to be set up so write_metadata() can work, i.e. * the topology lock must be held. */ static void update_metadata(struct g_virstor_softc *sc) { struct g_virstor_metadata md; u_int n; if (virstor_valid_components(sc) != sc->n_components) return; /* Incomplete device */ LOG_MSG(LVL_DEBUG, "Updating metadata on components for %s", sc->geom->name); /* Update metadata on components */ g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, sc->geom->class->name, sc->geom->name); g_topology_assert(); for (n = 0; n < sc->n_components; n++) { read_metadata(sc->components[n].gcons, &md); md.chunk_next = sc->components[n].chunk_next; md.flags = sc->components[n].flags; md.md_count = sc->n_components; write_metadata(sc->components[n].gcons, &md); } } /* * Fills metadata (struct md) from information stored in softc and the nc'th * component of virstor */ static void fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, u_int nc, u_int hardcode) { struct g_virstor_component *c; bzero(md, sizeof *md); c = &sc->components[nc]; strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof md->md_magic); md->md_version = G_VIRSTOR_VERSION; strncpy(md->md_name, sc->geom->name, sizeof md->md_name); md->md_id = sc->id; md->md_virsize = sc->virsize; md->md_chunk_size = sc->chunk_size; md->md_count = sc->n_components; if (hardcode) { strncpy(md->provider, c->gcons->provider->name, sizeof md->provider); } md->no = nc; md->provsize = c->gcons->provider->mediasize; md->chunk_count = c->chunk_count; md->chunk_next = c->chunk_next; md->chunk_reserved = c->chunk_reserved; md->flags = c->flags; } /* * Remove a component from virstor device. * Can only be done if the component is unallocated. */ static void virstor_ctl_remove(struct gctl_req *req, struct g_class *cp) { /* As this is executed in parallel to I/O, operations on virstor * structures must be as atomic as possible. 
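Returning for a moment to the delayed-BIO drain at the end of virstor_ctl_add() above: the sentinel node with a NULL bio is what keeps that loop bounded, because a request that still cannot be satisfied is re-queued behind the sentinel and left for a later pass. A user-space sketch of the same idea with an STAILQ and invented payloads:

#include <stdio.h>
#include <sys/queue.h>

struct node {
        const char *payload;                    // NULL marks the sentinel
        STAILQ_ENTRY(node) link;
};
STAILQ_HEAD(nodeq, node);

int
main(void)
{
        struct nodeq q = STAILQ_HEAD_INITIALIZER(q);
        struct node n1 = { "bio A" }, n2 = { "bio B" }, sentinel = { NULL };

        STAILQ_INSERT_TAIL(&q, &n1, link);
        STAILQ_INSERT_TAIL(&q, &n2, link);
        STAILQ_INSERT_TAIL(&q, &sentinel, link);        // appended before draining

        while (!STAILQ_EMPTY(&q)) {
                struct node *n = STAILQ_FIRST(&q);

                STAILQ_REMOVE_HEAD(&q, link);
                if (n->payload == NULL)
                        break;                  // hit the sentinel: stop
                printf("re-issuing %s\n", n->payload);
                // A request that still cannot be satisfied would be
                // re-appended here, landing after the sentinel.
        }
        return (0);
}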
*/ struct g_virstor_softc *sc; int *nargs; const char *geom_name; u_int removed; int i; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "Error fetching argument '%s'", "nargs"); return; } if (*nargs < 2) { gctl_error(req, "Invalid number of arguments"); return; } /* Find "our" geom */ geom_name = gctl_get_asciiparam(req, "arg0"); if (geom_name == NULL) { gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)"); return; } sc = virstor_find_geom(cp, geom_name); if (sc == NULL) { gctl_error(req, "Don't know anything about '%s'", geom_name); return; } if (virstor_valid_components(sc) != sc->n_components) { LOG_MSG(LVL_ERROR, "Cannot remove components from incomplete " "virstor %s", sc->geom->name); gctl_error(req, "Virstor %s is incomplete", sc->geom->name); return; } removed = 0; for (i = 1; i < *nargs; i++) { char param[8]; const char *prov_name; int j, found; struct g_virstor_component *newcomp, *compbak; sprintf(param, "arg%d", i); prov_name = gctl_get_asciiparam(req, param); if (prov_name == NULL) { gctl_error(req, "Error fetching argument '%s'", param); return; } if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) prov_name += sizeof(_PATH_DEV) - 1; found = -1; for (j = 0; j < sc->n_components; j++) { if (strcmp(sc->components[j].gcons->provider->name, prov_name) == 0) { found = j; break; } } if (found == -1) { LOG_MSG(LVL_ERROR, "No %s component in %s", prov_name, sc->geom->name); continue; } compbak = sc->components; newcomp = malloc(sc->n_components * sizeof(*sc->components), M_GVIRSTOR, M_WAITOK | M_ZERO); bcopy(sc->components, newcomp, found * sizeof(*sc->components)); bcopy(&sc->components[found + 1], newcomp + found, found * sizeof(*sc->components)); if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) != 0) { LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be " "removed from %s", prov_name, sc->geom->name); free(newcomp, M_GVIRSTOR); /* We'll consider this non-fatal error */ continue; } /* Renumerate unallocated components */ for (j = 0; j < sc->n_components-1; j++) { if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) == 0) { sc->components[j].index = j; } } /* This is the critical section. If a component allocation * event happens while both variables are not yet set, * there will be trouble. Something will panic on encountering * NULL sc->components[x].gcomp member. * Luckily, component allocation happens very rarely and * removing components is an abnormal action in any case. */ sc->components = newcomp; sc->n_components--; /* End critical section */ g_topology_lock(); if (clear_metadata(&compbak[found]) != 0) { LOG_MSG(LVL_WARNING, "Trouble ahead: cannot clear " "metadata on %s", prov_name); } g_detach(compbak[found].gcons); g_destroy_consumer(compbak[found].gcons); g_topology_unlock(); free(compbak, M_GVIRSTOR); removed++; } /* This call to update_metadata() is critical. 
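One detail worth spelling out about the head/tail copy just above: when entry "found" is dropped from an array of n_components elements, the head copy carries found entries, the tail copy carries n_components - found - 1 entries, and the new array only becomes visible through the single pointer assignment that follows. A tiny stand-alone sketch with invented values:

#include <stdio.h>
#include <string.h>

int
main(void)
{
        int comp[] = { 10, 11, 12, 13, 14 };
        int newcomp[5] = { 0 };
        size_t n = 5, found = 2;

        memcpy(newcomp, comp, found * sizeof(comp[0]));
        memcpy(newcomp + found, &comp[found + 1],
            (n - found - 1) * sizeof(comp[0]));
        for (size_t i = 0; i < n - 1; i++)
                printf("%d ", newcomp[i]);      // prints: 10 11 13 14
        printf("\n");
        return (0);
}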
In case there's a * power failure in the middle of it and some components are updated * while others are not, there will be trouble on next .taste() iff * a non-updated component is detected first */ g_topology_lock(); update_metadata(sc); g_topology_unlock(); LOG_MSG(LVL_INFO, "Removed %d component(s) from %s", removed, sc->geom->name); } /* * Clear metadata sector on component */ static int clear_metadata(struct g_virstor_component *comp) { char *buf; int error; LOG_MSG(LVL_INFO, "Clearing metadata on %s", comp->gcons->provider->name); g_topology_assert(); error = g_access(comp->gcons, 0, 1, 0); if (error != 0) return (error); buf = malloc(comp->gcons->provider->sectorsize, M_GVIRSTOR, M_WAITOK | M_ZERO); error = g_write_data(comp->gcons, comp->gcons->provider->mediasize - comp->gcons->provider->sectorsize, buf, comp->gcons->provider->sectorsize); free(buf, M_GVIRSTOR); g_access(comp->gcons, 0, -1, 0); return (error); } /* * Destroy geom forcibly. */ static int g_virstor_destroy_geom(struct gctl_req *req __unused, struct g_class *mp, struct g_geom *gp) { struct g_virstor_softc *sc; int exitval; sc = gp->softc; KASSERT(sc != NULL, ("%s: NULL sc", __func__)); exitval = 0; LOG_MSG(LVL_DEBUG, "%s called for %s, sc=%p", __func__, gp->name, gp->softc); if (sc != NULL) { #ifdef INVARIANTS char *buf; int error; off_t off; int isclean, count; int n; LOG_MSG(LVL_INFO, "INVARIANTS detected"); LOG_MSG(LVL_INFO, "Verifying allocation " "table for %s", sc->geom->name); count = 0; for (n = 0; n < sc->chunk_count; n++) { if (sc->map[n].flags || VIRSTOR_MAP_ALLOCATED != 0) count++; } LOG_MSG(LVL_INFO, "Device %s has %d allocated chunks", sc->geom->name, count); n = off = count = 0; isclean = 1; if (virstor_valid_components(sc) != sc->n_components) { /* This is a incomplete virstor device (not all * components have been found) */ LOG_MSG(LVL_ERROR, "Device %s is incomplete", sc->geom->name); goto bailout; } error = g_access(sc->components[0].gcons, 1, 0, 0); KASSERT(error == 0, ("%s: g_access failed (%d)", __func__, error)); /* Compare the whole on-disk allocation table with what's * currently in memory */ while (n < sc->chunk_count) { buf = g_read_data(sc->components[0].gcons, off, sc->sectorsize, &error); KASSERT(buf != NULL, ("g_read_data returned NULL (%d) " "for read at %jd", error, off)); if (bcmp(buf, &sc->map[n], sc->sectorsize) != 0) { LOG_MSG(LVL_ERROR, "ERROR in allocation table, " "entry %d, offset %jd", n, off); isclean = 0; count++; } n += sc->me_per_sector; off += sc->sectorsize; g_free(buf); } error = g_access(sc->components[0].gcons, -1, 0, 0); KASSERT(error == 0, ("%s: g_access failed (%d) on exit", __func__, error)); if (isclean != 1) { LOG_MSG(LVL_ERROR, "ALLOCATION TABLE CORRUPTED FOR %s " "(%d sectors don't match, max %zu allocations)", sc->geom->name, count, count * sc->me_per_sector); } else { LOG_MSG(LVL_INFO, "Allocation table ok for %s", sc->geom->name); } bailout: #endif update_metadata(sc); virstor_geom_destroy(sc, FALSE, FALSE); exitval = EAGAIN; } else exitval = 0; return (exitval); } /* * Taste event (per-class callback) * Examines a provider and creates geom instances if needed */ static struct g_geom * g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags) { struct g_virstor_metadata md; struct g_geom *gp; struct g_consumer *cp; struct g_virstor_softc *sc; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name); /* We need a dummy geom to attach a consumer to the given 
provider */ gp = g_new_geomf(mp, "virstor:taste.helper"); gp->start = (void *)invalid_call; /* XXX: hacked up so the */ gp->access = (void *)invalid_call; /* compiler doesn't complain. */ gp->orphan = (void *)invalid_call; /* I really want these to fail. */ cp = g_new_consumer(gp); g_attach(cp, pp); error = read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); if (strcmp(md.md_magic, G_VIRSTOR_MAGIC) != 0) return (NULL); if (md.md_version != G_VIRSTOR_VERSION) { LOG_MSG(LVL_ERROR, "Kernel module version invalid " "to handle %s (%s) : %d should be %d", md.md_name, pp->name, md.md_version, G_VIRSTOR_VERSION); return (NULL); } if (md.provsize != pp->mediasize) return (NULL); /* If the provider name is hardcoded, use the offered provider only * if it's been offered with its proper name (the one used in * the label command). */ if (md.provider[0] != '\0' && !g_compare_names(md.provider, pp->name)) return (NULL); /* Iterate all geoms this class already knows about to see if a new * geom instance of this class needs to be created (in case the provider * is first from a (possibly) multi-consumer geom) or it just needs * to be added to an existing instance. */ sc = NULL; gp = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(md.md_name, sc->geom->name) != 0) continue; if (md.md_id != sc->id) continue; break; } if (gp != NULL) { /* We found an existing geom instance; add to it */ LOG_MSG(LVL_INFO, "Adding %s to %s", pp->name, md.md_name); error = add_provider_to_geom(sc, pp, &md); if (error != 0) { LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", pp->name, md.md_name, error); return (NULL); } } else { /* New geom instance needs to be created */ gp = create_virstor_geom(mp, &md); if (gp == NULL) { LOG_MSG(LVL_ERROR, "Error creating new instance of " "class %s: %s", mp->name, md.md_name); LOG_MSG(LVL_DEBUG, "Error creating %s at %s", md.md_name, pp->name); return (NULL); } sc = gp->softc; LOG_MSG(LVL_INFO, "Adding %s to %s (first found)", pp->name, md.md_name); error = add_provider_to_geom(sc, pp, &md); if (error != 0) { LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", pp->name, md.md_name, error); virstor_geom_destroy(sc, TRUE, FALSE); return (NULL); } } return (gp); } /* * Destroyes consumer passed to it in arguments. Used as a callback * on g_event queue. 
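 *
 * A caller defers the teardown roughly like this (a sketch only;
 * remove_component() below is the real user of this callback):
 *
 *	g_post_event(delay_destroy_consumer, c, M_WAITOK, NULL);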
*/ static void delay_destroy_consumer(void *arg, int flags __unused) { struct g_consumer *c = arg; KASSERT(c != NULL, ("%s: invalid consumer", __func__)); LOG_MSG(LVL_DEBUG, "Consumer %s destroyed with delay", c->provider->name); g_detach(c); g_destroy_consumer(c); } /* * Remove a component (consumer) from geom instance; If it's the first * component being removed, orphan the provider to announce geom's being * dismantled */ static void remove_component(struct g_virstor_softc *sc, struct g_virstor_component *comp, boolean_t delay) { struct g_consumer *c; KASSERT(comp->gcons != NULL, ("Component with no consumer in %s", sc->geom->name)); c = comp->gcons; comp->gcons = NULL; KASSERT(c->provider != NULL, ("%s: no provider", __func__)); LOG_MSG(LVL_DEBUG, "Component %s removed from %s", c->provider->name, sc->geom->name); if (sc->provider != NULL) { LOG_MSG(LVL_INFO, "Removing provider %s", sc->provider->name); g_wither_provider(sc->provider, ENXIO); sc->provider = NULL; } if (c->acr > 0 || c->acw > 0 || c->ace > 0) g_access(c, -c->acr, -c->acw, -c->ace); if (delay) { /* Destroy consumer after it's tasted */ g_post_event(delay_destroy_consumer, c, M_WAITOK, NULL); } else { g_detach(c); g_destroy_consumer(c); } } /* * Destroy geom - called internally * See g_virstor_destroy_geom for the other one */ static int virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force, boolean_t delay) { struct g_provider *pp; struct g_geom *gp; u_int n; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { LOG_MSG(force ? LVL_WARNING : LVL_ERROR, "Device %s is still open.", pp->name); if (!force) return (EBUSY); } for (n = 0; n < sc->n_components; n++) { if (sc->components[n].gcons != NULL) remove_component(sc, &sc->components[n], delay); } gp = sc->geom; gp->softc = NULL; KASSERT(sc->provider == NULL, ("Provider still exists for %s", gp->name)); /* XXX: This might or might not work, since we're called with * the topology lock held. Also, it might panic the kernel if * the error'd BIO is in softupdates code. */ mtx_lock(&sc->delayed_bio_q_mtx); while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { struct g_virstor_bio_q *bq; bq = STAILQ_FIRST(&sc->delayed_bio_q); bq->bio->bio_error = ENOSPC; g_io_deliver(bq->bio, EIO); STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); free(bq, M_GVIRSTOR); } mtx_unlock(&sc->delayed_bio_q_mtx); mtx_destroy(&sc->delayed_bio_q_mtx); free(sc->map, M_GVIRSTOR); free(sc->components, M_GVIRSTOR); bzero(sc, sizeof *sc); free(sc, M_GVIRSTOR); pp = LIST_FIRST(&gp->provider); /* We only offer one provider */ if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) LOG_MSG(LVL_DEBUG, "Device %s destroyed", gp->name); g_wither_geom(gp, ENXIO); return (0); } /* * Utility function: read metadata & decode. Wants topology lock to be * held. */ static int read_metadata(struct g_consumer *cp, struct g_virstor_metadata *md) { struct g_provider *pp; char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); virstor_metadata_decode(buf, md); g_free(buf); return (0); } /** * Utility function: encode & write metadata. Assumes topology lock is * held. * * There is no useful way of recovering from errors in this function, * not involving panicking the kernel. 
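 * (The metadata always occupies the provider's last sector, so the write
 * below goes to offset pp->mediasize - pp->sectorsize and is pp->sectorsize
 * bytes long; read_metadata() above reads the same location.)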
 * If the metadata cannot be written the most we can do is notify the
 * operator and hope he spots it and replaces the broken drive.
 */
static void
write_metadata(struct g_consumer *cp, struct g_virstor_metadata *md)
{
    struct g_provider *pp;
    char *buf;
    int error;

    KASSERT(cp != NULL && md != NULL && cp->provider != NULL,
        ("Something's fishy in %s", __func__));
    LOG_MSG(LVL_DEBUG, "Writing metadata on %s", cp->provider->name);
    g_topology_assert();
    error = g_access(cp, 0, 1, 0);
    if (error != 0) {
        LOG_MSG(LVL_ERROR, "g_access(0,1,0) failed for %s: %d",
            cp->provider->name, error);
        return;
    }
    pp = cp->provider;
    buf = malloc(pp->sectorsize, M_GVIRSTOR, M_WAITOK);
    bzero(buf, pp->sectorsize);
    virstor_metadata_encode(md, buf);
    g_topology_unlock();
    error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf,
        pp->sectorsize);
    g_topology_lock();
    g_access(cp, 0, -1, 0);
    free(buf, M_GVIRSTOR);

    if (error != 0)
        LOG_MSG(LVL_ERROR, "Error %d writing metadata to %s",
            error, cp->provider->name);
}

/*
 * Creates a new instance of this GEOM class, initialises softc
 */
static struct g_geom *
create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md)
{
    struct g_geom *gp;
    struct g_virstor_softc *sc;

    LOG_MSG(LVL_DEBUG, "Creating geom instance for %s (id=%u)",
        md->md_name, md->md_id);

    if (md->md_count < 1 || md->md_chunk_size < 1 ||
        md->md_virsize < md->md_chunk_size) {
        /* This is bogus configuration, and probably means data is
         * somehow corrupted. Panic, maybe? */
        LOG_MSG(LVL_ERROR, "Nonsensical metadata information for %s",
            md->md_name);
        return (NULL);
    }

    /* Check if it's already created */
    LIST_FOREACH(gp, &mp->geom, geom) {
        sc = gp->softc;
        if (sc != NULL && strcmp(sc->geom->name, md->md_name) == 0) {
            LOG_MSG(LVL_WARNING, "Geom %s already exists",
                md->md_name);
            if (sc->id != md->md_id) {
                LOG_MSG(LVL_ERROR,
                    "Some stale or invalid components "
                    "exist for virstor device named %s. "
                    "You will need to remove all stale "
                    "components and maybe reconfigure "
                    "the virstor device. Tune "
                    "kern.geom.virstor.debug sysctl up "
                    "for more information.",
                    sc->geom->name);
            }
            return (NULL);
        }
    }
    gp = g_new_geomf(mp, "%s", md->md_name);
    gp->softc = NULL; /* to circumvent races that test softc */

    gp->start = g_virstor_start;
    gp->spoiled = g_virstor_orphan;
    gp->orphan = g_virstor_orphan;
    gp->access = g_virstor_access;
    gp->dumpconf = g_virstor_dumpconf;

    sc = malloc(sizeof(*sc), M_GVIRSTOR, M_WAITOK | M_ZERO);
    sc->id = md->md_id;
    sc->n_components = md->md_count;
    sc->components = malloc(sizeof(struct g_virstor_component) *
        md->md_count, M_GVIRSTOR, M_WAITOK | M_ZERO);
    sc->chunk_size = md->md_chunk_size;
    sc->virsize = md->md_virsize;
    STAILQ_INIT(&sc->delayed_bio_q);
    mtx_init(&sc->delayed_bio_q_mtx, "gvirstor_delayed_bio_q_mtx",
        "gvirstor", MTX_DEF | MTX_RECURSE);
    sc->geom = gp;
    sc->provider = NULL; /* virstor_check_and_run will create it */

    gp->softc = sc;

    LOG_MSG(LVL_ANNOUNCE, "Device %s created", sc->geom->name);

    return (gp);
}

/*
 * Add provider to a GEOM class instance
 */
static int
add_provider_to_geom(struct g_virstor_softc *sc, struct g_provider *pp,
    struct g_virstor_metadata *md)
{
    struct g_virstor_component *component;
    struct g_consumer *cp, *fcp;
    struct g_geom *gp;
    int error;

    if (md->no >= sc->n_components)
        return (EINVAL);

    /* "Current" component */
    component = &(sc->components[md->no]);
    if (component->gcons != NULL)
        return (EEXIST);

    gp = sc->geom;
    fcp = LIST_FIRST(&gp->consumer);

    cp = g_new_consumer(gp);
    error = g_attach(cp, pp);

    if (error != 0) {
        g_destroy_consumer(cp);
        return (error);
    }

    if (fcp != NULL) {
        if (fcp->provider->sectorsize != pp->sectorsize) {
            /* TODO: this can be made to work */
            LOG_MSG(LVL_ERROR, "Provider %s of %s has invalid "
                "sector size (%d)", pp->name, sc->geom->name,
                pp->sectorsize);
            return (EINVAL);
        }
        if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) {
            /* Replicate access permissions from first "live" consumer
             * to the new one */
            error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
            if (error != 0) {
                g_detach(cp);
                g_destroy_consumer(cp);
                return (error);
            }
        }
    }

    /* Bring up a new component */
    cp->private = component;
    component->gcons = cp;
    component->sc = sc;
    component->index = md->no;
    component->chunk_count = md->chunk_count;
    component->chunk_next = md->chunk_next;
    component->chunk_reserved = md->chunk_reserved;
    component->flags = md->flags;

    LOG_MSG(LVL_DEBUG, "%s attached to %s", pp->name, sc->geom->name);

    virstor_check_and_run(sc);
    return (0);
}

/*
 * Check if everything's ready to create the geom provider & device entry,
 * create and start provider.
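 *
 * The sizing arithmetic used here, in sketch form (all of these are fields
 * of struct g_virstor_softc, as used throughout this file):
 *
 *	sc->chunk_count   = sc->virsize / sc->chunk_size;
 *	sc->map_size      = sc->chunk_count * sizeof(*sc->map);
 *	sc->map_sectors   = sc->map_size / sc->sectorsize;
 *	sc->me_per_sector = sc->sectorsize / sizeof(*sc->map);
 *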
* Called ultimately by .taste, from g_event thread */ static void virstor_check_and_run(struct g_virstor_softc *sc) { off_t off; size_t n, count; int index; int error; if (virstor_valid_components(sc) != sc->n_components) return; if (virstor_valid_components(sc) == 0) { /* This is actually a candidate for panic() */ LOG_MSG(LVL_ERROR, "No valid components for %s?", sc->provider->name); return; } sc->sectorsize = sc->components[0].gcons->provider->sectorsize; /* Initialise allocation map from the first consumer */ sc->chunk_count = sc->virsize / sc->chunk_size; if (sc->chunk_count * (off_t)sc->chunk_size != sc->virsize) { LOG_MSG(LVL_WARNING, "Device %s truncated to %ju bytes", sc->provider->name, sc->chunk_count * (off_t)sc->chunk_size); } sc->map_size = sc->chunk_count * sizeof *(sc->map); /* The following allocation is in order of 4MB - 8MB */ sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK); KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s", __func__, sc->map_size, sc->provider->name)); sc->map_sectors = sc->map_size / sc->sectorsize; count = 0; for (n = 0; n < sc->n_components; n++) count += sc->components[n].chunk_count; LOG_MSG(LVL_INFO, "Device %s has %zu physical chunks and %zu virtual " "(%zu KB chunks)", sc->geom->name, count, sc->chunk_count, sc->chunk_size / 1024); error = g_access(sc->components[0].gcons, 1, 0, 0); if (error != 0) { LOG_MSG(LVL_ERROR, "Cannot acquire read access for %s to " "read allocation map for %s", sc->components[0].gcons->provider->name, sc->geom->name); return; } /* Read in the allocation map */ LOG_MSG(LVL_DEBUG, "Reading map for %s from %s", sc->geom->name, sc->components[0].gcons->provider->name); off = count = n = 0; while (count < sc->map_size) { struct g_virstor_map_entry *mapbuf; size_t bs; bs = MIN(MAXPHYS, sc->map_size - count); if (bs % sc->sectorsize != 0) { /* Check for alignment errors */ bs = rounddown(bs, sc->sectorsize); if (bs == 0) break; LOG_MSG(LVL_ERROR, "Trouble: map is not sector-aligned " "for %s on %s", sc->geom->name, sc->components[0].gcons->provider->name); } mapbuf = g_read_data(sc->components[0].gcons, off, bs, &error); if (mapbuf == NULL) { free(sc->map, M_GVIRSTOR); LOG_MSG(LVL_ERROR, "Error reading allocation map " "for %s from %s (offset %ju) (error %d)", sc->geom->name, sc->components[0].gcons->provider->name, off, error); return; } bcopy(mapbuf, &sc->map[n], bs); off += bs; count += bs; n += bs / sizeof *(sc->map); g_free(mapbuf); } g_access(sc->components[0].gcons, -1, 0, 0); LOG_MSG(LVL_DEBUG, "Read map for %s", sc->geom->name); /* find first component with allocatable chunks */ index = -1; for (n = 0; n < sc->n_components; n++) { if (sc->components[n].chunk_next < sc->components[n].chunk_count) { index = n; break; } } if (index == -1) /* not found? 
set it to the last component and handle it * later */ index = sc->n_components - 1; if (index >= sc->n_components - g_virstor_component_watermark - 1) { LOG_MSG(LVL_WARNING, "Device %s running out of components " "(%d/%u: %s)", sc->geom->name, index+1, sc->n_components, sc->components[index].gcons->provider->name); } sc->curr_component = index; if (sc->components[index].chunk_next >= sc->components[index].chunk_count - g_virstor_chunk_watermark) { LOG_MSG(LVL_WARNING, "Component %s of %s is running out of free space " "(%u chunks left)", sc->components[index].gcons->provider->name, sc->geom->name, sc->components[index].chunk_count - sc->components[index].chunk_next); } sc->me_per_sector = sc->sectorsize / sizeof *(sc->map); if (sc->sectorsize % sizeof *(sc->map) != 0) { LOG_MSG(LVL_ERROR, "%s: Map entries don't fit exactly in a sector (%s)", __func__, sc->geom->name); return; } /* Recalculate allocated chunks in components & at the same time * verify map data is sane. We could trust metadata on this, but * we want to make sure. */ for (n = 0; n < sc->n_components; n++) sc->components[n].chunk_next = sc->components[n].chunk_reserved; for (n = 0; n < sc->chunk_count; n++) { if (sc->map[n].provider_no >= sc->n_components || sc->map[n].provider_chunk >= sc->components[sc->map[n].provider_no].chunk_count) { LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s", __func__, (u_int)n, sc->geom->name); LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u" " provider_chunk: %u, chunk_count: %u", __func__, sc->map[n].provider_no, sc->n_components, sc->map[n].provider_chunk, sc->components[sc->map[n].provider_no].chunk_count); return; } if (sc->map[n].flags & VIRSTOR_MAP_ALLOCATED) sc->components[sc->map[n].provider_no].chunk_next++; } sc->provider = g_new_providerf(sc->geom, "virstor/%s", sc->geom->name); sc->provider->sectorsize = sc->sectorsize; sc->provider->mediasize = sc->virsize; g_error_provider(sc->provider, 0); LOG_MSG(LVL_INFO, "%s activated", sc->provider->name); LOG_MSG(LVL_DEBUG, "%s starting with current component %u, starting " "chunk %u", sc->provider->name, sc->curr_component, sc->components[sc->curr_component].chunk_next); } /* * Returns count of active providers in this geom instance */ static u_int virstor_valid_components(struct g_virstor_softc *sc) { unsigned int nc, i; nc = 0; KASSERT(sc != NULL, ("%s: softc is NULL", __func__)); KASSERT(sc->components != NULL, ("%s: sc->components is NULL", __func__)); for (i = 0; i < sc->n_components; i++) if (sc->components[i].gcons != NULL) nc++; return (nc); } /* * Called when the consumer gets orphaned (?) 
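 *
 * In sketch form the handler just forwards to the helpers defined above:
 *
 *	remove_component(sc, cp->private, FALSE);
 *	if (virstor_valid_components(sc) == 0)
 *		virstor_geom_destroy(sc, TRUE, FALSE);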
*/ static void g_virstor_orphan(struct g_consumer *cp) { struct g_virstor_softc *sc; struct g_virstor_component *comp; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; comp = cp->private; KASSERT(comp != NULL, ("%s: No component in private part of consumer", __func__)); remove_component(sc, comp, FALSE); if (virstor_valid_components(sc) == 0) virstor_geom_destroy(sc, TRUE, FALSE); } /* * Called to notify geom when it's been opened, and for what intent */ static int g_virstor_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *c; struct g_virstor_softc *sc; struct g_geom *gp; int error; KASSERT(pp != NULL, ("%s: NULL provider", __func__)); gp = pp->geom; KASSERT(gp != NULL, ("%s: NULL geom", __func__)); sc = gp->softc; if (sc == NULL) { /* It seems that .access can be called with negative dr,dw,dx * in this case but I want to check for myself */ LOG_MSG(LVL_WARNING, "access(%d, %d, %d) for %s", dr, dw, de, pp->name); /* This should only happen when geom is withered so * allow only negative requests */ KASSERT(dr <= 0 && dw <= 0 && de <= 0, ("%s: Positive access for %s", __func__, pp->name)); if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) LOG_MSG(LVL_DEBUG, "Device %s definitely destroyed", pp->name); return (0); } /* Grab an exclusive bit to propagate on our consumers on first open */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... drop it on close */ if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) { de--; update_metadata(sc); /* Writes statistical information */ } error = ENXIO; LIST_FOREACH(c, &gp->consumer, consumer) { KASSERT(c != NULL, ("%s: consumer is NULL", __func__)); error = g_access(c, dr, dw, de); if (error != 0) { struct g_consumer *c2; /* Backout earlier changes */ LIST_FOREACH(c2, &gp->consumer, consumer) { if (c2 == c) /* all eariler components fixed */ return (error); g_access(c2, -dr, -dw, -de); } } } return (error); } /* * Generate XML dump of current state */ static void g_virstor_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_virstor_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL || pp != NULL) return; if (cp != NULL) { /* For each component */ struct g_virstor_component *comp; comp = cp->private; if (comp == NULL) return; sbuf_printf(sb, "%s%u\n", indent, comp->index); sbuf_printf(sb, "%s%u\n", indent, comp->chunk_count); sbuf_printf(sb, "%s%u\n", indent, comp->chunk_next); sbuf_printf(sb, "%s%u\n", indent, comp->chunk_reserved); sbuf_printf(sb, "%s%u%%\n", indent, comp->chunk_next > 0 ? 100 - ((comp->chunk_next + comp->chunk_reserved) * 100) / comp->chunk_count : 100); } else { /* For the whole thing */ u_int count, used, i; off_t size; count = used = size = 0; for (i = 0; i < sc->n_components; i++) { if (sc->components[i].gcons != NULL) { count += sc->components[i].chunk_count; used += sc->components[i].chunk_next + sc->components[i].chunk_reserved; size += sc->components[i].gcons-> provider->mediasize; } } sbuf_printf(sb, "%s" "Components=%u, Online=%u\n", indent, sc->n_components, virstor_valid_components(sc)); sbuf_printf(sb, "%s%u%% physical free\n", indent, 100-(used * 100) / count); sbuf_printf(sb, "%s%zu\n", indent, sc->chunk_size); sbuf_printf(sb, "%s%u%%\n", indent, used > 0 ? 
100 - (used * 100) / count : 100); sbuf_printf(sb, "%s%u\n", indent, count); sbuf_printf(sb, "%s%zu\n", indent, sc->chunk_count); sbuf_printf(sb, "%s%zu%%\n", indent, (count * 100) / sc->chunk_count); sbuf_printf(sb, "%s%jd\n", indent, size); sbuf_printf(sb, "%s%jd\n", indent, sc->virsize); } } /* * GEOM .done handler * Can't use standard handler because one requested IO may * fork into additional data IOs */ static void g_virstor_done(struct bio *b) { struct g_virstor_softc *sc; struct bio *parent_b; parent_b = b->bio_parent; sc = parent_b->bio_to->geom->softc; if (b->bio_error != 0) { LOG_MSG(LVL_ERROR, "Error %d for offset=%ju, length=%ju, %s", b->bio_error, b->bio_offset, b->bio_length, b->bio_to->name); if (parent_b->bio_error == 0) parent_b->bio_error = b->bio_error; } parent_b->bio_inbed++; parent_b->bio_completed += b->bio_completed; if (parent_b->bio_children == parent_b->bio_inbed) { parent_b->bio_completed = parent_b->bio_length; g_io_deliver(parent_b, parent_b->bio_error); } g_destroy_bio(b); } /* * I/O starts here * Called in g_down thread */ static void g_virstor_start(struct bio *b) { struct g_virstor_softc *sc; struct g_virstor_component *comp; struct bio *cb; struct g_provider *pp; char *addr; off_t offset, length; struct bio_queue_head bq; size_t chunk_size; /* cached for convenience */ u_int count; pp = b->bio_to; sc = pp->geom->softc; KASSERT(sc != NULL, ("%s: no softc (error=%d, device=%s)", __func__, b->bio_to->error, b->bio_to->name)); LOG_REQ(LVL_MOREDEBUG, b, "%s", __func__); switch (b->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; default: g_io_deliver(b, EOPNOTSUPP); return; } LOG_MSG(LVL_DEBUG2, "BIO arrived, size=%ju", b->bio_length); bioq_init(&bq); chunk_size = sc->chunk_size; addr = b->bio_data; offset = b->bio_offset; /* virtual offset and length */ length = b->bio_length; while (length > 0) { size_t chunk_index, in_chunk_offset, in_chunk_length; struct virstor_map_entry *me; chunk_index = offset / chunk_size; /* round downwards */ in_chunk_offset = offset % chunk_size; in_chunk_length = min(length, chunk_size - in_chunk_offset); LOG_MSG(LVL_DEBUG, "Mapped %s(%ju, %ju) to (%zu,%zu,%zu)", b->bio_cmd == BIO_READ ? "R" : "W", offset, length, chunk_index, in_chunk_offset, in_chunk_length); me = &sc->map[chunk_index]; if (b->bio_cmd == BIO_READ || b->bio_cmd == BIO_DELETE) { if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { /* Reads from unallocated chunks return zeroed * buffers */ if (b->bio_cmd == BIO_READ) bzero(addr, in_chunk_length); } else { comp = &sc->components[me->provider_no]; cb = g_clone_bio(b); if (cb == NULL) { bioq_dismantle(&bq); if (b->bio_error == 0) b->bio_error = ENOMEM; g_io_deliver(b, b->bio_error); return; } cb->bio_to = comp->gcons->provider; cb->bio_done = g_virstor_done; cb->bio_offset = (off_t)me->provider_chunk * (off_t)chunk_size + in_chunk_offset; cb->bio_length = in_chunk_length; cb->bio_data = addr; cb->bio_caller1 = comp; bioq_disksort(&bq, cb); } } else { /* handle BIO_WRITE */ KASSERT(b->bio_cmd == BIO_WRITE, ("%s: Unknown command %d", __func__, b->bio_cmd)); if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { /* We have a virtual chunk, represented by * the "me" entry, but it's not yet allocated * (tied to) a physical chunk. So do it now. */ struct virstor_map_entry *data_me; u_int phys_chunk, comp_no; off_t s_offset; int error; error = allocate_chunk(sc, &comp, &comp_no, &phys_chunk); if (error != 0) { /* We cannot allocate a physical chunk * to satisfy this request, so we'll * delay it to when we can... 
* XXX: this will prevent the fs from * being umounted! */ struct g_virstor_bio_q *biq; biq = malloc(sizeof *biq, M_GVIRSTOR, M_NOWAIT); if (biq == NULL) { bioq_dismantle(&bq); if (b->bio_error == 0) b->bio_error = ENOMEM; g_io_deliver(b, b->bio_error); return; } biq->bio = b; mtx_lock(&sc->delayed_bio_q_mtx); STAILQ_INSERT_TAIL(&sc->delayed_bio_q, biq, linkage); mtx_unlock(&sc->delayed_bio_q_mtx); LOG_MSG(LVL_WARNING, "Delaying BIO " "(size=%ju) until free physical " "space can be found on %s", b->bio_length, sc->provider->name); return; } LOG_MSG(LVL_DEBUG, "Allocated chunk %u on %s " "for %s", phys_chunk, comp->gcons->provider->name, sc->provider->name); me->provider_no = comp_no; me->provider_chunk = phys_chunk; me->flags |= VIRSTOR_MAP_ALLOCATED; cb = g_clone_bio(b); if (cb == NULL) { me->flags &= ~VIRSTOR_MAP_ALLOCATED; me->provider_no = 0; me->provider_chunk = 0; bioq_dismantle(&bq); if (b->bio_error == 0) b->bio_error = ENOMEM; g_io_deliver(b, b->bio_error); return; } /* The allocation table is stored continuously * at the start of the drive. We need to * calculate the offset of the sector that holds * this map entry both on the drive and in the * map array. * sc_offset will end up pointing to the drive * sector. */ s_offset = chunk_index * sizeof *me; s_offset = rounddown(s_offset, sc->sectorsize); /* data_me points to map entry sector * in memory (analogous to offset) */ data_me = &sc->map[rounddown(chunk_index, sc->me_per_sector)]; /* Commit sector with map entry to storage */ cb->bio_to = sc->components[0].gcons->provider; cb->bio_done = g_virstor_done; cb->bio_offset = s_offset; cb->bio_data = (char *)data_me; cb->bio_length = sc->sectorsize; cb->bio_caller1 = &sc->components[0]; bioq_disksort(&bq, cb); } comp = &sc->components[me->provider_no]; cb = g_clone_bio(b); if (cb == NULL) { bioq_dismantle(&bq); if (b->bio_error == 0) b->bio_error = ENOMEM; g_io_deliver(b, b->bio_error); return; } /* Finally, handle the data */ cb->bio_to = comp->gcons->provider; cb->bio_done = g_virstor_done; cb->bio_offset = (off_t)me->provider_chunk*(off_t)chunk_size + in_chunk_offset; cb->bio_length = in_chunk_length; cb->bio_data = addr; cb->bio_caller1 = comp; bioq_disksort(&bq, cb); } addr += in_chunk_length; length -= in_chunk_length; offset += in_chunk_length; } /* Fire off bio's here */ count = 0; for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) { bioq_remove(&bq, cb); LOG_REQ(LVL_MOREDEBUG, cb, "Firing request"); comp = cb->bio_caller1; cb->bio_caller1 = NULL; LOG_MSG(LVL_DEBUG, " firing bio, offset=%ju, length=%ju", cb->bio_offset, cb->bio_length); g_io_request(cb, comp->gcons); count++; } if (count == 0) { /* We handled everything locally */ b->bio_completed = b->bio_length; g_io_deliver(b, 0); } } /* * Allocate a chunk from a physical provider. Returns physical component, * chunk index relative to the component and the component's index. */ static int allocate_chunk(struct g_virstor_softc *sc, struct g_virstor_component **comp, u_int *comp_no_p, u_int *chunk) { u_int comp_no; KASSERT(sc->curr_component < sc->n_components, ("%s: Invalid curr_component: %u", __func__, sc->curr_component)); comp_no = sc->curr_component; *comp = &sc->components[comp_no]; dump_component(*comp); if ((*comp)->chunk_next >= (*comp)->chunk_count) { /* This component is full. 
Allocate next component */ if (comp_no >= sc->n_components-1) { LOG_MSG(LVL_ERROR, "All physical space allocated for %s", sc->geom->name); return (-1); } (*comp)->flags &= ~VIRSTOR_PROVIDER_CURRENT; sc->curr_component = ++comp_no; *comp = &sc->components[comp_no]; if (comp_no >= sc->n_components - g_virstor_component_watermark-1) LOG_MSG(LVL_WARNING, "Device %s running out of components " "(switching to %u/%u: %s)", sc->geom->name, comp_no+1, sc->n_components, (*comp)->gcons->provider->name); /* Take care not to overwrite reserved chunks */ if ( (*comp)->chunk_reserved > 0 && (*comp)->chunk_next < (*comp)->chunk_reserved) (*comp)->chunk_next = (*comp)->chunk_reserved; (*comp)->flags |= VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT; dump_component(*comp); *comp_no_p = comp_no; *chunk = (*comp)->chunk_next++; } else { *comp_no_p = comp_no; *chunk = (*comp)->chunk_next++; } return (0); } /* Dump a component */ static void dump_component(struct g_virstor_component *comp) { if (g_virstor_debug < LVL_DEBUG2) return; printf("Component %d: %s\n", comp->index, comp->gcons->provider->name); printf(" chunk_count: %u\n", comp->chunk_count); printf(" chunk_next: %u\n", comp->chunk_next); printf(" flags: %u\n", comp->flags); } #if 0 /* Dump a map entry */ static void dump_me(struct virstor_map_entry *me, unsigned int nr) { if (g_virstor_debug < LVL_DEBUG) return; printf("VIRT. CHUNK #%d: ", nr); if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) printf("(unallocated)\n"); else printf("allocated at provider %u, provider_chunk %u\n", me->provider_no, me->provider_chunk); } #endif /* * Dismantle bio_queue and destroy its components */ static void bioq_dismantle(struct bio_queue_head *bq) { struct bio *b; for (b = bioq_first(bq); b != NULL; b = bioq_first(bq)) { bioq_remove(bq, b); g_destroy_bio(b); } } /* * The function that shouldn't be called. * When this is called, the stack is already garbled because of * argument mismatch. There's nothing to do now but panic, which is * accidentally the whole purpose of this function. * Motivation: to guard from accidentally calling geom methods when * they shouldn't be called. (see g_..._taste) */ static void invalid_call(void) { panic("invalid_call() has just been called. Something's fishy here."); } DECLARE_GEOM_CLASS(g_virstor_class, g_virstor); /* Let there be light */ +MODULE_VERSION(geom_virstor, 0); Index: stable/11/sys/geom/zero/g_zero.c =================================================================== --- stable/11/sys/geom/zero/g_zero.c (revision 332639) +++ stable/11/sys/geom/zero/g_zero.c (revision 332640) @@ -1,143 +1,144 @@ /*- * Copyright (c) 2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #define G_ZERO_CLASS_NAME "ZERO" static int g_zero_clear_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, zero, CTLFLAG_RW, 0, "GEOM_ZERO stuff"); static int g_zero_clear = 1; SYSCTL_PROC(_kern_geom_zero, OID_AUTO, clear, CTLTYPE_INT|CTLFLAG_RW, &g_zero_clear, 0, g_zero_clear_sysctl, "I", "Clear read data buffer"); static int g_zero_byte = 0; SYSCTL_INT(_kern_geom_zero, OID_AUTO, byte, CTLFLAG_RW, &g_zero_byte, 0, "Byte (octet) value to clear the buffers with"); static struct g_provider *gpp; static int g_zero_clear_sysctl(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, &g_zero_clear, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (gpp == NULL) return (ENXIO); if (g_zero_clear) gpp->flags &= ~G_PF_ACCEPT_UNMAPPED; else gpp->flags |= G_PF_ACCEPT_UNMAPPED; return (0); } static void g_zero_start(struct bio *bp) { int error = ENXIO; switch (bp->bio_cmd) { case BIO_READ: if (g_zero_clear && (bp->bio_flags & BIO_UNMAPPED) == 0) memset(bp->bio_data, g_zero_byte, bp->bio_length); /* FALLTHROUGH */ case BIO_DELETE: case BIO_WRITE: bp->bio_completed = bp->bio_length; error = 0; break; case BIO_GETATTR: default: error = EOPNOTSUPP; break; } g_io_deliver(bp, error); } static void g_zero_init(struct g_class *mp) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); gp = g_new_geomf(mp, "gzero"); gp->start = g_zero_start; gp->access = g_std_access; gpp = pp = g_new_providerf(gp, "%s", gp->name); pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (!g_zero_clear) pp->flags |= G_PF_ACCEPT_UNMAPPED; pp->mediasize = 1152921504606846976LLU; pp->sectorsize = 512; g_error_provider(pp, 0); } static int g_zero_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_provider *pp; g_topology_assert(); if (gp == NULL) return (0); pp = LIST_FIRST(&gp->provider); if (pp == NULL) return (0); if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) return (EBUSY); gpp = NULL; g_wither_geom(gp, ENXIO); return (0); } static struct g_class g_zero_class = { .name = G_ZERO_CLASS_NAME, .version = G_VERSION, .init = g_zero_init, .destroy_geom = g_zero_destroy_geom }; DECLARE_GEOM_CLASS(g_zero_class, g_zero); +MODULE_VERSION(geom_zero, 0); Index: stable/11 =================================================================== --- stable/11 (revision 332639) +++ stable/11 (revision 332640) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r332387
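The MODULE_VERSION(geom_virstor, 0) and MODULE_VERSION(geom_zero, 0) lines merged above let other kernel modules declare a dependency on these GEOM classes. A minimal sketch of how a hypothetical dependent module could express such a dependency follows; the module name g_example and its event handler are illustrative only and not part of this change.

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/module.h>

/* Hypothetical module that wants geom_virstor loaded before it. */
static int
g_example_modevent(module_t mod, int type, void *arg)
{

	/* Nothing to do on load/unload in this sketch. */
	return (0);
}

static moduledata_t g_example_mod = {
	"g_example",
	g_example_modevent,
	NULL
};

DECLARE_MODULE(g_example, g_example_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
/* Require the geom_virstor module at the version declared above (0). */
MODULE_DEPEND(g_example, geom_virstor, 0, 0, 0);
MODULE_VERSION(g_example, 1);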