Index: head/sys/geom/eli/g_eli.c
===================================================================
--- head/sys/geom/eli/g_eli.c	(revision 152966)
+++ head/sys/geom/eli/g_eli.c	(revision 152967)
@@ -1,1095 +1,1095 @@
 /*-
  * Copyright (c) 2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/kthread.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/uio.h>
 
 #include <vm/uma.h>
 
 #include <geom/geom.h>
 #include <geom/eli/g_eli.h>
 #include <geom/eli/pkcs5v2.h>
 
 
 /* Malloc type used for every allocation made by this file. */
 MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data");
 
 /*
  * Knobs exported under kern.geom.eli.  Those with a TUNABLE_INT() entry are
  * additionally declared as loader tunables under the same name.
  */
 SYSCTL_DECL(_kern_geom);
 SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW, 0, "GEOM_ELI stuff");
 u_int g_eli_debug = 0;
 TUNABLE_INT("kern.geom.eli.debug", &g_eli_debug);
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RW, &g_eli_debug, 0,
     "Debug level");
 /* Upper bound on passphrase prompts in g_eli_taste(); 0 disables tasting. */
 static u_int g_eli_tries = 3;
 TUNABLE_INT("kern.geom.eli.tries", &g_eli_tries);
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RW, &g_eli_tries, 0,
     "Number of tries when asking for passphrase");
 static u_int g_eli_visible_passphrase = 0;
 TUNABLE_INT("kern.geom.eli.visible_passphrase", &g_eli_visible_passphrase);
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RW,
     &g_eli_visible_passphrase, 0,
     "Turn on echo when entering passphrase (debug purposes only!!)");
 /* Sysctl only: no TUNABLE_INT() is declared for this one. */
 u_int g_eli_overwrites = 5;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RW, &g_eli_overwrites,
     0, "Number of overwrites on-disk keys when destroying");
 /*
  * Requested number of worker threads per device.  g_eli_create() treats 0
  * as "one per CPU" and clamps larger values to mp_ncpus.
  */
 static u_int g_eli_threads = 0;
 TUNABLE_INT("kern.geom.eli.threads", &g_eli_threads);
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RW, &g_eli_threads, 0,
     "Number of threads doing crypto work");
 
 /*
  * Non-zero only between g_eli_on_boot_start() and g_eli_on_boot_end();
  * g_eli_taste() returns immediately while it is zero.
  */
 static int g_eli_do_taste = 0;
 
 static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 static void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp);
 
 static g_taste_t g_eli_taste;
 static g_dumpconf_t g_eli_dumpconf;
 
 /* GEOM class descriptor; registered by DECLARE_GEOM_CLASS() at end of file. */
 struct g_class g_eli_class = {
 	.name = G_ELI_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_eli_config,
 	.taste = g_eli_taste,
 	.destroy_geom = g_eli_destroy_geom
 };
 
 
 /*
  * Code paths:
  * BIO_READ:
  *	g_eli_start -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  * BIO_WRITE:
  *	g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
  */
 
 
 /*
  * EAGAIN from crypto(9) means that the request was probably rebalanced to
  * another crypto accelerator (or something similar happened).
  * Store the new session ID in the worker that issued the request and
  * dispatch the operation again.
  *
  * Returns 0 if the request was redispatched.  Otherwise returns the error
  * from crypto_dispatch(), which is also recorded in crp->crp_etype.
  */
 static int
 g_eli_crypto_rerun(struct cryptop *crp)
 {
 	struct g_eli_softc *sc;
 	struct g_eli_worker *wr;
 	struct bio *bp;
 	const char *opname;
 	int error;
 
 	bp = (struct bio *)crp->crp_opaque;
 	sc = bp->bio_to->geom->softc;
 	/* bio_pflags holds the number of the worker that ran this bio. */
 	LIST_FOREACH(wr, &sc->sc_workers, w_next) {
 		if (wr->w_number == bp->bio_pflags)
 			break;
 	}
 	KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags));
 	opname = (bp->bio_cmd == BIO_READ) ? "READ" : "WRITE";
 	G_ELI_DEBUG(1, "Reruning crypto %s request (sid: %ju -> %ju).",
 	    opname, (uintmax_t)wr->w_sid, (uintmax_t)crp->crp_sid);
 	wr->w_sid = crp->crp_sid;
 	crp->crp_etype = 0;
 	error = crypto_dispatch(crp);
 	if (error != 0) {
 		G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__,
 		    error);
 		crp->crp_etype = error;
 	}
 	return (error);
 }
 
 /*
  * The function is called after reading encrypted data from the provider.
  * On success the parent request is queued for a worker thread; on failure
  * it is completed immediately with the error.
  *
  * g_eli_start -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  */
 static void
 g_eli_read_done(struct bio *bp)
 {
 	struct g_eli_softc *sc;
 	struct bio *parent;
 
 	G_ELI_LOGREQ(2, bp, "Request done.");
 	parent = bp->bio_parent;
 	if (parent->bio_error == 0)
 		parent->bio_error = bp->bio_error;
 	g_destroy_bio(bp);
 	if (parent->bio_error == 0) {
 		/* Pass the still-encrypted data on to the worker threads. */
 		sc = parent->bio_to->geom->softc;
 		mtx_lock(&sc->sc_queue_mtx);
 		bioq_insert_tail(&sc->sc_queue, parent);
 		mtx_unlock(&sc->sc_queue_mtx);
 		wakeup(sc);
 		return;
 	}
 	G_ELI_LOGREQ(0, parent, "%s() failed", __func__);
 	parent->bio_completed = 0;
 	g_io_deliver(parent, parent->bio_error);
 }
 
 /*
  * The function is called after we read and decrypt data.
  *
  * g_eli_start -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> G_ELI_CRYPTO_READ_DONE -> g_io_deliver
  */
 static int
 g_eli_crypto_read_done(struct cryptop *crp)
 {
 	struct bio *bp;
 
 	if (crp->crp_etype == EAGAIN) {
 		if (g_eli_crypto_rerun(crp) == 0)
 			return (0);
 	}
 	bp = (struct bio *)crp->crp_opaque;
 	bp->bio_inbed++;
 	if (crp->crp_etype == 0) {
 		G_ELI_DEBUG(3, "Crypto READ request done (%d/%d).",
 		    bp->bio_inbed, bp->bio_children);
 		bp->bio_completed += crp->crp_olen;
 	} else {
 		G_ELI_DEBUG(1, "Crypto READ request failed (%d/%d) error=%d.",
 		    bp->bio_inbed, bp->bio_children, crp->crp_etype);
 		if (bp->bio_error == 0)
 			bp->bio_error = crp->crp_etype;
 	}
 	/*
 	 * Do we have all sectors already?
 	 */
 	if (bp->bio_inbed < bp->bio_children)
 		return (0);
 	free(bp->bio_driver2, M_ELI);
 	bp->bio_driver2 = NULL;
 	if (bp->bio_error != 0) {
 		G_ELI_LOGREQ(0, bp, "Crypto READ request failed (error=%d).",
 		    bp->bio_error);
 		bp->bio_completed = 0;
 	}
 	/*
 	 * Read is finished, send it up.
 	 */
 	g_io_deliver(bp, bp->bio_error);
 	return (0);
 }
 
 /*
  * Completion handler for the write of encrypted data to the underlying
  * provider.  Releases the per-request memory and completes the parent bio.
  *
  * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver
  */
 static void
 g_eli_write_done(struct bio *bp)
 {
 	struct bio *parent;
 
 	G_ELI_LOGREQ(2, bp, "Request done.");
 	parent = bp->bio_parent;
 	if (parent->bio_error == 0)
 		parent->bio_error = bp->bio_error;
 	free(parent->bio_driver2, M_ELI);
 	parent->bio_driver2 = NULL;
 	if (parent->bio_error != 0) {
 		G_ELI_LOGREQ(0, parent,
 		    "Crypto WRITE request failed (error=%d).",
 		    parent->bio_error);
 		parent->bio_completed = 0;
 	} else {
 		parent->bio_completed = parent->bio_length;
 	}
 	g_destroy_bio(bp);
 	/*
 	 * The write is complete; pass it up.
 	 */
 	g_io_deliver(parent, parent->bio_error);
 }
 
 /*
  * The function is called after data encryption.
  *
  * g_eli_start -> g_eli_crypto_run -> G_ELI_CRYPTO_WRITE_DONE -> g_io_request -> g_eli_write_done -> g_io_deliver
  */
 static int
 g_eli_crypto_write_done(struct cryptop *crp)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct bio *bp, *cbp;
 
 	if (crp->crp_etype == EAGAIN) {
 		if (g_eli_crypto_rerun(crp) == 0)
 			return (0);
 	}
 	bp = (struct bio *)crp->crp_opaque;
 	bp->bio_inbed++;
 	if (crp->crp_etype == 0) {
 		G_ELI_DEBUG(3, "Crypto WRITE request done (%d/%d).",
 		    bp->bio_inbed, bp->bio_children);
 	} else {
 		G_ELI_DEBUG(1, "Crypto WRITE request failed (%d/%d) error=%d.",
 		    bp->bio_inbed, bp->bio_children, crp->crp_etype);
 		if (bp->bio_error == 0)
 			bp->bio_error = crp->crp_etype;
 	}
 	/*
 	 * All sectors are already encrypted?
 	 */
 	if (bp->bio_inbed < bp->bio_children)
 		return (0);
 	bp->bio_inbed = 0;
 	bp->bio_children = 1;
 	cbp = bp->bio_driver1;
 	bp->bio_driver1 = NULL;
 	if (bp->bio_error != 0) {
 		G_ELI_LOGREQ(0, bp, "Crypto WRITE request failed (error=%d).",
 		    bp->bio_error);
 		free(bp->bio_driver2, M_ELI);
 		bp->bio_driver2 = NULL;
 		g_destroy_bio(cbp);
 		g_io_deliver(bp, bp->bio_error);
 		return (0);
 	}
 	cbp->bio_data = bp->bio_driver2;
 	cbp->bio_done = g_eli_write_done;
 	gp = bp->bio_to->geom;
 	cp = LIST_FIRST(&gp->consumer);
 	cbp->bio_to = cp->provider;
 	G_ELI_LOGREQ(2, cbp, "Sending request.");
 	/*
 	 * Send encrypted data to the provider.
 	 */
 	g_io_request(cbp, cp);
 	return (0);
 }
 
 /*
  * Placeholder installed as the ->orphan() and/or ->spoiled() method where
  * the event is not expected to happen.  GEOM wants these methods to be set
  * for every geom, so reaching this function indicates a bug and we panic.
  */
 static void
 g_eli_orphan_spoil_assert(struct g_consumer *cp)
 {
 
 	panic("Function %s() called for %s.", __func__, cp->geom->name);
 }
 
 /*
  * The underlying provider went away: forcibly destroy our device, unless
  * the geom has no softc attached (nothing left to destroy).
  */
 static void
 g_eli_orphan(struct g_consumer *cp)
 {
 	struct g_eli_softc *sc;
 
 	g_topology_assert();
 	sc = cp->geom->softc;
 	if (sc != NULL)
 		g_eli_destroy(sc, 1);
 }
 
 /*
  * GEOM ->start() method: entry point for every I/O request sent to the
  * decrypted provider.  Reads are first forwarded to the underlying
  * provider; writes are queued for the worker threads to encrypt.
  *
  * BIO_READ : G_ELI_START -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  * BIO_WRITE: G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
  */
 static void
 g_eli_start(struct bio *bp)
 {
 	struct g_eli_softc *sc;
 	struct g_consumer *cp;
 	struct bio *cbp;
 
 	sc = bp->bio_to->geom->softc;
 	KASSERT(sc != NULL,
 	    ("Provider's error should be set (error=%d)(device=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 	G_ELI_LOGREQ(2, bp, "Request received.");
 
 	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE) {
 		/*
 		 * BIO_GETATTR and everything else is unsupported.
 		 * BIO_DELETE could eventually be supported by overwriting
 		 * the requested sectors with random data g_eli_overwrites
 		 * number of times.
 		 */
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 	if (bp->bio_cmd == BIO_WRITE) {
 		/* Park the clone until the data has been encrypted. */
 		bp->bio_driver1 = cbp;
 		mtx_lock(&sc->sc_queue_mtx);
 		bioq_insert_tail(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 		wakeup(sc);
 		return;
 	}
 	/* BIO_READ */
 	cbp->bio_done = g_eli_read_done;
 	cp = LIST_FIRST(&sc->sc_geom->consumer);
 	cbp->bio_to = cp->provider;
 	G_ELI_LOGREQ(2, bp, "Sending request.");
 	/*
 	 * Fetch the encrypted data from the provider.
 	 */
 	g_io_request(cbp, cp);
 }
 
 /*
  * This is the main function for kernel worker thread when we don't have
  * hardware acceleration and we have to do cryptography in software.
  * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM
  * threads with crypto work.
  *
  * 'arg' is the struct g_eli_worker allocated for this thread by
  * g_eli_create().  The thread takes bios off sc->sc_queue and passes them
  * to g_eli_crypto_run().  Once the queue is empty and G_ELI_FLAG_DESTROY
  * is set, it unlinks and frees its worker structure, wakes up whoever
  * sleeps on &sc->sc_workers and exits.
  */
 static void
 g_eli_worker(void *arg)
 {
 	struct g_eli_softc *sc;
 	struct g_eli_worker *wr;
 	struct bio *bp;
 
 	wr = arg;
 	sc = wr->w_softc;
 	mtx_lock_spin(&sched_lock);
 	sched_prio(curthread, PRIBIO);
 	/*
 	 * Bind to CPU number w_number only for software crypto with the
 	 * thread count left at its default of 0.
 	 */
 	if (sc->sc_crypto == G_ELI_CRYPTO_SW && g_eli_threads == 0)
 		sched_bind(curthread, wr->w_number);
 	mtx_unlock_spin(&sched_lock);
  
 	G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm);
 
 	for (;;) {
 		mtx_lock(&sc->sc_queue_mtx);
 		bp = bioq_takefirst(&sc->sc_queue);
 		if (bp == NULL) {
 			if ((sc->sc_flags & G_ELI_FLAG_DESTROY) != 0) {
 				/*
 				 * Tear ourselves down while still holding
 				 * the queue mutex; the destroying thread
 				 * sleeps on &sc->sc_workers with that mutex
 				 * until the list is empty.
 				 */
 				LIST_REMOVE(wr, w_next);
 				crypto_freesession(wr->w_sid);
 				free(wr, M_ELI);
 				G_ELI_DEBUG(1, "Thread %s exiting.",
 				    curthread->td_proc->p_comm);
 				wakeup(&sc->sc_workers);
 				mtx_unlock(&sc->sc_queue_mtx);
 				kthread_exit(0);
 			}
 			/*
 			 * PDROP is passed, so the mutex is taken again at
 			 * the top of the loop rather than by msleep().
 			 */
 			msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
 			    "geli:w", 0);
 			continue;
 		}
 		mtx_unlock(&sc->sc_queue_mtx);
 		g_eli_crypto_run(wr, bp);
 	}
 }
 
 /*
  * Generate the IV for the sector at the given offset.  The IV is the
  * leading 'size' bytes of a SHA256 digest computed over the precalculated
  * IV-Key context followed by the offset, so it differs for every sector.
  */
 static void
 g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
     size_t size)
 {
 	SHA256_CTX ivctx;
 	u_char digest[SHA256_DIGEST_LENGTH];
 
 	/* Start from a private copy of the context seeded with the IV-Key. */
 	ivctx = sc->sc_ivctx;
 	SHA256_Update(&ivctx, (uint8_t *)&offset, sizeof(offset));
 	SHA256_Final(digest, &ivctx);
 	bcopy(digest, iv, size);
 }
 
 /*
  * This is the main function responsible for cryptography (i.e. communication
  * with the crypto(9) subsystem).
  *
  * The bio is cut into sector-sized pieces and one crypto request is
  * dispatched for each of them, with an IV derived from the sector offset.
  * BIO_READ data is processed in place in bp->bio_data.  BIO_WRITE data is
  * first copied into a private buffer, so the caller's data is not modified.
  * All per-request memory comes from a single allocation that is stored in
  * bp->bio_driver2 and released by the completion path.
  *
  * wr - worker running the request; supplies the crypto session ID.
  * bp - BIO_READ or BIO_WRITE request taken off the softc queue.
  */
 static void
 g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp)
 {
 	struct g_eli_softc *sc;
 	struct cryptop *crp;
 	struct cryptodesc *crd;
 	struct uio *uio;
 	struct iovec *iov;
 	u_int i, nsec, add, secsize;
 	int err, error;
 	size_t size;
 	u_char *p, *data;
 
 	G_ELI_LOGREQ(3, bp, "%s", __func__);
 
 	/* Remember the worker, so g_eli_crypto_rerun() can find it again. */
 	bp->bio_pflags = wr->w_number;
 	sc = wr->w_softc;
 	secsize = LIST_FIRST(&sc->sc_geom->provider)->sectorsize;
 	nsec = bp->bio_length / secsize;
 
 	/*
 	 * Calculate how much memory we need.
 	 * We need a separate crypto operation for every single sector.
 	 * It is much faster to calculate the total amount of needed memory
 	 * here and do the allocation once instead of allocating memory in
 	 * pieces (many, many pieces).
 	 */
 	size = sizeof(*crp) * nsec;
 	size += sizeof(*crd) * nsec;
 	size += sizeof(*uio) * nsec;
 	size += sizeof(*iov) * nsec;
 	/*
 	 * If we write the data we cannot destroy current bio_data content,
 	 * so we need to allocate more memory for encrypted data.
 	 */
 	if (bp->bio_cmd == BIO_WRITE)
 		size += bp->bio_length;
 	p = malloc(size, M_ELI, M_WAITOK);
 
 	bp->bio_inbed = 0;
 	bp->bio_children = nsec;
 	bp->bio_driver2 = p;
 
 	if (bp->bio_cmd == BIO_READ)
 		data = bp->bio_data;
 	else {
 		/* The encrypted copy lives at the start of the allocation. */
 		data = p;
 		p += bp->bio_length;
 		bcopy(bp->bio_data, data, bp->bio_length);
 	}
 
 	error = 0;
 	for (i = 0, add = 0; i < nsec; i++, add += secsize) {
 		/* Carve this sector's structures out of the allocation. */
 		crp = (struct cryptop *)p;	p += sizeof(*crp);
 		crd = (struct cryptodesc *)p;	p += sizeof(*crd);
 		uio = (struct uio *)p;		p += sizeof(*uio);
 		iov = (struct iovec *)p;	p += sizeof(*iov);
 
 		iov->iov_len = secsize;
 		iov->iov_base = data;
 		data += secsize;
 
 		uio->uio_iov = iov;
 		uio->uio_iovcnt = 1;
 		uio->uio_segflg = UIO_SYSSPACE;
 		uio->uio_resid = secsize;
 
 		crp->crp_sid = wr->w_sid;
 		crp->crp_ilen = secsize;
 		crp->crp_olen = secsize;
 		crp->crp_opaque = (void *)bp;
 		crp->crp_buf = (void *)uio;
 		if (bp->bio_cmd == BIO_WRITE)
 			crp->crp_callback = g_eli_crypto_write_done;
 		else /* if (bp->bio_cmd == BIO_READ) */
 			crp->crp_callback = g_eli_crypto_read_done;
 		crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC | CRYPTO_F_REL;
 		crp->crp_desc = crd;
 
 		crd->crd_skip = 0;
 		crd->crd_len = secsize;
 		crd->crd_flags =
 		    CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT | CRD_F_KEY_EXPLICIT;
 		if (bp->bio_cmd == BIO_WRITE)
 			crd->crd_flags |= CRD_F_ENCRYPT;
 		crd->crd_alg = sc->sc_algo;
 		crd->crd_key = sc->sc_datakey;
 		crd->crd_klen = sc->sc_keylen;
 		g_eli_crypto_ivgen(sc, bp->bio_offset + add, crd->crd_iv,
 		    sizeof(crd->crd_iv));
 		crd->crd_next = NULL;
 
 		crp->crp_etype = 0;
 		err = crypto_dispatch(crp);
 		/* Keep only the first dispatch error. */
 		if (error == 0)
 			error = err;
 	}
 	if (bp->bio_error == 0)
 		bp->bio_error = error;
 }
 
 /*
  * Read the GEOM_ELI metadata stored in the last sector of provider 'pp'
  * and decode it into 'md'.  A temporary geom and consumer are created for
  * the read and destroyed again before returning.
  *
  * Must be called with the topology lock held; the lock is dropped around
  * the actual read.  Returns 0 on success or an error number.
  */
 int
 g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
     struct g_eli_metadata *md)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	u_char *buf = NULL;
 	int error;
 
 	g_topology_assert();
 
 	gp = g_new_geomf(mp, "eli:taste");
 	gp->start = g_eli_start;
 	gp->access = g_std_access;
 	/*
 	 * g_eli_read_metadata() is always called from the event thread.
 	 * Our geom is created and destroyed in the same event, so there
 	 * could be no orphan nor spoil event in the meantime.
 	 */
 	gp->orphan = g_eli_orphan_spoil_assert;
 	gp->spoiled = g_eli_orphan_spoil_assert;
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 	if (error != 0)
 		goto end;
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		goto end;
 	g_topology_unlock();
 	/* The metadata occupies the very last sector of the provider. */
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
-	if (error != 0)
+	if (buf == NULL)
 		goto end;
 	eli_metadata_decode(buf, md);
 end:
 	/* Common cleanup: undo only what was actually set up. */
 	if (buf != NULL)
 		g_free(buf);
 	if (cp->provider != NULL) {
 		if (cp->acr == 1)
 			g_access(cp, -1, 0, 0);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	return (error);
 }
 
 /*
  * Called on the last close of our provider when the user asked for the
  * device to be detached in that situation.  Forcibly destroys the device.
  */
 static void
 g_eli_last_close(struct g_eli_softc *sc)
 {
 	struct g_provider *pp;
 	char ppname[64];
 	int error;
 
 	g_topology_assert();
 	pp = LIST_FIRST(&sc->sc_geom->provider);
 	/* Save the name now, for the messages printed after the destroy. */
 	strlcpy(ppname, pp->name, sizeof(ppname));
 	error = g_eli_destroy(sc, 1);
 	KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).",
 	    ppname, error));
 	G_ELI_DEBUG(0, "Detached %s on last close.", ppname);
 }
 
 /*
  * GEOM ->access() method used when detach-on-last-close is enabled.
  * Records that the provider has been opened for writing and, once all
  * access counts drop to zero, detaches the device if that was requested.
  * Always returns 0.
  */
 int
 g_eli_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_eli_softc *sc;
 
 	sc = pp->geom->softc;
 
 	if (dw > 0) {
 		/* Note that somebody has opened us for writing. */
 		sc->sc_flags |= G_ELI_FLAG_WOPEN;
 		return (0);
 	}
 	/* Nothing to do unless this is the last close. */
 	if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0)
 		return (0);
 
 	/*
 	 * Detach automatically on last close, if requested.
 	 */
 	if ((sc->sc_flags & (G_ELI_FLAG_RW_DETACH | G_ELI_FLAG_WOPEN)) != 0)
 		g_eli_last_close(sc);
 	return (0);
 }
 
 /*
  * Create a GEOM_ELI device on top of provider 'bpp'.
  *
  * req  - control request to report errors to, or NULL (errors are then
  *        only logged through G_ELI_DEBUG).
  * mp   - the GEOM_ELI class.
  * bpp  - underlying (encrypted) provider.
  * md   - decoded metadata describing algorithm, key length, flags, etc.
  * mkey - decrypted key material: the IV-Key followed by the Data-Key.
  * nkey - number of the Master Key that was used.
  *
  * Returns the new geom, or NULL on failure, in which case everything set
  * up so far has been torn down again.
  */
 struct g_geom *
 g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
     const struct g_eli_metadata *md, const u_char *mkey, int nkey)
 {
 	struct g_eli_softc *sc;
 	struct g_eli_worker *wr;
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct g_consumer *cp;
 	struct cryptoini cri;
 	u_int i, threads;
 	int error;
 
 	G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX);
 
 	gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX);
 	gp->softc = NULL;	/* for a moment */
 
 	/* M_ZERO matters: the failure path inspects sc_workers early. */
 	sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO);
 	gp->start = g_eli_start;
 	/*
 	 * Spoiling cannot happen actually, because we keep provider open for
 	 * writing all the time.
 	 */
 	gp->spoiled = g_eli_orphan_spoil_assert;
 	gp->orphan = g_eli_orphan;
 	/*
 	 * If detach-on-last-close feature is not enabled, we can simply use
 	 * g_std_access().
 	 */
 	if (md->md_flags & G_ELI_FLAG_WO_DETACH)
 		gp->access = g_eli_access;
 	else
 		gp->access = g_std_access;
 	gp->dumpconf = g_eli_dumpconf;
 
 	/* Assume software crypto until a hardware session is obtained. */
 	sc->sc_crypto = G_ELI_CRYPTO_SW;
 	sc->sc_flags = md->md_flags;
 	sc->sc_algo = md->md_algo;
 	sc->sc_nkey = nkey;
 	/*
 	 * Remember the keys in our softc structure.
 	 */
 	bcopy(mkey, sc->sc_ivkey, sizeof(sc->sc_ivkey));
 	mkey += sizeof(sc->sc_ivkey);
 	bcopy(mkey, sc->sc_datakey, sizeof(sc->sc_datakey));
 	sc->sc_keylen = md->md_keylen;
 
 	/*
 	 * Precalculate SHA256 for IV generation.
 	 * This is expensive operation and we can do it only once now or for
 	 * every access to sector, so now will be much better.
 	 */
 	SHA256_Init(&sc->sc_ivctx);
 	SHA256_Update(&sc->sc_ivctx, sc->sc_ivkey, sizeof(sc->sc_ivkey));
 
 	gp->softc = sc;
 	sc->sc_geom = gp;
 
 	bioq_init(&sc->sc_queue);
 	mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF);
 
 	/* pp stays NULL until the provider exists; 'failed' checks it. */
 	pp = NULL;
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, bpp);
 	if (error != 0) {
 		if (req != NULL) {
 			gctl_error(req, "Cannot attach to %s (error=%d).",
 			    bpp->name, error);
 		} else {
 			G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).",
 			    bpp->name, error);
 		}
 		goto failed;
 	}
 	/*
 	 * Keep provider open all the time, so we can run critical tasks,
 	 * like Master Keys deletion, without wondering if we can open
 	 * provider or not.
 	 */
 	error = g_access(cp, 1, 1, 1);
 	if (error != 0) {
 		if (req != NULL) {
 			gctl_error(req, "Cannot access %s (error=%d).",
 			    bpp->name, error);
 		} else {
 			G_ELI_DEBUG(1, "Cannot access %s (error=%d).",
 			    bpp->name, error);
 		}
 		goto failed;
 	}
 
 	LIST_INIT(&sc->sc_workers);
 
 	/* Session parameters shared by all workers. */
 	bzero(&cri, sizeof(cri));
 	cri.cri_alg = sc->sc_algo;
 	cri.cri_klen = sc->sc_keylen;
 	cri.cri_key = sc->sc_datakey;
 
 	/* 0 means one thread per CPU; never use more than mp_ncpus. */
 	threads = g_eli_threads;
 	if (threads == 0)
 		threads = mp_ncpus;
 	else if (threads > mp_ncpus) {
 		/* There is really no need for too many worker threads. */
 		threads = mp_ncpus;
 		G_ELI_DEBUG(0, "Reducing number of threads to %u.", threads);
 	}
 	for (i = 0; i < threads; i++) {
 		wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
 		wr->w_softc = sc;
 		wr->w_number = i;
 
 		/*
 		 * If this is the first pass, try to get hardware support.
 		 * Use software cryptography, if we cannot get it.
 		 */
 		if (i == 0) {
 			error = crypto_newsession(&wr->w_sid, &cri, 1);
 			if (error == 0)
 				sc->sc_crypto = G_ELI_CRYPTO_HW;
 		}
 		if (sc->sc_crypto == G_ELI_CRYPTO_SW)
 			error = crypto_newsession(&wr->w_sid, &cri, 0);
 		if (error != 0) {
 			/* This worker was never linked in; free it here. */
 			free(wr, M_ELI);
 			if (req != NULL) {
 				gctl_error(req, "Cannot setup crypto session "
 				    "for %s (error=%d).", bpp->name, error);
 			} else {
 				G_ELI_DEBUG(1, "Cannot setup crypto session "
 				    "for %s (error=%d).", bpp->name, error);
 			}
 			goto failed;
 		}
 
 		error = kthread_create(g_eli_worker, wr, &wr->w_proc, 0, 0,
 		    "g_eli[%u] %s", i, bpp->name);
 		if (error != 0) {
 			crypto_freesession(wr->w_sid);
 			free(wr, M_ELI);
 			if (req != NULL) {
 				gctl_error(req, "Cannot create kernel thread "
 				    "for %s (error=%d).", bpp->name, error);
 			} else {
 				G_ELI_DEBUG(1, "Cannot create kernel thread "
 				    "for %s (error=%d).", bpp->name, error);
 			}
 			goto failed;
 		}
 		LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next);
 		/* If we have hardware support, one thread is enough. */
 		if (sc->sc_crypto == G_ELI_CRYPTO_HW)
 			break;
 	}
 
 	/*
 	 * Create decrypted provider.
 	 */
 	pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX);
 	pp->sectorsize = md->md_sectorsize;
 	pp->mediasize = bpp->mediasize;
 	/* Unless this is a one-time device, drop the last (metadata) sector. */
 	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0)
 		pp->mediasize -= bpp->sectorsize;
 	/* Round down to a multiple of our own sector size. */
 	pp->mediasize -= (pp->mediasize % pp->sectorsize);
 	g_error_provider(pp, 0);
 
 	G_ELI_DEBUG(0, "Device %s created.", pp->name);
 	G_ELI_DEBUG(0, "    Cipher: %s", g_eli_algo2str(sc->sc_algo));
 	G_ELI_DEBUG(0, "Key length: %u", sc->sc_keylen);
 	G_ELI_DEBUG(0, "    Crypto: %s",
 	    sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware");
 	return (gp);
 failed:
 	/* Ask the workers that were already started to terminate. */
 	mtx_lock(&sc->sc_queue_mtx);
 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
 	wakeup(sc);
 	/*
 	 * Wait for kernel threads self destruction.
 	 */
 	while (!LIST_EMPTY(&sc->sc_workers)) {
 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
 		    "geli:destroy", 0);
 	}
 	mtx_destroy(&sc->sc_queue_mtx);
 	if (cp->provider != NULL) {
 		if (cp->acr == 1)
 			g_access(cp, -1, -1, -1);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	if (pp != NULL)
 		g_destroy_provider(pp);
 	g_destroy_geom(gp);
 	/* Wipe the key material before releasing the softc. */
 	bzero(sc, sizeof(*sc));
 	free(sc, M_ELI);
 	return (NULL);
 }
 
 /*
  * Destroy the device described by 'sc'.
  *
  * Without 'force' the call fails with EBUSY while the provider is open.
  * With 'force' the worker threads are stopped and the softc is wiped and
  * freed even though the provider is still open.
  *
  * Returns 0 on success, ENXIO if 'sc' is NULL, or EBUSY as described.
  */
 int
 g_eli_destroy(struct g_eli_softc *sc, boolean_t force)
 {
 	struct g_provider *pp;
 	struct g_geom *gp;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 
 	gp = sc->sc_geom;
 	pp = LIST_FIRST(&gp->provider);
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		if (!force) {
 			G_ELI_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		}
 		G_ELI_DEBUG(1, "Device %s is still open, so it "
 		    "can't be definitely removed.", pp->name);
 	}
 
 	/* Tell the workers to exit and wait until all of them are gone. */
 	mtx_lock(&sc->sc_queue_mtx);
 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
 	wakeup(sc);
 	while (!LIST_EMPTY(&sc->sc_workers)) {
 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
 		    "geli:destroy", 0);
 	}
 	mtx_destroy(&sc->sc_queue_mtx);
 	gp->softc = NULL;
 	/* Wipe the key material before releasing the softc. */
 	bzero(sc, sizeof(*sc));
 	free(sc, M_ELI);
 
 	/* Announce the destruction only if nobody has the provider open. */
 	if (!(pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)))
 		G_ELI_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom_close(gp, ENXIO);
 
 	return (0);
 }
 
 /*
  * GEOM ->destroy_geom() method: non-forced destroy of the given geom.
  */
 static int
 g_eli_destroy_geom(struct gctl_req *req __unused,
     struct g_class *mp __unused, struct g_geom *gp)
 {
 
 	return (g_eli_destroy(gp->softc, 0));
 }
 
 /*
  * Tasting is only made on boot.
  * We detect providers which should be attached before root is mounted.
  *
  * For a provider carrying valid GEOM_ELI metadata with the BOOT flag set,
  * the user is asked for a passphrase up to g_eli_tries times.  When the
  * Master Key can be decrypted, the device is created.
  *
  * Returns the new geom, or NULL if the provider is not ours, should not be
  * attached on boot, or could not be attached.
  */
 static struct g_geom *
 g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_eli_metadata md;
 	struct g_geom *gp;
 	struct hmac_ctx ctx;
 	char passphrase[256];
 	u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN];
 	u_int nkey, i;
 	int error;
 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 
 	if (!g_eli_do_taste || g_eli_tries == 0)
 		return (NULL);
 
 	G_ELI_DEBUG(3, "Tasting %s.", pp->name);
 
 	error = g_eli_read_metadata(mp, pp, &md);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	if (strcmp(md.md_magic, G_ELI_MAGIC) != 0)
 		return (NULL);
 	if (md.md_version > G_ELI_VERSION) {
 		printf("geom_eli.ko module is too old to handle %s.\n",
 		    pp->name);
 		return (NULL);
 	}
 	if (md.md_provsize != pp->mediasize)
 		return (NULL);
 	/* Should we attach it on boot? */
 	if ((md.md_flags & G_ELI_FLAG_BOOT) == 0)
 		return (NULL);
 	if (md.md_keys == 0x00) {
 		G_ELI_DEBUG(0, "No valid keys on %s.", pp->name);
 		return (NULL);
 	}
 
 	/*
 	 * Ask for the passphrase no more than g_eli_tries times.
 	 */
 	for (i = 0; i < g_eli_tries; i++) {
 		printf("Enter passphrase for %s: ", pp->name);
 		gets(passphrase, sizeof(passphrase), g_eli_visible_passphrase);
 		KASSERT(md.md_iterations >= 0, ("md_iterations = %d for %s",
 		    (int)md.md_iterations, pp->name));
 		/*
 		 * Prepare Derived-Key from the user passphrase.
 		 */
 		g_eli_crypto_hmac_init(&ctx, NULL, 0);
 		if (md.md_iterations == 0) {
 			g_eli_crypto_hmac_update(&ctx, md.md_salt,
 			    sizeof(md.md_salt));
 			g_eli_crypto_hmac_update(&ctx, passphrase,
 			    strlen(passphrase));
 		} else {
 			u_char dkey[G_ELI_USERKEYLEN];
 
 			pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt,
 			    sizeof(md.md_salt), passphrase, md.md_iterations);
 			g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
 			bzero(dkey, sizeof(dkey));
 		}
 		g_eli_crypto_hmac_final(&ctx, key, 0);
 		/*
 		 * The passphrase is not needed past this point; do not leave
 		 * it lying around on the kernel stack.
 		 */
 		bzero(passphrase, sizeof(passphrase));
 
 		/*
 		 * Decrypt Master-Key.
 		 */
 		error = g_eli_mkey_decrypt(&md, key, mkey, &nkey);
 		bzero(key, sizeof(key));
 		if (error == -1) {
 			/* Wrong passphrase. */
 			if (i == g_eli_tries - 1) {
 				/* Make the check after the loop fire. */
 				i++;
 				break;
 			}
 			G_ELI_DEBUG(0, "Wrong key for %s. Tries left: %u.",
 			    pp->name, g_eli_tries - i - 1);
 			/* Try again. */
 			continue;
 		} else if (error > 0) {
 			G_ELI_DEBUG(0, "Cannot decrypt Master Key for %s (error=%d).",
 			    pp->name, error);
 			return (NULL);
 		}
 		G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
 		break;
 	}
 	if (i == g_eli_tries) {
 		G_ELI_DEBUG(0, "Wrong key for %s. No tries left.", pp->name);
 		return (NULL);
 	}
 
 	/*
 	 * We have correct key, let's attach provider.
 	 */
 	gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey);
 	/* The key material has been copied into the softc; wipe ours. */
 	bzero(mkey, sizeof(mkey));
 	bzero(&md, sizeof(md));
 	if (gp == NULL) {
 		G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name,
 		    G_ELI_SUFFIX);
 		return (NULL);
 	}
 	return (gp);
 }
 
 /*
  * GEOM ->dumpconf() method.  Emits the geom-level XML elements (Flags,
  * UsedKey, Crypto, KeyLength, Cipher); nothing is emitted for individual
  * providers or consumers, or when the geom has no softc.
  */
 static void
 g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_eli_softc *sc;
 
 	g_topology_assert();
 	sc = gp->softc;
 	if (sc == NULL || pp != NULL || cp != NULL)
 		return;
 
 	sbuf_printf(sb, "%s<Flags>", indent);
 	if (sc->sc_flags != 0) {
 		int first = 1;
 
 /* Append the flag's name, comma-separated, if the flag is set. */
 #define ADD_FLAG(flag, name)	do {					\
 	if ((sc->sc_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
 		ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
 		ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT");
 		ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH");
 		ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH");
 		ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN");
 		ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY");
 #undef  ADD_FLAG
 	} else {
 		sbuf_printf(sb, "NONE");
 	}
 	sbuf_printf(sb, "</Flags>\n");
 
 	/* One-time devices do not report a Master Key number. */
 	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) {
 		sbuf_printf(sb, "%s<UsedKey>%u</UsedKey>\n", indent,
 		    sc->sc_nkey);
 	}
 
 	sbuf_printf(sb, "%s<Crypto>", indent);
 	if (sc->sc_crypto == G_ELI_CRYPTO_HW)
 		sbuf_printf(sb, "hardware");
 	else if (sc->sc_crypto == G_ELI_CRYPTO_SW)
 		sbuf_printf(sb, "software");
 	else
 		sbuf_printf(sb, "UNKNOWN");
 	sbuf_printf(sb, "</Crypto>\n");
 
 	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent, sc->sc_keylen);
 	sbuf_printf(sb, "%s<Cipher>%s</Cipher>\n", indent,
 	    g_eli_algo2str(sc->sc_algo));
 }
 
 /*
  * SYSINIT hook: enable tasting, but only while the system is still
  * booting ('cold' is set).
  */
 static void
 g_eli_on_boot_start(void *dummy __unused)
 {
 
 	/* The module was loaded after boot: leave tasting disabled. */
 	if (!cold) {
 		G_ELI_DEBUG(1, "Tasting not started.");
 		return;
 	}
 	G_ELI_DEBUG(1, "Start tasting.");
 	g_eli_do_taste = 1;
 }
 SYSINIT(geli_boot_start, SI_SUB_TUNABLES, SI_ORDER_ANY, g_eli_on_boot_start, NULL)
 
 /*
  * SYSINIT hook: disable tasting again, if it was enabled earlier.
  */
 static void
 g_eli_on_boot_end(void *dummy __unused)
 {
 
 	if (!g_eli_do_taste)
 		return;
 	G_ELI_DEBUG(1, "Tasting no more.");
 	g_eli_do_taste = 0;
 }
 SYSINIT(geli_boot_end, SI_SUB_RUN_SCHEDULER, SI_ORDER_ANY, g_eli_on_boot_end, NULL)
 
 DECLARE_GEOM_CLASS(g_eli_class, g_eli);
 MODULE_DEPEND(geom_eli, crypto, 1, 1, 1);
Index: head/sys/geom/geom_aes.c
===================================================================
--- head/sys/geom/geom_aes.c	(revision 152966)
+++ head/sys/geom/geom_aes.c	(revision 152967)
@@ -1,375 +1,375 @@
 /*-
  * Copyright (c) 2002 Poul-Henning Kamp
  * Copyright (c) 2002 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
  * and NAI Labs, the Security Research Division of Network Associates, Inc.
  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The names of the authors may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * This method provides AES encryption with a compiled in key (default
  * all zeroes).
  *
  * XXX: This could probably save a lot of code by pretending to be a slicer.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/bio.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/libkern.h>
 #include <sys/endian.h>
 #include <sys/md5.h>
 #include <sys/errno.h>
 #include <geom/geom.h>
 
 #include <crypto/rijndael/rijndael-api-fst.h>
 
 #define AES_CLASS_NAME "AES"
 
 #define MASTER_KEY_LENGTH	(1024/8)
 
 /*
  * Magic strings compared against the start of the first sector by
  * g_aes_taste().  Which one matches selects the keying mode:
  * aes_magic -> KEY_ZERO, aes_magic_random -> KEY_RANDOM,
  * aes_magic_test -> KEY_TEST.
  */
 static const u_char *aes_magic = "<<FreeBSD-GEOM-AES>>";
 static const u_char *aes_magic_random = "<<FreeBSD-GEOM-AES-RANDOM>>";
 static const u_char *aes_magic_test = "<<FreeBSD-GEOM-AES-TEST>>";
 
 
 /*
  * Per-geom state, allocated zeroed and filled in by g_aes_taste().
  */
 struct g_aes_softc {
 	/* Keying mode, chosen from the magic string found on disk. */
 	enum {
 		KEY_ZERO,
 		KEY_RANDOM,
 		KEY_TEST
 	} keying;
 	/* Sector size of the underlying provider. */
 	u_int	sectorsize;
 	/* Underlying media size minus one sector. */
 	off_t	mediasize;
 	/* Cipher instance, initialised with MODE_CBC. */
 	cipherInstance ci;
 	/*
 	 * Master key: left all-zero for KEY_ZERO, a counting byte pattern
 	 * for KEY_TEST, arc4random() output for KEY_RANDOM.
 	 */
 	u_char master_key[MASTER_KEY_LENGTH];
 };
 
 /*
  * Generate a sectorkey from the masterkey and the offset position.
  *
  * For KEY_ZERO we just return a key of all zeros.
  *
  * We feed the sector byte offset, 16 bytes of the master-key and
  * the sector byte offset once more to MD5.
  * The sector byte offset is converted to little-endian format first
  * to support multi-architecture operation.
  * We use 16 bytes from the master-key starting at the logical sector
  * number modulus the length of the master-key.  If need be we wrap
  * around to the start of the master-key.
  */
 
 static void
 g_aes_makekey(struct g_aes_softc *sc, off_t off, keyInstance *ki, int dir)
 {
 	MD5_CTX ctx;
 	u_int64_t leoff;
 	u_int start, head;
 	u_char digest[16];
 
 	/* KEY_ZERO uses the master key directly, no per-sector derivation. */
 	if (sc->keying == KEY_ZERO) {
 		rijndael_makeKey(ki, dir, 128, sc->master_key);
 		return;
 	}
 
 	/* Little-endian byte offset, hashed both before and after the key. */
 	leoff = htole64(off);
 	/* Starting index into the master key: sector number mod key length. */
 	start = off / sc->sectorsize;
 	start %= sizeof(sc->master_key);
 
 	MD5Init(&ctx);
 	MD5Update(&ctx, (u_char *)&leoff, sizeof(leoff));
 	if (start + 16 > sizeof(sc->master_key)) {
 		/* The 16-byte window wraps around the end of the master key. */
 		head = sizeof(sc->master_key) - start;
 		MD5Update(&ctx, sc->master_key + start, head);
 		MD5Update(&ctx, sc->master_key, 16 - head);
 		head = 0;			/* destroy evidence */
 	} else {
 		MD5Update(&ctx, sc->master_key + start, 16);
 	}
 	start = 0;				/* destroy evidence */
 	MD5Update(&ctx, (u_char *)&leoff, sizeof(leoff));
 	leoff = 0;				/* destroy evidence */
 	MD5Final(digest, &ctx);
 	bzero(&ctx, sizeof(ctx));		/* destroy evidence */
 
 	/* The MD5 digest becomes the 128-bit sector key. */
 	rijndael_makeKey(ki, dir, 128, digest);
 	bzero(digest, sizeof(digest));		/* destroy evidence */
 }
 
 /*
  * Completion handler installed on cloned reads by g_aes_start():
  * decrypt the returned data in place, one sector at a time, then hand
  * the bio to g_std_done().
  */
 static void
 g_aes_read_done(struct bio *bp)
 {
 	struct g_aes_softc *sc;
 	u_char *cur, *end, *plain;
 	keyInstance key;
 	off_t secoff;
 
 	sc = bp->bio_from->geom->softc;
 	/* Scratch sector for the decrypted output. */
 	plain = g_malloc(sc->sectorsize, M_WAITOK);
 	cur = bp->bio_data;
 	end = cur + bp->bio_length;
 	/* Undo the one-sector shift g_aes_start() applied to the clone. */
 	secoff = bp->bio_offset - sc->sectorsize;
 	while (cur < end) {
 		g_aes_makekey(sc, secoff, &key, DIR_DECRYPT);
 		rijndael_blockDecrypt(&sc->ci, &key, cur,
 		    sc->sectorsize * 8, plain);
 		bcopy(plain, cur, sc->sectorsize);
 		cur += sc->sectorsize;
 		secoff += sc->sectorsize;
 	}
 	bzero(&key, sizeof(key));		/* destroy evidence */
 	bzero(plain, sc->sectorsize);		/* destroy evidence */
 	g_free(plain);
 	g_std_done(bp);
 }
 
 /*
  * Completion handler installed on cloned writes by g_aes_start(): wipe
  * and release the buffer that was allocated for the encrypted data.
  */
 static void
 g_aes_write_done(struct bio *bp)
 {
 	void *cipher;
 
 	cipher = bp->bio_data;
 	bzero(cipher, bp->bio_length);		/* destroy evidence */
 	g_free(cipher);
 	g_std_done(bp);
 }
 
 static void
 g_aes_start(struct bio *bp)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_aes_softc *sc;
 	struct bio *bp2;
 	u_char *p1, *p2, *b, *e;
 	keyInstance ekey;
 	off_t o;
 
 	gp = bp->bio_to->geom;
 	cp = LIST_FIRST(&gp->consumer);
 	sc = gp->softc;
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		bp2 = g_clone_bio(bp);
 		if (bp2 == NULL) {
 			g_io_deliver(bp, ENOMEM);
 			return;
 		}
 		bp2->bio_done = g_aes_read_done;
 		bp2->bio_offset += sc->sectorsize;
 		g_io_request(bp2, cp);
 		break;
 	case BIO_WRITE:
 		bp2 = g_clone_bio(bp);
 		if (bp2 == NULL) {
 			g_io_deliver(bp, ENOMEM);
 			return;
 		}
 		bp2->bio_done = g_aes_write_done;
 		bp2->bio_offset += sc->sectorsize;
 		bp2->bio_data = g_malloc(bp->bio_length, M_WAITOK);
 		b = bp->bio_data;
 		e = bp->bio_data;
 		e += bp->bio_length;
 		p2 = bp2->bio_data;
 		o = bp->bio_offset;
 		for (p1 = b; p1 < e; p1 += sc->sectorsize) {
 			g_aes_makekey(sc, o, &ekey, DIR_ENCRYPT);
 			rijndael_blockEncrypt(&sc->ci, &ekey,
 			    p1, sc->sectorsize * 8, p2);
 			p2 += sc->sectorsize;
 			o += sc->sectorsize;
 		}
 		bzero(&ekey, sizeof ekey);	/* destroy evidence */
 		g_io_request(bp2, cp);
 		break;
 	case BIO_GETATTR:
 		bp2 = g_clone_bio(bp);
 		if (bp2 == NULL) {
 			g_io_deliver(bp, ENOMEM);
 			return;
 		}
 		bp2->bio_done = g_std_done;
 		bp2->bio_offset += sc->sectorsize;
 		g_io_request(bp2, cp);
 		break;
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 	return;
 }
 
 static void
 g_aes_orphan(struct g_consumer *cp)
 {
 	struct g_geom *gp;
 	struct g_aes_softc *sc;
 
 	g_trace(G_T_TOPOLOGY, "g_aes_orphan(%p/%s)", cp, cp->provider->name);
 	g_topology_assert();
 	KASSERT(cp->provider->error != 0,
 		("g_aes_orphan with error == 0"));
 
 	gp = cp->geom;
 	sc = gp->softc;
 	g_wither_geom(gp, cp->provider->error);
 	bzero(sc, sizeof(struct g_aes_softc));	/* destroy evidence */
 	g_free(sc);
 	return;
 }
 
 /*
  * Access method: forward the access-count deltas to our consumer,
  * holding one extra exclusive count from first open until last close.
  */
 static int
 g_aes_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *cp;
 	int first_open, last_close;
 
 	cp = LIST_FIRST(&pp->geom->consumer);
 
 	/* On first open, grab an extra "exclusive" bit */
 	first_open = (cp->acr == 0 && cp->acw == 0 && cp->ace == 0);
 	if (first_open)
 		de++;
 
 	/* ... and let go of it on last close */
 	last_close = ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 &&
 	    (cp->ace + de) == 1);
 	if (last_close)
 		de--;
 
 	return (g_access(cp, dr, dw, de));
 }
 
 /*
  * Taste method: read the first sector of the provider and, if it starts
  * with one of the AES magic strings, allocate a softc, set up the master
  * key for the selected keying mode and create a "<name>.aes" provider
  * that is one sector smaller than the underlying media.
  *
  * Returns the new geom when a softc was attached, NULL otherwise (in
  * which case the consumer and geom created here are destroyed again).
  */
 static struct g_geom *
 g_aes_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_aes_softc *sc;
 	int error;
 	u_int sectorsize;
 	off_t mediasize;
 	u_char *buf;
 
 	g_trace(G_T_TOPOLOGY, "aes_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
 	gp = g_new_geomf(mp, "%s.aes", pp->name);
 	cp = g_new_consumer(gp);
 	g_attach(cp, pp);
 	error = g_access(cp, 1, 0, 0);
 	if (error) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		g_destroy_geom(gp);
 		return (NULL);
 	}
 	buf = NULL;
 	g_topology_unlock();
 	do {
 		if (gp->rank != 2)
 			break;
 		sectorsize = cp->provider->sectorsize;
 		mediasize = cp->provider->mediasize;
 		buf = g_read_data(cp, 0, sectorsize, &error);
-		if (buf == NULL || error != 0) {
+		if (buf == NULL) {
 			break;
 		}
 		sc = g_malloc(sizeof(struct g_aes_softc), M_WAITOK | M_ZERO);
 		if (!memcmp(buf, aes_magic, strlen(aes_magic))) {
 			sc->keying = KEY_ZERO;
 		} else if (!memcmp(buf, aes_magic_random,
 		    strlen(aes_magic_random))) {
 			sc->keying = KEY_RANDOM;
 		} else if (!memcmp(buf, aes_magic_test,
 		    strlen(aes_magic_test))) {
 			sc->keying = KEY_TEST;
 		} else {
 			/* No magic: buf is released on the common exit path. */
 			g_free(sc);
 			break;
 		}
 		g_free(buf);
 		/*
 		 * Forget the freed sector so the common exit path below
 		 * does not free it a second time.
 		 */
 		buf = NULL;
 		gp->softc = sc;
 		sc->sectorsize = sectorsize;
 		sc->mediasize = mediasize - sectorsize;
 		rijndael_cipherInit(&sc->ci, MODE_CBC, NULL);
 		if (sc->keying == KEY_TEST) {
 			int i;
 			u_char *p;
 
 			/* Fixed, predictable key: byte i holds the value i. */
 			p = sc->master_key;
 			for (i = 0; i < (int)sizeof sc->master_key; i ++)
 				*p++ = i;
 		}
 		if (sc->keying == KEY_RANDOM) {
 			int i;
 			u_int32_t u;
 			u_char *p;
 
 			/* Fill the key four bytes at a time from arc4random(). */
 			p = sc->master_key;
 			for (i = 0; i < (int)sizeof sc->master_key; i += sizeof u) {
 				u = arc4random();
 				*p++ = u;
 				*p++ = u >> 8;
 				*p++ = u >> 16;
 				*p++ = u >> 24;
 			}
 		}
 		g_topology_lock();
 		pp = g_new_providerf(gp, gp->name);
 		pp->mediasize = mediasize - sectorsize;
 		pp->sectorsize = sectorsize;
 		g_error_provider(pp, 0);
 		g_topology_unlock();
 	} while(0);
 	g_topology_lock();
 	if (buf)
 		g_free(buf);
 	/* Drop the read access taken for tasting. */
 	g_access(cp, -1, 0, 0);
 	if (gp->softc != NULL)
 		return (gp);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	return (NULL);
 }
 
 /*
  * Method table for the AES class.  Tasting, I/O start, orphan handling
  * and access are implemented in this file; spoiling uses the stock
  * g_std_spoiled().
  */
 static struct g_class g_aes_class	= {
 	.name = AES_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_aes_taste,
 	.start = g_aes_start,
 	.orphan = g_aes_orphan,
 	.spoiled = g_std_spoiled,
 	.access = g_aes_access,
 };
 
 DECLARE_GEOM_CLASS(g_aes_class, g_aes);
Index: head/sys/geom/geom_apple.c
===================================================================
--- head/sys/geom/geom_apple.c	(revision 152966)
+++ head/sys/geom/geom_apple.c	(revision 152967)
@@ -1,263 +1,263 @@
 /*-
  * Copyright (c) 2002 Peter Grehan.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * GEOM module for Apple Partition Maps
  *  As described in 'Inside Macintosh Vol 3: About the SCSI Manager -
  *    The Structure of Block Devices"
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/endian.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/bio.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #include <sys/sbuf.h>
 #include <geom/geom.h>
 #include <geom/geom_slice.h>
 
 #define APPLE_CLASS_NAME "APPLE"
 
 #define NAPMPART  16	/* Max partitions */
 
 /*
  * In-memory copy of one partition map entry, filled in from the
  * big-endian on-disk sector by g_dec_apple_partition().
  */
 struct apm_partition {
 	char       am_sig[2];	/* bytes 0-1; tasting expects "PM" */
 	u_int32_t  am_mapcnt;	/* offset 4; compared against entry 0's value */
 	u_int32_t  am_start;	/* offset 8; multiplied by 512 for the slice offset */
 	u_int32_t  am_partcnt;	/* offset 12; multiplied by 512 for the slice length */
 	char       am_name[32];	/* 32 bytes from offset 16 */
 	char       am_type[32];	/* 32 bytes from offset 48 */
 };
 
 /*
  * Per-geom state: the decoded sector-0 driver record fields plus up to
  * NAPMPART decoded partition map entries.
  */
 struct g_apple_softc {
 	u_int16_t dd_bsiz;	/* sector 0, offset 2; tasting requires 512 */
 	u_int32_t dd_blkcnt;	/* sector 0, offset 4 */
 	u_int16_t dd_drvrcnt;	/* sector 0, offset 16 */
 	u_int32_t am_mapcnt0;	/* am_mapcnt of the first map entry */
 	struct apm_partition apmpart[NAPMPART];
 };
 
 /*
  * Decode the big-endian sector-0 driver record at ptr into sc.
  *
  * dd_drvrcnt is a 16-bit field, so it is read with be16dec().  A 32-bit
  * load at offset 16 stored into the 16-bit member would keep only the
  * low half, i.e. bytes 18-19 instead of the intended bytes 16-17.
  */
 static void
 g_dec_drvrdesc(u_char *ptr, struct g_apple_softc *sc)
 {
 	sc->dd_bsiz = be16dec(ptr + 2);
 	sc->dd_blkcnt = be32dec(ptr + 4);
 	sc->dd_drvrcnt = be16dec(ptr + 16);
 }
 
 /*
  * Decode one big-endian partition map sector at ptr into d.
  *
  * The name and type fields are fixed 32-byte arrays read straight from
  * disk and are later printed with "%s".  Nothing guarantees the on-disk
  * bytes contain a NUL, so the last byte of each copy is forced to NUL
  * to keep those reads inside the arrays.
  */
 static void
 g_dec_apple_partition(u_char *ptr, struct apm_partition *d)
 {
 	d->am_sig[0] = ptr[0];
 	d->am_sig[1] = ptr[1];
 	d->am_mapcnt = be32dec(ptr + 4);
 	d->am_start = be32dec(ptr + 8);
 	d->am_partcnt = be32dec(ptr + 12);
 	memcpy(d->am_name, ptr + 16, sizeof(d->am_name));
 	d->am_name[sizeof(d->am_name) - 1] = '\0';
 	memcpy(d->am_type, ptr + 48, sizeof(d->am_type));
 	d->am_type[sizeof(d->am_type) - 1] = '\0';
 }
 
 /*
  * Start hook passed to g_slice_new(): answer the "APM::offset"
  * attribute with the offset of the addressed slice.  Returns 1 when the
  * request was handled here, 0 otherwise.
  */
 static int
 g_apple_start(struct bio *bp)
 {
 	struct g_provider *pp;
 	struct g_slicer *gsp;
 
 	pp = bp->bio_to;
 	gsp = pp->geom->softc;
 	if (bp->bio_cmd != BIO_GETATTR)
 		return (0);
 	if (g_handleattr_off_t(bp, "APM::offset",
 	    gsp->slices[pp->index].offset))
 		return (1);
 	return (0);
 }
 
 /*
  * Dumpconf method: let g_slice_dumpconf() do the common part, then, for
  * a provider, add the partition's name and type.  A NULL indent selects
  * the terse one-line form, anything else the XML form.
  */
 static void
 g_apple_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp __unused, struct g_provider *pp)
 {
 	struct g_apple_softc *softc;
 	struct apm_partition *part;
 	struct g_slicer *gsp;
 
 	gsp = gp->softc;
 	softc = gsp->softc;
 	g_slice_dumpconf(sb, indent, gp, cp, pp);
 	if (pp == NULL)
 		return;
 
 	part = &softc->apmpart[pp->index];
 	if (indent != NULL) {
 		sbuf_printf(sb, "%s<name>%s</name>\n", indent,
 		    part->am_name);
 		sbuf_printf(sb, "%s<type>%s</type>\n", indent,
 		    part->am_type);
 		return;
 	}
 	sbuf_printf(sb, " ty %s", part->am_type);
 	/* The name is only printed when it is not empty. */
 	if (part->am_name[0] != '\0')
 		sbuf_printf(sb, " sn %s", part->am_name);
 }
 
 #if 0
 static void
 g_apple_print()
 {
 
 	/* XXX */
 }
 #endif
 
 /*
  * Taste method: look for an Apple partition map on the provider.
  *
  * Sector 0 must carry the "ER" driver record signature and report a
  * 512-byte block size; sector 1 must be a "PM" map entry.  The next
  * NAPMPART - 1 sectors are then decoded, and every entry with a "PM"
  * signature and the same map count as the first entry is configured as
  * a slice.
  *
  * Returns the geom if at least one provider was created, else NULL.
  */
 static struct g_geom *
 g_apple_taste(struct g_class *mp, struct g_provider *pp, int insist)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	int error, i;
 	struct g_apple_softc *ms;
 	struct apm_partition *apm;
 	u_int sectorsize;
 	u_char *buf;
 
 	g_trace(G_T_TOPOLOGY, "apple_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
 	gp = g_slice_new(mp, NAPMPART, pp, &cp, &ms, sizeof *ms, g_apple_start);
 	if (gp == NULL)
 		return (NULL);
 	g_topology_unlock();
 	do {
 		if (gp->rank != 2 && insist == 0)
 			break;
 
 		sectorsize = cp->provider->sectorsize;
 		if (sectorsize != 512)
 			break;
 
 		buf = g_read_data(cp, 0, sectorsize, &error);
-		if (buf == NULL || error != 0)
+		if (buf == NULL)
 			break;
 
 		/*
 		 * Test for the sector 0 driver record signature, and
 		 * validate sector and disk size.  Both signature bytes
 		 * must match, so reject if either one differs.
 		 */
 		if (buf[0] != 'E' || buf[1] != 'R') {
 			g_free(buf);
 			break;
 		}
 		g_dec_drvrdesc(buf, ms);
 		g_free(buf);
 
 		if (ms->dd_bsiz != 512) {
 			break;
 		}
 
 		/*
 		 * Read in the first partition map
 		 */
 		buf = g_read_data(cp, sectorsize, sectorsize,  &error);
-		if (buf == NULL || error != 0)
+		if (buf == NULL)
 			break;
 
 		/*
 		 * Decode the first partition: it's another indication of
 		 * validity, as well as giving the size of the partition
 		 * map
 		 */
 		apm = &ms->apmpart[0];
 		g_dec_apple_partition(buf, apm);
 		g_free(buf);
 
 		if (apm->am_sig[0] != 'P' || apm->am_sig[1] != 'M')
 			break;
 		ms->am_mapcnt0 = apm->am_mapcnt;
 
 		/* Read the remaining NAPMPART - 1 map sectors in one go. */
 		buf = g_read_data(cp, 2 * sectorsize,
 		    (NAPMPART - 1) * sectorsize,  &error);
-		if (buf == NULL || error != 0)
+		if (buf == NULL)
 			break;
 
 		for (i = 1; i < NAPMPART; i++) {
 			g_dec_apple_partition(buf + ((i - 1) * sectorsize),
 			    &ms->apmpart[i]);
 		}
 
 		for (i = 0; i < NAPMPART; i++) {
 			apm = &ms->apmpart[i];
 
 			/*
 			 * Validate partition sig and global mapcount
 			 */
 			if (apm->am_sig[0] != 'P' ||
 			    apm->am_sig[1] != 'M')
 				continue;
 			if (apm->am_mapcnt != ms->am_mapcnt0)
 				continue;
 
 			if (bootverbose) {
 				printf("APM Slice %d (%s/%s) on %s:\n",
 				    i + 1, apm->am_name, apm->am_type,
 				    gp->name);
 				/* g_apple_print(i, dp + i); */
 			}
 			/* Start and length are in 512-byte blocks: << 9. */
 			g_topology_lock();
 			g_slice_config(gp, i, G_SLICE_CONFIG_SET,
 			    (off_t)apm->am_start << 9ULL,
 			    (off_t)apm->am_partcnt << 9ULL,
 			    sectorsize,
 			    "%ss%d", gp->name, i + 1);
 			g_topology_unlock();
 		}
 		g_free(buf);
 		break;
 	} while(0);
 	g_topology_lock();
 	/* Drop the read access used for tasting. */
 	g_access(cp, -1, 0, 0);
 	if (LIST_EMPTY(&gp->provider)) {
 		g_slice_spoiled(cp);
 		return (NULL);
 	}
 	return (gp);
 }
 
 
 /*
  * Method table for the APPLE class: only tasting and dumpconf are set
  * here.
  */
 static struct g_class g_apple_class	= {
 	.name = APPLE_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_apple_taste,
 	.dumpconf = g_apple_dumpconf,
 };
 
 DECLARE_GEOM_CLASS(g_apple_class, g_apple);
Index: head/sys/geom/geom_bsd.c
===================================================================
--- head/sys/geom/geom_bsd.c	(revision 152966)
+++ head/sys/geom/geom_bsd.c	(revision 152967)
@@ -1,678 +1,678 @@
 /*-
  * Copyright (c) 2002 Poul-Henning Kamp
  * Copyright (c) 2002 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
  * and NAI Labs, the Security Research Division of Network Associates, Inc.
  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The names of the authors may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * This is the method for dealing with BSD disklabels.  It has been
  * extensively (by my standards at least) commented, in the vain hope that
  * it will serve as the source in future copy&paste operations.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/endian.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/conf.h>
 #include <sys/bio.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/md5.h>
 #include <sys/errno.h>
 #include <sys/disklabel.h>
 #include <geom/geom.h>
 #include <geom/geom_slice.h>
 
 #define	BSD_CLASS_NAME "BSD"
 
 #define ALPHA_LABEL_OFFSET	64
 
 #define LABELSIZE (148 + 16 * MAXPARTITIONS)
 
 static void g_bsd_hotwrite(void *arg, int flag);
 /*
  * Our private data about one instance.  All the rest is handled by the
  * slice code and stored in its softc, so this is just the stuff
  * specific to BSD disklabels.
  */
 struct g_bsd_softc {
 	/* Byte offset at which g_bsd_try() last looked for the label. */
 	off_t	labeloffset;
 	/* Value of the "MBR::offset" or "PC98::offset" attribute, if any. */
 	off_t	mbroffset;
 	/* Raw-partition byte offset computed by g_bsd_modify(). */
 	off_t	rawoffset;
 	/* Decoded (native format) disklabel. */
 	struct disklabel ondisk;
 	/* Raw little-endian label bytes as found on disk or set by ioctl. */
 	u_char	label[LABELSIZE];
 	/* MD5 of label[], served through the "BSD::labelsum" attribute. */
 	u_char	labelsum[16];
 };
 
 /*
  * Modify our slicer to match proposed disklabel, if possible.
  * This is where we make sure we don't do something stupid.
  */
 static int
 g_bsd_modify(struct g_geom *gp, u_char *label)
 {
 	int i, error;
 	struct partition *ppp;
 	struct g_slicer *gsp;
 	struct g_consumer *cp;
 	struct g_bsd_softc *ms;
 	u_int secsize, u;
 	off_t rawoffset, o;
 	struct disklabel dl;
 	MD5_CTX md5sum;
 
 	g_topology_assert();
 	gsp = gp->softc;
 	ms = gsp->softc;
 
 	/* Decode the proposed little-endian label; reject it if invalid. */
 	error = bsd_disklabel_le_dec(label, &dl, MAXPARTITIONS);
 	if (error) {
 		return (error);
 	}
 
 	/* Get dimensions of our device. */
 	cp = LIST_FIRST(&gp->consumer);
 	secsize = cp->provider->sectorsize;
 
 	/* Reject a label sector size smaller than the provider's. */
 	if (dl.d_secsize < secsize) {
 		return (EINVAL);
 	}
 
 	/* ... or a non-multiple sector size. */
 	if (dl.d_secsize % secsize != 0) {
 		return (EINVAL);
 	}
 
 	/* Historical braindamage... */
 	rawoffset = (off_t)dl.d_partitions[RAW_PART].p_offset * dl.d_secsize;
 
 	/*
 	 * If any non-empty partition starts below the raw partition's
 	 * offset, treat the partition offsets as not being biased by it.
 	 */
 	for (i = 0; i < dl.d_npartitions; i++) {
 		ppp = &dl.d_partitions[i];
 		if (ppp->p_size == 0)
 			continue;
 	        o = (off_t)ppp->p_offset * dl.d_secsize;
 
 		if (o < rawoffset)
 			rawoffset = 0;
 	}
 	
 	if (rawoffset != 0 && (off_t)rawoffset != ms->mbroffset)
 		printf("WARNING: Expected rawoffset %jd, found %jd\n",
 		    (intmax_t)ms->mbroffset/dl.d_secsize,
 		    (intmax_t)rawoffset/dl.d_secsize);
 
 	/*
 	 * Don't munge open partitions: dry-run every partition through
 	 * the slice code first and bail out on the first refusal.
 	 */
 	for (i = 0; i < dl.d_npartitions; i++) {
 		ppp = &dl.d_partitions[i];
 
 	        o = (off_t)ppp->p_offset * dl.d_secsize;
 		/* An offset of zero is mapped to the raw offset. */
 		if (o == 0)
 			o = rawoffset;
 		error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
 		    o - rawoffset,
 		    (off_t)ppp->p_size * dl.d_secsize,
 		     dl.d_secsize,
 		    "%s%c", gp->name, 'a' + i);
 		if (error)
 			return (error);
 	}
 
 	/* Look good, go for it... (this pass covers every slice slot) */
 	for (u = 0; u < gsp->nslice; u++) {
 		ppp = &dl.d_partitions[u];
 	        o = (off_t)ppp->p_offset * dl.d_secsize;
 		if (o == 0)
 			o = rawoffset;
 		g_slice_config(gp, u, G_SLICE_CONFIG_SET,
 		    o - rawoffset,
 		    (off_t)ppp->p_size * dl.d_secsize,
 		     dl.d_secsize,
 		    "%s%c", gp->name, 'a' + u);
 	}
 
 	/* Update our softc */
 	ms->ondisk = dl;
 	/* The caller may have passed ms->label itself; skip the self-copy. */
 	if (label != ms->label)
 		bcopy(label, ms->label, LABELSIZE);
 	ms->rawoffset = rawoffset;
 
 	/*
 	 * In order to avoid recursively attaching to the same
 	 * on-disk label (it's usually visible through the 'c'
 	 * partition) we calculate an MD5 and ask if other BSD's
 	 * below us love that label.  If they do, we don't.
 	 */
 	MD5Init(&md5sum);
 	MD5Update(&md5sum, ms->label, sizeof(ms->label));
 	MD5Final(ms->labelsum, &md5sum);
 
 	return (0);
 }
 
 /*
  * This is an internal helper function, called multiple times from the taste
  * function to try to locate a disklabel on the disk.  More civilized formats
  * will not need this, as there is only one possible place on disk to look
  * for the magic spot.
  */
 
 /*
  * Returns 0 if a label was decoded at 'offset', ENOENT if the sector
  * could not be read, or the error from bsd_disklabel_le_dec().
  * ms->labeloffset is set to 'offset' whenever the read succeeded, even
  * if decoding then failed.  gp and gsp are not used here.
  */
 static int
 g_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset)
 {
 	int error;
 	u_char *buf;
 	struct disklabel *dl;
 	off_t secoff;
 
 	/*
 	 * We need to read entire aligned sectors, and we assume that the
 	 * disklabel does not span sectors, so one sector is enough.
 	 */
 	error = 0;
 	/* Offset of the label within its sector. */
 	secoff = offset % secsize;
 	buf = g_read_data(cp, offset - secoff, secsize, &error);
-	if (buf == NULL || error != 0)
+	if (buf == NULL)
 		return (ENOENT);
 
 	/* Decode into our native format. */
 	dl = &ms->ondisk;
 	error = bsd_disklabel_le_dec(buf + secoff, dl, MAXPARTITIONS);
 	/* Keep the raw bytes only when they decoded cleanly. */
 	if (!error)
 		bcopy(buf + secoff, ms->label, LABELSIZE);
 
 	/* Remember to free the buffer g_read_data() gave us. */
 	g_free(buf);
 
 	ms->labeloffset = offset;
 	return (error);
 }
 
 /*
  * This function writes the current label to disk, possibly updating
  * the alpha SRM checksum.
  */
 
 /*
  * With bootcode == NULL the sector holding the label is read, patched
  * with ms->label and written back.  Otherwise the label is patched into
  * the caller's bootcode buffer at ms->labeloffset and BBSIZE bytes are
  * written at offset 0.  Returns 0 or an error number.
  */
 static int
 g_bsd_writelabel(struct g_geom *gp, u_char *bootcode)
 {
 	off_t secoff;
 	u_int secsize;
 	struct g_consumer *cp;
 	struct g_slicer *gsp;
 	struct g_bsd_softc *ms;
 	u_char *buf;
 	uint64_t sum;
 	int error, i;
 
 	gsp = gp->softc;
 	ms = gsp->softc;
 	cp = LIST_FIRST(&gp->consumer);
 	/* Get sector size, we need it to read data. */
 	secsize = cp->provider->sectorsize;
 	/* Offset of the label within its sector. */
 	secoff = ms->labeloffset % secsize;
 	if (bootcode == NULL) {
 		buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error);
-		if (buf == NULL || error != 0)
+		if (buf == NULL)
 			return (error);
 		bcopy(ms->label, buf + secoff, sizeof(ms->label));
 	} else {
 		buf = bootcode;
 		bcopy(ms->label, buf + ms->labeloffset, sizeof(ms->label));
 	}
 	if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
 		/*
 		 * Alpha label: store the sum of the first 63 little-endian
 		 * 64-bit words at byte 504.
 		 */
 		sum = 0;
 		for (i = 0; i < 63; i++)
 			sum += le64dec(buf + i * 8);
 		le64enc(buf + 504, sum);
 	}
 	if (bootcode == NULL) {
 		error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize);
 		/* buf came from g_read_data() in this branch, so free it. */
 		g_free(buf);
 	} else {
 		error = g_write_data(cp, 0, bootcode, BBSIZE);
 	}
 	return(error);
 }
 
 /*
  * If the user tries to overwrite our disklabel through an open partition
  * or via a magicwrite config call, we end up here and try to prevent
  * footshooting as best we can.
  */
 static void
 g_bsd_hotwrite(void *arg, int flag)
 {
 	struct bio *bp;
 	struct g_geom *gp;
 	struct g_slicer *gsp;
 	struct g_slice *gsl;
 	struct g_bsd_softc *ms;
 	u_char *p;
 	int error;
 	
 	g_topology_assert();
 	/*
 	 * We should never get canceled, because that would amount to a removal
 	 * of the geom while there was outstanding I/O requests.
 	 */
 	KASSERT(flag != EV_CANCEL, ("g_bsd_hotwrite cancelled"));
 	bp = arg;
 	gp = bp->bio_to->geom;
 	gsp = gp->softc;
 	ms = gsp->softc;
 	gsl = &gsp->slices[bp->bio_to->index];
 	p = (u_char*)bp->bio_data + ms->labeloffset 
 	    - (bp->bio_offset + gsl->offset);
 	error = g_bsd_modify(gp, p);
 	if (error) {
 		g_io_deliver(bp, EPERM);
 		return;
 	}
 	g_slice_finish_hot(bp);
 }
 
 /*-
  * This start routine is only called for non-trivial requests, all the
  * trivial ones are handled autonomously by the slice code.
  * For requests we handle here, we must call the g_io_deliver() on the
  * bio, and return non-zero to indicate to the slice code that we did so.
  * This code executes in the "DOWN" I/O path, this means:
  *    * No sleeping.
  *    * Don't grab the topology lock.
  *    * Don't call biowait, g_getattr(), g_setattr() or g_read_data()
  */
 /*
  * NOTE(review): the comment block directly above describes the start
  * routine and its "DOWN" path restrictions; this function is the ioctl
  * handler, which sleeps in g_malloc(M_WAITOK) and takes the topology
  * lock.
  *
  * Handles DIOCGDINFO (return the label), DIOCBSDBB (install boot blocks
  * with embedded label), DIOCSDINFO (set the in-core label) and
  * DIOCWDINFO (set and write the label).  Anything else yields ENOIOCTL.
  */
 static int
 g_bsd_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td)
 {
 	struct g_geom *gp;
 	struct g_bsd_softc *ms;
 	struct g_slicer *gsp;
 	u_char *label;
 	int error;
 
 	gp = pp->geom;
 	gsp = gp->softc;
 	ms = gsp->softc;
 
 	switch(cmd) {
 	case DIOCGDINFO:
 		/* Return a copy of the disklabel to userland. */
 		bsd_disklabel_le_dec(ms->label, data, MAXPARTITIONS);
 		return(0);
 	case DIOCBSDBB: {
 		struct g_consumer *cp;
 		u_char *buf;
 		void *p;
 		/* Note: this 'error' shadows the function-level one. */
 		int error, i;
 		uint64_t sum;
 
 		if (!(fflag & FWRITE))
 			return (EPERM);
 		/* The disklabel to set is the ioctl argument. */
 		buf = g_malloc(BBSIZE, M_WAITOK);
 		/* data holds a user-space pointer to BBSIZE bytes. */
 		p = *(void **)data;
 		error = copyin(p, buf, BBSIZE);
 		if (!error) {
 			/* XXX: Rude, but supposedly safe */
 			DROP_GIANT();
 			g_topology_lock();
 			/* Validate and modify our slice instance to match. */
 			error = g_bsd_modify(gp, buf + ms->labeloffset);
 			if (!error) {
 				cp = LIST_FIRST(&gp->consumer);
 				if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
 					/* Same word sum as in g_bsd_writelabel(). */
 					sum = 0;
 					for (i = 0; i < 63; i++)
 						sum += le64dec(buf + i * 8);
 					le64enc(buf + 504, sum);
 				}
 				error = g_write_data(cp, 0, buf, BBSIZE);
 			}
 			g_topology_unlock();
 			PICKUP_GIANT();
 		}
 		g_free(buf);
 		return (error);
 	}
 	case DIOCSDINFO:
 	case DIOCWDINFO: {
 		if (!(fflag & FWRITE))
 			return (EPERM);
 		label = g_malloc(LABELSIZE, M_WAITOK);
 		/* The disklabel to set is the ioctl argument. */
 		bsd_disklabel_le_enc(label, data);
 
 		DROP_GIANT();
 		g_topology_lock();
 		/* Validate and modify our slice instance to match. */
 		error = g_bsd_modify(gp, label);
 		/* Only DIOCWDINFO also commits the label to disk. */
 		if (error == 0 && cmd == DIOCWDINFO)
 			error = g_bsd_writelabel(gp, NULL);
 		g_topology_unlock();
 		PICKUP_GIANT();
 		g_free(label);
 		return(error);
 	}
 	default:
 		return (ENOIOCTL);
 	}
 }
 
 /*
  * Start hook passed to g_slice_new(): answer the "BSD::labelsum"
  * attribute with the MD5 of our label.  Returns 1 when the request was
  * handled here, 0 otherwise.
  */
 static int
 g_bsd_start(struct bio *bp)
 {
 	struct g_bsd_softc *ms;
 	struct g_slicer *gsp;
 
 	gsp = bp->bio_to->geom->softc;
 	ms = gsp->softc;
 	if (bp->bio_cmd != BIO_GETATTR)
 		return (0);
 	if (g_handleattr(bp, "BSD::labelsum", ms->labelsum,
 	    sizeof(ms->labelsum)))
 		return (1);
 	return (0);
 }
 
 /*
  * Dump configuration information in XML format.
  * Notice that the function is called once for the geom and once for each
  * consumer and provider.  We let g_slice_dumpconf() do most of the work.
  */
 static void
 g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_bsd_softc *ms;
 	struct g_slicer *gsp;
 	int fstype;
 
 	gsp = gp->softc;
 	ms = gsp->softc;
 	g_slice_dumpconf(sb, indent, gp, cp, pp);
 
 	if (pp != NULL) {
 		/* Provider: report the partition's filesystem type. */
 		fstype = ms->ondisk.d_partitions[pp->index].p_fstype;
 		if (indent != NULL)
 			sbuf_printf(sb, "%s<type>%d</type>\n", indent,
 			    fstype);
 		else
 			sbuf_printf(sb, " ty %d", fstype);
 		return;
 	}
 
 	/* Geom itself, XML form only: report the three offsets. */
 	if (indent != NULL && cp == NULL) {
 		sbuf_printf(sb, "%s<labeloffset>%jd</labeloffset>\n",
 		    indent, (intmax_t)ms->labeloffset);
 		sbuf_printf(sb, "%s<rawoffset>%jd</rawoffset>\n",
 		    indent, (intmax_t)ms->rawoffset);
 		sbuf_printf(sb, "%s<mbroffset>%jd</mbroffset>\n",
 		    indent, (intmax_t)ms->mbroffset);
 	}
 }
 
 /*
  * The taste function is called from the event-handler, with the topology
  * lock already held and a provider to examine.  The flags argument selects
  * how eagerly we attach.
  *
  * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and,
  * if we find valid, consistent magic on it, build a geom on it.  In this
  * mode we also respect any magic bits (the MBR or PC98 slice type) which
  * indicate whether we should automatically put a BSD geom on it.
  *
  * There may be cases where the operator would like to put a BSD-geom on
  * providers which do not meet all of the requirements.  This can be done
  * by instead passing the G_TF_INSIST flag, which will override these
  * checks.
  *
  * The final flags value is G_TF_TRANSPARENT, which instructs the method
  * to put a geom on top of the provider and configure it to be as transparent
  * as possible.  This is not really relevant to the BSD method and therefore
  * not implemented here.
  *
  * Returns the new geom if at least one provider was configured from the
  * label, otherwise NULL.
  */
 
 static struct g_geom *
 g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	int error, i;
 	struct g_bsd_softc *ms;
 	u_int secsize;
 	struct g_slicer *gsp;
 	u_char hash[16];
 	MD5_CTX md5sum;
 
 	g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
 
 	/* We don't implement transparent inserts. */
 	if (flags == G_TF_TRANSPARENT)
 		return (NULL);
 
 	/*
 	 * BSD labels are a subclass of the general "slicing" topology so
 	 * a lot of the work can be done by the common "slice" code.
 	 * Create a geom with space for MAXPARTITIONS providers, one consumer
 	 * and a softc structure for us.  Specify the provider to attach
 	 * the consumer to and our "start" routine for special requests.
 	 * The provider is opened with mode (1,0,0) so we can do reads
 	 * from it.
 	 */
 	gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms,
 	     sizeof(*ms), g_bsd_start);
 	if (gp == NULL)
 		return (NULL);
 
 	/* Get the geom_slicer softc from the geom. */
 	gsp = gp->softc;
 
 	/*
 	 * The do...while loop here allows us to have multiple escapes
 	 * using a simple "break".  This improves code clarity without
 	 * ending up in deep nesting and without using goto or come from.
 	 */
 	do {
 		/*
 		 * If the provider is an MBR slice we will only auto attach
 		 * to type 165 slices in the G_TF_NORMAL case.  For any other
 		 * flags value we attach regardless of the slice type.
 		 * A failing "MBR::type" query simply means we are not on an
 		 * MBR slice and is not treated as an error.
 		 */
 		error = g_getattr("MBR::type", cp, &i);
 		if (!error) {
 			if (i != 165 && flags == G_TF_NORMAL)
 				break;
 			error = g_getattr("MBR::offset", cp, &ms->mbroffset);
 			if (error)
 				break;
 		}
 
 		/* Same thing if we are inside a PC98 */
 		error = g_getattr("PC98::type", cp, &i);
 		if (!error) {
 			if (i != 0xc494 && flags == G_TF_NORMAL)
 				break;
 			error = g_getattr("PC98::offset", cp, &ms->mbroffset);
 			if (error)
 				break;
 		}
 
 		/* Get sector size, we need it to read data. */
 		secsize = cp->provider->sectorsize;
 		if (secsize < 512)
 			break;
 
 		/* First look for a label at the start of the second sector. */
 		error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize);
 
 		/* Next, look for alpha labels */
 		if (error)
 			error = g_bsd_try(gp, gsp, cp, secsize, ms,
 			    ALPHA_LABEL_OFFSET);
 
 		/* If we didn't find a label, punt. */
 		if (error)
 			break;
 
 		/*
 		 * In order to avoid recursively attaching to the same
 		 * on-disk label (it's usually visible through the 'c'
 		 * partition) we calculate an MD5 and ask if other BSD's
 		 * below us love that label.  If they do, we don't.
 		 */
 		MD5Init(&md5sum);
 		MD5Update(&md5sum, ms->label, sizeof(ms->label));
 		MD5Final(ms->labelsum, &md5sum);
 
 		/* A matching checksum from below means this label is already in use. */
 		error = g_getattr("BSD::labelsum", cp, &hash);
 		if (!error && !bcmp(ms->labelsum, hash, sizeof(hash)))
 			break;
 
 		/*
 		 * Process the found disklabel, and modify our "slice"
 		 * instance to match it, if possible.
 		 */
 		error = g_bsd_modify(gp, ms->label);
 	} while (0);
 
 	/* Success or failure, we can close our provider now. */
 	g_access(cp, -1, 0, 0);
 
 	/* If we have configured any providers, return the new geom. */
 	if (gsp->nprovider > 0) {
 		/* Register the on-disk label area as a "hot spot" handled by g_bsd_hotwrite. */
 		g_slice_conf_hot(gp, 0, ms->labeloffset, LABELSIZE,
 		    G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL);
 		gsp->hot = g_bsd_hotwrite;
 		return (gp);
 	}
 	/*
 	 * ...else push the "self-destruct" button, by spoiling our own
 	 * consumer.  This triggers a call to g_slice_spoiled which will
 	 * dismantle what was setup.
 	 */
 	g_slice_spoiled(cp);
 	return (NULL);
 }
 
 /*
  * Argument bundle handed to g_bsd_callconfig().
  */
 struct h0h0 {
 	struct g_geom *gp;		/* geom whose label is being replaced */
 	struct g_bsd_softc *ms;		/* BSD softc of that geom; set by g_bsd_config() */
 	u_char *label;			/* new label image passed to g_bsd_modify() */
 	int error;			/* result, filled in by g_bsd_callconfig() */
 };
 
 static void
 g_bsd_callconfig(void *arg, int flag)
 {
 	struct h0h0 *hp;
 
 	hp = arg;
 	hp->error = g_bsd_modify(hp->gp, hp->label);
 	if (!hp->error)
 		hp->error = g_bsd_writelabel(hp->gp, NULL);
 }
 
 /*
  * Control-request handler for the BSD class.
  *
  * Supported verbs:
  *   "read mbroffset"  - return ms->mbroffset in the "mbroffset" parameter.
  *   "write label"     - take the LABELSIZE-byte "label" parameter, apply it
  *                       with g_bsd_modify() and then call g_bsd_writelabel().
  *   "write bootcode"  - take the BBSIZE-byte "bootcode" parameter and hand
  *                       it to g_bsd_writelabel().
  *
  * Every failure, including a failed label or bootcode write, is reported
  * back to the requester through gctl_error().
  *
  * NB! curthread is user process which GCTL'ed.
  */
 static void
 g_bsd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
 {
 	u_char *label;
 	int error;
 	struct h0h0 h0h0;
 	struct g_geom *gp;
 	struct g_slicer *gsp;
 	struct g_consumer *cp;
 	struct g_bsd_softc *ms;
 
 	g_topology_assert();
 	gp = gctl_get_geom(req, mp, "geom");
 	if (gp == NULL)
 		return;
 	cp = LIST_FIRST(&gp->consumer);
 	gsp = gp->softc;
 	ms = gsp->softc;
 	if (!strcmp(verb, "read mbroffset")) {
 		gctl_set_param(req, "mbroffset",
 		    &ms->mbroffset, sizeof(ms->mbroffset));
 		return;
 	} else if (!strcmp(verb, "write label")) {
 		label = gctl_get_paraml(req, "label", LABELSIZE);
 		if (label == NULL)
 			return;
 		h0h0.gp = gp;
 		h0h0.ms = ms;
 		h0h0.label = label;
 		h0h0.error = -1;
 		/* XXX: Does this reference register with our selfdestruct code ? */
 		error = g_access(cp, 1, 1, 1);
 		if (error) {
 			gctl_error(req, "could not access consumer");
 			return;
 		}
 		g_bsd_callconfig(&h0h0, 0);
 		error = h0h0.error;
 		g_access(cp, -1, -1, -1);
 		/* Do not let a rejected or unwritten label pass as success. */
 		if (error)
 			gctl_error(req, "could not write label (error %d)",
 			    error);
 	} else if (!strcmp(verb, "write bootcode")) {
 		label = gctl_get_paraml(req, "bootcode", BBSIZE);
 		if (label == NULL)
 			return;
 		/* XXX: Does this reference register with our selfdestruct code ? */
 		error = g_access(cp, 1, 1, 1);
 		if (error) {
 			gctl_error(req, "could not access consumer");
 			return;
 		}
 		error = g_bsd_writelabel(gp, label);
 		g_access(cp, -1, -1, -1);
 		/* Same here: surface the write failure to the requester. */
 		if (error)
 			gctl_error(req, "could not write bootcode (error %d)",
 			    error);
 	} else {
 		gctl_error(req, "Unknown verb parameter");
 	}
 }
 
 /*
  * Finally, register with GEOM infrastructure.
  * The class table wires up the taste, control-request, dumpconf and ioctl
  * entry points of this file.
  */
 static struct g_class g_bsd_class = {
 	.name = BSD_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_bsd_taste,
 	.ctlreq = g_bsd_config,
 	.dumpconf = g_bsd_dumpconf,
 	.ioctl = g_bsd_ioctl,
 };
 
 DECLARE_GEOM_CLASS(g_bsd_class, g_bsd);
Index: head/sys/geom/geom_fox.c
===================================================================
--- head/sys/geom/geom_fox.c	(revision 152966)
+++ head/sys/geom/geom_fox.c	(revision 152967)
@@ -1,473 +1,473 @@
 /*-
  * Copyright (c) 2003 Poul-Henning Kamp
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The names of the authors may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /* This is a GEOM module for handling path selection for multi-path
  * storage devices.  It is named "fox" because it, like a fox, prefers
  * to have multiple exits to choose from.
  *
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/bio.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/libkern.h>
 #include <sys/endian.h>
 #include <sys/md5.h>
 #include <sys/errno.h>
 #include <geom/geom.h>
 
 #define FOX_CLASS_NAME "FOX"
 /* Signature that g_fox_taste() expects at the very start of sector 0. */
 #define FOX_MAGIC	"GEOM::FOX"
 
 /*
  * Per-geom state for one fox.
  */
 struct g_fox_softc {
 	off_t			mediasize;	/* size exported: underlying size minus one sector */
 	u_int			sectorsize;	/* sector size; also the offset added to every bio */
 	TAILQ_HEAD(, bio)	queue;		/* bios parked until a path is selected */
 	struct mtx		lock;		/* taken around path/waiting/queue updates */
 	u_char 			magic[16];	/* copied from bytes 16..31 of sector 0 */
 	struct g_consumer 	*path;		/* consumer currently used for I/O, or NULL */
 	struct g_consumer 	*opath;		/* previous path, to be closed by g_fox_select_path() */
 	int			waiting;	/* non-zero while a path selection is pending */
 	int			cr, cw, ce;	/* access counts accumulated in g_fox_access() */
 };
 
 /*
  * This function is called whenever we need to select a new path.
  *
  * It runs either as a GEOM event (posted with g_post_event) or directly
  * from g_fox_access(); arg is the fox geom.  If an old path is recorded in
  * sc->opath it is closed first and the consumer following it is tried next,
  * otherwise the first consumer is tried.  On success all queued bios are
  * sent down the new path; on failure another selection event is posted.
  */
 static void
 g_fox_select_path(void *arg, int flag)
 {
 	struct g_geom *gp;
 	struct g_fox_softc *sc;
 	struct g_consumer *cp1;
 	struct bio *bp;
 	int error;
 
 	g_topology_assert();
 	/* The event was cancelled: nothing to do. */
 	if (flag == EV_CANCEL)
 		return;
 	gp = arg;
 	sc = gp->softc;
 
 	if (sc->opath != NULL) {
 		/*
 		 * First, close the old path entirely.
 		 */
 		printf("Closing old path (%s) on fox (%s)\n",
 			sc->opath->provider->name, gp->name);
 
 		/* Remember the successor now; opath may be destroyed below. */
 		cp1 = LIST_NEXT(sc->opath, consumer);
 
 		/* Drop our counts plus the extra exclusive bit held on every path. */
 		g_access(sc->opath, -sc->cr, -sc->cw, -(sc->ce + 1));
 
 		/*
 		 * Then attempt to reopen it with an exclusive count
 		 */
 		error = g_access(sc->opath, 0, 0, 1);
 		if (error) {
 			/*
 			 * Ok, ditch this consumer, we can't use it.
 			 */
 			printf("Drop old path (%s) on fox (%s)\n",
 				sc->opath->provider->name, gp->name);
 			g_detach(sc->opath);
 			g_destroy_consumer(sc->opath);
 			if (LIST_EMPTY(&gp->consumer)) {
 				/* No consumers left */
 				g_wither_geom(gp, ENXIO);
 				/* Fail every bio that was waiting for a path. */
 				for (;;) {
 					bp = TAILQ_FIRST(&sc->queue);
 					if (bp == NULL)
 						break;
 					TAILQ_REMOVE(&sc->queue, bp, bio_queue);
 					bp->bio_error = ENXIO;
 					g_std_done(bp);
 				}
 				return;
 			}
 		} else {
 			printf("Got e-bit on old path (%s) on fox (%s)\n",
 				sc->opath->provider->name, gp->name);
 		}
 		sc->opath = NULL;
 	} else {
 		cp1 = LIST_FIRST(&gp->consumer);
 	}
 	/* Wrap around to the first consumer when the old path was the last one. */
 	if (cp1 == NULL)
 		cp1 = LIST_FIRST(&gp->consumer);
 	printf("Open new path (%s) on fox (%s)\n",
 		cp1->provider->name, gp->name);
 	error = g_access(cp1, sc->cr, sc->cw, sc->ce);
 	if (error) {
 		/*
 		 * If we failed, we take another trip through here
 		 */
 		printf("Open new path (%s) on fox (%s) failed, reselect.\n",
 			cp1->provider->name, gp->name);
 		sc->opath = cp1;
 		g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
 	} else {
 		printf("Open new path (%s) on fox (%s) succeeded\n",
 			cp1->provider->name, gp->name);
 		mtx_lock(&sc->lock);
 		sc->path = cp1;
 		sc->waiting = 0;
 		/* Send every parked bio down the freshly selected path. */
 		for (;;) {
 			bp = TAILQ_FIRST(&sc->queue);
 			if (bp == NULL)
 				break;
 			TAILQ_REMOVE(&sc->queue, bp, bio_queue);
 			g_io_request(bp, sc->path);
 		}
 		mtx_unlock(&sc->lock);
 	}
 }
 
 static void
 g_fox_orphan(struct g_consumer *cp)
 {
 	struct g_geom *gp;
 	struct g_fox_softc *sc;
 	int error, mark;
 
 	g_topology_assert();
 	gp = cp->geom;
 	sc = gp->softc;
 	printf("Removing path (%s) from fox (%s)\n",
 	    cp->provider->name, gp->name);
 	mtx_lock(&sc->lock);
 	if (cp == sc->path) {
 		sc->opath = NULL;
 		sc->path = NULL;
 		sc->waiting = 1;
 		mark = 1;
 	} else {
 		mark = 0;
 	}
 	mtx_unlock(&sc->lock);
 	    
 	g_access(cp, -cp->acr, -cp->acw, -cp->ace);
 	error = cp->provider->error;
 	g_detach(cp);
 	g_destroy_consumer(cp);	
 	if (!LIST_EMPTY(&gp->consumer)) {
 		if (mark)
 			g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL);
 		return;
 	}
 
 	mtx_destroy(&sc->lock);
 	g_free(gp->softc);
 	gp->softc = NULL;
 	g_wither_geom(gp, ENXIO);
 }
 
 /*
  * Completion handler for bios cloned in g_fox_start().
  *
  * Successful bios are completed with g_std_done().  A failed bio is
  * handled as follows:
  *  - if a different path has been selected since the bio was issued, it
  *    is simply reissued on the current path;
  *  - otherwise the bio is parked on the queue until g_fox_select_path()
  *    has found a new path.  If the bio failed on the current path, that
  *    path is retired to sc->opath first.  A selection event is posted
  *    unless one is already pending; if posting fails the bio is completed
  *    with ENOMEM.
  *
  * sc->path is sampled under the lock and never handed to g_io_request()
  * when it is NULL, which happens when several in-flight bios fail on the
  * same path one after the other.
  */
 static void
 g_fox_done(struct bio *bp)
 {
 	struct g_geom *gp;
 	struct g_fox_softc *sc;
 	struct g_consumer *cur;
 	int error;
 
 	if (bp->bio_error == 0) {
 		g_std_done(bp);
 		return;
 	}
 	gp = bp->bio_from->geom;
 	sc = gp->softc;
 	mtx_lock(&sc->lock);
 	cur = sc->path;
 	if (cur != NULL && bp->bio_from != cur) {
 		/* A new path is already in place: retry there. */
 		g_io_request(bp, cur);
 		mtx_unlock(&sc->lock);
 		return;
 	}
 	if (cur != NULL) {
 		/* The bio failed on the active path: retire it. */
 		sc->opath = cur;
 		sc->path = NULL;
 	}
 	if (sc->waiting == 0) {
 		error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL);
 		if (error) {
 			mtx_unlock(&sc->lock);
 			bp->bio_error = ENOMEM;
 			g_std_done(bp);
 			return;
 		}
 		sc->waiting = 1;
 	}
 	/* Park the bio; g_fox_select_path() reissues or fails it. */
 	TAILQ_INSERT_TAIL(&sc->queue, bp, bio_queue);
 	mtx_unlock(&sc->lock);
 }
 
 /*
  * Start routine for the fox provider.
  *
  * BIO_READ, BIO_WRITE and BIO_DELETE are cloned, shifted by one sector
  * (sc->sectorsize) and either sent down the current path or, when no path
  * is available or other bios are already waiting, parked on the queue.
  * A path selection event is posted if none is pending.  All other
  * commands are answered with EOPNOTSUPP; a geom without softc answers
  * ENXIO.
  */
 static void
 g_fox_start(struct bio *bp)
 {
 	struct g_fox_softc *sc;
 	struct g_geom *gp;
 	struct bio *clone;
 	int rc;
 
 	gp = bp->bio_to->geom;
 	sc = gp->softc;
 	if (sc == NULL) {
 		g_io_deliver(bp, ENXIO);
 		return;
 	}
 
 	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
 	    bp->bio_cmd != BIO_DELETE) {
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 
 	clone = g_clone_bio(bp);
 	if (clone == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 	clone->bio_offset += sc->sectorsize;
 	clone->bio_done = g_fox_done;
 
 	mtx_lock(&sc->lock);
 	if (sc->path != NULL && TAILQ_EMPTY(&sc->queue)) {
 		/* Fast path: a usable path and nothing queued ahead of us. */
 		g_io_request(clone, sc->path);
 	} else if (sc->waiting != 0) {
 		/* A selection is already under way: just get in line. */
 		TAILQ_INSERT_TAIL(&sc->queue, clone, bio_queue);
 	} else {
 		/* Nobody is selecting a path yet: kick that off first. */
 		rc = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL);
 		if (rc != 0) {
 			g_destroy_bio(clone);
 			g_io_deliver(bp, rc);
 		} else {
 			sc->waiting = 1;
 			TAILQ_INSERT_TAIL(&sc->queue, clone, bio_queue);
 		}
 	}
 	mtx_unlock(&sc->lock);
 }
 
 /*
  * Access method for the fox provider.
  *
  * On the first open (all recorded counts are zero) an exclusive bit is
  * taken on every consumer.  The requested delta is then applied to the
  * active path, selecting one first if necessary.  On the last close the
  * exclusive bits are dropped again.
  *
  * If the request fails during a first open, the exclusive bits taken in
  * this call are released before returning, so that a later open starts
  * from a clean state instead of stacking a second exclusive bit on every
  * consumer.
  *
  * Returns 0 on success or an errno value.  A geom whose softc is gone
  * accepts only requests that do not increase any count.
  */
 static int
 g_fox_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_geom *gp;
 	struct g_fox_softc *sc;
 	struct g_consumer *cp1;
 	int error, first;
 
 	g_topology_assert();
 	gp = pp->geom;
 	sc = gp->softc;
 	if (sc == NULL) {
 		if (dr <= 0 && dw <= 0 && de <= 0)
 			return (0);
 		else
 			return (ENXIO);
 	}
 
 	first = (sc->cr == 0 && sc->cw == 0 && sc->ce == 0);
 	if (first) {
 		/*
 		 * First open, open all consumers with an exclusive bit
 		 */
 		error = 0;
 		LIST_FOREACH(cp1, &gp->consumer, consumer) {
 			error = g_access(cp1, 0, 0, 1);
 			if (error) {
 				printf("FOX: access(%s,0,0,1) = %d\n",
 				    cp1->provider->name, error);
 				break;
 			}
 		}
 		if (error) {
 			/* Undo the bits we did get before the failure. */
 			LIST_FOREACH(cp1, &gp->consumer, consumer) {
 				if (cp1->ace)
 					g_access(cp1, 0, 0, -1);
 			}
 			return (error);
 		}
 	}
 	if (sc->path == NULL)
 		g_fox_select_path(gp, 0);
 	if (sc->path == NULL)
 		error = ENXIO;
 	else
 		error = g_access(sc->path, dr, dw, de);
 	if (error != 0) {
 		if (first) {
 			/*
 			 * The open did not happen after all: give back the
 			 * exclusive bits taken above.
 			 */
 			LIST_FOREACH(cp1, &gp->consumer, consumer) {
 				if (cp1->ace > 0)
 					g_access(cp1, 0, 0, -1);
 			}
 		}
 		return (error);
 	}
 
 	sc->cr += dr;
 	sc->cw += dw;
 	sc->ce += de;
 	if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) {
 		/*
 		 * Last close, remove e-bit on all consumers
 		 */
 		LIST_FOREACH(cp1, &gp->consumer, consumer)
 			g_access(cp1, 0, 0, -1);
 	}
 	return (0);
 }
 
 /*
  * Taste method: examine provider pp for a fox signature.
  *
  * A scratch geom with one consumer is created and sector 0 is read.  If
  * the sector starts with FOX_MAGIC, the 16 bytes at offset 16 identify the
  * fox: when an existing fox geom carries the same bytes, pp is attached to
  * it as an additional path; otherwise the scratch geom becomes a new fox
  * with one provider that is one sector smaller than pp.
  *
  * Returns the new geom when a new fox was created, NULL in every other
  * case (including "added as a path to an existing fox").  The flags
  * argument is unused.
  */
 static struct g_geom *
 g_fox_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_geom *gp, *gp2;
 	struct g_provider *pp2;
 	struct g_consumer *cp, *cp2;
 	struct g_fox_softc *sc, *sc2;
 	int error;
 	u_int sectorsize;
 	u_char *buf;
 
 	g_trace(G_T_TOPOLOGY, "fox_taste(%s, %s)", mp->name, pp->name);
 	g_topology_assert();
 	/* Never stack a fox directly on top of a fox provider. */
 	if (!strcmp(pp->geom->class->name, mp->name))
 		return (NULL);
 	gp = g_new_geomf(mp, "%s.fox", pp->name);
 	gp->softc = g_malloc(sizeof(struct g_fox_softc), M_WAITOK | M_ZERO);
 	sc = gp->softc;
 
 	cp = g_new_consumer(gp);
 	g_attach(cp, pp);
 	error = g_access(cp, 1, 0, 0);
 	if (error) {
 		g_free(sc);
 		g_detach(cp);
 		g_destroy_consumer(cp);	
 		g_destroy_geom(gp);
 		return(NULL);
 	}
 	/* Single-pass loop: "break" is used as a structured early exit. */
 	do {
 		sectorsize = cp->provider->sectorsize;
 		/* The read is done with the topology lock dropped. */
 		g_topology_unlock();
 		buf = g_read_data(cp, 0, sectorsize, &error);
 		g_topology_lock();
 		/*
 		 * NOTE(review): this revision checks only the returned pointer,
 		 * presumably relying on g_read_data() returning NULL on every
 		 * failure -- confirm against g_read_data().
 		 */
-		if (buf == NULL || error != 0)
+		if (buf == NULL)
 			break;
 		if (memcmp(buf, FOX_MAGIC, strlen(FOX_MAGIC)))
 			break;
 
 		/*
 		 * First we need to see if this a new path for an existing fox.
 		 */
 		LIST_FOREACH(gp2, &mp->geom, geom) {
 			sc2 = gp2->softc;
 			if (sc2 == NULL)
 				continue;
 			if (memcmp(buf + 16, sc2->magic, sizeof sc2->magic))
 				continue;
 			break;
 		}
 		if (gp2 != NULL) {
 			/*
 			 * It was.  Create a new consumer for that fox,
 			 * attach it, and if the fox is open, open this
 			 * path with an exclusive count of one.
 			 */
 			printf("Adding path (%s) to fox (%s)\n",
 			    pp->name, gp2->name);
 			cp2 = g_new_consumer(gp2);
 			g_attach(cp2, pp);
 			pp2 = LIST_FIRST(&gp2->provider);
 			if (pp2->acr > 0 || pp2->acw > 0 || pp2->ace > 0) {
 				error = g_access(cp2, 0, 0, 1);
 				if (error) {
 					/*
 					 * This is bad, or more likely,
 					 * the user is doing something stupid
 					 */
 					printf(
 	"WARNING: New path (%s) to fox(%s) not added: %s\n%s",
 					    cp2->provider->name, gp2->name,
 	"Could not get exclusive bit.",
 	"WARNING: This indicates a risk of data inconsistency."
 					);
 					g_detach(cp2);
 					g_destroy_consumer(cp2);
 				}
 			}
 			/* The scratch geom gets no provider and is torn down below. */
 			break;
 		}
 		printf("Creating new fox (%s)\n", pp->name);
 		sc->path = cp;
 		memcpy(sc->magic, buf + 16, sizeof sc->magic);
 		pp2 = g_new_providerf(gp, "%s", gp->name);
 		/* The first sector holds the fox metadata and is not exported. */
 		pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize;
 		pp2->sectorsize = sc->sectorsize = pp->sectorsize;
 printf("fox %s lock %p\n", gp->name, &sc->lock);
 
 		mtx_init(&sc->lock, "fox queue", NULL, MTX_DEF);
 		TAILQ_INIT(&sc->queue);
 		g_error_provider(pp2, 0);
 	} while (0);
 	if (buf != NULL)
 		g_free(buf);
 	g_access(cp, -1, 0, 0);
 
 	/* A provider exists only if a new fox was created above. */
 	if (!LIST_EMPTY(&gp->provider))
 		return (gp);
 
 	g_free(gp->softc);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	return (NULL);
 }
 
 static int
 g_fox_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
 {
 	struct g_fox_softc *sc;
 
 	g_topology_assert();
 	sc = gp->softc;
 	mtx_destroy(&sc->lock);
 	g_free(gp->softc);
 	gp->softc = NULL;
 	g_wither_geom(gp, ENXIO);
 	return (0);
 }
 
 /*
  * Class table registering the fox methods with GEOM.  Note that the same
  * handler, g_fox_orphan, serves both the spoiled and the orphan callbacks.
  */
 static struct g_class g_fox_class	= {
 	.name = FOX_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_fox_taste,
 	.destroy_geom = g_fox_destroy_geom,
 	.start = g_fox_start,
 	.spoiled = g_fox_orphan,
 	.orphan = g_fox_orphan,
 	.access= g_fox_access,
 };
 
 DECLARE_GEOM_CLASS(g_fox_class, g_fox);
Index: head/sys/geom/geom_gpt.c
===================================================================
--- head/sys/geom/geom_gpt.c	(revision 152966)
+++ head/sys/geom/geom_gpt.c	(revision 152967)
@@ -1,1167 +1,1167 @@
 /*-
  * Copyright (c) 2002, 2005 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/diskmbr.h>
 #include <sys/endian.h>
 #include <sys/gpt.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/systm.h>
 #include <sys/uuid.h>
 #include <geom/geom.h>
 
 /* Compile-time checks of the layout of the on-disk GPT structures. */
 CTASSERT(offsetof(struct gpt_hdr, padding) == 92);
 CTASSERT(sizeof(struct gpt_ent) == 128);
 
 /* Tracing hook; as defined here it expands to nothing. */
 #define	G_GPT_TRACE(args)	/* g_trace args */
 
 /*
  * The GEOM GPT class. Nothing fancy...
  *
  * The method prototypes are declared first so the class table can refer
  * to them before their definitions.
  */
 static g_ctl_req_t g_gpt_ctlreq;
 static g_ctl_destroy_geom_t g_gpt_destroy_geom;
 static g_taste_t g_gpt_taste;
 
 static g_access_t g_gpt_access;
 static g_dumpconf_t g_gpt_dumpconf;
 static g_orphan_t g_gpt_orphan;
 static g_spoiled_t g_gpt_spoiled;
 static g_start_t g_gpt_start;
 
 static struct g_class g_gpt_class = {
 	.name = "GPT",
 	.version = G_VERSION,
 	/* Class methods. */
 	.ctlreq = g_gpt_ctlreq,
 	.destroy_geom = g_gpt_destroy_geom,
 	.taste = g_gpt_taste,
 	/* Geom methods. */
 	.access = g_gpt_access,
 	.dumpconf = g_gpt_dumpconf,
 	.orphan = g_gpt_orphan,
 	.spoiled = g_gpt_spoiled,
 	.start = g_gpt_start,
 };
 
 DECLARE_GEOM_CLASS(g_gpt_class, g_gpt);
 
 /*
  * The GEOM GPT instance data.
  *
  * One g_gpt_part describes one partition and is linked into the softc's
  * parts list.
  */
 struct g_gpt_part {
 	LIST_ENTRY(g_gpt_part) parts;	/* linkage in g_gpt_softc.parts */
 	struct g_provider *provider;	/* provider published for this partition */
 	off_t		offset;		/* byte offset: start LBA times sector size */
 	struct gpt_ent	ent;		/* table entry, stored in host byte order */
 	int		index;		/* zero-based slot; provider name uses index + 1 */
 };
 
 /* Index into the per-header arrays of struct g_gpt_softc. */
 enum gpt_hdr_type {
 	GPT_HDR_PRIMARY,
 	GPT_HDR_SECONDARY,
 	GPT_HDR_COUNT		/* number of headers; used as array size */
 };
 
 /*
  * Result of examining one GPT header.  g_gpt_load_hdr() advances through
  * MISSING, CORRUPT and INVALID as successive checks pass and ends at OK.
  */
 enum gpt_hdr_state {
 	GPT_HDR_UNKNOWN,	/* zero value: header not examined */
 	GPT_HDR_MISSING,	/* signature did not match */
 	GPT_HDR_CORRUPT,	/* bad header size or header CRC */
 	GPT_HDR_INVALID,	/* a field failed its sanity check */
 	GPT_HDR_OK		/* all checks passed */
 };
 
 /* Per-geom state: the partition list plus both GPT headers and their status. */
 struct g_gpt_softc {
 	LIST_HEAD(, g_gpt_part) parts;		/* partitions of this GPT */
 	struct gpt_hdr	hdr[GPT_HDR_COUNT];	/* headers, indexed by gpt_hdr_type */
 	enum gpt_hdr_state state[GPT_HDR_COUNT]; /* status of the matching hdr[] slot */
 };
 
 /* Partition type UUIDs that this file compares table entries against. */
 static struct uuid g_gpt_freebsd = GPT_ENT_TYPE_FREEBSD;
 static struct uuid g_gpt_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP;
 static struct uuid g_gpt_linux_swap = GPT_ENT_TYPE_LINUX_SWAP;
 static struct uuid g_gpt_unused = GPT_ENT_TYPE_UNUSED;
 
 /*
  * Support functions.
  */
 
 /* Forward declaration: g_gpt_ctl_create() calls it before its definition. */
 static void g_gpt_wither(struct g_geom *, int);
 
 /*
  * Add a partition of the given type spanning LBAs start..end (inclusive)
  * to GPT geom gp and publish a provider for it.
  *
  * The partition takes the lowest index for which the list walk finds no
  * existing entry and is linked in right after the entry preceding that
  * index.  It receives a freshly generated UUID.  The provider is named
  * after the geom, followed by 's' for the FreeBSD type or 'p' for any
  * other type, and the one-based index.  No overlap checking is done.
  *
  * Returns the new provider.  req and flags are unused.
  */
 static struct g_provider *
 g_gpt_ctl_add(struct gctl_req *req, const char *flags, struct g_geom *gp,
     struct uuid *type, uint64_t start, uint64_t end)
 {
 	struct g_gpt_softc *sc;
 	struct g_gpt_part *cur, *prev, *np;
 	struct g_provider *parent, *newpp;
 	int slot;
 	char kind;
 
 	G_GPT_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name));
 	g_topology_assert();
 
 	sc = gp->softc;
 	parent = LIST_FIRST(&gp->consumer)->provider;
 
 	/* Walk the list to find the first unused slot and its predecessor. */
 	prev = NULL;
 	slot = 0;
 	LIST_FOREACH(cur, &sc->parts, parts) {
 		/* XXX test for overlap */
 		if (cur->index != slot)
 			continue;
 		prev = cur;
 		slot++;
 	}
 
 	np = g_malloc(sizeof(struct g_gpt_part), M_WAITOK | M_ZERO);
 	np->index = slot;
 	np->offset = start * parent->sectorsize;
 	if (prev != NULL)
 		LIST_INSERT_AFTER(prev, np, parts);
 	else
 		LIST_INSERT_HEAD(&sc->parts, np, parts);
 
 	np->ent.ent_type = *type;
 	kern_uuidgen(&np->ent.ent_uuid, 1);
 	np->ent.ent_lba_start = start;
 	np->ent.ent_lba_end = end;
 
 	/* XXX ent_attr */
 	/* XXX ent_name */
 
 	kind = (memcmp(type, &g_gpt_freebsd, sizeof(struct uuid)) == 0) ?
 	    's' : 'p';
 	newpp = g_new_providerf(gp, "%s%c%d", gp->name, kind, slot + 1);
 	np->provider = newpp;
 	newpp->index = slot;
 	newpp->private = np;			/* Close the circle. */
 	newpp->mediasize = (end - start + 1) * parent->sectorsize;
 	newpp->sectorsize = parent->sectorsize;
 	newpp->flags = parent->flags & G_PF_CANDELETE;
 	if (parent->stripesize > 0) {
 		newpp->stripesize = parent->stripesize;
 		newpp->stripeoffset =
 		    (parent->stripeoffset + np->offset) % parent->stripesize;
 	}
 	g_error_provider(newpp, 0);
 
 	if (bootverbose) {
 		printf("GEOM: %s: partition ", newpp->name);
 		printf_uuid(&np->ent.ent_uuid);
 		printf(".\n");
 	}
 
 	return (newpp);
 }
 
 /*
  * Create a new, empty GPT geom on provider pp with room for the given
  * number of table entries.
  *
  * Only an in-memory GPT (primary and secondary header) is constructed
  * here; nothing in this function writes to the provider.  On failure an
  * error is recorded on req with gctl_error() and NULL is returned.
  * The flags argument is unused.
  */
 static struct g_geom *
 g_gpt_ctl_create(struct gctl_req *req, const char *flags, struct g_class *mp,
     struct g_provider *pp, uint32_t entries)
 {
 	struct uuid uuid;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	struct g_gpt_softc *softc;
 	struct gpt_hdr *hdr;
 	uint64_t last;
 	size_t tblsz;
 	int error, i;
 
 	G_GPT_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name));
 	g_topology_assert();
 
 	/* Size of one copy of the partition table, in sectors (rounded up). */
 	tblsz = (entries * sizeof(struct gpt_ent) + pp->sectorsize - 1) /
 	    pp->sectorsize;
 
 	/*
 	 * Sanity-check the size of the provider. This test is very similar
 	 * to the one in g_gpt_taste(). Here we want to make sure that the
 	 * size of the provider is large enough to hold a GPT that has the
 	 * requested number of entries, plus as many available sectors for
 	 * partitions of minimal size. The latter test is not exactly needed
 	 * but it helps keep the table size proportional to the media size.
 	 * Thus, a GPT with 128 entries must at least have 128 sectors of
 	 * usable partition space. Therefore, the absolute minimal size we
 	 * allow is (1 + 2 * (1 + 32) + 128) = 195 sectors. This is more
 	 * restrictive than what g_gpt_taste() requires.
 	 */
 	if (pp->sectorsize < 512 ||
 	    pp->sectorsize % sizeof(struct gpt_ent) != 0 ||
 	    pp->mediasize < (3 + 2 * tblsz + entries) * pp->sectorsize) {
 		gctl_error(req, "%d provider", ENOSPC);
 		return (NULL);
 	}
 
 	/* We don't nest. See also g_gpt_taste(). */
 	if (pp->geom->class == &g_gpt_class) {
 		gctl_error(req, "%d provider", ENODEV);
 		return (NULL);
 	}
 
 	/* Create a GEOM. */
 	gp = g_new_geomf(mp, "%s", pp->name);
 	softc = g_malloc(sizeof(struct g_gpt_softc), M_WAITOK | M_ZERO);
 	gp->softc = softc;
 	LIST_INIT(&softc->parts);
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 	if (error == 0)
 		error = g_access(cp, 1, 0, 0);
 	if (error != 0) {
 		g_gpt_wither(gp, error);
 		gctl_error(req, "%d geom '%s'", error, pp->name);
 		return (NULL);
 	}
 
 	/* LBA of the last sector on the provider. */
 	last = (pp->mediasize / pp->sectorsize) - 1;
 	kern_uuidgen(&uuid, 1);
 
 	/*
 	 * Construct an in-memory GPT.  The primary header sits at LBA 1 with
 	 * its table at LBA 2; the secondary header sits at the last LBA with
 	 * its table directly in front of it.  Both share the same UUID and
 	 * the same usable area.
 	 */
 	for (i = GPT_HDR_PRIMARY; i < GPT_HDR_COUNT; i++) {
 		hdr = softc->hdr + i;
 		bcopy(GPT_HDR_SIG, hdr->hdr_sig, sizeof(hdr->hdr_sig));
 		hdr->hdr_revision = GPT_HDR_REVISION;
 		hdr->hdr_size = offsetof(struct gpt_hdr, padding);
 		hdr->hdr_lba_self = (i == GPT_HDR_PRIMARY) ? 1 : last;
 		hdr->hdr_lba_alt = (i == GPT_HDR_PRIMARY) ? last : 1;
 		hdr->hdr_lba_start = 2 + tblsz;
 		hdr->hdr_lba_end = last - (1 + tblsz);
 		hdr->hdr_uuid = uuid;
 		hdr->hdr_lba_table = (i == GPT_HDR_PRIMARY) ? 2 : last - tblsz;
 		hdr->hdr_entries = entries;
 		hdr->hdr_entsz = sizeof(struct gpt_ent);
 		softc->state[i] = GPT_HDR_OK;
 	}
 
 	/* Never taken; it is the only reference to the fail label below. */
 	if (0)
 		goto fail;
 
 	if (bootverbose) {
 		printf("GEOM: %s: GPT ", pp->name);
 		printf_uuid(&softc->hdr[GPT_HDR_PRIMARY].hdr_uuid);
 		printf(".\n");
 	}
 
 	/* Drop the read reference taken above. */
 	g_access(cp, -1, 0, 0);
 	return (gp);
 
 fail:
 	g_access(cp, -1, 0, 0);
 	g_gpt_wither(gp, error);
 	gctl_error(req, "%d geom '%s'", error, pp->name);
 	return (NULL);
 }
 
 /* Handler for destroying a GPT; the body is empty, i.e. not implemented. */
 static void
 g_gpt_ctl_destroy(struct gctl_req *req, const char *flags, struct g_geom *gp)
 {
 }
 
 /* Handler for recovering a GPT; the body is empty, i.e. not implemented. */
 static void
 g_gpt_ctl_recover(struct gctl_req *req, const char *flags, struct g_geom *gp)
 {
 }
 
 /*
  * Check whether sector 0 of the provider behind cp holds a protective MBR.
  *
  * Returns 1 when the sector carries the DOS magic and each of the four
  * partition entries is either unused (type 0) or of type DOSPTYP_PMBR;
  * returns 0 otherwise, including when the sector could not be read.
  * *error is passed to g_read_data() for it to fill in.
  */
 static int
 g_gpt_has_pmbr(struct g_consumer *cp, int *error)
 {
 	struct dos_partition *part;
 	char *buf;
 	int i, pmbr;
 	uint16_t magic;
 
 	buf = g_read_data(cp, 0L, cp->provider->sectorsize, error);
 	/*
 	 * NOTE(review): this revision tests the returned buffer rather than
 	 * *error, presumably because g_read_data() returns NULL on every
 	 * failure -- confirm against g_read_data().
 	 */
-	if (*error != 0)
+	if (buf == NULL)
 		return (0);
 
 	pmbr = 0;
 
 	/* The signature is stored little-endian on disk. */
 	magic = le16toh(*(uint16_t *)(uintptr_t)(buf + DOSMAGICOFFSET));
 	if (magic != DOSMAGIC)
 		goto out;
 
 	/* Any entry that is neither empty nor PMBR disqualifies the sector. */
 	part = (struct dos_partition *)(uintptr_t)(buf + DOSPARTOFF);
 	for (i = 0; i < 4; i++) {
 		if (part[i].dp_typ != 0 && part[i].dp_typ != DOSPTYP_PMBR)
 			goto out;
 	}
 
 	pmbr = 1;
 
 out:
 	g_free(buf);
 	return (pmbr);
 }
 
 static void
 g_gpt_load_hdr(struct g_gpt_softc *softc, struct g_provider *pp,
     enum gpt_hdr_type type, void *buf)
 {
 	struct uuid uuid;
 	struct gpt_hdr *hdr;
 	uint64_t lba, last;
 	uint32_t crc, sz;
 
 	softc->state[type] = GPT_HDR_MISSING;
 
 	hdr = softc->hdr + type;
 	bcopy(buf, hdr, sizeof(*hdr));
 	if (memcmp(hdr->hdr_sig, GPT_HDR_SIG, sizeof(hdr->hdr_sig)) != 0)
 		return;
 
 	softc->state[type] = GPT_HDR_CORRUPT;
 
 	sz = le32toh(hdr->hdr_size);
 	if (sz < 92 || sz > pp->sectorsize)
 		return;
 	crc = le32toh(hdr->hdr_crc_self);
 	hdr->hdr_crc_self = 0;
 	if (crc32(hdr, sz) != crc)
 		return;
 	hdr->hdr_size = sz;
 	hdr->hdr_crc_self = crc;
 
 	softc->state[type] = GPT_HDR_INVALID;
 
 	last = (pp->mediasize / pp->sectorsize) - 1;
 	hdr->hdr_revision = le32toh(hdr->hdr_revision);
 	if (hdr->hdr_revision < 0x00010000)
 		return;
 	hdr->hdr_lba_self = le64toh(hdr->hdr_lba_self);
 	if (hdr->hdr_lba_self != (type == GPT_HDR_PRIMARY ? 1 : last))
 		return;
 	hdr->hdr_lba_alt = le64toh(hdr->hdr_lba_alt);
 	if (hdr->hdr_lba_alt != (type == GPT_HDR_PRIMARY ? last : 1))
 		return;
 
 	/* Check the managed area. */
 	hdr->hdr_lba_start = le64toh(hdr->hdr_lba_start);
 	if (hdr->hdr_lba_start < 2 || hdr->hdr_lba_start >= last)
 		return;
 	hdr->hdr_lba_end = le64toh(hdr->hdr_lba_end);
 	if (hdr->hdr_lba_end < hdr->hdr_lba_start || hdr->hdr_lba_end >= last)
 		return;
 
 	/* Check the table location and size of the table. */
 	hdr->hdr_entries = le32toh(hdr->hdr_entries);
 	hdr->hdr_entsz = le32toh(hdr->hdr_entsz);
 	if (hdr->hdr_entries == 0 || hdr->hdr_entsz < 128 ||
 	    (hdr->hdr_entsz & 7) != 0)
 		return;
 	hdr->hdr_lba_table = le64toh(hdr->hdr_lba_table);
 	if (hdr->hdr_lba_table < 2 || hdr->hdr_lba_table >= last)
 		return;
 	if (hdr->hdr_lba_table >= hdr->hdr_lba_start &&
 	    hdr->hdr_lba_table <= hdr->hdr_lba_end)
 		return;
 	lba = hdr->hdr_lba_table +
 	    (hdr->hdr_entries * hdr->hdr_entsz + pp->sectorsize - 1) /
 	    pp->sectorsize - 1;
 	if (lba >= last)
 		return;
 	if (lba >= hdr->hdr_lba_start && lba <= hdr->hdr_lba_end)
 		return;
 
 	softc->state[type] = GPT_HDR_OK;
 
 	le_uuid_dec(&hdr->hdr_uuid, &uuid);
 	hdr->hdr_uuid = uuid;
 	hdr->hdr_crc_table = le32toh(hdr->hdr_crc_table);
 }
 
 /*
  * Walk the raw partition table tbl described by hdr and create a
  * g_gpt_part plus a provider for every used, valid entry.
  *
  * Entries whose type is the "unused" UUID are skipped silently; entries
  * whose LBA range is inverted or falls outside the header's usable area
  * are skipped with a console message.  The topology lock is taken only
  * around the creation of each provider.
  */
 static void
 g_gpt_load_tbl(struct g_geom *gp, struct g_provider *pp, struct gpt_hdr *hdr,
     char *tbl)
 {
 	struct uuid uuid;
 	struct gpt_ent *ent;
 	struct g_gpt_part *last, *part;
 	struct g_gpt_softc *softc;
 	uint64_t part_start, part_end;
 	unsigned int ch, idx;
 
 	softc = gp->softc;
 
 	/*
 	 * "part" keeps pointing at the most recently created partition, so
 	 * the "last = part" in the increment expression (which also runs
 	 * after a "continue") always yields the list tail to insert after.
 	 */
 	for (idx = 0, last = part = NULL;
 	     idx < hdr->hdr_entries;
 	     idx++, last = part, tbl += hdr->hdr_entsz) {
 		ent = (struct gpt_ent *)(uintptr_t)tbl;
 		le_uuid_dec(&ent->ent_type, &uuid);
 		if (!memcmp(&uuid, &g_gpt_unused, sizeof(struct uuid)))
 			continue;
 		part_start = le64toh(ent->ent_lba_start);
 		part_end = le64toh(ent->ent_lba_end);
 		if (part_start < hdr->hdr_lba_start || part_start > part_end ||
 		    part_end > hdr->hdr_lba_end) {
 			printf("GEOM: %s: GPT partition %d is invalid -- "
 			    "ignored.\n", gp->name, idx + 1);
 			continue;
 		}
 
 		part = g_malloc(sizeof(struct g_gpt_part), M_WAITOK | M_ZERO);
 		part->index = idx;
 		part->offset = part_start * pp->sectorsize;
 		if (last == NULL)
 			LIST_INSERT_HEAD(&softc->parts, part, parts);
 		else
 			LIST_INSERT_AFTER(last, part, parts);
 		/* Keep a host-byte-order copy of the entry. */
 		part->ent.ent_type = uuid;
 		le_uuid_dec(&ent->ent_uuid, &part->ent.ent_uuid);
 		part->ent.ent_lba_start = part_start;
 		part->ent.ent_lba_end = part_end;
 		part->ent.ent_attr = le64toh(ent->ent_attr);
 		for (ch = 0; ch < sizeof(ent->ent_name)/2; ch++)
 			part->ent.ent_name[ch] = le16toh(ent->ent_name[ch]);
 
 		g_topology_lock();
 		/* 's' for FreeBSD-type partitions, 'p' for everything else. */
 		part->provider = g_new_providerf(gp, "%s%c%d", gp->name,
 		    !memcmp(&uuid, &g_gpt_freebsd, sizeof(struct uuid))
 		    ? 's' : 'p', idx + 1);
 		part->provider->index = idx;
 		part->provider->private = part;		/* Close the circle. */
 		part->provider->mediasize = (part_end - part_start + 1) *
 		    pp->sectorsize;
 		part->provider->sectorsize = pp->sectorsize;
 		part->provider->flags = pp->flags & G_PF_CANDELETE;
 		if (pp->stripesize > 0) {
 			part->provider->stripesize = pp->stripesize;
 			part->provider->stripeoffset =
 			    (pp->stripeoffset + part->offset) % pp->stripesize;
 		}
 		g_error_provider(part->provider, 0);
 		g_topology_unlock();
 
 		if (bootverbose) {
 			printf("GEOM: %s: partition ", part->provider->name);
 			printf_uuid(&part->ent.ent_uuid);
 			printf(".\n");
 		}
 	}
 }
 
 /*
  * Tell whether the primary and secondary header describe the same GPT.
  *
  * Returns 1 when the UUID, revision, header size, usable area, entry
  * count, entry size and table CRC all agree, 0 otherwise.  The fields
  * that legitimately differ between the two headers (self, alternate and
  * table LBA) are not compared.
  */
 static int
 g_gpt_matched_hdrs(struct gpt_hdr *pri, struct gpt_hdr *sec)
 {
 
 	if (memcmp(&pri->hdr_uuid, &sec->hdr_uuid, sizeof(struct uuid)) != 0)
 		return (0);
 	if (pri->hdr_revision != sec->hdr_revision ||
 	    pri->hdr_size != sec->hdr_size)
 		return (0);
 	if (pri->hdr_lba_start != sec->hdr_lba_start ||
 	    pri->hdr_lba_end != sec->hdr_lba_end)
 		return (0);
 	if (pri->hdr_entries != sec->hdr_entries ||
 	    pri->hdr_entsz != sec->hdr_entsz)
 		return (0);
 	if (pri->hdr_crc_table != sec->hdr_crc_table)
 		return (0);
 	return (1);
 }
 
 /*
  * Verify the partition table tbl against the table CRC stored in hdr.
  * The checksum covers hdr_entries * hdr_entsz bytes.
  * Returns 1 when the CRC matches, 0 otherwise.
  */
 static int
 g_gpt_tbl_ok(struct gpt_hdr *hdr, char *tbl)
 {
 	size_t nbytes;
 
 	nbytes = hdr->hdr_entries * hdr->hdr_entsz;
 	if (crc32(tbl, nbytes) != hdr->hdr_crc_table)
 		return (0);
 	return (1);
 }
 
 /*
  * Append the UTF-16 string str (at most len code units, terminated early
  * by a zero unit) to sb as UTF-8.
  *
  * Decoding starts in host byte order; a byte-swapped BOM (0xfffe) flips
  * the order for the rest of the string, and BOMs themselves produce no
  * output.  A valid surrogate pair is combined into one code point.  A
  * surrogate that is not the start of a valid pair is replaced by U+FFFD;
  * note that the code unit following it, if any, is consumed as well.
  */
 static void
 g_gpt_to_utf8(struct sbuf *sb, uint16_t *str, size_t len)
 {
 	uint32_t cp;
 	uint16_t low;
 	u_int order;
 
 	order = BYTE_ORDER;
 	while (len > 0 && *str != 0) {
 		cp = (order == BIG_ENDIAN) ? be16toh(*str) : le16toh(*str);
 		str++;
 		len--;
 
 		if (cp == 0xfffe) {
 			/* BOM (U+FEFF) swapped: switch byte order. */
 			order = (order == BIG_ENDIAN) ? LITTLE_ENDIAN :
 			    BIG_ENDIAN;
 			continue;
 		}
 		if (cp == 0xfeff) {
 			/* BOM (U+FEFF) unswapped: nothing to emit. */
 			continue;
 		}
 
 		if ((cp & 0xf800) == 0xd800) {
 			/* Surrogate: fetch its partner, if there is one. */
 			low = 0xfffd;
 			if (len > 0) {
 				low = (order == BIG_ENDIAN) ? be16toh(*str)
 				    : le16toh(*str);
 				str++;
 				len--;
 			}
 			if ((cp & 0x400) == 0 && (low & 0xfc00) == 0xdc00) {
 				cp = ((cp & 0x3ff) << 10) + (low & 0x3ff);
 				cp += 0x10000;
 			} else {
 				cp = 0xfffd;
 			}
 		}
 
 		/* Emit the code point as a 1- to 4-byte UTF-8 sequence. */
 		if (cp < 0x80) {
 			sbuf_printf(sb, "%c", cp);
 		} else if (cp < 0x800) {
 			sbuf_printf(sb, "%c%c", 0xc0 | (cp >> 6),
 			    0x80 | (cp & 0x3f));
 		} else if (cp < 0x10000) {
 			sbuf_printf(sb, "%c%c%c", 0xe0 | (cp >> 12),
 			    0x80 | ((cp >> 6) & 0x3f), 0x80 | (cp & 0x3f));
 		} else if (cp < 0x200000) {
 			sbuf_printf(sb, "%c%c%c%c", 0xf0 | (cp >> 18),
 			    0x80 | ((cp >> 12) & 0x3f),
 			    0x80 | ((cp >> 6) & 0x3f), 0x80 | (cp & 0x3f));
 		}
 	}
 }
 
 static void
 g_gpt_wither(struct g_geom *gp, int error)
 {
 	struct g_gpt_part *part;
 	struct g_gpt_softc *softc;
 
 	softc = gp->softc;
 	if (softc != NULL) {
 		part = LIST_FIRST(&softc->parts);
 		while (part != NULL) {
 			LIST_REMOVE(part, parts);
 			g_free(part);
 			part = LIST_FIRST(&softc->parts);
 		}
 		g_free(softc);
 		gp->softc = NULL;
 	}
 	g_wither_geom(gp, error);
 }
 
 /*
  * Class methods.
  */
 
 /*
  * Control request entry point for the GPT class.
  *
  * Dispatches on the verb string:
  *	create		- needs a "provider" parameter, optional "entries"
  *	destroy,
  *	recover		- need a "geom" parameter; allowed on a degraded GPT
  *	add		- needs "geom", "type", "start" and "end"; requires
  *			  both GPT headers to be in the OK state
  *	modify,
  *	remove		- recognized, but currently return without doing
  *			  anything
  * Any other verb is reported as EINVAL.  All failures are reported
  * through gctl_error() with the errno value as the leading number of the
  * message.  Must be called with the topology lock held.
  */
 static void
 g_gpt_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	struct uuid type;
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct g_gpt_softc *softc;
 	const char *flags;
 	char const *s;
 	uint64_t start, end;
 	long entries;
 	int error;
 
 	G_GPT_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, verb));
 	g_topology_assert();
 
 	/*
 	 * All verbs take an optional flags parameter. The flags parameter
 	 * is a string with each letter an independent flag. Each verb has
 	 * its own set of valid flags and the meaning of the flags is
 	 * specific to the verb. Typically the presence of a letter (=flag)
 	 * in the string means true and the absence means false.
 	 */
 	s = gctl_get_asciiparam(req, "flags");
 	flags = (s == NULL) ? "" : s;
 
 	/*
 	 * Only the create verb takes a provider parameter. Make this a
 	 * special case so that more code sharing is possible for the
 	 * common case.
 	 */
 	if (!strcmp(verb, "create")) {
 		/*
 		 * Create a GPT on a pristine disk-like provider.
 		 *	Required parameters/attributes:
 		 *		provider
 		 *	Optional parameters/attributes:
 		 *		entries
 		 */
 		s = gctl_get_asciiparam(req, "provider");
 		if (s == NULL) {
 			gctl_error(req, "%d provider", ENOATTR);
 			return;
 		}
 		pp = g_provider_by_name(s);
 		if (pp == NULL) {
 			gctl_error(req, "%d provider '%s'", EINVAL, s);
 			return;
 		}
 		/* Check that there isn't already a GPT on the provider. */
 		LIST_FOREACH(gp, &mp->geom, geom) {
 			if (!strcmp(s, gp->name)) {
 				gctl_error(req, "%d geom '%s'", EEXIST, s);
 				return;
                         }
 		}
 		s = gctl_get_asciiparam(req, "entries");
 		if (s != NULL) {
 			/*
 			 * The cast strips the const qualifier from s so that
 			 * strtol() can store the end-of-number pointer back
 			 * into it; *s != '\0' then detects trailing junk.
 			 */
 			entries = strtol(s, (char **)(uintptr_t)&s, 0);
 			if (entries < 128 || *s != '\0') {
 				gctl_error(req, "%d entries %ld", EINVAL,
 				    entries);
 				return;
 			}
 		} else
 			entries = 128;	/* Documented minimum */
 		/* The returned geom is not used any further here. */
 		gp = g_gpt_ctl_create(req, flags, mp, pp, entries);
 		return;
 	}
 
 	/*
 	 * All but the create verb, which is handled above, operate on an
 	 * existing GPT geom. The geom parameter is non-optional, so get
 	 * it here first.
 	 */
 	s = gctl_get_asciiparam(req, "geom");
 	if (s == NULL) {
 		gctl_error(req, "%d geom", ENOATTR);
 		return;
 	}
 	/* Get the GPT geom with the given name. */
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		if (!strcmp(s, gp->name))
 			break;
 	}
 	if (gp == NULL) {
 		gctl_error(req, "%d geom '%s'", EINVAL, s);
 		return;
 	}
 	/*
 	 * NOTE(review): g_gpt_wither() sets gp->softc to NULL; softc is
 	 * dereferenced below without a NULL check -- confirm a withered
 	 * geom can no longer be found on the class list here.
 	 */
 	softc = gp->softc;
 
 	/*
 	 * Now handle the verbs that can operate on a downgraded or
 	 * partially corrupted GPT. In particular these are the verbs
 	 * that don't deal with the table entries. We implement the
 	 * policy that all table entry related requests require a
 	 * valid GPT.
 	 */
 	if (!strcmp(verb, "destroy")) {
 		/*
 		 * Destroy a GPT completely.
 		 */
 		g_gpt_ctl_destroy(req, flags, gp);
 		return;
 	} else if (!strcmp(verb, "recover")) {
 		/*
 		 * Recover a downgraded GPT.
 		 */
 		g_gpt_ctl_recover(req, flags, gp);
 		return;
 	}
 
 	/*
 	 * Check that the GPT is complete and valid before we make changes
 	 * to the table entries.
 	 */
 	if (softc->state[GPT_HDR_PRIMARY] != GPT_HDR_OK ||
 	    softc->state[GPT_HDR_SECONDARY] != GPT_HDR_OK) {
 		gctl_error(req, "%d geom '%s'", ENXIO, s);
 		return;
 	}
 
 	if (!strcmp(verb, "add")) {
 		/*
 		 * Add a partition entry to a GPT.
 		 *	Required parameters/attributes:
 		 *		type
 		 *		start
 		 *		end
 		 *	Optional parameters/attributes:
 		 *		label
 		 */
 		s = gctl_get_asciiparam(req, "type");
 		if (s == NULL) {
 			gctl_error(req, "%d type", ENOATTR);
 			return;
 		}
 		error = parse_uuid(s, &type);
 		if (error != 0) {
 			gctl_error(req, "%d type '%s'", error, s);
 			return;
 		}
 		s = gctl_get_asciiparam(req, "start");
 		if (s == NULL) {
 			gctl_error(req, "%d start", ENOATTR);
 			return;
 		}
 		/* start must lie within the usable LBA range of the GPT. */
 		start = strtoq(s, (char **)(uintptr_t)&s, 0);
 		if (start < softc->hdr[GPT_HDR_PRIMARY].hdr_lba_start ||
 		    start > softc->hdr[GPT_HDR_PRIMARY].hdr_lba_end ||
 		    *s != '\0') {
 			gctl_error(req, "%d start %jd", EINVAL,
 			    (intmax_t)start);
 			return;
 		}
 		s = gctl_get_asciiparam(req, "end");
 		if (s == NULL) {
 			gctl_error(req, "%d end", ENOATTR);
 			return;
 		}
 		/* end may equal start, but must not exceed the last LBA. */
 		end = strtoq(s, (char **)(uintptr_t)&s, 0);
 		if (end < start ||
 		    end > softc->hdr[GPT_HDR_PRIMARY].hdr_lba_end ||
 		    *s != '\0') {
 			gctl_error(req, "%d end %jd", EINVAL,
 			    (intmax_t)end);
 			return;
 		}
 		/* The returned provider is not used any further here. */
 		pp = g_gpt_ctl_add(req, flags, gp, &type, start, end);
 		return;
 	} else if (!strcmp(verb, "modify")) {
 		/* Modify a partition entry. */
 		/* NOTE(review): not implemented; returns without an error. */
 		return;
 	} else if (!strcmp(verb, "remove")) {
 		/* Remove a partition entry from a GPT. */
 		/* NOTE(review): not implemented; returns without an error. */
 		return;
 	}
 
 	gctl_error(req, "%d verb '%s'", EINVAL, verb);
 }
 
 /*
  * Class method that tears down a GPT geom: the geom's state is released
  * and the geom withered with EINVAL.  The req argument is not used.
  * Always returns 0.  Must be called with the topology lock held.
  */
 static int
 g_gpt_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp)
 {
 
 	G_GPT_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, gp->name));
 	g_topology_assert();
 
 	g_gpt_wither(gp, EINVAL);
 	return (0);
 }
 
 /*
  * Taste a provider for a GPT.
  *
  * Creates a geom and consumer on the provider, reads the primary header
  * (second sector) and the secondary header (last sector), checks for a
  * protective MBR, selects a usable header (the primary is preferred),
  * reads and CRC-checks the matching table and finally hands the table to
  * g_gpt_load_tbl().
  *
  * Returns the new geom on success.  On any failure the geom is withered
  * and NULL is returned.  Called, and returns, with the topology lock held;
  * the lock is dropped while I/O is done.
  */
 static struct g_geom *
 g_gpt_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
 {
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	struct g_gpt_softc *softc;
 	struct gpt_hdr *hdr;
 	void *buf;
 	off_t ofs;
 	size_t nbytes;
 	int error;
 
 	G_GPT_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name));
 	g_topology_assert();
 
 	/*
 	 * Sanity-check the provider. Since the first sector on the provider
 	 * must be a PMBR and a PMBR is 512 bytes large, the sector size must
 	 * be at least 512 bytes. We also require that the sector size is a
 	 * multiple of the GPT entry size (which is 128 bytes).
 	 * Also, since the theoretical minimum number of sectors needed by
 	 * GPT is 6, any medium that has less than 6 sectors is never going
 	 * to hold a GPT. The number 6 comes from:
 	 *	1 sector for the PMBR
 	 *	2 sectors for the GPT headers (each 1 sector)
 	 *	2 sectors for the GPT tables (each 1 sector)
 	 *	1 sector for an actual partition
 	 * It's better to catch this pathological case early than behaving
 	 * pathologically later on by panicking...
 	 */
 	if (pp->sectorsize < 512 ||
 	    pp->sectorsize % sizeof(struct gpt_ent) != 0 ||
 	    pp->mediasize < 6 * pp->sectorsize)
 		return (NULL);
 
 	/*
 	 * We don't nest. That is, we disallow nesting a GPT inside a GPT
 	 * partition. We check only for direct nesting. Indirect nesting is
 	 * not easy to determine. If you want, you can therefore nest GPT
 	 * partitions by putting a dummy GEOM in between them. But I didn't
 	 * say that...
 	 */
 	if (pp->geom->class == &g_gpt_class)
 		return (NULL);
 
 	/*
 	 * Create a GEOM with consumer and hook it up to the provider.
 	 * With that we become part of the topology. Obtain read access
 	 * to the provider (the g_access() call below raises only the
 	 * read count).
 	 */
 	gp = g_new_geomf(mp, "%s", pp->name);
 	softc = g_malloc(sizeof(struct g_gpt_softc), M_WAITOK | M_ZERO);
 	gp->softc = softc;
 	LIST_INIT(&softc->parts);
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 	if (error == 0)
 		error = g_access(cp, 1, 0, 0);
 	if (error != 0) {
 		g_gpt_wither(gp, error);
 		return (NULL);
 	}
 
 	g_topology_unlock();
 
 	/*
 	 * Read both the primary and secondary GPT headers.  We have all
 	 * the information at our fingertips that way to determine if
 	 * there's a GPT, including whether recovery is appropriate.
 	 */
 	buf = g_read_data(cp, pp->sectorsize, pp->sectorsize, &error);
-	if (error != 0)
+	if (buf == NULL)
 		goto fail;
 	g_gpt_load_hdr(softc, pp, GPT_HDR_PRIMARY, buf);
 	g_free(buf);
 
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
-	if (error != 0)
+	if (buf == NULL)
 		goto fail;
 	g_gpt_load_hdr(softc, pp, GPT_HDR_SECONDARY, buf);
 	g_free(buf);
 
 	/* Bail out if there are no GPT headers at all. */
 	if (softc->state[GPT_HDR_PRIMARY] == GPT_HDR_MISSING &&
 	    softc->state[GPT_HDR_SECONDARY] == GPT_HDR_MISSING) {
 		error = ENXIO;		/* Device not configured for GPT. */
 		goto fail;
 	}
 
 	/*
 	 * We have at least one GPT header (though that one may be corrupt
 	 * or invalid). This disk supposedly has GPT in some shape or form.
 	 * First check that there's a protective MBR. Complain if there
 	 * is none and fail.
 	 */
 	if (!g_gpt_has_pmbr(cp, &error)) {
 		printf("GEOM: %s: GPT detected, but no protective MBR.\n",
 		    pp->name);
 		error = ENXIO;
 		goto fail;
 	}
 
 	/*
 	 * Now, catch the non-recoverable case where there's no good GPT
 	 * header at all. That is, unrecoverable by us. The user may be
 	 * able to fix it up with some magic.
 	 */
 	if (softc->state[GPT_HDR_PRIMARY] != GPT_HDR_OK &&
 	    softc->state[GPT_HDR_SECONDARY] != GPT_HDR_OK) {
 		printf("GEOM: %s: corrupt or invalid GPT detected.\n",
 		    pp->name);
 		printf("GEOM: %s: GPT rejected -- may not be recoverable.\n",
 		    pp->name);
 		error = EINVAL;		/* No valid GPT header exists. */
 		goto fail;
 	}
 
 	/*
 	 * Ok, at least one header is good. We can use the GPT. If there's
 	 * a corrupt or invalid header, we'd like the user to know about it.
 	 * Also catch the case where both headers appear to be good but are
 	 * not mirroring each other. We only check superficially for that.
 	 */
 	if (softc->state[GPT_HDR_PRIMARY] != GPT_HDR_OK) {
 		printf("GEOM: %s: the primary GPT header is corrupt or "
 		    "invalid.\n", pp->name);
 		printf("GEOM: %s: using the secondary instead -- recovery "
 		    "strongly advised.\n", pp->name);
 	} else if (softc->state[GPT_HDR_SECONDARY] != GPT_HDR_OK) {
 		printf("GEOM: %s: the secondary GPT header is corrupt or "
 		    "invalid.\n", pp->name);
 		printf("GEOM: %s: using the primary only -- recovery "
 		    "suggested.\n", pp->name);
 	} else if (!g_gpt_matched_hdrs(softc->hdr + GPT_HDR_PRIMARY,
 	    softc->hdr + GPT_HDR_SECONDARY)) {
 		printf("GEOM: %s: the primary and secondary GPT header do "
 		    "not agree.\n", pp->name);
 		printf("GEOM: %s: GPT rejected -- recovery required.\n",
 		    pp->name);
 		error = EINVAL;		/* No consistent GPT exists. */
 		goto fail;
 	}
 
 	/* Always prefer the primary header. */
 	hdr = (softc->state[GPT_HDR_PRIMARY] == GPT_HDR_OK)
 	    ? softc->hdr + GPT_HDR_PRIMARY : softc->hdr + GPT_HDR_SECONDARY;
 
 	/*
 	 * Now that we've got a GPT header, we have to deal with the table
 	 * itself. Again there's a primary table and a secondary table and
 	 * either or both may be corrupt or invalid. Redundancy is nice,
 	 * but it's a combinatorial pain in the butt.
 	 */
 
 	/* Size of the table in bytes, rounded up to whole sectors. */
 	nbytes = ((hdr->hdr_entries * hdr->hdr_entsz + pp->sectorsize - 1) /
 	    pp->sectorsize) * pp->sectorsize;
 
 	ofs = hdr->hdr_lba_table * pp->sectorsize;
 	buf = g_read_data(cp, ofs, nbytes, &error);
-	if (error != 0)
+	if (buf == NULL)
 		goto fail;
 
 	/*
 	 * If the table is corrupt, check if we can use the other one.
 	 * Complain and bail if not.
 	 */
 	if (!g_gpt_tbl_ok(hdr, buf)) {
 		g_free(buf);
 		if (hdr != softc->hdr + GPT_HDR_PRIMARY ||
 		    softc->state[GPT_HDR_SECONDARY] != GPT_HDR_OK) {
 			printf("GEOM: %s: the GPT table is corrupt -- "
 			    "may not be recoverable.\n", pp->name);
 			/*
 			 * NOTE(review): error is not assigned on this path;
 			 * it still holds whatever the preceding successful
 			 * g_read_data() stored (presumably 0) -- confirm
 			 * that withering with that value is intended.
 			 */
 			goto fail;
 		}
 		/*
 		 * Fall back to the secondary header and its table.  The
 		 * same nbytes is reused for this read.
 		 */
 		softc->state[GPT_HDR_PRIMARY] = GPT_HDR_CORRUPT;
 		hdr = softc->hdr + GPT_HDR_SECONDARY;
 		ofs = hdr->hdr_lba_table * pp->sectorsize;
 		buf = g_read_data(cp, ofs, nbytes, &error);
-		if (error != 0)
+		if (buf == NULL)
 			goto fail;
 
 		if (!g_gpt_tbl_ok(hdr, buf)) {
 			g_free(buf);
 			printf("GEOM: %s: both primary and secondary GPT "
 			    "tables are corrupt.\n", pp->name);
 			printf("GEOM: %s: GPT rejected -- may not be "
 			    "recoverable.\n", pp->name);
 			/* NOTE(review): error is not assigned here either. */
 			goto fail;
 		}
 		printf("GEOM: %s: the primary GPT table is corrupt.\n",
 		    pp->name);
 		printf("GEOM: %s: using the secondary table -- recovery "
 		    "strongly advised.\n", pp->name);
 	}
 
 	if (bootverbose) {
 		printf("GEOM: %s: GPT ", pp->name);
 		printf_uuid(&hdr->hdr_uuid);
 		printf(".\n");
 	}
 
 	g_gpt_load_tbl(gp, pp, hdr, buf);
 	g_free(buf);
 	/* Retake the topology lock and drop the read access taken above. */
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	return (gp);
 
  fail:
 	/* Common failure exit: relock, drop read access, wither the geom. */
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	g_gpt_wither(gp, error);
 	return (NULL);
 }
 
 /*
  * Geom methods.
  */
 
 /*
  * Access method for GPT partition providers.  The request is forwarded to
  * the geom's first consumer with the read and write deltas unchanged and
  * the exclusive delta raised by the write delta, so every write open of a
  * partition also counts as an exclusive open below us.
  *
  * Returns the result of g_access() on the consumer.
  */
 static int
 g_gpt_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *cp;
 
 	G_GPT_TRACE((G_T_ACCESS, "%s(%s,%d,%d,%d)", __func__, pp->name, dr,
 	    dw, de));
 
 	cp = LIST_FIRST(&pp->geom->consumer);
 
 	/* We always gain write-exclusive access. */
 	return (g_access(cp, dr, dw, dw + de));
 }
 
 /*
  * Dump configuration into an sbuf.  What is emitted depends on the
  * arguments:
  *	indent == NULL	- compact, unindented description of a partition
  *			  provider (index, offset and type UUID)
  *	cp != NULL	- consumer configuration; nothing is emitted
  *	pp != NULL	- XML elements describing one partition provider
  *	otherwise	- XML elements describing the geom, taken from the
  *			  selected GPT header (the primary when it is OK,
  *			  else the secondary)
  */
 static void
 g_gpt_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	static char *status[5] = {
 		"unknown", "missing", "corrupt", "invalid", "ok"
 	};
 	struct g_gpt_part *entry;
 	struct g_gpt_softc *sc;
 	struct gpt_hdr *sel;
 
 	KASSERT(sb != NULL && gp != NULL, (__func__));
 
 	if (indent == NULL) {
 		KASSERT(cp == NULL && pp != NULL, (__func__));
 		entry = pp->private;
 		sbuf_printf(sb, " i %u o %ju ty ", pp->index,
 		    (uintmax_t)entry->offset);
 		sbuf_printf_uuid(sb, &entry->ent.ent_type);
 		return;
 	}
 
 	if (cp != NULL) {
 		/* Consumer configuration: nothing to report. */
 		KASSERT(pp == NULL, (__func__));
 		return;
 	}
 
 	if (pp != NULL) {
 		/* Provider configuration. */
 		entry = pp->private;
 		sbuf_printf(sb, "%s<index>%u</index>\n", indent, pp->index);
 		sbuf_printf(sb, "%s<type>", indent);
 		sbuf_printf_uuid(sb, &entry->ent.ent_type);
 		sbuf_printf(sb, "</type>\n");
 		sbuf_printf(sb, "%s<uuid>", indent);
 		sbuf_printf_uuid(sb, &entry->ent.ent_uuid);
 		sbuf_printf(sb, "</uuid>\n");
 		sbuf_printf(sb, "%s<offset>%ju</offset>\n", indent,
 		    (uintmax_t)entry->offset);
 		sbuf_printf(sb, "%s<length>%ju</length>\n", indent,
 		    (uintmax_t)pp->mediasize);
 		sbuf_printf(sb, "%s<attr>%ju</attr>\n", indent,
 		    (uintmax_t)entry->ent.ent_attr);
 		sbuf_printf(sb, "%s<label>", indent);
 		/* The label is stored as 16-bit units; pass the unit count. */
 		g_gpt_to_utf8(sb, entry->ent.ent_name,
 		    sizeof(entry->ent.ent_name)/2);
 		sbuf_printf(sb, "</label>\n");
 		return;
 	}
 
 	/* Geom configuration. */
 	sc = gp->softc;
 	if (sc->state[GPT_HDR_PRIMARY] == GPT_HDR_OK)
 		sel = sc->hdr + GPT_HDR_PRIMARY;
 	else
 		sel = sc->hdr + GPT_HDR_SECONDARY;
 
 	sbuf_printf(sb, "%s<uuid>", indent);
 	sbuf_printf_uuid(sb, &sel->hdr_uuid);
 	sbuf_printf(sb, "</uuid>\n");
 	sbuf_printf(sb, "%s<primary>%s</primary>\n", indent,
 	    status[sc->state[GPT_HDR_PRIMARY]]);
 	sbuf_printf(sb, "%s<secondary>%s</secondary>\n", indent,
 	    status[sc->state[GPT_HDR_SECONDARY]]);
 	sbuf_printf(sb, "%s<selected>%s</selected>\n", indent,
 	    (sel == sc->hdr + GPT_HDR_PRIMARY) ? "primary" : "secondary");
 	sbuf_printf(sb, "%s<revision>%u</revision>\n", indent,
 	    sel->hdr_revision);
 	sbuf_printf(sb, "%s<header_size>%u</header_size>\n", indent,
 	    sel->hdr_size);
 	sbuf_printf(sb, "%s<crc_self>%u</crc_self>\n", indent,
 	    sel->hdr_crc_self);
 	sbuf_printf(sb, "%s<lba_self>%ju</lba_self>\n", indent,
 	    (uintmax_t)sel->hdr_lba_self);
 	sbuf_printf(sb, "%s<lba_other>%ju</lba_other>\n", indent,
 	    (uintmax_t)sel->hdr_lba_alt);
 	sbuf_printf(sb, "%s<lba_start>%ju</lba_start>\n", indent,
 	    (uintmax_t)sel->hdr_lba_start);
 	sbuf_printf(sb, "%s<lba_end>%ju</lba_end>\n", indent,
 	    (uintmax_t)sel->hdr_lba_end);
 	sbuf_printf(sb, "%s<lba_table>%ju</lba_table>\n", indent,
 	    (uintmax_t)sel->hdr_lba_table);
 	sbuf_printf(sb, "%s<crc_table>%u</crc_table>\n", indent,
 	    sel->hdr_crc_table);
 	sbuf_printf(sb, "%s<entries>%u</entries>\n", indent,
 	    sel->hdr_entries);
 	sbuf_printf(sb, "%s<entry_size>%u</entry_size>\n", indent,
 	    sel->hdr_entsz);
 }
 
 static void
 g_gpt_orphan(struct g_consumer *cp)
 {
 	struct g_provider *pp;
 
 	pp = cp->provider;
 	KASSERT(pp != NULL, (__func__));
 	G_GPT_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, pp->name));
 	g_topology_assert();
 
 	KASSERT(pp->error != 0, (__func__));
         g_gpt_wither(cp->geom, pp->error);
 }
 
 /*
  * Spoiled method: release the geom's state and wither the geom with
  * ENXIO.  Must be called with the topology lock held.
  */
 static void
 g_gpt_spoiled(struct g_consumer *cp)
 {
 
 	G_GPT_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name));
 	g_topology_assert();
 
 	g_gpt_wither(cp->geom, ENXIO);
 }
 
 static void
 g_gpt_start(struct bio *bp)
 {
 	struct bio *bp2;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	struct g_gpt_part *part;
 	struct g_kerneldump *gkd;
 	struct g_provider *pp;
 
 	pp = bp->bio_to;
 	gp = pp->geom;
 	part = pp->private;
 	cp = LIST_FIRST(&gp->consumer);
 
 	G_GPT_TRACE((G_T_BIO, "%s: cmd=%d, provider=%s", __func__, bp->bio_cmd,
 	    pp->name));
 
 	switch(bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 		if (bp->bio_offset >= pp->mediasize) {
 			g_io_deliver(bp, EIO);
 			break;
 		}
 		bp2 = g_clone_bio(bp);
 		if (bp2 == NULL) {
 			g_io_deliver(bp, ENOMEM);
 			break;
 		}
 		if (bp2->bio_offset + bp2->bio_length > pp->mediasize)
 			bp2->bio_length = pp->mediasize - bp2->bio_offset;
 		bp2->bio_done = g_std_done;
 		bp2->bio_offset += part->offset;
 		g_io_request(bp2, cp);
 		break;
 	case BIO_GETATTR:
 		if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) {
 			/*
 			 * Refuse non-swap partitions to be used as kernel
 			 * dumps.
 			 */
 			if (memcmp(&part->ent.ent_type, &g_gpt_freebsd_swap,
 			    sizeof(struct uuid)) && memcmp(&part->ent.ent_type,
 				&g_gpt_linux_swap, sizeof(struct uuid))) {
 				g_io_deliver(bp, ENXIO);
 				break;
 			}
 			gkd = (struct g_kerneldump *)bp->bio_data;
 			if (gkd->offset >= pp->mediasize) {
 				g_io_deliver(bp, EIO);
 				break;
 			}
 			if (gkd->offset + gkd->length > pp->mediasize)
 				gkd->length = pp->mediasize - gkd->offset;
 			gkd->offset += part->offset;
 			/* FALLTHROUGH */
 		}
 		bp2 = g_clone_bio(bp);
 		if (bp2 == NULL) {
 			g_io_deliver(bp, ENOMEM);
 			break;
 		}
 		bp2->bio_done = g_std_done;
 		g_io_request(bp2, cp);
 		break;
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		break;
 	}
 }
Index: head/sys/geom/geom_mbr.c
===================================================================
--- head/sys/geom/geom_mbr.c	(revision 152966)
+++ head/sys/geom/geom_mbr.c	(revision 152967)
@@ -1,518 +1,518 @@
 /*-
  * Copyright (c) 2002 Poul-Henning Kamp
  * Copyright (c) 2002 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
  * and NAI Labs, the Security Research Division of Network Associates, Inc.
  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/endian.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/bio.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/md5.h>
 
 #include <sys/diskmbr.h>
 #include <sys/sbuf.h>
 #include <geom/geom.h>
 #include <geom/geom_slice.h>
 
 #define MBR_CLASS_NAME "MBR"
 #define MBREXT_CLASS_NAME "MBREXT"
 
 /*
  * Two known bogus partition tables.  g_mbr_modify() compares a decoded
  * table against both and refuses (EBUSY) to install either of them.
  * The two tables differ only in the sixth initializer of the last entry
  * (255 in the first, 254 in the "fixed" variant).
  */
 static struct dos_partition historical_bogus_partition_table[NDOSPART] = {
         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
         { 0x80, 0, 1, 0, DOSPTYP_386BSD, 255, 255, 255, 0, 50000, },
 };
 
 static struct dos_partition historical_bogus_partition_table_fixed[NDOSPART] = {
         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
         { 0x80, 0, 1, 0, DOSPTYP_386BSD, 254, 255, 255, 0, 50000, },
 };
 
 /*
  * Print one decoded partition entry on a single console line: the index
  * i, flag and type, the start and end in CHS form, and the start sector
  * and size.
  */
 static void
 g_mbr_print(int i, struct dos_partition *dp)
 {
 	int scyl, ssect, ecyl, esect;
 
 	/* Unpack the cylinder and sector from their combined encoding. */
 	scyl = DPCYL(dp->dp_scyl, dp->dp_ssect);
 	ssect = DPSECT(dp->dp_ssect);
 	ecyl = DPCYL(dp->dp_ecyl, dp->dp_esect);
 	esect = DPSECT(dp->dp_esect);
 
 	printf("[%d] f:%02x typ:%d", i, dp->dp_flag, dp->dp_typ);
 	printf(" s(CHS):%d/%d/%d", scyl, dp->dp_shd, ssect);
 	printf(" e(CHS):%d/%d/%d", ecyl, dp->dp_ehd, esect);
 	printf(" s:%d l:%d\n", dp->dp_start, dp->dp_size);
 }
 
 /* Per-geom state of the MBR class. */
 struct g_mbr_softc {
 	int		type [NDOSPART];	/* dp_typ of each table entry */
 	u_int		sectorsize;	/* sector size of the provider below */
 	u_char		sec0[512];	/* copy of the first 512 bytes */
 	u_char		slicesum[16];	/* MD5 digest of sec0 */
 };
 
 /*
  * Validate a candidate boot sector and, if it is acceptable, install its
  * partition table as the slice configuration of the geom.
  *
  * The sector must end in the 0x55 0xaa boot signature and must not carry
  * one of the historical bogus partition tables.  All four entries are
  * first checked with G_SLICE_CONFIG_CHECK and only when every check
  * passes are they applied with G_SLICE_CONFIG_SET.  On success the first
  * 512 bytes are saved in ms->sec0 and their MD5 digest in ms->slicesum.
  *
  * Returns 0 on success, EBUSY for a missing signature or a bogus table,
  * or the error returned by the g_slice_config() check.  Must be called
  * with the topology lock held.
  *
  * XXX: Add gctl_req arg and give good error msgs.
  * XXX: Check that length argument does not bring boot code inside any slice.
  */
 static int
 g_mbr_modify(struct g_geom *gp, struct g_mbr_softc *ms, u_char *sec0, int len __unused)
 {
 	int i, error;
 	off_t l[NDOSPART];
 	struct dos_partition ndp[NDOSPART], *dp;
 	MD5_CTX md5sum;
 
 	g_topology_assert();
 
 	/*
 	 * Both signature bytes must be present.  Rejecting only when both
 	 * are wrong would accept a sector with half a signature.
 	 */
 	if (sec0[0x1fe] != 0x55 || sec0[0x1ff] != 0xaa)
 		return (EBUSY);
 
 	dp = ndp;
 	for (i = 0; i < NDOSPART; i++) {
 		dos_partition_dec(
 		    sec0 + DOSPARTOFF + i * sizeof(struct dos_partition),
 		    dp + i);
 	}
 	if ((!bcmp(dp, historical_bogus_partition_table,
 	    sizeof historical_bogus_partition_table)) ||
 	    (!bcmp(dp, historical_bogus_partition_table_fixed,
 	    sizeof historical_bogus_partition_table_fixed))) {
 		/*
 		 * We will not allow people to write these from "the inside",
 		 * since properly selfdestructing takes too much code.  If
 		 * people really want to do this, they cannot have any
 		 * providers of this geom open, and in that case they can just
 		 * as easily overwrite the MBR in the parent device.
 		 */
 		return (EBUSY);
 	}
 	/* First pass: compute the slice lengths and check every slice. */
 	for (i = 0; i < NDOSPART; i++) {
 		/*
 		 * A Protective MBR (PMBR) has a single partition of
 		 * type 0xEE spanning the whole disk. Such a MBR
 		 * protects a GPT on the disk from MBR tools that
 		 * don't know anything about GPT. We're interpreting
 		 * it a bit more loosely: any partition of type 0xEE
 		 * is to be skipped as it doesn't contain any data
 		 * that we should care about. We still allow other
 		 * partitions to be present in the MBR. A PMBR will
 		 * be handled correctly anyway.
 		 */
 		if (dp[i].dp_typ == DOSPTYP_PMBR)
 			l[i] = 0;
 		else if (dp[i].dp_flag != 0 && dp[i].dp_flag != 0x80)
 			l[i] = 0;
 		else if (dp[i].dp_typ == 0)
 			l[i] = 0;
 		else
 			l[i] = (off_t)dp[i].dp_size * ms->sectorsize;
 		error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
 		    (off_t)dp[i].dp_start * ms->sectorsize, l[i],
 		    ms->sectorsize, "%ss%d", gp->name, 1 + i);
 		if (error)
 			return (error);
 	}
 	/* Second pass: every check passed, so apply the configuration. */
 	for (i = 0; i < NDOSPART; i++) {
 		ms->type[i] = dp[i].dp_typ;
 		g_slice_config(gp, i, G_SLICE_CONFIG_SET,
 		    (off_t)dp[i].dp_start * ms->sectorsize, l[i],
 		    ms->sectorsize, "%ss%d", gp->name, 1 + i);
 	}
 	bcopy(sec0, ms->sec0, 512);
 
 	/*
 	 * Calculate MD5 from the first sector and use it for avoiding
 	 * recursive slices creation.
 	 */
 	MD5Init(&md5sum);
 	MD5Update(&md5sum, ms->sec0, sizeof(ms->sec0));
 	MD5Final(ms->slicesum, &md5sum);
 
 	return (0);
 }
 
 /*
  * Ioctl method of the MBR class.  Only DIOCSMBR is handled: the caller's
  * 512-byte sector is validated and installed with g_mbr_modify() and then
  * written to offset 0 of the geom's first consumer.  Write access is
  * opened for the duration when the consumer has no write count yet.
  *
  * Returns EPERM when the descriptor was not opened for writing, ENOIOCTL
  * for any other command, and otherwise the first error encountered
  * (0 on success).
  */
 static int
 g_mbr_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td)
 {
 	struct g_geom *gp;
 	struct g_mbr_softc *ms;
 	struct g_slicer *gsp;
 	struct g_consumer *cp;
 	int error, opened;
 
 	gp = pp->geom;
 	gsp = gp->softc;
 	ms = gsp->softc;
 
 	if (cmd != DIOCSMBR)
 		return (ENOIOCTL);
 	if ((fflag & FWRITE) == 0)
 		return (EPERM);
 
 	opened = 0;
 	error = 0;
 	DROP_GIANT();
 	g_topology_lock();
 	cp = LIST_FIRST(&gp->consumer);
 	if (cp->acw == 0) {
 		/* Nobody has it open for writing yet; open it ourselves. */
 		error = g_access(cp, 0, 1, 0);
 		if (error == 0)
 			opened = 1;
 	}
 	if (error == 0)
 		error = g_mbr_modify(gp, ms, data, 512);
 	if (error == 0)
 		error = g_write_data(cp, 0, data, 512);
 	if (opened)
 		g_access(cp, 0, -1 , 0);
 	g_topology_unlock();
 	PICKUP_GIANT();
 	return (error);
 }
 
 /*
  * Start hook of the MBR class, installed through g_slice_new().  It
  * answers the "MBR::type", "MBR::offset" and "MBR::slicesum" attributes
  * for the slice the request is addressed to.
  *
  * Returns 1 when one of the attribute handlers took the request, 0
  * otherwise.
  */
 static int
 g_mbr_start(struct bio *bp)
 {
 	struct g_provider *pp;
 	struct g_geom *gp;
 	struct g_mbr_softc *ms;
 	struct g_slicer *gsp;
 	int slice;
 
 	pp = bp->bio_to;
 	slice = pp->index;
 	gp = pp->geom;
 	gsp = gp->softc;
 	ms = gsp->softc;
 
 	if (bp->bio_cmd != BIO_GETATTR)
 		return (0);
 
 	/* The handlers are tried in order; the first match wins. */
 	if (g_handleattr_int(bp, "MBR::type", ms->type[slice]) ||
 	    g_handleattr_off_t(bp, "MBR::offset", gsp->slices[slice].offset) ||
 	    g_handleattr(bp, "MBR::slicesum", ms->slicesum,
 	    sizeof(ms->slicesum)))
 		return (1);
 
 	return (0);
 }
 
 /*
  * Dumpconf method of the MBR class: emit the generic slice information
  * and, for a provider, append its partition type -- as " ty N" when no
  * indent is given, as a <type> element otherwise.
  */
 static void
 g_mbr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp)
 {
 	struct g_mbr_softc *ms;
 	struct g_slicer *gsp;
 
 	gsp = gp->softc;
 	ms = gsp->softc;
 	g_slice_dumpconf(sb, indent, gp, cp, pp);
 	if (pp == NULL)
 		return;
 
 	if (indent != NULL) {
 		sbuf_printf(sb, "%s<type>%d</type>\n", indent,
 		    ms->type[pp->index]);
 	} else {
 		sbuf_printf(sb, " ty %d", ms->type[pp->index]);
 	}
 }
 
 /*
  * Taste a provider for an MBR.
  *
  * A slicer geom with NDOSPART slots is created on the provider, the first
  * sector is read, and unless the provider below already reports the same
  * "MBR::slicesum" digest the sector is handed to g_mbr_modify() to
  * configure the slices.
  *
  * Returns the new geom when at least one provider was created; otherwise
  * the geom is spoiled and NULL is returned.  Called, and returns, with
  * the topology lock held.
  */
 static struct g_geom *
 g_mbr_taste(struct g_class *mp, struct g_provider *pp, int insist)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	int error;
 	struct g_mbr_softc *ms;
 	u_int fwsectors, sectorsize;
 	u_char *buf;
 	u_char hash[16];
 	MD5_CTX md5sum;
 
 	g_trace(G_T_TOPOLOGY, "mbr_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
 	/* Never taste a provider that belongs to an MBR geom itself. */
 	if (!strcmp(pp->geom->class->name, MBR_CLASS_NAME))
 		return (NULL);
 	gp = g_slice_new(mp, NDOSPART, pp, &cp, &ms, sizeof *ms, g_mbr_start);
 	if (gp == NULL)
 		return (NULL);
 	g_topology_unlock();
 	/* The do { } while (0) only exists so that "break" can bail out. */
 	do {
 		/* fwsectors is fetched here but not used in this function. */
 		error = g_getattr("GEOM::fwsectors", cp, &fwsectors);
 		if (error)
 			fwsectors = 17;
 		sectorsize = cp->provider->sectorsize;
 		if (sectorsize < 512)
 			break;
 		ms->sectorsize = sectorsize;
 		buf = g_read_data(cp, 0, sectorsize, &error);
-		if (buf == NULL || error != 0)
+		if (buf == NULL)
 			break;
 
 		/*
 		 * Calculate MD5 from the first sector and use it for avoiding
 		 * recursive slices creation.
 		 */
 		bcopy(buf, ms->sec0, 512);
 		MD5Init(&md5sum);
 		MD5Update(&md5sum, ms->sec0, sizeof(ms->sec0));
 		MD5Final(ms->slicesum, &md5sum);
 
 		/*
 		 * If the provider below reports the very same digest, this
 		 * sector was already sliced there; do not slice it again.
 		 */
 		error = g_getattr("MBR::slicesum", cp, &hash);
 		if (!error && !bcmp(ms->slicesum, hash, sizeof(hash))) {
 			g_free(buf);
 			break;
 		}
 
 		/*
 		 * The result of g_mbr_modify() is not checked; failure shows
 		 * up below as a geom without providers.
 		 */
 		g_topology_lock();
 		g_mbr_modify(gp, ms, buf, 512);
 		g_topology_unlock();
 		g_free(buf);
 		break;
 	} while (0);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	/* No slices were configured: get rid of the geom again. */
 	if (LIST_EMPTY(&gp->provider)) {
 		g_slice_spoiled(cp);
 		return (NULL);
 	}
 	return (gp);
 }
 
 /*
  * Control request method of the MBR class.  The only verb is
  * "write MBR": the "data" parameter (a non-zero multiple of 512 bytes)
  * is validated and installed with g_mbr_modify() and then written to
  * offset 0 of the geom's first consumer.  Write access is opened for
  * the duration when the consumer has no write count yet.
  *
  * Failures are reported through gctl_error().  Exactly one message is
  * issued per failure: "conflict with open slices" when access could not
  * be obtained or the new table was refused, and "sector zero write
  * failed" only when the write itself failed.  Must be called with the
  * topology lock held.
  */
 static void
 g_mbr_config(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_mbr_softc *ms;
 	struct g_slicer *gsp;
 	int opened = 0, error = 0;
 	void *data;
 	int len;
 
 	g_topology_assert();
 	gp = gctl_get_geom(req, mp, "geom");
 	if (gp == NULL)
 		return;
 	if (strcmp(verb, "write MBR")) {
 		gctl_error(req, "Unknown verb");
 		return;
 	}
 	gsp = gp->softc;
 	ms = gsp->softc;
 	data = gctl_get_param(req, "data", &len);
 	if (data == NULL)
 		return;
 	if (len < 512 || (len % 512)) {
 		gctl_error(req, "Wrong request length");
 		return;
 	}
 	cp = LIST_FIRST(&gp->consumer);
 	if (cp->acw == 0) {
 		/* Nobody has it open for writing yet; open it ourselves. */
 		error = g_access(cp, 0, 1, 0);
 		if (error == 0)
 			opened = 1;
 	}
 	if (error == 0)
 		error = g_mbr_modify(gp, ms, data, len);
 	if (error != 0) {
 		gctl_error(req, "conflict with open slices");
 	} else {
 		/* Only a failure of the write itself is reported as such. */
 		error = g_write_data(cp, 0, data, len);
 		if (error != 0)
 			gctl_error(req, "sector zero write failed");
 	}
 	if (opened)
 		g_access(cp, 0, -1 , 0);
 	return;
 }
 
 /* Method table of the MBR class and its registration with GEOM. */
 static struct g_class g_mbr_class	= {
 	.name = MBR_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_mbr_taste,
 	.dumpconf = g_mbr_dumpconf,
 	.ctlreq = g_mbr_config,
 	.ioctl = g_mbr_ioctl,
 };
 
 DECLARE_GEOM_CLASS(g_mbr_class, g_mbr);
 
 /* Number of slice slots an MBREXT geom is created with. */
 #define NDOSEXTPART		32
 /* Per-geom state of the MBREXT class. */
 struct g_mbrext_softc {
 	int		type [NDOSEXTPART];	/* dp_typ of each slice found */
 };
 
 /*
  * Start hook of the MBREXT class, installed through g_slice_new().  It
  * answers the "MBR::type" attribute for the slice the request is
  * addressed to.
  *
  * Returns 1 when the attribute handler took the request, 0 otherwise.
  */
 static int
 g_mbrext_start(struct bio *bp)
 {
 	struct g_provider *pp;
 	struct g_geom *gp;
 	struct g_mbrext_softc *ms;
 	struct g_slicer *gsp;
 	int slice;
 
 	pp = bp->bio_to;
 	slice = pp->index;
 	gp = pp->geom;
 	gsp = gp->softc;
 	ms = gsp->softc;
 
 	if (bp->bio_cmd != BIO_GETATTR)
 		return (0);
 	if (g_handleattr_int(bp, "MBR::type", ms->type[slice]))
 		return (1);
 	return (0);
 }
 
 /*
  * Dumpconf method of the MBREXT class: emit the generic slice
  * information and, for a provider, append its partition type -- as
  * " ty N" when no indent is given, as a <type> element otherwise.
  */
 static void
 g_mbrext_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp)
 {
 	struct g_mbrext_softc *ms;
 	struct g_slicer *gsp;
 
 	g_slice_dumpconf(sb, indent, gp, cp, pp);
 	gsp = gp->softc;
 	ms = gsp->softc;
 	if (pp == NULL)
 		return;
 
 	if (indent != NULL) {
 		sbuf_printf(sb, "%s<type>%d</type>\n", indent,
 		    ms->type[pp->index]);
 	} else {
 		sbuf_printf(sb, " ty %d", ms->type[pp->index]);
 	}
 }
 
 /*
  * Taste a provider for a chain of extended boot records.
  *
  * Only providers of an MBR geom whose "MBR::type" is DOSPTYP_EXT or
  * DOSPTYP_EXTLBA, and whose sector size is exactly 512, are considered.
  * Starting at offset 0 each boot record is read and decoded: entry 0
  * describes a logical partition, which is configured as the next slice,
  * and entry 1 links to the next boot record.  The walk stops at a read
  * failure, a failed signature test, or a link entry that is flagged, not
  * of an extended type, or empty.
  *
  * Returns the new geom when at least one provider was created; otherwise
  * the geom is spoiled and NULL is returned.  Called, and returns, with
  * the topology lock held.
  */
 static struct g_geom *
 g_mbrext_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	int error, i, slice;
 	struct g_mbrext_softc *ms;
 	off_t off;
 	u_char *buf;
 	struct dos_partition dp[4];
 	u_int fwsectors, sectorsize;
 
 	g_trace(G_T_TOPOLOGY, "g_mbrext_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
 	/* Only providers that belong to an MBR geom are of interest. */
 	if (strcmp(pp->geom->class->name, MBR_CLASS_NAME))
 		return (NULL);
 	gp = g_slice_new(mp, NDOSEXTPART, pp, &cp, &ms, sizeof *ms,
 	    g_mbrext_start);
 	if (gp == NULL)
 		return (NULL);
 	g_topology_unlock();
 	off = 0;
 	slice = 0;
 	/* The do { } while (0) only exists so that "break" can bail out. */
 	do {
 		error = g_getattr("MBR::type", cp, &i);
 		if (error || (i != DOSPTYP_EXT && i != DOSPTYP_EXTLBA))
 			break;
 		/* fwsectors is fetched here but not used in this function. */
 		error = g_getattr("GEOM::fwsectors", cp, &fwsectors);
 		if (error)
 			fwsectors = 17;
 		sectorsize = cp->provider->sectorsize;
 		if (sectorsize != 512)
 			break;
 		for (;;) {
 			buf = g_read_data(cp, off, sectorsize, &error);
-			if (buf == NULL || error != 0)
+			if (buf == NULL)
 				break;
 			/*
 			 * NOTE(review): with "&&" the record is rejected only
 			 * when both signature bytes are wrong; a record with
 			 * one correct byte is accepted.  Confirm whether "||"
 			 * was intended.
 			 */
 			if (buf[0x1fe] != 0x55 && buf[0x1ff] != 0xaa) {
 				g_free(buf);
 				break;
 			}
 			for (i = 0; i < NDOSPART; i++) 
 				dos_partition_dec(
 				    buf + DOSPARTOFF + 
 				    i * sizeof(struct dos_partition), dp + i);
 			g_free(buf);
 			/* Debug output, compiled in but disabled by "0 &&". */
 			if (0 && bootverbose) {
 				printf("MBREXT Slice %d on %s:\n",
 				    slice + 5, gp->name);
 				g_mbr_print(0, dp);
 				g_mbr_print(1, dp + 1);
 			}
 			/*
 			 * Entry 0 is a logical partition located relative to
 			 * this boot record.  The slice name is the geom name
 			 * minus its last character, followed by slice + 5.
 			 *
 			 * NOTE(review): slice is never compared against
 			 * NDOSEXTPART before ms->type[slice] is written, so
 			 * a long enough chain would index past the array --
 			 * confirm and bound.
 			 * NOTE(review): the '*' width and precision are given
 			 * as strlen() expressions, which are size_t rather
 			 * than int -- confirm what the formatter expects.
 			 */
 			if ((dp[0].dp_flag & 0x7f) == 0 &&
 			     dp[0].dp_size != 0 && dp[0].dp_typ != 0) {
 				g_topology_lock();
 				g_slice_config(gp, slice, G_SLICE_CONFIG_SET,
 				    (((off_t)dp[0].dp_start) << 9ULL) + off,
 				    ((off_t)dp[0].dp_size) << 9ULL,
 				    sectorsize,
 				    "%*.*s%d",
 				    strlen(gp->name) - 1,
 				    strlen(gp->name) - 1,
 				    gp->name,
 				    slice + 5);
 				g_topology_unlock();
 				ms->type[slice] = dp[0].dp_typ;
 				slice++;
 			}
 			/* Entry 1 links to the next boot record, if any. */
 			if (dp[1].dp_flag != 0)
 				break;
 			if (dp[1].dp_typ != DOSPTYP_EXT &&
 			    dp[1].dp_typ != DOSPTYP_EXTLBA)
 				break;
 			if (dp[1].dp_size == 0)
 				break;
 			/* The link's start sector becomes the next offset. */
 			off = ((off_t)dp[1].dp_start) << 9ULL;
 		}
 		break;
 	} while (0);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	/* No slices were configured: get rid of the geom again. */
 	if (LIST_EMPTY(&gp->provider)) {
 		g_slice_spoiled(cp);
 		return (NULL);
 	}
 	return (gp);
 }
 
 
 /* Method table of the MBREXT class and its registration with GEOM. */
 static struct g_class g_mbrext_class	= {
 	.name = MBREXT_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_mbrext_taste,
 	.dumpconf = g_mbrext_dumpconf,
 };
 
 DECLARE_GEOM_CLASS(g_mbrext_class, g_mbrext);
Index: head/sys/geom/geom_pc98.c
===================================================================
--- head/sys/geom/geom_pc98.c	(revision 152966)
+++ head/sys/geom/geom_pc98.c	(revision 152967)
@@ -1,368 +1,368 @@
 /*-
  * Copyright (c) 2002 Poul-Henning Kamp
  * Copyright (c) 2002 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
  * and NAI Labs, the Security Research Division of Network Associates, Inc.
  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/endian.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/bio.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #include <sys/diskpc98.h>
 #include <geom/geom.h>
 #include <geom/geom_slice.h>
 
 #define PC98_CLASS_NAME "PC98"
 
 struct g_pc98_softc {
 	u_int fwsectors, fwheads, sectorsize;
 	int type[NDOSPART];
 	u_char sec[8192];
 };
 
 static void
 g_pc98_print(int i, struct pc98_partition *dp)
 {
 	char sname[17];
 
 	strncpy(sname, dp->dp_name, 16);
 	sname[16] = '\0';
 
 	hexdump(dp, sizeof(dp[0]), NULL, 0);
 	printf("[%d] mid:%d(0x%x) sid:%d(0x%x)",
 	       i, dp->dp_mid, dp->dp_mid, dp->dp_sid, dp->dp_sid);
 	printf(" s:%d/%d/%d", dp->dp_scyl, dp->dp_shd, dp->dp_ssect);
 	printf(" e:%d/%d/%d", dp->dp_ecyl, dp->dp_ehd, dp->dp_esect);
 	printf(" sname:%s\n", sname);
 }
 
 /*
  * XXX: Add gctl_req arg and give good error msgs.
  * XXX: Check that length argument does not bring boot code inside any slice.
  */
 static int
 g_pc98_modify(struct g_geom *gp, struct g_pc98_softc *ms, u_char *sec, int len __unused)
 {
 	int i, error;
 	off_t s[NDOSPART], l[NDOSPART];
 	struct pc98_partition dp[NDOSPART];
 
 	g_topology_assert();
 	
 	if (sec[0x1fe] != 0x55 || sec[0x1ff] != 0xaa)
 		return (EBUSY);
 
 #if 0
 	/*
 	 * By convetion, it seems that the ipl program has a jump at location
 	 * 0 to the real start of the boot loader.  By convetion, it appears
 	 * that after this jump, there's a string, terminated by at last one,
 	 * if not more, zeros, followed by the target of the jump.  FreeBSD's
 	 * pc98 boot0 uses 'IPL1' followed by 3 zeros here, likely for
 	 * compatibility with some older boot loader.  Linux98's boot loader
 	 * appears to use 'Linux 98' followed by only two.  GRUB/98 appears to
 	 * use 'GRUB/98 ' followed by none.  These last two appear to be
 	 * ported from the ia32 versions, but appear to show similar
 	 * convention.  Grub/98 has an additional NOP after the jmp, which
 	 * isn't present in others.
 	 *
 	 * The following test was inspired by looking only at partitions
 	 * with FreeBSD's boot0 (or one that it is compatible with).  As
 	 * such, if failed when other IPL programs were used.
 	 */
 	if (sec[4] != 'I' || sec[5] != 'P' || sec[6] != 'L' || sec[7] != '1')
 		return (EBUSY);
 #endif
 
 	for (i = 0; i < NDOSPART; i++)
 		pc98_partition_dec(
 			sec + 512 + i * sizeof(struct pc98_partition), &dp[i]);
 
 	for (i = 0; i < NDOSPART; i++) {
 		/* If start and end are identical it's bogus */
 		if (dp[i].dp_ssect == dp[i].dp_esect &&
 		    dp[i].dp_shd == dp[i].dp_ehd &&
 		    dp[i].dp_scyl == dp[i].dp_ecyl)
 			s[i] = l[i] = 0;
 		else if (dp[i].dp_ecyl == 0)
 			s[i] = l[i] = 0;
 		else {
 			s[i] = (off_t)dp[i].dp_scyl *
 				ms->fwsectors * ms->fwheads * ms->sectorsize;
 			l[i] = (off_t)(dp[i].dp_ecyl - dp[i].dp_scyl + 1) *
 				ms->fwsectors * ms->fwheads * ms->sectorsize;
 		}
 		if (bootverbose) {
 			printf("PC98 Slice %d on %s:\n", i + 1, gp->name);
 			g_pc98_print(i, dp + i);
 		}
 		if (s[i] < 0 || l[i] < 0)
 			error = EBUSY;
 		else
 			error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
 				       s[i], l[i], ms->sectorsize,
 				       "%ss%d", gp->name, i + 1);
 		if (error)
 			return (error);
 	}
 
 	for (i = 0; i < NDOSPART; i++) {
 		ms->type[i] = (dp[i].dp_sid << 8) | dp[i].dp_mid;
 		g_slice_config(gp, i, G_SLICE_CONFIG_SET, s[i], l[i],
 			       ms->sectorsize, "%ss%d", gp->name, i + 1);
 	}
 
 	bcopy(sec, ms->sec, sizeof (ms->sec));
 
 	return (0);
 }
 
 static int
 g_pc98_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td)
 {
 	struct g_geom *gp;
 	struct g_pc98_softc *ms;
 	struct g_slicer *gsp;
 	struct g_consumer *cp;
 	int error, opened;
 
 	gp = pp->geom;
 	gsp = gp->softc;
 	ms = gsp->softc;
 
 	opened = 0;
 	error = 0;
 	switch(cmd) {
 	case DIOCSPC98: {
 		if (!(fflag & FWRITE))
 			return (EPERM);
 		DROP_GIANT();
 		g_topology_lock();
 		cp = LIST_FIRST(&gp->consumer);
 		if (cp->acw == 0) {
 			error = g_access(cp, 0, 1, 0);
 			if (error == 0)
 				opened = 1;
 		}
 		if (!error)
 			error = g_pc98_modify(gp, ms, data, 8192);
 		if (!error)
 			error = g_write_data(cp, 0, data, 8192);
 		if (opened)
 			g_access(cp, 0, -1 , 0);
 		g_topology_unlock();
 		PICKUP_GIANT();
 		return(error);
 	}
 	default:
 		return (ENOIOCTL);
 	}
 }
 
 static int
 g_pc98_start(struct bio *bp)
 {
 	struct g_provider *pp;
 	struct g_geom *gp;
 	struct g_pc98_softc *mp;
 	struct g_slicer *gsp;
 	int idx;
 
 	pp = bp->bio_to;
 	idx = pp->index;
 	gp = pp->geom;
 	gsp = gp->softc;
 	mp = gsp->softc;
 	if (bp->bio_cmd == BIO_GETATTR) {
 		if (g_handleattr_int(bp, "PC98::type", mp->type[idx]))
 			return (1);
 		if (g_handleattr_off_t(bp, "PC98::offset",
 				       gsp->slices[idx].offset))
 			return (1);
 	}
 
 	return (0);
 }
 
 static void
 g_pc98_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 		struct g_consumer *cp __unused, struct g_provider *pp)
 {
 	struct g_pc98_softc *mp;
 	struct g_slicer *gsp;
 	struct pc98_partition dp;
 	char sname[17];
 
 	gsp = gp->softc;
 	mp = gsp->softc;
 	g_slice_dumpconf(sb, indent, gp, cp, pp);
 	if (pp != NULL) {
 		pc98_partition_dec(
 			mp->sec + 512 +
 			pp->index * sizeof(struct pc98_partition), &dp);
 		strncpy(sname, dp.dp_name, 16);
 		sname[16] = '\0';
 		if (indent == NULL) {
 			sbuf_printf(sb, " ty %d", mp->type[pp->index]);
 			sbuf_printf(sb, " sn %s", sname);
 		} else {
 			sbuf_printf(sb, "%s<type>%d</type>\n", indent,
 				    mp->type[pp->index]);
 			sbuf_printf(sb, "%s<sname>%s</sname>\n", indent,
 				    sname);
 		}
 	}
 }
 
 static struct g_geom *
 g_pc98_taste(struct g_class *mp, struct g_provider *pp, int flags)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	int error;
 	struct g_pc98_softc *ms;
 	u_int fwsectors, fwheads, sectorsize;
 	u_char *buf;
 
 	g_trace(G_T_TOPOLOGY, "g_pc98_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
 	if (flags == G_TF_NORMAL &&
 	    !strcmp(pp->geom->class->name, PC98_CLASS_NAME))
 		return (NULL);
 	gp = g_slice_new(mp, NDOSPART, pp, &cp, &ms, sizeof *ms, g_pc98_start);
 	if (gp == NULL)
 		return (NULL);
 	g_topology_unlock();
 	do {
 		if (gp->rank != 2 && flags == G_TF_NORMAL)
 			break;
 		error = g_getattr("GEOM::fwsectors", cp, &fwsectors);
 		if (error || fwsectors == 0) {
 			fwsectors = 17;
 			if (bootverbose)
 				printf("g_pc98_taste: guessing %d sectors\n",
 				    fwsectors);
 		}
 		error = g_getattr("GEOM::fwheads", cp, &fwheads);
 		if (error || fwheads == 0) {
 			fwheads = 8;
 			if (bootverbose)
 				printf("g_pc98_taste: guessing %d heads\n",
 				    fwheads);
 		}
 		sectorsize = cp->provider->sectorsize;
 		if (sectorsize % 512 != 0)
 			break;
 		buf = g_read_data(cp, 0, 8192, &error);
-		if (buf == NULL || error != 0)
+		if (buf == NULL)
 			break;
 		ms->fwsectors = fwsectors;
 		ms->fwheads = fwheads;
 		ms->sectorsize = sectorsize;
 		g_topology_lock();
 		g_pc98_modify(gp, ms, buf, 8192);
 		g_topology_unlock();
 		g_free(buf);
 		break;
 	} while (0);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (LIST_EMPTY(&gp->provider)) {
 		g_slice_spoiled(cp);
 		return (NULL);
 	}
 	return (gp);
 }
 
 static void
 g_pc98_config(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_pc98_softc *ms;
 	struct g_slicer *gsp;
 	int opened = 0, error = 0;
 	void *data;
 	int len;
 
 	g_topology_assert();
 	gp = gctl_get_geom(req, mp, "geom");
 	if (gp == NULL)
 		return;
 	if (strcmp(verb, "write PC98")) {
 		gctl_error(req, "Unknown verb");
 		return;
 	}
 	gsp = gp->softc;
 	ms = gsp->softc;
 	data = gctl_get_param(req, "data", &len);
 	if (data == NULL)
 		return;
 	if (len < 8192 || (len % 512)) {
 		gctl_error(req, "Wrong request length");
 		return;
 	}
 	cp = LIST_FIRST(&gp->consumer);
 	if (cp->acw == 0) {
 		error = g_access(cp, 0, 1, 0);
 		if (error == 0)
 			opened = 1;
 	}
 	if (!error)
 		error = g_pc98_modify(gp, ms, data, len);
 	if (error)
 		gctl_error(req, "conflict with open slices");
 	if (!error)
 		error = g_write_data(cp, 0, data, len);
 	if (error)
 		gctl_error(req, "sector zero write failed");
 	if (opened)
 		g_access(cp, 0, -1 , 0);
 	return;
 }
 
 static struct g_class g_pc98_class = {
 	.name = PC98_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_pc98_taste,
 	.dumpconf = g_pc98_dumpconf,
 	.ctlreq = g_pc98_config,
 	.ioctl = g_pc98_ioctl,
 };
 
 DECLARE_GEOM_CLASS(g_pc98_class, g_pc98);
Index: head/sys/geom/geom_sunlabel.c
===================================================================
--- head/sys/geom/geom_sunlabel.c	(revision 152966)
+++ head/sys/geom/geom_sunlabel.c	(revision 152967)
@@ -1,322 +1,322 @@
 /*-
  * Copyright (c) 2002 Poul-Henning Kamp
  * Copyright (c) 2002 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
  * and NAI Labs, the Security Research Division of Network Associates, Inc.
  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The names of the authors may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/endian.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/bio.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/md5.h>
 #include <sys/sun_disklabel.h>
 #include <geom/geom.h>
 #include <geom/geom_slice.h>
 #include <machine/endian.h>
 
 #define SUNLABEL_CLASS_NAME "SUN"
 
 struct g_sunlabel_softc {
 	int sectorsize;
 	int nheads;
 	int nsects;
 	int nalt;
 	u_char labelsum[16];
 };
 
 static int
 g_sunlabel_modify(struct g_geom *gp, struct g_sunlabel_softc *ms, u_char *sec0)
 {
 	int i, error;
 	u_int u, v, csize;
 	struct sun_disklabel sl;
 	MD5_CTX md5sum;
 
 	error = sunlabel_dec(sec0, &sl);
 	if (error)
 		return (error);
 
 	csize = sl.sl_ntracks * sl.sl_nsectors;
 
 	for (i = 0; i < SUN_NPART; i++) {
 		v = sl.sl_part[i].sdkp_cyloffset;
 		u = sl.sl_part[i].sdkp_nsectors;
 		error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
 		    ((off_t)v * csize) << 9ULL,
 		    ((off_t)u) << 9ULL,
 		    ms->sectorsize,
 		    "%s%c", gp->name, 'a' + i);
 		if (error)
 			return (error);
 	}
 	for (i = 0; i < SUN_NPART; i++) {
 		v = sl.sl_part[i].sdkp_cyloffset;
 		u = sl.sl_part[i].sdkp_nsectors;
 		g_slice_config(gp, i, G_SLICE_CONFIG_SET,
 		    ((off_t)v * csize) << 9ULL,
 		    ((off_t)u) << 9ULL,
 		    ms->sectorsize,
 		    "%s%c", gp->name, 'a' + i);
 	}
 	ms->nalt = sl.sl_acylinders;
 	ms->nheads = sl.sl_ntracks;
 	ms->nsects = sl.sl_nsectors;
 
 	/*
 	 * Calculate MD5 from the first sector and use it for avoiding
 	 * recursive labels creation.
 	 */
 	MD5Init(&md5sum);
 	MD5Update(&md5sum, sec0, ms->sectorsize);
 	MD5Final(ms->labelsum, &md5sum);
 
 	return (0);
 }
 
 static void
 g_sunlabel_hotwrite(void *arg, int flag)
 {
 	struct bio *bp;
 	struct g_geom *gp;
 	struct g_slicer *gsp;
 	struct g_slice *gsl;
 	struct g_sunlabel_softc *ms;
 	u_char *p;
 	int error;
 
 	KASSERT(flag != EV_CANCEL, ("g_sunlabel_hotwrite cancelled"));
 	bp = arg;
 	gp = bp->bio_to->geom;
 	gsp = gp->softc;
 	ms = gsp->softc;
 	gsl = &gsp->slices[bp->bio_to->index];
 	/*
 	 * XXX: For all practical purposes, this whould be equvivalent to
 	 * XXX: "p = (u_char *)bp->bio_data;" because the label is always
 	 * XXX: in the first sector and we refuse sectors smaller than the
 	 * XXX: label.
 	 */
 	p = (u_char *)bp->bio_data - (bp->bio_offset + gsl->offset);
 
 	error = g_sunlabel_modify(gp, ms, p);
 	if (error) {
 		g_io_deliver(bp, EPERM);
 		return;
 	}
 	g_slice_finish_hot(bp);
 }
 
 static void
 g_sunlabel_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp)
 {
 	struct g_slicer *gsp;
 	struct g_sunlabel_softc *ms;
 
 	gsp = gp->softc;
 	ms = gsp->softc;
 	g_slice_dumpconf(sb, indent, gp, cp, pp);
 	if (indent == NULL) {
 		sbuf_printf(sb, " sc %u hd %u alt %u",
 		    ms->nsects, ms->nheads, ms->nalt);
 	}
 }
 
 struct g_hh01 {
 	struct g_geom *gp;
 	struct g_sunlabel_softc *ms;
 	u_char *label;
 	int error;
 };
 
 static void
 g_sunlabel_callconfig(void *arg, int flag)
 {
 	struct g_hh01 *hp;
 
 	hp = arg;
 	hp->error = g_sunlabel_modify(hp->gp, hp->ms, hp->label);
 	if (!hp->error)
 		hp->error = g_write_data(LIST_FIRST(&hp->gp->consumer),
 		    0, hp->label, SUN_SIZE);
 }
 
 /*
  * NB! curthread is user process which GCTL'ed.
  */
 static void
 g_sunlabel_config(struct gctl_req *req, struct g_class *mp, const char *verb)
 {
 	u_char *label;
 	int error, i;
 	struct g_hh01 h0h0;
 	struct g_slicer *gsp;
 	struct g_geom *gp;
 	struct g_consumer *cp;
 
 	g_topology_assert();
 	gp = gctl_get_geom(req, mp, "geom");
 	if (gp == NULL)
 		return;
 	cp = LIST_FIRST(&gp->consumer);
 	gsp = gp->softc;
 	if (!strcmp(verb, "write label")) {
 		label = gctl_get_paraml(req, "label", SUN_SIZE);
 		if (label == NULL)
 			return;
 		h0h0.gp = gp;
 		h0h0.ms = gsp->softc;
 		h0h0.label = label;
 		h0h0.error = -1;
 		/* XXX: Does this reference register with our selfdestruct code ? */
 		error = g_access(cp, 1, 1, 1);
 		if (error) {
 			gctl_error(req, "could not access consumer");
 			return;
 		}
 		g_sunlabel_callconfig(&h0h0, 0);
 		g_access(cp, -1, -1, -1);
 	} else if (!strcmp(verb, "write bootcode")) {
 		label = gctl_get_paraml(req, "bootcode", SUN_BOOTSIZE);
 		if (label == NULL)
 			return;
 		/* XXX: Does this reference register with our selfdestruct code ? */
 		error = g_access(cp, 1, 1, 1);
 		if (error) {
 			gctl_error(req, "could not access consumer");
 			return;
 		}
 		for (i = 0; i < SUN_NPART; i++) {
 			if (gsp->slices[i].length <= SUN_BOOTSIZE)
 				continue;
 			g_write_data(cp,
 			    gsp->slices[i].offset + SUN_SIZE, label + SUN_SIZE,
 			    SUN_BOOTSIZE - SUN_SIZE);
 		}
 		g_access(cp, -1, -1, -1);
 	} else {
 		gctl_error(req, "Unknown verb parameter");
 	}
 }
 
 static int
 g_sunlabel_start(struct bio *bp)
 {
 	struct g_sunlabel_softc *mp;
 	struct g_slicer *gsp;
 
 	gsp = bp->bio_to->geom->softc;
 	mp = gsp->softc;
 	if (bp->bio_cmd == BIO_GETATTR) {
 		if (g_handleattr(bp, "SUN::labelsum", mp->labelsum,
 		    sizeof(mp->labelsum)))
 			return (1);
 	}
 	return (0);
 }
 
 static struct g_geom *
 g_sunlabel_taste(struct g_class *mp, struct g_provider *pp, int flags)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_sunlabel_softc *ms;
 	struct g_slicer *gsp;
 	u_char *buf, hash[16];
 	MD5_CTX md5sum;
 	int error;
 
 	g_trace(G_T_TOPOLOGY, "g_sunlabel_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
 	if (flags == G_TF_NORMAL &&
 	    !strcmp(pp->geom->class->name, SUNLABEL_CLASS_NAME))
 		return (NULL);
 	gp = g_slice_new(mp, 8, pp, &cp, &ms, sizeof *ms, g_sunlabel_start);
 	if (gp == NULL)
 		return (NULL);
 	gsp = gp->softc;
 	do {
 		ms->sectorsize = cp->provider->sectorsize;
 		if (ms->sectorsize < 512)
 			break;
 		g_topology_unlock();
 		buf = g_read_data(cp, 0, ms->sectorsize, &error);
 		g_topology_lock();
-		if (buf == NULL || error != 0)
+		if (buf == NULL)
 			break;
 
 		/*
 		 * Calculate MD5 from the first sector and use it for avoiding
 		 * recursive labels creation.
 		 */
 		MD5Init(&md5sum);
 		MD5Update(&md5sum, buf, ms->sectorsize);
 		MD5Final(ms->labelsum, &md5sum);
  
 		error = g_getattr("SUN::labelsum", cp, &hash);
 		if (!error && !bcmp(ms->labelsum, hash, sizeof(hash))) {
 			g_free(buf);
 			break;
 		}
 
 		g_sunlabel_modify(gp, ms, buf);
 		g_free(buf);
 
 		break;
 	} while (0);
 	g_access(cp, -1, 0, 0);
 	if (LIST_EMPTY(&gp->provider)) {
 		g_slice_spoiled(cp);
 		return (NULL);
 	}
 	g_slice_conf_hot(gp, 0, 0, SUN_SIZE,
 	    G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL);
 	gsp->hot = g_sunlabel_hotwrite;
 	return (gp);
 }
 
 static struct g_class g_sunlabel_class = {
 	.name = SUNLABEL_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_sunlabel_taste,
 	.ctlreq = g_sunlabel_config,
 	.dumpconf = g_sunlabel_dumpconf,
 };
 
 DECLARE_GEOM_CLASS(g_sunlabel_class, g_sunlabel);
Index: head/sys/geom/geom_vol_ffs.c
===================================================================
--- head/sys/geom/geom_vol_ffs.c	(revision 152966)
+++ head/sys/geom/geom_vol_ffs.c	(revision 152967)
@@ -1,154 +1,154 @@
 /*-
  * Copyright (c) 2002, 2003 Gordon Tetlow
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/bio.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #include <ufs/ufs/dinode.h>
 #include <ufs/ffs/fs.h>
 
 #include <geom/geom.h>
 #include <geom/geom_slice.h>
 
 #define VOL_FFS_CLASS_NAME "VOL_FFS"
 
 static int superblocks[] = SBLOCKSEARCH;
 
 struct g_vol_ffs_softc {
 	char *	vol;
 };
 
 static int
 g_vol_ffs_start(struct bio *bp __unused)
 {
 	return(0);
 }
 
 static struct g_geom *
 g_vol_ffs_taste(struct g_class *mp, struct g_provider *pp, int flags)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_vol_ffs_softc *ms;
 	int error, sb, superblock;
 	struct fs *fs;
 
 	g_trace(G_T_TOPOLOGY, "vol_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
 
 	/* 
 	 * XXX This is a really weak way to make sure we don't recurse.
 	 * Probably ought to use BIO_GETATTR to check for this.
 	 */
 	if (flags == G_TF_NORMAL &&
 	    !strcmp(pp->geom->class->name, VOL_FFS_CLASS_NAME))
 		return (NULL);
 
 	gp = g_slice_new(mp, 1, pp, &cp, &ms, sizeof(*ms), g_vol_ffs_start);
 	if (gp == NULL)
 		return (NULL);
 	g_topology_unlock();
 	/*
 	 * Walk through the standard places that superblocks hide and look
 	 * for UFS magic. If we find magic, then check that the size in the
 	 * superblock corresponds to the size of the underlying provider.
 	 * Finally, look for a volume label and create an appropriate 
 	 * provider based on that.
 	 */
 	for (sb=0; (superblock = superblocks[sb]) != -1; sb++) {
 		/*
 		 * Take care not to issue an invalid I/O request.  The
 		 * offset and size of the superblock candidate must be
 		 * multiples of the provider's sector size, otherwise an
 		 * FFS can't exist on the provider anyway.
 		 */
 		if (superblock % cp->provider->sectorsize != 0 ||
 		    SBLOCKSIZE % cp->provider->sectorsize != 0)
 			continue;
 
 		fs = (struct fs *) g_read_data(cp, superblock,
 			SBLOCKSIZE, &error);
-		if (fs == NULL || error != 0)
+		if (fs == NULL)
 			continue;
 		/* Check for magic and make sure things are the right size */
 		if (fs->fs_magic == FS_UFS1_MAGIC) {
 			if (fs->fs_old_size * fs->fs_fsize !=
 			    (int32_t) pp->mediasize) {
 				g_free(fs);
 				continue;
 			}
 		} else if (fs->fs_magic == FS_UFS2_MAGIC) {
 			if (fs->fs_size * fs->fs_fsize !=
 			    (int64_t) pp->mediasize) {
 				g_free(fs);
 				continue;
 			}
 		} else {
 			g_free(fs);
 			continue;
 		}
 		/* Check for volume label */
 		if (fs->fs_volname[0] == '\0') {
 			g_free(fs);
 			continue;
 		}
 		/* XXX We need to check for namespace conflicts. */
 		/* XXX How do you handle a mirror set? */
 		/* XXX We don't validate the volume name. */
 		g_topology_lock();
 		/* Alright, we have a label and a volume name, reconfig. */
 		g_slice_config(gp, 0, G_SLICE_CONFIG_SET, (off_t) 0,
 		    pp->mediasize, pp->sectorsize, "vol/%s",
 		    fs->fs_volname);
 		g_free(fs);
 		g_topology_unlock();
 		break;
 	}
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (LIST_EMPTY(&gp->provider)) {
 		g_slice_spoiled(cp);
 		return (NULL);
 	}
 	return (gp);
 }
 
 static struct g_class g_vol_ffs_class	= {
 	.name = VOL_FFS_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_vol_ffs_taste,
 };
 
 DECLARE_GEOM_CLASS(g_vol_ffs_class, g_vol_ffs);
Index: head/sys/geom/label/g_label_iso9660.c
===================================================================
--- head/sys/geom/label/g_label_iso9660.c	(revision 152966)
+++ head/sys/geom/label/g_label_iso9660.c	(revision 152967)
@@ -1,84 +1,84 @@
 /*-
  * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 
 #include <geom/geom.h>
 #include <geom/label/g_label.h>
 
 #define G_LABEL_ISO9660_DIR	"iso9660"
 
 #define	ISO9660_MAGIC	"\x01" "CD001" "\x01\x00"
 #define	ISO9660_OFFSET	0x8000
 #define	VOLUME_LEN	32
 
 
 static void
 g_label_iso9660_taste(struct g_consumer *cp, char *label, size_t size)
 {
 	struct g_provider *pp;
 	char *sector, *volume;
 	int i, error;
 
 	g_topology_assert_not();
 	pp = cp->provider;
 	label[0] = '\0';
 
 	if ((ISO9660_OFFSET % pp->sectorsize) != 0)
 		return;
 	sector = (char *)g_read_data(cp, ISO9660_OFFSET, pp->sectorsize,
 	    &error);
-	if (sector == NULL || error != 0)
+	if (sector == NULL)
 		return;
 	if (bcmp(sector, ISO9660_MAGIC, sizeof(ISO9660_MAGIC) - 1) != 0) {
 		g_free(sector);
 		return;
 	}
 	G_LABEL_DEBUG(1, "ISO9660 file system detected on %s.", pp->name);
 	volume = sector + 0x28;
 	bzero(label, size);
 	strlcpy(label, volume, MIN(size, VOLUME_LEN));
 	g_free(sector);
 	for (i = size - 1; i > 0; i--) {
 		if (label[i] == '\0')
 			continue;
 		else if (label[i] == ' ')
 			label[i] = '\0';
 		else
 			break;
 	}
 }
 
 const struct g_label_desc g_label_iso9660 = {
 	.ld_taste = g_label_iso9660_taste,
 	.ld_dir = G_LABEL_ISO9660_DIR
 };
Index: head/sys/geom/label/g_label_msdosfs.c
===================================================================
--- head/sys/geom/label/g_label_msdosfs.c	(revision 152966)
+++ head/sys/geom/label/g_label_msdosfs.c	(revision 152967)
@@ -1,101 +1,101 @@
 /*-
  * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 
 #include <geom/geom.h>
 #include <geom/label/g_label.h>
 
 #define G_LABEL_MSDOSFS_DIR	"msdosfs"
 
 #define	FAT12	"FAT12   "
 #define	FAT16	"FAT16   "
 #define	FAT32	"FAT32   "
 #define	VOLUME_LEN	11
 #define NO_NAME "NO NAME    "
 
 
 static void
 g_label_msdosfs_taste(struct g_consumer *cp, char *label, size_t size)
 {
 	struct g_provider *pp;
 	char *sector, *volume;
 	int i, error;
 
 	g_topology_assert_not();
 	pp = cp->provider;
 	label[0] = '\0';
 
 	sector = (char *)g_read_data(cp, 0, pp->sectorsize, &error);
-	if (sector == NULL || error != 0)
+	if (sector == NULL)
 		return;
 	if (strncmp(sector + 0x36, FAT12, strlen(FAT12)) == 0) {
 		G_LABEL_DEBUG(1, "MSDOS (FAT12) file system detected on %s.",
 		    pp->name);
 		volume = sector + 0x2b;
 	} else if (strncmp(sector + 0x36, FAT16, strlen(FAT16)) == 0) {
 		G_LABEL_DEBUG(1, "MSDOS (FAT16) file system detected on %s.",
 		    pp->name);
 		volume = sector + 0x2b;
 	} else if (strncmp(sector + 0x52, FAT32, strlen(FAT32)) == 0) {
 		G_LABEL_DEBUG(1, "MSDOS (FAT32) file system detected on %s.",
 		    pp->name);
 		volume = sector + 0x47;
 	} else {
 		g_free(sector);
 		return;
 	}
 	if (strncmp(volume, NO_NAME, VOLUME_LEN) == 0) {
 		g_free(sector);
 		return;
 	}
 	if (volume[0] == '\0') {
 		g_free(sector);
 		return;
 	}
 	bzero(label, size);
 	strlcpy(label, volume, MIN(size, VOLUME_LEN));
 	g_free(sector);
 	for (i = size - 1; i > 0; i--) {
 		if (label[i] == '\0')
 			continue;
 		else if (label[i] == ' ')
 			label[i] = '\0';
 		else
 			break;
 	}
 }
 
 const struct g_label_desc g_label_msdosfs = {
 	.ld_taste = g_label_msdosfs_taste,
 	.ld_dir = G_LABEL_MSDOSFS_DIR
 };
Index: head/sys/geom/label/g_label_ufs.c
===================================================================
--- head/sys/geom/label/g_label_ufs.c	(revision 152966)
+++ head/sys/geom/label/g_label_ufs.c	(revision 152967)
@@ -1,112 +1,112 @@
 /*-
  * Copyright (c) 2002, 2003 Gordon Tetlow
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 
 #include <ufs/ufs/dinode.h>
 #include <ufs/ffs/fs.h>
 
 #include <geom/geom.h>
 #include <geom/label/g_label.h>
 
 /* Directory name published through g_label_ufs.ld_dir. */
 #define G_LABEL_UFS_DIR	"ufs"
 
 /*
  * Candidate superblock offsets, taken from SBLOCKSEARCH.  The taste loop
  * stops at the first entry equal to -1.
  */
 static const int superblocks[] = SBLOCKSEARCH;
 
 /*
  * Taste a provider for a UFS1/UFS2 volume label.
  *
  * Probes every candidate superblock offset through consumer 'cp'.  The
  * first superblock that carries a UFS magic number, whose recorded size
  * matches the provider's media size and whose fs_volname is not empty
  * has its volume name copied into 'label' (a buffer of 'size' bytes).
  * 'label' is left as an empty string when no such superblock is found.
  */
 static void
 g_label_ufs_taste(struct g_consumer *cp, char *label, size_t size)
 {
 	struct g_provider *pp;
 	int error, sb, superblock;
 	struct fs *fs;
 
 	g_topology_assert_not();
 	pp = cp->provider;
 	label[0] = '\0';
 	/*
 	 * Walk through the standard places that superblocks hide and look
 	 * for UFS magic. If we find magic, then check that the size in the
 	 * superblock corresponds to the size of the underlying provider.
 	 * Finally, look for a volume label and create an appropriate 
 	 * provider based on that.
 	 */
 	for (sb = 0; (superblock = superblocks[sb]) != -1; sb++) {
 		/*
 		 * Take care not to issue an invalid I/O request.  The
 		 * offset and size of the superblock candidate must be
 		 * multiples of the provider's sector size, otherwise an
 		 * FFS can't exist on the provider anyway.
 		 */
 		if (superblock % cp->provider->sectorsize != 0 ||
 		    SBLOCKSIZE % cp->provider->sectorsize != 0)
 			continue;
 
 		fs = (struct fs *)g_read_data(cp, superblock, SBLOCKSIZE,
 		    &error);
-		if (fs == NULL || error != 0)
+		if (fs == NULL)
 			continue;
 		/* Check for magic and make sure things are the right size */
 		if (fs->fs_magic == FS_UFS1_MAGIC) {
 			G_LABEL_DEBUG(1, "UFS1 file system detected on %s.",
 			    pp->name);
 			/*
 			 * NOTE(review): the media size is truncated to
 			 * int32_t before the comparison - confirm that this
 			 * is intended for large providers.
 			 */
 			if (fs->fs_old_size * fs->fs_fsize !=
 			    (int32_t)pp->mediasize) {
 				g_free(fs);
 				continue;
 			}
 		} else if (fs->fs_magic == FS_UFS2_MAGIC) {
 			G_LABEL_DEBUG(1, "UFS2 file system detected on %s.",
 			    pp->name);
 			/* Guard the division against a bogus fragment size. */
 			if (fs->fs_fsize <= 0 ||
 			    pp->mediasize / fs->fs_fsize != fs->fs_size) {
 				g_free(fs);
 				continue;
 			}
 		} else {
 			g_free(fs);
 			continue;
 		}
 		/* Check for volume label */
 		if (fs->fs_volname[0] == '\0') {
 			g_free(fs);
 			continue;
 		}
 		/* Found a labelled file system; stop searching. */
 		strlcpy(label, fs->fs_volname, size);
 		g_free(fs);
 		break;
 	}
 }
 
 /* Label descriptor: taste callback and directory name for UFS labels. */
 const struct g_label_desc g_label_ufs = {
 	.ld_taste = g_label_ufs_taste,
 	.ld_dir = G_LABEL_UFS_DIR
 };
Index: head/sys/geom/mirror/g_mirror.c
===================================================================
--- head/sys/geom/mirror/g_mirror.c	(revision 152966)
+++ head/sys/geom/mirror/g_mirror.c	(revision 152967)
@@ -1,2883 +1,2883 @@
 /*-
  * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/eventhandler.h>
 #include <vm/uma.h>
 #include <geom/geom.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
 #include <sys/sched.h>
 #include <geom/mirror/g_mirror.h>
 
 
 /* Malloc type used for every allocation made by this file. */
 static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
 
 /*
  * Knobs published under kern.geom.mirror.  debug, timeout and idletime
  * are also registered as tunables; reqs_per_sync and syncs_per_sec are
  * registered as sysctls only.
  */
 SYSCTL_DECL(_kern_geom);
 SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
 u_int g_mirror_debug = 0;
 TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
     "Debug level");
 static u_int g_mirror_timeout = 4;
 TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
     0, "Time to wait on all mirror components");
 static u_int g_mirror_idletime = 5;
 TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
     &g_mirror_idletime, 0, "Mark components as clean when idling");
 static u_int g_mirror_reqs_per_sync = 5;
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
     &g_mirror_reqs_per_sync, 0,
     "Number of regular I/O requests per synchronization request");
 static u_int g_mirror_syncs_per_sec = 1000;
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
     &g_mirror_syncs_per_sec, 0,
     "Number of synchronizations requests per second");
 
 /*
  * msleep() wrapper that logs, at debug level 4, the sleep channel before
  * going to sleep and again after waking up.  The return value of msleep()
  * is discarded.
  */
 #define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
 	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
 } while (0)
 
 /* Event handler registration tag; NULL while nothing is registered. */
 static eventhandler_tag g_mirror_ehtag = NULL;
 
 /* Class methods referenced by g_mirror_class below. */
 static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 static g_taste_t g_mirror_taste;
 static void g_mirror_init(struct g_class *mp);
 static void g_mirror_fini(struct g_class *mp);
 
 /* GEOM class descriptor for the mirror class. */
 struct g_class g_mirror_class = {
 	.name = G_MIRROR_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_mirror_config,
 	.taste = g_mirror_taste,
 	.destroy_geom = g_mirror_destroy_geom,
 	.init = g_mirror_init,
 	.fini = g_mirror_fini
 };
 
 
 /* Forward declarations of functions used before their definition. */
 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
 static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
 
 
 /*
  * Translate a disk state constant into its printable name.
  * Values that are not a known disk state yield "INVALID".
  */
 static const char *
 g_mirror_disk_state2str(int state)
 {
 	const char *name;
 
 	name = "INVALID";
 	if (state == G_MIRROR_DISK_STATE_NONE)
 		name = "NONE";
 	else if (state == G_MIRROR_DISK_STATE_NEW)
 		name = "NEW";
 	else if (state == G_MIRROR_DISK_STATE_ACTIVE)
 		name = "ACTIVE";
 	else if (state == G_MIRROR_DISK_STATE_STALE)
 		name = "STALE";
 	else if (state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
 		name = "SYNCHRONIZING";
 	else if (state == G_MIRROR_DISK_STATE_DISCONNECTED)
 		name = "DISCONNECTED";
 	else if (state == G_MIRROR_DISK_STATE_DESTROY)
 		name = "DESTROY";
 	return (name);
 }
 
 /*
  * Translate a device state constant into its printable name.
  * Values that are not a known device state yield "INVALID".
  */
 static const char *
 g_mirror_device_state2str(int state)
 {
 
 	if (state == G_MIRROR_DEVICE_STATE_STARTING)
 		return ("STARTING");
 	if (state == G_MIRROR_DEVICE_STATE_RUNNING)
 		return ("RUNNING");
 	return ("INVALID");
 }
 
 static const char *
 g_mirror_get_diskname(struct g_mirror_disk *disk)
 {
 
 	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
 		return ("[unknown]");
 	return (disk->d_name);
 }
 
 /*
  * --- Events handling functions ---
  * Events in geom_mirror are used to maintain disks and device status
  * from one thread to simplify locking.
  */
 /* Release an event structure allocated from the M_MIRROR malloc type. */
 static void
 g_mirror_event_free(struct g_mirror_event *ep)
 {
 
 	free(ep, M_MIRROR);
 }
 
 /*
  * Queue a state-change event and wake up whoever sleeps on the softc.
  *
  * 'arg' is a softc when G_MIRROR_EVENT_DEVICE is set in 'flags' and a
  * disk otherwise.  With G_MIRROR_EVENT_DONTWAIT the function returns 0
  * right after queueing.  Otherwise it requires the topology lock, drops
  * it, sleeps until the event is flagged G_MIRROR_EVENT_DONE, retakes the
  * lock, frees the event and returns its e_error.
  */
 int
 g_mirror_event_send(void *arg, int state, int flags)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *disk;
 	struct g_mirror_event *ep;
 	int error;
 
 	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
 	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
 	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
 		disk = NULL;
 		sc = arg;
 	} else {
 		disk = arg;
 		sc = disk->d_softc;
 	}
 	ep->e_disk = disk;
 	ep->e_state = state;
 	ep->e_flags = flags;
 	ep->e_error = 0;
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
 	mtx_unlock(&sc->sc_events_mtx);
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	/* The wakeup on the softc is issued with the queue mutex held. */
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
 		return (0);
 	g_topology_assert();
 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
 	g_topology_unlock();
 	/*
 	 * The flag is tested without the mutex held; the sleep is bounded
 	 * by a timeout of hz * 5, after which the flag is tested again.
 	 * PDROP makes msleep() return with the mutex released, hence the
 	 * lock is taken anew on every iteration.
 	 */
 	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
 		mtx_lock(&sc->sc_events_mtx);
 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
 		    hz * 5);
 	}
 	/* Don't even try to use 'sc' here, because it could be already dead. */
 	g_topology_lock();
 	error = ep->e_error;
 	g_mirror_event_free(ep);
 	return (error);
 }
 
 /*
  * Peek at the head of the device's event queue.  The event stays on the
  * queue; NULL is returned when the queue is empty.
  */
 static struct g_mirror_event *
 g_mirror_event_get(struct g_mirror_softc *sc)
 {
 	struct g_mirror_event *head;
 
 	mtx_lock(&sc->sc_events_mtx);
 	head = TAILQ_FIRST(&sc->sc_events);
 	mtx_unlock(&sc->sc_events_mtx);
 
 	return (head);
 }
 
 /*
  * Unlink the given event from the device's event queue under the events
  * mutex.  The event itself is not freed.
  */
 static void
 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
 {
 
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 	mtx_unlock(&sc->sc_events_mtx);
 }
 
 /*
  * Cancel every queued event that targets the given disk.
  *
  * Device events and events for other disks are left on the queue.
  * Events nobody waits for (G_MIRROR_EVENT_DONTWAIT) are freed here.
  * For the remaining ones the sender is asleep in g_mirror_event_send()
  * and owns the event: it is handed ECANCELED, flagged as done and woken
  * up, so that the sender can leave its wait loop and free the event.
  */
 static void
 g_mirror_event_cancel(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_event *ep, *tmpep;
 
 	g_topology_assert();
 
 	sc = disk->d_softc;
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
 			continue;
 		if (ep->e_disk != disk)
 			continue;
 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
 			g_mirror_event_free(ep);
 		else {
 			ep->e_error = ECANCELED;
 			/*
 			 * The sender only stops sleeping once it sees
 			 * G_MIRROR_EVENT_DONE; without the flag it would
 			 * go back to sleep forever, because the event is
 			 * no longer on the queue.
 			 */
 			ep->e_flags |= G_MIRROR_EVENT_DONE;
 			wakeup(ep);
 		}
 	}
 	mtx_unlock(&sc->sc_events_mtx);
 }
 
 /*
  * Count the disks of the device that are in the given state.
  * A state of -1 matches every disk on the list.
  */
 u_int
 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
 {
 	struct g_mirror_disk *disk;
 	u_int count;
 
 	count = 0;
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (state != -1 && disk->d_state != state)
 			continue;
 		count++;
 	}
 	return (count);
 }
 
 /*
  * Look up a disk of the mirror by its disk ID.
  * Returns NULL when no disk on the list carries that ID.
  */
 static struct g_mirror_disk *
 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
 {
 	struct g_mirror_disk *dp;
 
 	g_topology_assert();
 
 	dp = LIST_FIRST(&sc->sc_disks);
 	while (dp != NULL && dp->d_id != id)
 		dp = LIST_NEXT(dp, d_next);
 	return (dp);
 }
 
 /*
  * Count the requests sitting in the device queue that were issued
  * through the given consumer.
  */
 static u_int
 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 	struct bio *qbp;
 	u_int count;
 
 	count = 0;
 	mtx_lock(&sc->sc_queue_mtx);
 	TAILQ_FOREACH(qbp, &sc->sc_queue.queue, bio_queue) {
 		if (qbp->bio_from != cp)
 			continue;
 		count++;
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 
 	return (count);
 }
 
 /*
  * Tell whether the consumer still has work attached to it: either its
  * index counter is positive or requests issued through it sit in the
  * device queue.  Returns 1 when busy, 0 otherwise.
  */
 static int
 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 	int busy;
 
 	busy = 0;
 	if (cp->index > 0) {
 		G_MIRROR_DEBUG(2,
 		    "I/O requests for %s exist, can't destroy it now.",
 		    cp->provider->name);
 		busy = 1;
 	} else if (g_mirror_nrequests(sc, cp) > 0) {
 		G_MIRROR_DEBUG(2,
 		    "I/O requests for %s in queue, can't destroy it now.",
 		    cp->provider->name);
 		busy = 1;
 	}
 	return (busy);
 }
 
 /*
  * Event callback: detach and destroy the consumer passed in 'arg'.
  * 'flags' is not used.
  */
 static void
 g_mirror_destroy_consumer(void *arg, int flags __unused)
 {
 	struct g_consumer *victim = arg;
 
 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", victim->provider->name);
 	g_detach(victim);
 	g_destroy_consumer(victim);
 }
 
 /*
  * Unbind the consumer from its disk and get rid of it.
  *
  * The consumer's private pointer is cleared first.  When the consumer is
  * still busy nothing else happens.  Otherwise its access counts are
  * dropped and it is detached and destroyed, either right away or, when
  * it was open for writing on a geom that is not withering, from a posted
  * event.
  */
 static void
 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	int retaste_wait;
 
 	g_topology_assert();
 
 	cp->private = NULL;
 	if (g_mirror_is_busy(sc, cp))
 		return;
 	pp = cp->provider;
 	retaste_wait = (cp->acw == 1 &&
 	    (pp->geom->flags & G_GEOM_WITHER) == 0);
 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
 	    -cp->acw, -cp->ace, 0);
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
 	if (!retaste_wait) {
 		G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		return;
 	}
 	/*
 	 * The retaste event has been sent from inside g_access(), so the
 	 * detach-and-destroy event posted here is queued behind it.
 	 * A class which still has a consumer attached to the provider does
 	 * not receive the retaste event for it; destroying the consumer
 	 * only after the retaste event is how retaste events caused by
 	 * closing our own write-opened consumers get ignored.
 	 */
 	g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
 }
 
 static int
 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
 {
 	struct g_consumer *cp;
 	int error;
 
 	g_topology_assert();
 	KASSERT(disk->d_consumer == NULL,
 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
 
 	cp = g_new_consumer(disk->d_softc->sc_geom);
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		return (error);
 	}
 	error = g_access(cp, 1, 1, 1);
 	if (error != 0) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
 		    pp->name, error);
 		return (error);
 	}
 	disk->d_consumer = cp;
 	disk->d_consumer->private = disk;
 	disk->d_consumer->index = 0;
 
 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
 	return (0);
 }
 
 /*
  * Get rid of a consumer: an attached one goes through
  * g_mirror_kill_consumer(), an unattached one is destroyed directly.
  * A NULL consumer is ignored.
  */
 static void
 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 
 	g_topology_assert();
 
 	if (cp != NULL) {
 		if (cp->provider == NULL)
 			g_destroy_consumer(cp);
 		else
 			g_mirror_kill_consumer(sc, cp);
 	}
 }
 
 /*
  * Initialize disk. This means allocate memory, create consumer, attach it
  * to the provider and open access (r1w1e1) to it.
  */
 static struct g_mirror_disk *
 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
     struct g_mirror_metadata *md, int *errorp)
 {
 	struct g_mirror_disk *disk;
 	int error;
 
 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
 	if (disk == NULL) {
 		error = ENOMEM;
 		goto fail;
 	}
 	disk->d_softc = sc;
 	error = g_mirror_connect_disk(disk, pp);
 	if (error != 0)
 		goto fail;
 	disk->d_id = md->md_did;
 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
 	disk->d_priority = md->md_priority;
 	disk->d_delay.sec = 0;
 	disk->d_delay.frac = 0;
 	binuptime(&disk->d_last_used);
 	disk->d_flags = md->md_dflags;
 	if (md->md_provider[0] != '\0')
 		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
 	disk->d_sync.ds_consumer = NULL;
 	disk->d_sync.ds_offset = md->md_sync_offset;
 	disk->d_sync.ds_offset_done = md->md_sync_offset;
 	disk->d_sync.ds_resync = -1;
 	disk->d_genid = md->md_genid;
 	disk->d_sync.ds_syncid = md->md_syncid;
 	if (errorp != NULL)
 		*errorp = 0;
 	return (disk);
 fail:
 	if (errorp != NULL)
 		*errorp = error;
 	if (disk != NULL)
 		free(disk, M_MIRROR);
 	return (NULL);
 }
 
 /*
  * Remove a disk from its device: unlink it from the disk list, cancel
  * its pending events, forget it as the hint, stop a running
  * synchronization, disconnect its consumer and free the structure.
  * Only disks in the NEW, STALE, ACTIVE or SYNCHRONIZING state are
  * expected here.
  */
 static void
 g_mirror_destroy_disk(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 
 	g_topology_assert();
 
 	sc = disk->d_softc;
 	LIST_REMOVE(disk, d_next);
 	g_mirror_event_cancel(disk);
 	if (sc->sc_hint == disk)
 		sc->sc_hint = NULL;
 	switch (disk->d_state) {
 	case G_MIRROR_DISK_STATE_NEW:
 	case G_MIRROR_DISK_STATE_STALE:
 	case G_MIRROR_DISK_STATE_ACTIVE:
 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			g_mirror_sync_stop(disk, 1);
 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
 		free(disk, M_MIRROR);
 		break;
 	default:
 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 	}
 }
 
 /*
  * Tear down a mirror device: destroy its provider, write clean metadata
  * to and destroy every disk, flush the event queue, drain the callout,
  * get rid of the synchronization geom and its consumers, destroy the
  * mutexes and wither the device's geom.
  */
 static void
 g_mirror_destroy_device(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 	struct g_mirror_event *ep;
 	struct g_geom *gp;
 	struct g_consumer *cp, *tmpcp;
 
 	g_topology_assert();
 
 	gp = sc->sc_geom;
 	if (sc->sc_provider != NULL)
 		g_mirror_destroy_provider(sc);
 	/*
 	 * g_mirror_destroy_disk() unlinks the disk from the list, so keep
 	 * taking the list head until the list is empty.
 	 */
 	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
 	    disk = LIST_FIRST(&sc->sc_disks)) {
 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		g_mirror_update_metadata(disk);
 		g_mirror_destroy_disk(disk);
 	}
 	/*
 	 * Flush the remaining events: free the ones nobody waits for and
 	 * complete the others with ECANCELED so that their senders wake up.
 	 */
 	while ((ep = g_mirror_event_get(sc)) != NULL) {
 		g_mirror_event_remove(sc, ep);
 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
 			g_mirror_event_free(ep);
 		else {
 			ep->e_error = ECANCELED;
 			ep->e_flags |= G_MIRROR_EVENT_DONE;
 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
 			mtx_lock(&sc->sc_events_mtx);
 			wakeup(ep);
 			mtx_unlock(&sc->sc_events_mtx);
 		}
 	}
 	callout_drain(&sc->sc_callout);
 	gp->softc = NULL;
 
 	/* Get rid of the consumers of the synchronization geom. */
 	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
 		g_mirror_disconnect_consumer(sc, cp);
 	}
 	sc->sc_sync.ds_geom->softc = NULL;
 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
 	mtx_destroy(&sc->sc_queue_mtx);
 	mtx_destroy(&sc->sc_events_mtx);
 	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom(gp, ENXIO);
 }
 
 /*
  * Orphan method: the provider behind consumer 'cp' is gone.  When the
  * consumer is still bound to a disk, request a syncid bump and queue a
  * DISCONNECTED event for that disk without waiting for it.
  */
 static void
 g_mirror_orphan(struct g_consumer *cp)
 {
 	struct g_mirror_disk *disk;
 
 	g_topology_assert();
 
 	disk = cp->private;
 	if (disk != NULL) {
 		disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
 		    G_MIRROR_EVENT_DONTWAIT);
 	}
 }
 
 /*
  * Return the next active disk on the list, searching circularly and
  * starting with the successor of 'disk'.  'disk' itself is examined
  * last, so it is possible that the same disk as given is returned.
  * If there are no active disks on the list, NULL is returned.
  *
  * 'disk' has to be linked on sc->sc_disks.
  */
 static __inline struct g_mirror_disk *
 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
 {
 	struct g_mirror_disk *dp;
 
 	dp = disk;
 	do {
 		dp = LIST_NEXT(dp, d_next);
 		/* Past the tail: continue from the head of the list. */
 		if (dp == NULL)
 			dp = LIST_FIRST(&sc->sc_disks);
 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
 			return (dp);
 		/*
 		 * Stop once the starting disk has been examined, no matter
 		 * whether it was reached by stepping or by wrapping around;
 		 * otherwise the walk would never end when 'disk' is the
 		 * list head and no disk is active.
 		 */
 	} while (dp != disk);
 	return (NULL);
 }
 
 /*
  * Pick an active disk, starting from the hint (or from the list head
  * when there is no hint), and move the hint on to the active disk that
  * follows the one picked.  Returns NULL when the disk list is empty or
  * no active disk is found.
  */
 static struct g_mirror_disk *
 g_mirror_get_disk(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 
 	disk = sc->sc_hint;
 	if (disk == NULL) {
 		disk = LIST_FIRST(&sc->sc_disks);
 		sc->sc_hint = disk;
 		if (disk == NULL)
 			return (NULL);
 	}
 	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
 		disk = g_mirror_find_next(sc, disk);
 		if (disk == NULL)
 			return (NULL);
 	}
 	sc->sc_hint = g_mirror_find_next(sc, disk);
 	return (disk);
 }
 
 static int
 g_mirror_write_metadata(struct g_mirror_disk *disk,
     struct g_mirror_metadata *md)
 {
 	struct g_mirror_softc *sc;
 	struct g_consumer *cp;
 	off_t offset, length;
 	u_char *sector;
 	int error = 0;
 
 	g_topology_assert();
 
 	sc = disk->d_softc;
 	cp = disk->d_consumer;
 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
 	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	length = cp->provider->sectorsize;
 	offset = cp->provider->mediasize - length;
 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
 	if (md != NULL)
 		mirror_metadata_encode(md, sector);
 	g_topology_unlock();
 	error = g_write_data(cp, offset, sector, length);
 	g_topology_lock();
 	free(sector, M_MIRROR);
 	if (error != 0) {
 		disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_GENID;
 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
 		    G_MIRROR_EVENT_DONTWAIT);
 	}
 	return (error);
 }
 
 static int
 g_mirror_clear_metadata(struct g_mirror_disk *disk)
 {
 	int error;
 
 	g_topology_assert();
 	error = g_mirror_write_metadata(disk, NULL);
 	if (error == 0) {
 		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
 		    g_mirror_get_diskname(disk));
 	} else {
 		G_MIRROR_DEBUG(0,
 		    "Cannot clear metadata on disk %s (error=%d).",
 		    g_mirror_get_diskname(disk), error);
 	}
 	return (error);
 }
 
 /*
  * Fill 'md' from the device state and, when 'disk' is given, from that
  * disk.  With disk == NULL the per-disk fields are set up for a fresh
  * component: random disk ID, everything else zero.
  */
 void
 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
     struct g_mirror_metadata *md)
 {
 
 	/* Device-wide fields. */
 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
 	md->md_version = G_MIRROR_VERSION;
 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
 	md->md_mid = sc->sc_id;
 	md->md_all = sc->sc_ndisks;
 	md->md_slice = sc->sc_slice;
 	md->md_balance = sc->sc_balance;
 	md->md_genid = sc->sc_genid;
 	md->md_mediasize = sc->sc_mediasize;
 	md->md_sectorsize = sc->sc_sectorsize;
 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
 	bzero(md->md_provider, sizeof(md->md_provider));
 
 	/* Per-disk fields. */
 	if (disk == NULL) {
 		md->md_did = arc4random();
 		md->md_priority = 0;
 		md->md_syncid = 0;
 		md->md_dflags = 0;
 		md->md_sync_offset = 0;
 		md->md_provsize = 0;
 		return;
 	}
 	md->md_did = disk->d_id;
 	md->md_priority = disk->d_priority;
 	md->md_syncid = disk->d_sync.ds_syncid;
 	md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
 	/* The sync offset is only recorded while synchronizing. */
 	md->md_sync_offset = 0;
 	if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
 		md->md_sync_offset = disk->d_sync.ds_offset_done;
 	/* The provider name is stored only for hardcoded disks. */
 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
 		strlcpy(md->md_provider, disk->d_consumer->provider->name,
 		    sizeof(md->md_provider));
 	}
 	md->md_provsize = disk->d_consumer->provider->mediasize;
 }
 
 void
 g_mirror_update_metadata(struct g_mirror_disk *disk)
 {
 	struct g_mirror_metadata md;
 	int error;
 
 	g_topology_assert();
 	g_mirror_fill_metadata(disk->d_softc, disk, &md);
 	error = g_mirror_write_metadata(disk, &md);
 	if (error == 0) {
 		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
 		    g_mirror_get_diskname(disk));
 	} else {
 		G_MIRROR_DEBUG(0,
 		    "Cannot update metadata on disk %s (error=%d).",
 		    g_mirror_get_diskname(disk), error);
 	}
 }
 
 /*
  * Increment the device's syncid and store the new value in the metadata
  * of every ACTIVE and SYNCHRONIZING disk.
  */
 static void
 g_mirror_bump_syncid(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 
 	g_topology_assert();
 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_syncid++;
 	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
 	    sc->sc_syncid);
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		switch (disk->d_state) {
 		case G_MIRROR_DISK_STATE_ACTIVE:
 		case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 			disk->d_sync.ds_syncid = sc->sc_syncid;
 			g_mirror_update_metadata(disk);
 			break;
 		default:
 			break;
 		}
 	}
 }
 
 /*
  * Increment the device's genid and store the new value in the metadata
  * of every ACTIVE and SYNCHRONIZING disk.
  */
 static void
 g_mirror_bump_genid(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 
 	g_topology_assert();
 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_genid++;
 	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
 	    sc->sc_genid);
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		switch (disk->d_state) {
 		case G_MIRROR_DISK_STATE_ACTIVE:
 		case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 			disk->d_genid = sc->sc_genid;
 			g_mirror_update_metadata(disk);
 			break;
 		default:
 			break;
 		}
 	}
 }
 
 /*
  * Put the device into the idle state: clear the DIRTY flag of every
  * active disk and write the metadata out.  Nothing is done when the
  * device has no provider or the provider is not open for writing.
  */
 static void
 g_mirror_idle(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 
 	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
 		return;
 	sc->sc_idle = 1;
 	g_topology_lock();
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) {
 			G_MIRROR_DEBUG(1,
 			    "Disk %s (device %s) marked as clean.",
 			    g_mirror_get_diskname(disk), sc->sc_name);
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 			g_mirror_update_metadata(disk);
 		}
 	}
 	g_topology_unlock();
 }
 
 /*
  * Leave the idle state: set the DIRTY flag of every active disk and
  * write the metadata out.
  */
 static void
 g_mirror_unidle(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 
 	sc->sc_idle = 0;
 	g_topology_lock();
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) {
 			G_MIRROR_DEBUG(1,
 			    "Disk %s (device %s) marked as dirty.",
 			    g_mirror_get_diskname(disk), sc->sc_name);
 			disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 			g_mirror_update_metadata(disk);
 		}
 	}
 	g_topology_unlock();
 }
 
 /*
  * Return 1 if we should check if mirror is idling, 0 otherwise.
  * The answer is 0 when the device is idle already, when its provider
  * exists but is not open for writing, or when an active disk still has
  * requests in flight.
  */
 static int
 g_mirror_check_idle(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 
 	if (sc->sc_idle ||
 	    (sc->sc_provider != NULL && sc->sc_provider->acw == 0))
 		return (0);
 	/* Any request in flight on an active disk means "not idle". */
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE &&
 		    disk->d_consumer->index > 0)
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * Three-way comparison of two bintimes: seconds first, then the
  * fraction.  Returns -1, 0 or 1 when bt1 is less than, equal to or
  * greater than bt2.
  */
 static __inline int
 bintime_cmp(struct bintime *bt1, struct bintime *bt2)
 {
 
 	if (bt1->sec != bt2->sec)
 		return (bt1->sec < bt2->sec ? -1 : 1);
 	if (bt1->frac != bt2->frac)
 		return (bt1->frac < bt2->frac ? -1 : 1);
 	return (0);
 }
 
 /*
  * When the device uses the LOAD balance algorithm, record in d_delay the
  * time elapsed between the request's bio_t0 and now.
  */
 static void
 g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
 {
 
 	if (disk->d_softc->sc_balance == G_MIRROR_BALANCE_LOAD) {
 		binuptime(&disk->d_delay);
 		bintime_sub(&disk->d_delay, &bp->bio_t0);
 	}
 }
 
 /*
  * Completion callback: tag the request as a regular one, put it on the
  * device queue and wake up whoever sleeps on the softc.
  */
 static void
 g_mirror_done(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 
 	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
 	sc = bp->bio_from->geom->softc;
 
 	mtx_lock(&sc->sc_queue_mtx);
 	bioq_disksort(&sc->sc_queue, bp);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 }
 
 /*
  * Handle the completion of one child request 'bp' of a regular
  * (non-synchronization) request and decide what happens to its parent.
  *
  * - If neither the child nor the parent carries an error, the child is
  *   destroyed and the parent is delivered once all children are in.
  * - If the child failed, the error is recorded in the parent and, when
  *   the consumer is still bound to a disk, a genid bump is requested and
  *   a DISCONNECTED event is queued for that disk.
  * - A READ parent whose children are all in is put back on the device
  *   queue with its error cleared.
  * - A WRITE/DELETE parent is delivered with an error only when all of
  *   its children failed.
  */
 static void
 g_mirror_regular_request(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *disk;
 	struct bio *pbp;
 
 	g_topology_assert_not();
 
 	/* One request less in flight on this consumer. */
 	bp->bio_from->index--;
 	pbp = bp->bio_parent;
 	sc = pbp->bio_to->geom->softc;
 	disk = bp->bio_from->private;
 	if (disk == NULL) {
 		/*
 		 * The consumer is no longer bound to a disk; try again to
 		 * get rid of it now that this request is done.
 		 */
 		g_topology_lock();
 		g_mirror_kill_consumer(sc, bp->bio_from);
 		g_topology_unlock();
 	} else {
 		g_mirror_update_delay(disk, bp);
 	}
 
 	pbp->bio_inbed++;
 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
 	    pbp->bio_children));
 	if (bp->bio_error == 0 && pbp->bio_error == 0) {
 		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
 		g_destroy_bio(bp);
 		if (pbp->bio_children == pbp->bio_inbed) {
 			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
 			pbp->bio_completed = pbp->bio_length;
 			g_io_deliver(pbp, pbp->bio_error);
 		}
 		return;
 	} else if (bp->bio_error != 0) {
 		/* Only the first error is kept in the parent. */
 		if (pbp->bio_error == 0)
 			pbp->bio_error = bp->bio_error;
 		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
 		    bp->bio_error);
 		if (disk != NULL) {
 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
 			g_mirror_event_send(disk,
 			    G_MIRROR_DISK_STATE_DISCONNECTED,
 			    G_MIRROR_EVENT_DONTWAIT);
 		}
 		/*
 		 * For writes and deletes the failed child is taken out of
 		 * both counters, so bio_children ends up as the number of
 		 * children that have not failed.
 		 */
 		switch (pbp->bio_cmd) {
 		case BIO_DELETE:
 		case BIO_WRITE:
 			pbp->bio_inbed--;
 			pbp->bio_children--;
 			break;
 		}
 	}
 	g_destroy_bio(bp);
 
 	switch (pbp->bio_cmd) {
 	case BIO_READ:
 		if (pbp->bio_children == pbp->bio_inbed) {
 			/*
 			 * Clear the error and queue the parent again;
 			 * presumably so that the read is retried - confirm
 			 * against the queue consumer.
 			 */
 			pbp->bio_error = 0;
 			mtx_lock(&sc->sc_queue_mtx);
 			bioq_disksort(&sc->sc_queue, pbp);
 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 			wakeup(sc);
 			mtx_unlock(&sc->sc_queue_mtx);
 		}
 		break;
 	case BIO_DELETE:
 	case BIO_WRITE:
 		if (pbp->bio_children == 0) {
 			/*
 			 * All requests failed.
 			 */
 		} else if (pbp->bio_inbed < pbp->bio_children) {
 			/* Do nothing. */
 			break;
 		} else if (pbp->bio_children == pbp->bio_inbed) {
 			/* Some requests succeeded. */
 			pbp->bio_error = 0;
 			pbp->bio_completed = pbp->bio_length;
 		}
 		g_io_deliver(pbp, pbp->bio_error);
 		break;
 	default:
 		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
 		break;
 	}
 }
 
 /*
  * Completion callback for synchronization requests: tag the request as
  * a synchronization one, put it on the device queue and wake up whoever
  * sleeps on the softc.
  */
 static void
 g_mirror_sync_done(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 
 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
 	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
 	sc = bp->bio_from->geom->softc;
 
 	mtx_lock(&sc->sc_queue_mtx);
 	bioq_disksort(&sc->sc_queue, bp);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 }
 
 /*
  * Start method of the mirror provider.  READ, WRITE and DELETE requests
  * are put on the device queue and the softc is woken up; every other
  * command is completed with EOPNOTSUPP.
  */
 static void
 g_mirror_start(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 
 	sc = bp->bio_to->geom->softc;
 	/*
 	 * A NULL softc or a device without valid disks must have the
 	 * provider's error set, in which case this function is not
 	 * supposed to be called at all.
 	 */
 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 	    ("Provider's error should be set (error=%d)(mirror=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 	G_MIRROR_LOGREQ(3, bp, "Request received.");
 
 	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
 	    bp->bio_cmd != BIO_DELETE) {
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 
 	mtx_lock(&sc->sc_queue_mtx);
 	bioq_disksort(&sc->sc_queue, bp);
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 }
 
 /*
  * Send one synchronization request.
  */
 static void
 g_mirror_sync_one(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	struct bio *bp;
 
 	sc = disk->d_softc;
 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 	    ("Disk %s is not marked for synchronization.",
 	    g_mirror_get_diskname(disk)));
 
 	bp = g_new_bio();
 	if (bp == NULL)
 		return;
 	bp->bio_parent = NULL;
 	bp->bio_cmd = BIO_READ;
 	bp->bio_offset = disk->d_sync.ds_offset;
 	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
 	bp->bio_cflags = 0;
 	bp->bio_done = g_mirror_sync_done;
 	bp->bio_data = disk->d_sync.ds_data;
 	if (bp->bio_data == NULL) {
 		g_destroy_bio(bp);
 		return;
 	}
 	disk->d_sync.ds_offset += bp->bio_length;
 	bp->bio_to = sc->sc_provider;
 	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
 	disk->d_sync.ds_consumer->index++;
 	g_io_request(bp, disk->d_sync.ds_consumer);
 }
 
 /*
  * Handle a completed synchronization request.
  *
  * A synchronization request goes through two phases using the same bio:
  * a READ, which on completion is turned into a WRITE to the disk being
  * synchronized, and that WRITE, whose completion advances
  * ds_offset_done.  Once ds_offset_done reaches the provider's media size
  * an ACTIVE event is queued for the disk.
  */
 static void
 g_mirror_sync_request(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *disk;
 
 	/* One request less in flight on this consumer. */
 	bp->bio_from->index--;
 	sc = bp->bio_from->geom->softc;
 	disk = bp->bio_from->private;
 	if (disk == NULL) {
 		/*
 		 * The consumer is no longer bound to a disk; try again to
 		 * get rid of it and drop the request.
 		 */
 		g_topology_lock();
 		g_mirror_kill_consumer(sc, bp->bio_from);
 		g_topology_unlock();
 		g_destroy_bio(bp);
 		return;
 	}
 
 	/*
 	 * Synchronization request.
 	 */
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	    {
 		struct g_consumer *cp;
 
 		if (bp->bio_error != 0) {
 			G_MIRROR_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 			g_destroy_bio(bp);
 			return;
 		}
 		G_MIRROR_LOGREQ(3, bp,
 		    "Synchronization request half-finished.");
 		/* Reuse the bio: write the data just read to the disk. */
 		bp->bio_cmd = BIO_WRITE;
 		bp->bio_cflags = 0;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		cp->index++;
 		g_io_request(bp, cp);
 		return;
 	    }
 	case BIO_WRITE:
 	    {
 		struct g_mirror_disk_sync *sync;
 
 		if (bp->bio_error != 0) {
 			G_MIRROR_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 			g_destroy_bio(bp);
 			/* A failed write disconnects the disk. */
 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
 			g_mirror_event_send(disk,
 			    G_MIRROR_DISK_STATE_DISCONNECTED,
 			    G_MIRROR_EVENT_DONTWAIT);
 			return;
 		}
 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
 		sync = &disk->d_sync;
 		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
 		g_destroy_bio(bp);
 		/* Nothing more to do here while ds_resync is set. */
 		if (sync->ds_resync != -1)
 			break;
 		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
 			/*
 			 * Disk up-to-date, activate it.
 			 */
 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
 			    G_MIRROR_EVENT_DONTWAIT);
 			return;
 		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
 			/*
 			 * Update offset_done on every 100 blocks.
 			 * XXX: This should be configurable.
 			 */
 			g_topology_lock();
 			g_mirror_update_metadata(disk);
 			g_topology_unlock();
 		}
 		return;
 	    }
 	default:
 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
 		    bp->bio_cmd, sc->sc_name));
 		break;
 	}
 }
 
 static void
 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
 			break;
 	}
 	if (disk == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = ENXIO;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = ENOMEM;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 	/*
 	 * Fill in the component buf structure.
 	 */
 	cp = disk->d_consumer;
 	cbp->bio_done = g_mirror_done;
 	cbp->bio_to = cp->provider;
 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	cp->index++;
 	g_io_request(cbp, cp);
 }
 
 static void
 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 
 	disk = g_mirror_get_disk(sc);
 	if (disk == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = ENXIO;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = ENOMEM;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 	/*
 	 * Fill in the component buf structure.
 	 */
 	cp = disk->d_consumer;
 	cbp->bio_done = g_mirror_done;
 	cbp->bio_to = cp->provider;
 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	cp->index++;
 	g_io_request(cbp, cp);
 }
 
 static void
 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *disk, *dp;
 	struct g_consumer *cp;
 	struct bio *cbp;
 	struct bintime curtime;
 
 	binuptime(&curtime);
 	/*
 	 * Find a disk which the smallest load.
 	 */
 	disk = NULL;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
 			continue;
 		/* If disk wasn't used for more than 2 sec, use it. */
 		if (curtime.sec - dp->d_last_used.sec >= 2) {
 			disk = dp;
 			break;
 		}
 		if (disk == NULL ||
 		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
 			disk = dp;
 		}
 	}
 	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = ENOMEM;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 	/*
 	 * Fill in the component buf structure.
 	 */
 	cp = disk->d_consumer;
 	cbp->bio_done = g_mirror_done;
 	cbp->bio_to = cp->provider;
 	binuptime(&disk->d_last_used);
 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	cp->index++;
 	g_io_request(cbp, cp);
 }
 
 static void
 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct bio_queue_head queue;
 	struct g_mirror_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 	off_t left, mod, offset, slice;
 	u_char *data;
 	u_int ndisks;
 
 	if (bp->bio_length <= sc->sc_slice) {
 		g_mirror_request_round_robin(sc, bp);
 		return;
 	}
 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
 	slice = bp->bio_length / ndisks;
 	mod = slice % sc->sc_provider->sectorsize;
 	if (mod != 0)
 		slice += sc->sc_provider->sectorsize - mod;
 	/*
 	 * Allocate all bios before sending any request, so we can
 	 * return ENOMEM in nice and clean way.
 	 */
 	left = bp->bio_length;
 	offset = bp->bio_offset;
 	data = bp->bio_data;
 	bioq_init(&queue);
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
 			continue;
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			for (cbp = bioq_first(&queue); cbp != NULL;
 			    cbp = bioq_first(&queue)) {
 				bioq_remove(&queue, cbp);
 				g_destroy_bio(cbp);
 			}
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
 			return;
 		}
 		bioq_insert_tail(&queue, cbp);
 		cbp->bio_done = g_mirror_done;
 		cbp->bio_caller1 = disk;
 		cbp->bio_to = disk->d_consumer->provider;
 		cbp->bio_offset = offset;
 		cbp->bio_data = data;
 		cbp->bio_length = MIN(left, slice);
 		left -= cbp->bio_length;
 		if (left == 0)
 			break;
 		offset += cbp->bio_length;
 		data += cbp->bio_length;
 	}
 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
 		bioq_remove(&queue, cbp);
 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 		disk = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		disk->d_consumer->index++;
 		g_io_request(cbp, disk->d_consumer);
 	}
 }
 
 static void
 g_mirror_register_request(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 
 	sc = bp->bio_to->geom->softc;
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		switch (sc->sc_balance) {
 		case G_MIRROR_BALANCE_LOAD:
 			g_mirror_request_load(sc, bp);
 			break;
 		case G_MIRROR_BALANCE_PREFER:
 			g_mirror_request_prefer(sc, bp);
 			break;
 		case G_MIRROR_BALANCE_ROUND_ROBIN:
 			g_mirror_request_round_robin(sc, bp);
 			break;
 		case G_MIRROR_BALANCE_SPLIT:
 			g_mirror_request_split(sc, bp);
 			break;
 		}
 		return;
 	case BIO_WRITE:
 	case BIO_DELETE:
 	    {
 		struct g_mirror_disk *disk;
 		struct g_mirror_disk_sync *sync;
 		struct bio_queue_head queue;
 		struct g_consumer *cp;
 		struct bio *cbp;
 
 		if (sc->sc_idle)
 			g_mirror_unidle(sc);
 		/*
 		 * Allocate all bios before sending any request, so we can
 		 * return ENOMEM in nice and clean way.
 		 */
 		bioq_init(&queue);
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			sync = &disk->d_sync;
 			switch (disk->d_state) {
 			case G_MIRROR_DISK_STATE_ACTIVE:
 				break;
 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 				if (bp->bio_offset >= sync->ds_offset)
 					continue;
 				else if (bp->bio_offset + bp->bio_length >
 				    sync->ds_offset_done &&
 				    (bp->bio_offset < sync->ds_resync ||
 				     sync->ds_resync == -1)) {
 					sync->ds_resync = bp->bio_offset -
 					    (bp->bio_offset % MAXPHYS);
 				}
 				break;
 			default:
 				continue;
 			}
 			cbp = g_clone_bio(bp);
 			if (cbp == NULL) {
 				for (cbp = bioq_first(&queue); cbp != NULL;
 				    cbp = bioq_first(&queue)) {
 					bioq_remove(&queue, cbp);
 					g_destroy_bio(cbp);
 				}
 				if (bp->bio_error == 0)
 					bp->bio_error = ENOMEM;
 				g_io_deliver(bp, bp->bio_error);
 				return;
 			}
 			bioq_insert_tail(&queue, cbp);
 			cbp->bio_done = g_mirror_done;
 			cp = disk->d_consumer;
 			cbp->bio_caller1 = cp;
 			cbp->bio_to = cp->provider;
 			KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 			    ("Consumer %s not opened (r%dw%de%d).",
 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
 		}
 		for (cbp = bioq_first(&queue); cbp != NULL;
 		    cbp = bioq_first(&queue)) {
 			bioq_remove(&queue, cbp);
 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 			cp = cbp->bio_caller1;
 			cbp->bio_caller1 = NULL;
 			cp->index++;
 			g_io_request(cbp, cp);
 		}
 		/*
 		 * Bump syncid on first write.
 		 */
 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
 			g_topology_lock();
 			g_mirror_bump_syncid(sc);
 			g_topology_unlock();
 		}
 		return;
 	    }
 	default:
 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
 		    bp->bio_cmd, sc->sc_name));
 		break;
 	}
 }
 
 static int
 g_mirror_can_destroy(struct g_mirror_softc *sc)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 
 	g_topology_assert();
 	gp = sc->sc_geom;
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (g_mirror_is_busy(sc, cp))
 			return (0);
 	}
 	gp = sc->sc_sync.ds_geom;
 	LIST_FOREACH(cp, &gp->consumer, consumer) {
 		if (g_mirror_is_busy(sc, cp))
 			return (0);
 	}
 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
 	    sc->sc_name);
 	return (1);
 }
 
 /*
  * Try to destroy the device.
  *
  * A root mount hold that is still owned is released first.  If any
  * consumer is still busy, nothing else is done and 0 is returned.
  * Otherwise 1 is returned after either:
  *  - waking up whoever sleeps on &sc->sc_worker and clearing sc_worker,
  *    when G_MIRROR_DEVICE_FLAG_WAIT is set, or
  *  - destroying the device and freeing the softc.
  */
 static int
 g_mirror_try_destroy(struct g_mirror_softc *sc)
 {
 
 	if (sc->sc_rootmount != NULL) {
 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 		    sc->sc_rootmount);
 		root_mount_rel(sc->sc_rootmount);
 		sc->sc_rootmount = NULL;
 	}
 
 	g_topology_lock();
 	if (!g_mirror_can_destroy(sc)) {
 		/* Still busy, the caller has to retry later. */
 		g_topology_unlock();
 		return (0);
 	}
 
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) == 0) {
 		/* Nobody is waiting for us: tear everything down here. */
 		g_mirror_destroy_device(sc);
 		g_topology_unlock();
 		free(sc, M_MIRROR);
 		return (1);
 	}
 
 	/* Somebody waits on sc_worker; let them know we are done. */
 	g_topology_unlock();
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
 	    &sc->sc_worker);
 	wakeup(&sc->sc_worker);
 	sc->sc_worker = NULL;
 	return (1);
 }
 
 /*
  * Worker thread.
  *
  * 'arg' is the device softc.  The thread loops forever doing, in order
  * of precedence:
  *  1. one pending event, provided the topology lock can be taken with
  *     g_topology_try_lock();
  *  2. a synchronization round (one g_mirror_sync_one() per eligible
  *     SYNCHRONIZING disk) when disks are being synchronized and either
  *     the queue is empty or more than g_mirror_reqs_per_sync requests
  *     were served since the last round;
  *  3. one request from sc_queue, dispatched according to its bio_cflags.
  * When there is nothing to do it sleeps on 'sc'.  The thread exits via
  * kthread_exit() once the device is flagged for destruction and
  * g_mirror_try_destroy() succeeds.
  */
 static void
 g_mirror_worker(void *arg)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *disk;
 	struct g_mirror_disk_sync *sync;
 	struct g_mirror_event *ep;
 	struct bio *bp;
 	u_int nreqs;
 
 	sc = arg;
 	/* Run this thread at PRIBIO priority. */
 	mtx_lock_spin(&sched_lock);
 	sched_prio(curthread, PRIBIO);
 	mtx_unlock_spin(&sched_lock);
 
 	/* Number of queue requests served since the last sync round. */
 	nreqs = 0;
 	for (;;) {
 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
 		/*
 		 * First take a look at events.
 		 * This is important to handle events before any I/O requests.
 		 */
 		ep = g_mirror_event_get(sc);
 		if (ep != NULL && g_topology_try_lock()) {
 			g_mirror_event_remove(sc, ep);
 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
 				/* Update only device status. */
 				G_MIRROR_DEBUG(3,
 				    "Running event for device %s.",
 				    sc->sc_name);
 				ep->e_error = 0;
 				g_mirror_update_device(sc, 1);
 			} else {
 				/* Update disk status. */
 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
 				     g_mirror_get_diskname(ep->e_disk));
 				ep->e_error = g_mirror_update_disk(ep->e_disk,
 				    ep->e_state);
 				if (ep->e_error == 0)
 					g_mirror_update_device(sc, 0);
 			}
 			g_topology_unlock();
 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
 				/* Nobody waits for the result: free it here. */
 				KASSERT(ep->e_error == 0,
 				    ("Error cannot be handled."));
 				g_mirror_event_free(ep);
 			} else {
 				/*
 				 * Mark the event as done and wake up whoever
 				 * sleeps on it.
 				 */
 				ep->e_flags |= G_MIRROR_EVENT_DONE;
 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
 				    ep);
 				mtx_lock(&sc->sc_events_mtx);
 				wakeup(ep);
 				mtx_unlock(&sc->sc_events_mtx);
 			}
 			if ((sc->sc_flags &
 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 				if (g_mirror_try_destroy(sc))
 					kthread_exit(0);
 			}
 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
 			/* Look for further events before touching I/O. */
 			continue;
 		}
 		/*
 		 * Now I/O requests.
 		 * Note that 'ep' may be non-NULL here: an event is pending,
 		 * but the topology lock could not be taken above.
 		 */
 		/* Get first request from the queue. */
 		mtx_lock(&sc->sc_queue_mtx);
 		bp = bioq_first(&sc->sc_queue);
 		if (bp == NULL) {
 			if (ep != NULL) {
 				/*
 				 * No I/O requests and topology lock was
 				 * already held? Try again.
 				 */
 				mtx_unlock(&sc->sc_queue_mtx);
 				continue;
 			}
 			if ((sc->sc_flags &
 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 				/*
 				 * The queue mutex is dropped around the
 				 * destroy attempt and retaken if it fails.
 				 */
 				mtx_unlock(&sc->sc_queue_mtx);
 				if (g_mirror_try_destroy(sc))
 					kthread_exit(0);
 				mtx_lock(&sc->sc_queue_mtx);
 			}
 		}
 		if (sc->sc_sync.ds_ndisks > 0 &&
 		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
 			mtx_unlock(&sc->sc_queue_mtx);
 			/*
 			 * It is time for synchronization...
 			 */
 			nreqs = 0;
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_state !=
 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
 					continue;
 				}
 				sync = &disk->d_sync;
 				/* Every chunk was already requested. */
 				if (sync->ds_offset >=
 				    sc->sc_provider->mediasize) {
 					continue;
 				}
 				/*
 				 * The previously requested chunk is not
 				 * finished yet.
 				 */
 				if (sync->ds_offset > sync->ds_offset_done)
 					continue;
 				/*
 				 * A resync point was recorded: restart the
 				 * synchronization from there.
 				 */
 				if (sync->ds_resync != -1) {
 					sync->ds_offset = sync->ds_resync;
 					sync->ds_offset_done = sync->ds_resync;
 					sync->ds_resync = -1;
 				}
 				g_mirror_sync_one(disk);
 			}
 			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
 			/* Throttle: jumps into the sync branch below. */
 			goto sleep;
 		}
 		if (bp == NULL) {
 			/*
 			 * Nothing to do; sleep on 'sc' with the queue mutex
 			 * held on entry.  PDROP is passed, and the loop is
 			 * restarted without unlocking the mutex here.
 			 */
 			if (g_mirror_check_idle(sc)) {
 				u_int idletime;
 
 				/* Idle timeout in ticks, at least 1 sec. */
 				idletime = g_mirror_idletime;
 				if (idletime == 0)
 					idletime = 1;
 				idletime *= hz;
 				if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
 				    "m:w1", idletime) == EWOULDBLOCK) {
 					G_MIRROR_DEBUG(5, "%s: I'm here 3.",
 					    __func__);
 					/*
 					 * No I/O requests in 'idletime' seconds,
 					 * so mark components as clean.
 					 */
 					g_mirror_idle(sc);
 				}
 				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
 			} else {
 				/*
 				 * NOTE(review): MSLEEP is defined outside this
 				 * chunk; presumably a wrapper around msleep().
 				 */
 				MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
 				    "m:w2", 0);
 				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
 			}
 			continue;
 		}
 		nreqs++;
 		bioq_remove(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 
 		/*
 		 * Dispatch on the flags of the request: REGULAR and SYNC
 		 * requests go to their handlers; anything else is treated
 		 * as a new request and is registered.
 		 */
 		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
 			g_mirror_regular_request(bp);
 		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
 			u_int timeout, sps;
 
 			g_mirror_sync_request(bp);
 			/*
 			 * Also entered via 'goto sleep' after a sync round.
 			 * Limit the synchronization rate to roughly
 			 * g_mirror_syncs_per_sec rounds per second, unless
 			 * there is other work waiting.
 			 */
 sleep:
 			sps = g_mirror_syncs_per_sec;
 			if (sps == 0) {
 				/* Rate limiting is turned off. */
 				G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
 				continue;
 			}
 			if (ep != NULL) {
 				/*
 				 * We have some pending events, don't sleep now.
 				 */
 				G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__);
 				continue;
 			}
 			mtx_lock(&sc->sc_queue_mtx);
 			if (bioq_first(&sc->sc_queue) != NULL) {
 				/* Requests are queued, serve them first. */
 				mtx_unlock(&sc->sc_queue_mtx);
 				G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__);
 				continue;
 			}
 			/* Sleep for hz / sps ticks, but at least one tick. */
 			timeout = hz / sps;
 			if (timeout == 0)
 				timeout = 1;
 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3",
 			    timeout);
 		} else {
 			g_mirror_register_request(bp);
 		}
 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
 	}
 }
 
 /*
  * Open disk's consumer if needed.
  */
 static void
 g_mirror_update_access(struct g_mirror_disk *disk)
 {
 	struct g_provider *pp;
 
 	g_topology_assert();
 
 	pp = disk->d_softc->sc_provider;
 	if (pp == NULL)
 		return;
 	if (pp->acw > 0) {
 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
 			G_MIRROR_DEBUG(1,
 			    "Disk %s (device %s) marked as dirty.",
 			    g_mirror_get_diskname(disk),
 			    disk->d_softc->sc_name);
 			disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 		}
 	} else if (pp->acw == 0) {
 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
 			G_MIRROR_DEBUG(1,
 			    "Disk %s (device %s) marked as clean.",
 			    g_mirror_get_diskname(disk),
 			    disk->d_softc->sc_name);
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		}
 	}
 }
 
 static void
 g_mirror_sync_start(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	int error;
 
 	g_topology_assert();
 
 	sc = disk->d_softc;
 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
 	    sc->sc_state));
 
 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
 	    g_mirror_get_diskname(disk));
 	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 	KASSERT(disk->d_sync.ds_consumer == NULL,
 	    ("Sync consumer already exists (device=%s, disk=%s).",
 	    sc->sc_name, g_mirror_get_diskname(disk)));
 	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
 	disk->d_sync.ds_consumer->private = disk;
 	disk->d_sync.ds_consumer->index = 0;
 	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
 	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
 	    disk->d_softc->sc_name, error));
 	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
 	KASSERT(error == 0, ("Cannot open %s (error=%d).",
 	    disk->d_softc->sc_name, error));
 	disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
 	sc->sc_sync.ds_ndisks++;
 }
 
 /*
  * Stop synchronization process.
  * type: 0 - synchronization finished
  *       1 - synchronization stopped
  *
  * 'type' only selects the message that is logged.  In both cases the
  * synchronization consumer is killed, the data buffer is freed, the
  * device's count of synchronizing disks is decremented and the disk's
  * DIRTY flag is cleared.  Nothing is done when the disk has no
  * synchronization consumer.  The disk has to be in SYNCHRONIZING state
  * and the topology lock held.
  */
 static void
 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
 {
 
 	g_topology_assert();
 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 	    g_mirror_disk_state2str(disk->d_state)));
 	if (disk->d_sync.ds_consumer == NULL)
 		return;
 
 	if (type == 0) {
 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
 		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
 	} else /* if (type == 1) */ {
 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
 		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
 	}
 	g_mirror_kill_consumer(disk->d_softc, disk->d_sync.ds_consumer);
 	free(disk->d_sync.ds_data, M_MIRROR);
 	/*
 	 * Do not leave a dangling pointer behind: g_mirror_sync_one()
 	 * treats a NULL ds_data as "no buffer" and refuses to send a
 	 * request, which only works if the pointer is cleared here.
 	 */
 	disk->d_sync.ds_data = NULL;
 	disk->d_sync.ds_consumer = NULL;
 	disk->d_softc->sc_sync.ds_ndisks--;
 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 }
 
 /*
  * Create and announce the mirror provider ("mirror/<name>"), using the
  * media and sector sizes stored in the softc, and then start
  * synchronization of every disk that is in SYNCHRONIZING state.
  * Must be called with the topology lock held.
  */
 static void
 g_mirror_launch_provider(struct g_mirror_softc *sc)
 {
 	struct g_provider *newpp;
 	struct g_mirror_disk *dp;
 
 	g_topology_assert();
 
 	newpp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
 	newpp->sectorsize = sc->sc_sectorsize;
 	newpp->mediasize = sc->sc_mediasize;
 	/* g_mirror_sync_start() below attaches to sc_provider. */
 	sc->sc_provider = newpp;
 	g_error_provider(newpp, 0);
 	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
 	    newpp->name);
 
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			continue;
 		g_mirror_sync_start(dp);
 	}
 }
 
 static void
 g_mirror_destroy_provider(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 	struct bio *bp;
 
 	g_topology_assert();
 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
 	    sc->sc_name));
 
 	g_error_provider(sc->sc_provider, ENXIO);
 	mtx_lock(&sc->sc_queue_mtx);
 	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
 		bioq_remove(&sc->sc_queue, bp);
 		g_io_deliver(bp, ENXIO);
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
 	    sc->sc_provider->name);
 	sc->sc_provider->flags |= G_PF_WITHER;
 	g_orphan_provider(sc->sc_provider, ENXIO);
 	sc->sc_provider = NULL;
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			g_mirror_sync_stop(disk, 1);
 	}
 }
 
 /*
  * Force the device to start: log the fact and queue a device event
  * for which nobody waits.  'arg' is the device softc.
  */
 static void
 g_mirror_go(void *arg)
 {
 	struct g_mirror_softc *sc = arg;
 
 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
 	g_mirror_event_send(sc, 0,
 	    G_MIRROR_EVENT_DEVICE | G_MIRROR_EVENT_DONTWAIT);
 }
 
 /*
  * Decide which state a disk should enter, based on how its syncid
  * compares with the syncid of the device.
  *
  *  - disk syncid > device syncid: the device was started without its
  *    freshest disk.  The disk is destroyed and
  *    G_MIRROR_DISK_STATE_NONE is returned; the caller must not touch
  *    the disk afterwards.
  *  - disk syncid < device syncid: the disk's synchronization data is
  *    reset and the disk is flagged as needing synchronization.
  *  - disk syncid == device syncid: the disk is taken as it is.
  *
  * A disk that does not need synchronization becomes ACTIVE.  One that
  * does becomes SYNCHRONIZING, unless the device is marked NOAUTOSYNC
  * and the disk is not marked FORCE_SYNC, in which case it becomes STALE.
  */
 static u_int
 g_mirror_determine_state(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	u_int newstate;
 
 	sc = disk->d_softc;
 
 	if (disk->d_sync.ds_syncid > sc->sc_syncid) {
 		/*
 		 * The mirror was started on stale disks and a fresher
 		 * disk has just arrived.  If the mirror was written to
 		 * in the meantime, the two can no longer be reconciled,
 		 * so leave this disk alone and tell the user loudly.
 		 */
 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
 		    "disk (%s) arrives!! It will not be connected to the "
 		    "running device.", sc->sc_name,
 		    g_mirror_get_diskname(disk));
 		g_mirror_destroy_disk(disk);
 		/* The disk is gone, do not reference it any more. */
 		return (G_MIRROR_DISK_STATE_NONE);
 	}
 
 	if (disk->d_sync.ds_syncid < sc->sc_syncid) {
 		/*
 		 * Whatever this disk was synchronized to, it was not the
 		 * current generation: start over from offset zero.
 		 */
 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		disk->d_sync.ds_syncid = sc->sc_syncid;
 	}
 
 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
 		/* Disk does not need synchronization. */
 		newstate = G_MIRROR_DISK_STATE_ACTIVE;
 	} else if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
 	    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
 		/* Synchronization can (re)start from the stored offset. */
 		newstate = G_MIRROR_DISK_STATE_SYNCHRONIZING;
 	} else {
 		/* Automatic synchronization is off and nobody forced it. */
 		newstate = G_MIRROR_DISK_STATE_STALE;
 	}
 
 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(newstate));
 	return (newstate);
 }
 
 /*
  * Update device state.
  *
  * STARTING: once all configured disks are connected, or 'force' is true
  * and at least one disk is connected, pick the authoritative disks
  * (biggest genid, then biggest syncid, then resolve dirty disks), switch
  * the device to RUNNING and queue a state-change event for every disk.
  * If no disks are left, or a forced start finds no disk that is free of
  * the SYNCHRONIZING flag, the device is flagged for destruction instead.
  *
  * RUNNING: destroy the provider and flag the device for destruction when
  * there are neither ACTIVE nor NEW disks; launch the provider when there
  * are ACTIVE disks and no NEW ones; carry out a pending genid bump.
  *
  * 'force' is true when the call is made for a device event (e.g. the
  * start timeout) rather than as a follow-up of a disk state change.
  * Must be called with the topology lock held.
  */
 static void
 g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
 {
 	struct g_mirror_disk *disk;
 	u_int state;
 
 	g_topology_assert();
 
 	switch (sc->sc_state) {
 	case G_MIRROR_DEVICE_STATE_STARTING:
 	    {
 		struct g_mirror_disk *pdisk, *tdisk;
 		u_int dirty, ndisks, genid, syncid;
 
 		KASSERT(sc->sc_provider == NULL,
 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
 		/*
 		 * Are we ready? We are, if all disks are connected or
 		 * if we have any disks and 'force' is true.
 		 */
 		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
 		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
 			;
 		} else if (g_mirror_ndisks(sc, -1) == 0) {
 			/*
 			 * Disks went down in starting phase, so destroy
 			 * device.
 			 */
 			callout_drain(&sc->sc_callout);
 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 			/*
 			 * The device stays in STARTING state after this
 			 * return, so this path can be entered again by a
 			 * later event.  Release the root mount hold only
 			 * while it is still owned, like the other release
 			 * sites do.
 			 */
 			if (sc->sc_rootmount != NULL) {
 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
 				    __LINE__, sc->sc_rootmount);
 				root_mount_rel(sc->sc_rootmount);
 				sc->sc_rootmount = NULL;
 			}
 			return;
 		} else {
 			/* Not all disks are here yet, keep waiting. */
 			return;
 		}
 
 		/*
 		 * Activate all disks with the biggest syncid.
 		 */
 		if (force) {
 			/*
 			 * If 'force' is true, we have been called due to
 			 * timeout, so don't bother canceling timeout.
 			 */
 			/* Count disks that are not in need of a rebuild. */
 			ndisks = 0;
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
 					ndisks++;
 				}
 			}
 			if (ndisks == 0) {
 				/* No valid disks found, destroy device. */
 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 				/* See above: release the hold only once. */
 				if (sc->sc_rootmount != NULL) {
 					G_MIRROR_DEBUG(1,
 					    "root_mount_rel[%u] %p",
 					    __LINE__, sc->sc_rootmount);
 					root_mount_rel(sc->sc_rootmount);
 					sc->sc_rootmount = NULL;
 				}
 				return;
 			}
 		} else {
 			/* Cancel timeout. */
 			callout_drain(&sc->sc_callout);
 		}
 
 		/*
 		 * Find the biggest genid.
 		 */
 		genid = 0;
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_genid > genid)
 				genid = disk->d_genid;
 		}
 		sc->sc_genid = genid;
 		/*
 		 * Remove all disks without the biggest genid.
 		 * The _SAFE variant is needed, because disks are destroyed
 		 * while the list is being walked.
 		 */
 		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
 			if (disk->d_genid < genid) {
 				G_MIRROR_DEBUG(0,
 				    "Component %s (device %s) broken, skipping.",
 				    g_mirror_get_diskname(disk), sc->sc_name);
 				g_mirror_destroy_disk(disk);
 			}
 		}
 
 		/*
 		 * Find the biggest syncid.
 		 */
 		syncid = 0;
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_sync.ds_syncid > syncid)
 				syncid = disk->d_sync.ds_syncid;
 		}
 
 		/*
 		 * Here we need to look for dirty disks and if all disks
 		 * with the biggest syncid are dirty, we have to choose
 		 * one with the biggest priority and rebuild the rest.
 		 */
 		/*
 		 * Find the number of dirty disks with the biggest syncid.
 		 * Find the number of disks with the biggest syncid.
 		 * While here, find a disk with the biggest priority.
 		 */
 		dirty = ndisks = 0;
 		pdisk = NULL;
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_sync.ds_syncid != syncid)
 				continue;
 			if ((disk->d_flags &
 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
 				continue;
 			}
 			ndisks++;
 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
 				dirty++;
 				if (pdisk == NULL ||
 				    pdisk->d_priority < disk->d_priority) {
 					pdisk = disk;
 				}
 			}
 		}
 		if (dirty == 0) {
 			/* No dirty disks at all, great. */
 		} else if (dirty == ndisks) {
 			/*
 			 * Force synchronization for all dirty disks except one
 			 * with the biggest priority.
 			 * Zeroing a disk's syncid makes it older than the
 			 * device, so g_mirror_determine_state() will ask for
 			 * a full rebuild of it.
 			 */
 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
 			    "master disk for synchronization.",
 			    g_mirror_get_diskname(pdisk), sc->sc_name);
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_sync.ds_syncid != syncid)
 					continue;
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
 					continue;
 				}
 				KASSERT((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
 				    ("Disk %s isn't marked as dirty.",
 				    g_mirror_get_diskname(disk)));
 				/* Skip the disk with the biggest priority. */
 				if (disk == pdisk)
 					continue;
 				disk->d_sync.ds_syncid = 0;
 			}
 		} else if (dirty < ndisks) {
 			/*
 			 * Force synchronization for all dirty disks.
 			 * We have some non-dirty disks.
 			 */
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_sync.ds_syncid != syncid)
 					continue;
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
 					continue;
 				}
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
 					continue;
 				}
 				disk->d_sync.ds_syncid = 0;
 			}
 		}
 
 		/* Reset hint. */
 		sc->sc_hint = NULL;
 		sc->sc_syncid = syncid;
 		if (force) {
 			/* Remember to bump syncid on first write. */
 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 		}
 		state = G_MIRROR_DEVICE_STATE_RUNNING;
 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_device_state2str(state));
 		sc->sc_state = state;
 		/*
 		 * Queue a state change for every disk.  A stale disk means
 		 * the components differ, so the syncid has to be bumped on
 		 * the first write.
 		 */
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			state = g_mirror_determine_state(disk);
 			g_mirror_event_send(disk, state,
 			    G_MIRROR_EVENT_DONTWAIT);
 			if (state == G_MIRROR_DISK_STATE_STALE)
 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 		}
 		break;
 	    }
 	case G_MIRROR_DEVICE_STATE_RUNNING:
 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
 			/*
 			 * No active disks or no disks at all,
 			 * so destroy device.
 			 */
 			if (sc->sc_provider != NULL)
 				g_mirror_destroy_provider(sc);
 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 			break;
 		} else if (g_mirror_ndisks(sc,
 		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
 			/*
 			 * We have active disks, launch provider if it doesn't
 			 * exist.
 			 */
 			if (sc->sc_provider == NULL)
 				g_mirror_launch_provider(sc);
 			if (sc->sc_rootmount != NULL) {
 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
 				    __LINE__, sc->sc_rootmount);
 				root_mount_rel(sc->sc_rootmount);
 				sc->sc_rootmount = NULL;
 			}
 		}
 		/*
 		 * Genid should be bumped immediately, so do it here.
 		 */
 		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
 			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
 			g_mirror_bump_genid(sc);
 		}
 		break;
 	default:
 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
 		break;
 	}
 }
 
 /*
  * Update disk state and device state if needed.
  *
  * DISK_STATE_CHANGED() is a logging helper for g_mirror_update_disk():
  * it reports the transition from the disk's current state to 'state'.
  * It takes no arguments and expects 'disk', 'state' and 'sc' to be
  * defined in the scope where it is expanded.  Since it prints
  * disk->d_state as the old state, it has to be used before d_state is
  * overwritten.
  */
 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
 	"Disk %s state changed from %s to %s (device %s).",		\
 	g_mirror_get_diskname(disk),					\
 	g_mirror_disk_state2str(disk->d_state),				\
 	g_mirror_disk_state2str(state), sc->sc_name)
 static int
 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
 {
 	struct g_mirror_softc *sc;
 
 	g_topology_assert();
 
 	sc = disk->d_softc;
 again:
 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
 	    g_mirror_disk_state2str(state));
 	switch (state) {
 	case G_MIRROR_DISK_STATE_NEW:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk arrive.
 		 */
 		/* Previous state should be NONE. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		disk->d_state = state;
 		if (LIST_EMPTY(&sc->sc_disks))
 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
 		else {
 			struct g_mirror_disk *dp;
 
 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 				if (disk->d_priority >= dp->d_priority) {
 					LIST_INSERT_BEFORE(dp, disk, d_next);
 					dp = NULL;
 					break;
 				}
 				if (LIST_NEXT(dp, d_next) == NULL)
 					break;
 			}
 			if (dp != NULL)
 				LIST_INSERT_AFTER(dp, disk, d_next);
 		}
 		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
 			break;
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		state = g_mirror_determine_state(disk);
 		if (state != G_MIRROR_DISK_STATE_NONE)
 			goto again;
 		break;
 	case G_MIRROR_DISK_STATE_ACTIVE:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk does not need synchronization.
 		 * 2. Synchronization process finished successfully.
 		 */
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		/* Previous state should be NEW or SYNCHRONIZING. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
 			g_mirror_sync_stop(disk, 0);
 		}
 		disk->d_state = state;
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		g_mirror_update_access(disk);
 		g_mirror_update_metadata(disk);
 		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		break;
 	case G_MIRROR_DISK_STATE_STALE:
 		/*
 		 * Possible scenarios:
 		 * 1. Stale disk was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		/*
 		 * STALE state is only possible if device is marked
 		 * NOAUTOSYNC.
 		 */
 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		g_mirror_update_metadata(disk);
 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		break;
 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 		/*
 		 * Possible scenarios:
 		 * 1. Disk which needs synchronization was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		if (sc->sc_provider != NULL) {
 			g_mirror_sync_start(disk);
 			g_mirror_update_metadata(disk);
 		}
 		break;
 	case G_MIRROR_DISK_STATE_DISCONNECTED:
 		/*
 		 * Possible scenarios:
 		 * 1. Device wasn't running yet, but disk disappeared.
 		 * 2. Disk was active and disappeared.
 		 * 3. Disk disappeared during synchronization process.
 		 */
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
 			/*
 			 * Previous state should be ACTIVE, STALE or
 			 * SYNCHRONIZING.
 			 */
 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 			    ("Wrong disk state (%s, %s).",
 			    g_mirror_get_diskname(disk),
 			    g_mirror_disk_state2str(disk->d_state)));
 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
 			/* Previous state should be NEW. */
 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
 			    ("Wrong disk state (%s, %s).",
 			    g_mirror_get_diskname(disk),
 			    g_mirror_disk_state2str(disk->d_state)));
 			/*
 			 * Reset bumping syncid if disk disappeared in STARTING
 			 * state.
 			 */
 			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
 				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
 #ifdef	INVARIANTS
 		} else {
 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
 			    sc->sc_name,
 			    g_mirror_device_state2str(sc->sc_state),
 			    g_mirror_get_diskname(disk),
 			    g_mirror_disk_state2str(disk->d_state)));
 #endif
 		}
 		DISK_STATE_CHANGED();
 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 
 		g_mirror_destroy_disk(disk);
 		break;
 	case G_MIRROR_DISK_STATE_DESTROY:
 	    {
 		int error;
 
 		error = g_mirror_clear_metadata(disk);
 		if (error != 0)
 			return (error);
 		DISK_STATE_CHANGED();
 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 
 		g_mirror_destroy_disk(disk);
 		sc->sc_ndisks--;
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			g_mirror_update_metadata(disk);
 		}
 		break;
 	    }
 	default:
 		KASSERT(1 == 0, ("Unknown state (%u).", state));
 		break;
 	}
 	return (0);
 }
 #undef	DISK_STATE_CHANGED
 
 /*
  * Read and decode the mirror metadata stored on the provider attached to cp.
  *
  * The consumer is opened for reading, the last sector of the provider is
  * read with the topology lock dropped, and the consumer is closed again
  * before the result is examined.  The decoded metadata is stored in *md.
  *
  * Returns 0 on success, the g_access() error if the consumer cannot be
  * opened, the error reported through g_read_data() if the read fails,
  * EINVAL for a wrong magic string or a metadata version newer than
  * G_MIRROR_VERSION, or the mirror_metadata_decode() error otherwise.
  */
 int
 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	/* The read is issued without the topology lock held. */
 	g_topology_unlock();
 	/* Metadata are stored on last sector. */
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	/*
 	 * NOTE(review): once the condition below tests 'buf == NULL', the
 	 * inner 'buf != NULL' check can never be true.
 	 */
-	if (error != 0) {
+	if (buf == NULL) {
 		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
 		    cp->provider->name, error);
 		if (buf != NULL)
 			g_free(buf);
 		return (error);
 	}
 
 	/* Decode metadata. */
 	error = mirror_metadata_decode(buf, md);
 	g_free(buf);
 	/*
 	 * The decode result is examined only after the magic and version
 	 * checks, so those two mismatches are reported as EINVAL first.
 	 */
 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
 		return (EINVAL);
 	if (md->md_version > G_MIRROR_VERSION) {
 		G_MIRROR_DEBUG(0,
 		    "Kernel module is too old to handle metadata from %s.",
 		    cp->provider->name);
 		return (EINVAL);
 	}
 	if (error != 0) {
 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
 		    cp->provider->name);
 		return (error);
 	}
 
 	return (0);
 }
 
 static int
 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
     struct g_mirror_metadata *md)
 {
 
 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
 		    pp->name, md->md_did);
 		return (EEXIST);
 	}
 	if (md->md_all != sc->sc_ndisks) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_all", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_slice != sc->sc_slice) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_slice", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_balance != sc->sc_balance) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_balance", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_mediasize != sc->sc_mediasize) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_mediasize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (sc->sc_mediasize > pp->mediasize) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
 		    sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_sectorsize != sc->sc_sectorsize) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_sectorsize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid sector size of disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid device flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid disk flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	return (0);
 }
 
 int
 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
     struct g_mirror_metadata *md)
 {
 	struct g_mirror_disk *disk;
 	int error;
 
 	g_topology_assert();
 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
 
 	error = g_mirror_check_metadata(sc, pp, md);
 	if (error != 0)
 		return (error);
 	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
 	    md->md_genid < sc->sc_genid) {
 		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	disk = g_mirror_init_disk(sc, pp, md, &error);
 	if (disk == NULL)
 		return (error);
 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
 	    G_MIRROR_EVENT_WAIT);
 	if (error != 0)
 		return (error);
 	if (md->md_version < G_MIRROR_VERSION) {
 		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
 		    pp->name, md->md_version, G_MIRROR_VERSION);
 		g_mirror_update_metadata(disk);
 	}
 	return (0);
 }
 
 /*
  * GEOM access method of the mirror provider.
  *
  * acr/acw/ace are the requested changes of the read/write/exclusive counts.
  * When the device is gone, has no disks or is being destroyed, only
  * requests that do not raise any count are accepted (0); others get ENXIO.
  * Otherwise every ACTIVE disk is marked dirty when the write count leaves
  * zero and clean when it returns to zero, and its metadata is updated.
  *
  * Always returns 0 for a live device.
  */
 static int
 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *disk;
 	int dcw, mark_clean, mark_dirty;
 
 	g_topology_assert();
 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
 	    acw, ace);
 
 	/* Write count the provider will have once this request is granted. */
 	dcw = pp->acw + acw;
 
 	sc = pp->geom->softc;
 	if (sc == NULL || LIST_EMPTY(&sc->sc_disks) ||
 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 		if (acr <= 0 && acw <= 0 && ace <= 0)
 			return (0);
 		else
 			return (ENXIO);
 	}
 
 	/*
 	 * Mark disks as dirty on first open for writing and unmark them on
 	 * last close.  The decision does not depend on the disk, so it is
 	 * made once here instead of once per disk.
 	 */
 	mark_dirty = (pp->acw == 0 && dcw > 0);
 	mark_clean = (pp->acw > 0 && dcw == 0);
 	if (!mark_dirty && !mark_clean)
 		return (0);
 
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
 			continue;
 		if (mark_dirty) {
 			G_MIRROR_DEBUG(1,
 			    "Disk %s (device %s) marked as dirty.",
 			    g_mirror_get_diskname(disk), sc->sc_name);
 			disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 		} else {
 			G_MIRROR_DEBUG(1,
 			    "Disk %s (device %s) marked as clean.",
 			    g_mirror_get_diskname(disk), sc->sc_name);
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		}
 		g_mirror_update_metadata(disk);
 	}
 	return (0);
 }
 
 /*
  * Create a new mirror device from metadata md.
  *
  * Two geoms are created in class mp: the "action" geom named after the
  * device and a "<name>.sync" geom, both sharing one softc.  A worker
  * kernel thread is started for the device and a callout is armed to call
  * g_mirror_go() after g_mirror_timeout * hz ticks.
  *
  * Returns the action geom, or NULL when md describes fewer than one disk
  * or the worker thread cannot be created.
  */
 static struct g_geom *
 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
 {
 	struct g_mirror_softc *sc;
 	struct g_geom *gp;
 	int error, timeout;
 
 	g_topology_assert();
 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
 	    md->md_mid);
 
 	/* One disk is minimum. */
 	if (md->md_all < 1)
 		return (NULL);
 	/*
 	 * Action geom.
 	 */
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
 	gp->start = g_mirror_start;
 	gp->orphan = g_mirror_orphan;
 	gp->access = g_mirror_access;
 	gp->dumpconf = g_mirror_dumpconf;
 
 	/* Copy the device configuration from the metadata. */
 	sc->sc_id = md->md_mid;
 	sc->sc_slice = md->md_slice;
 	sc->sc_balance = md->md_balance;
 	sc->sc_mediasize = md->md_mediasize;
 	sc->sc_sectorsize = md->md_sectorsize;
 	sc->sc_ndisks = md->md_all;
 	sc->sc_flags = md->md_mflags;
 	sc->sc_bump_id = 0;
 	sc->sc_idle = 0;
 	bioq_init(&sc->sc_queue);
 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
 	LIST_INIT(&sc->sc_disks);
 	TAILQ_INIT(&sc->sc_events);
 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
 	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
 	gp->softc = sc;
 	sc->sc_geom = gp;
 	sc->sc_provider = NULL;
 	/*
 	 * Synchronization geom.
 	 */
 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
 	gp->softc = sc;
 	gp->orphan = g_mirror_orphan;
 	sc->sc_sync.ds_geom = gp;
 	sc->sc_sync.ds_ndisks = 0;
 	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
 	    "g_mirror %s", md->md_name);
 	if (error != 0) {
 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
 		    sc->sc_name);
 		/* Undo everything set up above. */
 		g_destroy_geom(sc->sc_sync.ds_geom);
 		mtx_destroy(&sc->sc_events_mtx);
 		mtx_destroy(&sc->sc_queue_mtx);
 		g_destroy_geom(sc->sc_geom);
 		free(sc, M_MIRROR);
 		return (NULL);
 	}
 
 	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
 
 	/* Taken only after the worker thread exists; not needed on failure. */
 	sc->sc_rootmount = root_mount_hold("GMIRROR");
 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
 	/*
 	 * Run timeout.
 	 */
 	timeout = g_mirror_timeout * hz;
 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
 	return (sc->sc_geom);
 }
 
 /*
  * Destroy mirror device sc.
  *
  * Returns ENXIO when sc is NULL and EBUSY when the device's provider is
  * still open and force is not set.  Otherwise the DESTROY and WAIT flags
  * are set, the worker is woken up, and this function polls (with the
  * topology lock dropped) until sc->sc_worker becomes NULL before the
  * device is torn down and sc is freed.  Returns 0 in that case.
  */
 int
 g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
 {
 	struct g_provider *pp;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 	pp = sc->sc_provider;
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		if (force) {
 			/* Forced: only log and carry on with the destruction. */
 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
 			    "can't be definitely removed.", pp->name);
 		} else {
 			G_MIRROR_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		}
 	}
 
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
 	/* The topology lock is not held while waiting for the worker. */
 	g_topology_unlock();
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
 	/* Re-check every hz / 5 ticks until sc_worker has been cleared. */
 	while (sc->sc_worker != NULL)
 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
 	g_topology_lock();
 	g_mirror_destroy_device(sc);
 	free(sc, M_MIRROR);
 	return (0);
 }
 
 /*
  * Orphan method installed on the temporary tasting geom.  It is not
  * expected to run; reaching it trips the assertion below.
  */
 static void
 g_mirror_taste_orphan(struct g_consumer *cp)
 {
 
 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
 	    cp->provider->name));
 }
 
 /*
  * Taste method of the mirror class: decide whether provider pp is a
  * component of a mirror and, if so, attach it.
  *
  * Metadata is read through a temporary geom/consumer which is destroyed
  * right afterwards.  Providers with hardcoded name or size that do not
  * match pp, and providers flagged INACTIVE, are skipped.  The device named
  * in the metadata is looked up among the class geoms and created when it
  * does not exist yet; the provider is then added to it as a disk.
  *
  * Returns the device geom on success and NULL otherwise.
  */
 static struct g_geom *
 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_mirror_metadata md;
 	struct g_mirror_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
 
 	/* Temporary geom and consumer used only to read the metadata. */
 	gp = g_new_geomf(mp, "mirror:taste");
 	/*
 	 * This orphan function should be never called.
 	 */
 	gp->orphan = g_mirror_taste_orphan;
 	cp = g_new_consumer(gp);
 	g_attach(cp, pp);
 	error = g_mirror_read_metadata(cp, &md);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	/* A non-empty md_provider / non-zero md_provsize must match pp. */
 	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
 		return (NULL);
 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
 		return (NULL);
 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
 		G_MIRROR_DEBUG(0,
 		    "Device %s: provider %s marked as inactive, skipping.",
 		    md.md_name, pp->name);
 		return (NULL);
 	}
 	if (g_mirror_debug >= 2)
 		mirror_metadata_dump(&md);
 
 	/*
 	 * Let's check if device already exists.
 	 */
 	sc = NULL;
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		/* The synchronization geom shares the softc; skip it. */
 		if (sc->sc_sync.ds_geom == gp)
 			continue;
 		if (strcmp(md.md_name, sc->sc_name) != 0)
 			continue;
 		/* Same name but a different id: refuse the provider. */
 		if (md.md_mid != sc->sc_id) {
 			G_MIRROR_DEBUG(0, "Device %s already configured.",
 			    sc->sc_name);
 			return (NULL);
 		}
 		break;
 	}
 	/* gp is NULL here when the loop ran to the end without a match. */
 	if (gp == NULL) {
 		gp = g_mirror_create(mp, &md);
 		if (gp == NULL) {
 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
 			    md.md_name);
 			return (NULL);
 		}
 		sc = gp->softc;
 	}
 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 	error = g_mirror_add_disk(sc, pp, &md);
 	if (error != 0) {
 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
 		    pp->name, gp->name, error);
 		/* Do not leave a device without any disks behind. */
 		if (LIST_EMPTY(&sc->sc_disks))
 			g_mirror_destroy(sc, 1);
 		return (NULL);
 	}
 	return (gp);
 }
 
 /*
  * Destroy the device behind geom gp without forcing; returns whatever
  * g_mirror_destroy() returns.  req and mp are unused.
  */
 static int
 g_mirror_destroy_geom(struct gctl_req *req __unused,
     struct g_class *mp __unused, struct g_geom *gp)
 {
 
 	return (g_mirror_destroy(gp->softc, 0));
 }
 
 /*
  * Dumpconf method: append XML configuration fragments to sb.
  *
  * Nothing is emitted for a geom without softc, for the synchronization
  * geom, or when called for a provider (pp != NULL).  When called for a
  * consumer (cp != NULL) the state of the disk stored in cp->private is
  * dumped; when called for the geom itself (both NULL) the device state is
  * dumped.  Every line is prefixed with indent.
  */
 static void
 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_mirror_softc *sc;
 
 	g_topology_assert();
 
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	/* Skip synchronization geom. */
 	if (gp == sc->sc_sync.ds_geom)
 		return;
 	if (pp != NULL) {
 		/* Nothing here. */
 	} else if (cp != NULL) {
 		struct g_mirror_disk *disk;
 
 		disk = cp->private;
 		if (disk == NULL)
 			return;
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
 			sbuf_printf(sb, "%s<Synchronized>", indent);
 			/* sc_provider is dereferenced only for non-zero progress. */
 			if (disk->d_sync.ds_offset_done == 0)
 				sbuf_printf(sb, "0%%");
 			else {
 				sbuf_printf(sb, "%u%%",
 				    (u_int)((disk->d_sync.ds_offset_done * 100) /
 				    sc->sc_provider->mediasize));
 			}
 			sbuf_printf(sb, "</Synchronized>\n");
 		}
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
 		    disk->d_sync.ds_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
 		    disk->d_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (disk->d_flags == 0)
 			sbuf_printf(sb, "NONE");
 		else {
 			int first = 1;
 
 /* Append a flag name, comma-separated; name is never used as a format. */
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((disk->d_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_printf(sb, "%s", name);				\
 	}								\
 } while (0)
 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
 			    "SYNCHRONIZING");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
 #undef	ADD_FLAG
 		}
 		sbuf_printf(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
 		    disk->d_priority);
 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 		    g_mirror_disk_state2str(disk->d_state));
 	} else {
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (sc->sc_flags == 0)
 			sbuf_printf(sb, "NONE");
 		else {
 			int first = 1;
 
 /* Same as above, but for the device-wide flags. */
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((sc->sc_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_printf(sb, "%s", name);				\
 	}								\
 } while (0)
 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
 #undef	ADD_FLAG
 		}
 		sbuf_printf(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
 		    (u_int)sc->sc_slice);
 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
 		    balance_name(sc->sc_balance));
 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
 		    sc->sc_ndisks);
 		sbuf_printf(sb, "%s<State>", indent);
 		/* COMPLETE means every configured component is ACTIVE. */
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
 			sbuf_printf(sb, "%s", "STARTING");
 		else if (sc->sc_ndisks ==
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
 			sbuf_printf(sb, "%s", "COMPLETE");
 		else
 			sbuf_printf(sb, "%s", "DEGRADED");
 		sbuf_printf(sb, "</State>\n");
 	}
 }
 
 /*
  * Shutdown event handler: forcibly destroy every device of the class
  * passed in arg.  howto is not used.
  */
 static void
 g_mirror_shutdown(void *arg, int howto)
 {
 	struct g_class *mp;
 	struct g_geom *gp, *nextgp;
 
 	mp = arg;
 	DROP_GIANT();
 	g_topology_lock();
 	/* The safe variant is used because geoms go away while iterating. */
 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, nextgp) {
 		if (gp->softc != NULL)
 			g_mirror_destroy(gp->softc, 1);
 	}
 	g_topology_unlock();
 	PICKUP_GIANT();
 #if 0
 	tsleep(&gp, PRIBIO, "m:shutdown", hz * 20);
 #endif
 }
 
 /*
  * Class init method: hook g_mirror_shutdown() into the shutdown_post_sync
  * event and remember the tag; a failed registration is only logged.
  */
 static void
 g_mirror_init(struct g_class *mp)
 {
 
 	g_mirror_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync,
 	    g_mirror_shutdown, mp, SHUTDOWN_PRI_FIRST);
 	if (g_mirror_ehtag != NULL)
 		return;
 	G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
 }
 
 /*
  * Class fini method: remove the shutdown handler if one was registered.
  */
 static void
 g_mirror_fini(struct g_class *mp)
 {
 
 	if (g_mirror_ehtag != NULL) {
 		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_ehtag);
 	}
 }
 
 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
Index: head/sys/geom/raid3/g_raid3.c
===================================================================
--- head/sys/geom/raid3/g_raid3.c	(revision 152966)
+++ head/sys/geom/raid3/g_raid3.c	(revision 152967)
@@ -1,3132 +1,3132 @@
 /*-
  * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/eventhandler.h>
 #include <vm/uma.h>
 #include <geom/geom.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
 #include <sys/sched.h>
 #include <geom/raid3/g_raid3.h>
 
 
 /* Malloc type used for allocations made by this file. */
 static MALLOC_DEFINE(M_RAID3, "raid3_data", "GEOM_RAID3 Data");
 
 /*
  * Run-time knobs exported under kern.geom.raid3; the ones with a
  * TUNABLE_INT entry also have a tunable of the same name.
  */
 SYSCTL_DECL(_kern_geom);
 SYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW, 0, "GEOM_RAID3 stuff");
 u_int g_raid3_debug = 0;
 TUNABLE_INT("kern.geom.raid3.debug", &g_raid3_debug);
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RW, &g_raid3_debug, 0,
     "Debug level");
 static u_int g_raid3_timeout = 4;
 TUNABLE_INT("kern.geom.raid3.timeout", &g_raid3_timeout);
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RW, &g_raid3_timeout,
     0, "Time to wait on all raid3 components");
 static u_int g_raid3_idletime = 5;
 TUNABLE_INT("kern.geom.raid3.idletime", &g_raid3_idletime);
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RW,
     &g_raid3_idletime, 0, "Mark components as clean when idling");
 static u_int g_raid3_reqs_per_sync = 5;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
     &g_raid3_reqs_per_sync, 0,
     "Number of regular I/O requests per synchronization request");
 static u_int g_raid3_syncs_per_sec = 1000;
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
     &g_raid3_syncs_per_sec, 0,
     "Number of synchronizations requests per second");
 
 /*
  * Limits on the number of 64kB/16kB/4kB allocations.  The sysctls are
  * read-only (CTLFLAG_RD); each value also has a TUNABLE_INT entry.
  */
 static u_int g_raid3_n64k = 50;
 TUNABLE_INT("kern.geom.raid3.n64k", &g_raid3_n64k);
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RD, &g_raid3_n64k, 0,
     "Maximum number of 64kB allocations");
 static u_int g_raid3_n16k = 200;
 TUNABLE_INT("kern.geom.raid3.n16k", &g_raid3_n16k);
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n16k, CTLFLAG_RD, &g_raid3_n16k, 0,
     "Maximum number of 16kB allocations");
 static u_int g_raid3_n4k = 1200;
 TUNABLE_INT("kern.geom.raid3.n4k", &g_raid3_n4k);
 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RD, &g_raid3_n4k, 0,
     "Maximum number of 4kB allocations");
 
 /* Read-only statistics counters exported under kern.geom.raid3.stat. */
 SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0,
     "GEOM_RAID3 statistics");
 static u_int g_raid3_parity_mismatch = 0;
 SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD,
     &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode");
 static u_int g_raid3_64k_requested = 0;
 SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 64k_requested, CTLFLAG_RD,
     &g_raid3_64k_requested, 0, "Number of requested 64kB allocations");
 static u_int g_raid3_64k_failed = 0;
 SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 64k_failed, CTLFLAG_RD,
     &g_raid3_64k_failed, 0, "Number of failed 64kB allocations");
 static u_int g_raid3_16k_requested = 0;
 SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 16k_requested, CTLFLAG_RD,
     &g_raid3_16k_requested, 0, "Number of requested 16kB allocations");
 static u_int g_raid3_16k_failed = 0;
 SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 16k_failed, CTLFLAG_RD,
     &g_raid3_16k_failed, 0, "Number of failed 16kB allocations");
 static u_int g_raid3_4k_requested = 0;
 SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 4k_requested, CTLFLAG_RD,
     &g_raid3_4k_requested, 0, "Number of requested 4kB allocations");
 static u_int g_raid3_4k_failed = 0;
 SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 4k_failed, CTLFLAG_RD,
     &g_raid3_4k_failed, 0, "Number of failed 4kB allocations");
 
 /*
  * msleep() wrapper which logs, at debug level 4, the sleep channel before
  * going to sleep and again after waking up.  The msleep() result is
  * discarded.
  */
 #define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
 	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
 	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
 	G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
 } while (0)
 
 static eventhandler_tag g_raid3_ehtag = NULL;
 
 static int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 static g_taste_t g_raid3_taste;
 static void g_raid3_init(struct g_class *mp);
 static void g_raid3_fini(struct g_class *mp);
 
 /* GEOM class descriptor: name, version and the class-level methods. */
 struct g_class g_raid3_class = {
 	.name = G_RAID3_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_raid3_config,
 	.taste = g_raid3_taste,
 	.destroy_geom = g_raid3_destroy_geom,
 	.init = g_raid3_init,
 	.fini = g_raid3_fini
 };
 
 
 static void g_raid3_destroy_provider(struct g_raid3_softc *sc);
 static int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state);
 static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force);
 static void g_raid3_dumpconf(struct sbuf *sb, const char *indent,
     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
 static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type);
 
 
 /*
  * Translate a G_RAID3_DISK_STATE_* value into its printable name;
  * unknown values yield "INVALID".
  */
 static const char *
 g_raid3_disk_state2str(int state)
 {
 
 	if (state == G_RAID3_DISK_STATE_NODISK)
 		return ("NODISK");
 	if (state == G_RAID3_DISK_STATE_NONE)
 		return ("NONE");
 	if (state == G_RAID3_DISK_STATE_NEW)
 		return ("NEW");
 	if (state == G_RAID3_DISK_STATE_ACTIVE)
 		return ("ACTIVE");
 	if (state == G_RAID3_DISK_STATE_STALE)
 		return ("STALE");
 	if (state == G_RAID3_DISK_STATE_SYNCHRONIZING)
 		return ("SYNCHRONIZING");
 	if (state == G_RAID3_DISK_STATE_DISCONNECTED)
 		return ("DISCONNECTED");
 	return ("INVALID");
 }
 
 /*
  * Translate a G_RAID3_DEVICE_STATE_* value into its printable name;
  * unknown values yield "INVALID".
  */
 static const char *
 g_raid3_device_state2str(int state)
 {
 
 	if (state == G_RAID3_DEVICE_STATE_STARTING)
 		return ("STARTING");
 	if (state == G_RAID3_DEVICE_STATE_DEGRADED)
 		return ("DEGRADED");
 	if (state == G_RAID3_DEVICE_STATE_COMPLETE)
 		return ("COMPLETE");
 	return ("INVALID");
 }
 
 /*
  * Return the name of the disk, or "[unknown]" when the disk has no
  * consumer or the consumer is not attached to a provider.
  */
 const char *
 g_raid3_get_diskname(struct g_raid3_disk *disk)
 {
 
 	if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL)
 		return (disk->d_name);
 	return ("[unknown]");
 }
 
 /*
  * XOR two buffers into a third: dst[i] = src1[i] ^ src2[i].
  *
  * The macro casts its arguments so callers may pass any pointer type; the
  * size argument is parenthesized so that an expression can be passed
  * safely.  size is in bytes and has to be a multiple of 128, because the
  * loop below handles sixteen 64-bit words per iteration.
  */
 #define	g_raid3_xor(src1, src2, dst, size)				\
 	_g_raid3_xor((uint64_t *)(src1), (uint64_t *)(src2),		\
 	    (uint64_t *)(dst), (size_t)(size))
 static void
 _g_raid3_xor(uint64_t *src1, uint64_t *src2, uint64_t *dst, size_t size)
 {
 
 	KASSERT((size % 128) == 0, ("Invalid size: %zu.", size));
 	/* Unrolled by hand: 16 words of 8 bytes = 128 bytes per pass. */
 	for (; size > 0; size -= 128) {
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 		*dst++ = (*src1++) ^ (*src2++);
 	}
 }
 
 static int
 g_raid3_is_zero(struct bio *bp)
 {
 	static const uint64_t zeros[] = {
 	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 	};
 	u_char *addr;
 	ssize_t size;
 
 	size = bp->bio_length;
 	addr = (u_char *)bp->bio_data;
 	for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) {
 		if (bcmp(addr, zeros, sizeof(zeros)) != 0)
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * --- Events handling functions ---
  * Events in geom_raid3 are used to maintain disks and device status
  * from one thread to simplify locking.
  */
 /* Release an event structure allocated from M_RAID3. */
 static void
 g_raid3_event_free(struct g_raid3_event *ep)
 {
 
 	free(ep, M_RAID3);
 }
 
 /*
  * Queue an event for the device and wake up whoever sleeps on it.
  *
  * arg is a softc when G_RAID3_EVENT_DEVICE is set in flags and a disk
  * otherwise.  With G_RAID3_EVENT_DONTWAIT the function returns 0 right
  * after queueing.  Otherwise the topology lock (which must be held) is
  * dropped, the caller sleeps until the event is flagged
  * G_RAID3_EVENT_DONE, and the event's e_error is returned after the event
  * has been freed.
  */
 int
 g_raid3_event_send(void *arg, int state, int flags)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct g_raid3_event *ep;
 	int error;
 
 	ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK);
 	G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep);
 	if ((flags & G_RAID3_EVENT_DEVICE) != 0) {
 		disk = NULL;
 		sc = arg;
 	} else {
 		disk = arg;
 		sc = disk->d_softc;
 	}
 	ep->e_disk = disk;
 	ep->e_state = state;
 	ep->e_flags = flags;
 	ep->e_error = 0;
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
 	mtx_unlock(&sc->sc_events_mtx);
 	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	wakeup(&sc->sc_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 	if ((flags & G_RAID3_EVENT_DONTWAIT) != 0)
 		return (0);
 	g_topology_assert();
 	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
 	g_topology_unlock();
 	/*
 	 * The DONE flag is tested before the mutex is taken; the sleep has
 	 * a timeout of hz * 5 ticks, after which the flag is tested again.
 	 * PDROP makes msleep() return with the mutex released.
 	 */
 	while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) {
 		mtx_lock(&sc->sc_events_mtx);
 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event",
 		    hz * 5);
 	}
 	/* Don't even try to use 'sc' here, because it could be already dead. */
 	g_topology_lock();
 	error = ep->e_error;
 	g_raid3_event_free(ep);
 	return (error);
 }
 
 /*
  * Return the first queued event of the device without removing it,
  * or NULL when the queue is empty.
  */
 static struct g_raid3_event *
 g_raid3_event_get(struct g_raid3_softc *sc)
 {
 	struct g_raid3_event *first;
 
 	mtx_lock(&sc->sc_events_mtx);
 	first = TAILQ_FIRST(&sc->sc_events);
 	mtx_unlock(&sc->sc_events_mtx);
 
 	return (first);
 }
 
 /* Unlink event ep from the device's event queue; ep itself is not freed. */
 static void
 g_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep)
 {
 
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 	mtx_unlock(&sc->sc_events_mtx);
 }
 
 /*
  * Drop all queued events which refer to the given disk.
  *
  * Events sent with G_RAID3_EVENT_DONTWAIT are freed here; for the others
  * e_error is set to ECANCELED and the event is used as a wakeup channel.
  * Device-wide events are left alone.
  */
 static void
 g_raid3_event_cancel(struct g_raid3_disk *disk)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_event *ev, *nextev;
 
 	g_topology_assert();
 
 	sc = disk->d_softc;
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_FOREACH_SAFE(ev, &sc->sc_events, e_next, nextev) {
 		/* Only disk events for this very disk are of interest. */
 		if ((ev->e_flags & G_RAID3_EVENT_DEVICE) != 0 ||
 		    ev->e_disk != disk)
 			continue;
 		TAILQ_REMOVE(&sc->sc_events, ev, e_next);
 		if ((ev->e_flags & G_RAID3_EVENT_DONTWAIT) == 0) {
 			ev->e_error = ECANCELED;
 			wakeup(ev);
 		} else {
 			g_raid3_event_free(ev);
 		}
 	}
 	mtx_unlock(&sc->sc_events_mtx);
 }
 
 /*
  * Return the number of disks in the given state.
  * If state is equal to -1, count all connected disks.
  */
 u_int
 g_raid3_ndisks(struct g_raid3_softc *sc, int state)
 {
 	struct g_raid3_disk *dp;
 	u_int count, i;
 
 	count = 0;
 	for (i = 0; i < sc->sc_ndisks; i++) {
 		dp = &sc->sc_disks[i];
 		/* Empty slots are never counted, not even for state == -1. */
 		if (dp->d_state != G_RAID3_DISK_STATE_NODISK &&
 		    (state == -1 || dp->d_state == state))
 			count++;
 	}
 	return (count);
 }
 
 /*
  * Count the requests in the device queue which came from consumer cp.
  */
 static u_int
 g_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp)
 {
 	struct bio *req;
 	u_int count;
 
 	count = 0;
 	mtx_lock(&sc->sc_queue_mtx);
 	TAILQ_FOREACH(req, &sc->sc_queue.queue, bio_queue) {
 		if (req->bio_from != cp)
 			continue;
 		count++;
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 
 	return (count);
 }
 
 /*
  * Tell whether consumer cp still has work attached to it: either its
  * index counter is positive or requests from it sit in the device queue.
  * Returns 1 when busy and 0 otherwise.
  */
 static int
 g_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp)
 {
 	int busy;
 
 	busy = 0;
 	if (cp->index > 0) {
 		G_RAID3_DEBUG(2,
 		    "I/O requests for %s exist, can't destroy it now.",
 		    cp->provider->name);
 		busy = 1;
 	} else if (g_raid3_nrequests(sc, cp) > 0) {
 		G_RAID3_DEBUG(2,
 		    "I/O requests for %s in queue, can't destroy it now.",
 		    cp->provider->name);
 		busy = 1;
 	}
 	return (busy);
 }
 
 /*
  * Event handler (posted through g_post_event() by g_raid3_kill_consumer()):
  * detach the consumer passed in 'arg' from its provider and destroy it.
  */
 static void
 g_raid3_destroy_consumer(void *arg, int flags __unused)
 {
 	struct g_consumer *cp = arg;
 
 	G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 }
 
 /*
  * Close and destroy a consumer, unless it is still busy.
  *
  * The consumer's private pointer is cleared first, so completion handlers
  * that find it NULL call this function again later.  If the consumer is
  * busy (see g_raid3_is_busy()) nothing else happens.  Otherwise all access
  * counts are dropped and the consumer is detached and destroyed, either
  * right away or from a posted event (see below).
  * The topology lock must be held by the caller.
  */
 static void
 g_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	int retaste_wait;
 
 	g_topology_assert();
 
 	cp->private = NULL;
 	if (g_raid3_is_busy(sc, cp))
 		return;
 	pp = cp->provider;
 	G_RAID3_DEBUG(2, "Consumer %s destroyed.", pp->name);
 	/*
 	 * Decide before closing: a consumer opened for writing on a geom
 	 * that is not withering has its destruction deferred to an event.
 	 */
 	retaste_wait = (cp->acw == 1 &&
 	    (pp->geom->flags & G_GEOM_WITHER) == 0);
 	G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
 	    -cp->acw, -cp->ace, 0);
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
 	if (!retaste_wait) {
 		G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name);
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		return;
 	}
 	/*
 	 * The retaste event has been sent (inside g_access()), so now an
 	 * event to detach and destroy the consumer can be posted.
 	 * A class that still has a consumer connected to the given provider
 	 * will not receive the retaste event for that provider.
 	 * This is how retaste events are ignored when consumers opened for
 	 * writing are closed: the consumer is detached and destroyed only
 	 * after the retaste event has been sent.
 	 */
 	g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL);
 }
 
 static int
 g_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp)
 {
 	struct g_consumer *cp;
 	int error;
 
 	g_topology_assert();
 	KASSERT(disk->d_consumer == NULL,
 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
 
 	cp = g_new_consumer(disk->d_softc->sc_geom);
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		return (error);
 	}
 	error = g_access(cp, 1, 1, 1);
 	if (error != 0) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).",
 		    pp->name, error);
 		return (error);
 	}
 	disk->d_consumer = cp;
 	disk->d_consumer->private = disk;
 	disk->d_consumer->index = 0;
 	G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk));
 	return (0);
 }
 
 /*
  * Get rid of a consumer.  A NULL consumer is ignored.  A consumer that is
  * attached to a provider goes through g_raid3_kill_consumer(); one that is
  * not attached is destroyed directly.
  * The topology lock must be held by the caller.
  */
 static void
 g_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp)
 {
 
 	g_topology_assert();
 
 	if (cp != NULL) {
 		if (cp->provider == NULL)
 			g_destroy_consumer(cp);
 		else
 			g_raid3_kill_consumer(sc, cp);
 	}
 }
 
 /*
  * Initialize disk. This means allocate memory, create consumer, attach it
  * to the provider and open access (r1w1e1) to it.
  */
 static struct g_raid3_disk *
 g_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp,
     struct g_raid3_metadata *md, int *errorp)
 {
 	struct g_raid3_disk *disk;
 	int error;
 
 	disk = &sc->sc_disks[md->md_no];
 	error = g_raid3_connect_disk(disk, pp);
 	if (error != 0) {
 		if (errorp != NULL)
 			*errorp = error;
 		return (NULL);
 	}
 	disk->d_state = G_RAID3_DISK_STATE_NONE;
 	disk->d_flags = md->md_dflags;
 	if (md->md_provider[0] != '\0')
 		disk->d_flags |= G_RAID3_DISK_FLAG_HARDCODED;
 	disk->d_sync.ds_consumer = NULL;
 	disk->d_sync.ds_offset = md->md_sync_offset;
 	disk->d_sync.ds_offset_done = md->md_sync_offset;
 	disk->d_sync.ds_resync = -1;
 	disk->d_genid = md->md_genid;
 	disk->d_sync.ds_syncid = md->md_syncid;
 	if (errorp != NULL)
 		*errorp = 0;
 	return (disk);
 }
 
 /*
  * Disconnect a disk from the device and mark its slot as
  * G_RAID3_DISK_STATE_NODISK.  A slot that is already NODISK is left alone.
  * Pending events for the disk are cancelled first; a disk that is being
  * synchronized has the synchronization stopped (when a sync disk is set)
  * before its consumer is disconnected.
  * The topology lock must be held by the caller.
  */
 static void
 g_raid3_destroy_disk(struct g_raid3_disk *disk)
 {
 	struct g_raid3_softc *sc;
 	int state;
 
 	g_topology_assert();
 
 	if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 		return;
 	g_raid3_event_cancel(disk);
 	sc = disk->d_softc;
 	/* Decide on the state as it is now, before anything is torn down. */
 	state = disk->d_state;
 	if (state == G_RAID3_DISK_STATE_SYNCHRONIZING &&
 	    sc->sc_syncdisk != NULL)
 		g_raid3_sync_stop(sc, 1);
 	if (state == G_RAID3_DISK_STATE_SYNCHRONIZING ||
 	    state == G_RAID3_DISK_STATE_NEW ||
 	    state == G_RAID3_DISK_STATE_STALE ||
 	    state == G_RAID3_DISK_STATE_ACTIVE) {
 		g_raid3_disconnect_consumer(sc, disk->d_consumer);
 		disk->d_consumer = NULL;
 	} else {
 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 	}
 	disk->d_state = G_RAID3_DISK_STATE_NODISK;
 }
 
 /*
  * Tear down the whole RAID3 device.
  *
  * In order: destroy the provider (if there is one), write metadata with the
  * DIRTY flag cleared to every connected disk and destroy the disk, empty
  * the event queue, drain the callout, detach the softc from both geoms,
  * wither the synchronization geom, destroy the UMA zones and the mutexes
  * and finally wither the device's own geom.
  * The topology lock must be held by the caller.
  */
 static void
 g_raid3_destroy_device(struct g_raid3_softc *sc)
 {
 	struct g_raid3_event *ep;
 	struct g_raid3_disk *disk;
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	u_int n;
 
 	g_topology_assert();
 
 	gp = sc->sc_geom;
 	if (sc->sc_provider != NULL)
 		g_raid3_destroy_provider(sc);
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		disk = &sc->sc_disks[n];
 		if (disk->d_state != G_RAID3_DISK_STATE_NODISK) {
 			/* Store the disk as clean before letting it go. */
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 			g_raid3_update_metadata(disk);
 			g_raid3_destroy_disk(disk);
 		}
 	}
 	/*
 	 * Flush the event queue.  Events sent with DONTWAIT are simply freed;
 	 * the others are marked done with ECANCELED and any thread sleeping
 	 * on the event is woken up.
 	 */
 	while ((ep = g_raid3_event_get(sc)) != NULL) {
 		g_raid3_event_remove(sc, ep);
 		if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0)
 			g_raid3_event_free(ep);
 		else {
 			ep->e_error = ECANCELED;
 			ep->e_flags |= G_RAID3_EVENT_DONE;
 			G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep);
 			mtx_lock(&sc->sc_events_mtx);
 			wakeup(ep);
 			mtx_unlock(&sc->sc_events_mtx);
 		}
 	}
 	callout_drain(&sc->sc_callout);
 	gp->softc = NULL;
 	/* Only the first consumer of the synchronization geom is handled. */
 	cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer);
 	if (cp != NULL)
 		g_raid3_disconnect_consumer(sc, cp);
 	sc->sc_sync.ds_geom->softc = NULL;
 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
 	uma_zdestroy(sc->sc_zone_64k); 
 	uma_zdestroy(sc->sc_zone_16k); 
 	uma_zdestroy(sc->sc_zone_4k); 
 	mtx_destroy(&sc->sc_queue_mtx);
 	mtx_destroy(&sc->sc_events_mtx);
 	G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom(gp, ENXIO);
 }
 
 /*
  * Orphan handler for a consumer.  When the consumer still has a disk
  * attached to its private pointer, request a syncid bump on the device and
  * queue (without waiting) a DISCONNECTED event for that disk.
  * The topology lock must be held by the caller.
  */
 static void
 g_raid3_orphan(struct g_consumer *cp)
 {
 	struct g_raid3_disk *disk;
 
 	g_topology_assert();
 
 	disk = cp->private;
 	if (disk != NULL) {
 		disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID;
 		g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED,
 		    G_RAID3_EVENT_DONTWAIT);
 	}
 }
 
 static int
 g_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
 {
 	struct g_raid3_softc *sc;
 	struct g_consumer *cp;
 	off_t offset, length;
 	u_char *sector;
 	int error = 0;
 
 	g_topology_assert();
 
 	sc = disk->d_softc;
 	cp = disk->d_consumer;
 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
 	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	length = cp->provider->sectorsize;
 	offset = cp->provider->mediasize - length;
 	sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO);
 	if (md != NULL)
 		raid3_metadata_encode(md, sector);
 	g_topology_unlock();
 	error = g_write_data(cp, offset, sector, length);
 	g_topology_lock();
 	free(sector, M_RAID3);
 	if (error != 0) {
 		disk->d_softc->sc_bump_id = G_RAID3_BUMP_GENID;
 		g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED,
 		    G_RAID3_EVENT_DONTWAIT);
 	}
 	return (error);
 }
 
 int
 g_raid3_clear_metadata(struct g_raid3_disk *disk)
 {
 	int error;
 
 	g_topology_assert();
 	error = g_raid3_write_metadata(disk, NULL);
 	if (error == 0) {
 		G_RAID3_DEBUG(2, "Metadata on %s cleared.",
 		    g_raid3_get_diskname(disk));
 	} else {
 		G_RAID3_DEBUG(0,
 		    "Cannot clear metadata on disk %s (error=%d).",
 		    g_raid3_get_diskname(disk), error);
 	}
 	return (error);
 }
 
 void
 g_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
 {
 	struct g_raid3_softc *sc;
 	struct g_provider *pp;
 
 	sc = disk->d_softc;
 	strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic));
 	md->md_version = G_RAID3_VERSION;
 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
 	md->md_id = sc->sc_id;
 	md->md_all = sc->sc_ndisks;
 	md->md_genid = sc->sc_genid;
 	md->md_mediasize = sc->sc_mediasize;
 	md->md_sectorsize = sc->sc_sectorsize;
 	md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK);
 	md->md_no = disk->d_no;
 	md->md_syncid = disk->d_sync.ds_syncid;
 	md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK);
 	if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING)
 		md->md_sync_offset = disk->d_sync.ds_offset_done;
 	else
 		md->md_sync_offset = 0;
 	if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL)
 		pp = disk->d_consumer->provider;
 	else
 		pp = NULL;
 	if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0 && pp != NULL)
 		strlcpy(md->md_provider, pp->name, sizeof(md->md_provider));
 	else
 		bzero(md->md_provider, sizeof(md->md_provider));
 	if (pp != NULL)
 		md->md_provsize = pp->mediasize;
 	else
 		md->md_provsize = 0;
 }
 
 void
 g_raid3_update_metadata(struct g_raid3_disk *disk)
 {
 	struct g_raid3_metadata md;
 	int error;
 
 	g_topology_assert();
 	g_raid3_fill_metadata(disk, &md);
 	error = g_raid3_write_metadata(disk, &md);
 	if (error == 0) {
 		G_RAID3_DEBUG(2, "Metadata on %s updated.",
 		    g_raid3_get_diskname(disk));
 	} else {
 		G_RAID3_DEBUG(0,
 		    "Cannot update metadata on disk %s (error=%d).",
 		    g_raid3_get_diskname(disk), error);
 	}
 }
 
 /*
  * Increment the device's syncid and store the new value, together with
  * updated metadata, on every ACTIVE or SYNCHRONIZING disk.
  * The topology lock must be held and at least one disk must be ACTIVE.
  */
 static void
 g_raid3_bump_syncid(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *dp;
 	u_int i;
 
 	g_topology_assert();
 	KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_syncid++;
 	G_RAID3_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
 	    sc->sc_syncid);
 	for (i = 0; i < sc->sc_ndisks; i++) {
 		dp = &sc->sc_disks[i];
 		if (dp->d_state != G_RAID3_DISK_STATE_ACTIVE &&
 		    dp->d_state != G_RAID3_DISK_STATE_SYNCHRONIZING)
 			continue;
 		dp->d_sync.ds_syncid = sc->sc_syncid;
 		g_raid3_update_metadata(dp);
 	}
 }
 
 /*
  * Increment the device's genid and store the new value, together with
  * updated metadata, on every ACTIVE or SYNCHRONIZING disk.
  * The topology lock must be held and at least one disk must be ACTIVE.
  */
 static void
 g_raid3_bump_genid(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *dp;
 	u_int i;
 
 	g_topology_assert();
 	KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_genid++;
 	G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
 	    sc->sc_genid);
 	for (i = 0; i < sc->sc_ndisks; i++) {
 		dp = &sc->sc_disks[i];
 		if (dp->d_state != G_RAID3_DISK_STATE_ACTIVE &&
 		    dp->d_state != G_RAID3_DISK_STATE_SYNCHRONIZING)
 			continue;
 		dp->d_genid = sc->sc_genid;
 		g_raid3_update_metadata(dp);
 	}
 }
 
 /*
  * Mark the device idle: clear the DIRTY flag on every ACTIVE disk and write
  * its metadata.  Nothing is done when there is no provider or the provider
  * is not open for writing.
  * The topology lock is taken here for the duration of the metadata updates.
  */
 static void
 g_raid3_idle(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *dp;
 	u_int n;
 
 	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
 		return;
 	sc->sc_idle = 1;
 	g_topology_lock();
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		dp = &sc->sc_disks[n];
 		if (dp->d_state == G_RAID3_DISK_STATE_ACTIVE) {
 			G_RAID3_DEBUG(1,
 			    "Disk %s (device %s) marked as clean.",
 			    g_raid3_get_diskname(dp), sc->sc_name);
 			dp->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 			g_raid3_update_metadata(dp);
 		}
 	}
 	g_topology_unlock();
 }
 
 /*
  * Leave the idle state: set the DIRTY flag on every ACTIVE disk and write
  * its metadata.
  * The topology lock is taken here for the duration of the metadata updates.
  */
 static void
 g_raid3_unidle(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *dp;
 	u_int n;
 
 	sc->sc_idle = 0;
 	g_topology_lock();
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		dp = &sc->sc_disks[n];
 		if (dp->d_state == G_RAID3_DISK_STATE_ACTIVE) {
 			G_RAID3_DEBUG(1,
 			    "Disk %s (device %s) marked as dirty.",
 			    g_raid3_get_diskname(dp), sc->sc_name);
 			dp->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
 			g_raid3_update_metadata(dp);
 		}
 	}
 	g_topology_unlock();
 }
 
 /*
  * Return 1 if the caller should check whether the RAID3 device is idling.
  * Returns 0 when the device is already marked idle, when its provider
  * exists but is not open for writing, or when any ACTIVE disk still has
  * requests in flight on its consumer.
  */
 static int
 g_raid3_check_idle(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *dp;
 	u_int n;
 
 	if (sc->sc_idle ||
 	    (sc->sc_provider != NULL && sc->sc_provider->acw == 0))
 		return (0);
 	/*
 	 * Any in-flight request on an active disk means we are not idle.
 	 */
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		dp = &sc->sc_disks[n];
 		if (dp->d_state == G_RAID3_DISK_STATE_ACTIVE &&
 		    dp->d_consumer->index > 0)
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * The child bios of a parent request are kept on a singly-linked list:
  * the parent's bio_driver1 field is the list head and each child's
  * bio_caller1 field points to the next child on the list.
  * Both macros expand to the field itself, so they can be assigned to.
  */
 #define	G_RAID3_HEAD_BIO(pbp)	(pbp)->bio_driver1
 
 #define	G_RAID3_NEXT_BIO(cbp)	(cbp)->bio_caller1
 
 /*
  * Walk all children of pbp.  The successor is read from bp after the loop
  * body has run.
  */
 #define	G_RAID3_FOREACH_BIO(pbp, bp)					\
 	for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL;		\
 	    (bp) = G_RAID3_NEXT_BIO(bp))
 
 /*
  * Walk all children of pbp, saving the successor in tmpbp before the loop
  * body runs, so the body may unlink or destroy bp.
  */
 #define	G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp)			\
 	for ((bp) = G_RAID3_HEAD_BIO(pbp);				\
 	    (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1);	\
 	    (bp) = (tmpbp))
 
 /*
  * Start the parent bio off with an empty list of children.
  */
 static void
 g_raid3_init_bio(struct bio *pbp)
 {
 
 	G_RAID3_HEAD_BIO(pbp) = NULL;
 }
 
 /*
  * Unlink a child bio from its parent's list of children.
  * The child's own next pointer is cleared in every case, also when the
  * child was not found on the list.  The child is not freed.
  */
 static void
 g_raid3_remove_bio(struct bio *cbp)
 {
 	struct bio *parent, *prev;
 
 	parent = cbp->bio_parent;
 	if (G_RAID3_HEAD_BIO(parent) != cbp) {
 		/* Look for the element right in front of the child. */
 		for (prev = G_RAID3_HEAD_BIO(parent); prev != NULL;
 		    prev = G_RAID3_NEXT_BIO(prev)) {
 			if (G_RAID3_NEXT_BIO(prev) != cbp)
 				continue;
 			G_RAID3_NEXT_BIO(prev) = G_RAID3_NEXT_BIO(cbp);
 			break;
 		}
 	} else {
 		G_RAID3_HEAD_BIO(parent) = G_RAID3_NEXT_BIO(cbp);
 	}
 	G_RAID3_NEXT_BIO(cbp) = NULL;
 }
 
 /*
  * Move child sbp into the list position currently held by child dbp.
  * sbp is first unlinked from its own place, then takes over dbp's successor
  * and dbp's predecessor (or the list head) is pointed at it.  dbp ends up
  * unlinked with a NULL next pointer; it is not freed.
  */
 static void
 g_raid3_replace_bio(struct bio *sbp, struct bio *dbp)
 {
 	struct bio *parent, *prev;
 
 	g_raid3_remove_bio(sbp);
 	parent = dbp->bio_parent;
 	G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp);
 	if (G_RAID3_HEAD_BIO(parent) != dbp) {
 		for (prev = G_RAID3_HEAD_BIO(parent); prev != NULL;
 		    prev = G_RAID3_NEXT_BIO(prev)) {
 			if (G_RAID3_NEXT_BIO(prev) != dbp)
 				continue;
 			G_RAID3_NEXT_BIO(prev) = sbp;
 			break;
 		}
 	} else {
 		G_RAID3_HEAD_BIO(parent) = sbp;
 	}
 	G_RAID3_NEXT_BIO(dbp) = NULL;
 }
 
 /*
  * Destroy a child bio: decrement the parent's child count, return the
  * child's data buffer to the UMA zone matching the per-disk request size,
  * unlink the child from the parent's list and free it.
  * The zone is chosen with the same size thresholds g_raid3_clone_bio()
  * uses when allocating the buffer.
  */
 static void
 g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp)
 {
 	struct bio *parent, *prev;
 	size_t size;
 
 	parent = cbp->bio_parent;
 	parent->bio_children--;
 	KASSERT(cbp->bio_data != NULL, ("NULL bio_data"));
 	size = parent->bio_length / (sc->sc_ndisks - 1);
 	if (size <= 4096)
 		uma_zfree(sc->sc_zone_4k, cbp->bio_data);
 	else if (size <= 16384)
 		uma_zfree(sc->sc_zone_16k, cbp->bio_data);
 	else
 		uma_zfree(sc->sc_zone_64k, cbp->bio_data);
 	if (G_RAID3_HEAD_BIO(parent) == cbp) {
 		G_RAID3_HEAD_BIO(parent) = G_RAID3_NEXT_BIO(cbp);
 		G_RAID3_NEXT_BIO(cbp) = NULL;
 	} else {
 		/* Find the predecessor; the child may not be on the list. */
 		G_RAID3_FOREACH_BIO(parent, prev) {
 			if (G_RAID3_NEXT_BIO(prev) == cbp)
 				break;
 		}
 		if (prev != NULL) {
 			KASSERT(G_RAID3_NEXT_BIO(prev) != NULL,
 			    ("NULL bp->bio_driver1"));
 			G_RAID3_NEXT_BIO(prev) = G_RAID3_NEXT_BIO(cbp);
 			G_RAID3_NEXT_BIO(cbp) = NULL;
 		}
 	}
 	g_destroy_bio(cbp);
 }
 
 /*
  * Clone the parent bio, give the clone its own data buffer and append it to
  * the end of the parent's list of children.
  * The buffer comes from the 4k, 16k or 64k UMA zone depending on the
  * per-disk request size (parent length divided by the number of data
  * components); the matching "requested"/"failed" counters are updated.
  * Returns NULL when the clone or the buffer cannot be allocated.
  */
 static struct bio *
 g_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp)
 {
 	struct bio *tail, *cbp;
 	size_t size;
 
 	cbp = g_clone_bio(pbp);
 	if (cbp == NULL)
 		return (NULL);
 	size = pbp->bio_length / (sc->sc_ndisks - 1);
 	if (size > 16384) {
 		cbp->bio_data = uma_zalloc(sc->sc_zone_64k, M_NOWAIT);
 		g_raid3_64k_requested++;
 		if (cbp->bio_data == NULL)
 			g_raid3_64k_failed++;
 	} else if (size > 4096) {
 		cbp->bio_data = uma_zalloc(sc->sc_zone_16k, M_NOWAIT);
 		g_raid3_16k_requested++;
 		if (cbp->bio_data == NULL)
 			g_raid3_16k_failed++;
 	} else {
 		cbp->bio_data = uma_zalloc(sc->sc_zone_4k, M_NOWAIT);
 		g_raid3_4k_requested++;
 		if (cbp->bio_data == NULL)
 			g_raid3_4k_failed++;
 	}
 	if (cbp->bio_data == NULL) {
 		/* Take the clone off the parent's child count again. */
 		pbp->bio_children--;
 		g_destroy_bio(cbp);
 		return (NULL);
 	}
 	G_RAID3_NEXT_BIO(cbp) = NULL;
 	if (G_RAID3_HEAD_BIO(pbp) != NULL) {
 		/* Walk to the last child and hook the clone behind it. */
 		tail = G_RAID3_HEAD_BIO(pbp);
 		while (G_RAID3_NEXT_BIO(tail) != NULL)
 			tail = G_RAID3_NEXT_BIO(tail);
 		G_RAID3_NEXT_BIO(tail) = cbp;
 	} else {
 		G_RAID3_HEAD_BIO(pbp) = cbp;
 	}
 	return (cbp);
 }
 
 /*
  * Split the data of a parent request across its child bios, compute the
  * parity child when parity is in use, and send every remaining child to
  * the consumer of its disk.
  */
 static void
 g_raid3_scatter(struct bio *pbp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct bio *bp, *cbp;
 	off_t atom, cadd, padd, left;
 
 	sc = pbp->bio_to->geom->softc;
 	bp = NULL;
 	if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) {
 		/*
 		 * Find the parity bio, i.e. the child whose contents have to
 		 * be calculated instead of copied.
 		 */
 		G_RAID3_FOREACH_BIO(pbp, cbp) {
 			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
 				bp = cbp;
 				break;
 			}
 		}
 		KASSERT(bp != NULL, ("NULL parity bio."));
 	}
 	/* Number of bytes of each sector that go to one data component. */
 	atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
 	cadd = padd = 0;
 	/*
 	 * For every sector of the parent, hand one atom to each non-parity
 	 * child in list order.  padd is the read position in the parent
 	 * buffer, cadd the write position shared by all child buffers.
 	 */
 	for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) {
 		G_RAID3_FOREACH_BIO(pbp, cbp) {
 			if (cbp == bp)
 				continue;
 			bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom);
 			padd += atom;
 		}
 		cadd += atom;
 	}
 	if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) {
 		struct bio *tmpbp;
 
 		/*
 		 * Calculate parity: start from zeros and XOR in every other
 		 * child.  Children flagged NODISK only contribute to the
 		 * parity and are destroyed right after, hence the safe
 		 * iteration.
 		 */
 		bzero(bp->bio_data, bp->bio_length);
 		G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) {
 			if (cbp == bp)
 				continue;
 			g_raid3_xor(cbp->bio_data, bp->bio_data, bp->bio_data,
 			    bp->bio_length);
 			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0)
 				g_raid3_destroy_bio(sc, cbp);
 		}
 	}
 	/* Send what is left on the list, each child to its own disk. */
 	G_RAID3_FOREACH_BIO(pbp, cbp) {
 		struct g_consumer *cp;
 
 		disk = cbp->bio_caller2;
 		cp = disk->d_consumer;
 		cbp->bio_to = cp->provider;
 		G_RAID3_LOGREQ(3, cbp, "Sending request.");
 		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		/* One more request in flight on this consumer. */
 		cp->index++;
 		g_io_request(cbp, cp);
 	}
 }
 
 /*
  * Complete a parent READ request once all of its children have come back.
  *
  * The children are scanned for the parity child and for failed children.
  * A single failure on a request that is not yet degraded is either dropped
  * in favour of the parity child (VERIFY mode) or retried on the disk stored
  * in pbp->bio_driver2; in the retry case this function returns early and
  * is entered again when the retried child completes.  Any other failure
  * fails the parent.  Afterwards the missing data is rebuilt (or the parity
  * is verified) by XOR-ing the children, the child buffers are interleaved
  * back into the parent buffer, the children are destroyed and the parent
  * is delivered.
  */
 static void
 g_raid3_gather(struct bio *pbp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct bio *xbp, *fbp, *cbp;
 	off_t atom, cadd, padd, left;
 
 	sc = pbp->bio_to->geom->softc;
 	/*
 	 * Find bio for which we have to calculate data.
 	 * While going through this path, check if all requests
 	 * succeeded, if not, deny whole request.
 	 * If we're in COMPLETE mode, we allow one request to fail,
 	 * so if we find one, we're sending it to the parity consumer.
 	 * If there are more failed requests, we deny whole request.
 	 */
 	xbp = fbp = NULL;
 	G_RAID3_FOREACH_BIO(pbp, cbp) {
 		if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
 			KASSERT(xbp == NULL, ("More than one parity bio."));
 			xbp = cbp;
 		}
 		if (cbp->bio_error == 0)
 			continue;
 		/*
 		 * Found failed request.
 		 */
 		G_RAID3_LOGREQ(0, cbp, "Request failed.");
 		disk = cbp->bio_caller2;
 		if (disk != NULL) {
 			/*
 			 * Request a genid bump and disconnect the disk the
 			 * failed child was sent to.
 			 */
 			sc->sc_bump_id |= G_RAID3_BUMP_GENID;
 			g_raid3_event_send(disk,
 			    G_RAID3_DISK_STATE_DISCONNECTED,
 			    G_RAID3_EVENT_DONTWAIT);
 		}
 		if (fbp == NULL) {
 			if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) {
 				/*
 				 * We are already in degraded mode, so we can't
 				 * accept any failures.
 				 * No failed bio has been recorded yet (fbp is
 				 * NULL here), so the error must be taken from
 				 * the child currently being examined.
 				 */
 				if (pbp->bio_error == 0)
 					pbp->bio_error = cbp->bio_error;
 			} else {
 				fbp = cbp;
 			}
 		} else {
 			/*
 			 * Next failed request, that's too many.
 			 */
 			if (pbp->bio_error == 0)
 				pbp->bio_error = fbp->bio_error;
 		}
 	}
 	if (pbp->bio_error != 0)
 		goto finish;
 	if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) {
 		/*
 		 * In VERIFY mode all components were read, so the parity
 		 * child can stand in for the failed one: put it in the failed
 		 * child's list position and drop the failed child.
 		 */
 		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY;
 		if (xbp != fbp)
 			g_raid3_replace_bio(xbp, fbp);
 		g_raid3_destroy_bio(sc, fbp);
 	} else if (fbp != NULL) {
 		struct g_consumer *cp;
 
 		/*
 		 * One request failed, so send the same request to
 		 * the parity consumer.
 		 */
 		disk = pbp->bio_driver2;
 		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
 			pbp->bio_error = fbp->bio_error;
 			goto finish;
 		}
 		pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
 		/* The failed child goes out again, so it is not "in bed". */
 		pbp->bio_inbed--;
 		fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR);
 		if (disk->d_no == sc->sc_ndisks - 1)
 			fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
 		fbp->bio_error = 0;
 		fbp->bio_completed = 0;
 		fbp->bio_children = 0;
 		fbp->bio_inbed = 0;
 		cp = disk->d_consumer;
 		fbp->bio_caller2 = disk;
 		fbp->bio_to = cp->provider;
 		G_RAID3_LOGREQ(3, fbp, "Sending request (recover).");
 		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		cp->index++;
 		g_io_request(fbp, cp);
 		return;
 	}
 	if (xbp != NULL) {
 		/*
 		 * Calculate parity: XOR every non-parity child into the
 		 * parity child's buffer.
 		 */
 		G_RAID3_FOREACH_BIO(pbp, cbp) {
 			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0)
 				continue;
 			g_raid3_xor(cbp->bio_data, xbp->bio_data, xbp->bio_data,
 			    xbp->bio_length);
 		}
 		xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY;
 		if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) {
 			/* With intact parity the XOR result is all zeros. */
 			if (!g_raid3_is_zero(xbp)) {
 				g_raid3_parity_mismatch++;
 				pbp->bio_error = EIO;
 				goto finish;
 			}
 			g_raid3_destroy_bio(sc, xbp);
 		}
 	}
 	/*
 	 * Interleave the child buffers back into the parent buffer, one atom
 	 * per child for every sector of the parent.
 	 */
 	atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
 	cadd = padd = 0;
 	for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) {
 		G_RAID3_FOREACH_BIO(pbp, cbp) {
 			bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom);
 			pbp->bio_completed += atom;
 			padd += atom;
 		}
 		cadd += atom;
 	}
 finish:
 	if (pbp->bio_error == 0)
 		G_RAID3_LOGREQ(3, pbp, "Request finished.");
 	else {
 		if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0)
 			G_RAID3_LOGREQ(1, pbp, "Verification error.");
 		else
 			G_RAID3_LOGREQ(0, pbp, "Request failed.");
 	}
 	pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK;
 	/*
 	 * Destroy the children before delivering the parent:
 	 * g_raid3_destroy_bio() reads and modifies the parent, which must
 	 * not be touched any more once it has been handed back with
 	 * g_io_deliver().
 	 */
 	while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL)
 		g_raid3_destroy_bio(sc, cbp);
 	g_io_deliver(pbp, pbp->bio_error);
 }
 
 /*
  * Completion callback for child bios of regular requests.
  * The bio is tagged as a regular request and pushed onto the head of the
  * device queue; sleepers on the softc and on the queue are woken up.
  */
 static void
 g_raid3_done(struct bio *bp)
 {
 	struct g_raid3_softc *sc = bp->bio_from->geom->softc;
 
 	bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR;
 	G_RAID3_LOGREQ(3, bp, "Regular request done (error=%d).", bp->bio_error);
 
 	mtx_lock(&sc->sc_queue_mtx);
 	bioq_insert_head(&sc->sc_queue, bp);
 	wakeup(sc);
 	wakeup(&sc->sc_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 }
 
 /*
  * Process the completion of one child bio of a regular request.
  * Once the last child of a parent is in, a READ is handed to
  * g_raid3_gather(); for WRITE/DELETE the children are checked for errors
  * and destroyed, and the parent is delivered.
  * Must be called without the topology lock held.
  */
 static void
 g_raid3_regular_request(struct bio *cbp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct bio *pbp;
 
 	g_topology_assert_not();
 
 	/* This request is no longer in flight on its consumer. */
 	cbp->bio_from->index--;
 	pbp = cbp->bio_parent;
 	sc = pbp->bio_to->geom->softc;
 	disk = cbp->bio_from->private;
 	if (disk == NULL) {
 		/*
 		 * The consumer's private pointer was cleared, which
 		 * g_raid3_kill_consumer() does before giving up on a busy
 		 * consumer; try to destroy the consumer again.
 		 */
 		g_topology_lock();
 		g_raid3_kill_consumer(sc, cbp->bio_from);
 		g_topology_unlock();
 	}
 
 	G_RAID3_LOGREQ(3, cbp, "Request finished.");
 	pbp->bio_inbed++;
 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
 	    pbp->bio_children));
 	/* Wait until every child of the parent has come back. */
 	if (pbp->bio_inbed != pbp->bio_children)
 		return;
 	switch (pbp->bio_cmd) {
 	case BIO_READ:
 		g_raid3_gather(pbp);
 		break;
 	case BIO_WRITE:
 	case BIO_DELETE:
 	    {
 		int error = 0;
 
 		pbp->bio_completed = pbp->bio_length;
 		/*
 		 * Walk and destroy all children.  The first child error is
 		 * only remembered in 'error'; the parent is failed (with
 		 * that first error) only when a second child has failed too.
 		 */
 		while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) {
 			if (cbp->bio_error != 0) {
 				disk = cbp->bio_caller2;
 				if (disk != NULL) {
 					sc->sc_bump_id |= G_RAID3_BUMP_GENID;
 					g_raid3_event_send(disk,
 					    G_RAID3_DISK_STATE_DISCONNECTED,
 					    G_RAID3_EVENT_DONTWAIT);
 				}
 				if (error == 0)
 					error = cbp->bio_error;
 				else if (pbp->bio_error == 0) {
 					/*
 					 * Next failed request, that's too many.
 					 */
 					pbp->bio_error = error;
 				}
 			}
 			g_raid3_destroy_bio(sc, cbp);
 		}
 		if (pbp->bio_error == 0)
 			G_RAID3_LOGREQ(3, pbp, "Request finished.");
 		else
 			G_RAID3_LOGREQ(0, pbp, "Request failed.");
 		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED;
 		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY;
 		g_io_deliver(pbp, pbp->bio_error);
 		break;
 	    }
 	}
 }
 
 /*
  * Completion callback for synchronization bios.
  * The bio is tagged as a synchronization request and pushed onto the head
  * of the device queue; sleepers on the softc and on the queue are woken up.
  */
 static void
 g_raid3_sync_done(struct bio *bp)
 {
 	struct g_raid3_softc *sc;
 
 	G_RAID3_LOGREQ(3, bp, "Synchronization request delivered.");
 	bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC;
 	sc = bp->bio_from->geom->softc;
 
 	mtx_lock(&sc->sc_queue_mtx);
 	bioq_insert_head(&sc->sc_queue, bp);
 	wakeup(sc);
 	wakeup(&sc->sc_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 }
 
 /*
  * Start routine of the RAID3 provider.
  * READ, WRITE and DELETE requests are appended to the device queue and a
  * wakeup is issued on the softc; every other command is rejected with
  * EOPNOTSUPP.
  */
 static void
 g_raid3_start(struct bio *bp)
 {
 	struct g_raid3_softc *sc;
 
 	sc = bp->bio_to->geom->softc;
 	/*
 	 * Without a softc or without valid disks the provider's error
 	 * should be set, and g_raid3_start() should never get called.
 	 */
 	KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 	    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE),
 	    ("Provider's error should be set (error=%d)(device=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 	G_RAID3_LOGREQ(3, bp, "Request received.");
 
 	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
 	    bp->bio_cmd != BIO_DELETE) {
 		/* BIO_GETATTR and anything else is not supported. */
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 	mtx_lock(&sc->sc_queue_mtx);
 	bioq_insert_tail(&sc->sc_queue, bp);
 	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 }
 
 /*
  * Send one synchronization request.
  */
 static void
 g_raid3_sync_one(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *disk;
 	struct bio *bp;
 
 	KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
 	    ("Wrong device state (%s, %s).", sc->sc_name,
 	    g_raid3_device_state2str(sc->sc_state)));
 	disk = sc->sc_syncdisk;
 	KASSERT(disk != NULL, ("No sync disk (%s).", sc->sc_name));
 	KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
 	    ("Disk %s is not marked for synchronization.",
 	    g_raid3_get_diskname(disk)));
 
 	bp = g_new_bio();
 	if (bp == NULL)
 		return;
 	bp->bio_parent = NULL;
 	bp->bio_cmd = BIO_READ;
 	bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1);
 	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
 	bp->bio_cflags = 0;
 	bp->bio_done = g_raid3_sync_done;
 	bp->bio_data = disk->d_sync.ds_data;
 	if (bp->bio_data == NULL) {
 		g_destroy_bio(bp);
 		return;
 	}
 	bp->bio_cflags = G_RAID3_BIO_CFLAG_REGSYNC;
 	disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1);
 	bp->bio_to = sc->sc_provider;
 	G_RAID3_LOGREQ(3, bp, "Sending synchronization request.");
 	disk->d_sync.ds_consumer->index++;
 	g_io_request(bp, disk->d_sync.ds_consumer);
 }
 
 /*
  * Process a completed synchronization bio.
  *
  * A finished READ holds data read through the synchronization consumer;
  * the buffer is converted in place to the contents of the disk being
  * synchronized and the same bio is re-sent as a WRITE to that disk.
  * A finished WRITE records the progress and either activates the disk
  * (when it is fully synchronized) or periodically stores the progress in
  * the metadata.
  */
 static void
 g_raid3_sync_request(struct bio *bp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 
 	/* This request is no longer in flight on its consumer. */
 	bp->bio_from->index--;
 	sc = bp->bio_from->geom->softc;
 	disk = bp->bio_from->private;
 	if (disk == NULL) {
 		/*
 		 * The consumer's private pointer was cleared, which
 		 * g_raid3_kill_consumer() does before giving up on a busy
 		 * consumer; try to destroy the consumer again and drop the
 		 * request.
 		 */
 		g_topology_lock();
 		g_raid3_kill_consumer(sc, bp->bio_from);
 		g_topology_unlock();
 		g_destroy_bio(bp);
 		return;
 	}
 
 	/*
 	 * Synchronization request.
 	 */
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	    {
 		struct g_consumer *cp;
 		u_char *dst, *src;
 		off_t left;
 		u_int atom;
 
 		if (bp->bio_error != 0) {
 			G_RAID3_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 			g_destroy_bio(bp);
 			return;
 		}
 		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
 		/* Bytes of each sector that belong to one data component. */
 		atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
 		/* The buffer is compacted in place: dst never passes src. */
 		dst = src = bp->bio_data;
 		if (disk->d_no == sc->sc_ndisks - 1) {
 			u_int n;
 
 			/*
 			 * Parity component: for every sector, XOR the atoms
 			 * of all data components together.
 			 */
 			for (left = bp->bio_length; left > 0;
 			    left -= sc->sc_sectorsize) {
 				bcopy(src, dst, atom);
 				src += atom;
 				for (n = 1; n < sc->sc_ndisks - 1; n++) {
 					g_raid3_xor(src, dst, dst, atom);
 					src += atom;
 				}
 				dst += atom;
 			}
 		} else {
 			/*
 			 * Regular component: for every sector, pick the atom
 			 * at this disk's position.
 			 */
 			src += atom * disk->d_no;
 			for (left = bp->bio_length; left > 0;
 			    left -= sc->sc_sectorsize) {
 				bcopy(src, dst, atom);
 				src += sc->sc_sectorsize;
 				dst += atom;
 			}
 		}
 		/* Convert device offset/length to per-disk values. */
 		bp->bio_offset /= sc->sc_ndisks - 1;
 		bp->bio_length /= sc->sc_ndisks - 1;
 		/* Reuse the same bio for the write to the disk. */
 		bp->bio_cmd = BIO_WRITE;
 		bp->bio_cflags = 0;
 		bp->bio_children = bp->bio_inbed = 0;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		cp->index++;
 		g_io_request(bp, cp);
 		return;
 	    }
 	case BIO_WRITE:
 	    {
 		struct g_raid3_disk_sync *sync;
 
 		if (bp->bio_error != 0) {
 			G_RAID3_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 			g_destroy_bio(bp);
 			/* A failed write disconnects the disk. */
 			sc->sc_bump_id |= G_RAID3_BUMP_GENID;
 			g_raid3_event_send(disk,
 			    G_RAID3_DISK_STATE_DISCONNECTED,
 			    G_RAID3_EVENT_DONTWAIT);
 			return;
 		}
 		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
 		sync = &disk->d_sync;
 		/* Everything up to the end of this write is now done. */
 		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
 		g_destroy_bio(bp);
 		/* While a resync offset is set, nothing more is done here. */
 		if (sync->ds_resync != -1)
 			return;
 		if (sync->ds_offset_done ==
 		    sc->sc_mediasize / (sc->sc_ndisks - 1)) {
 			/*
 			 * Disk up-to-date, activate it.
 			 */
 			g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE,
 			    G_RAID3_EVENT_DONTWAIT);
 			return;
 		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
 			/*
 			 * Store offset_done in the metadata each time it
 			 * reaches a multiple of MAXPHYS * 100 bytes.
 			 * XXX: This should be configurable.
 			 */
 			g_topology_lock();
 			g_raid3_update_metadata(disk);
 			g_topology_unlock();
 		}
 		return;
 	    }
 	default:
 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
 		    bp->bio_cmd, sc->sc_name));
 		break;
 	}
 }
 
 static int
 g_raid3_register_request(struct bio *pbp)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 	off_t offset, length;
 	u_int n, ndisks;
 	int round_robin, verify;
 
 	ndisks = 0;
 	sc = pbp->bio_to->geom->softc;
 	if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 &&
 	    sc->sc_syncdisk == NULL) {
 		g_io_deliver(pbp, EIO);
 		return (0);
 	}
 	g_raid3_init_bio(pbp);
 	length = pbp->bio_length / (sc->sc_ndisks - 1);
 	offset = pbp->bio_offset / (sc->sc_ndisks - 1);
 	round_robin = verify = 0;
 	switch (pbp->bio_cmd) {
 	case BIO_READ:
 		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 &&
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
 			pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY;
 			verify = 1;
 			ndisks = sc->sc_ndisks;
 		} else {
 			verify = 0;
 			ndisks = sc->sc_ndisks - 1;
 		}
 		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 &&
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
 			round_robin = 1;
 		} else {
 			round_robin = 0;
 		}
 		KASSERT(!round_robin || !verify,
 		    ("ROUND-ROBIN and VERIFY are mutually exclusive."));
 		pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1];
 		break;
 	case BIO_WRITE:
 	case BIO_DELETE:
 	    {
 		struct g_raid3_disk_sync *sync;
 
 		if (sc->sc_idle)
 			g_raid3_unidle(sc);
 
 		ndisks = sc->sc_ndisks;
 
 		if (sc->sc_syncdisk == NULL)
 			break;
 		sync = &sc->sc_syncdisk->d_sync;
 		if (offset >= sync->ds_offset)
 			break;
 		if (offset + length <= sync->ds_offset_done)
 			break;
 		if (offset >= sync->ds_resync && sync->ds_resync != -1)
 			break;
 		sync->ds_resync = offset - (offset % MAXPHYS);
 		break;
 	    }
 	}
 	for (n = 0; n < ndisks; n++) {
 		disk = &sc->sc_disks[n];
 		cbp = g_raid3_clone_bio(sc, pbp);
 		if (cbp == NULL) {
 			while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL)
 				g_raid3_destroy_bio(sc, cbp);
 			/*
 			 * To prevent deadlock, we must run back up
 			 * with the ENOMEM for failed requests of any
 			 * of our consumers.  Our own sync requests
 			 * can stick around, as they are finite.
 			 */
 			if ((pbp->bio_cflags &
 			    G_RAID3_BIO_CFLAG_REGULAR) != 0) {
 				g_io_deliver(pbp, ENOMEM);
 				return (0);
 			}
 			return (ENOMEM);
 		}
 		cbp->bio_offset = offset;
 		cbp->bio_length = length;
 		cbp->bio_done = g_raid3_done;
 		switch (pbp->bio_cmd) {
 		case BIO_READ:
 			if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
 				/*
 				 * Replace invalid component with the parity
 				 * component.
 				 */
 				disk = &sc->sc_disks[sc->sc_ndisks - 1];
 				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
 				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
 			} else if (round_robin &&
 			    disk->d_no == sc->sc_round_robin) {
 				/*
 				 * In round-robin mode skip one data component
 				 * and use parity component when reading.
 				 */
 				pbp->bio_driver2 = disk;
 				disk = &sc->sc_disks[sc->sc_ndisks - 1];
 				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
 				sc->sc_round_robin++;
 				round_robin = 0;
 			} else if (verify && disk->d_no == sc->sc_ndisks - 1) {
 				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
 			}
 			break;
 		case BIO_WRITE:
 		case BIO_DELETE:
 			if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
 			    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
 				if (n == ndisks - 1) {
 					/*
 					 * Active parity component, mark it as such.
 					 */
 					cbp->bio_cflags |=
 					    G_RAID3_BIO_CFLAG_PARITY;
 				}
 			} else {
 				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
 				if (n == ndisks - 1) {
 					/*
 					 * Parity component is not connected,
 					 * so destroy its request.
 					 */
 					pbp->bio_pflags |=
 					    G_RAID3_BIO_PFLAG_NOPARITY;
 					g_raid3_destroy_bio(sc, cbp);
 					cbp = NULL;
 				} else {
 					cbp->bio_cflags |=
 					    G_RAID3_BIO_CFLAG_NODISK;
 					disk = NULL;
 				}
 			}
 			break;
 		}
 		if (cbp != NULL)
 			cbp->bio_caller2 = disk;
 	}
 	switch (pbp->bio_cmd) {
 	case BIO_READ:
 		if (round_robin) {
 			/*
 			 * If we are in round-robin mode and 'round_robin' is
 			 * still 1, it means, that we skipped parity component
 			 * for this read and must reset sc_round_robin field.
 			 */
 			sc->sc_round_robin = 0;
 		}
 		G_RAID3_FOREACH_BIO(pbp, cbp) {
 			disk = cbp->bio_caller2;
 			cp = disk->d_consumer;
 			cbp->bio_to = cp->provider;
 			G_RAID3_LOGREQ(3, cbp, "Sending request.");
 			KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
 			    ("Consumer %s not opened (r%dw%de%d).",
 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
 			cp->index++;
 			g_io_request(cbp, cp);
 		}
 		break;
 	case BIO_WRITE:
 	case BIO_DELETE:
 		/*
 		 * Bump syncid on first write.
 		 */
 		if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) {
 			sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID;
 			g_topology_lock();
 			g_raid3_bump_syncid(sc);
 			g_topology_unlock();
 		}
 		g_raid3_scatter(pbp);
 		break;
 	}
 	return (0);
 }
 
 /*
  * Tell whether the device is free of outstanding I/O.
  *
  * Consumers of the device geom are examined first, then consumers of the
  * synchronization geom.  Returns 0 as soon as g_raid3_is_busy() reports
  * a busy consumer and 1 when none is busy.  Asserts the topology lock.
  */
 static int
 g_raid3_can_destroy(struct g_raid3_softc *sc)
 {
 	struct g_consumer *cons;
 	struct g_geom *geom;
 	int pass;
 
 	g_topology_assert();
 	for (pass = 0; pass < 2; pass++) {
 		/* Pass 0: regular geom; pass 1: synchronization geom. */
 		if (pass == 0)
 			geom = sc->sc_geom;
 		else
 			geom = sc->sc_sync.ds_geom;
 		LIST_FOREACH(cons, &geom->consumer, consumer) {
 			if (g_raid3_is_busy(sc, cons))
 				return (0);
 		}
 	}
 	G_RAID3_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
 	    sc->sc_name);
 	return (1);
 }
  
 /*
  * Attempt to tear the device down.
  *
  * Any root mount hold is released first.  Under the topology lock the
  * device is checked with g_raid3_can_destroy(); when it is still busy,
  * 0 is returned and nothing else happens.  Otherwise either the thread
  * sleeping on &sc->sc_worker is woken up (G_RAID3_DEVICE_FLAG_WAIT set)
  * or the device and its softc are destroyed and freed here.  Returns 1
  * in both of those cases.
  */
 static int
 g_raid3_try_destroy(struct g_raid3_softc *sc)
 {
 
 	if (sc->sc_rootmount != NULL) {
 		G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 		    sc->sc_rootmount);
 		root_mount_rel(sc->sc_rootmount);
 		sc->sc_rootmount = NULL;
 	}
 
 	g_topology_lock();
 	if (g_raid3_can_destroy(sc) == 0) {
 		g_topology_unlock();
 		return (0);
 	}
 
 	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) == 0) {
 		/* Nobody is waiting for us, so finish the job here. */
 		g_raid3_destroy_device(sc);
 		g_topology_unlock();
 		free(sc->sc_disks, M_RAID3);
 		free(sc, M_RAID3);
 		return (1);
 	}
 
 	/* Somebody waits for the worker to go away; let him clean up. */
 	g_topology_unlock();
 	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, &sc->sc_worker);
 	wakeup(&sc->sc_worker);
 	sc->sc_worker = NULL;
 	return (1);
 }
 
 /*
  * Worker thread.
  *
  * Runs forever for one device ('arg' is its softc).  Each iteration
  * handles, in this order: one pending event, one synchronization step
  * (when a disk is being rebuilt) or one request taken from sc_queue.
  * The thread leaves only through kthread_exit(), after
  * g_raid3_try_destroy() reports success for a device marked with
  * G_RAID3_DEVICE_FLAG_DESTROY.
  */
 static void
 g_raid3_worker(void *arg)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	struct g_raid3_disk_sync *sync;
 	struct g_raid3_event *ep;
 	struct bio *bp;
 	u_int nreqs;
 
 	sc = arg;
 	mtx_lock_spin(&sched_lock);
 	sched_prio(curthread, PRIBIO);
 	mtx_unlock_spin(&sched_lock);
 
 	/* Number of requests dequeued since the last synchronization step. */
 	nreqs = 0;
 	for (;;) {
 		G_RAID3_DEBUG(5, "%s: Let's see...", __func__);
 		/*
 		 * First take a look at events.
 		 * This is important to handle events before any I/O requests.
 		 */
 		ep = g_raid3_event_get(sc);
 		/*
 		 * The event is run only when the topology lock can be taken
 		 * without blocking.  Otherwise 'ep' stays non-NULL and the
 		 * code below uses that to retry after a short sleep.
 		 */
 		if (ep != NULL && g_topology_try_lock()) {
 			g_raid3_event_remove(sc, ep);
 			if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) {
 				/* Update only device status. */
 				G_RAID3_DEBUG(3,
 				    "Running event for device %s.",
 				    sc->sc_name);
 				ep->e_error = 0;
 				g_raid3_update_device(sc, 1);
 			} else {
 				/* Update disk status. */
 				G_RAID3_DEBUG(3, "Running event for disk %s.",
 				     g_raid3_get_diskname(ep->e_disk));
 				ep->e_error = g_raid3_update_disk(ep->e_disk,
 				    ep->e_state);
 				if (ep->e_error == 0)
 					g_raid3_update_device(sc, 0);
 			}
 			g_topology_unlock();
 			if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) {
 				/* Nobody waits for the result; free it here. */
 				KASSERT(ep->e_error == 0,
 				    ("Error cannot be handled."));
 				g_raid3_event_free(ep);
 			} else {
 				/* Mark the event done and wake up the sender. */
 				ep->e_flags |= G_RAID3_EVENT_DONE;
 				G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__,
 				    ep);
 				mtx_lock(&sc->sc_events_mtx);
 				wakeup(ep);
 				mtx_unlock(&sc->sc_events_mtx);
 			}
 			if ((sc->sc_flags &
 			    G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
 				if (g_raid3_try_destroy(sc))
 					kthread_exit(0);
 			}
 			G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__);
 			continue;
 		}
 		/*
 		 * Now I/O requests.
 		 */
 		/* Get first request from the queue. */
 		mtx_lock(&sc->sc_queue_mtx);
 		bp = bioq_first(&sc->sc_queue);
 		if (bp == NULL) {
 			if (ep != NULL) {
 				/*
 				 * No I/O requests and topology lock was
 				 * already held? Try again.
 				 */
 				mtx_unlock(&sc->sc_queue_mtx);
 				tsleep(ep, PRIBIO, "r3:top1", hz / 5);
 				continue;
 			}
 			if ((sc->sc_flags &
 			    G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
 				/*
 				 * The queue mutex is dropped around
 				 * g_raid3_try_destroy() and retaken when the
 				 * device could not be destroyed yet.
 				 */
 				mtx_unlock(&sc->sc_queue_mtx);
 				if (g_raid3_try_destroy(sc))
 					kthread_exit(0);
 				mtx_lock(&sc->sc_queue_mtx);
 			}
 		}
 		/*
 		 * A disk is being synchronized and either the queue is empty
 		 * or more than g_raid3_reqs_per_sync requests were handled
 		 * since the last synchronization step.
 		 */
 		if (sc->sc_syncdisk != NULL &&
 		    (bp == NULL || nreqs > g_raid3_reqs_per_sync)) {
 			mtx_unlock(&sc->sc_queue_mtx);
 			/*
 			 * It is time for synchronization...
 			 */
 			nreqs = 0;
 			disk = sc->sc_syncdisk;
 			sync = &disk->d_sync;
 			/*
 			 * Start the next step only when the end of the
 			 * component was not reached and ds_offset equals
 			 * ds_offset_done.
 			 */
 			if (sync->ds_offset <
 			    sc->sc_mediasize / (sc->sc_ndisks - 1) &&
 			    sync->ds_offset == sync->ds_offset_done) {
 				/*
 				 * A restart offset was recorded in ds_resync
 				 * (-1 means none); rewind to it.
 				 */
 				if (sync->ds_resync != -1) {
 					sync->ds_offset = sync->ds_resync;
 					sync->ds_offset_done = sync->ds_resync;
 					sync->ds_resync = -1;
 				}
 				g_raid3_sync_one(sc);
 			}
 			G_RAID3_DEBUG(5, "%s: I'm here 2.", __func__);
 			goto sleep;
 		}
 		if (bp == NULL) {
 			/*
 			 * Nothing to do.  Both sleeps below pass PDROP, so
 			 * the queue mutex is not held when they return.
 			 */
 			if (g_raid3_check_idle(sc)) {
 				u_int idletime;
 
 				/* Sleep at least one second's worth of ticks. */
 				idletime = g_raid3_idletime;
 				if (idletime == 0)
 					idletime = 1;
 				idletime *= hz;
 				if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
 				    "r3:w1", idletime) == EWOULDBLOCK) {
 					G_RAID3_DEBUG(5, "%s: I'm here 3.",
 					    __func__);
 					/*
 					 * No I/O requests in 'idletime'
 					 * seconds, so mark components as clean.
 					 */
 					g_raid3_idle(sc);
 				}
 				G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__);
 			} else {
 				MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
 				    "r3:w2", 0);
 				G_RAID3_DEBUG(5, "%s: I'm here 5.", __func__);
 			}
 			continue;
 		}
 		nreqs++;
 		bioq_remove(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 
 		/* Dispatch on the request's cflags. */
 		if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) {
 			g_raid3_regular_request(bp);
 		} else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) {
 			u_int timeout, sps;
 
 			g_raid3_sync_request(bp);
 			/*
 			 * The 'sleep' label is reached either by falling
 			 * through here or by the goto in the synchronization
 			 * step above.  It paces synchronization according to
 			 * g_raid3_syncs_per_sec.
 			 */
 sleep:
 			sps = atomic_load_acq_int(&g_raid3_syncs_per_sec);
 			if (sps == 0) {
 				/* No pacing requested, do not sleep at all. */
 				G_RAID3_DEBUG(5, "%s: I'm here 6.", __func__);
 				continue;
 			}
 			if (ep != NULL) {
 				/*
 				 * We have some pending events, don't sleep now.
 				 */
 				G_RAID3_DEBUG(5, "%s: I'm here 7.", __func__);
 				tsleep(ep, PRIBIO, "r3:top2", hz / 5);
 				continue;
 			}
 			mtx_lock(&sc->sc_queue_mtx);
 			if (bioq_first(&sc->sc_queue) != NULL) {
 				/* Requests are waiting, serve them first. */
 				mtx_unlock(&sc->sc_queue_mtx);
 				G_RAID3_DEBUG(5, "%s: I'm here 8.", __func__);
 				continue;
 			}
 			/* Sleep hz / sps ticks, but never less than one. */
 			timeout = hz / sps;
 			if (timeout == 0)
 				timeout = 1;
 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w2",
 			    timeout);
 		} else {
 			/*
 			 * Request carries neither REGULAR nor SYNC cflag, so
 			 * register it.  When registration fails, put it back
 			 * at the tail of the queue and wait a moment before
 			 * trying again.
 			 */
 			if (g_raid3_register_request(bp) != 0) {
 				mtx_lock(&sc->sc_queue_mtx);
 				bioq_insert_tail(&sc->sc_queue, bp);
 				MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx,
 				    PRIBIO | PDROP, "r3:lowmem", hz / 10);
 			}
 		}
 		G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__);
 	}
 }
 
 /*
  * Bring disk's DIRTY flag in line with the write-open count of the
  * device's provider: set the flag when the provider is open for writing
  * (acw > 0), clear it when acw is 0.  Nothing is done when the device
  * has no provider.  Asserts the topology lock.
  */
 static void
 g_raid3_update_access(struct g_raid3_disk *disk)
 {
 	struct g_provider *prov;
 	int dirty;
 
 	g_topology_assert();
 
 	prov = disk->d_softc->sc_provider;
 	if (prov == NULL)
 		return;
 
 	dirty = (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0;
 	if (prov->acw > 0 && !dirty) {
 		G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
 		    g_raid3_get_diskname(disk), disk->d_softc->sc_name);
 		disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
 	} else if (prov->acw == 0 && dirty) {
 		G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.",
 		    g_raid3_get_diskname(disk), disk->d_softc->sc_name);
 		disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 	}
 }
 
 static void
 g_raid3_sync_start(struct g_raid3_softc *sc)
 {
 	struct g_raid3_disk *disk;
 	int error;
 	u_int n;
 
 	g_topology_assert();
 
 	KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
 	    ("Device not in DEGRADED state (%s, %u).", sc->sc_name,
 	    sc->sc_state));
 	KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).",
 	    sc->sc_name, sc->sc_state));
 	disk = NULL;
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		if (sc->sc_disks[n].d_state != G_RAID3_DISK_STATE_SYNCHRONIZING)
 			continue;
 		disk = &sc->sc_disks[n];
 		break;
 	}
 	if (disk == NULL)
 		return;
 
 	G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
 	    g_raid3_get_diskname(disk));
 	disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
 	KASSERT(disk->d_sync.ds_consumer == NULL,
 	    ("Sync consumer already exists (device=%s, disk=%s).",
 	    sc->sc_name, g_raid3_get_diskname(disk)));
 	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
 	disk->d_sync.ds_consumer->private = disk;
 	disk->d_sync.ds_consumer->index = 0;
 	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
 	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
 	    disk->d_softc->sc_name, error));
 	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
 	KASSERT(error == 0, ("Cannot open %s (error=%d).",
 	    disk->d_softc->sc_name, error));
 	disk->d_sync.ds_data = malloc(MAXPHYS, M_RAID3, M_WAITOK);
 	sc->sc_syncdisk = disk;
 }
 
 /*
  * Stop synchronization process.
  * type: 0 - synchronization finished
  *       1 - synchronization stopped
  *
  * sc_syncdisk is cleared unconditionally.  When the disk has a
  * synchronization consumer, the consumer is killed, the data buffer is
  * freed and the disk's DIRTY flag is cleared; otherwise nothing more is
  * done.  'type' selects only the message being logged.
  */
 static void
 g_raid3_sync_stop(struct g_raid3_softc *sc, int type)
 {
 	struct g_raid3_disk *disk;
 
 	g_topology_assert();
 	KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
 	    ("Device not in DEGRADED state (%s, %u).", sc->sc_name,
 	    sc->sc_state));
 	disk = sc->sc_syncdisk;
 	sc->sc_syncdisk = NULL;
 	KASSERT(disk != NULL, ("No disk was synchronized (%s).", sc->sc_name));
 	KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
 	    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
 	    g_raid3_disk_state2str(disk->d_state)));
 	if (disk->d_sync.ds_consumer == NULL)
 		return;
 
 	if (type == 0) {
 		G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.",
 		    disk->d_softc->sc_name, g_raid3_get_diskname(disk));
 	} else /* if (type == 1) */ {
 		G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
 		    disk->d_softc->sc_name, g_raid3_get_diskname(disk));
 	}
 	g_raid3_kill_consumer(disk->d_softc, disk->d_sync.ds_consumer);
 	free(disk->d_sync.ds_data, M_RAID3);
 	/*
 	 * The disk structure outlives the synchronization, so do not
 	 * leave a pointer to the freed buffer behind.
 	 */
 	disk->d_sync.ds_data = NULL;
 	disk->d_sync.ds_consumer = NULL;
 	disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 }
 
 /*
  * Create and announce the "raid3/<name>" provider of the device, using
  * the media and sector size kept in the softc.  When the device is
  * DEGRADED, synchronization is started right away.
  */
 static void
 g_raid3_launch_provider(struct g_raid3_softc *sc)
 {
 	struct g_provider *prov;
 
 	g_topology_assert();
 
 	prov = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name);
 	prov->sectorsize = sc->sc_sectorsize;
 	prov->mediasize = sc->sc_mediasize;
 	sc->sc_provider = prov;
 	g_error_provider(prov, 0);
 	G_RAID3_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
 	    prov->name);
 
 	if (sc->sc_state != G_RAID3_DEVICE_STATE_DEGRADED)
 		return;
 	g_raid3_sync_start(sc);
 }
 
 static void
 g_raid3_destroy_provider(struct g_raid3_softc *sc)
 {
 	struct bio *bp;
 
 	g_topology_assert();
 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
 	    sc->sc_name));
 
 	g_error_provider(sc->sc_provider, ENXIO);
 	mtx_lock(&sc->sc_queue_mtx);
 	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
 		bioq_remove(&sc->sc_queue, bp);
 		g_io_deliver(bp, ENXIO);
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
 	    sc->sc_provider->name);
 	sc->sc_provider->flags |= G_PF_WITHER;
 	g_orphan_provider(sc->sc_provider, ENXIO);
 	sc->sc_provider = NULL;
 	if (sc->sc_syncdisk != NULL)
 		g_raid3_sync_stop(sc, 1);
 }
 
 /*
  * Start-up timeout handler ('arg' is the softc): logs the forced start
  * and posts a device event without waiting for its completion.
  */
 static void
 g_raid3_go(void *arg)
 {
 	struct g_raid3_softc *sc = arg;
 
 	G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
 	g_raid3_event_send(sc, 0,
 	    G_RAID3_EVENT_DEVICE | G_RAID3_EVENT_DONTWAIT);
 }
 
 /*
  * Work out which state a disk should enter, by comparing its syncid
  * with the device's syncid.
  *
  * - Disk's syncid is bigger: the disk is destroyed and
  *   G_RAID3_DISK_STATE_NONE is returned.
  * - Disk's syncid is smaller: its synchronization data is reset and the
  *   SYNCHRONIZING flag is set.
  * - A disk without the SYNCHRONIZING flag becomes ACTIVE; one with the
  *   flag becomes SYNCHRONIZING, or STALE when the device is NOAUTOSYNC
  *   and the disk is not marked FORCE_SYNC.
  */
 static u_int
 g_raid3_determine_state(struct g_raid3_disk *disk)
 {
 	struct g_raid3_softc *sc;
 	u_int newstate;
 
 	sc = disk->d_softc;
 
 	if (disk->d_sync.ds_syncid > sc->sc_syncid) {
 		/*
 		 * The device was started on stale components and a more
 		 * recent one has just shown up.  If anything was written
 		 * in the meantime the data is inconsistent, so leave this
 		 * disk alone and tell the user loudly.
 		 */
 		G_RAID3_DEBUG(0, "Device %s was started before the freshest "
 		    "disk (%s) arrives!! It will not be connected to the "
 		    "running device.", sc->sc_name,
 		    g_raid3_get_diskname(disk));
 		g_raid3_destroy_disk(disk);
 		/* The disk is gone, do not touch it anymore. */
 		return (G_RAID3_DISK_STATE_NONE);
 	}
 
 	if (disk->d_sync.ds_syncid < sc->sc_syncid) {
 		/*
 		 * Whatever this disk was synchronized with had another
 		 * syncid, so forget its synchronization progress.
 		 */
 		disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING;
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		disk->d_sync.ds_syncid = sc->sc_syncid;
 	}
 
 	if ((disk->d_flags & G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) {
 		/* Up-to-date disk, no synchronization needed. */
 		newstate = G_RAID3_DISK_STATE_ACTIVE;
 	} else if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
 	    (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) {
 		/* Synchronization can go on from the stored offset. */
 		newstate = G_RAID3_DISK_STATE_SYNCHRONIZING;
 	} else {
 		newstate = G_RAID3_DISK_STATE_STALE;
 	}
 
 	G_RAID3_DEBUG(3, "State for %s disk: %s.",
 	    g_raid3_get_diskname(disk), g_raid3_disk_state2str(newstate));
 	return (newstate);
 }
 
 /*
  * Update device state.
  *
  * STARTING: wait until all components are present, or all but one when
  * 'force' is set; with 'force' set and still too few components the
  * device is marked for destruction.  Then components with an old genid
  * are dropped, the device syncid is chosen, the device becomes COMPLETE
  * or DEGRADED and a state event is sent for every present component.
  *
  * DEGRADED/COMPLETE: bump genid if requested, switch between the two
  * states according to the number of ACTIVE components, launch the
  * provider if there is none and release the root mount hold.  A
  * DEGRADED device with fewer than sc_ndisks - 1 ACTIVE components loses
  * its provider and is marked for destruction.
  *
  * Asserts the topology lock.
  */
 static void
 g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force)
 {
 	struct g_raid3_disk *disk;
 	u_int state;
 
 	g_topology_assert();
 
 	switch (sc->sc_state) {
 	case G_RAID3_DEVICE_STATE_STARTING:
 	    {
 		u_int n, ndirty, ndisks, genid, syncid;
 
 		KASSERT(sc->sc_provider == NULL,
 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
 		/*
 		 * Are we ready? We are, if all disks are connected or
 		 * one disk is missing and 'force' is true.
 		 */
 		if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) {
 			if (!force)
 				callout_drain(&sc->sc_callout);
 		} else {
 			if (force) {
 				/*
 				 * Timeout expired, so destroy device.
 				 */
 				sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
 				/*
 				 * The hold may have been released already
 				 * (g_raid3_try_destroy() does that), so
 				 * check it like every other release site.
 				 */
 				if (sc->sc_rootmount != NULL) {
 					G_RAID3_DEBUG(1,
 					    "root_mount_rel[%u] %p", __LINE__,
 					    sc->sc_rootmount);
 					root_mount_rel(sc->sc_rootmount);
 					sc->sc_rootmount = NULL;
 				}
 			}
 			return;
 		}
 
 		/*
 		 * Find the biggest genid.
 		 */
 		genid = 0;
 		for (n = 0; n < sc->sc_ndisks; n++) {
 			disk = &sc->sc_disks[n];
 			if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 				continue;
 			if (disk->d_genid > genid)
 				genid = disk->d_genid;
 		}
 		sc->sc_genid = genid;
 		/*
 		 * Remove all disks without the biggest genid.
 		 */
 		for (n = 0; n < sc->sc_ndisks; n++) {
 			disk = &sc->sc_disks[n];
 			if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 				continue;
 			if (disk->d_genid < genid) {
 				G_RAID3_DEBUG(0,
 				    "Component %s (device %s) broken, skipping.",
 				    g_raid3_get_diskname(disk), sc->sc_name);
 				g_raid3_destroy_disk(disk);
 			}
 		}
 
 		/*
 		 * There must be at least 'sc->sc_ndisks - 1' components
 		 * with the same syncid and without SYNCHRONIZING flag.
 		 */
 
 		/*
 		 * Find the biggest syncid, number of valid components and
 		 * number of dirty components.
 		 */
 		ndirty = ndisks = syncid = 0;
 		for (n = 0; n < sc->sc_ndisks; n++) {
 			disk = &sc->sc_disks[n];
 			if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 				continue;
 			if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0)
 				ndirty++;
 			if (disk->d_sync.ds_syncid > syncid) {
 				/* Fresher syncid found, start counting anew. */
 				syncid = disk->d_sync.ds_syncid;
 				ndisks = 0;
 			} else if (disk->d_sync.ds_syncid < syncid) {
 				continue;
 			}
 			if ((disk->d_flags &
 			    G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) {
 				continue;
 			}
 			ndisks++;
 		}
 		/*
 		 * Do we have enough valid components?
 		 */
 		if (ndisks + 1 < sc->sc_ndisks) {
 			G_RAID3_DEBUG(0,
 			    "Device %s is broken, too few valid components.",
 			    sc->sc_name);
 			sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
 			return;
 		}
 		/*
 		 * If there is one DIRTY component and all disks are present,
 		 * mark it for synchronization. If there is more than one DIRTY
 		 * component, mark parity component for synchronization.
 		 */
 		if (ndisks == sc->sc_ndisks && ndirty == 1) {
 			for (n = 0; n < sc->sc_ndisks; n++) {
 				disk = &sc->sc_disks[n];
 				if ((disk->d_flags &
 				    G_RAID3_DISK_FLAG_DIRTY) == 0) {
 					continue;
 				}
 				disk->d_flags |=
 				    G_RAID3_DISK_FLAG_SYNCHRONIZING;
 			}
 		} else if (ndisks == sc->sc_ndisks && ndirty > 1) {
 			disk = &sc->sc_disks[sc->sc_ndisks - 1];
 			disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING;
 		}
 
 		sc->sc_syncid = syncid;
 		if (force) {
 			/* Remember to bump syncid on first write. */
 			sc->sc_bump_id |= G_RAID3_BUMP_SYNCID;
 		}
 		if (ndisks == sc->sc_ndisks)
 			state = G_RAID3_DEVICE_STATE_COMPLETE;
 		else /* if (ndisks == sc->sc_ndisks - 1) */
 			state = G_RAID3_DEVICE_STATE_DEGRADED;
 		G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.",
 		    sc->sc_name, g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_device_state2str(state));
 		sc->sc_state = state;
 		/*
 		 * Send every present component to the state chosen by
 		 * g_raid3_determine_state().  From here on 'state' holds
 		 * a disk state, not a device state.
 		 */
 		for (n = 0; n < sc->sc_ndisks; n++) {
 			disk = &sc->sc_disks[n];
 			if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
 				continue;
 			state = g_raid3_determine_state(disk);
 			g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT);
 			if (state == G_RAID3_DISK_STATE_STALE)
 				sc->sc_bump_id |= G_RAID3_BUMP_SYNCID;
 		}
 		break;
 	    }
 	case G_RAID3_DEVICE_STATE_DEGRADED:
 		/*
 		 * Genid need to be bumped immediately, so do it here.
 		 */
 		if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) {
 			sc->sc_bump_id &= ~G_RAID3_BUMP_GENID;
 			g_raid3_bump_genid(sc);
 		}
 
 		/* Wait until no component is left in NEW state. */
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0)
 			return;
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) <
 		    sc->sc_ndisks - 1) {
 			/* More than one component is missing, give up. */
 			if (sc->sc_provider != NULL)
 				g_raid3_destroy_provider(sc);
 			sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
 			return;
 		}
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) ==
 		    sc->sc_ndisks) {
 			state = G_RAID3_DEVICE_STATE_COMPLETE;
 			G_RAID3_DEBUG(1,
 			    "Device %s state changed from %s to %s.",
 			    sc->sc_name, g_raid3_device_state2str(sc->sc_state),
 			    g_raid3_device_state2str(state));
 			sc->sc_state = state;
 		}
 		if (sc->sc_provider == NULL)
 			g_raid3_launch_provider(sc);
 		if (sc->sc_rootmount != NULL) {
 			G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 			    sc->sc_rootmount);
 			root_mount_rel(sc->sc_rootmount);
 			sc->sc_rootmount = NULL;
 		}
 		break;
 	case G_RAID3_DEVICE_STATE_COMPLETE:
 		/*
 		 * Genid need to be bumped immediately, so do it here.
 		 */
 		if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) {
 			sc->sc_bump_id &= ~G_RAID3_BUMP_GENID;
 			g_raid3_bump_genid(sc);
 		}
 
 		/* Wait until no component is left in NEW state. */
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0)
 			return;
 		KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >=
 		    sc->sc_ndisks - 1,
 		    ("Too few ACTIVE components in COMPLETE state (device %s).",
 		    sc->sc_name));
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) ==
 		    sc->sc_ndisks - 1) {
 			state = G_RAID3_DEVICE_STATE_DEGRADED;
 			G_RAID3_DEBUG(1,
 			    "Device %s state changed from %s to %s.",
 			    sc->sc_name, g_raid3_device_state2str(sc->sc_state),
 			    g_raid3_device_state2str(state));
 			sc->sc_state = state;
 		}
 		if (sc->sc_provider == NULL)
 			g_raid3_launch_provider(sc);
 		if (sc->sc_rootmount != NULL) {
 			G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 			    sc->sc_rootmount);
 			root_mount_rel(sc->sc_rootmount);
 			sc->sc_rootmount = NULL;
 		}
 		break;
 	default:
 		KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state)));
 		break;
 	}
 }
 
 /*
  * Update disk state and device state if needed.
  *
  * Moves 'disk' into 'state', doing the bookkeeping each transition
  * requires.  A disk entering NEW state on a running device goes on
  * right away to the state picked by g_raid3_determine_state().  In
  * every path visible here the function returns 0.  Asserts the
  * topology lock.
  */
 /* Log a disk state transition; uses 'disk', 'state' and 'sc' of the caller. */
 #define	DISK_STATE_CHANGED()	G_RAID3_DEBUG(1,			\
 	"Disk %s state changed from %s to %s (device %s).",		\
 	g_raid3_get_diskname(disk),					\
 	g_raid3_disk_state2str(disk->d_state),				\
 	g_raid3_disk_state2str(state), sc->sc_name)
 static int
 g_raid3_update_disk(struct g_raid3_disk *disk, u_int state)
 {
 	struct g_raid3_softc *sc;
 
 	g_topology_assert();
 
 	sc = disk->d_softc;
 	/* Re-entered from the NEW case with the follow-up state. */
 again:
 	G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.",
 	    g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state),
 	    g_raid3_disk_state2str(state));
 	switch (state) {
 	case G_RAID3_DISK_STATE_NEW:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk arrives.
 		 */
 		/* Previous state should be NONE. */
 		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE,
 		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		disk->d_state = state;
 		G_RAID3_DEBUG(0, "Device %s: provider %s detected.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 		/* While the device is starting, disks just stay in NEW. */
 		if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING)
 			break;
 		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		/*
 		 * Device is running, so move the disk on immediately.
 		 * NONE means g_raid3_determine_state() destroyed the disk.
 		 */
 		state = g_raid3_determine_state(disk);
 		if (state != G_RAID3_DISK_STATE_NONE)
 			goto again;
 		break;
 	case G_RAID3_DISK_STATE_ACTIVE:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk does not need synchronization.
 		 * 2. Synchronization process finished successfully.
 		 */
 		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		/* Previous state should be NEW or SYNCHRONIZING. */
 		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW ||
 		    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
 		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		if (disk->d_state == G_RAID3_DISK_STATE_NEW)
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 		else if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
 			/* Synchronization is complete, clean up after it. */
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING;
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC;
 			g_raid3_sync_stop(sc, 0);
 		}
 		disk->d_state = state;
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		g_raid3_update_access(disk);
 		g_raid3_update_metadata(disk);
 		G_RAID3_DEBUG(0, "Device %s: provider %s activated.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 		break;
 	case G_RAID3_DISK_STATE_STALE:
 		/*
 		 * Possible scenarios:
 		 * 1. Stale disk was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		/*
 		 * STALE state is only possible if device is marked
 		 * NOAUTOSYNC.
 		 */
 		KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		g_raid3_update_metadata(disk);
 		G_RAID3_DEBUG(0, "Device %s: provider %s is stale.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 		break;
 	case G_RAID3_DISK_STATE_SYNCHRONIZING:
 		/*
 		 * Possible scenarios:
 		 * 1. Disk which needs synchronization was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_raid3_device_state2str(sc->sc_state),
 		    g_raid3_get_diskname(disk),
 		    g_raid3_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		if (disk->d_state == G_RAID3_DISK_STATE_NEW)
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		/*
 		 * Synchronization is started here only when the provider
 		 * already exists.
 		 */
 		if (sc->sc_provider != NULL) {
 			g_raid3_sync_start(sc);
 			g_raid3_update_metadata(disk);
 		}
 		break;
 	case G_RAID3_DISK_STATE_DISCONNECTED:
 		/*
 		 * Possible scenarios:
 		 * 1. Device wasn't running yet, but disk disappeared.
 		 * 2. Disk was active and disappeared.
 		 * 3. Disk disappeared during synchronization process.
 		 */
 		if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
 		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
 			/*
 			 * Previous state should be ACTIVE, STALE or
 			 * SYNCHRONIZING.
 			 */
 			KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
 			    disk->d_state == G_RAID3_DISK_STATE_STALE ||
 			    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
 			    ("Wrong disk state (%s, %s).",
 			    g_raid3_get_diskname(disk),
 			    g_raid3_disk_state2str(disk->d_state)));
 		} else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) {
 			/* Previous state should be NEW. */
 			KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW,
 			    ("Wrong disk state (%s, %s).",
 			    g_raid3_get_diskname(disk),
 			    g_raid3_disk_state2str(disk->d_state)));
 			/*
 			 * Reset bumping syncid if disk disappeared in STARTING
 			 * state.
 			 */
 			if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0)
 				sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID;
 #ifdef	INVARIANTS
 		} else {
 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
 			    sc->sc_name,
 			    g_raid3_device_state2str(sc->sc_state),
 			    g_raid3_get_diskname(disk),
 			    g_raid3_disk_state2str(disk->d_state)));
 #endif
 		}
 		DISK_STATE_CHANGED();
 		G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.",
 		    sc->sc_name, g_raid3_get_diskname(disk));
 
 		g_raid3_destroy_disk(disk);
 		break;
 	default:
 		KASSERT(1 == 0, ("Unknown state (%u).", state));
 		break;
 	}
 	return (0);
 }
 #undef	DISK_STATE_CHANGED
 
 /*
  * Read the metadata stored in the last sector of cp's provider and
  * decode it into 'md'.
  *
  * The consumer is opened for reading for the duration of the read and
  * the topology lock, held on entry, is dropped around the read itself.
  * Returns 0 on success, the error from g_access() or g_read_data(),
  * EINVAL when the magic does not match or the metadata version is newer
  * than G_RAID3_VERSION, or the error from raid3_metadata_decode().
  */
 int
 g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	/* Metadata are stored on last sector. */
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	/* Close the consumer again whether or not the read succeeded. */
 	g_access(cp, -1, 0, 0);
-	if (error != 0) {
+	if (buf == NULL) {
 		G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).",
 		    cp->provider->name, error);
 		/*
 		 * NOTE(review): this guard matters only under the older
 		 * 'error != 0' test; under the 'buf == NULL' test buf is
 		 * always NULL here.
 		 */
 		if (buf != NULL)
 			g_free(buf);
 		return (error);
 	}
 
 	/* Decode metadata. */
 	error = raid3_metadata_decode(buf, md);
 	g_free(buf);
 	/*
 	 * Magic and version are examined before the decode result, so a
 	 * sector that is not ours is reported as EINVAL.
 	 */
 	if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0)
 		return (EINVAL);
 	if (md->md_version > G_RAID3_VERSION) {
 		G_RAID3_DEBUG(0,
 		    "Kernel module is too old to handle metadata from %s.",
 		    cp->provider->name);
 		return (EINVAL);
 	}
 	if (error != 0) {
 		G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
 		    cp->provider->name);
 		return (error);
 	}
 
 	return (0);
 }
 
 static int
 g_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp,
     struct g_raid3_metadata *md)
 {
 
 	if (md->md_no >= sc->sc_ndisks) {
 		G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.",
 		    pp->name, md->md_no);
 		return (EINVAL);
 	}
 	if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) {
 		G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.",
 		    pp->name, md->md_no);
 		return (EEXIST);
 	}
 	if (md->md_all != sc->sc_ndisks) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_all", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_mediasize != sc->sc_mediasize) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_mediasize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_mediasize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((sc->sc_mediasize / (sc->sc_ndisks - 1)) > pp->mediasize) {
 		G_RAID3_DEBUG(1,
 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
 		    sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_sectorsize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_sectorsize != sc->sc_sectorsize) {
 		G_RAID3_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_sectorsize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
 		G_RAID3_DEBUG(1,
 		    "Invalid sector size of disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) {
 		G_RAID3_DEBUG(1,
 		    "Invalid device flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 &&
 	    (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) {
 		/*
 		 * VERIFY and ROUND-ROBIN options are mutually exclusive.
 		 */
 		G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on "
 		    "disk %s (device %s), skipping.", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) {
 		G_RAID3_DEBUG(1,
 		    "Invalid disk flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
  * Validate the metadata read from provider pp and attach pp to device sc as
  * a new component.
  *
  * Returns 0 on success.  Otherwise returns the error reported by
  * g_raid3_check_metadata(), g_raid3_init_disk() or g_raid3_event_send(), or
  * EINVAL when the component's generation ID is behind the device's.
  * g_topology_assert() is called on entry.
  */
 int
 g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp,
     struct g_raid3_metadata *md)
 {
 	struct g_raid3_disk *disk;
 	int error;
 
 	g_topology_assert();
 	G_RAID3_DEBUG(2, "Adding disk %s.", pp->name);
 
 	error = g_raid3_check_metadata(sc, pp, md);
 	if (error != 0)
 		return (error);
 	/*
 	 * Once the device has left the STARTING state, refuse components
 	 * whose generation ID is older than the device's.
 	 */
 	if (sc->sc_state != G_RAID3_DEVICE_STATE_STARTING &&
 	    md->md_genid < sc->sc_genid) {
 		G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	disk = g_raid3_init_disk(sc, pp, md, &error);
 	if (disk == NULL)
 		return (error);
 	/* Announce the new disk; the event is sent with G_RAID3_EVENT_WAIT. */
 	error = g_raid3_event_send(disk, G_RAID3_DISK_STATE_NEW,
 	    G_RAID3_EVENT_WAIT);
 	if (error != 0)
 		return (error);
 	/* Metadata written by an older version is refreshed. */
 	if (md->md_version < G_RAID3_VERSION) {
 		G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
 		    pp->name, md->md_version, G_RAID3_VERSION);
 		g_raid3_update_metadata(disk);
 	}
 	return (0);
 }
 
 /*
  * Access method of the RAID3 geom (installed as gp->access).
  *
  * acr, acw and ace are the requested changes to the read, write and
  * exclusive counts of pp.  If the softc is gone, fewer than sc_ndisks - 1
  * components are active or the device is flagged for destruction, only
  * requests that do not raise any count succeed (0); all others fail with
  * ENXIO.  Otherwise every active component is marked dirty when the write
  * count goes from zero to positive and clean when it drops back to zero,
  * and 0 is returned.
  */
 static int
 g_raid3_access(struct g_provider *pp, int acr, int acw, int ace)
 {
 	struct g_raid3_softc *sc;
 	struct g_raid3_disk *disk;
 	int dcr, dcw, dce;
 	u_int n;
 
 	g_topology_assert();
 	G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
 	    acw, ace);
 
 	/* Resulting access counts; only dcw is consulted below. */
 	dcr = pp->acr + acr;
 	dcw = pp->acw + acw;
 	dce = pp->ace + ace;
 
 	sc = pp->geom->softc;
 	if (sc == NULL ||
 	    g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1 ||
 	    (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
 		/* Releasing access is always allowed; acquiring is not. */
 		if (acr <= 0 && acw <= 0 && ace <= 0)
 			return (0);
 		else
 			return (ENXIO);
 	}
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		disk = &sc->sc_disks[n];
 		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
 			continue;
 		/*
 		 * Mark disk as dirty on open and unmark on close.
 		 */
 		if (pp->acw == 0 && dcw > 0) {
 			G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
 			    g_raid3_get_diskname(disk), sc->sc_name);
 			disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
 			g_raid3_update_metadata(disk);
 		} else if (pp->acw > 0 && dcw == 0) {
 			G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.",
 			    g_raid3_get_diskname(disk), sc->sc_name);
 			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
 			g_raid3_update_metadata(disk);
 		}
 	}
 	return (0);
 }
 
 static struct g_geom *
 g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md)
 {
 	struct g_raid3_softc *sc;
 	struct g_geom *gp;
 	int error, timeout;
 	u_int n;
 
 	g_topology_assert();
 	G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id);
 
 	/* One disk is minimum. */
 	if (md->md_all < 1)
 		return (NULL);
 	/*
 	 * Action geom.
 	 */
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO);
 	sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3,
 	    M_WAITOK | M_ZERO);
 	gp->start = g_raid3_start;
 	gp->orphan = g_raid3_orphan;
 	gp->access = g_raid3_access;
 	gp->dumpconf = g_raid3_dumpconf;
 
 	sc->sc_id = md->md_id;
 	sc->sc_mediasize = md->md_mediasize;
 	sc->sc_sectorsize = md->md_sectorsize;
 	sc->sc_ndisks = md->md_all;
 	sc->sc_round_robin = 0;
 	sc->sc_flags = md->md_mflags;
 	sc->sc_bump_id = 0;
 	sc->sc_idle = 0;
 	for (n = 0; n < sc->sc_ndisks; n++) {
 		sc->sc_disks[n].d_softc = sc;
 		sc->sc_disks[n].d_no = n;
 		sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK;
 	}
 	bioq_init(&sc->sc_queue);
 	mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF);
 	TAILQ_INIT(&sc->sc_events);
 	mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF);
 	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
 	sc->sc_state = G_RAID3_DEVICE_STATE_STARTING;
 	gp->softc = sc;
 	sc->sc_geom = gp;
 	sc->sc_provider = NULL;
 	/*
 	 * Synchronization geom.
 	 */
 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
 	gp->softc = sc;
 	gp->orphan = g_raid3_orphan;
 	sc->sc_sync.ds_geom = gp;
 	sc->sc_zone_64k = uma_zcreate("gr3:64k", 65536, NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(sc->sc_zone_64k, g_raid3_n64k);
 	sc->sc_zone_16k = uma_zcreate("gr3:16k", 16384, NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(sc->sc_zone_64k, g_raid3_n16k);
 	sc->sc_zone_4k = uma_zcreate("gr3:4k", 4096, NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(sc->sc_zone_4k, g_raid3_n4k);
 	error = kthread_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0,
 	    "g_raid3 %s", md->md_name);
 	if (error != 0) {
 		G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.",
 		    sc->sc_name);
 		uma_zdestroy(sc->sc_zone_64k); 
 		uma_zdestroy(sc->sc_zone_16k); 
 		uma_zdestroy(sc->sc_zone_4k); 
 		g_destroy_geom(sc->sc_sync.ds_geom);
 		mtx_destroy(&sc->sc_events_mtx);
 		mtx_destroy(&sc->sc_queue_mtx);
 		g_destroy_geom(sc->sc_geom);
 		free(sc->sc_disks, M_RAID3);
 		free(sc, M_RAID3);
 		return (NULL);
 	}
 
 	G_RAID3_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
 
 	sc->sc_rootmount = root_mount_hold("GRAID3");
 	G_RAID3_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
 
 	/*
 	 * Run timeout.
 	 */
 	timeout = atomic_load_acq_int(&g_raid3_timeout);
 	callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc);
 	return (sc->sc_geom);
 }
 
 /*
  * Destroy device sc.
  *
  * Returns ENXIO if sc is NULL and EBUSY if the device's provider is still
  * open and force is not set.  Otherwise the DESTROY and WAIT flags are set,
  * sleepers on sc and &sc->sc_queue are woken up and, with the topology lock
  * dropped, the function polls until sc_worker becomes NULL.  The device is
  * then torn down, the softc freed and 0 returned.
  */
 int
 g_raid3_destroy(struct g_raid3_softc *sc, boolean_t force)
 {
 	struct g_provider *pp;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 	pp = sc->sc_provider;
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		if (force) {
 			/* Forced: only log and carry on. */
 			G_RAID3_DEBUG(1, "Device %s is still open, so it "
 			    "can't be definitely removed.", pp->name);
 		} else {
 			G_RAID3_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		}
 	}
 
 	sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
 	sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT;
 	g_topology_unlock();
 	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	wakeup(&sc->sc_queue);
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
 	/* Re-check sc_worker at least every hz / 5 ticks. */
 	while (sc->sc_worker != NULL)
 		tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5);
 	G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
 	g_topology_lock();
 	g_raid3_destroy_device(sc);
 	free(sc->sc_disks, M_RAID3);
 	free(sc, M_RAID3);
 	return (0);
 }
 
 /*
  * Orphan method installed on the temporary geom used while tasting.  It is
  * not expected to run; the assertion fires if it ever does.
  */
 static void
 g_raid3_taste_orphan(struct g_consumer *cp)
 {
 
 	KASSERT(0, ("%s called while tasting %s.", __func__,
 	    cp->provider->name));
 }
 
 /*
  * Taste provider pp: read RAID3 metadata from it and, if the metadata is
  * acceptable for this provider, add pp to the device named in the metadata,
  * creating that device first when no geom of class mp carries it yet.
  *
  * Returns the device's geom on success and NULL otherwise.
  */
 static struct g_geom *
 g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_raid3_metadata md;
 	struct g_raid3_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	G_RAID3_DEBUG(2, "Tasting %s.", pp->name);
 
 	/* Temporary geom and consumer, used only to read the metadata. */
 	gp = g_new_geomf(mp, "raid3:taste");
 	/* This orphan function should be never called. */
 	gp->orphan = g_raid3_taste_orphan;
 	cp = g_new_consumer(gp);
 	g_attach(cp, pp);
 	error = g_raid3_read_metadata(cp, &md);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	/*
 	 * When the metadata records a provider name or a provider size, it
 	 * has to match pp.
 	 */
 	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
 		return (NULL);
 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
 		return (NULL);
 	if (g_raid3_debug >= 2)
 		raid3_metadata_dump(&md);
 
 	/*
 	 * Let's check if device already exists.
 	 */
 	sc = NULL;
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		/* Skip synchronization geoms. */
 		if (sc->sc_sync.ds_geom == gp)
 			continue;
 		if (strcmp(md.md_name, sc->sc_name) != 0)
 			continue;
 		/* Same name but a different ID: refuse this provider. */
 		if (md.md_id != sc->sc_id) {
 			G_RAID3_DEBUG(0, "Device %s already configured.",
 			    sc->sc_name);
 			return (NULL);
 		}
 		break;
 	}
 	/* gp is NULL here when the loop ran to completion without a match. */
 	if (gp == NULL) {
 		gp = g_raid3_create(mp, &md);
 		if (gp == NULL) {
 			G_RAID3_DEBUG(0, "Cannot create device %s.",
 			    md.md_name);
 			return (NULL);
 		}
 		sc = gp->softc;
 	}
 	G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 	error = g_raid3_add_disk(sc, pp, &md);
 	if (error != 0) {
 		G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
 		    pp->name, gp->name, error);
 		/* Destroy the device again if it has no components at all. */
 		if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) ==
 		    sc->sc_ndisks) {
 			g_raid3_destroy(sc, 1);
 		}
 		return (NULL);
 	}
 	return (gp);
 }
 
 /*
  * Destroy the device attached to gp without forcing (force = 0) and return
  * whatever g_raid3_destroy() reports.
  */
 static int
 g_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused,
     struct g_geom *gp)
 {
 	struct g_raid3_softc *sc;
 
 	sc = gp->softc;
 	return (g_raid3_destroy(sc, 0));
 }
 
 /*
  * dumpconf method (installed as gp->dumpconf): append an XML description of
  * the device or of one of its components to sb, each line prefixed with
  * indent.
  *
  * Nothing is emitted for a geom without softc, for the synchronization geom
  * or when pp is given.  With cp set, the component found in cp->private is
  * described; with neither pp nor cp, the device itself is described.
  */
 static void
 g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_raid3_softc *sc;
 
 	g_topology_assert();
 
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	/* Skip synchronization geom. */
 	if (gp == sc->sc_sync.ds_geom)
 		return;
 	if (pp != NULL) {
 		/* Nothing here. */
 	} else if (cp != NULL) {
 		struct g_raid3_disk *disk;
 
 		disk = cp->private;
 		if (disk == NULL)
 			return;
 		sbuf_printf(sb, "%s<Type>", indent);
 		/* The component with the highest number is reported as parity. */
 		if (disk->d_no == sc->sc_ndisks - 1)
 			sbuf_printf(sb, "PARITY");
 		else
 			sbuf_printf(sb, "DATA");
 		sbuf_printf(sb, "</Type>\n");
 		sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
 		    (u_int)disk->d_no);
 		if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
 			sbuf_printf(sb, "%s<Synchronized>", indent);
 			if (disk->d_sync.ds_offset_done == 0)
 				sbuf_printf(sb, "0%%");
 			else {
 				/*
 				 * ds_offset_done as a percentage of
 				 * sc_mediasize / (sc_ndisks - 1).
 				 */
 				sbuf_printf(sb, "%u%%",
 				    (u_int)((disk->d_sync.ds_offset_done * 100) /
 				    (sc->sc_mediasize / (sc->sc_ndisks - 1))));
 			}
 			sbuf_printf(sb, "</Synchronized>\n");
 		}
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
 		    disk->d_sync.ds_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, disk->d_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (disk->d_flags == 0)
 			sbuf_printf(sb, "NONE");
 		else {
 			int first = 1;
 
 /* Append name, comma-separated, when flag is set in disk->d_flags. */
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((disk->d_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
 			ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY");
 			ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED");
 			ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING,
 			    "SYNCHRONIZING");
 			ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
 #undef	ADD_FLAG
 		}
 		sbuf_printf(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 		    g_raid3_disk_state2str(disk->d_state));
 	} else {
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (sc->sc_flags == 0)
 			sbuf_printf(sb, "NONE");
 		else {
 			int first = 1;
 
 /* Append name, comma-separated, when flag is set in sc->sc_flags. */
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((sc->sc_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
 			ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
 			ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN,
 			    "ROUND-ROBIN");
 			ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY");
 #undef	ADD_FLAG
 		}
 		sbuf_printf(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
 		    sc->sc_ndisks);
 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 		    g_raid3_device_state2str(sc->sc_state));
 	}
 }
 
 /*
  * Shutdown event handler (registered by g_raid3_init()): forcibly destroy
  * every geom of class arg that still has a softc.  howto is not used.
  */
 static void
 g_raid3_shutdown(void *arg, int howto)
 {
 	struct g_class *mp;
 	struct g_geom *gp, *gp2;
 
 	mp = arg;
 	DROP_GIANT();
 	g_topology_lock();
 	/* _SAFE variant: gp2 keeps the next entry while gp is processed. */
 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
 		if (gp->softc == NULL)
 			continue;
 		g_raid3_destroy(gp->softc, 1);
 	}
 	g_topology_unlock();
 	PICKUP_GIANT();
 #if 0
 	tsleep(&gp, PRIBIO, "r3:shutdown", hz * 20);
 #endif
 }
 
 /*
  * Register g_raid3_shutdown() for the shutdown_post_sync event, passing mp
  * as its argument, and keep the resulting tag in g_raid3_ehtag.  A failed
  * registration is only logged.
  */
 static void
 g_raid3_init(struct g_class *mp)
 {
 
 	g_raid3_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync,
 	    g_raid3_shutdown, mp, SHUTDOWN_PRI_FIRST);
 	if (g_raid3_ehtag != NULL)
 		return;
 	G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event.");
 }
 
 /*
  * Remove the shutdown_post_sync handler again, provided one was registered
  * (g_raid3_ehtag is not NULL).
  */
 static void
 g_raid3_fini(struct g_class *mp)
 {
 
 	if (g_raid3_ehtag != NULL) {
 		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_ehtag);
 	}
 }
 
 DECLARE_GEOM_CLASS(g_raid3_class, g_raid3);
Index: head/sys/geom/uzip/g_uzip.c
===================================================================
--- head/sys/geom/uzip/g_uzip.c	(revision 152966)
+++ head/sys/geom/uzip/g_uzip.c	(revision 152967)
@@ -1,525 +1,525 @@
 /*-
  * Copyright (c) 2004 Max Khon
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/systm.h>
 
 #include <geom/geom.h>
 #include <net/zlib.h>
 
 #undef GEOM_UZIP_DEBUG
 #ifdef GEOM_UZIP_DEBUG
 #define DPRINTF(a)	printf a
 #else
 #define DPRINTF(a)
 #endif
 
 MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures");
 
 #define UZIP_CLASS_NAME	"UZIP"
 
 /*
  * Maximum allowed valid block size (to prevent foot-shooting)
  */
 #define MAX_BLKSZ	(MAXPHYS - MAXPHYS / 1000 - 12)
 
 /*
  * Integer values (block size, number of blocks, offsets)
  * are stored in big-endian (network) order on disk and struct cloop_header
  * and in native order in struct g_uzip_softc
  */
 
 #define CLOOP_MAGIC_LEN 128
 static char CLOOP_MAGIC_START[] = "#!/bin/sh\n";
 
 /*
  * Header found at the very start of a cloop image.  g_uzip_taste() converts
  * blksz and nblocks with ntohl() and expects the table of block offsets to
  * follow this header immediately.
  */
 struct cloop_header {
 	char magic[CLOOP_MAGIC_LEN];	/* cloop magic */
 	uint32_t blksz;			/* block size */
 	uint32_t nblocks;		/* number of blocks */
 };
 
 /*
  * Per-geom state, allocated in g_uzip_taste() and released by
  * g_uzip_softc_free().
  */
 struct g_uzip_softc {
 	uint32_t blksz;			/* block size */
 	uint32_t nblocks;		/* number of blocks */
 	uint64_t *offsets;		/* nblocks + 1 offsets of compressed blocks */
 
 	struct mtx last_mtx;		/* held around last_blk/last_buf use */
 	uint32_t last_blk;		/* last blk no */
 	char *last_buf;			/* last blk data */
 	int req_total;			/* total requests */
 	int req_cached;			/* cached requests */
 };
 
 /*
  * Release softc sc and everything hanging off it.
  *
  * When gp is not NULL the request statistics are printed first.  The softc
  * may be only partially set up: g_uzip_taste() allocates it zeroed and can
  * jump to its error path before mtx_init() has run, so the cache mutex is
  * destroyed only if it has actually been initialized.
  */
 static void
 g_uzip_softc_free(struct g_uzip_softc *sc, struct g_geom *gp)
 {
 	if (gp != NULL) {
 		printf("%s: %d requests, %d cached\n",
 		    gp->name, sc->req_total, sc->req_cached);
 	}
 	if (sc->offsets != NULL)
 		free(sc->offsets, M_GEOM_UZIP);
 	if (mtx_initialized(&sc->last_mtx))
 		mtx_destroy(&sc->last_mtx);
 	free(sc->last_buf, M_GEOM_UZIP);
 	free(sc, M_GEOM_UZIP);
 }
 
 /*
  * Allocation callback handed to zlib (zs.zalloc): returns type * size bytes
  * of M_GEOM_UZIP memory obtained with M_NOWAIT.  The first argument is
  * ignored.
  */
 static void *
 z_alloc(void *nil, u_int type, u_int size)
 {
 
 	return (malloc(type * size, M_GEOM_UZIP, M_NOWAIT));
 }
 
 /*
  * Release callback handed to zlib (zs.zfree): frees ptr back to
  * M_GEOM_UZIP.  The first argument is ignored.
  */
 static void
 z_free(void *nil, void *ptr)
 {
 	free(ptr, M_GEOM_UZIP);
 }
 
 /*
  * Completion handler for the read issued by g_uzip_start().
  *
  * bp carries the compressed data, its parent (bp2 here) is the original
  * request.  Each compressed block is inflated into sc->last_buf and the
  * requested part of it is copied into the parent's buffer.  On failure the
  * parent's bio_error is set: the child's error, or EIO when zlib fails.  In
  * all cases the compressed buffer and bp are freed and the parent is
  * delivered.
  */
 static void
 g_uzip_done(struct bio *bp)
 {
 	int err;
 	struct bio *bp2;
 	z_stream zs;
 	struct g_provider *pp, *pp2;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	struct g_uzip_softc *sc;
 	off_t pos, upos;
 	uint32_t start_blk, i;
 	size_t bsize;
 
 	bp2 = bp->bio_parent;
 	pp = bp2->bio_to;
 	gp = pp->geom;
 	cp = LIST_FIRST(&gp->consumer);
 	pp2 = cp->provider;
 	sc = gp->softc;
 	DPRINTF(("%s: done\n", gp->name));
 
 	bp2->bio_error = bp->bio_error;
 	if (bp2->bio_error != 0)
 		goto done;
 
 	/*
 	 * Uncompress data.
 	 * zs.opaque is left unset; z_alloc() and z_free() ignore their first
 	 * argument.
 	 */
 	zs.zalloc = z_alloc;
 	zs.zfree = z_free;
 	err = inflateInit(&zs);
 	if (err != Z_OK) {
 		bp2->bio_error = EIO;
 		goto done;
 	}
 	start_blk = bp2->bio_offset / sc->blksz;
 	bsize = pp2->sectorsize;
 	/*
 	 * pos: where the first compressed block starts inside bp->bio_data
 	 * (g_uzip_start() rounded the read offset down to a sector boundary).
 	 * upos: number of uncompressed bytes already copied to the parent.
 	 */
 	pos = sc->offsets[start_blk] % bsize;
 	upos = 0;
 	DPRINTF(("%s: done: start_blk %d, pos %lld, upos %lld (%lld, %d, %d)\n",
 	    gp->name, start_blk, pos, upos,
 	    bp2->bio_offset, sc->blksz, bsize));
 	for (i = start_blk; upos < bp2->bio_length; i++) {
 		off_t len, ulen, uoff;
 
 		/*
 		 * uoff: offset of the wanted data inside uncompressed block
 		 * i (non-zero for the first block only).
 		 * ulen: bytes to take from this block.
 		 * len: size of compressed block i.
 		 */
 		uoff = i == start_blk ? bp2->bio_offset % sc->blksz : 0;
 		ulen = MIN(sc->blksz - uoff, bp2->bio_length - upos);
 		len = sc->offsets[i + 1] - sc->offsets[i];
 
 		zs.next_in = bp->bio_data + pos;
 		zs.avail_in = len;
 		zs.next_out = sc->last_buf;
 		zs.avail_out = sc->blksz;
 		mtx_lock(&sc->last_mtx);
 		err = inflate(&zs, Z_FINISH);
 		if (err != Z_STREAM_END) {
 			/* last_buf no longer holds a valid block. */
 			sc->last_blk = -1;
 			mtx_unlock(&sc->last_mtx);
 			DPRINTF(("%s: done: inflate failed (%lld + %lld -> %lld + %lld + %lld)\n",
 			    gp->name, pos, len, uoff, upos, ulen));
 			inflateEnd(&zs);
 			bp2->bio_error = EIO;
 			goto done;
 		}
 		/* Remember which block last_buf holds for g_uzip_start(). */
 		sc->last_blk = i;
 		DPRINTF(("%s: done: inflated %lld + %lld -> %lld + %lld + %lld\n",
 		    gp->name,
 		    pos, len,
 		    uoff, upos, ulen));
 		memcpy(bp2->bio_data + upos, sc->last_buf + uoff, ulen);
 		mtx_unlock(&sc->last_mtx);
 
 		pos += len;
 		upos += ulen;
 		bp2->bio_completed += ulen;
 		/* Reuse the same stream for the next block. */
 		err = inflateReset(&zs);
 		if (err != Z_OK) {
 			inflateEnd(&zs);
 			bp2->bio_error = EIO;
 			goto done;
 		}
 	}
 	err = inflateEnd(&zs);
 	if (err != Z_OK) {
 		bp2->bio_error = EIO;
 		goto done;
 	}
 
 done:
 	/*
 	 * Finish processing the request.
 	 */
 	DPRINTF(("%s: done: (%d, %lld, %ld)\n",
 	    gp->name, bp2->bio_error, bp2->bio_completed, bp2->bio_resid));
 	free(bp->bio_data, M_GEOM_UZIP);
 	g_destroy_bio(bp);
 	g_io_deliver(bp2, bp2->bio_error);
 }
 
 /*
  * Start method: service a request on the uncompressed provider.
  *
  * Only BIO_READ is supported; anything else is failed with EOPNOTSUPP.  A
  * read that lies within a single block and hits the one-block cache is
  * answered from sc->last_buf right away.  Otherwise a cloned bio covering
  * the sector-aligned range of compressed data is sent to the consumer and
  * g_uzip_done() inflates the result.  ENOMEM is delivered when the clone or
  * its data buffer cannot be allocated.
  */
 static void
 g_uzip_start(struct bio *bp)
 {
 	struct bio *bp2;
 	struct g_provider *pp, *pp2;
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_uzip_softc *sc;
 	uint32_t start_blk, end_blk;
 	size_t bsize;
 
 	pp = bp->bio_to;
 	gp = pp->geom;
 	DPRINTF(("%s: start (%d)\n", gp->name, bp->bio_cmd));
 
 	if (bp->bio_cmd != BIO_READ) {
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 
 	cp = LIST_FIRST(&gp->consumer);
 	pp2 = cp->provider;
 	sc = gp->softc;
 
 	/* Blocks touched by the request: [start_blk, end_blk). */
 	start_blk = bp->bio_offset / sc->blksz;
 	end_blk = (bp->bio_offset + bp->bio_length + sc->blksz - 1) / sc->blksz;
 	KASSERT(start_blk < sc->nblocks,
 		("start_blk out of range"));
 	KASSERT(end_blk <= sc->nblocks,
 		("end_blk out of range"));
 
 	sc->req_total++;
 	if (start_blk + 1 == end_blk) {
 		/* Single-block request: try the cached block first. */
 		mtx_lock(&sc->last_mtx);
 		if (start_blk == sc->last_blk) {
 			off_t uoff;
 
 			uoff = bp->bio_offset % sc->blksz;
 			KASSERT(bp->bio_length <= sc->blksz - uoff,
 			    ("cached data error"));
 			memcpy(bp->bio_data, sc->last_buf + uoff,
 			    bp->bio_length);
 			sc->req_cached++;
 			mtx_unlock(&sc->last_mtx);
 
 			DPRINTF(("%s: start: cached 0 + %lld, %lld + 0 + %lld\n",
 			    gp->name, bp->bio_length, uoff, bp->bio_length));
 			bp->bio_completed = bp->bio_length;
 			g_io_deliver(bp, 0);
 			return;
 		}
 		mtx_unlock(&sc->last_mtx);
 	}
 
 	bp2 = g_clone_bio(bp);
 	if (bp2 == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 	bp2->bio_done = g_uzip_done;
 	DPRINTF(("%s: start (%d..%d), %s: %d + %lld, %s: %d + %lld\n",
 	    gp->name, start_blk, end_blk,
 	    pp->name, pp->sectorsize, pp->mediasize,
 	    pp2->name, pp2->sectorsize, pp2->mediasize));
 	/*
 	 * Read the compressed blocks, with the start rounded down and the
 	 * length rounded up to the sector size of the underlying provider.
 	 */
 	bsize = pp2->sectorsize;
 	bp2->bio_offset = sc->offsets[start_blk] - sc->offsets[start_blk] % bsize;
 	bp2->bio_length = sc->offsets[end_blk] - bp2->bio_offset;
 	bp2->bio_length = (bp2->bio_length + bsize - 1) / bsize * bsize;
 	DPRINTF(("%s: start %lld + %lld -> %lld + %lld -> %lld + %lld\n",
 	    gp->name,
 	    bp->bio_offset, bp->bio_length,
 	    sc->offsets[start_blk], sc->offsets[end_blk] - sc->offsets[start_blk],
 	    bp2->bio_offset, bp2->bio_length));
 	bp2->bio_data = malloc(bp2->bio_length, M_GEOM_UZIP, M_NOWAIT);
 	if (bp2->bio_data == NULL) {
 		/* The clone was never issued; release it before failing. */
 		g_destroy_bio(bp2);
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 
 	g_io_request(bp2, cp);
 	DPRINTF(("%s: start ok\n", gp->name));
 }
 
 /*
  * Orphan method: free the softc and wither the geom, passing on the error
  * recorded on the consumer's provider (asserted to be non-zero).
  */
 static void
 g_uzip_orphan(struct g_consumer *cp)
 {
 	struct g_geom *gp;
 
 	g_trace(G_T_TOPOLOGY, "g_uzip_orphan(%p/%s)", cp, cp->provider->name);
 	g_topology_assert();
 	KASSERT(cp->provider->error != 0,
 		("g_uzip_orphan with error == 0"));
 
 	gp = cp->geom;
 	g_uzip_softc_free(gp->softc, gp);
 	/* Clear the pointer so the freed softc cannot be reached via gp. */
 	gp->softc = NULL;
 	g_wither_geom(gp, cp->provider->error);
 }
 
 /*
  * Access method.  A change that would leave the consumer with a positive
  * write count is refused with EROFS; every other change is handed to
  * g_access() on the consumer and its result returned.
  */
 static int
 g_uzip_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *cp;
 
 	cp = LIST_FIRST(&pp->geom->consumer);
 	KASSERT(cp != NULL, ("g_uzip_access but no consumer"));
 
 	if (cp->acw + dw <= 0)
 		return (g_access(cp, dr, dw, de));
 	return (EROFS);
 }
 
 /*
  * Spoiled method: free the softc and wither the geom with ENXIO.
  */
 static void
 g_uzip_spoiled(struct g_consumer *cp)
 {
 	struct g_geom *gp;
 
 	gp = cp->geom;
 	g_trace(G_T_TOPOLOGY, "g_uzip_spoiled(%p/%s)", cp, gp->name);
 	g_topology_assert();
 
 	g_uzip_softc_free(gp->softc, gp);
 	/* Clear the pointer so the freed softc cannot be reached via gp. */
 	gp->softc = NULL;
 	g_wither_geom(gp, ENXIO);
 }
 
 /*
  * Taste method: check whether provider pp holds a cloop image and, if so,
  * create a "<pp>.uzip" geom with a 512-byte-sector provider of
  * nblocks * blksz bytes.
  *
  * The cloop header and the table of nblocks + 1 block offsets are read with
  * the topology lock dropped.  Returns the new geom, or NULL when pp cannot
  * be attached or opened, a read fails or a validity check fails.
  */
 static struct g_geom *
 g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
 {
 	int error;
 	uint32_t i, total_offsets, offsets_read, blk;
 	void *buf;
 	struct cloop_header *header;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	struct g_provider *pp2;
 	struct g_uzip_softc *sc;
 
 	g_trace(G_T_TOPOLOGY, "g_uzip_taste(%s,%s)", mp->name, pp->name);
 	g_topology_assert();
 	buf = NULL;
 
 	/*
 	 * Create geom instance.
 	 */
 	gp = g_new_geomf(mp, "%s.uzip", pp->name);
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 	if (error == 0)
 		error = g_access(cp, 1, 0, 0);
 	if (error) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		g_destroy_geom(gp);
 		return (NULL);
 	}
 	g_topology_unlock();
 
 	/*
 	 * Read cloop header, look for CLOOP magic, perform
 	 * other validity checks.
 	 */
 	DPRINTF(("%s: media sectorsize %u, mediasize %lld\n",
 	    gp->name, pp->sectorsize, pp->mediasize));
 	buf = g_read_data(cp, 0, pp->sectorsize, &error);
-	if (buf == NULL || error != 0)
+	if (buf == NULL)
 		goto err;
 	header = (struct cloop_header *) buf;
 	if (strncmp(header->magic, CLOOP_MAGIC_START,
 		    sizeof(CLOOP_MAGIC_START) - 1) != 0) {
 		DPRINTF(("%s: no CLOOP magic\n", gp->name));
 		goto err;
 	}
 	if (header->magic[0x0b] != 'V' || header->magic[0x0c] < '2') {
 		DPRINTF(("%s: image version too old\n", gp->name));
 		goto err;
 	}
 
 	/*
 	 * Initialize softc and read offsets.
 	 * The softc is attached to gp right away so that the error path
 	 * below can find and free it.
 	 */
 	sc = malloc(sizeof(*sc), M_GEOM_UZIP, M_WAITOK | M_ZERO);
 	gp->softc = sc;
 	sc->blksz = ntohl(header->blksz);
 	sc->nblocks = ntohl(header->nblocks);
 	if (sc->blksz % 512 != 0) {
 		printf("%s: block size (%u) should be multiple of 512.\n",
 		    gp->name, sc->blksz);
 		goto err;
 	}
 	/* NOTE(review): an oversized block is only reported; tasting goes on. */
 	if (sc->blksz > MAX_BLKSZ) {
 		printf("%s: block size (%u) should not be larger than %d.\n",
 		    gp->name, sc->blksz, MAX_BLKSZ);
 	}
 	/* One offset per block plus a final one marking the end of the last. */
 	total_offsets = sc->nblocks + 1;
 	if (sizeof(struct cloop_header) +
 	    total_offsets * sizeof(uint64_t) > pp->mediasize) {
 		printf("%s: media too small for %u blocks\n",
 		       gp->name, sc->nblocks);
 		goto err;
 	}
 	sc->offsets = malloc(
 	    total_offsets * sizeof(uint64_t), M_GEOM_UZIP, M_WAITOK);
 	/* Offsets that share the first sector with the header. */
 	offsets_read = MIN(total_offsets,
 	    (pp->sectorsize - sizeof(*header)) / sizeof(uint64_t));
 	for (i = 0; i < offsets_read; i++)
 		sc->offsets[i] = be64toh(((uint64_t *) (header + 1))[i]);
 	DPRINTF(("%s: %u offsets in the first sector\n",
 	       gp->name, offsets_read));
 	/* Remaining offsets, one sector at a time. */
 	for (blk = 1; offsets_read < total_offsets; blk++) {
 		uint32_t nread;
 
 		free(buf, M_GEOM);
 		buf = g_read_data(
 		    cp, blk * pp->sectorsize, pp->sectorsize, &error);
-		if (buf == NULL || error != 0)
+		if (buf == NULL)
 			goto err;
 		nread = MIN(total_offsets - offsets_read,
 		     pp->sectorsize / sizeof(uint64_t));
 		DPRINTF(("%s: %u offsets read from sector %d\n",
 		    gp->name, nread, blk));
 		for (i = 0; i < nread; i++) {
 			sc->offsets[offsets_read + i] =
 			    be64toh(((uint64_t *) buf)[i]);
 		}
 		offsets_read += nread;
 	}
 	DPRINTF(("%s: done reading offsets\n", gp->name));
 	/* One-block cache of the most recently inflated block. */
 	mtx_init(&sc->last_mtx, "geom_uzip cache", NULL, MTX_DEF);
 	sc->last_blk = -1;
 	sc->last_buf = malloc(sc->blksz, M_GEOM_UZIP, M_WAITOK);
 	sc->req_total = 0;
 	sc->req_cached = 0;
 
 	g_topology_lock();
 	pp2 = g_new_providerf(gp, "%s", gp->name);
 	pp2->sectorsize = 512;
 	pp2->mediasize = sc->nblocks * sc->blksz;
         pp2->flags = pp->flags & G_PF_CANDELETE;
         if (pp->stripesize > 0) {
                 pp2->stripesize = pp->stripesize;
                 pp2->stripeoffset = pp->stripeoffset;
         }
 	g_error_provider(pp2, 0);
 	/* Drop the read access taken above for tasting. */
 	g_access(cp, -1, 0, 0);
 
 	DPRINTF(("%s: taste ok (%d, %lld), (%d, %d), %x\n",
 	    gp->name,
 	    pp2->sectorsize, pp2->mediasize,
 	    pp2->stripeoffset, pp2->stripesize, pp2->flags));
 	printf("%s: %u x %u blocks\n",
 	       gp->name, sc->nblocks, sc->blksz);
 	return (gp);
 
 err:
 	/* Undo everything done so far; the softc may be partially set up. */
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf != NULL)
 		free(buf, M_GEOM);
 	if (gp->softc != NULL) {
 		g_uzip_softc_free(gp->softc, NULL);
 		gp->softc = NULL;
 	}
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	return (NULL);
 }
 
 /*
  * destroy_geom method: tear down geom gp on request.
  *
  * Returns ENXIO when the geom has no softc any more, EBUSY when its
  * provider is still open, and 0 after the softc has been freed and the geom
  * withered.
  */
 static int
 g_uzip_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
 {
 	struct g_provider *pp;
 
 	/* Check gp before anything below dereferences it. */
 	KASSERT(gp != NULL, ("NULL geom"));
 	g_trace(G_T_TOPOLOGY, "g_uzip_destroy_geom(%s, %s)", mp->name, gp->name);
 	g_topology_assert();
 
 	if (gp->softc == NULL) {
 		printf("%s(%s): gp->softc == NULL\n", __func__, gp->name);
 		return (ENXIO);
 	}
 
 	pp = LIST_FIRST(&gp->provider);
 	KASSERT(pp != NULL, ("NULL provider"));
 	if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
 		return (EBUSY);
 
 	g_uzip_softc_free(gp->softc, gp);
 	gp->softc = NULL;
 	g_wither_geom(gp, ENXIO);
 	return (0);
 }
 
 /*
  * GEOM class descriptor: wires the class name and the methods defined in
  * this file into GEOM.
  */
 static struct g_class g_uzip_class = {
 	.name = UZIP_CLASS_NAME,
 	.version = G_VERSION,
 	.taste = g_uzip_taste,
 	.destroy_geom = g_uzip_destroy_geom,
 
 	.start = g_uzip_start,
 	.orphan = g_uzip_orphan,
 	.access = g_uzip_access,
 	.spoiled = g_uzip_spoiled,
 };
 
 DECLARE_GEOM_CLASS(g_uzip_class, geom_uzip);
 MODULE_DEPEND(geom_uzip, zlib, 1, 1, 1);
Index: head/sys/geom/vinum/geom_vinum_drive.c
===================================================================
--- head/sys/geom/vinum/geom_vinum_drive.c	(revision 152966)
+++ head/sys/geom/vinum/geom_vinum_drive.c	(revision 152967)
@@ -1,674 +1,674 @@
 /*-
  * Copyright (c) 2004, 2005 Lukas Ertl
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/errno.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/libkern.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/sbuf.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 
 #include <geom/geom.h>
 #include <geom/vinum/geom_vinum_var.h>
 #include <geom/vinum/geom_vinum.h>
 #include <geom/vinum/geom_vinum_share.h>
 
 static void	gv_drive_dead(void *, int);
 static void	gv_drive_worker(void *);
 
 void
 gv_config_new_drive(struct gv_drive *d)
 {
 	struct gv_hdr *vhdr;
 	struct gv_freelist *fl;
 
 	KASSERT(d != NULL, ("config_new_drive: NULL d"));
 
 	vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
 	vhdr->magic = GV_MAGIC;
 	vhdr->config_length = GV_CFG_LEN;
 
 	bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
 	strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
 	microtime(&vhdr->label.date_of_birth);
 
 	d->hdr = vhdr;
 
 	LIST_INIT(&d->subdisks);
 	LIST_INIT(&d->freelist);
 
 	fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
 	fl->offset = GV_DATA_START;
 	fl->size = d->avail;
 	LIST_INSERT_HEAD(&d->freelist, fl, freelist);
 	d->freelist_entries = 1;
 
 	TAILQ_INIT(&d->bqueue);
 	mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
 	kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
 	d->flags |= GV_DRIVE_THREAD_ACTIVE;
 }
 
 void
 gv_save_config_all(struct gv_softc *sc)
 {
 	struct gv_drive *d;
 
 	g_topology_assert();
 
 	LIST_FOREACH(d, &sc->drives, drive) {
 		if (d->geom == NULL)
 			continue;
 		gv_save_config(NULL, d, sc);
 	}
 }
 
 /*
  * Save the vinum configuration back to disk.
  *
  * Writes the drive header followed by two copies of the formatted
  * configuration through consumer 'cp', or through the first consumer of the
  * drive's geom when 'cp' is NULL.  The topology lock must be held on entry;
  * it is dropped around the writes and re-acquired before returning.
  * Failures are reported with printf() only, there is no return value.
  */
 void
 gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp2;
 	struct gv_hdr *vhdr, *hdr;
 	struct sbuf *sb;
 	int error;
 
 	g_topology_assert();
 
 	KASSERT(d != NULL, ("gv_save_config: null d"));
 	KASSERT(sc != NULL, ("gv_save_config: null sc"));
 
 	/*
 	 * We can't save the config on a drive that isn't up, but drives that
 	 * were just created aren't officially up yet, so we check a special
 	 * flag.  The flag must be tested with a bitwise AND: a logical AND
 	 * would accept every drive whose flags word is non-zero.
 	 */
 	if ((d->state != GV_DRIVE_UP) && !(d->flags & GV_DRIVE_NEWBORN))
 		return;
 
 	if (cp == NULL) {
 		gp = d->geom;
 		KASSERT(gp != NULL, ("gv_save_config: null gp"));
 		cp2 = LIST_FIRST(&gp->consumer);
 		KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
 	} else
 		cp2 = cp;
 
 	/* Scratch header that will be written out; freed on every path. */
 	vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
 	vhdr->magic = GV_MAGIC;
 	vhdr->config_length = GV_CFG_LEN;
 
 	hdr = d->hdr;
 	if (hdr == NULL) {
 		printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name);
 		g_free(vhdr);
 		return;
 	}
 	/* Stamp the in-core label and copy it into the header to be written. */
 	microtime(&hdr->label.last_update);
 	bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
 
 	sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
 	gv_format_config(sc, sb, 1, NULL);
 	sbuf_finish(sb);
 
 	/* Take a write reference for the duration of the update. */
 	error = g_access(cp2, 0, 1, 0);
 	if (error) {
 		printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n",
 		    d->name, error);
 		sbuf_delete(sb);
 		g_free(vhdr);
 		return;
 	}
 	g_topology_unlock();
 
 	/* Header first, then both config copies; stop at the first error. */
 	do {
 		error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN);
 		if (error) {
 			printf("GEOM_VINUM: writing vhdr failed on drive %s, "
 			    "errno %d\n", d->name, error);
 			break;
 		}
 
 		error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
 		    GV_CFG_LEN);
 		if (error) {
 			printf("GEOM_VINUM: writing first config copy failed "
 			    "on drive %s, errno %d\n", d->name, error);
 			break;
 		}
 
 		error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
 		    sbuf_data(sb), GV_CFG_LEN);
 		if (error)
 			printf("GEOM_VINUM: writing second config copy failed "
 			    "on drive %s, errno %d\n", d->name, error);
 	} while (0);
 
 	g_topology_lock();
 	/* Drop the write reference taken above. */
 	g_access(cp2, 0, -1, 0);
 	sbuf_delete(sb);
 	g_free(vhdr);
 
 	if (d->geom != NULL)
 		gv_drive_modify(d);
 }
 
 /*
  * Access method for subdisk providers; this resembles g_slice_access().
  *
  * Returns EPERM if the change would leave this subdisk open for writing
  * while an overlapping subdisk of the same drive is open exclusively, or
  * open exclusively while an overlapping one is open for writing.
  * Otherwise the request is passed to the geom's first consumer and the
  * result of g_access() is returned.  Returns 0 without doing anything when
  * the geom has no consumer or no softc.
  */
 static int
 gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_provider *pp2;
 	struct gv_drive *d;
 	struct gv_sd *s, *s2;
 	int error;
 
 	gp = pp->geom;
 	cp = LIST_FIRST(&gp->consumer);
 	if (cp == NULL)
 		return (0);
 
 	d = gp->softc;
 	if (d == NULL)
 		return (0);
 
 	s = pp->private;
 	KASSERT(s != NULL, ("gv_drive_access: NULL s"));
 
 	LIST_FOREACH(s2, &d->subdisks, from_drive) {
 		if (s == s2)
 			continue;
 		/* Skip subdisks that lie entirely after or before this one. */
 		if (s->drive_offset + s->size <= s2->drive_offset)
 			continue;
 		if (s2->drive_offset + s2->size <= s->drive_offset)
 			continue;
 
 		/* Overlap. */
 		pp2 = s2->provider;
 		/* No provider means no access counts to conflict with. */
 		if (pp2 == NULL)
 			continue;
 		if ((pp->acw + dw) > 0 && pp2->ace > 0)
 			return (EPERM);
 		if ((pp->ace + de) > 0 && pp2->acw > 0)
 			return (EPERM);
 	}
 
 	error = g_access(cp, dr, dw, de);
 	return (error);
 }
 
 static void
 gv_drive_done(struct bio *bp)
 {
 	struct gv_drive *d;
 	struct gv_bioq *bq;
 
 	/* Put the BIO on the worker queue again. */
 	d = bp->bio_from->geom->softc;
 	bp->bio_cflags |= GV_BIO_DONE;
 	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
 	bq->bp = bp;
 	mtx_lock(&d->bqueue_mtx);
 	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
 	wakeup(d);
 	mtx_unlock(&d->bqueue_mtx);
 }
 
 
 static void
 gv_drive_start(struct bio *bp)
 {
 	struct gv_drive *d;
 	struct gv_sd *s;
 	struct gv_bioq *bq;
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 		break;
 	case BIO_GETATTR:
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 
 	s = bp->bio_to->private;
 	if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
 		g_io_deliver(bp, ENXIO);
 		return;
 	}
 
 	d = bp->bio_to->geom->softc;
 
 	/*
 	 * Put the BIO on the worker queue, where the worker thread will pick
 	 * it up.
 	 */
 	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
 	bq->bp = bp;
 	mtx_lock(&d->bqueue_mtx);
 	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
 	wakeup(d);
 	mtx_unlock(&d->bqueue_mtx);
 
 }
 
 /*
  * Per-drive worker thread; 'arg' is the struct gv_drive it serves.
  *
  * Loops over d->bqueue until GV_DRIVE_THREAD_DIE is set in d->flags.  Queue
  * entries whose BIO is flagged GV_BIO_DONE are completed with g_std_done();
  * all others are cloned and sent to the geom's first consumer.
  * d->bqueue_mtx is held while the queue is examined and released while a
  * request is being processed.  On exit the remaining queue is drained and
  * GV_DRIVE_THREAD_DEAD is set.
  */
 static void
 gv_drive_worker(void *arg)
 {
 	struct bio *bp, *cbp;
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct gv_drive *d;
 	struct gv_sd *s;
 	struct gv_bioq *bq, *bq2;
 	int error;
 
 	d = arg;
 
 	mtx_lock(&d->bqueue_mtx);
 	for (;;) {
 		/* We were signaled to exit. */
 		if (d->flags & GV_DRIVE_THREAD_DIE)
 			break;
 
 		/* Take the first BIO from our queue. */
 		bq = TAILQ_FIRST(&d->bqueue);
 		if (bq == NULL) {
 			/* Empty queue: sleep until woken up or hz/10 ticks pass. */
 			msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
 			continue;
  		}
 		TAILQ_REMOVE(&d->bqueue, bq, queue);
 		mtx_unlock(&d->bqueue_mtx);
  
 		/* The queue entry is only a wrapper; the BIO is what we need. */
 		bp = bq->bp;
 		g_free(bq);
 		pp = bp->bio_to;
 		gp = pp->geom;
 
 		/* Completed request. */
 		if (bp->bio_cflags & GV_BIO_DONE) {
 			/* Save the error first; bp is handed off to g_std_done(). */
 			error = bp->bio_error;
 
 			/* Deliver the original request. */
 			g_std_done(bp);
 
 			/* The request had an error, we need to clean up. */
 			if (error != 0) {
 				g_topology_lock();
 				gv_set_drive_state(d, GV_DRIVE_DOWN,
 				    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
 				g_topology_unlock();
 				g_post_event(gv_drive_dead, d, M_WAITOK, d,
 				    NULL);
 			}
 
 		/* New request, needs to be sent downwards. */
 		} else {
 			s = pp->private;
 
 			/*
 			 * Every early exit below re-takes the queue mutex
 			 * before 'continue', because the top of the loop
 			 * expects it to be held.
 			 */
 			if ((s->state == GV_SD_DOWN) ||
 			    (s->state == GV_SD_STALE)) {
 				g_io_deliver(bp, ENXIO);
 				mtx_lock(&d->bqueue_mtx);
 				continue;
 			}
 			/*
 			 * NOTE(review): an offset equal to s->size passes this
 			 * check -- confirm whether '>=' was intended.
 			 */
 			if (bp->bio_offset > s->size) {
 				g_io_deliver(bp, EINVAL);
 				mtx_lock(&d->bqueue_mtx);
 				continue;
 			}
 
 			cbp = g_clone_bio(bp);
 			if (cbp == NULL) {
 				g_io_deliver(bp, ENOMEM);
 				mtx_lock(&d->bqueue_mtx);
 				continue;
 			}
 			/* Trim the clone so it ends at the end of the subdisk. */
 			if (cbp->bio_offset + cbp->bio_length > s->size)
 				cbp->bio_length = s->size -
 				    cbp->bio_offset;
 			cbp->bio_done = gv_drive_done;
 			/* Shift the offset by the subdisk's position on the drive. */
 			cbp->bio_offset += s->drive_offset;
 			g_io_request(cbp, LIST_FIRST(&gp->consumer));
 		}
 
 		mtx_lock(&d->bqueue_mtx);
 	}
 
 	/*
 	 * Drain what is still queued: finish completed requests and fail the
 	 * ones that were never sent down with ENXIO.
 	 */
 	TAILQ_FOREACH_SAFE(bq, &d->bqueue, queue, bq2) {
 		TAILQ_REMOVE(&d->bqueue, bq, queue);
 		mtx_unlock(&d->bqueue_mtx);
 		bp = bq->bp;
 		g_free(bq);
 		if (bp->bio_cflags & GV_BIO_DONE) 
 			g_std_done(bp);
 		else
 			g_io_deliver(bp, ENXIO);
 		mtx_lock(&d->bqueue_mtx);
 	}
 	mtx_unlock(&d->bqueue_mtx);
 	/* Record that the thread is gone (set after the mutex is released). */
 	d->flags |= GV_DRIVE_THREAD_DEAD;
 
 	kthread_exit(ENXIO);
 }
 
 
 /*
  * Orphan method: the provider below consumer 'cp' went away.  A geom that
  * still has a drive attached gets the drive forced down and its teardown
  * scheduled through gv_drive_dead(); a geom without one is withered
  * directly.
  */
 static void
 gv_drive_orphan(struct g_consumer *cp)
 {
 	struct g_geom *geom;
 	struct gv_drive *drv;
 
 	g_topology_assert();
 	geom = cp->geom;
 	g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", geom->name);
 
 	drv = geom->softc;
 	if (drv == NULL) {
 		g_wither_geom(geom, ENXIO);
 		return;
 	}
 
 	gv_set_drive_state(drv, GV_DRIVE_DOWN,
 	    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
 	g_post_event(gv_drive_dead, drv, M_WAITOK, drv, NULL);
 }
 
 /*
  * Taste method of the VINUMDRIVE class.
  *
  * Reads the vinum header from provider 'pp' and, if the magic matches,
  * parses the on-disk configuration, looks up or creates the matching
  * gv_drive, starts its worker thread if necessary, links its subdisks and
  * marks the drive up.  Returns the new geom on success.  Returns NULL, with
  * the geom and consumer destroyed again, if there is no vinum geom, the
  * provider cannot be opened or read, the magic does not match, or the drive
  * already has a geom.
  */
 static struct g_geom *
 gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_geom *gp, *gp2;
 	struct g_consumer *cp;
 	struct gv_drive *d;
 	struct gv_sd *s;
 	struct gv_softc *sc;
 	struct gv_freelist *fl;
 	struct gv_hdr *vhdr;
 	int error;
 	char *buf, errstr[ERRBUFSIZ];
 
 	vhdr = NULL;
 	d = NULL;
 
 	g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
 	g_topology_assert();
 
 	/* Find the VINUM class and its associated geom. */
 	gp2 = find_vinum_geom();
 	if (gp2 == NULL)
 		return (NULL);
 	sc = gp2->softc;
 
 	/* NOTE(review): gp->start is assigned twice below; one is redundant. */
 	gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
 	gp->start = gv_drive_start;
 	gp->orphan = gv_drive_orphan;
 	gp->access = gv_drive_access;
 	gp->start = gv_drive_start;
 
 	/* Attach to the provider and take a read reference for tasting. */
 	cp = g_new_consumer(gp);
 	g_attach(cp, pp);
 	error = g_access(cp, 1, 0, 0);
 	if (error) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		g_destroy_geom(gp);
 		return (NULL);
 	}
 
 	g_topology_unlock();
 
 	/*
 	 * Now check if the provided slice is a valid vinum drive.  Every
 	 * 'break' out of this block leaves with the topology lock released
 	 * and falls through to the cleanup code after the loop.
 	 */
 	do {
 		vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, &error);
-		if (vhdr == NULL || error != 0)
+		if (vhdr == NULL)
 			break;
 		if (vhdr->magic != GV_MAGIC) {
 			g_free(vhdr);
 			break;
 		}
 
 		/* A valid vinum drive, let's parse the on-disk information. */
 		buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, &error);
 		if (buf == NULL || error != 0) {
 			g_free(vhdr);
 			break;
 		}
 		g_topology_lock();
 		gv_parse_config(sc, buf, 1);
 		g_free(buf);
 
 		/*
 		 * Let's see if this drive is already known in the
 		 * configuration.
 		 */
 		d = gv_find_drive(sc, vhdr->label.name);
 
 		/* We already know about this drive. */
 		if (d != NULL) {
 			/*
 			 * Check if this drive already has a geom.
 			 * NOTE(review): vhdr is not freed on this path.
 			 */
 			if (d->geom != NULL) {
 				g_topology_unlock();
 				break;
 			}
 			/*
 			 * NOTE(review): the header is copied into d->hdr, but
 			 * vhdr itself is not freed afterwards on this path.
 			 */
 			bcopy(vhdr, d->hdr, sizeof(*vhdr));
 
 		/* This is a new drive. */
 		} else {
 			d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
 
 			/* Initialize all needed variables. */
 			d->size = pp->mediasize - GV_DATA_START;
 			d->avail = d->size;
 			/* The drive keeps the header buffer that was just read. */
 			d->hdr = vhdr;
 			strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
 			LIST_INIT(&d->subdisks);
 			LIST_INIT(&d->freelist);
 
 			/* We also need a freelist entry. */
 			fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
 			fl->offset = GV_DATA_START;
 			fl->size = d->avail;
 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
 			d->freelist_entries = 1;
 
 			TAILQ_INIT(&d->bqueue);
 
 			/* Save it into the main configuration. */
 			LIST_INSERT_HEAD(&sc->drives, d, drive);
 		}
 
 		/*
 		 * Create a bio queue mutex and a worker thread, if necessary.
 		 */
 		if (mtx_initialized(&d->bqueue_mtx) == 0)
 			mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
 
 		if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) {
 			kthread_create(gv_drive_worker, d, NULL, 0, 0,
 			    "gv_d %s", d->name);
 			d->flags |= GV_DRIVE_THREAD_ACTIVE;
 		}
 
 		/* Tasting is done; drop the read reference taken above. */
 		g_access(cp, -1, 0, 0);
 
 		/* Cross-link the geom, the drive and the vinum softc. */
 		gp->softc = d;
 		d->geom = gp;
 		d->vinumconf = sc;
 		strncpy(d->device, pp->name, GV_MAXDRIVENAME);
 
 		/*
 		 * Find out which subdisks belong to this drive and crosslink
 		 * them.
 		 */
 		LIST_FOREACH(s, &sc->subdisks, sd) {
 			if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
 				/* XXX: errors ignored */
 				gv_sd_to_drive(sc, d, s, errstr,
 				    sizeof(errstr));
 		}
 
 		/* This drive is now up for sure. */
 		gv_set_drive_state(d, GV_DRIVE_UP, 0);
 
 		/*
 		 * If there are subdisks on this drive, we need to create
 		 * providers for them.
 		 */ 
 		if (d->sdcount)
 			gv_drive_modify(d);
 
 		return (gp);
 
 	} while (0);
 
 	/* Failure: undo the access, the attachment and the geom itself. */
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	return (NULL);
 }
 
 /*
  * Give every subdisk of drive 'd' that does not have one yet a provider
  * named "gvinum/sd/<subdisk name>".  The subdisk list of 'd' is assumed to
  * be set up correctly already, and the topology lock must be held.
  */
 void
 gv_drive_modify(struct gv_drive *d)
 {
 	struct g_consumer *cons;
 	struct g_geom *geom;
 	struct g_provider *below, *sdpp;
 	struct gv_sd *sd;
 
 	KASSERT(d != NULL, ("gv_drive_modify: null d"));
 	geom = d->geom;
 	KASSERT(geom != NULL, ("gv_drive_modify: null gp"));
 	cons = LIST_FIRST(&geom->consumer);
 	KASSERT(cons != NULL, ("gv_drive_modify: null cp"));
 	below = cons->provider;
 	KASSERT(below != NULL, ("gv_drive_modify: null pp"));
 
 	g_topology_assert();
 
 	LIST_FOREACH(sd, &d->subdisks, from_drive) {
 		/* Subdisks that already have a provider are left alone. */
 		if (sd->provider == NULL) {
 			sdpp = g_new_providerf(geom, "gvinum/sd/%s", sd->name);
 			/* Size from the subdisk, sector size from below. */
 			sdpp->mediasize = sd->size;
 			sdpp->sectorsize = below->sectorsize;
 			g_error_provider(sdpp, 0);
 			sd->provider = sdpp;
 			sdpp->private = sd;
 		}
 	}
 }
 
 /*
  * Event handler that tears down the geom of a drive that has gone down;
  * 'arg' is the struct gv_drive.  If a consumer still has requests in
  * flight the event is posted again and nothing else is done.  Otherwise
  * all access counts are dropped, the subdisks are unlinked from their
  * providers and consumers, the worker thread is killed and the geom is
  * withered.
  */
 static void
 gv_drive_dead(void *arg, int flag)
 {
 	struct g_consumer *cons;
 	struct g_geom *geom;
 	struct gv_drive *d;
 	struct gv_sd *sd;
 
 	g_topology_assert();
 	KASSERT(arg != NULL, ("gv_drive_dead: NULL arg"));
 
 	/* Ignore cancelled events and drives that are not down. */
 	d = arg;
 	if (flag == EV_CANCEL || d->state != GV_DRIVE_DOWN)
 		return;
 
 	g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name);
 
 	geom = d->geom;
 	if (geom == NULL)
 		return;
 
 	LIST_FOREACH(cons, &geom->consumer, consumer) {
 		/* Started and finished counts differ: try again later. */
 		if (cons->nstart != cons->nend) {
 			printf("GEOM_VINUM: dead drive '%s' has still "
 			    "active requests, can't detach consumer\n",
 			    d->name);
 			g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL);
 			return;
 		}
 		/* Release whatever access this consumer still holds. */
 		if (cons->acr != 0 || cons->acw != 0 || cons->ace != 0)
 			g_access(cons, -cons->acr, -cons->acw, -cons->ace);
 	}
 
 	printf("GEOM_VINUM: lost drive '%s'\n", d->name);
 	d->geom = NULL;
 	LIST_FOREACH(sd, &d->subdisks, from_drive) {
 		sd->consumer = NULL;
 		sd->provider = NULL;
 	}
 	gv_kill_drive_thread(d);
 	geom->softc = NULL;
 	g_wither_geom(geom, ENXIO);
 }
 
 static int
 gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp)
 {
 	struct gv_drive *d;
 
 	g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
 	g_topology_assert();
 
 	d = gp->softc;
 	gv_kill_drive_thread(d);
 
 	g_wither_geom(gp, ENXIO);
 	return (0);
 }
 
 #define	VINUMDRIVE_CLASS_NAME "VINUMDRIVE"
 
 /* Method table describing the VINUMDRIVE class to GEOM. */
 static struct g_class g_vinum_drive_class = {
 	.version = G_VERSION,
 	.name = VINUMDRIVE_CLASS_NAME,
 	.destroy_geom = gv_drive_destroy_geom,
 	.taste = gv_drive_taste,
 };
 
 DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);