Index: stable/9/sys/geom/geom_ctl.c
===================================================================
--- stable/9/sys/geom/geom_ctl.c	(revision 299397)
+++ stable/9/sys/geom/geom_ctl.c	(revision 299398)
@@ -1,513 +1,513 @@
 /*-
  * Copyright (c) 2002 Poul-Henning Kamp
  * Copyright (c) 2002 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
  * and NAI Labs, the Security Research Division of Network Associates, Inc.
  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The names of the authors may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_geom.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/bio.h>
 #include <sys/conf.h>
 #include <sys/disk.h>
 #include <sys/malloc.h>
 #include <sys/sysctl.h>
 #include <sys/sbuf.h>
 
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <geom/geom.h>
 #include <geom/geom_int.h>
 #define GCTL_TABLE 1
 #include <geom/geom_ctl.h>
 
 #include <machine/stdarg.h>
 
 static d_ioctl_t g_ctl_ioctl;
 
 /*
  * Character-device switch for the GEOM control node.  The only entry
  * point it supplies is the ioctl handler, g_ctl_ioctl().
  */
 static struct cdevsw g_ctl_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	D_NEEDGIANT,
 	.d_ioctl =	g_ctl_ioctl,
 	.d_name =	"g_ctl",
 };
 
 /*
  * Create the GEOM control device node (PATH_GEOM_CTL, owned by
  * root:operator, mode 0640) and assert that the GCTL_PARAM_RD/WR flag
  * values are numerically equal to VM_PROT_READ/VM_PROT_WRITE.
  */
 void
 g_ctl_init(void)
 {
 
 	make_dev_credf(MAKEDEV_ETERNAL, &g_ctl_cdevsw, 0, NULL,
 	    UID_ROOT, GID_OPERATOR, 0640, PATH_GEOM_CTL);
 	KASSERT(GCTL_PARAM_RD == VM_PROT_READ,
 		("GCTL_PARAM_RD != VM_PROT_READ"));
 	KASSERT(GCTL_PARAM_WR == VM_PROT_WRITE,
 		("GCTL_PARAM_WR != VM_PROT_WRITE"));
 }
 
 /*
  * Attach a printf-style error message to a request.
  *
  * Only the first message is kept: once the error sbuf has been finished
  * later calls leave the text alone.  req->nerror is given a value if it
  * has none yet (EINVAL with the first message, EEXIST if a message was
  * already recorded) and is returned.  A NULL request yields EINVAL.
  */
 int
 gctl_error(struct gctl_req *req, const char *fmt, ...)
 {
 	va_list args;
 	int recorded;
 
 	if (req == NULL)
 		return (EINVAL);
 
 	/* A finished sbuf means an earlier error text is already stored. */
 	recorded = sbuf_done(req->serror);
 	if (req->nerror == 0)
 		req->nerror = recorded ? EEXIST : EINVAL;
 	if (recorded)
 		return (req->nerror);
 
 	va_start(args, fmt);
 	sbuf_vprintf(req->serror, fmt, args);
 	va_end(args);
 	sbuf_finish(req->serror);
 
 	if ((g_debugflags & G_F_CTLDUMP) != 0)
 		printf("gctl %p error \"%s\"\n", req, sbuf_data(req->serror));
 	return (req->nerror);
 }
 
 /*
  * Allocate `len' bytes and fill them with copyin() from `uaddr'.
  * The copyin() result is stored in req->nerror.  Returns the new buffer,
  * or NULL (with the buffer released) if the copy failed.
  * XXX: this should really be a standard function in the kernel.
  */
 static void *
 geom_alloc_copyin(struct gctl_req *req, void *uaddr, size_t len)
 {
 	void *kbuf;
 
 	kbuf = g_malloc(len, M_WAITOK);
 	req->nerror = copyin(uaddr, kbuf, len);
 	if (req->nerror != 0) {
 		g_free(kbuf);
 		kbuf = NULL;
 	}
 	return (kbuf);
 }
 
 /*
  * Bring a control request on board: copy the argument vector from user
  * space, then the name and value of every argument.
  *
  * On return req->arg points at the kernel copy of the vector, or is NULL
  * if the vector itself could not be copied.  Each argument is flagged
  * with GCTL_PARAM_NAMEKERNEL / GCTL_PARAM_VALUEKERNEL for the pieces that
  * were copied, which is what gctl_free() uses to decide what to release.
  * Processing stops at the first bad argument; the failure is left in
  * req->nerror, together with a gctl_error() message where one applies.
  */
 static void
 gctl_copyin(struct gctl_req *req)
 {
 	struct gctl_req_arg *ap;
 	char *p;
-	int i;
+	u_int i;
 
 	ap = geom_alloc_copyin(req, req->arg, req->narg * sizeof(*ap));
 	if (ap == NULL) {
 		gctl_error(req, "bad control request");
 		req->arg = NULL;
 		return;
 	}
 
 	/* Nothing has been copyin()'ed yet */
 	for (i = 0; i < req->narg; i++) {
 		ap[i].flag &= ~(GCTL_PARAM_NAMEKERNEL|GCTL_PARAM_VALUEKERNEL);
 		ap[i].flag &= ~GCTL_PARAM_CHANGED;
 		ap[i].kvalue = NULL;
 	}
 
 	for (i = 0; i < req->narg; i++) {
 		if (ap[i].nlen < 1 || ap[i].nlen > SPECNAMELEN) {
 			/* i is unsigned, so it is printed with %u. */
 			gctl_error(req,
 			    "wrong param name length %u: %d", i, ap[i].nlen);
 			break;
 		}
 		p = geom_alloc_copyin(req, ap[i].name, ap[i].nlen);
 		if (p == NULL)
 			break;
 		if (p[ap[i].nlen - 1] != '\0') {
 			gctl_error(req, "unterminated param name");
 			g_free(p);
 			break;
 		}
 		ap[i].name = p;
 		ap[i].flag |= GCTL_PARAM_NAMEKERNEL;
 		if (ap[i].len <= 0) {
 			gctl_error(req, "negative param length");
 			break;
 		}
 		p = geom_alloc_copyin(req, ap[i].value, ap[i].len);
 		if (p == NULL)
 			break;
 		/* ASCII values must arrive NUL-terminated. */
 		if ((ap[i].flag & GCTL_PARAM_ASCII) &&
 		    p[ap[i].len - 1] != '\0') {
 			gctl_error(req, "unterminated param value");
 			g_free(p);
 			break;
 		}
 		ap[i].kvalue = p;
 		ap[i].flag |= GCTL_PARAM_VALUEKERNEL;
 	}
 	req->arg = ap;
 	return;
 }
 
 static void
 gctl_copyout(struct gctl_req *req)
 {
 	int error, i;
 	struct gctl_req_arg *ap;
 
 	if (req->nerror)
 		return;
 	error = 0;
 	ap = req->arg;
 	for (i = 0; i < req->narg; i++, ap++) {
 		if (!(ap->flag & GCTL_PARAM_CHANGED))
 			continue;
 		error = copyout(ap->kvalue, ap->value, ap->len);
 		if (!error)
 			continue;
 		req->nerror = error;
 		return;
 	}
 	return;
 }
 
 /*
  * Release everything attached to a request: the error sbuf, each
  * parameter name and value buffer that is flagged as a kernel copy, and
  * finally the argument array itself.
  */
 static void
 gctl_free(struct gctl_req *req)
 {
-	int i;
+	u_int i;
 
 	sbuf_delete(req->serror);
 	/* No argument vector was brought in; nothing more to free. */
 	if (req->arg == NULL)
 		return;
 	for (i = 0; i < req->narg; i++) {
 		if (req->arg[i].flag & GCTL_PARAM_NAMEKERNEL)
 			g_free(req->arg[i].name);
 		if ((req->arg[i].flag & GCTL_PARAM_VALUEKERNEL) &&
 		    req->arg[i].len > 0)
 			g_free(req->arg[i].kvalue);
 	}
 	g_free(req->arg);
 }
 
 static void
 gctl_dump(struct gctl_req *req)
 {
 	struct gctl_req_arg *ap;
 	u_int i;
 	int j;
 
 	printf("Dump of gctl request at %p:\n", req);
 	if (req->nerror > 0) {
 		printf("  nerror:\t%d\n", req->nerror);
 		if (sbuf_len(req->serror) > 0)
 			printf("  error:\t\"%s\"\n", sbuf_data(req->serror));
 	}
 	if (req->arg == NULL)
 		return;
 	for (i = 0; i < req->narg; i++) {
 		ap = &req->arg[i];
 		if (!(ap->flag & GCTL_PARAM_NAMEKERNEL))
 			printf("  param:\t%d@%p", ap->nlen, ap->name);
 		else
 			printf("  param:\t\"%s\"", ap->name);
 		printf(" [%s%s%d] = ",
 		    ap->flag & GCTL_PARAM_RD ? "R" : "",
 		    ap->flag & GCTL_PARAM_WR ? "W" : "",
 		    ap->len);
 		if (!(ap->flag & GCTL_PARAM_VALUEKERNEL)) {
 			printf(" =@ %p", ap->value);
 		} else if (ap->flag & GCTL_PARAM_ASCII) {
 			printf("\"%s\"", (char *)ap->kvalue);
 		} else if (ap->len > 0) {
 			for (j = 0; j < ap->len && j < 512; j++)
 				printf(" %02x", ((u_char *)ap->kvalue)[j]);
 		} else {
 			printf(" = %p", ap->kvalue);
 		}
 		printf("\n");
 	}
 }
 
 /*
  * Store `len' bytes from `ptr' into the kernel value buffer of the first
  * parameter named `param' and mark it GCTL_PARAM_CHANGED.
  *
  * Returns 0 on success, EINVAL if no parameter has that name, EPERM if
  * the parameter is not writable, and ENOSPC if the parameter is shorter
  * than `len' (in which case only ap->len bytes are copied).
  */
 int
 gctl_set_param(struct gctl_req *req, const char *param, void const *ptr,
     int len)
 {
-	int i;
+	u_int i;
 	struct gctl_req_arg *ap;
 
 	for (i = 0; i < req->narg; i++) {
 		ap = &req->arg[i];
 		if (strcmp(param, ap->name))
 			continue;
 		if (!(ap->flag & GCTL_PARAM_WR))
 			return (EPERM);
 		ap->flag |= GCTL_PARAM_CHANGED;
 		/* Truncate to the space the caller provided. */
 		if (ap->len < len) {
 			bcopy(ptr, ap->kvalue, ap->len);
 			return (ENOSPC);
 		}
 		bcopy(ptr, ap->kvalue, len);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Same as gctl_set_param(), except that the EPERM, ENOSPC and EINVAL
  * results are turned into an error message on the request instead of
  * being returned to the caller.
  */
 void
 gctl_set_param_err(struct gctl_req *req, const char *param, void const *ptr,
     int len)
 {
 	int rc;
 
 	rc = gctl_set_param(req, param, ptr, len);
 	if (rc == EPERM)
 		gctl_error(req, "No write access %s argument", param);
 	else if (rc == ENOSPC)
 		gctl_error(req, "Wrong length %s argument", param);
 	else if (rc == EINVAL)
 		gctl_error(req, "Missing %s argument", param);
 }
 
 /*
  * Look up the first readable (GCTL_PARAM_RD) parameter named `param'.
  * Returns its kernel value pointer and, when `len' is not NULL, stores
  * the parameter length in *len.  Returns NULL if there is no match.
  */
 void *
 gctl_get_param(struct gctl_req *req, const char *param, int *len)
 {
-	int i;
+	u_int i;
 	void *p;
 	struct gctl_req_arg *ap;
 
 	for (i = 0; i < req->narg; i++) {
 		ap = &req->arg[i];
 		if (strcmp(param, ap->name))
 			continue;
 		if (!(ap->flag & GCTL_PARAM_RD))
 			continue;
 		p = ap->kvalue;
 		if (len != NULL)
 			*len = ap->len;
 		return (p);
 	}
 	return (NULL);
 }
 
 /*
  * Look up the first readable parameter named `param' and return its
  * value as a C string.  A value with length below 1, or whose last byte
  * is not NUL, is reported through gctl_error() and NULL is returned.
  * NULL is also returned, without an error message, if no parameter
  * matches.
  */
 char const *
 gctl_get_asciiparam(struct gctl_req *req, const char *param)
 {
-	int i;
+	u_int i;
 	char const *p;
 	struct gctl_req_arg *ap;
 
 	for (i = 0; i < req->narg; i++) {
 		ap = &req->arg[i];
 		if (strcmp(param, ap->name))
 			continue;
 		if (!(ap->flag & GCTL_PARAM_RD))
 			continue;
 		p = ap->kvalue;
 		if (ap->len < 1) {
 			gctl_error(req, "No length argument (%s)", param);
 			return (NULL);
 		}
 		if (p[ap->len - 1] != '\0') {
 			gctl_error(req, "Unterminated argument (%s)", param);
 			return (NULL);
 		}
 		return (p);
 	}
 	return (NULL);
 }
 
 /*
  * Fetch a parameter that must be exactly `len' bytes long.  A missing
  * parameter or a length mismatch is reported through gctl_error() and
  * yields NULL.
  */
 void *
 gctl_get_paraml(struct gctl_req *req, const char *param, int len)
 {
 	void *val;
 	int vlen;
 
 	val = gctl_get_param(req, param, &vlen);
 	if (val == NULL) {
 		gctl_error(req, "Missing %s argument", param);
 		return (NULL);
 	}
 	if (vlen != len) {
 		gctl_error(req, "Wrong length %s argument", param);
 		return (NULL);
 	}
 	return (val);
 }
 
 /*
  * Resolve the string parameter `arg' to the GEOM class of that name.
  * Returns NULL if the parameter is unusable or no class matches.
  */
 struct g_class *
 gctl_get_class(struct gctl_req *req, char const *arg)
 {
 	struct g_class *mp;
 	char const *name;
 
 	name = gctl_get_asciiparam(req, arg);
 	if (name != NULL) {
 		LIST_FOREACH(mp, &g_classes, class) {
 			if (strcmp(name, mp->name) == 0)
 				return (mp);
 		}
 	}
 	return (NULL);
 }
 
 struct g_geom *
 gctl_get_geom(struct gctl_req *req, struct g_class *mpr, char const *arg)
 {
 	char const *p;
 	struct g_class *mp;
 	struct g_geom *gp;
 
 	p = gctl_get_asciiparam(req, arg);
 	if (p == NULL)
 		return (NULL);
 	LIST_FOREACH(mp, &g_classes, class) {
 		if (mpr != NULL && mpr != mp)
 			continue;
 		LIST_FOREACH(gp, &mp->geom, geom) {
 			if (!strcmp(p, gp->name))
 				return (gp);
 		}
 	}
 	gctl_error(req, "Geom not found: \"%s\"", p);
 	return (NULL);
 }
 
 /*
  * Resolve the string parameter `arg' to a provider via
  * g_provider_by_name().  An unknown name is reported through
  * gctl_error() and yields NULL.
  */
 struct g_provider *
 gctl_get_provider(struct gctl_req *req, char const *arg)
 {
 	struct g_provider *prov;
 	char const *name;
 
 	name = gctl_get_asciiparam(req, arg);
 	if (name == NULL)
 		return (NULL);
 	prov = g_provider_by_name(name);
 	if (prov == NULL)
 		gctl_error(req, "Provider not found: \"%s\"", name);
 	return (prov);
 }
 
 /*
  * Event handler that dispatches a copied-in request to the ctlreq method
  * of the class named by its "class" parameter, passing along the "verb"
  * parameter.  Failures are reported on the request through gctl_error().
  * The topology lock is asserted on entry and again after the class
  * method returns.
  */
 static void
 g_ctl_req(void *arg, int flag __unused)
 {
 	struct g_class *mp;
 	struct gctl_req *req;
 	char const *verb;
 
 	g_topology_assert();
 	req = arg;
 	mp = gctl_get_class(req, "class");
 	if (mp == NULL) {
 		gctl_error(req, "Class not found");
 		return;
 	}
 	if (mp->ctlreq == NULL) {
 		gctl_error(req, "Class takes no requests");
 		return;
 	}
 	/*
 	 * The verb is handed to the class as a C string, so fetch it with
 	 * the accessor that rejects values lacking a terminating NUL;
 	 * gctl_get_param() would return such a buffer unchecked.
 	 */
 	verb = gctl_get_asciiparam(req, "verb");
 	if (verb == NULL) {
 		/* Kept only if no more specific error was recorded above. */
 		gctl_error(req, "Verb missing");
 		return;
 	}
 	mp->ctlreq(req, mp, verb);
 	g_topology_assert();
 }
 
 
 /*
  * Handle one GEOM_CTL ioctl.  `data' is used as the struct gctl_req.
  *
  * The request is refused with EINVAL when its error buffer is shorter
  * than two bytes or is not writable by the caller.  Otherwise the
  * arguments are copied in, the request is run through g_ctl_req() as a
  * GEOM event, changed parameters are copied back out, and any recorded
  * error text is copied to the user's error buffer.  Returns req->nerror.
  */
 static int
 g_ctl_ioctl_ctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
 {
 	struct gctl_req *req;
 	int nerror;
 
 	req = (void *)data;
 	req->nerror = 0;
 	/* It is an error if we cannot return an error text */
 	if (req->lerror < 2)
 		return (EINVAL);
 	if (!useracc(req->error, req->lerror, VM_PROT_WRITE))
 		return (EINVAL);
 
 	req->serror = sbuf_new_auto();
 	/* Check the version */
 	if (req->version != GCTL_VERSION) {
 		gctl_error(req, "kernel and libgeom version mismatch.");
 		/* Nothing was copied in, so gctl_free() must skip the args. */
 		req->arg = NULL;
 	} else {
 		/* Get things on board */
 		gctl_copyin(req);
 
 		if (g_debugflags & G_F_CTLDUMP)
 			gctl_dump(req);
 
 		if (!req->nerror) {
 			g_waitfor_event(g_ctl_req, req, M_WAITOK, NULL);
 			gctl_copyout(req);
 		}
 	}
 	/*
 	 * Hand the error text back, limited to the size of the user's
 	 * buffer.  The copyout() result is not checked.
 	 */
 	if (sbuf_done(req->serror)) {
 		copyout(sbuf_data(req->serror), req->error,
 		    imin(req->lerror, sbuf_len(req->serror) + 1));
 	}
 
 	/* Save the result first: gctl_free() tears the request down. */
 	nerror = req->nerror;
 	gctl_free(req);
 	return (nerror);
 }
 
 /*
  * ioctl entry point of the control device.  GEOM_CTL is forwarded to
  * g_ctl_ioctl_ctl(); every other command yields ENOIOCTL.
  */
 static int
 g_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
 {
 
 	if (cmd == GEOM_CTL)
 		return (g_ctl_ioctl_ctl(dev, cmd, data, fflag, td));
 	return (ENOIOCTL);
 }
Index: stable/9/sys/geom/mirror/g_mirror.c
===================================================================
--- stable/9/sys/geom/mirror/g_mirror.c	(revision 299397)
+++ stable/9/sys/geom/mirror/g_mirror.c	(revision 299398)
@@ -1,3297 +1,3297 @@
 /*-
  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/eventhandler.h>
 #include <vm/uma.h>
 #include <geom/geom.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
 #include <sys/sched.h>
 #include <geom/mirror/g_mirror.h>
 
 FEATURE(geom_mirror, "GEOM mirroring support");
 
 static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
 
 /*
  * kern.geom.mirror.* knobs.  Each variable is registered both as a
  * loader tunable and as a sysctl.
  * NOTE(review): the variables are u_int but are registered with
  * TUNABLE_INT -- confirm the signedness mismatch is intended.
  */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
     "GEOM_MIRROR stuff");
 /* Debug level compared against by the debug macros. */
 u_int g_mirror_debug = 0;
 TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
     "Debug level");
 /* Time to wait for all mirror components (default 4). */
 static u_int g_mirror_timeout = 4;
 TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
     0, "Time to wait on all mirror components");
 /* Idle time after which components are marked clean (default 5). */
 static u_int g_mirror_idletime = 5;
 TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
     &g_mirror_idletime, 0, "Mark components as clean when idling");
 /* Non-zero: disconnect a component when I/O to it fails. */
 static u_int g_mirror_disconnect_on_failure = 1;
 TUNABLE_INT("kern.geom.mirror.disconnect_on_failure",
     &g_mirror_disconnect_on_failure);
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
     &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
 /* Number of parallel synchronization requests; declared CTLFLAG_RDTUN. */
 static u_int g_mirror_syncreqs = 2;
 TUNABLE_INT("kern.geom.mirror.sync_requests", &g_mirror_syncreqs);
 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
     &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
 
 /*
  * msleep() wrapper that logs, at debug level 4, the sleep channel before
  * going to sleep and again after waking up.
  */
 #define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
 	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
 } while (0)
 
 static eventhandler_tag g_mirror_post_sync = NULL;
 static int g_mirror_shutdown = 0;
 
 static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 static g_taste_t g_mirror_taste;
 static void g_mirror_init(struct g_class *mp);
 static void g_mirror_fini(struct g_class *mp);
 
 /*
  * GEOM class descriptor for the mirror class: wires up the control
  * request handler, the taste and destroy_geom methods and the class
  * init/fini hooks.
  */
 struct g_class g_mirror_class = {
 	.name = G_MIRROR_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_mirror_config,
 	.taste = g_mirror_taste,
 	.destroy_geom = g_mirror_destroy_geom,
 	.init = g_mirror_init,
 	.fini = g_mirror_fini
 };
 
 
 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
 static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
 static void g_mirror_register_request(struct bio *bp);
 static void g_mirror_sync_release(struct g_mirror_softc *sc);
 
 
 /*
  * Map a G_MIRROR_DISK_STATE_* value to its printable name; any other
  * value maps to "INVALID".
  */
 static const char *
 g_mirror_disk_state2str(int state)
 {
 	const char *name;
 
 	switch (state) {
 	case G_MIRROR_DISK_STATE_NONE:
 		name = "NONE";
 		break;
 	case G_MIRROR_DISK_STATE_NEW:
 		name = "NEW";
 		break;
 	case G_MIRROR_DISK_STATE_ACTIVE:
 		name = "ACTIVE";
 		break;
 	case G_MIRROR_DISK_STATE_STALE:
 		name = "STALE";
 		break;
 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 		name = "SYNCHRONIZING";
 		break;
 	case G_MIRROR_DISK_STATE_DISCONNECTED:
 		name = "DISCONNECTED";
 		break;
 	case G_MIRROR_DISK_STATE_DESTROY:
 		name = "DESTROY";
 		break;
 	default:
 		name = "INVALID";
 		break;
 	}
 	return (name);
 }
 
 /*
  * Map a G_MIRROR_DEVICE_STATE_* value to its printable name; any other
  * value maps to "INVALID".
  */
 static const char *
 g_mirror_device_state2str(int state)
 {
 
 	if (state == G_MIRROR_DEVICE_STATE_STARTING)
 		return ("STARTING");
 	if (state == G_MIRROR_DEVICE_STATE_RUNNING)
 		return ("RUNNING");
 	return ("INVALID");
 }
 
 /*
  * Return the disk's name, or "[unknown]" when the disk has no consumer
  * or the consumer is not attached to a provider.
  */
 static const char *
 g_mirror_get_diskname(struct g_mirror_disk *disk)
 {
 
 	if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL)
 		return (disk->d_name);
 	return ("[unknown]");
 }
 
 /*
  * --- Events handling functions ---
  * Events in geom_mirror are used to maintain disks and device status
  * from one thread to simplify locking.
  */
 /* Release an event allocated from the M_MIRROR malloc type. */
 static void
 g_mirror_event_free(struct g_mirror_event *ep)
 {
 
 	free(ep, M_MIRROR);
 }
 
 /*
  * Queue a state-change event and wake up the device's worker.
  *
  * `arg' is the softc when G_MIRROR_EVENT_DEVICE is set in `flags',
  * otherwise it is the disk the event refers to.  With
  * G_MIRROR_EVENT_DONTWAIT the function returns 0 as soon as the event is
  * queued.  Otherwise the caller must hold sc_lock exclusively; the lock
  * is dropped while sleeping until the event is flagged
  * G_MIRROR_EVENT_DONE, then the event is freed, sc_lock is re-acquired
  * and the event's e_error is returned.
  */
 int
 g_mirror_event_send(void *arg, int state, int flags)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *disk;
 	struct g_mirror_event *ep;
 	int error;
 
 	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
 	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
 	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
 		disk = NULL;
 		sc = arg;
 	} else {
 		disk = arg;
 		sc = disk->d_softc;
 	}
 	ep->e_disk = disk;
 	ep->e_state = state;
 	ep->e_flags = flags;
 	ep->e_error = 0;
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
 	mtx_unlock(&sc->sc_events_mtx);
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
 		return (0);
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
 	sx_xunlock(&sc->sc_lock);
 	/*
 	 * Sleep on the event itself, for at most 5 seconds per pass.  The
 	 * mutex is taken again on every pass; presumably PDROP makes
 	 * msleep() return with it released -- confirm against msleep(9).
 	 */
 	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
 		mtx_lock(&sc->sc_events_mtx);
 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
 		    hz * 5);
 	}
 	error = ep->e_error;
 	g_mirror_event_free(ep);
 	sx_xlock(&sc->sc_lock);
 	return (error);
 }
 
 /*
  * Return the first queued event without removing it from the queue, or
  * NULL if the queue is empty.
  */
 static struct g_mirror_event *
 g_mirror_event_get(struct g_mirror_softc *sc)
 {
 	struct g_mirror_event *ep;
 
 	mtx_lock(&sc->sc_events_mtx);
 	ep = TAILQ_FIRST(&sc->sc_events);
 	mtx_unlock(&sc->sc_events_mtx);
 	return (ep);
 }
 
 /* Unlink `ep' from the device's event queue; the event is not freed. */
 static void
 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
 {
 
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 	mtx_unlock(&sc->sc_events_mtx);
 }
 
 /*
  * Drop every queued event that refers to `disk'.  Device-wide events are
  * left alone.  Events sent with G_MIRROR_EVENT_DONTWAIT are freed here;
  * for the others the sender is still sleeping in g_mirror_event_send()
  * and owns the event, so it is completed with ECANCELED and woken up.
  * The caller must hold sc_lock exclusively.
  */
 static void
 g_mirror_event_cancel(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_event *ep, *tmpep;
 
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	mtx_lock(&sc->sc_events_mtx);
 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
 			continue;
 		if (ep->e_disk != disk)
 			continue;
 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
 			g_mirror_event_free(ep);
 		else {
 			/*
 			 * The sender only leaves its sleep loop once
 			 * G_MIRROR_EVENT_DONE is set, so a bare wakeup()
 			 * would put it straight back to sleep.  Set the
 			 * error before the flag; the sender may free the
 			 * event as soon as it sees DONE, so ep must not be
 			 * dereferenced after this point.
 			 */
 			ep->e_error = ECANCELED;
 			ep->e_flags |= G_MIRROR_EVENT_DONE;
 			wakeup(ep);
 		}
 	}
 	mtx_unlock(&sc->sc_events_mtx);
 }
 
 /*
  * Count the disks of a device that are in the given state.  Passing -1
  * as the state counts every disk on the list.
  */
 u_int
 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
 {
 	struct g_mirror_disk *dp;
 	u_int count;
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	count = 0;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (state != -1 && dp->d_state != state)
 			continue;
 		count++;
 	}
 	return (count);
 }
 
 /*
  * Look up a disk of the mirror by its disk ID.  Returns NULL when no
  * disk carries that ID.
  */
 static struct g_mirror_disk *
 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
 {
 	struct g_mirror_disk *dp;
 
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	/* LIST_FOREACH leaves dp NULL when the list is exhausted. */
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_id == id)
 			break;
 	}
 	return (dp);
 }
 
 static u_int
 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 	struct bio *bp;
 	u_int nreqs = 0;
 
 	mtx_lock(&sc->sc_queue_mtx);
 	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
 		if (bp->bio_from == cp)
 			nreqs++;
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 	return (nreqs);
 }
 
 /*
  * Tell whether a consumer still has work attached to it: either its
  * index counter is positive or requests from it are still queued.
  * Returns 1 when busy, 0 otherwise.
  */
 static int
 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 	int busy;
 
 	busy = 0;
 	if (cp->index > 0) {
 		G_MIRROR_DEBUG(2,
 		    "I/O requests for %s exist, can't destroy it now.",
 		    cp->provider->name);
 		busy = 1;
 	} else if (g_mirror_nrequests(sc, cp) > 0) {
 		G_MIRROR_DEBUG(2,
 		    "I/O requests for %s in queue, can't destroy it now.",
 		    cp->provider->name);
 		busy = 1;
 	}
 	return (busy);
 }
 
 /*
  * GEOM event callback: detach and destroy the consumer passed as `arg'.
  * Posted from g_mirror_kill_consumer().
  */
 static void
 g_mirror_destroy_consumer(void *arg, int flags __unused)
 {
 	struct g_consumer *cp;
 
 	g_topology_assert();
 
 	cp = arg;
 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 }
 
 /*
  * Close, detach and destroy a consumer.  The consumer's private pointer
  * is always cleared; nothing else happens while the consumer is still
  * busy (see g_mirror_is_busy()).  When the consumer is open for writing
  * exactly once and the provider's geom is not withering, the final
  * detach/destroy is deferred to a GEOM event.
  */
 static void
 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	int retaste_wait;
 
 	g_topology_assert();
 
 	cp->private = NULL;
 	if (g_mirror_is_busy(sc, cp))
 		return;
 	pp = cp->provider;
 	retaste_wait = 0;
 	if (cp->acw == 1) {
 		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
 			retaste_wait = 1;
 	}
 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
 	    -cp->acw, -cp->ace, 0);
 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
 	if (retaste_wait) {
 		/*
 		 * Once the retaste event has been sent (inside g_access()),
 		 * we can send an event to detach and destroy the consumer.
 		 * A class that still has a consumer connected to the given
 		 * provider will not receive the retaste event for that
 		 * provider.
 		 * This is how retaste events are ignored when closing
 		 * consumers that were opened for writing: the consumer is
 		 * detached and destroyed only after the retaste event has
 		 * been sent.
 		 */
 		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
 		return;
 	}
 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 }
 
 /*
  * Create a consumer on the device's geom, attach it to `pp' and open it
  * r1w1e1.  On success the consumer is stored in disk->d_consumer with
  * its private pointer set to the disk and its index reset to 0, and 0 is
  * returned.  On failure the consumer is destroyed and the error from
  * g_attach() or g_access() is returned.  Must be called without the
  * topology lock, which is taken internally.
  */
 static int
 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
 {
 	struct g_consumer *cp;
 	int error;
 
 	g_topology_assert_not();
 	KASSERT(disk->d_consumer == NULL,
 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
 
 	g_topology_lock();
 	cp = g_new_consumer(disk->d_softc->sc_geom);
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		g_topology_unlock();
 		return (error);
 	}
 	error = g_access(cp, 1, 1, 1);
 	if (error != 0) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		g_topology_unlock();
 		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
 		    pp->name, error);
 		return (error);
 	}
 	g_topology_unlock();
 	disk->d_consumer = cp;
 	disk->d_consumer->private = disk;
 	disk->d_consumer->index = 0;
 
 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
 	return (0);
 }
 
 /*
  * Get rid of a consumer.  A NULL consumer is ignored, an unattached one
  * is destroyed directly, and an attached one goes through
  * g_mirror_kill_consumer().
  */
 static void
 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
 {
 
 	g_topology_assert();
 
 	if (cp == NULL)
 		return;
 	if (cp->provider == NULL) {
 		g_destroy_consumer(cp);
 		return;
 	}
 	g_mirror_kill_consumer(sc, cp);
 }
 
 /*
  * Initialize disk. This means allocate memory, create consumer, attach it
  * to the provider and open access (r1w1e1) to it.
  */
 static struct g_mirror_disk *
 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
     struct g_mirror_metadata *md, int *errorp)
 {
 	struct g_mirror_disk *disk;
 	int i, error;
 
 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
 	if (disk == NULL) {
 		error = ENOMEM;
 		goto fail;
 	}
 	disk->d_softc = sc;
 	error = g_mirror_connect_disk(disk, pp);
 	if (error != 0)
 		goto fail;
 	disk->d_id = md->md_did;
 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
 	disk->d_priority = md->md_priority;
 	disk->d_flags = md->md_dflags;
 	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
 	if (error == 0 && i != 0)
 		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
 	if (md->md_provider[0] != '\0')
 		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
 	disk->d_sync.ds_consumer = NULL;
 	disk->d_sync.ds_offset = md->md_sync_offset;
 	disk->d_sync.ds_offset_done = md->md_sync_offset;
 	disk->d_genid = md->md_genid;
 	disk->d_sync.ds_syncid = md->md_syncid;
 	if (errorp != NULL)
 		*errorp = 0;
 	return (disk);
 fail:
 	if (errorp != NULL)
 		*errorp = error;
 	if (disk != NULL)
 		free(disk, M_MIRROR);
 	return (NULL);
 }
 
 /*
  * Remove a disk from its device: unlink it from the disk list, cancel
  * its pending events, clear the read hint if it points at this disk,
  * stop synchronization if one is running, disconnect its consumer and
  * free the disk.  The caller must hold sc_lock exclusively and must not
  * hold the topology lock.
  */
 static void
 g_mirror_destroy_disk(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	LIST_REMOVE(disk, d_next);
 	g_mirror_event_cancel(disk);
 	if (sc->sc_hint == disk)
 		sc->sc_hint = NULL;
 	switch (disk->d_state) {
 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 		g_mirror_sync_stop(disk, 1);
 		/* FALLTHROUGH */
 	case G_MIRROR_DISK_STATE_NEW:
 	case G_MIRROR_DISK_STATE_STALE:
 	case G_MIRROR_DISK_STATE_ACTIVE:
 		g_topology_lock();
 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
 		g_topology_unlock();
 		free(disk, M_MIRROR);
 		break;
 	default:
 		/* Any other state is a programming error. */
 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 	}
 }
 
 /*
  * Tear down a whole mirror device: destroy its provider, write final
  * metadata to and destroy every disk, complete or free all pending
  * events, drain the callout, disconnect the synchronization consumers,
  * wither both geoms and destroy the device's locks.  Called with
  * sc_lock held exclusively; the lock is released and destroyed here.
  */
 static void
 g_mirror_destroy_device(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 	struct g_mirror_event *ep;
 	struct g_geom *gp;
 	struct g_consumer *cp, *tmpcp;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	gp = sc->sc_geom;
 	if (sc->sc_provider != NULL)
 		g_mirror_destroy_provider(sc);
 	/* g_mirror_destroy_disk() unlinks the disk, so always take the head. */
 	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
 	    disk = LIST_FIRST(&sc->sc_disks)) {
 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		g_mirror_update_metadata(disk);
 		g_mirror_destroy_disk(disk);
 	}
 	/* Fail whatever is still queued; waiters are woken with ECANCELED. */
 	while ((ep = g_mirror_event_get(sc)) != NULL) {
 		g_mirror_event_remove(sc, ep);
 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
 			g_mirror_event_free(ep);
 		else {
 			ep->e_error = ECANCELED;
 			ep->e_flags |= G_MIRROR_EVENT_DONE;
 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
 			mtx_lock(&sc->sc_events_mtx);
 			wakeup(ep);
 			mtx_unlock(&sc->sc_events_mtx);
 		}
 	}
 	callout_drain(&sc->sc_callout);
 
 	g_topology_lock();
 	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
 		g_mirror_disconnect_consumer(sc, cp);
 	}
 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
 	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom(gp, ENXIO);
 	g_topology_unlock();
 	mtx_destroy(&sc->sc_queue_mtx);
 	mtx_destroy(&sc->sc_events_mtx);
 	sx_xunlock(&sc->sc_lock);
 	sx_destroy(&sc->sc_lock);
 }
 
 /*
  * Orphan handler for a component consumer.  If the consumer still
  * belongs to a disk, request a syncid bump on the device and queue a
  * non-blocking DISCONNECTED event for that disk.
  */
 static void
 g_mirror_orphan(struct g_consumer *cp)
 {
 	struct g_mirror_disk *disk;
 
 	g_topology_assert();
 
 	disk = cp->private;
 	if (disk != NULL) {
 		disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
 		    G_MIRROR_EVENT_DONTWAIT);
 	}
 }
 
 /*
  * Return the next ACTIVE disk after `disk', treating the disk list as
  * circular.  The result can be `disk' itself when it is the only active
  * one.  If no disk on the list is active, NULL is returned.
  *
  * `disk' must be on sc->sc_disks.  The walk visits every list entry at
  * most once and ends after coming back around to `disk', so it always
  * terminates, including when `disk' is the list head and nothing is
  * active.
  */
 static __inline struct g_mirror_disk *
 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
 {
 	struct g_mirror_disk *dp;
 
 	dp = disk;
 	do {
 		dp = LIST_NEXT(dp, d_next);
 		/* Ran off the tail: wrap around to the head. */
 		if (dp == NULL)
 			dp = LIST_FIRST(&sc->sc_disks);
 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
 			return (dp);
 	} while (dp != disk);
 	return (NULL);
 }
 
 /*
  * Pick the disk to use next and advance the device's hint.  Starts from
  * sc_hint (or the list head when no hint is set), skips forward to an
  * ACTIVE disk if needed, and leaves sc_hint at the following active
  * disk.  Returns NULL when the list is empty or no disk is active.
  */
 static struct g_mirror_disk *
 g_mirror_get_disk(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *cur;
 
 	cur = sc->sc_hint;
 	if (cur == NULL) {
 		cur = LIST_FIRST(&sc->sc_disks);
 		sc->sc_hint = cur;
 		if (cur == NULL)
 			return (NULL);
 	}
 	if (cur->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
 		cur = g_mirror_find_next(sc, cur);
 		if (cur == NULL)
 			return (NULL);
 	}
 	sc->sc_hint = g_mirror_find_next(sc, cur);
 	return (cur);
 }
 
 /*
  * Write one sector of metadata to the last sector of the disk's
  * provider.  The sector holds the encoded `md', or all zeros when `md'
  * is NULL or the device has G_MIRROR_DEVICE_FLAG_WIPE set.
  *
  * Returns the g_write_data() result.  On failure the disk is flagged
  * BROKEN (the first failure is logged at level 0, later ones at level 1)
  * and, if disconnect-on-failure is enabled and more than one disk is
  * active, a genid bump is requested and a DISCONNECTED event is queued.
  */
 static int
 g_mirror_write_metadata(struct g_mirror_disk *disk,
     struct g_mirror_metadata *md)
 {
 	struct g_mirror_softc *sc;
 	struct g_consumer *cp;
 	off_t offset, length;
 	u_char *sector;
 	int error = 0;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	cp = disk->d_consumer;
 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
 	    cp->acw, cp->ace));
 	/* The metadata lives in the very last sector of the provider. */
 	length = cp->provider->sectorsize;
 	offset = cp->provider->mediasize - length;
 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
 	if (md != NULL &&
 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
 		mirror_metadata_encode(md, sector);
 	error = g_write_data(cp, offset, sector, length);
 	free(sector, M_MIRROR);
 	if (error != 0) {
 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
 			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
 			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
 			    "(device=%s, error=%d).",
 			    g_mirror_get_diskname(disk), sc->sc_name, error);
 		} else {
 			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
 			    "(device=%s, error=%d).",
 			    g_mirror_get_diskname(disk), sc->sc_name, error);
 		}
 		if (g_mirror_disconnect_on_failure &&
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
 			g_mirror_event_send(disk,
 			    G_MIRROR_DISK_STATE_DISCONNECTED,
 			    G_MIRROR_EVENT_DONTWAIT);
 		}
 	}
 	return (error);
 }
 
 /*
  * Overwrite the disk's metadata sector with zeros by writing NULL
  * metadata.  Logs the outcome and returns the write result.
  */
 static int
 g_mirror_clear_metadata(struct g_mirror_disk *disk)
 {
 	int rc;
 
 	g_topology_assert_not();
 	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
 
 	rc = g_mirror_write_metadata(disk, NULL);
 	if (rc != 0) {
 		G_MIRROR_DEBUG(0,
 		    "Cannot clear metadata on disk %s (error=%d).",
 		    g_mirror_get_diskname(disk), rc);
 	} else {
 		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
 		    g_mirror_get_diskname(disk));
 	}
 	return (rc);
 }
 
 void
 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
     struct g_mirror_metadata *md)
 {
 
 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
 	md->md_version = G_MIRROR_VERSION;
 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
 	md->md_mid = sc->sc_id;
 	md->md_all = sc->sc_ndisks;
 	md->md_slice = sc->sc_slice;
 	md->md_balance = sc->sc_balance;
 	md->md_genid = sc->sc_genid;
 	md->md_mediasize = sc->sc_mediasize;
 	md->md_sectorsize = sc->sc_sectorsize;
 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
 	bzero(md->md_provider, sizeof(md->md_provider));
 	if (disk == NULL) {
 		md->md_did = arc4random();
 		md->md_priority = 0;
 		md->md_syncid = 0;
 		md->md_dflags = 0;
 		md->md_sync_offset = 0;
 		md->md_provsize = 0;
 	} else {
 		md->md_did = disk->d_id;
 		md->md_priority = disk->d_priority;
 		md->md_syncid = disk->d_sync.ds_syncid;
 		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			md->md_sync_offset = disk->d_sync.ds_offset_done;
 		else
 			md->md_sync_offset = 0;
 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
 			strlcpy(md->md_provider,
 			    disk->d_consumer->provider->name,
 			    sizeof(md->md_provider));
 		}
 		md->md_provsize = disk->d_consumer->provider->mediasize;
 	}
 }
 
 /*
  * Regenerate the metadata of the given component from the in-core state and
  * write it to the disk.  The result is only logged; nothing is returned.
  */
 void
 g_mirror_update_metadata(struct g_mirror_disk *disk)
 {
 	struct g_mirror_metadata md;
 	struct g_mirror_softc *sc;
 	int rc;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	/*
 	 * When the WIPE flag is set md is not filled in; the write routine
 	 * checks the same flag before encoding it.
 	 */
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
 		g_mirror_fill_metadata(sc, disk, &md);
 	rc = g_mirror_write_metadata(disk, &md);
 	if (rc != 0) {
 		G_MIRROR_DEBUG(0,
 		    "Cannot update metadata on disk %s (error=%d).",
 		    g_mirror_get_diskname(disk), rc);
 		return;
 	}
 	G_MIRROR_DEBUG(2, "Metadata on %s updated.",
 	    g_mirror_get_diskname(disk));
 }
 
 /*
  * Increment the device syncid and store the new value in the metadata of
  * every ACTIVE or SYNCHRONIZING component.
  */
 static void
 g_mirror_bump_syncid(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *dp;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_syncid++;
 	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
 	    sc->sc_syncid);
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE &&
 		    dp->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			continue;
 		dp->d_sync.ds_syncid = sc->sc_syncid;
 		g_mirror_update_metadata(dp);
 	}
 }
 
 /*
  * Increment the device genid and store the new value in the metadata of
  * every ACTIVE or SYNCHRONIZING component.
  */
 static void
 g_mirror_bump_genid(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *dp;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
 	    ("%s called with no active disks (device=%s).", __func__,
 	    sc->sc_name));
 
 	sc->sc_genid++;
 	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
 	    sc->sc_genid);
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE &&
 		    dp->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			continue;
 		dp->d_genid = sc->sc_genid;
 		g_mirror_update_metadata(dp);
 	}
 }
 
 /*
  * Try to switch the device to the idle state and mark its active components
  * as clean.
  *
  * 'acw' is the write access count to consider; -1 means "use the current
  * count of the mirror provider".
  *
  * Returns 0 when nothing more has to be waited for (the device was marked
  * idle, or idling does not apply), otherwise the number of seconds left
  * until the idle time expires.
  */
 static int
 g_mirror_idle(struct g_mirror_softc *sc, int acw)
 {
 	struct g_mirror_disk *dp;
 	int remaining;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	/* Nothing to do without a provider, with NOFAILSYNC, when already
 	 * idle, or while writes are still in progress. */
 	if (sc->sc_provider == NULL ||
 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0 ||
 	    sc->sc_idle || sc->sc_writes > 0)
 		return (0);
 
 	/* Opened for writing: honour the idle time unless shutting down. */
 	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
 		remaining = g_mirror_idletime -
 		    (time_uptime - sc->sc_last_write);
 		if (!g_mirror_shutdown && remaining > 0)
 			return (remaining);
 	}
 
 	sc->sc_idle = 1;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) {
 			G_MIRROR_DEBUG(1,
 			    "Disk %s (device %s) marked as clean.",
 			    g_mirror_get_diskname(dp), sc->sc_name);
 			dp->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 			g_mirror_update_metadata(dp);
 		}
 	}
 	return (0);
 }
 
 /*
  * Leave the idle state: record the time of the write and mark every active
  * component as dirty in its on-disk metadata.  Does nothing when the
  * NOFAILSYNC flag is set on the device.
  */
 static void
 g_mirror_unidle(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *dp;
 
 	g_topology_assert_not();
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
 		return;
 
 	sc->sc_idle = 0;
 	sc->sc_last_write = time_uptime;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) {
 			G_MIRROR_DEBUG(1,
 			    "Disk %s (device %s) marked as dirty.",
 			    g_mirror_get_diskname(dp), sc->sc_name);
 			dp->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 			g_mirror_update_metadata(dp);
 		}
 	}
 }
 
 /*
  * bio_done callback for regular component requests: tag the bio as a
  * regular request, append it to the device queue and wake up the sleeper
  * on the softc.
  */
 static void
 g_mirror_done(struct bio *bp)
 {
 	struct g_mirror_softc *softc = bp->bio_from->geom->softc;
 
 	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
 
 	mtx_lock(&softc->sc_queue_mtx);
 	bioq_insert_tail(&softc->sc_queue, bp);
 	mtx_unlock(&softc->sc_queue_mtx);
 
 	wakeup(softc);
 }
 
 /*
  * Handle the completion of a regular (non-synchronization) child request.
  * The worker calls this for queued bios carrying G_MIRROR_BIO_FLAG_REGULAR.
  *
  * bp is the finished child; its parent (bp->bio_parent) is delivered once
  * all of its children have come back.  The child is always destroyed here.
  */
 static void
 g_mirror_regular_request(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *disk;
 	struct bio *pbp;
 
 	g_topology_assert_not();
 
 	pbp = bp->bio_parent;
 	sc = pbp->bio_to->geom->softc;
 	/* Undo the index++ done when the request was sent to this consumer. */
 	bp->bio_from->index--;
 	if (bp->bio_cmd == BIO_WRITE)
 		sc->sc_writes--;
 	disk = bp->bio_from->private;
 	if (disk == NULL) {
 		/* The consumer has no disk attached to it any more. */
 		g_topology_lock();
 		g_mirror_kill_consumer(sc, bp->bio_from);
 		g_topology_unlock();
 	}
 
 	pbp->bio_inbed++;
 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
 	    pbp->bio_children));
 	if (bp->bio_error == 0 && pbp->bio_error == 0) {
 		/* Fast path: this child and all earlier ones succeeded. */
 		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
 		g_destroy_bio(bp);
 		if (pbp->bio_children == pbp->bio_inbed) {
 			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
 			pbp->bio_completed = pbp->bio_length;
 			if (pbp->bio_cmd == BIO_WRITE ||
 			    pbp->bio_cmd == BIO_DELETE) {
 				bioq_remove(&sc->sc_inflight, pbp);
 				/* Release delayed sync requests if possible. */
 				g_mirror_sync_release(sc);
 			}
 			g_io_deliver(pbp, pbp->bio_error);
 		}
 		return;
 	} else if (bp->bio_error != 0) {
 		/* Remember only the first error seen for the parent. */
 		if (pbp->bio_error == 0)
 			pbp->bio_error = bp->bio_error;
 		if (disk != NULL) {
 			/* Log at level 0 only the first time the disk fails. */
 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
 				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
 				G_MIRROR_LOGREQ(0, bp,
 				    "Request failed (error=%d).",
 				    bp->bio_error);
 			} else {
 				G_MIRROR_LOGREQ(1, bp,
 				    "Request failed (error=%d).",
 				    bp->bio_error);
 			}
 			/* Never disconnect the last active component. */
 			if (g_mirror_disconnect_on_failure &&
 			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
 			{
 				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
 				g_mirror_event_send(disk,
 				    G_MIRROR_DISK_STATE_DISCONNECTED,
 				    G_MIRROR_EVENT_DONTWAIT);
 			}
 		}
 		/*
 		 * A failed write/delete child is dropped from both counters,
 		 * so bio_children ends up counting successful children only.
 		 */
 		switch (pbp->bio_cmd) {
 		case BIO_DELETE:
 		case BIO_WRITE:
 			pbp->bio_inbed--;
 			pbp->bio_children--;
 			break;
 		}
 	}
 	/*
 	 * Reached when this child failed, or when it succeeded but the parent
 	 * already carries an error from an earlier child.
 	 */
 	g_destroy_bio(bp);
 
 	switch (pbp->bio_cmd) {
 	case BIO_READ:
 		if (pbp->bio_inbed < pbp->bio_children)
 			break;
 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
 			g_io_deliver(pbp, pbp->bio_error);
 		else {
 			/*
 			 * More than one active component: clear the error and
 			 * put the parent back on the device queue so that the
 			 * worker processes it again.
 			 */
 			pbp->bio_error = 0;
 			mtx_lock(&sc->sc_queue_mtx);
 			bioq_insert_tail(&sc->sc_queue, pbp);
 			mtx_unlock(&sc->sc_queue_mtx);
 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 			wakeup(sc);
 		}
 		break;
 	case BIO_DELETE:
 	case BIO_WRITE:
 		if (pbp->bio_children == 0) {
 			/*
 			 * All requests failed.
 			 */
 		} else if (pbp->bio_inbed < pbp->bio_children) {
 			/* Do nothing. */
 			break;
 		} else if (pbp->bio_children == pbp->bio_inbed) {
 			/* Some requests succeeded. */
 			pbp->bio_error = 0;
 			pbp->bio_completed = pbp->bio_length;
 		}
 		bioq_remove(&sc->sc_inflight, pbp);
 		/* Release delayed sync requests if possible. */
 		g_mirror_sync_release(sc);
 		g_io_deliver(pbp, pbp->bio_error);
 		break;
 	default:
 		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
 		break;
 	}
 }
 
 /*
  * bio_done callback for synchronization requests: tag the bio as a sync
  * request, append it to the device queue and wake up the sleeper on the
  * softc.
  */
 static void
 g_mirror_sync_done(struct bio *bp)
 {
 	struct g_mirror_softc *softc;
 
 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
 
 	softc = bp->bio_from->geom->softc;
 	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
 
 	mtx_lock(&softc->sc_queue_mtx);
 	bioq_insert_tail(&softc->sc_queue, bp);
 	mtx_unlock(&softc->sc_queue_mtx);
 
 	wakeup(softc);
 }
 
 /*
  * Handle the "GEOM::kerneldump" attribute request by forwarding a clone of
  * it to the first component on the disk list.
  */
 static void
 g_mirror_kernel_dump(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *first;
 	struct g_kerneldump *dump;
 	struct bio *clone;
 
 	/*
 	 * We configure dumping to the first component, because this component
 	 * will be used for reading with 'prefer' balance algorithm.
 	 * If the component with the highest priority is currently disconnected
 	 * we will not be able to read the dump after the reboot if it will be
 	 * connected and synchronized later. Can we do something better?
 	 */
 	sc = bp->bio_to->geom->softc;
 	first = LIST_FIRST(&sc->sc_disks);
 
 	/* Clamp the requested dump length to the size of the mirror. */
 	dump = (struct g_kerneldump *)bp->bio_data;
 	if (dump->length > bp->bio_to->mediasize)
 		dump->length = bp->bio_to->mediasize;
 
 	clone = g_clone_bio(bp);
 	if (clone == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 	clone->bio_done = g_std_done;
 	g_io_request(clone, first->d_consumer);
 	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
 	    g_mirror_get_diskname(first));
 }
 
 static void
 g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct bio_queue_head queue;
 	struct g_mirror_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 
 	bioq_init(&queue);
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
 			continue;
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			for (cbp = bioq_first(&queue); cbp != NULL;
 			    cbp = bioq_first(&queue)) {
 				bioq_remove(&queue, cbp);
 				g_destroy_bio(cbp);
 			}
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
 			return;
 		}
 		bioq_insert_tail(&queue, cbp);
 		cbp->bio_done = g_std_done;
 		cbp->bio_caller1 = disk;
 		cbp->bio_to = disk->d_consumer->provider;
 	}
 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
 		bioq_remove(&queue, cbp);
 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 		disk = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		g_io_request(cbp, disk->d_consumer);
 	}
 }
 
 /*
  * Entry point for I/O requests addressed to the mirror provider.
  *
  * READ, WRITE and DELETE are queued and the sleeper on the softc is woken
  * up; FLUSH and the supported attributes are handled directly; everything
  * else is rejected with EOPNOTSUPP.
  */
 static void
 g_mirror_start(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 
 	sc = bp->bio_to->geom->softc;
 	/*
 	 * If sc == NULL or there are no valid disks, provider's error
 	 * should be set and g_mirror_start() should not be called at all.
 	 */
 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 	    ("Provider's error should be set (error=%d)(mirror=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 	G_MIRROR_LOGREQ(3, bp, "Request received.");
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 		/* Queue the request for later processing. */
 		mtx_lock(&sc->sc_queue_mtx);
 		bioq_insert_tail(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 		wakeup(sc);
 		return;
 	case BIO_FLUSH:
 		g_mirror_flush(sc, bp);
 		return;
 	case BIO_GETATTR:
 		if (g_handleattr_int(bp, "GEOM::candelete", 1))
 			return;
 		if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
 			g_mirror_kernel_dump(bp);
 			return;
 		}
 		/* Unknown attribute. */
 		break;
 	default:
 		break;
 	}
 	g_io_deliver(bp, EOPNOTSUPP);
 }
 
 /*
  * Return TRUE if the given request is colliding with an in-progress
  * synchronization request.
  *
  * Ranges are compared as [offset, offset + length), so two requests that
  * merely touch at a boundary do not collide.
  */
 static int
 g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *disk;
 	struct bio *sbp;
 	off_t rstart, rend, sstart, send;
-	int i;
+	u_int i;
 
 	/* No disk is being synchronized, so nothing can collide. */
 	if (sc->sc_sync.ds_ndisks == 0)
 		return (0);
 	rstart = bp->bio_offset;
 	rend = bp->bio_offset + bp->bio_length;
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			continue;
 		/* Check every sync request slot of this disk. */
 		for (i = 0; i < g_mirror_syncreqs; i++) {
 			sbp = disk->d_sync.ds_bios[i];
 			if (sbp == NULL)
 				continue;
 			sstart = sbp->bio_offset;
 			send = sbp->bio_offset + sbp->bio_length;
 			if (rend > sstart && rstart < send)
 				return (1);
 		}
 	}
 	return (0);
 }
 
 /*
  * Return TRUE if the given synchronization request overlaps a regular
  * request that is currently on the in-flight queue.
  *
  * Ranges are compared as [offset, offset + length), so requests that merely
  * touch at a boundary do not collide.
  */
 static int
 g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
 {
 	struct bio *rbp;
 	off_t sync_begin, sync_end, reg_begin, reg_end;
 
 	/* No disk is being synchronized, so nothing can collide. */
 	if (sc->sc_sync.ds_ndisks == 0)
 		return (0);
 
 	sync_begin = sbp->bio_offset;
 	sync_end = sbp->bio_offset + sbp->bio_length;
 	TAILQ_FOREACH(rbp, &sc->sc_inflight.queue, bio_queue) {
 		reg_begin = rbp->bio_offset;
 		reg_end = rbp->bio_offset + rbp->bio_length;
 		if (reg_begin < sync_end && reg_end > sync_begin)
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Puts a regular request onto the delayed queue (sc_regular_delayed).
  * The request is inserted at the head of that queue.
  */
 static void
 g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
 {
 
 	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
 	bioq_insert_head(&sc->sc_regular_delayed, bp);
 }
 
 /*
  * Puts a synchronization request onto the delayed queue (sc_sync_delayed).
  * The request is appended to the tail of that queue.
  */
 static void
 g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
 {
 
 	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
 	bioq_insert_tail(&sc->sc_sync_delayed, bp);
 }
 
 /*
  * Move every delayed regular request that no longer collides with a
  * synchronization request back to the head of the device queue.
  */
 static void
 g_mirror_regular_release(struct g_mirror_softc *sc)
 {
 	struct bio *cur, *next;
 
 	TAILQ_FOREACH_SAFE(cur, &sc->sc_regular_delayed.queue, bio_queue,
 	    next) {
 		if (!g_mirror_sync_collision(sc, cur)) {
 			bioq_remove(&sc->sc_regular_delayed, cur);
 			G_MIRROR_LOGREQ(2, cur,
 			    "Releasing delayed request (%p).", cur);
 			/*
 			 * wakeup() is not needed, because this function is
 			 * called from the worker thread.
 			 */
 			mtx_lock(&sc->sc_queue_mtx);
 			bioq_insert_head(&sc->sc_queue, cur);
 			mtx_unlock(&sc->sc_queue_mtx);
 		}
 	}
 }
 
 /*
  * Send every delayed synchronization request that no longer collides with
  * an in-flight regular request.
  */
 static void
 g_mirror_sync_release(struct g_mirror_softc *sc)
 {
 	struct bio *cur, *next;
 
 	TAILQ_FOREACH_SAFE(cur, &sc->sc_sync_delayed.queue, bio_queue, next) {
 		if (!g_mirror_regular_collision(sc, cur)) {
 			bioq_remove(&sc->sc_sync_delayed, cur);
 			G_MIRROR_LOGREQ(2, cur,
 			    "Releasing delayed synchronization request.");
 			g_io_request(cur, cur->bio_from);
 		}
 	}
 }
 
 /*
  * Handle synchronization requests.
  * Every synchronization request is a two-step process: first, a READ request
  * is sent to the active provider and then a WRITE request (with the read
  * data) to the provider being synchronized. When the WRITE is finished, a new
  * synchronization request is sent.
  *
  * The same bio is reused for both steps and for the following requests.
  */
 static void
 g_mirror_sync_request(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_disk *disk;
 
 	/* Undo the index++ done when the request was sent to this consumer. */
 	bp->bio_from->index--;
 	sc = bp->bio_from->geom->softc;
 	disk = bp->bio_from->private;
 	if (disk == NULL) {
 		/*
 		 * The consumer has no disk attached to it any more: get rid
 		 * of the consumer, the data buffer and the bio.
 		 */
 		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
 		g_topology_lock();
 		g_mirror_kill_consumer(sc, bp->bio_from);
 		g_topology_unlock();
 		free(bp->bio_data, M_MIRROR);
 		g_destroy_bio(bp);
 		sx_xlock(&sc->sc_lock);
 		return;
 	}
 
 	/*
 	 * Synchronization request.
 	 */
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	    {
 		struct g_consumer *cp;
 
 		if (bp->bio_error != 0) {
 			G_MIRROR_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 			g_destroy_bio(bp);
 			return;
 		}
 		G_MIRROR_LOGREQ(3, bp,
 		    "Synchronization request half-finished.");
 		/* Second step: write the data just read to the target disk. */
 		bp->bio_cmd = BIO_WRITE;
 		bp->bio_cflags = 0;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		cp->index++;
 		g_io_request(bp, cp);
 		return;
 	    }
 	case BIO_WRITE:
 	    {
 		struct g_mirror_disk_sync *sync;
 		off_t offset;
 		void *data;
 		int i;
 
 		if (bp->bio_error != 0) {
 			/* The target disk failed: ask for its disconnection. */
 			G_MIRROR_LOGREQ(0, bp,
 			    "Synchronization request failed (error=%d).",
 			    bp->bio_error);
 			g_destroy_bio(bp);
 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
 			g_mirror_event_send(disk,
 			    G_MIRROR_DISK_STATE_DISCONNECTED,
 			    G_MIRROR_EVENT_DONTWAIT);
 			return;
 		}
 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
 		sync = &disk->d_sync;
 		if (sync->ds_offset == sc->sc_mediasize ||
 		    sync->ds_consumer == NULL ||
 		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 			/* Don't send more synchronization requests. */
 			sync->ds_inflight--;
 			if (sync->ds_bios != NULL) {
 				/* bio_caller1 is used as the slot index. */
 				i = (int)(uintptr_t)bp->bio_caller1;
 				sync->ds_bios[i] = NULL;
 			}
 			free(bp->bio_data, M_MIRROR);
 			g_destroy_bio(bp);
 			/* Wait for the remaining in-flight requests. */
 			if (sync->ds_inflight > 0)
 				return;
 			if (sync->ds_consumer == NULL ||
 			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 				return;
 			}
 			/* Disk up-to-date, activate it. */
 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
 			    G_MIRROR_EVENT_DONTWAIT);
 			return;
 		}
 
 		/* Send next synchronization request. */
 		data = bp->bio_data;
 		/*
 		 * NOTE(review): bzero() also clears bio_caller1, which the
 		 * branch above reads as the ds_bios[] index, and it is not
 		 * restored below -- confirm this is intended.
 		 */
 		bzero(bp, sizeof(*bp));
 		bp->bio_cmd = BIO_READ;
 		bp->bio_offset = sync->ds_offset;
 		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
 		sync->ds_offset += bp->bio_length;
 		bp->bio_done = g_mirror_sync_done;
 		bp->bio_data = data;
 		bp->bio_from = sync->ds_consumer;
 		bp->bio_to = sc->sc_provider;
 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
 		sync->ds_consumer->index++;
 		/*
 		 * Delay the request if it is colliding with a regular request.
 		 */
 		if (g_mirror_regular_collision(sc, bp))
 			g_mirror_sync_delay(sc, bp);
 		else
 			g_io_request(bp, sync->ds_consumer);
 
 		/* Release delayed requests if possible. */
 		g_mirror_regular_release(sc);
 
 		/* Find the smallest offset */
 		offset = sc->sc_mediasize;
 		/*
 		 * NOTE(review): unlike g_mirror_sync_collision(), this loop
 		 * dereferences ds_bios[i] without a NULL check -- presumably
 		 * all slots are populated on this path; verify.
 		 */
 		for (i = 0; i < g_mirror_syncreqs; i++) {
 			bp = sync->ds_bios[i];
 			if (bp->bio_offset < offset)
 				offset = bp->bio_offset;
 		}
 		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
 			/* Update offset_done on every 100 blocks. */
 			sync->ds_offset_done = offset;
 			g_mirror_update_metadata(disk);
 		}
 		return;
 	    }
 	default:
 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
 		    bp->bio_cmd, sc->sc_name));
 		break;
 	}
 }
 
 /*
  * 'prefer' balance algorithm: send the read to the first active component
  * on the disk list.  The request is delivered with ENXIO when there is no
  * active component and with ENOMEM when it cannot be cloned (an error
  * already set on the request is kept).
  */
 static void
 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *dp;
 	struct g_consumer *cons;
 	struct bio *clone;
 	int err;
 
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
 			break;
 	}
 	if (dp == NULL) {
 		err = ENXIO;
 		goto fail;
 	}
 	clone = g_clone_bio(bp);
 	if (clone == NULL) {
 		err = ENOMEM;
 		goto fail;
 	}
 
 	/* Point the clone at the chosen component and send it. */
 	cons = dp->d_consumer;
 	clone->bio_done = g_mirror_done;
 	clone->bio_to = cons->provider;
 	G_MIRROR_LOGREQ(3, clone, "Sending request.");
 	KASSERT(cons->acr >= 1 && cons->acw >= 1 && cons->ace >= 1,
 	    ("Consumer %s not opened (r%dw%de%d).", cons->provider->name,
 	    cons->acr, cons->acw, cons->ace));
 	cons->index++;
 	g_io_request(clone, cons);
 	return;
 
 fail:
 	if (bp->bio_error == 0)
 		bp->bio_error = err;
 	g_io_deliver(bp, bp->bio_error);
 }
 
 /*
  * 'round-robin' balance algorithm: send the read to the component returned
  * by g_mirror_get_disk().  The request is delivered with ENXIO when no
  * component is returned and with ENOMEM when it cannot be cloned (an error
  * already set on the request is kept).
  */
 static void
 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *dp;
 	struct g_consumer *cons;
 	struct bio *clone;
 	int err;
 
 	dp = g_mirror_get_disk(sc);
 	if (dp == NULL) {
 		err = ENXIO;
 		goto fail;
 	}
 	clone = g_clone_bio(bp);
 	if (clone == NULL) {
 		err = ENOMEM;
 		goto fail;
 	}
 
 	/* Point the clone at the chosen component and send it. */
 	cons = dp->d_consumer;
 	clone->bio_done = g_mirror_done;
 	clone->bio_to = cons->provider;
 	G_MIRROR_LOGREQ(3, clone, "Sending request.");
 	KASSERT(cons->acr >= 1 && cons->acw >= 1 && cons->ace >= 1,
 	    ("Consumer %s not opened (r%dw%de%d).", cons->provider->name,
 	    cons->acr, cons->acw, cons->ace));
 	cons->index++;
 	g_io_request(clone, cons);
 	return;
 
 fail:
 	if (bp->bio_error == 0)
 		bp->bio_error = err;
 	g_io_deliver(bp, bp->bio_error);
 }
 
 #define TRACK_SIZE  (1 * 1024 * 1024)
 #define LOAD_SCALE	256
 #define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
 
 /*
  * 'load' balance algorithm: send the read to the active component with the
  * lowest score.  The score is the component's load, reduced when the end of
  * the last request sent to it matches, or is within TRACK_SIZE of, the
  * offset of this request.  On equal scores the later component on the list
  * wins.
  */
 static void
 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct g_mirror_disk *chosen, *dp;
 	struct g_consumer *cons;
 	struct bio *clone;
 	int lowest, score;
 
 	/* Find a disk with the smallest load. */
 	chosen = NULL;
 	lowest = INT_MAX;
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
 			continue;
 		score = dp->load;
 		if (dp->d_last_offset == bp->bio_offset) {
 			/* Disk head is precisely in position. */
 			score -= 2 * LOAD_SCALE;
 		} else if (ABS(dp->d_last_offset - bp->bio_offset) <
 		    TRACK_SIZE) {
 			/* Disk head is close to position. */
 			score -= 1 * LOAD_SCALE;
 		}
 		if (score <= lowest) {
 			chosen = dp;
 			lowest = score;
 		}
 	}
 	KASSERT(chosen != NULL, ("NULL disk for %s.", sc->sc_name));
 
 	clone = g_clone_bio(bp);
 	if (clone == NULL) {
 		if (bp->bio_error == 0)
 			bp->bio_error = ENOMEM;
 		g_io_deliver(bp, bp->bio_error);
 		return;
 	}
 
 	/* Point the clone at the chosen component. */
 	cons = chosen->d_consumer;
 	clone->bio_done = g_mirror_done;
 	clone->bio_to = cons->provider;
 	G_MIRROR_LOGREQ(3, clone, "Sending request.");
 	KASSERT(cons->acr >= 1 && cons->acw >= 1 && cons->ace >= 1,
 	    ("Consumer %s not opened (r%dw%de%d).", cons->provider->name,
 	    cons->acr, cons->acw, cons->ace));
 	cons->index++;
 
 	/* Remember where this request ends on the chosen disk. */
 	chosen->d_last_offset = bp->bio_offset + bp->bio_length;
 
 	/* Blend the new consumer index into every disk's load (1/8 weight). */
 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 		dp->load = (dp->d_consumer->index * LOAD_SCALE +
 		    dp->load * 7) / 8;
 	}
 	g_io_request(clone, cons);
 }
 
 /*
  * 'split' balance algorithm: divide a read between the active components.
  *
  * Requests no longer than sc_slice are handed to the round-robin algorithm.
  * Longer requests are cut into slices of bio_length / ndisks bytes, rounded
  * up to a multiple of the provider's sector size, and each active component
  * receives one slice until the whole request is covered.
  *
  * If a clone cannot be allocated, the clones prepared so far are destroyed
  * and the request is delivered with ENOMEM (an error already set on the
  * request is kept).
  */
 static void
 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
 {
 	struct bio_queue_head queue;
 	struct g_mirror_disk *disk;
 	struct g_consumer *cp;
 	struct bio *cbp;
 	off_t left, mod, offset, slice;
 	u_char *data;
 	u_int ndisks;
 
 	if (bp->bio_length <= sc->sc_slice) {
 		g_mirror_request_round_robin(sc, bp);
 		return;
 	}
 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
 	slice = bp->bio_length / ndisks;
 	/* Round the slice up to a whole number of sectors. */
 	mod = slice % sc->sc_provider->sectorsize;
 	if (mod != 0)
 		slice += sc->sc_provider->sectorsize - mod;
 	/*
 	 * Allocate all bios before sending any request, so we can
 	 * return ENOMEM in nice and clean way.
 	 */
 	left = bp->bio_length;
 	offset = bp->bio_offset;
 	data = bp->bio_data;
 	bioq_init(&queue);
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
 			continue;
 		cbp = g_clone_bio(bp);
 		if (cbp == NULL) {
 			/*
 			 * Unlink each prepared clone from the local queue
 			 * before destroying it, exactly once.
 			 */
 			for (cbp = bioq_first(&queue); cbp != NULL;
 			    cbp = bioq_first(&queue)) {
 				bioq_remove(&queue, cbp);
 				g_destroy_bio(cbp);
 			}
 			if (bp->bio_error == 0)
 				bp->bio_error = ENOMEM;
 			g_io_deliver(bp, bp->bio_error);
 			return;
 		}
 		bioq_insert_tail(&queue, cbp);
 		cbp->bio_done = g_mirror_done;
 		/* Remember the target disk until the clone is sent. */
 		cbp->bio_caller1 = disk;
 		cbp->bio_to = disk->d_consumer->provider;
 		cbp->bio_offset = offset;
 		cbp->bio_data = data;
 		cbp->bio_length = MIN(left, slice);
 		left -= cbp->bio_length;
 		if (left == 0)
 			break;
 		offset += cbp->bio_length;
 		data += cbp->bio_length;
 	}
 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
 		bioq_remove(&queue, cbp);
 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 		disk = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		cp = disk->d_consumer;
 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
 		    cp->acr, cp->acw, cp->ace));
 		disk->d_consumer->index++;
 		g_io_request(cbp, disk->d_consumer);
 	}
 }
 
 /*
  * Dispatch a request addressed to the mirror provider to its components.
  * The worker calls this for queued bios whose bio_to is the mirror provider.
  *
  * Reads go to a component chosen by the configured balance algorithm;
  * writes and deletes are cloned to every eligible component.
  */
 static void
 g_mirror_register_request(struct bio *bp)
 {
 	struct g_mirror_softc *sc;
 
 	sc = bp->bio_to->geom->softc;
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		switch (sc->sc_balance) {
 		case G_MIRROR_BALANCE_LOAD:
 			g_mirror_request_load(sc, bp);
 			break;
 		case G_MIRROR_BALANCE_PREFER:
 			g_mirror_request_prefer(sc, bp);
 			break;
 		case G_MIRROR_BALANCE_ROUND_ROBIN:
 			g_mirror_request_round_robin(sc, bp);
 			break;
 		case G_MIRROR_BALANCE_SPLIT:
 			g_mirror_request_split(sc, bp);
 			break;
 		}
 		return;
 	case BIO_WRITE:
 	case BIO_DELETE:
 	    {
 		struct g_mirror_disk *disk;
 		struct g_mirror_disk_sync *sync;
 		struct bio_queue_head queue;
 		struct g_consumer *cp;
 		struct bio *cbp;
 
 		/*
 		 * Delay the request if it is colliding with a synchronization
 		 * request.
 		 */
 		if (g_mirror_sync_collision(sc, bp)) {
 			g_mirror_regular_delay(sc, bp);
 			return;
 		}
 
 		/* Leave the idle state, or just refresh the last write time. */
 		if (sc->sc_idle)
 			g_mirror_unidle(sc);
 		else
 			sc->sc_last_write = time_uptime;
 
 		/*
 		 * Allocate all bios before sending any request, so we can
 		 * return ENOMEM in nice and clean way.
 		 */
 		bioq_init(&queue);
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			sync = &disk->d_sync;
 			switch (disk->d_state) {
 			case G_MIRROR_DISK_STATE_ACTIVE:
 				break;
 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 				/*
 				 * Skip a disk being synchronized when the
 				 * request starts at or beyond the offset
 				 * reached by synchronization (ds_offset).
 				 */
 				if (bp->bio_offset >= sync->ds_offset)
 					continue;
 				break;
 			default:
 				continue;
 			}
 			/* Skip disks without the CANDELETE flag for deletes. */
 			if (bp->bio_cmd == BIO_DELETE &&
 			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
 				continue;
 			cbp = g_clone_bio(bp);
 			if (cbp == NULL) {
 				/* Destroy the clones prepared so far. */
 				for (cbp = bioq_first(&queue); cbp != NULL;
 				    cbp = bioq_first(&queue)) {
 					bioq_remove(&queue, cbp);
 					g_destroy_bio(cbp);
 				}
 				if (bp->bio_error == 0)
 					bp->bio_error = ENOMEM;
 				g_io_deliver(bp, bp->bio_error);
 				return;
 			}
 			bioq_insert_tail(&queue, cbp);
 			cbp->bio_done = g_mirror_done;
 			cp = disk->d_consumer;
 			/* Remember the consumer until the clone is sent. */
 			cbp->bio_caller1 = cp;
 			cbp->bio_to = cp->provider;
 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
 			    ("Consumer %s not opened (r%dw%de%d).",
 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
 		}
 		for (cbp = bioq_first(&queue); cbp != NULL;
 		    cbp = bioq_first(&queue)) {
 			bioq_remove(&queue, cbp);
 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
 			cp = cbp->bio_caller1;
 			cbp->bio_caller1 = NULL;
 			cp->index++;
 			/* Counted for deletes too, one per clone sent. */
 			sc->sc_writes++;
 			g_io_request(cbp, cp);
 		}
 		/*
 		 * Put request onto inflight queue, so we can check if new
 		 * synchronization requests don't collide with it.
 		 */
 		bioq_insert_tail(&sc->sc_inflight, bp);
 		/*
 		 * Bump syncid on first write.
 		 */
 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
 			g_mirror_bump_syncid(sc);
 		}
 		return;
 	    }
 	default:
 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
 		    bp->bio_cmd, sc->sc_name));
 		break;
 	}
 }
 
 /*
  * Check whether the device can be destroyed.  Returns 1 when the softc is
  * already detached from its geom, or when the device is not being tasted
  * and none of its regular or synchronization consumers is busy; returns 0
  * otherwise.
  */
 static int
 g_mirror_can_destroy(struct g_mirror_softc *sc)
 {
 	struct g_geom *geom;
 	struct g_consumer *cons;
 
 	g_topology_assert();
 
 	geom = sc->sc_geom;
 	if (geom->softc == NULL)
 		return (1);
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
 		return (0);
 
 	/* Consumers of the regular geom. */
 	LIST_FOREACH(cons, &geom->consumer, consumer) {
 		if (g_mirror_is_busy(sc, cons))
 			return (0);
 	}
 
 	/* Consumers of the synchronization geom. */
 	geom = sc->sc_sync.ds_geom;
 	LIST_FOREACH(cons, &geom->consumer, consumer) {
 		if (g_mirror_is_busy(sc, cons))
 			return (0);
 	}
 
 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
 	    sc->sc_name);
 	return (1);
 }
 
 /*
  * Try to destroy the device.
  *
  * Returns 0 when the device cannot be destroyed yet and 1 when destruction
  * went ahead: either the sleeper on &sc->sc_worker was woken up (WAIT flag
  * set) or the device was destroyed and the softc freed right here.
  */
 static int
 g_mirror_try_destroy(struct g_mirror_softc *sc)
 {
 
 	/* Release the root mount hold, if we still have one. */
 	if (sc->sc_rootmount != NULL) {
 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 		    sc->sc_rootmount);
 		root_mount_rel(sc->sc_rootmount);
 		sc->sc_rootmount = NULL;
 	}
 	g_topology_lock();
 	if (!g_mirror_can_destroy(sc)) {
 		g_topology_unlock();
 		return (0);
 	}
 	/* Detach the softc from both geoms. */
 	sc->sc_geom->softc = NULL;
 	sc->sc_sync.ds_geom->softc = NULL;
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
 		g_topology_unlock();
 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
 		    &sc->sc_worker);
 		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
 		sx_xunlock(&sc->sc_lock);
 		wakeup(&sc->sc_worker);
 		/*
 		 * NOTE(review): sc is still written after the wakeup --
 		 * presumably the waiter waits for sc_worker to become NULL
 		 * before freeing sc; verify against the waiting code.
 		 */
 		sc->sc_worker = NULL;
 	} else {
 		g_topology_unlock();
 		g_mirror_destroy_device(sc);
 		free(sc, M_MIRROR);
 	}
 	return (1);
 }
 
 /*
  * Worker thread.
  *
  * Runs forever with sc_lock held exclusively (dropped only while sleeping):
  * handles pending events first, then tries to mark the device idle, then
  * takes one bio from the device queue and dispatches it.  The thread exits
  * once the device is flagged for destruction and g_mirror_try_destroy()
  * succeeds.
  */
 static void
 g_mirror_worker(void *arg)
 {
 	struct g_mirror_softc *sc;
 	struct g_mirror_event *ep;
 	struct bio *bp;
 	int timeout;
 
 	sc = arg;
 	thread_lock(curthread);
 	sched_prio(curthread, PRIBIO);
 	thread_unlock(curthread);
 
 	sx_xlock(&sc->sc_lock);
 	for (;;) {
 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
 		/*
 		 * First take a look at events.
 		 * This is important to handle events before any I/O requests.
 		 */
 		ep = g_mirror_event_get(sc);
 		if (ep != NULL) {
 			g_mirror_event_remove(sc, ep);
 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
 				/* Update only device status. */
 				G_MIRROR_DEBUG(3,
 				    "Running event for device %s.",
 				    sc->sc_name);
 				ep->e_error = 0;
 				g_mirror_update_device(sc, 1);
 			} else {
 				/* Update disk status. */
 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
 				     g_mirror_get_diskname(ep->e_disk));
 				ep->e_error = g_mirror_update_disk(ep->e_disk,
 				    ep->e_state);
 				if (ep->e_error == 0)
 					g_mirror_update_device(sc, 0);
 			}
 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
 				/* Nobody waits for the result: free it here. */
 				KASSERT(ep->e_error == 0,
 				    ("Error cannot be handled."));
 				g_mirror_event_free(ep);
 			} else {
 				/* Mark the event done and wake up on it. */
 				ep->e_flags |= G_MIRROR_EVENT_DONE;
 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
 				    ep);
 				mtx_lock(&sc->sc_events_mtx);
 				wakeup(ep);
 				mtx_unlock(&sc->sc_events_mtx);
 			}
 			if ((sc->sc_flags &
 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 				if (g_mirror_try_destroy(sc)) {
 					curthread->td_pflags &= ~TDP_GEOM;
 					G_MIRROR_DEBUG(1, "Thread exiting.");
 					kproc_exit(0);
 				}
 			}
 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
 			/* Look for further events before touching I/O. */
 			continue;
 		}
 		/*
 		 * Check if we can mark the array as CLEAN and, if we can't,
 		 * how many seconds we should wait.
 		 */
 		timeout = g_mirror_idle(sc, -1);
 		/*
 		 * Now I/O requests.
 		 */
 		/* Get first request from the queue. */
 		mtx_lock(&sc->sc_queue_mtx);
 		bp = bioq_takefirst(&sc->sc_queue);
 		if (bp == NULL) {
 			if ((sc->sc_flags &
 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 				/* Drop the queue mutex around the attempt. */
 				mtx_unlock(&sc->sc_queue_mtx);
 				if (g_mirror_try_destroy(sc)) {
 					curthread->td_pflags &= ~TDP_GEOM;
 					G_MIRROR_DEBUG(1, "Thread exiting.");
 					kproc_exit(0);
 				}
 				mtx_lock(&sc->sc_queue_mtx);
 			}
 			sx_xunlock(&sc->sc_lock);
 			/*
 			 * XXX: We can miss an event here, because an event
 			 *      can be added without sx-device-lock and without
 			 *      mtx-queue-lock. Maybe I should just stop using
 			 *      dedicated mutex for events synchronization and
 			 *      stick with the queue lock?
 			 *      The event will hang here until next I/O request
 			 *      or next event is received.
 			 */
 			/* timeout is in seconds; PDROP is passed with the mutex. */
 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
 			    timeout * hz);
 			sx_xlock(&sc->sc_lock);
 			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
 			continue;
 		}
 		mtx_unlock(&sc->sc_queue_mtx);
 
 		/*
 		 * Dispatch on where the bio came from and where it is going:
 		 * a sync bio from the synchronization geom, a completed
 		 * component request, or a new request for the mirror provider.
 		 */
 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
 		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
 			g_mirror_sync_request(bp);	/* READ */
 		} else if (bp->bio_to != sc->sc_provider) {
 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
 				g_mirror_regular_request(bp);
 			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
 				g_mirror_sync_request(bp);	/* WRITE */
 			else {
 				KASSERT(0,
 				    ("Invalid request cflags=0x%hhx to=%s.",
 				    bp->bio_cflags, bp->bio_to->name));
 			}
 		} else {
 			g_mirror_register_request(bp);
 		}
 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
 	}
 }
 
 /*
  * Bring the DIRTY flag of the given disk in line with the idle state of
  * the device: when the device is not idle a clean disk is marked dirty,
  * and when the device is idle a dirty disk is marked clean.
  * Nothing is changed for devices flagged NOFAILSYNC.
  */
 static void
 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
 {
 	int is_dirty;
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
 		return;
 
 	is_dirty = (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0;
 	if (sc->sc_idle) {
 		/* Idle device: only a dirty disk needs to be touched. */
 		if (!is_dirty)
 			return;
 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
 		    g_mirror_get_diskname(disk), sc->sc_name);
 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		return;
 	}
 
 	/* Busy device: only a clean disk needs to be touched. */
 	if (is_dirty)
 		return;
 	G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
 	    g_mirror_get_diskname(disk), sc->sc_name);
 	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 }
 
 /*
  * Start the synchronization process for the given disk.
  *
  * A new consumer of the synchronization geom is attached to the mirror's
  * own provider and opened for reading.  Then g_mirror_syncreqs read
  * requests are prepared and either sent to that consumer or delayed when
  * they collide with a regular request.
  *
  * The caller must hold sc_lock and must not hold the topology lock.
  * sc_lock is released while the topology lock is taken and re-acquired
  * afterwards.
  */
 static void
 g_mirror_sync_start(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	struct g_consumer *cp;
 	struct bio *bp;
 	int error, i;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 	    ("Disk %s is not marked for synchronization.",
 	    g_mirror_get_diskname(disk)));
 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
 	    sc->sc_state));
 
 	/*
 	 * Create the synchronization consumer.  sc_lock is dropped for the
 	 * time the topology lock is held.
 	 */
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	cp = g_new_consumer(sc->sc_sync.ds_geom);
 	error = g_attach(cp, sc->sc_provider);
 	KASSERT(error == 0,
 	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
 	error = g_access(cp, 1, 0, 0);
 	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 
 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
 	    g_mirror_get_diskname(disk));
 	/* The disk being rebuilt is marked dirty unless NOFAILSYNC is set. */
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
 	KASSERT(disk->d_sync.ds_consumer == NULL,
 	    ("Sync consumer already exists (device=%s, disk=%s).",
 	    sc->sc_name, g_mirror_get_diskname(disk)));
 
 	disk->d_sync.ds_consumer = cp;
 	disk->d_sync.ds_consumer->private = disk;
 	disk->d_sync.ds_consumer->index = 0;
 
 	/*
 	 * Allocate memory for synchronization bios and initialize them.
 	 * Every bio is a read of the next chunk (at most MAXPHYS bytes,
 	 * limited by the end of the media) starting at ds_offset, which is
 	 * advanced as the bios are set up.
 	 */
 	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
 	    M_MIRROR, M_WAITOK);
 	for (i = 0; i < g_mirror_syncreqs; i++) {
 		bp = g_alloc_bio();
 		disk->d_sync.ds_bios[i] = bp;
 		bp->bio_parent = NULL;
 		bp->bio_cmd = BIO_READ;
 		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
 		bp->bio_cflags = 0;
 		bp->bio_offset = disk->d_sync.ds_offset;
 		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
 		disk->d_sync.ds_offset += bp->bio_length;
 		bp->bio_done = g_mirror_sync_done;
 		bp->bio_from = disk->d_sync.ds_consumer;
 		bp->bio_to = sc->sc_provider;
 		/* Store the index of this bio within ds_bios[]. */
 		bp->bio_caller1 = (void *)(uintptr_t)i;
 	}
 
 	/* Increase the number of disks in SYNCHRONIZING state. */
 	sc->sc_sync.ds_ndisks++;
 	/* Set the number of in-flight synchronization requests. */
 	disk->d_sync.ds_inflight = g_mirror_syncreqs;
 
 	/*
 	 * Fire off first synchronization requests.
 	 */
 	for (i = 0; i < g_mirror_syncreqs; i++) {
 		bp = disk->d_sync.ds_bios[i];
 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
 		disk->d_sync.ds_consumer->index++;
 		/*
 		 * Delay the request if it is colliding with a regular request.
 		 */
 		if (g_mirror_regular_collision(sc, bp))
 			g_mirror_sync_delay(sc, bp);
 		else
 			g_io_request(bp, disk->d_sync.ds_consumer);
 	}
 }
 
 /*
  * Stop synchronization process.
  * type: 0 - synchronization finished
  *       1 - synchronization stopped
  *
  * The type argument only selects the message that is logged.
  * Nothing is done when the disk has no synchronization consumer.
  *
  * The caller must hold sc_lock and must not hold the topology lock.
  * sc_lock is released while the consumer is being killed under the
  * topology lock and re-acquired before returning.
  */
 static void
 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
 {
 	struct g_mirror_softc *sc;
 	struct g_consumer *cp;
 
 	g_topology_assert_not();
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 	    g_mirror_disk_state2str(disk->d_state)));
 	/* Synchronization was never started for this disk. */
 	if (disk->d_sync.ds_consumer == NULL)
 		return;
 
 	if (type == 0) {
 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 	} else /* if (type == 1) */ {
 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 	}
 	/* Release the array of bio pointers and detach the sync state. */
 	free(disk->d_sync.ds_bios, M_MIRROR);
 	disk->d_sync.ds_bios = NULL;
 	cp = disk->d_sync.ds_consumer;
 	disk->d_sync.ds_consumer = NULL;
 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 	/* One disk less in SYNCHRONIZING state. */
 	sc->sc_sync.ds_ndisks--;
 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
 	g_topology_lock();
 	g_mirror_kill_consumer(sc, cp);
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 }
 
 /*
  * Create and announce the "mirror/<name>" provider of the device.
  *
  * Media size and sector size come from the softc.  Stripe size and stripe
  * offset are taken from the component provider with the largest stripe
  * size.  Unmapped I/O is accepted only when the balance algorithm is not
  * SPLIT and every component provider accepts it as well.
  * Once the provider is up, synchronization is started for every disk in
  * SYNCHRONIZING state.
  *
  * The caller must hold sc_lock.
  */
 static void
 g_mirror_launch_provider(struct g_mirror_softc *sc)
 {
 	struct g_provider *newpp, *under;
 	struct g_mirror_disk *disk;
 
 	sx_assert(&sc->sc_lock, SX_LOCKED);
 
 	g_topology_lock();
 	newpp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
 	newpp->mediasize = sc->sc_mediasize;
 	newpp->sectorsize = sc->sc_sectorsize;
 	newpp->stripesize = 0;
 	newpp->stripeoffset = 0;
 
 	/* Splitting of unmapped BIO's could work but isn't implemented now */
 	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
 		newpp->flags |= G_PF_ACCEPT_UNMAPPED;
 
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		/* Only disks with an attached consumer are of interest. */
 		if (disk->d_consumer == NULL)
 			continue;
 		under = disk->d_consumer->provider;
 		if (under == NULL)
 			continue;
 
 		/* Inherit the geometry of the largest stripe below us. */
 		if (under->stripesize > newpp->stripesize) {
 			newpp->stripesize = under->stripesize;
 			newpp->stripeoffset = under->stripeoffset;
 		}
 		/* A provider underneath us doesn't support unmapped */
 		if ((under->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
 			G_MIRROR_DEBUG(0, "Cancelling unmapped "
 			    "because of %s.", under->name);
 			newpp->flags &= ~G_PF_ACCEPT_UNMAPPED;
 		}
 	}
 	sc->sc_provider = newpp;
 	g_error_provider(newpp, 0);
 	g_topology_unlock();
 
 	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", newpp->name,
 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
 
 	/* Kick off rebuilds that were waiting for the provider. */
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			continue;
 		g_mirror_sync_start(disk);
 	}
 }
 
 static void
 g_mirror_destroy_provider(struct g_mirror_softc *sc)
 {
 	struct g_mirror_disk *disk;
 	struct bio *bp;
 
 	g_topology_assert_not();
 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
 	    sc->sc_name));
 
 	g_topology_lock();
 	g_error_provider(sc->sc_provider, ENXIO);
 	mtx_lock(&sc->sc_queue_mtx);
 	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
 		bioq_remove(&sc->sc_queue, bp);
 		g_io_deliver(bp, ENXIO);
 	}
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
 	    sc->sc_provider->name);
 	sc->sc_provider->flags |= G_PF_WITHER;
 	g_orphan_provider(sc->sc_provider, ENXIO);
 	g_topology_unlock();
 	sc->sc_provider = NULL;
 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
 			g_mirror_sync_stop(disk, 1);
 	}
 }
 
 /*
  * Start-up timeout handler: arg is the softc of the device.
  * Posts a non-blocking device event with state 0 so that the device
  * gets started even though not all of its disks have shown up.
  */
 static void
 g_mirror_go(void *arg)
 {
 	struct g_mirror_softc *sc = arg;
 	const int evflags = G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE;
 
 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
 	g_mirror_event_send(sc, 0, evflags);
 }
 
 /*
  * Work out which state the given disk should enter, based on how its
  * syncid compares with the syncid of the device.
  *
  * - Disk syncid newer than the device: the disk is destroyed and
  *   G_MIRROR_DISK_STATE_NONE is returned.
  * - Disk syncid older than the device: the synchronization data of the
  *   disk are reset and the disk is flagged as needing synchronization.
  * - A disk not flagged as needing synchronization becomes ACTIVE.
  * - A disk that needs synchronization becomes SYNCHRONIZING when the
  *   device allows automatic synchronization or the disk carries the
  *   FORCE_SYNC flag, and STALE otherwise.
  */
 static u_int
 g_mirror_determine_state(struct g_mirror_disk *disk)
 {
 	struct g_mirror_softc *sc;
 	u_int newstate;
 	int autosync;
 
 	sc = disk->d_softc;
 
 	if (disk->d_sync.ds_syncid > sc->sc_syncid) {
 		/*
 		 * Not good, NOT GOOD!
 		 * It means that mirror was started on stale disks
 		 * and more fresh disk just arrive.
 		 * If there were writes, mirror is broken, sorry.
 		 * I think the best choice here is don't touch
 		 * this disk and inform the user loudly.
 		 */
 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
 		    "disk (%s) arrives!! It will not be connected to the "
 		    "running device.", sc->sc_name,
 		    g_mirror_get_diskname(disk));
 		g_mirror_destroy_disk(disk);
 		/* Return immediately, because disk was destroyed. */
 		return (G_MIRROR_DISK_STATE_NONE);
 	}
 
 	if (disk->d_sync.ds_syncid < sc->sc_syncid) {
 		/*
 		 * Reset all synchronization data for this disk,
 		 * because if it even was synchronized, it was
 		 * synchronized to disks with different syncid.
 		 */
 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		disk->d_sync.ds_syncid = sc->sc_syncid;
 	}
 
 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
 		/* Disk does not need synchronization. */
 		newstate = G_MIRROR_DISK_STATE_ACTIVE;
 	} else {
 		/*
 		 * Synchronization may start (from the stored offset) only
 		 * when it is not disabled for the device or when it was
 		 * explicitly forced for this disk.
 		 */
 		autosync =
 		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0;
 		newstate = autosync ? G_MIRROR_DISK_STATE_SYNCHRONIZING :
 		    G_MIRROR_DISK_STATE_STALE;
 	}
 
 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(newstate));
 	return (newstate);
 }
 
 /*
  * Update device state.
  *
  * In STARTING state the device is moved to RUNNING once all disks are
  * connected, or once at least one disk is connected and 'force' is true.
  * During that transition the disks with a stale genid are dropped, the
  * biggest syncid is chosen for the device, dirty disks are scheduled for
  * synchronization and a state event is sent for every remaining disk.
  * If no usable disk is left the device is flagged for destruction.
  *
  * In RUNNING state the provider is launched or destroyed depending on
  * the number of ACTIVE and NEW disks, and a pending genid bump is done.
  *
  * The caller must hold sc_lock exclusively.
  */
 static void
 g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
 {
 	struct g_mirror_disk *disk;
 	u_int state;
 
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	switch (sc->sc_state) {
 	case G_MIRROR_DEVICE_STATE_STARTING:
 	    {
 		struct g_mirror_disk *pdisk, *tdisk;
 		u_int dirty, ndisks, genid, syncid;
 
 		KASSERT(sc->sc_provider == NULL,
 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
 		/*
 		 * Are we ready? We are, if all disks are connected or
 		 * if we have any disks and 'force' is true.
 		 */
 		ndisks = g_mirror_ndisks(sc, -1);
 		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
 			;
 		} else if (ndisks == 0) {
 			/*
 			 * Disks went down in starting phase, so destroy
 			 * device.
 			 */
 			callout_drain(&sc->sc_callout);
 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
 			    sc->sc_rootmount);
 			root_mount_rel(sc->sc_rootmount);
 			sc->sc_rootmount = NULL;
 			return;
 		} else {
 			/* Some disks are still missing; keep waiting. */
 			return;
 		}
 
 		/*
 		 * Activate all disks with the biggest syncid.
 		 */
 		if (force) {
 			/*
 			 * If 'force' is true, we have been called due to
 			 * timeout, so don't bother canceling timeout.
 			 */
 			/* Count the disks that do not need synchronization. */
 			ndisks = 0;
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
 					ndisks++;
 				}
 			}
 			if (ndisks == 0) {
 				/* No valid disks found, destroy device. */
 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
 				    __LINE__, sc->sc_rootmount);
 				root_mount_rel(sc->sc_rootmount);
 				sc->sc_rootmount = NULL;
 				return;
 			}
 		} else {
 			/* Cancel timeout. */
 			callout_drain(&sc->sc_callout);
 		}
 
 		/*
 		 * Find the biggest genid.
 		 */
 		genid = 0;
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_genid > genid)
 				genid = disk->d_genid;
 		}
 		sc->sc_genid = genid;
 		/*
 		 * Remove all disks without the biggest genid.
 		 */
 		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
 			if (disk->d_genid < genid) {
 				G_MIRROR_DEBUG(0,
 				    "Component %s (device %s) broken, skipping.",
 				    g_mirror_get_diskname(disk), sc->sc_name);
 				g_mirror_destroy_disk(disk);
 			}
 		}
 
 		/*
 		 * Find the biggest syncid.
 		 */
 		syncid = 0;
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_sync.ds_syncid > syncid)
 				syncid = disk->d_sync.ds_syncid;
 		}
 
 		/*
 		 * Here we need to look for dirty disks and if all disks
 		 * with the biggest syncid are dirty, we have to choose
 		 * one with the biggest priority and rebuild the rest.
 		 */
 		/*
 		 * Find the number of dirty disks with the biggest syncid.
 		 * Find the number of disks with the biggest syncid.
 		 * While here, find a disk with the biggest priority.
 		 */
 		dirty = ndisks = 0;
 		pdisk = NULL;
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_sync.ds_syncid != syncid)
 				continue;
 			if ((disk->d_flags &
 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
 				continue;
 			}
 			ndisks++;
 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
 				dirty++;
 				if (pdisk == NULL ||
 				    pdisk->d_priority < disk->d_priority) {
 					pdisk = disk;
 				}
 			}
 		}
 		if (dirty == 0) {
 			/* No dirty disks at all, great. */
 		} else if (dirty == ndisks) {
 			/*
 			 * Force synchronization for all dirty disks except one
 			 * with the biggest priority.
 			 */
 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
 			    "master disk for synchronization.",
 			    g_mirror_get_diskname(pdisk), sc->sc_name);
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_sync.ds_syncid != syncid)
 					continue;
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
 					continue;
 				}
 				KASSERT((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
 				    ("Disk %s isn't marked as dirty.",
 				    g_mirror_get_diskname(disk)));
 				/* Skip the disk with the biggest priority. */
 				if (disk == pdisk)
 					continue;
 				/* A zero syncid makes the disk older than the device. */
 				disk->d_sync.ds_syncid = 0;
 			}
 		} else if (dirty < ndisks) {
 			/*
 			 * Force synchronization for all dirty disks.
 			 * We have some non-dirty disks.
 			 */
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_sync.ds_syncid != syncid)
 					continue;
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
 					continue;
 				}
 				if ((disk->d_flags &
 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
 					continue;
 				}
 				disk->d_sync.ds_syncid = 0;
 			}
 		}
 
 		/* Reset hint. */
 		sc->sc_hint = NULL;
 		sc->sc_syncid = syncid;
 		if (force) {
 			/* Remember to bump syncid on first write. */
 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 		}
 		state = G_MIRROR_DEVICE_STATE_RUNNING;
 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_device_state2str(state));
 		sc->sc_state = state;
 		/*
 		 * Ask every disk to move to the state chosen for it.
 		 * From here on 'state' holds a disk state, not a device state.
 		 */
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			state = g_mirror_determine_state(disk);
 			g_mirror_event_send(disk, state,
 			    G_MIRROR_EVENT_DONTWAIT);
 			if (state == G_MIRROR_DISK_STATE_STALE)
 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
 		}
 		break;
 	    }
 	case G_MIRROR_DEVICE_STATE_RUNNING:
 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
 			/*
 			 * No active disks or no disks at all,
 			 * so destroy device.
 			 */
 			if (sc->sc_provider != NULL)
 				g_mirror_destroy_provider(sc);
 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 			break;
 		} else if (g_mirror_ndisks(sc,
 		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
 			/*
 			 * We have active disks, launch provider if it doesn't
 			 * exist.
 			 */
 			if (sc->sc_provider == NULL)
 				g_mirror_launch_provider(sc);
 			/* Release the root mount hold if it is still held. */
 			if (sc->sc_rootmount != NULL) {
 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
 				    __LINE__, sc->sc_rootmount);
 				root_mount_rel(sc->sc_rootmount);
 				sc->sc_rootmount = NULL;
 			}
 		}
 		/*
 		 * Genid should be bumped immediately, so do it here.
 		 */
 		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
 			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
 			g_mirror_bump_genid(sc);
 		}
 		break;
 	default:
 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
 		break;
 	}
 }
 
 /*
  * Update disk state and device state if needed.
  *
  * Moves 'disk' to the requested 'state' and performs the work tied to
  * that transition (list insertion, starting or stopping synchronization,
  * metadata update, disk destruction).
  *
  * Returns 0 on success.  The only non-zero return is the error reported
  * by g_mirror_clear_metadata() when handling G_MIRROR_DISK_STATE_DESTROY.
  *
  * The caller must hold sc_lock exclusively.
  */
 /* Log the transition from the current state of 'disk' to 'state'. */
 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
 	"Disk %s state changed from %s to %s (device %s).",		\
 	g_mirror_get_diskname(disk),					\
 	g_mirror_disk_state2str(disk->d_state),				\
 	g_mirror_disk_state2str(state), sc->sc_name)
 static int
 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
 {
 	struct g_mirror_softc *sc;
 
 	sc = disk->d_softc;
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	/* Re-entered from the NEW case once the follow-up state is known. */
 again:
 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
 	    g_mirror_disk_state2str(state));
 	switch (state) {
 	case G_MIRROR_DISK_STATE_NEW:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk arrive.
 		 */
 		/* Previous state should be NONE. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		disk->d_state = state;
 		/*
 		 * Insert the disk so that the list stays ordered by
 		 * descending priority.
 		 */
 		if (LIST_EMPTY(&sc->sc_disks))
 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
 		else {
 			struct g_mirror_disk *dp;
 
 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
 				if (disk->d_priority >= dp->d_priority) {
 					LIST_INSERT_BEFORE(dp, disk, d_next);
 					/* Mark the disk as already inserted. */
 					dp = NULL;
 					break;
 				}
 				/* Stop on the last element, keeping dp valid. */
 				if (LIST_NEXT(dp, d_next) == NULL)
 					break;
 			}
 			/* Lowest priority so far: append after the last one. */
 			if (dp != NULL)
 				LIST_INSERT_AFTER(dp, disk, d_next);
 		}
 		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		/* While the device is starting, disks just stay in NEW. */
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
 			break;
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		/*
 		 * The device is running: pick the next state right away.
 		 * NONE means the disk was destroyed, so it is left alone.
 		 */
 		state = g_mirror_determine_state(disk);
 		if (state != G_MIRROR_DISK_STATE_NONE)
 			goto again;
 		break;
 	case G_MIRROR_DISK_STATE_ACTIVE:
 		/*
 		 * Possible scenarios:
 		 * 1. New disk does not need synchronization.
 		 * 2. Synchronization process finished successfully.
 		 */
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		/* Previous state should be NEW or SYNCHRONIZING. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		/* Synchronization finished: clear its flags and stop it. */
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
 			g_mirror_sync_stop(disk, 0);
 		}
 		disk->d_state = state;
 		disk->d_sync.ds_offset = 0;
 		disk->d_sync.ds_offset_done = 0;
 		g_mirror_update_idle(sc, disk);
 		g_mirror_update_metadata(disk);
 		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		break;
 	case G_MIRROR_DISK_STATE_STALE:
 		/*
 		 * Possible scenarios:
 		 * 1. Stale disk was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		/*
 		 * STALE state is only possible if device is marked
 		 * NOAUTOSYNC.
 		 */
 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		g_mirror_update_metadata(disk);
 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 		break;
 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
 		/*
 		 * Possible scenarios:
 		 * 1. Disk which needs synchronization was connected.
 		 */
 		/* Previous state should be NEW. */
 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
 		    g_mirror_device_state2str(sc->sc_state),
 		    g_mirror_get_diskname(disk),
 		    g_mirror_disk_state2str(disk->d_state)));
 		DISK_STATE_CHANGED();
 
 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
 		disk->d_state = state;
 		/*
 		 * Synchronization is started here only when the provider
 		 * already exists.
 		 */
 		if (sc->sc_provider != NULL) {
 			g_mirror_sync_start(disk);
 			g_mirror_update_metadata(disk);
 		}
 		break;
 	case G_MIRROR_DISK_STATE_DISCONNECTED:
 		/*
 		 * Possible scenarios:
 		 * 1. Device wasn't running yet, but disk disappear.
 		 * 2. Disk was active and disappear.
 		 * 3. Disk disappear during synchronization process.
 		 */
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
 			/*
 			 * Previous state should be ACTIVE, STALE or
 			 * SYNCHRONIZING.
 			 */
 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
 			    ("Wrong disk state (%s, %s).",
 			    g_mirror_get_diskname(disk),
 			    g_mirror_disk_state2str(disk->d_state)));
 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
 			/* Previous state should be NEW. */
 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
 			    ("Wrong disk state (%s, %s).",
 			    g_mirror_get_diskname(disk),
 			    g_mirror_disk_state2str(disk->d_state)));
 			/*
 			 * Reset bumping syncid if disk disappeared in STARTING
 			 * state.
 			 */
 			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
 				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
 #ifdef	INVARIANTS
 		} else {
 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
 			    sc->sc_name,
 			    g_mirror_device_state2str(sc->sc_state),
 			    g_mirror_get_diskname(disk),
 			    g_mirror_disk_state2str(disk->d_state)));
 #endif
 		}
 		DISK_STATE_CHANGED();
 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 
 		g_mirror_destroy_disk(disk);
 		break;
 	case G_MIRROR_DISK_STATE_DESTROY:
 	    {
 		int error;
 
 		/* Give up, leaving the disk in place, if this fails. */
 		error = g_mirror_clear_metadata(disk);
 		if (error != 0)
 			return (error);
 		DISK_STATE_CHANGED();
 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
 		    sc->sc_name, g_mirror_get_diskname(disk));
 
 		g_mirror_destroy_disk(disk);
 		sc->sc_ndisks--;
 		/*
 		 * 'disk' is reused as the iterator from here on: the
 		 * metadata of every remaining disk are updated after the
 		 * disk count changed.
 		 */
 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 			g_mirror_update_metadata(disk);
 		}
 		break;
 	    }
 	default:
 		KASSERT(1 == 0, ("Unknown state (%u).", state));
 		break;
 	}
 	return (0);
 }
 #undef	DISK_STATE_CHANGED
 
 int
 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
 {
 	struct g_provider *pp;
 	u_char *buf;
 	int error;
 
 	g_topology_assert();
 
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	/* Metadata are stored on last sector. */
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL) {
 		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
 		    cp->provider->name, error);
 		return (error);
 	}
 
 	/* Decode metadata. */
 	error = mirror_metadata_decode(buf, md);
 	g_free(buf);
 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
 		return (EINVAL);
 	if (md->md_version > G_MIRROR_VERSION) {
 		G_MIRROR_DEBUG(0,
 		    "Kernel module is too old to handle metadata from %s.",
 		    cp->provider->name);
 		return (EINVAL);
 	}
 	if (error != 0) {
 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
 		    cp->provider->name);
 		return (error);
 	}
 
 	return (0);
 }
 
 static int
 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
     struct g_mirror_metadata *md)
 {
 
 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
 		    pp->name, md->md_did);
 		return (EEXIST);
 	}
 	if (md->md_all != sc->sc_ndisks) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_all", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_slice != sc->sc_slice) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_slice", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_balance != sc->sc_balance) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_balance", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_mediasize != sc->sc_mediasize) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_mediasize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if (sc->sc_mediasize > pp->mediasize) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
 		    sc->sc_name);
 		return (EINVAL);
 	}
 	if (md->md_sectorsize != sc->sc_sectorsize) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid '%s' field on disk %s (device %s), skipping.",
 		    "md_sectorsize", pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid sector size of disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid device flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
 		G_MIRROR_DEBUG(1,
 		    "Invalid disk flags on disk %s (device %s), skipping.",
 		    pp->name, sc->sc_name);
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
  * Add the provider 'pp', described by metadata 'md', to device 'sc'.
  *
  * The metadata are validated first, and a component whose genid is older
  * than that of an already running device is refused.  The new disk is
  * then announced with a NEW state event that is waited for.  On-disk
  * metadata of an older version are rewritten afterwards.
  *
  * Returns 0 on success or an errno value.  The caller must not hold the
  * topology lock.
  */
 int
 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
     struct g_mirror_metadata *md)
 {
 	struct g_mirror_disk *newdisk;
 	int rc;
 
 	g_topology_assert_not();
 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
 
 	if ((rc = g_mirror_check_metadata(sc, pp, md)) != 0)
 		return (rc);
 
 	/* A running device takes no component with an outdated genid. */
 	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
 		if (md->md_genid < sc->sc_genid) {
 			G_MIRROR_DEBUG(0,
 			    "Component %s (device %s) broken, skipping.",
 			    pp->name, sc->sc_name);
 			return (EINVAL);
 		}
 	}
 
 	newdisk = g_mirror_init_disk(sc, pp, md, &rc);
 	if (newdisk == NULL)
 		return (rc);
 
 	rc = g_mirror_event_send(newdisk, G_MIRROR_DISK_STATE_NEW,
 	    G_MIRROR_EVENT_WAIT);
 	if (rc != 0)
 		return (rc);
 
 	if (md->md_version >= G_MIRROR_VERSION)
 		return (0);
 
 	G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
 	    pp->name, md->md_version, G_MIRROR_VERSION);
 	g_mirror_update_metadata(newdisk);
 	return (0);
 }
 
 /*
  * Event handler performing the destruction that was postponed until the
  * last close of the device; 'arg' is the softc.
  *
  * A cancelled event (EV_CANCEL) is only logged.  Otherwise the topology
  * lock is exchanged for sc_lock and a soft destroy is attempted.  sc_lock
  * is released here only when the destroy fails.  The topology lock is
  * taken again before returning.
  */
 static void
 g_mirror_destroy_delayed(void *arg, int flag)
 {
 	struct g_mirror_softc *sc;
 	int rc;
 
 	if (flag == EV_CANCEL) {
 		G_MIRROR_DEBUG(1, "Destroying canceled.");
 		return;
 	}
 
 	sc = arg;
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 
 	/* Only DESTROYING, never DESTROY, may be set at this point. */
 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
 	    ("DESTROY flag set on %s.", sc->sc_name));
 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
 	    ("DESTROYING flag not set on %s.", sc->sc_name));
 
 	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
 	rc = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
 	if (rc != 0) {
 		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
 		    sc->sc_name, rc);
 		sx_xunlock(&sc->sc_lock);
 	}
 	g_topology_lock();
 }
 
 /*
  * Access method of the mirror provider.
  *
  * 'acr', 'acw' and 'ace' are the requested changes of the read, write
  * and exclusive counts.  Any request that raises a count is refused with
  * ENXIO when the device is being destroyed, has no disks, or is waiting
  * to be destroyed on last close.  Requests that only lower counts always
  * succeed.  When the last reference to a device waiting for destruction
  * goes away, the delayed destroy event is posted.
  *
  * Called with the topology lock held; it is dropped while sc_lock is
  * held and taken again before returning.
  */
 static int
 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
 {
 	struct g_mirror_softc *sc;
 	int dcr, dcw, dce, error, opening;
 
 	g_topology_assert();
 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
 	    acw, ace);
 
 	/* Does this request raise any of the access counts? */
 	opening = (acr > 0 || acw > 0 || ace > 0);
 
 	sc = pp->geom->softc;
 	if (sc == NULL && !opening)
 		return (0);
 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
 
 	/* Access counts as they will be after this request. */
 	dcr = pp->acr + acr;
 	dcw = pp->acw + acw;
 	dce = pp->ace + ace;
 	error = 0;
 
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
 	    LIST_EMPTY(&sc->sc_disks)) {
 		if (opening)
 			error = ENXIO;
 	} else {
 		/* No writers will be left after this request. */
 		if (dcw == 0)
 			g_mirror_idle(sc, dcw);
 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0) {
 			if (opening) {
 				error = ENXIO;
 			} else if (dcr == 0 && dcw == 0 && dce == 0) {
 				/* Last close: run the postponed destroy. */
 				g_post_event(g_mirror_destroy_delayed, sc,
 				    M_WAITOK, sc, NULL);
 			}
 		}
 	}
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	return (error);
 }
 
 /*
  * Create a new mirror device of class 'mp' from the metadata 'md'.
  *
  * Two geoms are created: the action geom named after the device and the
  * synchronization geom named "<name>.sync"; both point to the same softc.
  * A worker process is started for the device, a root mount hold is taken
  * and the start-up timeout (g_mirror_timeout seconds) is armed.
  *
  * Returns the action geom, or NULL when the metadata describe fewer than
  * one disk or the worker process cannot be created.
  *
  * The caller must hold the topology lock.
  */
 static struct g_geom *
 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
 {
 	struct g_mirror_softc *sc;
 	struct g_geom *gp;
 	int error, timeout;
 
 	g_topology_assert();
 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
 	    md->md_mid);
 
 	/* One disk is minimum. */
 	if (md->md_all < 1)
 		return (NULL);
 	/*
 	 * Action geom.
 	 */
 	gp = g_new_geomf(mp, "%s", md->md_name);
 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
 	gp->start = g_mirror_start;
 	gp->orphan = g_mirror_orphan;
 	gp->access = g_mirror_access;
 	gp->dumpconf = g_mirror_dumpconf;
 
 	/* Copy the device configuration from the metadata. */
 	sc->sc_id = md->md_mid;
 	sc->sc_slice = md->md_slice;
 	sc->sc_balance = md->md_balance;
 	sc->sc_mediasize = md->md_mediasize;
 	sc->sc_sectorsize = md->md_sectorsize;
 	sc->sc_ndisks = md->md_all;
 	sc->sc_flags = md->md_mflags;
 	sc->sc_bump_id = 0;
 	sc->sc_idle = 1;
 	sc->sc_last_write = time_uptime;
 	sc->sc_writes = 0;
 	/* Locks, request queues, disk list, event queue and callout. */
 	sx_init(&sc->sc_lock, "gmirror:lock");
 	bioq_init(&sc->sc_queue);
 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
 	bioq_init(&sc->sc_regular_delayed);
 	bioq_init(&sc->sc_inflight);
 	bioq_init(&sc->sc_sync_delayed);
 	LIST_INIT(&sc->sc_disks);
 	TAILQ_INIT(&sc->sc_events);
 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
 	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
 	gp->softc = sc;
 	sc->sc_geom = gp;
 	sc->sc_provider = NULL;
 	/*
 	 * Synchronization geom.
 	 */
 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
 	gp->softc = sc;
 	gp->orphan = g_mirror_orphan;
 	sc->sc_sync.ds_geom = gp;
 	sc->sc_sync.ds_ndisks = 0;
 	/* Start the worker process of this device. */
 	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
 	    "g_mirror %s", md->md_name);
 	if (error != 0) {
 		/* Undo everything that was set up above. */
 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
 		    sc->sc_name);
 		g_destroy_geom(sc->sc_sync.ds_geom);
 		mtx_destroy(&sc->sc_events_mtx);
 		mtx_destroy(&sc->sc_queue_mtx);
 		sx_destroy(&sc->sc_lock);
 		g_destroy_geom(sc->sc_geom);
 		free(sc, M_MIRROR);
 		return (NULL);
 	}
 
 	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
 
 	/* Take a root mount hold and remember it in the softc. */
 	sc->sc_rootmount = root_mount_hold("GMIRROR");
 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
 	/*
 	 * Run timeout.
 	 */
 	timeout = g_mirror_timeout * hz;
 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
 	return (sc->sc_geom);
 }
 
 /*
  * Tear down a mirror device.
  *
  * Must be called with sc->sc_lock exclusively held and without the GEOM
  * topology lock; both conditions are asserted below.  'how' selects what
  * happens when the provider still has open references:
  *   G_MIRROR_DESTROY_SOFT    - refuse and return EBUSY;
  *   G_MIRROR_DESTROY_DELAYED - stop synchronization on every disk in the
  *                              SYNCHRONIZING state, set
  *                              G_MIRROR_DEVICE_FLAG_DESTROYING and return
  *                              EBUSY;
  *   G_MIRROR_DESTROY_HARD    - log the fact and destroy anyway.
  *
  * Returns ENXIO if sc is NULL, EBUSY as described above and 0 otherwise.
  * On the full teardown path sc is freed before returning, so after a 0
  * return the caller must neither touch sc nor unlock sc->sc_lock.
  */
 int
 g_mirror_destroy(struct g_mirror_softc *sc, int how)
 {
 	struct g_mirror_disk *disk;
 	struct g_provider *pp;
 
 	g_topology_assert_not();
 	if (sc == NULL)
 		return (ENXIO);
 	sx_assert(&sc->sc_lock, SX_XLOCKED);
 
 	pp = sc->sc_provider;
 	/* The provider exists and somebody still has it open. */
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		switch (how) {
 		case G_MIRROR_DESTROY_SOFT:
 			G_MIRROR_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		case G_MIRROR_DESTROY_DELAYED:
 			G_MIRROR_DEBUG(1,
 			    "Device %s will be destroyed on last close.",
 			    pp->name);
 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_state ==
 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
 					g_mirror_sync_stop(disk, 1);
 				}
 			}
 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
 			return (EBUSY);
 		case G_MIRROR_DESTROY_HARD:
 			/* Only log; execution continues with the teardown. */
 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
 			    "can't be definitely removed.", pp->name);
 		}
 	}
 
 	/*
 	 * Detach the softc from both geoms under the topology lock.  A NULL
 	 * softc here means the detach has already been done.
 	 * NOTE(review): this early return reports success with sc->sc_lock
 	 * still held and sc not freed; presumably another destroy is already
 	 * in progress - confirm the callers cope with that.
 	 */
 	g_topology_lock();
 	if (sc->sc_geom->softc == NULL) {
 		g_topology_unlock();
 		return (0);
 	}
 	sc->sc_geom->softc = NULL;
 	sc->sc_sync.ds_geom->softc = NULL;
 	g_topology_unlock();
 
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
 	/* Drop the device lock before waking and waiting for the worker. */
 	sx_xunlock(&sc->sc_lock);
 	mtx_lock(&sc->sc_queue_mtx);
 	wakeup(sc);
 	mtx_unlock(&sc->sc_queue_mtx);
 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
 	/* Poll (hz / 5 ticks per sleep) until sc_worker has been cleared. */
 	while (sc->sc_worker != NULL)
 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
 	sx_xlock(&sc->sc_lock);
 	g_mirror_destroy_device(sc);
 	free(sc, M_MIRROR);
 	return (0);
 }
 
 /*
  * Orphan method installed on the temporary tasting geom.  It is not
  * expected to run at all, so reaching it trips an assertion that names
  * the provider being tasted.
  */
 static void
 g_mirror_taste_orphan(struct g_consumer *cp)
 {
 
 	KASSERT(0, ("%s called while tasting %s.", __func__,
 	    cp->provider->name));
 }
 
 /*
  * Taste method of the mirror class: inspect provider 'pp' for mirror
  * metadata and, when it is acceptable, attach the provider as a disk of
  * the matching mirror device, creating that device first if necessary.
  *
  * Called with the topology lock held (asserted).  The lock is dropped
  * while the device lock is held and is re-acquired before every return
  * that follows.  Returns the geom of the device the disk was added to, or
  * NULL when the provider was rejected or could not be added.
  */
 static struct g_geom *
 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_mirror_metadata md;
 	struct g_mirror_softc *sc;
 	struct g_consumer *cp;
 	struct g_geom *gp;
 	int error;
 
 	g_topology_assert();
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
 
 	/* Temporary geom and consumer, used only to read the metadata. */
 	gp = g_new_geomf(mp, "mirror:taste");
 	/*
 	 * This orphan function should be never called.
 	 */
 	gp->orphan = g_mirror_taste_orphan;
 	cp = g_new_consumer(gp);
 	g_attach(cp, pp);
 	error = g_mirror_read_metadata(cp, &md);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	/* A provider name recorded in the metadata must match this one. */
 	if (md.md_provider[0] != '\0' &&
 	    !g_compare_names(md.md_provider, pp->name))
 		return (NULL);
 	/* A provider size recorded in the metadata must match as well. */
 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
 		return (NULL);
 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
 		G_MIRROR_DEBUG(0,
 		    "Device %s: provider %s marked as inactive, skipping.",
 		    md.md_name, pp->name);
 		return (NULL);
 	}
 	if (g_mirror_debug >= 2)
 		mirror_metadata_dump(&md);
 
 	/*
 	 * Let's check if device already exists.
 	 */
 	sc = NULL;
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		/* Skip synchronization geoms; only action geoms are matched. */
 		if (sc->sc_sync.ds_geom == gp)
 			continue;
 		if (strcmp(md.md_name, sc->sc_name) != 0)
 			continue;
 		/* Same name but a different device ID: refuse the provider. */
 		if (md.md_mid != sc->sc_id) {
 			G_MIRROR_DEBUG(0, "Device %s already configured.",
 			    sc->sc_name);
 			return (NULL);
 		}
 		break;
 	}
 	/* gp is NULL here when the loop finished without a match. */
 	if (gp == NULL) {
 		gp = g_mirror_create(mp, &md);
 		if (gp == NULL) {
 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
 			    md.md_name);
 			return (NULL);
 		}
 		sc = gp->softc;
 	}
 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
 	/* Swap the topology lock for the device lock while adding the disk. */
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
 	error = g_mirror_add_disk(sc, pp, &md);
 	if (error != 0) {
 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
 		    pp->name, gp->name, error);
 		/* A device left without any disk is destroyed right away. */
 		if (LIST_EMPTY(&sc->sc_disks)) {
 			g_cancel_event(sc);
 			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
 			g_topology_lock();
 			return (NULL);
 		}
 		gp = NULL;
 	}
 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
 	/* Destruction was requested in the meantime: carry it out now. */
 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
 		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
 		g_topology_lock();
 		return (NULL);
 	}
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	return (gp);
 }
 
 /*
  * destroy_geom method of the mirror class: attempt a soft destroy of the
  * device behind 'gp'.
  *
  * Entered with the topology lock held; the lock is dropped around the
  * destroy attempt and held again on return.  Returns ENXIO when the geom
  * has no softc attached (the same value g_mirror_destroy() returns for a
  * NULL softc), otherwise the result of g_mirror_destroy(): 0 on success or
  * an error such as EBUSY when the provider is still open.
  */
 static int
 g_mirror_destroy_geom(struct gctl_req *req __unused,
     struct g_class *mp __unused, struct g_geom *gp)
 {
 	struct g_mirror_softc *sc;
 	int error;
 
 	/*
 	 * Sample the softc once, while the topology lock is still held, and
 	 * bail out before dereferencing it if the geom has already been
 	 * detached from its device.
 	 */
 	sc = gp->softc;
 	if (sc == NULL)
 		return (ENXIO);
 
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 	g_cancel_event(sc);
 	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
 	/* On success the softc has been freed, so there is nothing to unlock. */
 	if (error != 0)
 		sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	return (error);
 }
 
 /*
  * dumpconf method of the mirror class: append XML configuration elements
  * to 'sb', each line prefixed with 'indent'.
  *
  * Nothing is emitted for the synchronization geom, for a geom without a
  * softc, or when 'pp' is non-NULL.  With 'cp' set the per-disk elements of
  * the disk stored in cp->private are written; with both 'pp' and 'cp' NULL
  * the per-device elements are written.
  *
  * Called with the topology lock held (asserted).  In the two branches that
  * produce output the topology lock is dropped while sc->sc_lock is held
  * and re-acquired before returning.
  */
 static void
 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_mirror_softc *sc;
 
 	g_topology_assert();
 
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	/* Skip synchronization geom. */
 	if (gp == sc->sc_sync.ds_geom)
 		return;
 	if (pp != NULL) {
 		/* Nothing here. */
 	} else if (cp != NULL) {
 		struct g_mirror_disk *disk;
 
 		disk = cp->private;
 		if (disk == NULL)
 			return;
 		g_topology_unlock();
 		sx_xlock(&sc->sc_lock);
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
 			/* Progress: sync offset as a percentage of media size. */
 			sbuf_printf(sb, "%s<Synchronized>", indent);
 			if (disk->d_sync.ds_offset == 0)
 				sbuf_printf(sb, "0%%");
 			else {
 				/*
 				 * NOTE(review): dereferences sc->sc_provider
 				 * without a NULL check; presumably a provider
 				 * always exists while a disk is synchronizing
 				 * - confirm.
 				 */
 				sbuf_printf(sb, "%u%%",
 				    (u_int)((disk->d_sync.ds_offset * 100) /
 				    sc->sc_provider->mediasize));
 			}
 			sbuf_printf(sb, "</Synchronized>\n");
 			if (disk->d_sync.ds_offset > 0) {
 				sbuf_printf(sb, "%s<BytesSynced>%jd"
 				    "</BytesSynced>\n", indent,
 				    (intmax_t)disk->d_sync.ds_offset);
 			}
 		}
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
 		    disk->d_sync.ds_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
 		    disk->d_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (disk->d_flags == 0)
 			sbuf_printf(sb, "NONE");
 		else {
 			int first = 1;
 
 /* Append 'name' if 'flag' is set, separating entries with ", ". */
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((disk->d_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
 			    "SYNCHRONIZING");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
 			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
 #undef	ADD_FLAG
 		}
 		sbuf_printf(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
 		    disk->d_priority);
 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 		    g_mirror_disk_state2str(disk->d_state));
 		sx_xunlock(&sc->sc_lock);
 		g_topology_lock();
 	} else {
 		/* Neither provider nor consumer given: describe the device. */
 		g_topology_unlock();
 		sx_xlock(&sc->sc_lock);
 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
 		sbuf_printf(sb, "%s<Flags>", indent);
 		if (sc->sc_flags == 0)
 			sbuf_printf(sb, "NONE");
 		else {
 			int first = 1;
 
 /*
  * Same helper as in the per-disk branch, but testing sc->sc_flags.  Only
  * the two flags listed below are reported, so the element is left empty
  * when sc_flags is non-zero but neither of them is set.
  */
 #define	ADD_FLAG(flag, name)	do {					\
 	if ((sc->sc_flags & (flag)) != 0) {				\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
 #undef	ADD_FLAG
 		}
 		sbuf_printf(sb, "</Flags>\n");
 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
 		    (u_int)sc->sc_slice);
 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
 		    balance_name(sc->sc_balance));
 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
 		    sc->sc_ndisks);
 		sbuf_printf(sb, "%s<State>", indent);
 		/* COMPLETE only when the ACTIVE disk count equals sc_ndisks. */
 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
 			sbuf_printf(sb, "%s", "STARTING");
 		else if (sc->sc_ndisks ==
 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
 			sbuf_printf(sb, "%s", "COMPLETE");
 		else
 			sbuf_printf(sb, "%s", "DEGRADED");
 		sbuf_printf(sb, "</State>\n");
 		sx_xunlock(&sc->sc_lock);
 		g_topology_lock();
 	}
 }
 
 /*
  * shutdown_post_sync event handler; 'arg' is the mirror class pointer that
  * was passed at registration time.
  *
  * Sets the global g_mirror_shutdown flag and then, for every action geom of
  * the class that still has a softc, calls g_mirror_idle(sc, -1), cancels
  * the events queued for the device and requests a delayed destroy.  The
  * topology lock is dropped around the per-device work because
  * g_mirror_destroy() must be called without it.
  */
 static void
 g_mirror_shutdown_post_sync(void *arg, int howto)
 {
 	struct g_class *mp;
 	struct g_geom *gp, *gp2;
 	struct g_mirror_softc *sc;
 	int error;
 
 	mp = arg;
 	DROP_GIANT();
 	g_topology_lock();
 	g_mirror_shutdown = 1;
 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
 		if ((sc = gp->softc) == NULL)
 			continue;
 		/* Skip synchronization geom. */
 		if (gp == sc->sc_sync.ds_geom)
 			continue;
 		g_topology_unlock();
 		sx_xlock(&sc->sc_lock);
 		g_mirror_idle(sc, -1);
 		g_cancel_event(sc);
 		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
 		/* On success the softc is gone; unlock only on failure. */
 		if (error != 0)
 			sx_xunlock(&sc->sc_lock);
 		g_topology_lock();
 	}
 	g_topology_unlock();
 	PICKUP_GIANT();
 }
 
 /*
  * Class init method: hook g_mirror_shutdown_post_sync() into the
  * shutdown_post_sync event with the class pointer as its argument.  A
  * failed registration is only reported, not treated as fatal.
  */
 static void
 g_mirror_init(struct g_class *mp)
 {
 
 	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
 	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
 	if (g_mirror_post_sync != NULL)
 		return;
 	G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
 }
 
 /*
  * Class fini method: remove the shutdown_post_sync handler, provided one
  * was successfully registered by g_mirror_init().
  */
 static void
 g_mirror_fini(struct g_class *mp)
 {
 
 	if (g_mirror_post_sync == NULL)
 		return;
 	EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
 }
 
 /* Declare g_mirror_class as a GEOM class under the module name g_mirror. */
 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
Index: stable/9/sys/geom/raid/md_promise.c
===================================================================
--- stable/9/sys/geom/raid/md_promise.c	(revision 299397)
+++ stable/9/sys/geom/raid/md_promise.c	(revision 299398)
@@ -1,2000 +1,2000 @@
 /*-
  * Copyright (c) 2011 Alexander Motin <mav@FreeBSD.org>
  * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/kobj.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/systm.h>
 #include <geom/geom.h>
 #include "geom/raid/g_raid.h"
 #include "g_raid_md_if.h"
 
 static MALLOC_DEFINE(M_MD_PROMISE, "md_promise_data", "GEOM_RAID Promise metadata");
 
 /* Number of entries in the disks[] array of one metadata record. */
 #define	PROMISE_MAX_DISKS	8
 /* Number of metadata records (subdisks) kept per physical disk. */
 #define	PROMISE_MAX_SUBDISKS	2
 /*
  * Distance, in sectors, between consecutive metadata records: record N is
  * located sectorsize * (63 - N * PROMISE_META_OFFSET) bytes before the end
  * of the media.
  */
 #define	PROMISE_META_OFFSET	14
 
 /*
  * Packed, 12-byte descriptor of one subdisk as embedded in the Promise
  * metadata record, both for the subdisk the record belongs to and for
  * every member listed in the volume's disks[] array.
  */
 struct promise_raid_disk {
 	uint8_t		flags;			/* Subdisk status. */
 #define PROMISE_F_VALID		0x01
 #define PROMISE_F_ONLINE	0x02
 #define PROMISE_F_ASSIGNED	0x04
 #define PROMISE_F_SPARE		0x08
 #define PROMISE_F_DUPLICATE	0x10
 #define PROMISE_F_REDIR		0x20
 #define PROMISE_F_DOWN		0x40
 #define PROMISE_F_READY		0x80
 
 	uint8_t		number;			/* Position in a volume. */
 	uint8_t		channel;		/* ATA channel number. */
 	uint8_t		device;			/* ATA device number. */
 	uint64_t	id __packed;		/* Subdisk ID. */
 } __packed;
 
 /*
  * Promise metadata record as it is stored on disk.  The packed layout is
  * 2048 bytes long, i.e. 512 32-bit words; 'checksum' is the last word and
  * holds the 32-bit sum of the 511 words that precede it.  The *_high
  * fields carry the upper 32 bits of the like-named 32-bit fields.
  */
 struct promise_raid_conf {
 	char		promise_id[24];
 #define PROMISE_MAGIC		"Promise Technology, Inc."
 #define FREEBSD_MAGIC		"FreeBSD ATA driver RAID "
 
 	uint32_t	dummy_0;
 	uint64_t	magic_0;
 #define PROMISE_MAGIC0(x)	(((uint64_t)(x.channel) << 48) | \
 				((uint64_t)(x.device != 0) << 56))
 	uint16_t	magic_1;
 	uint32_t	magic_2;
 	uint8_t		filler1[470];
 
 	uint32_t	integrity;
 #define PROMISE_I_VALID		0x00000080
 
 	struct promise_raid_disk	disk;	/* This subdisk info. */
 	uint32_t	disk_offset;		/* Subdisk offset. */
 	uint32_t	disk_sectors;		/* Subdisk size */
 	uint32_t	disk_rebuild;		/* Rebuild position. */
 	uint16_t	generation;		/* Generation number. */
 	uint8_t		status;			/* Volume status. */
 #define PROMISE_S_VALID		0x01
 #define PROMISE_S_ONLINE	0x02
 #define PROMISE_S_INITED	0x04
 #define PROMISE_S_READY		0x08
 #define PROMISE_S_DEGRADED	0x10
 #define PROMISE_S_MARKED	0x20
 #define PROMISE_S_MIGRATING	0x40
 #define PROMISE_S_FUNCTIONAL	0x80
 
 	uint8_t		type;			/* Volume type. */
 #define PROMISE_T_RAID0		0x00
 #define PROMISE_T_RAID1		0x01
 #define PROMISE_T_RAID3		0x02
 #define PROMISE_T_RAID5		0x04
 #define PROMISE_T_SPAN		0x08
 #define PROMISE_T_JBOD		0x10
 
 	uint8_t		total_disks;		/* Disks in this volume. */
 	uint8_t		stripe_shift;		/* Strip size. */
 	uint8_t		array_width;		/* Number of RAID0 stripes. */
 	uint8_t		array_number;		/* Global volume number. */
 	uint32_t	total_sectors;		/* Volume size. */
 	uint16_t	cylinders;		/* Volume geometry: C. */
 	uint8_t		heads;			/* Volume geometry: H. */
 	uint8_t		sectors;		/* Volume geometry: S. */
 	uint64_t	volume_id __packed;	/* Volume ID. */
 	struct promise_raid_disk	disks[PROMISE_MAX_DISKS];
 						/* Subdisks in this volume. */
 	char		name[32];		/* Volume label. */
 
 	uint32_t	filler2[8];
 	uint32_t	magic_3;	/* Something related to rebuild. */
 	uint64_t	rebuild_lba64;	/* Per-volume rebuild position. */
 	uint32_t	magic_4;
 	uint32_t	magic_5;
 	uint32_t	total_sectors_high;
 	uint8_t		magic_6;
 	uint8_t		sector_size;
 	uint16_t	magic_7;
 	uint32_t	magic_8[31];
 	uint32_t	backup_time;
 	uint16_t	magic_9;
 	uint32_t	disk_offset_high;
 	uint32_t	disk_sectors_high;
 	uint32_t	disk_rebuild_high;
 	uint16_t	magic_10;
 	uint32_t	magic_11[3];
 	uint32_t	filler3[284];
 	uint32_t	checksum;
 } __packed;
 
 /* Per-disk private data of the Promise metadata module. */
 struct g_raid_md_promise_perdisk {
 	/* Set to 1 when a record is dropped from pd_meta[]. */
 	int		 pd_updated;
 	/* Number of valid leading entries in pd_meta[]. */
 	int		 pd_subdisks;
 	/* Metadata records held for this disk; unused slots are NULL. */
 	struct promise_raid_conf	*pd_meta[PROMISE_MAX_SUBDISKS];
 };
 
 /* Per-volume private data of the Promise metadata module. */
 struct g_raid_md_promise_pervolume {
 	struct promise_raid_conf	*pv_meta;	/* Volume metadata record. */
 	uint64_t			 pv_id;		/* ID used to look the volume up. */
 	/* NOTE(review): presumably mirrors the 'generation' metadata field. */
 	uint16_t			 pv_generation;
 	/* NOTE(review): presumably the count of member disks seen so far. */
 	int				 pv_disks_present;
 	int				 pv_started;	/* Non-zero once the volume is started. */
 	struct callout			 pv_start_co;	/* STARTING state timer. */
 };
 
 /*
  * Forward declarations of the metadata-module methods that are wired into
  * g_raid_md_promise_methods[] below.
  */
 static g_raid_md_create_t g_raid_md_create_promise;
 static g_raid_md_taste_t g_raid_md_taste_promise;
 static g_raid_md_event_t g_raid_md_event_promise;
 static g_raid_md_volume_event_t g_raid_md_volume_event_promise;
 static g_raid_md_ctl_t g_raid_md_ctl_promise;
 static g_raid_md_write_t g_raid_md_write_promise;
 static g_raid_md_fail_disk_t g_raid_md_fail_disk_promise;
 static g_raid_md_free_disk_t g_raid_md_free_disk_promise;
 static g_raid_md_free_volume_t g_raid_md_free_volume_promise;
 static g_raid_md_free_t g_raid_md_free_promise;
 
 /*
  * kobj method table mapping each g_raid_md interface method to its Promise
  * implementation.  The { 0, 0 } entry terminates the table.
  */
 static kobj_method_t g_raid_md_promise_methods[] = {
 	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_promise),
 	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_promise),
 	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_promise),
 	KOBJMETHOD(g_raid_md_volume_event,	g_raid_md_volume_event_promise),
 	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_promise),
 	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_promise),
 	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_promise),
 	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_promise),
 	KOBJMETHOD(g_raid_md_free_volume,	g_raid_md_free_volume_promise),
 	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_promise),
 	{ 0, 0 }
 };
 
 /*
  * Class descriptor of the "Promise" metadata module: its name, method
  * table and object size, followed by the module-specific settings (enabled,
  * priority 100).
  */
 static struct g_raid_md_class g_raid_md_promise_class = {
 	"Promise",
 	g_raid_md_promise_methods,
 	sizeof(struct g_raid_md_object),
 	.mdc_enable = 1,
 	.mdc_priority = 100
 };
 
 
 /*
  * Dump the interesting fields of a Promise metadata record to the console.
  * Nothing is printed unless g_raid_debug is at least 1.
  *
  * 64-bit fields are cast to uintmax_t so that the argument type matches
  * the %jx / %ju conversions, and the unsigned backup_time field is printed
  * with %u.
  */
 static void
 g_raid_md_promise_print(struct promise_raid_conf *meta)
 {
 	int i;
 
 	if (g_raid_debug < 1)
 		return;
 
 	printf("********* ATA Promise Metadata *********\n");
 	printf("promise_id          <%.24s>\n", meta->promise_id);
 	printf("disk                %02x %02x %02x %02x %016jx\n",
 	    meta->disk.flags, meta->disk.number, meta->disk.channel,
 	    meta->disk.device, (uintmax_t)meta->disk.id);
 	printf("disk_offset         %u\n", meta->disk_offset);
 	printf("disk_sectors        %u\n", meta->disk_sectors);
 	printf("disk_rebuild        %u\n", meta->disk_rebuild);
 	printf("generation          %u\n", meta->generation);
 	printf("status              0x%02x\n", meta->status);
 	printf("type                %u\n", meta->type);
 	printf("total_disks         %u\n", meta->total_disks);
 	printf("stripe_shift        %u\n", meta->stripe_shift);
 	printf("array_width         %u\n", meta->array_width);
 	printf("array_number        %u\n", meta->array_number);
 	printf("total_sectors       %u\n", meta->total_sectors);
 	printf("cylinders           %u\n", meta->cylinders);
 	printf("heads               %u\n", meta->heads);
 	printf("sectors             %u\n", meta->sectors);
 	printf("volume_id           0x%016jx\n", (uintmax_t)meta->volume_id);
 	printf("disks:\n");
 	/* Every slot is printed, including those beyond total_disks. */
 	for (i = 0; i < PROMISE_MAX_DISKS; i++) {
 		printf("                    %02x %02x %02x %02x %016jx\n",
 		    meta->disks[i].flags, meta->disks[i].number,
 		    meta->disks[i].channel, meta->disks[i].device,
 		    (uintmax_t)meta->disks[i].id);
 	}
 	printf("name                <%.32s>\n", meta->name);
 	printf("magic_3             0x%08x\n", meta->magic_3);
 	printf("rebuild_lba64       %ju\n", (uintmax_t)meta->rebuild_lba64);
 	printf("magic_4             0x%08x\n", meta->magic_4);
 	printf("magic_5             0x%08x\n", meta->magic_5);
 	printf("total_sectors_high  0x%08x\n", meta->total_sectors_high);
 	printf("sector_size         %u\n", meta->sector_size);
 	printf("backup_time         %u\n", meta->backup_time);
 	printf("disk_offset_high    0x%08x\n", meta->disk_offset_high);
 	printf("disk_sectors_high   0x%08x\n", meta->disk_sectors_high);
 	printf("disk_rebuild_high   0x%08x\n", meta->disk_rebuild_high);
 	printf("=================================================\n");
 }
 
 /*
  * Return a freshly allocated (M_MD_PROMISE, M_WAITOK) byte-for-byte
  * duplicate of the given metadata record.
  */
 static struct promise_raid_conf *
 promise_meta_copy(struct promise_raid_conf *meta)
 {
 	struct promise_raid_conf *dup;
 
 	dup = malloc(sizeof(struct promise_raid_conf), M_MD_PROMISE,
 	    M_WAITOK);
 	memcpy(dup, meta, sizeof(struct promise_raid_conf));
 	return (dup);
 }
 
 /*
  * Look up a subdisk ID among the first total_disks entries of the
  * record's disks[] array.  Returns the index of the first match, or -1
  * when the ID is not listed.
  */
 static int
 promise_meta_find_disk(struct promise_raid_conf *meta, uint64_t id)
 {
 	int idx;
 
 	idx = 0;
 	while (idx < meta->total_disks) {
 		if (meta->disks[idx].id == id)
 			return (idx);
 		idx++;
 	}
 	return (-1);
 }
 
 /*
  * Find the largest gap not covered by the 'nsd' records in metaarr[] on a
  * disk of 'sectors' sectors, after setting aside 131072 sectors of that
  * total.
  *
  * Candidate gaps start at sector 0 and right after the end of each record
  * (taken in array order); each candidate is cut short at the nearest
  * record that starts at or beyond it.  The best candidate is stored in
  * *off / *size (both 0 when there is none).  Returns 1 if a gap of
  * non-zero size was found and 0 otherwise.
  */
 static int
 promise_meta_unused_range(struct promise_raid_conf **metaarr, int nsd,
     off_t sectors, off_t *off, off_t *size)
 {
 	off_t start, len, begin;
 	int idx, k;
 
 	sectors -= 131072;
 	*off = 0;
 	*size = 0;
 	start = 0;
 	len = sectors;
 	for (idx = 0; ; idx++) {
 		/* Clip the candidate at the closest record that follows it. */
 		for (k = 0; k < nsd; k++) {
 			begin = ((off_t)metaarr[k]->disk_offset_high << 32) +
 			    metaarr[k]->disk_offset;
 			if (begin >= start)
 				len = MIN(len, begin - start);
 		}
 		/* Remember the candidate if it beats the best one so far. */
 		if (len > *size) {
 			*off = start;
 			*size = len;
 		}
 		if (idx >= nsd)
 			break;
 		/* The next candidate begins where record 'idx' ends. */
 		start = ((off_t)metaarr[idx]->disk_offset_high << 32) +
 		     metaarr[idx]->disk_offset +
 		    ((off_t)metaarr[idx]->disk_sectors_high << 32) +
 		     metaarr[idx]->disk_sectors;
 		len = sectors - start;
 	}
 	if (*size > 0)
 		return (1);
 	return (0);
 }
 
 /*
  * Convert a disk position between metadata order and volume order.
  *
  * Only non-negative positions of RAID1E volumes are remapped, using half of
  * the volume's disk count as the row width; every other input is returned
  * unchanged.
  */
 static int
 promise_meta_translate_disk(struct g_raid_volume *vol, int md_disk_pos)
 {
 	int half;
 
 	if (md_disk_pos < 0 || vol->v_raid_level != G_RAID_VOLUME_RL_RAID1E)
 		return (md_disk_pos);
 
 	half = vol->v_disks_count / 2;
 	return ((md_disk_pos / half) + (md_disk_pos % half) * half);
 }
 
 /*
  * Copy the 32-byte volume label into 'buf' as a NUL-terminated string with
  * trailing bytes whose value does not exceed 0x20 stripped.  'buf' must
  * have room for at least 33 bytes.
  */
 static void
 promise_meta_get_name(struct promise_raid_conf *meta, char *buf)
 {
 	int last;
 
 	strncpy(buf, meta->name, 32);
 	buf[32] = 0;
 	/* Walk back from the end, zeroing the padding bytes. */
 	last = 31;
 	while (last >= 0 && !(buf[last] > 0x20)) {
 		buf[last] = 0;
 		last--;
 	}
 }
 
 /*
  * Store 'buf' as the volume label: the 32-byte field is filled with
  * spaces (0x20) and then overwritten with at most the first 32 bytes of
  * the string, without a terminating NUL.
  */
 static void
 promise_meta_put_name(struct promise_raid_conf *meta, char *buf)
 {
 	size_t len;
 
 	memset(meta->name, 0x20, 32);
 	len = strlen(buf);
 	if (len > 32)
 		len = 32;
 	memcpy(meta->name, buf, len);
 }
 
 static int
 promise_meta_read(struct g_consumer *cp, struct promise_raid_conf **metaarr)
 {
 	struct g_provider *pp;
 	struct promise_raid_conf *meta;
 	char *buf;
 	int error, i, subdisks;
 	uint32_t checksum, *ptr;
 
 	pp = cp->provider;
 	subdisks = 0;
 next:
 	/* Read metadata block. */
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize *
 	    (63 - subdisks * PROMISE_META_OFFSET),
 	    pp->sectorsize * 4, &error);
 	if (buf == NULL) {
 		G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
 		    pp->name, error);
 		return (subdisks);
 	}
 	meta = (struct promise_raid_conf *)buf;
 
 	/* Check if this is an Promise RAID struct */
 	if (strncmp(meta->promise_id, PROMISE_MAGIC, strlen(PROMISE_MAGIC)) &&
 	    strncmp(meta->promise_id, FREEBSD_MAGIC, strlen(FREEBSD_MAGIC))) {
 		if (subdisks == 0)
 			G_RAID_DEBUG(1,
 			    "Promise signature check failed on %s", pp->name);
 		g_free(buf);
 		return (subdisks);
 	}
 	meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK);
 	memcpy(meta, buf, MIN(sizeof(*meta), pp->sectorsize * 4));
 	g_free(buf);
 
 	/* Check metadata checksum. */
 	for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++)
 		checksum += *ptr++;
 	if (checksum != meta->checksum) {
 		G_RAID_DEBUG(1, "Promise checksum check failed on %s", pp->name);
 		free(meta, M_MD_PROMISE);
 		return (subdisks);
 	}
 
 	if ((meta->integrity & PROMISE_I_VALID) == 0) {
 		G_RAID_DEBUG(1, "Promise metadata is invalid on %s", pp->name);
 		free(meta, M_MD_PROMISE);
 		return (subdisks);
 	}
 
 	if (meta->total_disks > PROMISE_MAX_DISKS) {
 		G_RAID_DEBUG(1, "Wrong number of disks on %s (%d)",
 		    pp->name, meta->total_disks);
 		free(meta, M_MD_PROMISE);
 		return (subdisks);
 	}
 
 	/* Remove filler garbage from fields used in newer metadata. */
 	if (meta->disk_offset_high == 0x8b8c8d8e &&
 	    meta->disk_sectors_high == 0x8788898a &&
 	    meta->disk_rebuild_high == 0x83848586) {
 		meta->disk_offset_high = 0;
 		meta->disk_sectors_high = 0;
 		if (meta->disk_rebuild == UINT32_MAX)
 			meta->disk_rebuild_high = UINT32_MAX;
 		else
 			meta->disk_rebuild_high = 0;
 		if (meta->total_sectors_high == 0x15161718) {
 			meta->total_sectors_high = 0;
 			meta->backup_time = 0;
 			if (meta->rebuild_lba64 == 0x2122232425262728)
 				meta->rebuild_lba64 = UINT64_MAX;
 		}
 	}
 	if (meta->sector_size < 1 || meta->sector_size > 8)
 		meta->sector_size = 1;
 
 	/* Save this part and look for next. */
 	*metaarr = meta;
 	metaarr++;
 	subdisks++;
 	if (subdisks < PROMISE_MAX_SUBDISKS)
 		goto next;
 
 	return (subdisks);
 }
 
 static int
 promise_meta_write(struct g_consumer *cp,
     struct promise_raid_conf **metaarr, int nsd)
 {
 	struct g_provider *pp;
 	struct promise_raid_conf *meta;
 	char *buf;
 	off_t off, size;
 	int error, i, subdisk, fake;
 	uint32_t checksum, *ptr;
 
 	pp = cp->provider;
 	subdisk = 0;
 	fake = 0;
 next:
 	buf = malloc(pp->sectorsize * 4, M_MD_PROMISE, M_WAITOK | M_ZERO);
 	meta = NULL;
 	if (subdisk < nsd) {
 		meta = metaarr[subdisk];
 	} else if (!fake && promise_meta_unused_range(metaarr, nsd,
 	    cp->provider->mediasize / cp->provider->sectorsize,
 	    &off, &size)) {
 		/* Optionally add record for unused space. */
 		meta = (struct promise_raid_conf *)buf;
 		memcpy(&meta->promise_id[0], PROMISE_MAGIC,
 		    sizeof(PROMISE_MAGIC) - 1);
 		meta->dummy_0 = 0x00020000;
 		meta->integrity = PROMISE_I_VALID;
 		meta->disk.flags = PROMISE_F_ONLINE | PROMISE_F_VALID;
 		meta->disk.number = 0xff;
 		arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0);
 		meta->disk_offset_high = off >> 32;
 		meta->disk_offset = (uint32_t)off;
 		meta->disk_sectors_high = size >> 32;
 		meta->disk_sectors = (uint32_t)size;
 		meta->disk_rebuild_high = UINT32_MAX;
 		meta->disk_rebuild = UINT32_MAX;
 		fake = 1;
 	}
 	if (meta != NULL) {
 		/* Recalculate checksum for case if metadata were changed. */
 		meta->checksum = 0;
 		for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++)
 			checksum += *ptr++;
 		meta->checksum = checksum;
 		memcpy(buf, meta, MIN(pp->sectorsize * 4, sizeof(*meta)));
 	}
 	error = g_write_data(cp, pp->mediasize - pp->sectorsize *
 	    (63 - subdisk * PROMISE_META_OFFSET),
 	    buf, pp->sectorsize * 4);
 	if (error != 0) {
 		G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
 		    pp->name, error);
 	}
 	free(buf, M_MD_PROMISE);
 
 	subdisk++;
 	if (subdisk < PROMISE_MAX_SUBDISKS)
 		goto next;
 
 	return (error);
 }
 
 /*
  * Overwrite all PROMISE_MAX_SUBDISKS metadata slots of the provider
  * attached to 'cp' with zeroes.
  *
  * Every slot is attempted even if an earlier write failed.  Returns 0 when
  * all writes succeeded, otherwise the error of the first write that
  * failed.
  */
 static int
 promise_meta_erase(struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	char *buf;
 	int error, res, subdisk;
 
 	pp = cp->provider;
 	res = 0;
 	buf = malloc(4 * pp->sectorsize, M_MD_PROMISE, M_WAITOK | M_ZERO);
 	for (subdisk = 0; subdisk < PROMISE_MAX_SUBDISKS; subdisk++) {
 		error = g_write_data(cp, pp->mediasize - pp->sectorsize *
 		    (63 - subdisk * PROMISE_META_OFFSET),
 		    buf, 4 * pp->sectorsize);
 		if (error != 0) {
 			G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
 			    pp->name, error);
 			/* Do not let a later success hide this failure. */
 			if (res == 0)
 				res = error;
 		}
 	}
 	free(buf, M_MD_PROMISE);
 	return (res);
 }
 
 /*
  * Write metadata that marks the provider attached to 'cp' as a spare.
  *
  * A single zero-initialized record is built with the Promise signature,
  * the SPARE | ONLINE | VALID disk flags, a random disk ID and a size equal
  * to the provider's sector count minus 131072, and is then handed to
  * promise_meta_write().  Returns that function's result.
  */
 static int
 promise_meta_write_spare(struct g_consumer *cp)
 {
 	struct promise_raid_conf *spare;
 	off_t nsect;
 	int res;
 
 	spare = malloc(sizeof(*spare), M_MD_PROMISE, M_WAITOK | M_ZERO);
 
 	/* Identification. */
 	memcpy(&spare->promise_id[0], PROMISE_MAGIC,
 	    sizeof(PROMISE_MAGIC) - 1);
 	spare->dummy_0 = 0x00020000;
 	spare->integrity = PROMISE_I_VALID;
 
 	/* Disk descriptor: a spare occupies no position in any volume. */
 	spare->disk.flags = PROMISE_F_SPARE | PROMISE_F_ONLINE |
 	    PROMISE_F_VALID;
 	spare->disk.number = 0xff;
 	arc4rand(&spare->disk.id, sizeof(spare->disk.id), 0);
 
 	/* Size, split into its high and low 32-bit halves. */
 	nsect = cp->provider->mediasize / cp->provider->sectorsize - 131072;
 	spare->disk_sectors_high = nsect >> 32;
 	spare->disk_sectors = (uint32_t)nsect;
 	spare->disk_rebuild_high = UINT32_MAX;
 	spare->disk_rebuild = UINT32_MAX;
 
 	res = promise_meta_write(cp, &spare, 1);
 	free(spare, M_MD_PROMISE);
 	return (res);
 }
 
 /*
  * Return the first volume of 'sc' whose per-volume data carries the given
  * ID, or NULL if no volume matches.
  */
 static struct g_raid_volume *
 g_raid_md_promise_get_volume(struct g_raid_softc *sc, uint64_t id)
 {
 	struct g_raid_md_promise_pervolume *pv;
 	struct g_raid_volume *vol;
 
 	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 		pv = vol->v_md_data;
 		if (pv->pv_id == id)
 			return (vol);
 	}
 	return (NULL);
 }
 
 static int
 g_raid_md_promise_purge_volumes(struct g_raid_softc *sc)
 {
 	struct g_raid_volume	*vol, *tvol;
 	struct g_raid_md_promise_pervolume *pv;
 	int i, res;
 
 	res = 0;
 	TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) {
 		pv = vol->v_md_data;
 		if (!pv->pv_started || vol->v_stopping)
 			continue;
 		for (i = 0; i < vol->v_disks_count; i++) {
 			if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE)
 				break;
 		}
 		if (i >= vol->v_disks_count) {
 			g_raid_destroy_volume(vol);
 			res = 1;
 		}
 	}
 	return (res);
 }
 
 static int
 g_raid_md_promise_purge_disks(struct g_raid_softc *sc)
 {
 	struct g_raid_disk	*disk, *tdisk;
 	struct g_raid_volume	*vol;
 	struct g_raid_md_promise_perdisk *pd;
 	int i, j, res;
 
 	res = 0;
 	TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
 		if (disk->d_state == G_RAID_DISK_S_SPARE)
 			continue;
 		pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
 
 		/* Scan for deleted volumes. */
 		for (i = 0; i < pd->pd_subdisks; ) {
 			vol = g_raid_md_promise_get_volume(sc,
 			    pd->pd_meta[i]->volume_id);
 			if (vol != NULL && !vol->v_stopping) {
 				i++;
 				continue;
 			}
 			free(pd->pd_meta[i], M_MD_PROMISE);
 			for (j = i; j < pd->pd_subdisks - 1; j++)
 				pd->pd_meta[j] = pd->pd_meta[j + 1];
 			pd->pd_meta[pd->pd_subdisks - 1] = NULL;
 			pd->pd_subdisks--;
 			pd->pd_updated = 1;
 		}
 
 		/* If there is no metadata left - erase and delete disk. */
 		if (pd->pd_subdisks == 0) {
 			promise_meta_erase(disk->d_consumer);
 			g_raid_destroy_disk(disk);
 			res = 1;
 		}
 	}
 	return (res);
 }
 
 /*
  * Tell whether a volume of the given RAID level, level qualifier and disk
  * count can be described by Promise metadata.  'force' relaxes the disk
  * count rules of RAID0, RAID1 and RAID1E.  Returns 1 if the combination is
  * supported and 0 otherwise.
  *
  * Disk count rules (never more than PROMISE_MAX_DISKS):
  *   RAID0  - at least 1, at least 2 unless forced;
  *   RAID1  - at least 1, exactly 2 unless forced;
  *   RAID1E - even and at least 2, exactly 4 unless forced;
  *   SINGLE - exactly 1;
  *   CONCAT - at least 2;
  *   RAID5  - at least 3, and only with the R5LA qualifier.
  * Every level other than RAID5 requires the NONE qualifier.
  */
 static int
 g_raid_md_promise_supported(int level, int qual, int disks, int force)
 {
 	int ok;
 
 	if (disks > PROMISE_MAX_DISKS)
 		return (0);
 
 	switch (level) {
 	case G_RAID_VOLUME_RL_RAID0:
 		ok = disks >= 1 && (force || disks >= 2);
 		break;
 	case G_RAID_VOLUME_RL_RAID1:
 		ok = disks >= 1 && (force || disks == 2);
 		break;
 	case G_RAID_VOLUME_RL_RAID1E:
 		ok = disks >= 2 && disks % 2 == 0 && (force || disks == 4);
 		break;
 	case G_RAID_VOLUME_RL_SINGLE:
 		ok = disks == 1;
 		break;
 	case G_RAID_VOLUME_RL_CONCAT:
 		ok = disks >= 2;
 		break;
 	case G_RAID_VOLUME_RL_RAID5:
 		ok = disks >= 3 && qual == G_RAID_VOLUME_RLQ_R5LA;
 		break;
 	default:
 		ok = 0;
 		break;
 	}
 	if (!ok)
 		return (0);
 
 	if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
 		return (0);
 	return (1);
 }
 
 static int
 g_raid_md_promise_start_disk(struct g_raid_disk *disk, int sdn,
     struct g_raid_volume *vol)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_subdisk *sd;
 	struct g_raid_md_promise_perdisk *pd;
 	struct g_raid_md_promise_pervolume *pv;
 	struct promise_raid_conf *meta;
 	off_t eoff, esize, size;
 	int disk_pos, md_disk_pos, i, resurrection = 0;
 
 	sc = disk->d_softc;
 	pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
 
 	pv = vol->v_md_data;
 	meta = pv->pv_meta;
 
 	if (sdn >= 0) {
 		/* Find disk position in metadata by it's serial. */
 		md_disk_pos = promise_meta_find_disk(meta, pd->pd_meta[sdn]->disk.id);
 		/* For RAID0+1 we need to translate order. */
 		disk_pos = promise_meta_translate_disk(vol, md_disk_pos);
 	} else {
 		md_disk_pos = -1;
 		disk_pos = -1;
 	}
 	if (disk_pos < 0) {
 		G_RAID_DEBUG1(1, sc, "Disk %s is not part of the volume %s",
 		    g_raid_get_diskname(disk), vol->v_name);
 		/* Failed stale disk is useless for us. */
 		if (sdn >= 0 &&
 		    pd->pd_meta[sdn]->disk.flags & PROMISE_F_DOWN) {
 			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
 			return (0);
 		}
 		/* If we were given specific metadata subdisk - erase it. */
 		if (sdn >= 0) {
 			free(pd->pd_meta[sdn], M_MD_PROMISE);
 			for (i = sdn; i < pd->pd_subdisks - 1; i++)
 				pd->pd_meta[i] = pd->pd_meta[i + 1];
 			pd->pd_meta[pd->pd_subdisks - 1] = NULL;
 			pd->pd_subdisks--;
 		}
 		/* If we are in the start process, that's all for now. */
 		if (!pv->pv_started)
 			goto nofit;
 		/*
 		 * If we have already started - try to get use of the disk.
 		 * Try to replace OFFLINE disks first, then FAILED.
 		 */
 		promise_meta_unused_range(pd->pd_meta, pd->pd_subdisks,
 		    disk->d_consumer->provider->mediasize /
 		    disk->d_consumer->provider->sectorsize,
 		    &eoff, &esize);
 		if (esize == 0) {
 			G_RAID_DEBUG1(1, sc, "No free space on disk %s",
 			    g_raid_get_diskname(disk));
 			goto nofit;
 		}
 		size = INT64_MAX;
 		for (i = 0; i < vol->v_disks_count; i++) {
 			sd = &vol->v_subdisks[i];
 			if (sd->sd_state != G_RAID_SUBDISK_S_NONE)
 				size = sd->sd_size;
 			if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED &&
 			    (disk_pos < 0 ||
 			     vol->v_subdisks[i].sd_state < sd->sd_state))
 				disk_pos = i;
 		}
 		if (disk_pos >= 0 &&
 		    vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT &&
 		    (off_t)esize * 512 < size) {
 			G_RAID_DEBUG1(1, sc, "Disk %s free space "
 			    "is too small (%ju < %ju)",
 			    g_raid_get_diskname(disk),
 			    (off_t)esize * 512, size);
 			disk_pos = -1;
 		}
 		if (disk_pos >= 0) {
 			if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT)
 				esize = size / 512;
 			/* For RAID0+1 we need to translate order. */
 			md_disk_pos = promise_meta_translate_disk(vol, disk_pos);
 		} else {
 nofit:
 			if (pd->pd_subdisks == 0) {
 				g_raid_change_disk_state(disk,
 				    G_RAID_DISK_S_SPARE);
 			}
 			return (0);
 		}
 		G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s",
 		    g_raid_get_diskname(disk), disk_pos, vol->v_name);
 		resurrection = 1;
 	}
 
 	sd = &vol->v_subdisks[disk_pos];
 
 	if (resurrection && sd->sd_disk != NULL) {
 		g_raid_change_disk_state(sd->sd_disk,
 		    G_RAID_DISK_S_STALE_FAILED);
 		TAILQ_REMOVE(&sd->sd_disk->d_subdisks,
 		    sd, sd_next);
 	}
 	vol->v_subdisks[disk_pos].sd_disk = disk;
 	TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 
 	/* Welcome the new disk. */
 	if (resurrection)
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
 	else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN)
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
 	else
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
 
 	if (resurrection) {
 		sd->sd_offset = (off_t)eoff * 512;
 		sd->sd_size = (off_t)esize * 512;
 	} else {
 		sd->sd_offset = (((off_t)pd->pd_meta[sdn]->disk_offset_high
 		    << 32) + pd->pd_meta[sdn]->disk_offset) * 512;
 		sd->sd_size = (((off_t)pd->pd_meta[sdn]->disk_sectors_high
 		    << 32) + pd->pd_meta[sdn]->disk_sectors) * 512;
 	}
 
 	if (resurrection) {
 		/* Stale disk, almost same as new. */
 		g_raid_change_subdisk_state(sd,
 		    G_RAID_SUBDISK_S_NEW);
 	} else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) {
 		/* Failed disk. */
 		g_raid_change_subdisk_state(sd,
 		    G_RAID_SUBDISK_S_FAILED);
 	} else if (meta->disks[md_disk_pos].flags & PROMISE_F_REDIR) {
 		/* Rebuilding disk. */
 		g_raid_change_subdisk_state(sd,
 		    G_RAID_SUBDISK_S_REBUILD);
 		if (pd->pd_meta[sdn]->generation != meta->generation)
 			sd->sd_rebuild_pos = 0;
 		else {
 			sd->sd_rebuild_pos =
 			    (((off_t)pd->pd_meta[sdn]->disk_rebuild_high << 32) +
 			     pd->pd_meta[sdn]->disk_rebuild) * 512;
 		}
 	} else if (!(meta->disks[md_disk_pos].flags & PROMISE_F_ONLINE)) {
 		/* Rebuilding disk. */
 		g_raid_change_subdisk_state(sd,
 		    G_RAID_SUBDISK_S_NEW);
 	} else if (pd->pd_meta[sdn]->generation != meta->generation ||
 	    (meta->status & PROMISE_S_MARKED)) {
 		/* Stale disk or dirty volume (unclean shutdown). */
 		g_raid_change_subdisk_state(sd,
 		    G_RAID_SUBDISK_S_STALE);
 	} else {
 		/* Up to date disk. */
 		g_raid_change_subdisk_state(sd,
 		    G_RAID_SUBDISK_S_ACTIVE);
 	}
 	g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 	    G_RAID_EVENT_SUBDISK);
 
 	return (resurrection);
 }
 
 /*
  * Try to complete degraded volumes using disks that are not part of them yet.
  *
  * Every started, non-stopping volume that has a subdisk in NONE or FAILED
  * state is offered each disk of the node whose state is not below
  * G_RAID_DISK_S_SPARE, which is not already used by that volume and which
  * holds fewer than PROMISE_MAX_SUBDISKS records.  The offer is made through
  * g_raid_md_promise_start_disk() with sdn == -1.  When a disk is accepted
  * the metadata is written and, after the current pass over the volumes
  * ends, the whole scan is repeated until a pass changes nothing.
  */
 static void
 g_raid_md_promise_refill(struct g_raid_softc *sc)
 {
 	struct g_raid_volume *vol;
 	struct g_raid_subdisk *sd;
 	struct g_raid_disk *disk;
 	struct g_raid_md_object *md;
 	struct g_raid_md_promise_perdisk *pd;
 	struct g_raid_md_promise_pervolume *pv;
 	int changed, incomplete, n;
 
 	md = sc->sc_md;
 	do {
 		changed = 0;
 		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 			pv = vol->v_md_data;
 			if (!pv->pv_started || vol->v_stopping)
 				continue;
 
 			/* Does any position of this volume need a disk? */
 			incomplete = 0;
 			for (n = 0; n < vol->v_disks_count; n++) {
 				sd = &vol->v_subdisks[n];
 				if (sd->sd_state == G_RAID_SUBDISK_S_NONE ||
 				    sd->sd_state == G_RAID_SUBDISK_S_FAILED) {
 					incomplete = 1;
 					break;
 				}
 			}
 			if (!incomplete)
 				continue;
 
 			G_RAID_DEBUG1(1, sc, "Volume %s is not complete, "
 			    "trying to refill.", vol->v_name);
 
 			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 				/* Failed disks are of no use. */
 				if (disk->d_state < G_RAID_DISK_S_SPARE)
 					continue;
 
 				/* Neither are disks this volume already uses. */
 				for (n = 0; n < vol->v_disks_count; n++) {
 					if (vol->v_subdisks[n].sd_disk == disk)
 						break;
 				}
 				if (n < vol->v_disks_count)
 					continue;
 
 				/* The disk needs room for one more record. */
 				pd = disk->d_md_data;
 				if (pd->pd_subdisks >= PROMISE_MAX_SUBDISKS)
 					continue;
 				if (!g_raid_md_promise_start_disk(disk, -1, vol))
 					continue;
 
 				/* Disk was taken: record it, go to next volume. */
 				changed = 1;
 				g_raid_md_write_promise(md, vol, NULL, disk);
 				break;
 			}
 		}
 	} while (changed);
 }
 
 /*
  * Start a volume using the metadata collected in its per-volume data.
  *
  * Translates pv->pv_meta into the volume parameters (RAID level, strip size,
  * disk count, media size, sector size and per-subdisk offset/size), starts
  * the volume, places every known disk that carries a record with the same
  * volume_id, marks the volume as started, stops the start timeout callout,
  * writes metadata, tries to refill incomplete volumes and finally posts
  * G_RAID_VOLUME_E_START.
  */
 static void
 g_raid_md_promise_start(struct g_raid_volume *vol)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_subdisk *sd;
 	struct g_raid_disk *disk;
 	struct g_raid_md_object *md;
 	struct g_raid_md_promise_perdisk *pd;
 	struct g_raid_md_promise_pervolume *pv;
 	struct promise_raid_conf *meta;
-	int i;
+	u_int i;
 
 	sc = vol->v_softc;
 	md = sc->sc_md;
 	pv = vol->v_md_data;
 	meta = pv->pv_meta;
 
 	/* Map the Promise array type to a RAID level and qualifier. */
 	vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
 	if (meta->type == PROMISE_T_RAID0)
 		vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
 	else if (meta->type == PROMISE_T_RAID1) {
 		/* Width of 1 is reported as RAID1, anything else as RAID1E. */
 		if (meta->array_width == 1)
 			vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
 		else
 			vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
 	} else if (meta->type == PROMISE_T_RAID3)
 		vol->v_raid_level = G_RAID_VOLUME_RL_RAID3;
 	else if (meta->type == PROMISE_T_RAID5) {
 		vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
 		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
 	} else if (meta->type == PROMISE_T_SPAN)
 		vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT;
 	else if (meta->type == PROMISE_T_JBOD)
 		vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE;
 	else
 		vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
 	/* Sizes in the metadata are converted to bytes with a factor of 512. */
 	vol->v_strip_size = 512 << meta->stripe_shift; //ZZZ
 	vol->v_disks_count = meta->total_disks;
 	vol->v_mediasize = (off_t)meta->total_sectors * 512; //ZZZ
 	if (meta->total_sectors_high < 256) /* If value looks sane. */
 		vol->v_mediasize +=
 		    ((off_t)meta->total_sectors_high << 32) * 512; //ZZZ
 	vol->v_sectorsize = 512 * meta->sector_size;
 	/* Every subdisk starts with the same offset and size from metadata. */
 	for (i = 0; i < vol->v_disks_count; i++) {
 		sd = &vol->v_subdisks[i];
 		sd->sd_offset = (((off_t)meta->disk_offset_high << 32) +
 		    meta->disk_offset) * 512;
 		sd->sd_size = (((off_t)meta->disk_sectors_high << 32) +
 		    meta->disk_sectors) * 512;
 	}
 	g_raid_start_volume(vol);
 
 	/* Make all disks found till the moment take their places. */
 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 		pd = disk->d_md_data;
 		for (i = 0; i < pd->pd_subdisks; i++) {
 			if (pd->pd_meta[i]->volume_id == meta->volume_id)
 				g_raid_md_promise_start_disk(disk, i, vol);
 		}
 	}
 
 	/* From here on new disks are placed directly instead of counted. */
 	pv->pv_started = 1;
 	callout_stop(&pv->pv_start_co);
 	G_RAID_DEBUG1(0, sc, "Volume started.");
 	g_raid_md_write_promise(md, vol, NULL, NULL);
 
 	/* Pickup any STALE/SPARE disks to refill array if needed. */
 	g_raid_md_promise_refill(sc);
 
 	g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME);
 }
 
 /*
  * Start timeout handler installed with callout_reset() for a new volume.
  *
  * 'arg' is the volume.  If the volume has not been started by the time the
  * handler runs, a G_RAID_VOLUME_E_STARTMD event is posted for it.
  */
 static void
 g_raid_promise_go(void *arg)
 {
 	struct g_raid_volume *vol = arg;
 	struct g_raid_md_promise_pervolume *pv = vol->v_md_data;
 	struct g_raid_softc *sc = vol->v_softc;
 
 	/* Nothing to do when the volume came up on its own. */
 	if (pv->pv_started)
 		return;
 
 	G_RAID_DEBUG1(0, sc, "Force volume start due to timeout.");
 	g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD, G_RAID_EVENT_VOLUME);
 }
 
 /*
  * Take a freshly created disk into account and attach it to its volumes.
  *
  * A disk without metadata records becomes a SPARE and a refill of incomplete
  * volumes is attempted.  Otherwise the first pass finds or creates a volume
  * for every record (arming the start timeout for new volumes) and, for
  * volumes that are not started yet, tracks the newest metadata generation
  * and how many disks were seen with it.  The second pass either places the
  * disk into an already started volume or starts the volume once the number
  * of disks present equals total_disks from the metadata.
  */
 static void
 g_raid_md_promise_new_disk(struct g_raid_disk *disk)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_md_object *md;
 	struct promise_raid_conf *pdmeta;
 	struct g_raid_md_promise_perdisk *pd;
 	struct g_raid_md_promise_pervolume *pv;
 	struct g_raid_volume *vol;
 	int i;
 	char buf[33];	/* presumably metadata name plus NUL -- TODO confirm */
 
 	sc = disk->d_softc;
 	md = sc->sc_md;
 	pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
 
 	/* No records at all: the disk can only serve as a spare. */
 	if (pd->pd_subdisks == 0) {
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
 		g_raid_md_promise_refill(sc);
 		return;
 	}
 
 	/* Pass 1: make sure volumes exist and track metadata freshness. */
 	for (i = 0; i < pd->pd_subdisks; i++) {
 		pdmeta = pd->pd_meta[i];
 
 		/* Look for volume with matching ID. */
 		vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id);
 		if (vol == NULL) {
 			/* First record of this volume: create it, arm timeout. */
 			promise_meta_get_name(pdmeta, buf);
 			vol = g_raid_create_volume(sc, buf, pdmeta->array_number);
 			pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO);
 			pv->pv_id = pdmeta->volume_id;
 			vol->v_md_data = pv;
 			callout_init(&pv->pv_start_co, 1);
 			callout_reset(&pv->pv_start_co,
 			    g_raid_start_timeout * hz,
 			    g_raid_promise_go, vol);
 		} else
 			pv = vol->v_md_data;
 
 		/* If we haven't started yet - check metadata freshness. */
 		if (pv->pv_meta == NULL || !pv->pv_started) {
 			/*
 			 * The generation difference is compared as a signed
 			 * 16-bit value; presumably so that counter wraparound
 			 * is tolerated -- TODO confirm.
 			 */
 			if (pv->pv_meta == NULL ||
 			    ((int16_t)(pdmeta->generation - pv->pv_generation)) > 0) {
 				G_RAID_DEBUG1(1, sc, "Newer disk");
 				if (pv->pv_meta != NULL)
 					free(pv->pv_meta, M_MD_PROMISE);
 				/* Newer metadata wins and restarts the count. */
 				pv->pv_meta = promise_meta_copy(pdmeta);
 				pv->pv_generation = pv->pv_meta->generation;
 				pv->pv_disks_present = 1;
 			} else if (pdmeta->generation == pv->pv_generation) {
 				pv->pv_disks_present++;
 				G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
 				    pv->pv_disks_present,
 				    pv->pv_meta->total_disks);
 			} else {
 				G_RAID_DEBUG1(1, sc, "Older disk");
 			}
 		}
 	}
 
 	/*
 	 * Pass 2: place the disk or start the volume.
 	 *
 	 * NOTE(review): g_raid_md_promise_start_disk() can free pd->pd_meta[i],
 	 * shift the following records down and decrement pd->pd_subdisks when
 	 * the disk is not part of the volume; the record moved into slot i is
 	 * then skipped by the i++ below -- confirm this is intended.
 	 */
 	for (i = 0; i < pd->pd_subdisks; i++) {
 		pdmeta = pd->pd_meta[i];
 
 		/* Look for volume with matching ID. */
 		vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id);
 		if (vol == NULL)
 			continue;
 		pv = vol->v_md_data;
 
 		if (pv->pv_started) {
 			/* Non-zero return means the disk was taken as a new one. */
 			if (g_raid_md_promise_start_disk(disk, i, vol))
 				g_raid_md_write_promise(md, vol, NULL, NULL);
 		} else {
 			/* If we collected all needed disks - start array. */
 			if (pv->pv_disks_present == pv->pv_meta->total_disks)
 				g_raid_md_promise_start(vol);
 		}
 	}
 }
 
 /*
  * Find the Promise node of the class or create one.
  *
  * A geom of 'mp' qualifies when it has a softc, is not stopping and uses the
  * same metadata class as 'md'.  On success *gp is set to the geom found or
  * created.
  *
  * Returns G_RAID_MD_TASTE_EXISTING when a node was found,
  * G_RAID_MD_TASTE_NEW when one was created and G_RAID_MD_TASTE_FAIL when
  * node creation failed.
  */
 static int
 g_raid_md_create_promise(struct g_raid_md_object *md, struct g_class *mp,
     struct g_geom **gp)
 {
 	struct g_geom *geom;
 	struct g_raid_softc *sc;
 
 	/* Reuse a live node of the same metadata class if there is one. */
 	LIST_FOREACH(geom, &mp->geom, geom) {
 		sc = geom->softc;
 		if (sc != NULL && sc->sc_stopping == 0 &&
 		    sc->sc_md->mdo_class == md->mdo_class) {
 			*gp = geom;
 			return (G_RAID_MD_TASTE_EXISTING);
 		}
 	}
 
 	/* None found, so make a new node and bind it to this object. */
 	sc = g_raid_create_node(mp, "Promise", md);
 	if (sc == NULL)
 		return (G_RAID_MD_TASTE_FAIL);
 	md->mdo_softc = sc;
 	*gp = sc->sc_geom;
 	return (G_RAID_MD_TASTE_NEW);
 }
 
 /*
  * Taste a provider for Promise metadata.
  *
  * Reads the metadata records through the passed consumer.  When none are
  * found the provider is rejected, unless g_raid_aggressive_spare is set and
  * the HBA vendor ID is 0x105a or 0x1002, in which case the disk is taken
  * with no records.  Records without PROMISE_F_ASSIGNED are dropped.  The
  * disk is then attached, through a new consumer, to the existing Promise
  * node or to a newly created one and handed to
  * g_raid_md_promise_new_disk().
  *
  * Returns G_RAID_MD_TASTE_EXISTING or G_RAID_MD_TASTE_NEW with *gp set to
  * the node's geom, or G_RAID_MD_TASTE_FAIL when the provider is not taken or
  * a node could not be created.
  */
 static int
 g_raid_md_taste_promise(struct g_raid_md_object *md, struct g_class *mp,
                               struct g_consumer *cp, struct g_geom **gp)
 {
 	struct g_consumer *rcp;
 	struct g_provider *pp;
 	struct g_raid_softc *sc;
 	struct g_raid_disk *disk;
 	struct promise_raid_conf *metaarr[4];
 	struct g_raid_md_promise_perdisk *pd;
 	struct g_geom *geom;
 	int i, j, result, len, subdisks;
 	char name[16];
 	uint16_t vendor;
 
 	G_RAID_DEBUG(1, "Tasting Promise on %s", cp->provider->name);
 	pp = cp->provider;
 
 	/* Read metadata from device. */
 	vendor = 0xffff;
 	g_topology_unlock();
 	len = 2;
 	/* The HBA vendor is only queried for rank 1 geoms. */
 	if (pp->geom->rank == 1)
 		g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
 	subdisks = promise_meta_read(cp, metaarr);
 	g_topology_lock();
 	if (subdisks == 0) {
 		if (g_raid_aggressive_spare) {
 			if (vendor == 0x105a || vendor == 0x1002) {
 				G_RAID_DEBUG(1,
 				    "No Promise metadata, forcing spare.");
 				goto search;
 			} else {
 				G_RAID_DEBUG(1,
 				    "Promise/ATI vendor mismatch "
 				    "0x%04x != 0x105a/0x1002",
 				    vendor);
 			}
 		}
 		return (G_RAID_MD_TASTE_FAIL);
 	}
 
 	/* Metadata valid. Print it. */
 	for (i = 0; i < subdisks; i++)
 		g_raid_md_promise_print(metaarr[i]);
 
 	/* Purge meaningless (empty/spare) records. */
 	for (i = 0; i < subdisks; ) {
 		if (metaarr[i]->disk.flags & PROMISE_F_ASSIGNED) {
 			i++;
 			continue;
 		}
 		free(metaarr[i], M_MD_PROMISE);
 		/* Shift the tail down by one, keeping the record order. */
 		for (j = i; j < subdisks - 1; j++)
 			metaarr[j] = metaarr[j + 1];
 		metaarr[subdisks - 1] = NULL;
 		subdisks--;
 	}
 
 search:
 	/* Search for matching node. */
 	sc = NULL;
 	LIST_FOREACH(geom, &mp->geom, geom) {
 		sc = geom->softc;
 		if (sc == NULL)
 			continue;
 		if (sc->sc_stopping != 0)
 			continue;
 		if (sc->sc_md->mdo_class != md->mdo_class)
 			continue;
 		break;
 	}
 
 	/* Found matching node. */
 	if (geom != NULL) {
 		G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
 		result = G_RAID_MD_TASTE_EXISTING;
 
 	} else { /* Not found matching node -- create one. */
 		result = G_RAID_MD_TASTE_NEW;
 		snprintf(name, sizeof(name), "Promise");
 		sc = g_raid_create_node(mp, name, md);
 		if (sc == NULL) {
 			/* Nothing is attached yet; drop the records read. */
 			for (i = 0; i < subdisks; i++)
 				free(metaarr[i], M_MD_PROMISE);
 			return (G_RAID_MD_TASTE_FAIL);
 		}
 		md->mdo_softc = sc;
 		geom = sc->sc_geom;
 	}
 
 	/* There is no return after this point, so we close passed consumer. */
 	g_access(cp, -1, 0, 0);
 
 	rcp = g_new_consumer(geom);
 	g_attach(rcp, pp);
 	/* NOTE(review): a failure to open the provider is ignored here. */
 	if (g_access(rcp, 1, 1, 1) != 0)
 		; //goto fail1;
 
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 
 	/* Hand the surviving records over to the per-disk data. */
 	pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO);
 	pd->pd_subdisks = subdisks;
 	for (i = 0; i < subdisks; i++)
 		pd->pd_meta[i] = metaarr[i];
 	disk = g_raid_create_disk(sc);
 	disk->d_md_data = (void *)pd;
 	disk->d_consumer = rcp;
 	rcp->private = disk;
 
 	g_raid_get_disk_info(disk);
 
 	g_raid_md_promise_new_disk(disk);
 
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	*gp = geom;
 	return (result);
 }
 
 /*
  * Handle a disk event for a Promise node.
  *
  * Only G_RAID_DISK_E_DISCONNECTED is handled: the disk is destroyed,
  * volumes are purged, metadata is rewritten on all disks and the node is
  * either destroyed (no disks left) or refilled.
  *
  * Returns 0 when the event was handled, -1 when no disk was given and -2
  * for any other event.
  */
 static int
 g_raid_md_event_promise(struct g_raid_md_object *md,
     struct g_raid_disk *disk, u_int event)
 {
 	struct g_raid_softc *sc = md->mdo_softc;
 
 	if (disk == NULL)
 		return (-1);
 	if (event != G_RAID_DISK_E_DISCONNECTED)
 		return (-2);
 
 	/* Drop the disk and whatever volumes depended on it. */
 	g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
 	g_raid_destroy_disk(disk);
 	g_raid_md_promise_purge_volumes(sc);
 
 	/* Bring the metadata on all disks up to date. */
 	g_raid_md_write_promise(md, NULL, NULL, NULL);
 
 	/* With disks remaining try to refill, otherwise the node goes away. */
 	if (g_raid_ndisks(sc, -1) != 0)
 		g_raid_md_promise_refill(sc);
 	else
 		g_raid_destroy_node(sc, 0);
 	return (0);
 }
 
 /*
  * Handle a volume event for a Promise node.
  *
  * Only G_RAID_VOLUME_E_STARTMD is handled: the volume is started unless it
  * already is.  Returns 0 for that event and -2 for any other.
  */
 static int
 g_raid_md_volume_event_promise(struct g_raid_md_object *md,
     struct g_raid_volume *vol, u_int event)
 {
 	struct g_raid_md_promise_pervolume *pv = vol->v_md_data;
 
 	if (event != G_RAID_VOLUME_E_STARTMD)
 		return (-2);
 
 	if (!pv->pv_started)
 		g_raid_md_promise_start(vol);
 	return (0);
 }
 
 /*
  * Handle a control request addressed to a Promise node.
  *
  * Verbs:
  *   label  - create a volume: arg1 is the volume name, arg2 the RAID level
  *            ("RAID5" is taken as "RAID5-LA"), arg3 and up are disk names
  *            ("NONE" leaves the position empty); "size", "strip" and
  *            "force" are optional parameters.
  *   add    - always rejected; "label" is to be used instead.
  *   delete - erase metadata and destroy the whole node, or destroy one
  *            volume selected by name, provider name or numeric global ID.
  *   remove - erase metadata on the named disks and destroy them.
  *   fail   - pass the named disks to g_raid_md_fail_disk_promise().
  *   insert - open the named providers and add them as spare disks.
  *
  * Returns 0 on success, a negative value on error and -100 for an unknown
  * verb; error details are reported with gctl_error().
  *
  * NOTE(review): "verb" and "nargs" are used without NULL checks; presumably
  * the caller has validated them -- confirm.
  */
 static int
 g_raid_md_ctl_promise(struct g_raid_md_object *md,
     struct gctl_req *req)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_volume *vol, *vol1;
 	struct g_raid_subdisk *sd;
 	struct g_raid_disk *disk, *disks[PROMISE_MAX_DISKS];
 	struct g_raid_md_promise_perdisk *pd;
 	struct g_raid_md_promise_pervolume *pv;
 	struct g_consumer *cp;
 	struct g_provider *pp;
 	char arg[16];
 	const char *nodename, *verb, *volname, *levelname, *diskname;
 	char *tmp;
 	int *nargs, *force;
 	off_t esize, offs[PROMISE_MAX_DISKS], size, sectorsize, strip;
 	intmax_t *sizearg, *striparg;
 	int numdisks, i, len, level, qual;
 	int error;
 
 	sc = md->mdo_softc;
 	verb = gctl_get_param(req, "verb", NULL);
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	error = 0;
 	if (strcmp(verb, "label") == 0) {
 
 		if (*nargs < 4) {
 			gctl_error(req, "Invalid number of arguments.");
 			return (-1);
 		}
 		volname = gctl_get_asciiparam(req, "arg1");
 		if (volname == NULL) {
 			gctl_error(req, "No volume name.");
 			return (-2);
 		}
 		levelname = gctl_get_asciiparam(req, "arg2");
 		if (levelname == NULL) {
 			gctl_error(req, "No RAID level.");
 			return (-3);
 		}
 		if (strcasecmp(levelname, "RAID5") == 0)
 			levelname = "RAID5-LA";
 		if (g_raid_volume_str2level(levelname, &level, &qual)) {
 			gctl_error(req, "Unknown RAID level '%s'.", levelname);
 			return (-4);
 		}
 		/* arg0..arg2 are not disks, everything after them is. */
 		numdisks = *nargs - 3;
 		force = gctl_get_paraml(req, "force", sizeof(*force));
 		if (!g_raid_md_promise_supported(level, qual, numdisks,
 		    force ? *force : 0)) {
 			gctl_error(req, "Unsupported RAID level "
 			    "(0x%02x/0x%02x), or number of disks (%d).",
 			    level, qual, numdisks);
 			return (-5);
 		}
 
 		/* Search for disks, connect them and probe. */
 		size = INT64_MAX;
 		sectorsize = 0;
 		bzero(disks, sizeof(disks));
 		bzero(offs, sizeof(offs));
 		for (i = 0; i < numdisks; i++) {
 			snprintf(arg, sizeof(arg), "arg%d", i + 3);
 			diskname = gctl_get_asciiparam(req, arg);
 			if (diskname == NULL) {
 				gctl_error(req, "No disk name (%s).", arg);
 				error = -6;
 				break;
 			}
 			if (strcmp(diskname, "NONE") == 0)
 				continue;
 
 			/* Is the provider already a disk of this node? */
 			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_consumer != NULL && 
 				    disk->d_consumer->provider != NULL &&
 				    strcmp(disk->d_consumer->provider->name,
 				     diskname) == 0)
 					break;
 			}
 			if (disk != NULL) {
 				if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
 					gctl_error(req, "Disk '%s' is in a "
 					    "wrong state (%s).", diskname,
 					    g_raid_disk_state2str(disk->d_state));
 					error = -7;
 					break;
 				}
 				pd = disk->d_md_data;
 				if (pd->pd_subdisks >= PROMISE_MAX_SUBDISKS) {
 					gctl_error(req, "Disk '%s' already "
 					    "used by %d volumes.",
 					    diskname, pd->pd_subdisks);
 					error = -7;
 					break;
 				}
 				/* Use the free range left by existing records. */
 				pp = disk->d_consumer->provider;
 				disks[i] = disk;
 				promise_meta_unused_range(pd->pd_meta,
 				    pd->pd_subdisks,
 				    pp->mediasize / pp->sectorsize,
 				    &offs[i], &esize);
 				size = MIN(size, (off_t)esize * pp->sectorsize);
 				sectorsize = MAX(sectorsize, pp->sectorsize);
 				continue;
 			}
 
 			/* Not known yet: open the provider as a new disk. */
 			g_topology_lock();
 			cp = g_raid_open_consumer(sc, diskname);
 			if (cp == NULL) {
 				gctl_error(req, "Can't open disk '%s'.",
 				    diskname);
 				g_topology_unlock();
 				error = -8;
 				break;
 			}
 			pp = cp->provider;
 			pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO);
 			disk = g_raid_create_disk(sc);
 			disk->d_md_data = (void *)pd;
 			disk->d_consumer = cp;
 			disks[i] = disk;
 			cp->private = disk;
 			g_topology_unlock();
 
 			g_raid_get_disk_info(disk);
 
 			/* Reserve some space for metadata. */
 			size = MIN(size, pp->mediasize - 131072llu * pp->sectorsize);
 			sectorsize = MAX(sectorsize, pp->sectorsize);
 		}
 		if (error != 0) {
 			/* Undo only the disks created above (still in NONE). */
 			for (i = 0; i < numdisks; i++) {
 				if (disks[i] != NULL &&
 				    disks[i]->d_state == G_RAID_DISK_S_NONE)
 					g_raid_destroy_disk(disks[i]);
 			}
 			return (error);
 		}
 
 		if (sectorsize <= 0) {
 			gctl_error(req, "Can't get sector size.");
 			return (-8);
 		}
 
 		/* Handle size argument. */
 		len = sizeof(*sizearg);
 		sizearg = gctl_get_param(req, "size", &len);
 		if (sizearg != NULL && len == sizeof(*sizearg) &&
 		    *sizearg > 0) {
 			if (*sizearg > size) {
 				gctl_error(req, "Size too big %lld > %lld.",
 				    (long long)*sizearg, (long long)size);
 				return (-9);
 			}
 			size = *sizearg;
 		}
 
 		/* Handle strip argument. */
 		strip = 131072;
 		len = sizeof(*striparg);
 		striparg = gctl_get_param(req, "strip", &len);
 		if (striparg != NULL && len == sizeof(*striparg) &&
 		    *striparg > 0) {
 			if (*striparg < sectorsize) {
 				gctl_error(req, "Strip size too small.");
 				return (-10);
 			}
 			if (*striparg % sectorsize != 0) {
 				gctl_error(req, "Incorrect strip size.");
 				return (-11);
 			}
 			strip = *striparg;
 		}
 
 		/* Round size down to strip or sector. */
 		if (level == G_RAID_VOLUME_RL_RAID1 ||
 		    level == G_RAID_VOLUME_RL_SINGLE ||
 		    level == G_RAID_VOLUME_RL_CONCAT)
 			size -= (size % sectorsize);
 		else if (level == G_RAID_VOLUME_RL_RAID1E &&
 		    (numdisks & 1) != 0)
 			size -= (size % (2 * strip));
 		else
 			size -= (size % strip);
 		if (size <= 0) {
 			gctl_error(req, "Size too small.");
 			return (-13);
 		}
 
 		/* We have all we need, create things: volume, ... */
 		pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO);
 		arc4rand(&pv->pv_id, sizeof(pv->pv_id), 0);
 		pv->pv_generation = 0;
 		pv->pv_started = 1;
 		vol = g_raid_create_volume(sc, volname, -1);
 		vol->v_md_data = pv;
 		vol->v_raid_level = level;
 		vol->v_raid_level_qualifier = qual;
 		vol->v_strip_size = strip;
 		vol->v_disks_count = numdisks;
 		/* Volume size depends on how the level uses its disks. */
 		if (level == G_RAID_VOLUME_RL_RAID0 ||
 		    level == G_RAID_VOLUME_RL_CONCAT ||
 		    level == G_RAID_VOLUME_RL_SINGLE)
 			vol->v_mediasize = size * numdisks;
 		else if (level == G_RAID_VOLUME_RL_RAID1)
 			vol->v_mediasize = size;
 		else if (level == G_RAID_VOLUME_RL_RAID3 ||
 		    level == G_RAID_VOLUME_RL_RAID5)
 			vol->v_mediasize = size * (numdisks - 1);
 		else { /* RAID1E */
 			vol->v_mediasize = ((size * numdisks) / strip / 2) *
 			    strip;
 		}
 		vol->v_sectorsize = sectorsize;
 		g_raid_start_volume(vol);
 
 		/* , and subdisks. */
 		for (i = 0; i < numdisks; i++) {
 			disk = disks[i];
 			sd = &vol->v_subdisks[i];
 			sd->sd_disk = disk;
 			sd->sd_offset = (off_t)offs[i] * 512;
 			sd->sd_size = size;
 			/* Positions given as "NONE" stay without a disk. */
 			if (disk == NULL)
 				continue;
 			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 			g_raid_change_disk_state(disk,
 			    G_RAID_DISK_S_ACTIVE);
 			g_raid_change_subdisk_state(sd,
 			    G_RAID_SUBDISK_S_ACTIVE);
 			g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 			    G_RAID_EVENT_SUBDISK);
 		}
 
 		/* Write metadata based on created entities. */
 		G_RAID_DEBUG1(0, sc, "Array started.");
 		g_raid_md_write_promise(md, vol, NULL, NULL);
 
 		/* Pickup any STALE/SPARE disks to refill array if needed. */
 		g_raid_md_promise_refill(sc);
 
 		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 		    G_RAID_EVENT_VOLUME);
 		return (0);
 	}
 	if (strcmp(verb, "add") == 0) {
 
 		gctl_error(req, "`add` command is not applicable, "
 		    "use `label` instead.");
 		return (-99);
 	}
 	if (strcmp(verb, "delete") == 0) {
 
 		/* arg0 counts as the node name only if it matches this node. */
 		nodename = gctl_get_asciiparam(req, "arg0");
 		if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0)
 			nodename = NULL;
 
 		/* Full node destruction. */
 		if (*nargs == 1 && nodename != NULL) {
 			/* Check if some volume is still open. */
 			force = gctl_get_paraml(req, "force", sizeof(*force));
 			if (force != NULL && *force == 0 &&
 			    g_raid_nopens(sc) != 0) {
 				gctl_error(req, "Some volume is still open.");
 				return (-4);
 			}
 
 			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_consumer)
 					promise_meta_erase(disk->d_consumer);
 			}
 			g_raid_destroy_node(sc, 0);
 			return (0);
 		}
 
 		/* Destroy specified volume. If it was last - all node. */
 		if (*nargs > 2) {
 			gctl_error(req, "Invalid number of arguments.");
 			return (-1);
 		}
 		volname = gctl_get_asciiparam(req,
 		    nodename != NULL ? "arg1" : "arg0");
 		if (volname == NULL) {
 			gctl_error(req, "No volume name.");
 			return (-2);
 		}
 
 		/* Search for volume. */
 		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 			if (strcmp(vol->v_name, volname) == 0)
 				break;
 			pp = vol->v_provider;
 			if (pp == NULL)
 				continue;
 			if (strcmp(pp->name, volname) == 0)
 				break;
 			if (strncmp(pp->name, "raid/", 5) == 0 &&
 			    strcmp(pp->name + 5, volname) == 0)
 				break;
 		}
 		if (vol == NULL) {
 			/*
 			 * Fall back to a numeric global ID.  The string must
 			 * be non-empty and consist of the number only,
 			 * otherwise e.g. "" would select the volume with ID 0.
 			 */
 			i = strtol(volname, &tmp, 10);
 			if (tmp != volname && tmp[0] == 0) {
 				TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 					if (vol->v_global_id == i)
 						break;
 				}
 			}
 		}
 		if (vol == NULL) {
 			gctl_error(req, "Volume '%s' not found.", volname);
 			return (-3);
 		}
 
 		/* Check if volume is still open. */
 		force = gctl_get_paraml(req, "force", sizeof(*force));
 		if (force != NULL && *force == 0 &&
 		    vol->v_provider_open != 0) {
 			gctl_error(req, "Volume is still open.");
 			return (-4);
 		}
 
 		/* Destroy volume and potentially node. */
 		i = 0;
 		TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
 			i++;
 		if (i >= 2) {
 			g_raid_destroy_volume(vol);
 			g_raid_md_promise_purge_disks(sc);
 			g_raid_md_write_promise(md, NULL, NULL, NULL);
 		} else {
 			/* It is the only volume: wipe metadata, drop the node. */
 			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_consumer)
 					promise_meta_erase(disk->d_consumer);
 			}
 			g_raid_destroy_node(sc, 0);
 		}
 		return (0);
 	}
 	if (strcmp(verb, "remove") == 0 ||
 	    strcmp(verb, "fail") == 0) {
 		if (*nargs < 2) {
 			gctl_error(req, "Invalid number of arguments.");
 			return (-1);
 		}
 		for (i = 1; i < *nargs; i++) {
 			snprintf(arg, sizeof(arg), "arg%d", i);
 			diskname = gctl_get_asciiparam(req, arg);
 			if (diskname == NULL) {
 				gctl_error(req, "No disk name (%s).", arg);
 				error = -2;
 				break;
 			}
 			if (strncmp(diskname, "/dev/", 5) == 0)
 				diskname += 5;
 
 			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_consumer != NULL && 
 				    disk->d_consumer->provider != NULL &&
 				    strcmp(disk->d_consumer->provider->name,
 				     diskname) == 0)
 					break;
 			}
 			if (disk == NULL) {
 				gctl_error(req, "Disk '%s' not found.",
 				    diskname);
 				error = -3;
 				break;
 			}
 
 			if (strcmp(verb, "fail") == 0) {
 				g_raid_md_fail_disk_promise(md, NULL, disk);
 				continue;
 			}
 
 			/* Erase metadata on deleting disk and destroy it. */
 			promise_meta_erase(disk->d_consumer);
 			g_raid_destroy_disk(disk);
 		}
 		/* The cleanup below also runs when the loop stopped on error. */
 		g_raid_md_promise_purge_volumes(sc);
 
 		/* Write updated metadata to remaining disks. */
 		g_raid_md_write_promise(md, NULL, NULL, NULL);
 
 		/* Check if anything left. */
 		if (g_raid_ndisks(sc, -1) == 0)
 			g_raid_destroy_node(sc, 0);
 		else
 			g_raid_md_promise_refill(sc);
 		return (error);
 	}
 	if (strcmp(verb, "insert") == 0) {
 		if (*nargs < 2) {
 			gctl_error(req, "Invalid number of arguments.");
 			return (-1);
 		}
 		for (i = 1; i < *nargs; i++) {
 			/* Get disk name. */
 			snprintf(arg, sizeof(arg), "arg%d", i);
 			diskname = gctl_get_asciiparam(req, arg);
 			if (diskname == NULL) {
 				gctl_error(req, "No disk name (%s).", arg);
 				error = -3;
 				break;
 			}
 
 			/* Try to find provider with specified name. */
 			g_topology_lock();
 			cp = g_raid_open_consumer(sc, diskname);
 			if (cp == NULL) {
 				gctl_error(req, "Can't open disk '%s'.",
 				    diskname);
 				g_topology_unlock();
 				error = -4;
 				break;
 			}
 			pp = cp->provider;
 			g_topology_unlock();
 
 			pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO);
 
 			disk = g_raid_create_disk(sc);
 			disk->d_consumer = cp;
 			disk->d_md_data = (void *)pd;
 			cp->private = disk;
 
 			g_raid_get_disk_info(disk);
 
 			/* Welcome the "new" disk. */
 			g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
 			promise_meta_write_spare(cp);
 			g_raid_md_promise_refill(sc);
 		}
 		return (error);
 	}
 	return (-100);
 }
 
 static int
 g_raid_md_write_promise(struct g_raid_md_object *md, struct g_raid_volume *tvol,
     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_volume *vol;
 	struct g_raid_subdisk *sd;
 	struct g_raid_disk *disk;
 	struct g_raid_md_promise_perdisk *pd;
 	struct g_raid_md_promise_pervolume *pv;
 	struct promise_raid_conf *meta;
 	off_t rebuild_lba64;
 	int i, j, pos, rebuild;
 
 	sc = md->mdo_softc;
 
 	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
 		return (0);
 
 	/* Generate new per-volume metadata for affected volumes. */
 	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 		if (vol->v_stopping)
 			continue;
 
 		/* Skip volumes not related to specified targets. */
 		if (tvol != NULL && vol != tvol)
 			continue;
 		if (tsd != NULL && vol != tsd->sd_volume)
 			continue;
 		if (tdisk != NULL) {
 			for (i = 0; i < vol->v_disks_count; i++) {
 				if (vol->v_subdisks[i].sd_disk == tdisk)
 					break;
 			}
 			if (i >= vol->v_disks_count)
 				continue;
 		}
 
 		pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data;
 		pv->pv_generation++;
 
 		meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO);
 		if (pv->pv_meta != NULL)
 			memcpy(meta, pv->pv_meta, sizeof(*meta));
 		memcpy(meta->promise_id, PROMISE_MAGIC,
 		    sizeof(PROMISE_MAGIC) - 1);
 		meta->dummy_0 = 0x00020000;
 		meta->integrity = PROMISE_I_VALID;
 
 		meta->generation = pv->pv_generation;
 		meta->status = PROMISE_S_VALID | PROMISE_S_ONLINE |
 		    PROMISE_S_INITED | PROMISE_S_READY;
 		if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED)
 			meta->status |= PROMISE_S_DEGRADED;
 		if (vol->v_dirty)
 			meta->status |= PROMISE_S_MARKED; /* XXX: INVENTED! */
 		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0 ||
 		    vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE)
 			meta->type = PROMISE_T_RAID0;
 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
 		    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
 			meta->type = PROMISE_T_RAID1;
 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3)
 			meta->type = PROMISE_T_RAID3;
 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
 			meta->type = PROMISE_T_RAID5;
 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT)
 			meta->type = PROMISE_T_SPAN;
 		else
 			meta->type = PROMISE_T_JBOD;
 		meta->total_disks = vol->v_disks_count;
 		meta->stripe_shift = ffs(vol->v_strip_size / 1024);
 		meta->array_width = vol->v_disks_count;
 		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
 		    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
 			meta->array_width /= 2;
 		meta->array_number = vol->v_global_id;
 		meta->total_sectors = vol->v_mediasize / 512;
 		meta->total_sectors_high = (vol->v_mediasize / 512) >> 32;
 		meta->sector_size = vol->v_sectorsize / 512;
 		meta->cylinders = meta->total_sectors / (255 * 63) - 1;
 		meta->heads = 254;
 		meta->sectors = 63;
 		meta->volume_id = pv->pv_id;
 		rebuild_lba64 = UINT64_MAX;
 		rebuild = 0;
 		for (i = 0; i < vol->v_disks_count; i++) {
 			sd = &vol->v_subdisks[i];
 			/* For RAID0+1 we need to translate order. */
 			pos = promise_meta_translate_disk(vol, i);
 			meta->disks[pos].flags = PROMISE_F_VALID |
 			    PROMISE_F_ASSIGNED;
 			if (sd->sd_state == G_RAID_SUBDISK_S_NONE) {
 				meta->disks[pos].flags |= 0;
 			} else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED) {
 				meta->disks[pos].flags |=
 				    PROMISE_F_DOWN | PROMISE_F_REDIR;
 			} else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD) {
 				meta->disks[pos].flags |=
 				    PROMISE_F_ONLINE | PROMISE_F_REDIR;
 				if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) {
 					rebuild_lba64 = MIN(rebuild_lba64,
 					    sd->sd_rebuild_pos / 512);
 				} else
 					rebuild_lba64 = 0;
 				rebuild = 1;
 			} else {
 				meta->disks[pos].flags |= PROMISE_F_ONLINE;
 				if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE) {
 					meta->status |= PROMISE_S_MARKED;
 					if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
 						rebuild_lba64 = MIN(rebuild_lba64,
 						    sd->sd_rebuild_pos / 512);
 					} else
 						rebuild_lba64 = 0;
 				}
 			}
 			if (pv->pv_meta != NULL) {
 				meta->disks[pos].id = pv->pv_meta->disks[pos].id;
 			} else {
 				meta->disks[pos].number = i * 2;
 				arc4rand(&meta->disks[pos].id,
 				    sizeof(meta->disks[pos].id), 0);
 			}
 		}
 		promise_meta_put_name(meta, vol->v_name);
 
 		/* Try to mimic AMD BIOS rebuild/resync behavior. */
 		if (rebuild_lba64 != UINT64_MAX) {
 			if (rebuild)
 				meta->magic_3 = 0x03040010UL; /* Rebuild? */
 			else
 				meta->magic_3 = 0x03040008UL; /* Resync? */
 			/* Translate from per-disk to per-volume LBA. */
 			if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
 			    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) {
 				rebuild_lba64 *= meta->array_width;
 			} else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 ||
 			    vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) {
 				rebuild_lba64 *= meta->array_width - 1;
 			} else
 				rebuild_lba64 = 0;
 		} else
 			meta->magic_3 = 0x03000000UL;
 		meta->rebuild_lba64 = rebuild_lba64;
 		meta->magic_4 = 0x04010101UL;
 
 		/* Replace per-volume metadata with new. */
 		if (pv->pv_meta != NULL)
 			free(pv->pv_meta, M_MD_PROMISE);
 		pv->pv_meta = meta;
 
 		/* Copy new metadata to the disks, adding or replacing old. */
 		for (i = 0; i < vol->v_disks_count; i++) {
 			sd = &vol->v_subdisks[i];
 			disk = sd->sd_disk;
 			if (disk == NULL)
 				continue;
 			/* For RAID0+1 we need to translate order. */
 			pos = promise_meta_translate_disk(vol, i);
 			pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
 			for (j = 0; j < pd->pd_subdisks; j++) {
 				if (pd->pd_meta[j]->volume_id == meta->volume_id)
 					break;
 			}
 			if (j == pd->pd_subdisks)
 				pd->pd_subdisks++;
 			if (pd->pd_meta[j] != NULL)
 				free(pd->pd_meta[j], M_MD_PROMISE);
 			pd->pd_meta[j] = promise_meta_copy(meta);
 			pd->pd_meta[j]->disk = meta->disks[pos];
 			pd->pd_meta[j]->disk.number = pos;
 			pd->pd_meta[j]->disk_offset_high =
 			    (sd->sd_offset / 512) >> 32;
 			pd->pd_meta[j]->disk_offset = sd->sd_offset / 512;
 			pd->pd_meta[j]->disk_sectors_high =
 			    (sd->sd_size / 512) >> 32;
 			pd->pd_meta[j]->disk_sectors = sd->sd_size / 512;
 			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) {
 				pd->pd_meta[j]->disk_rebuild_high =
 				    (sd->sd_rebuild_pos / 512) >> 32;
 				pd->pd_meta[j]->disk_rebuild =
 				    sd->sd_rebuild_pos / 512;
 			} else if (sd->sd_state < G_RAID_SUBDISK_S_REBUILD) {
 				pd->pd_meta[j]->disk_rebuild_high = 0;
 				pd->pd_meta[j]->disk_rebuild = 0;
 			} else {
 				pd->pd_meta[j]->disk_rebuild_high = UINT32_MAX;
 				pd->pd_meta[j]->disk_rebuild = UINT32_MAX;
 			}
 			pd->pd_updated = 1;
 		}
 	}
 
 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 		pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
 		if (disk->d_state != G_RAID_DISK_S_ACTIVE)
 			continue;
 		if (!pd->pd_updated)
 			continue;
 		G_RAID_DEBUG(1, "Writing Promise metadata to %s",
 		    g_raid_get_diskname(disk));
 		for (i = 0; i < pd->pd_subdisks; i++)
 			g_raid_md_promise_print(pd->pd_meta[i]);
 		promise_meta_write(disk->d_consumer,
 		    pd->pd_meta, pd->pd_subdisks);
 		pd->pd_updated = 0;
 	}
 
 	return (0);
 }
 
 /*
  * Fail a disk that is part of a Promise array.
  *
  * Returns -1 without touching anything when tdisk is not in the ACTIVE
  * state.  Otherwise: marks every cached per-disk metadata copy as
  * DOWN|REDIR, writes those copies to the failing disk if it still has a
  * consumer, moves the disk and each of its subdisks to FAILED, asks
  * g_raid_md_write_promise() to rewrite metadata and finally calls
  * g_raid_md_promise_refill().  Returns 0 on that path.
  *
  * tsd is not referenced by this function.
  */
 static int
 g_raid_md_fail_disk_promise(struct g_raid_md_object *md,
     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_md_promise_perdisk *pd;
 	struct g_raid_subdisk *sd;
 	int i, pos;
 
 	sc = md->mdo_softc;
 	pd = (struct g_raid_md_promise_perdisk *)tdisk->d_md_data;
 
 	/* We can't fail disk that is not a part of array now. */
 	if (tdisk->d_state != G_RAID_DISK_S_ACTIVE)
 		return (-1);
 
 	/*
 	 * Mark disk as failed in metadata and try to write that metadata
 	 * to the disk itself to prevent its later resurrection as STALE.
 	 */
 	if (pd->pd_subdisks > 0 && tdisk->d_consumer != NULL)
 		G_RAID_DEBUG(1, "Writing Promise metadata to %s",
 		    g_raid_get_diskname(tdisk));
 	for (i = 0; i < pd->pd_subdisks; i++) {
 		/* Flag the disk's own descriptor ... */
 		pd->pd_meta[i]->disk.flags |=
 		    PROMISE_F_DOWN | PROMISE_F_REDIR;
 		/* ... and its slot in the per-volume disk table, if in range. */
 		pos = pd->pd_meta[i]->disk.number;
 		if (pos >= 0 && pos < PROMISE_MAX_DISKS) {
 			pd->pd_meta[i]->disks[pos].flags |=
 			    PROMISE_F_DOWN | PROMISE_F_REDIR;
 		}
 		g_raid_md_promise_print(pd->pd_meta[i]);
 	}
 	if (tdisk->d_consumer != NULL)
 		promise_meta_write(tdisk->d_consumer,
 		    pd->pd_meta, pd->pd_subdisks);
 
 	/* Change states. */
 	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
 	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
 		g_raid_change_subdisk_state(sd,
 		    G_RAID_SUBDISK_S_FAILED);
 		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
 		    G_RAID_EVENT_SUBDISK);
 	}
 
 	/* Write updated metadata to remaining disks. */
 	g_raid_md_write_promise(md, NULL, NULL, tdisk);
 
 	g_raid_md_promise_refill(sc);
 	return (0);
 }
 
 /*
  * Release the Promise per-disk data attached to a disk: each cached
  * metadata copy in pd_meta[0 .. pd_subdisks-1] and then the per-disk
  * structure itself.  disk->d_md_data is cleared.  Always returns 0.
  */
 static int
 g_raid_md_free_disk_promise(struct g_raid_md_object *md,
     struct g_raid_disk *disk)
 {
 	struct g_raid_md_promise_perdisk *perdisk;
 	int idx;
 
 	perdisk = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
 	for (idx = 0; idx < perdisk->pd_subdisks; idx++) {
 		if (perdisk->pd_meta[idx] == NULL)
 			continue;
 		free(perdisk->pd_meta[idx], M_MD_PROMISE);
 		perdisk->pd_meta[idx] = NULL;
 	}
 	disk->d_md_data = NULL;
 	free(perdisk, M_MD_PROMISE);
 	return (0);
 }
 
 /*
  * Release the Promise per-volume data attached to a volume.
  *
  * When per-volume data exists, its cached metadata is freed and, if the
  * volume was never marked started, it is marked started and its start
  * callout is stopped.  The per-volume structure is then freed and
  * vol->v_md_data cleared.  Always returns 0.
  */
 static int
 g_raid_md_free_volume_promise(struct g_raid_md_object *md,
     struct g_raid_volume *vol)
 {
 	struct g_raid_md_promise_pervolume *pervol;
 
 	pervol = (struct g_raid_md_promise_pervolume *)vol->v_md_data;
 	if (pervol != NULL) {
 		if (pervol->pv_meta != NULL) {
 			free(pervol->pv_meta, M_MD_PROMISE);
 			pervol->pv_meta = NULL;
 		}
 		if (!pervol->pv_started) {
 			pervol->pv_started = 1;
 			callout_stop(&pervol->pv_start_co);
 		}
 	}
 	/* Issued even when pervol is NULL, exactly as before. */
 	free(pervol, M_MD_PROMISE);
 	vol->v_md_data = NULL;
 	return (0);
 }
 
 /*
  * Metadata-object teardown hook for the Promise module.  This
  * implementation has nothing to release and always returns 0.
  */
 static int
 g_raid_md_free_promise(struct g_raid_md_object *md)
 {
 
 	return (0);
 }
 
 G_RAID_MD_DECLARE(promise, "Promise");
Index: stable/9/sys/geom/raid/md_sii.c
===================================================================
--- stable/9/sys/geom/raid/md_sii.c	(revision 299397)
+++ stable/9/sys/geom/raid/md_sii.c	(revision 299398)
@@ -1,1670 +1,1670 @@
 /*-
  * Copyright (c) 2011 Alexander Motin <mav@FreeBSD.org>
  * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/kobj.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <geom/geom.h>
 #include "geom/raid/g_raid.h"
 #include "g_raid_md_if.h"
 
 /* malloc(9) type passed to every malloc()/free() call in this module. */
 static MALLOC_DEFINE(M_MD_SII, "md_sii_data", "GEOM_RAID SiI metadata");
 
 /*
  * SiI RAID metadata record as it is stored on disk.
  *
  * The structure is packed and its fields add up to 256 16-bit words
  * (512 bytes).  The numeric trailing comments give each field's offset
  * counted in 16-bit words.  sii_meta_read() takes the record from the
  * last sector of the provider; sii_meta_write() stores four copies.
  */
 struct sii_raid_conf {
 	uint16_t	ata_params_00_53[54];
 	uint64_t	total_sectors;		/* 54 - 57 */
 	uint16_t	ata_params_58_81[72];
 	uint16_t	product_id;		/* 130 */
 	/* sii_meta_read() requires vendor_id 0x1095 and version_major 2. */
 	uint16_t	vendor_id;		/* 131 */
 	uint16_t	version_minor;		/* 132 */
 	uint16_t	version_major;		/* 133 */
 	/* Compared byte for byte to decide whether two records match. */
 	uint8_t		timestamp[6];		/* 134 - 136 */
 	uint16_t	strip_sectors;		/* 137 */
 	uint16_t	dummy_2;
 	uint8_t		disk_number;		/* 139 */
 	/* Array type; one of the SII_T_* values below. */
 	uint8_t		type;
 #define SII_T_RAID0             0x00
 #define SII_T_RAID1             0x01
 #define SII_T_RAID01            0x02
 #define SII_T_SPARE             0x03
 #define SII_T_CONCAT            0x04
 #define SII_T_RAID5             0x10
 #define SII_T_RESERVED          0xfd
 #define SII_T_JBOD              0xff
 
 	uint8_t		raid0_disks;		/* 140 */
 	uint8_t		raid0_ident;
 	uint8_t		raid1_disks;		/* 141 */
 	uint8_t		raid1_ident;
 	uint64_t	rebuild_lba;		/* 142 - 145 */
 	uint32_t	generation;		/* 146 - 147 */
 	/* Per-disk state; compared against SII_S_CURRENT/SII_S_REBUILD. */
 	uint8_t		disk_status;		/* 148 */
 #define SII_S_CURRENT           0x01
 #define SII_S_REBUILD           0x02
 #define SII_S_DROPPED           0x03
 #define SII_S_REMOVED           0x04
 
 	/* Array state; compared against SII_S_ONLINE. */
 	uint8_t		raid_status;
 #define SII_S_ONLINE            0x01
 #define SII_S_AVAILABLE         0x02
 
 	uint8_t		raid_location;		/* 149 */
 	uint8_t		disk_location;
 	uint8_t		auto_rebuild;		/* 150 */
 #define SII_R_REBUILD           0x00
 #define SII_R_NOREBUILD         0xff
 
 	uint8_t		dummy_3;
 	/* Volume name, space padded, not NUL terminated when 16 bytes long. */
 	uint8_t		name[16];		/* 151 - 158 */
 	/* Set so that the 16-bit sum of words 0 .. 159 is zero. */
 	uint16_t	checksum;		/* 159 */
 	uint16_t	ata_params_160_255[96];
 } __packed;
 
 /* Per-disk private data hung off g_raid_disk.d_md_data by this module. */
 struct g_raid_md_sii_perdisk {
 	/* Metadata associated with this disk; may be NULL. */
 	struct sii_raid_conf	*pd_meta;
 	/*
 	 * Position of the disk in the array.  Negative values are special:
 	 * -3 is tested together with "spare" handling and -2 is assigned
 	 * to a disk that has just been replaced.
 	 */
 	int			 pd_disk_pos;
 	/* Disk size; compared with subdisk offset + size (presumably bytes). */
 	off_t			 pd_disk_size;
 };
 
 /*
  * Per-array metadata object.  mdio_base comes first so that a
  * struct g_raid_md_object pointer can be cast to this type.
  */
 struct g_raid_md_sii_object {
 	struct g_raid_md_object	 mdio_base;
 	/* Array identity; tasting matches these against the disk metadata. */
 	uint8_t			 mdio_timestamp[6];
 	uint8_t			 mdio_location;
 	/* Generation of the metadata currently held in mdio_meta. */
 	uint32_t		 mdio_generation;
 	/* Copy of the newest metadata seen so far; NULL until a disk arrives. */
 	struct sii_raid_conf	*mdio_meta;
 	struct callout		 mdio_start_co;	/* STARTING state timer. */
 	/* Number of disks the array should have according to mdio_meta. */
 	int			 mdio_total_disks;
 	/* Disks seen so far whose generation equals mdio_generation. */
 	int			 mdio_disks_present;
 	/* Nonzero once g_raid_md_sii_start() has run. */
 	int			 mdio_started;
 	/* Nonzero while fewer disks are ACTIVE than mdio_total_disks. */
 	int			 mdio_incomplete;
 	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
 };
 
 /* Prototypes of the g_raid_md interface methods implemented in this file. */
 static g_raid_md_create_t g_raid_md_create_sii;
 static g_raid_md_taste_t g_raid_md_taste_sii;
 static g_raid_md_event_t g_raid_md_event_sii;
 static g_raid_md_ctl_t g_raid_md_ctl_sii;
 static g_raid_md_write_t g_raid_md_write_sii;
 static g_raid_md_fail_disk_t g_raid_md_fail_disk_sii;
 static g_raid_md_free_disk_t g_raid_md_free_disk_sii;
 static g_raid_md_free_t g_raid_md_free_sii;
 
 /* kobj method table binding each g_raid_md method to its SiI version. */
 static kobj_method_t g_raid_md_sii_methods[] = {
 	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_sii),
 	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_sii),
 	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_sii),
 	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_sii),
 	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_sii),
 	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_sii),
 	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_sii),
 	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_sii),
 	{ 0, 0 }
 };
 
 /*
  * Class descriptor: class name, method table and the size of the
  * per-array object, followed by the enable flag and priority.
  */
 static struct g_raid_md_class g_raid_md_sii_class = {
 	"SiI",
 	g_raid_md_sii_methods,
 	sizeof(struct g_raid_md_sii_object),
 	.mdc_enable = 1,
 	.mdc_priority = 100
 };
 
 /*
  * Dump a SiI metadata record to the console.
  *
  * Does nothing unless g_raid_debug is at least 1.  Fields are printed as
  * they are stored in the record; nothing is validated here.
  */
 static void
 g_raid_md_sii_print(struct sii_raid_conf *meta)
 {
 
 	if (g_raid_debug < 1)
 		return;
 
 	printf("********* ATA SiI RAID Metadata *********\n");
 	printf("total_sectors       %llu\n",
 	    (long long unsigned)meta->total_sectors);
 	printf("product_id          0x%04x\n", meta->product_id);
 	printf("vendor_id           0x%04x\n", meta->vendor_id);
 	printf("version_minor       0x%04x\n", meta->version_minor);
 	printf("version_major       0x%04x\n", meta->version_major);
 	/* The timestamp is shown most significant byte (index 5) first. */
 	printf("timestamp           0x%02x%02x%02x%02x%02x%02x\n",
 	    meta->timestamp[5], meta->timestamp[4], meta->timestamp[3],
 	    meta->timestamp[2], meta->timestamp[1], meta->timestamp[0]);
 	printf("strip_sectors       %d\n", meta->strip_sectors);
 	printf("disk_number         %d\n", meta->disk_number);
 	printf("type                0x%02x\n", meta->type);
 	printf("raid0_disks         %d\n", meta->raid0_disks);
 	printf("raid0_ident         %d\n", meta->raid0_ident);
 	printf("raid1_disks         %d\n", meta->raid1_disks);
 	printf("raid1_ident         %d\n", meta->raid1_ident);
 	printf("rebuild_lba         %llu\n",
 	    (long long unsigned)meta->rebuild_lba);
 	/*
 	 * generation is a uint32_t; print it unsigned so that values with
 	 * the top bit set are not shown as negative numbers.
 	 */
 	printf("generation          %u\n", meta->generation);
 	printf("disk_status         %d\n", meta->disk_status);
 	printf("raid_status         %d\n", meta->raid_status);
 	printf("raid_location       %d\n", meta->raid_location);
 	printf("disk_location       %d\n", meta->disk_location);
 	printf("auto_rebuild        %d\n", meta->auto_rebuild);
 	/* name is not NUL terminated; the precision bounds the read. */
 	printf("name                <%.16s>\n", meta->name);
 	printf("checksum            0x%04x\n", meta->checksum);
 	printf("=================================================\n");
 }
 
 /*
  * Return a freshly allocated (M_MD_SII, M_WAITOK) byte-for-byte duplicate
  * of a metadata record.  The caller owns the result.
  */
 static struct sii_raid_conf *
 sii_meta_copy(struct sii_raid_conf *meta)
 {
 	struct sii_raid_conf *dup;
 
 	dup = malloc(sizeof(*dup), M_MD_SII, M_WAITOK);
 	memcpy(dup, meta, sizeof(*dup));
 	return (dup);
 }
 
 /*
  * Number of disks described by a metadata record, derived from its type:
  * raid0_disks for RAID0/RAID5/CONCAT, raid1_disks for RAID1, the product
  * of both for RAID01, 1 for SPARE/JBOD and 0 for any other type.
  */
 static int
 sii_meta_total_disks(struct sii_raid_conf *meta)
 {
 	uint8_t type;
 
 	type = meta->type;
 	if (type == SII_T_RAID1)
 		return (meta->raid1_disks);
 	if (type == SII_T_RAID01)
 		return (meta->raid0_disks * meta->raid1_disks);
 	if (type == SII_T_SPARE || type == SII_T_JBOD)
 		return (1);
 	if (type == SII_T_RAID0 || type == SII_T_RAID5 ||
 	    type == SII_T_CONCAT)
 		return (meta->raid0_disks);
 	return (0);
 }
 
 /*
  * Work out the array position recorded in pdmeta, relative to the array
  * described by meta.
  *
  * Returns -3 when pdmeta is a spare, -1 when the two timestamps differ or
  * the type is not handled, and the position (0 or more) otherwise.
  */
 static int
 sii_meta_disk_pos(struct sii_raid_conf *meta, struct sii_raid_conf *pdmeta)
 {
 	int pos;
 
 	/* Spares are reported before the timestamps are even compared. */
 	if (pdmeta->type == SII_T_SPARE)
 		return (-3);
 
 	/* All six timestamp bytes have to match. */
 	if (memcmp(&meta->timestamp, &pdmeta->timestamp, 6) != 0)
 		return (-1);
 
 	if (pdmeta->type == SII_T_JBOD) {
 		pos = 0;
 	} else if (pdmeta->type == SII_T_RAID01) {
 		pos = pdmeta->raid1_ident * pdmeta->raid1_disks +
 		    pdmeta->raid0_ident;
 	} else if (pdmeta->type == SII_T_RAID0 ||
 	    pdmeta->type == SII_T_RAID1 ||
 	    pdmeta->type == SII_T_RAID5 ||
 	    pdmeta->type == SII_T_CONCAT) {
 		pos = pdmeta->disk_number;
 	} else {
 		pos = -1;
 	}
 	return (pos);
 }
 
 /*
  * Copy the volume name out of a metadata record into buf as a C string.
  *
  * buf must have room for 17 bytes.  Trailing characters that do not
  * compare greater than 0x20 (as plain char) are replaced by NULs.
  */
 static void
 sii_meta_get_name(struct sii_raid_conf *meta, char *buf)
 {
 	int len;
 
 	strncpy(buf, meta->name, 16);
 	buf[16] = 0;
 	/* Walk back from the last of the 16 name bytes, trimming as we go. */
 	len = 16;
 	while (len > 0 && !(buf[len - 1] > 0x20)) {
 		len--;
 		buf[len] = 0;
 	}
 }
 
 /*
  * Store a C string as the volume name of a metadata record: at most the
  * first 16 bytes of buf are used and the remainder of the field is
  * filled with spaces (0x20).  No NUL terminator is written.
  */
 static void
 sii_meta_put_name(struct sii_raid_conf *meta, char *buf)
 {
 	size_t len;
 
 	len = strlen(buf);
 	if (len > 16)
 		len = 16;
 	memset(meta->name, 0x20, 16);
 	memcpy(meta->name, buf, len);
 }
 
 static struct sii_raid_conf *
 sii_meta_read(struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	struct sii_raid_conf *meta;
 	char *buf;
 	int error, i;
 	uint16_t checksum, *ptr;
 
 	pp = cp->provider;
 
 	/* Read the anchor sector. */
 	buf = g_read_data(cp,
 	    pp->mediasize - pp->sectorsize, pp->sectorsize, &error);
 	if (buf == NULL) {
 		G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
 		    pp->name, error);
 		return (NULL);
 	}
 	meta = (struct sii_raid_conf *)buf;
 
 	/* Check vendor ID. */
 	if (meta->vendor_id != 0x1095) {
 		G_RAID_DEBUG(1, "SiI vendor ID check failed on %s (0x%04x)",
 		    pp->name, meta->vendor_id);
 		g_free(buf);
 		return (NULL);
 	}
 
 	/* Check metadata major version. */
 	if (meta->version_major != 2) {
 		G_RAID_DEBUG(1, "SiI version check failed on %s (%d.%d)",
 		    pp->name, meta->version_major, meta->version_minor);
 		g_free(buf);
 		return (NULL);
 	}
 	meta = malloc(sizeof(*meta), M_MD_SII, M_WAITOK);
 	memcpy(meta, buf, min(sizeof(*meta), pp->sectorsize));
 	g_free(buf);
 
 	/* Check metadata checksum. */
 	for (checksum = 0, ptr = (uint16_t *)meta, i = 0; i <= 159; i++)
 		checksum += *ptr++;
 	if (checksum != 0) {
 		G_RAID_DEBUG(1, "SiI checksum check failed on %s", pp->name);
 		free(meta, M_MD_SII);
 		return (NULL);
 	}
 
 	/* Check raid type. */
 	if (meta->type != SII_T_RAID0 && meta->type != SII_T_RAID1 &&
 	    meta->type != SII_T_RAID01 && meta->type != SII_T_SPARE &&
 	    meta->type != SII_T_RAID5 && meta->type != SII_T_CONCAT &&
 	    meta->type != SII_T_JBOD) {
 		G_RAID_DEBUG(1, "SiI unknown RAID level on %s (0x%02x)",
 		    pp->name, meta->type);
 		free(meta, M_MD_SII);
 		return (NULL);
 	}
 
 	return (meta);
 }
 
 /*
  * Recompute the checksum of a metadata record and write four copies of
  * it to the provider, one sector each, at 0x200-sector intervals counted
  * back from the last sector.
  *
  * Writing stops at the first failure.  Returns the status of the last
  * g_write_data() call: 0 when all four copies were written, otherwise
  * the error that stopped the loop.  meta->checksum is updated in place.
  */
 static int
 sii_meta_write(struct g_consumer *cp, struct sii_raid_conf *meta)
 {
 	struct g_provider *pp;
 	char *sector;
 	int copy, error, w;
 	uint16_t sum, *words;
 
 	pp = cp->provider;
 
 	/* Sum words 0 .. 158 and store the negated sum as the checksum. */
 	words = (uint16_t *)meta;
 	meta->checksum = 0;
 	sum = 0;
 	for (w = 0; w < 159; w++)
 		sum += words[w];
 	meta->checksum -= sum;
 
 	/* Zero-filled sector image with the record at its start. */
 	sector = malloc(pp->sectorsize, M_MD_SII, M_WAITOK | M_ZERO);
 	memcpy(sector, meta, sizeof(*meta));
 
 	for (copy = 0; copy < 4; copy++) {
 		error = g_write_data(cp,
 		    pp->mediasize - (pp->sectorsize * (1 + 0x200 * copy)),
 		    sector, pp->sectorsize);
 		if (error == 0)
 			continue;
 		G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
 		    pp->name, error);
 		break;
 	}
 
 	free(sector, M_MD_SII);
 	return (error);
 }
 
 /*
  * Overwrite all four metadata copies on the provider with zeroed
  * sectors.
  *
  * Erasing is best effort: a failed write is logged and the remaining
  * copies are still attempted.  Returns 0 when every write succeeded,
  * otherwise the error of the first write that failed, so that a late
  * success can no longer hide an earlier failure.
  */
 static int
 sii_meta_erase(struct g_consumer *cp)
 {
 	struct g_provider *pp;
 	char *buf;
 	int error, first_error, i;
 
 	pp = cp->provider;
 	buf = malloc(pp->sectorsize, M_MD_SII, M_WAITOK | M_ZERO);
 	first_error = 0;
 	/* Write 4 copies of metadata. */
 	for (i = 0; i < 4; i++) {
 		error = g_write_data(cp,
 		    pp->mediasize - (pp->sectorsize * (1 + 0x200 * i)),
 		    buf, pp->sectorsize);
 		if (error != 0) {
 			G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
 			    pp->name, error);
 			if (first_error == 0)
 				first_error = error;
 		}
 	}
 	free(buf, M_MD_SII);
 	return (first_error);
 }
 
 static int
 sii_meta_write_spare(struct g_consumer *cp)
 {
 	struct sii_raid_conf *meta;
 	int error;
 
 	meta = malloc(sizeof(*meta), M_MD_SII, M_WAITOK | M_ZERO);
 	meta->total_sectors = cp->provider->mediasize /
 	    cp->provider->sectorsize - 0x800;
 	meta->vendor_id = 0x1095;
 	meta->version_minor = 0;
 	meta->version_major = 2;
 	meta->timestamp[0] = arc4random();
 	meta->timestamp[1] = arc4random();
 	meta->timestamp[2] = arc4random();
 	meta->timestamp[3] = arc4random();
 	meta->timestamp[4] = arc4random();
 	meta->timestamp[5] = arc4random();
 	meta->type = SII_T_SPARE;
 	meta->generation = 1;
 	meta->raid1_ident = 0xff;
 	meta->raid_location = arc4random();
 	error = sii_meta_write(cp, meta);
 	free(meta, M_MD_SII);
 	return (error);
 }
 
 /*
  * Find the first disk of the node whose per-disk position equals id.
  * Returns NULL when no disk matches.
  */
 static struct g_raid_disk *
 g_raid_md_sii_get_disk(struct g_raid_softc *sc, int id)
 {
 	struct g_raid_disk *cur;
 	struct g_raid_md_sii_perdisk *perdisk;
 
 	TAILQ_FOREACH(cur, &sc->sc_disks, d_next) {
 		perdisk = (struct g_raid_md_sii_perdisk *)cur->d_md_data;
 		if (perdisk->pd_disk_pos == id)
 			return (cur);
 	}
 	return (NULL);
 }
 
 /*
  * Tell whether this module accepts a RAID level / qualifier / disk count
  * combination.  Returns 1 when it does and 0 when it does not.
  *
  * More than 8 disks are never accepted.  RAID5 needs at least 3 disks
  * and the R5LS qualifier; every other level needs the NONE qualifier.
  * A nonzero 'force' lifts the stricter disk-count limits of RAID0, RAID1
  * and RAID1E but not their absolute minimums.
  */
 static int
 g_raid_md_sii_supported(int level, int qual, int disks, int force)
 {
 
 	if (disks > 8)
 		return (0);
 
 	/* RAID5 is the only level that carries a qualifier. */
 	if (level == G_RAID_VOLUME_RL_RAID5)
 		return (disks >= 3 && qual == G_RAID_VOLUME_RLQ_R5LS);
 	if (qual != G_RAID_VOLUME_RLQ_NONE)
 		return (0);
 
 	switch (level) {
 	case G_RAID_VOLUME_RL_RAID0:
 		return (disks >= 1 &&
 		    (force || (disks >= 2 && disks <= 6)));
 	case G_RAID_VOLUME_RL_RAID1:
 		return (disks >= 1 && (force || disks == 2));
 	case G_RAID_VOLUME_RL_RAID1E:
 		return (disks >= 2 && disks % 2 == 0 &&
 		    (force || disks >= 4));
 	case G_RAID_VOLUME_RL_SINGLE:
 		return (disks == 1);
 	case G_RAID_VOLUME_RL_CONCAT:
 		return (disks >= 2);
 	default:
 		return (0);
 	}
 }
 
 /*
  * Try to give a disk a place in the array.
  *
  * The disk's position is taken from its own metadata.  If it has none
  * (or no metadata at all) and the array is already started, the disk may
  * instead take over the slot of an OFFLINE or FAILED disk that it is big
  * enough to replace.  On success the subdisks of the replaced disk are
  * moved to the new one and disk/subdisk states are set from the metadata.
  *
  * Returns nonzero when the disk was taken as a replacement or parked as
  * SPARE; both visible callers rewrite metadata (or count an update) on a
  * nonzero return.  Returns 0 otherwise, including when the disk was
  * marked STALE.
  */
 static int
 g_raid_md_sii_start_disk(struct g_raid_disk *disk)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_subdisk *sd, *tmpsd;
 	struct g_raid_disk *olddisk, *tmpdisk;
 	struct g_raid_md_object *md;
 	struct g_raid_md_sii_object *mdi;
 	struct g_raid_md_sii_perdisk *pd, *oldpd;
 	struct sii_raid_conf *meta;
 	int disk_pos, resurrection = 0;
 
 	sc = disk->d_softc;
 	md = sc->sc_md;
 	mdi = (struct g_raid_md_sii_object *)md;
 	meta = mdi->mdio_meta;
 	pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data;
 	olddisk = NULL;
 
 	/*
 	 * Find the disk position from the disk's own metadata.  A disk
 	 * without metadata gets -3, the same value a spare record yields.
 	 */
 	if (pd->pd_meta != NULL)
 		disk_pos = sii_meta_disk_pos(meta, pd->pd_meta);
 	else
 		disk_pos = -3;
 	if (disk_pos < 0) {
 		G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
 		/* If we are in the start process, that's all for now. */
 		if (!mdi->mdio_started)
 			goto nofit;
 		/*
 		 * If we have already started - try to get use of the disk.
 		 * Try to replace OFFLINE disks first, then FAILED.
 		 */
 		TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) {
 			if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE &&
 			    tmpdisk->d_state != G_RAID_DISK_S_FAILED)
 				continue;
 			/* Make sure this disk is big enough. */
 			TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
 				if (sd->sd_offset + sd->sd_size + 512 >
 				    pd->pd_disk_size) {
 					G_RAID_DEBUG1(1, sc,
 					    "Disk too small (%ju < %ju)",
 					    pd->pd_disk_size,
 					    sd->sd_offset + sd->sd_size + 512);
 					break;
 				}
 			}
 			/* sd is non-NULL only if the loop broke out early. */
 			if (sd != NULL)
 				continue;
 			/* First OFFLINE wins; else remember the first FAILED. */
 			if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) {
 				olddisk = tmpdisk;
 				break;
 			} else if (olddisk == NULL)
 				olddisk = tmpdisk;
 		}
 		if (olddisk == NULL) {
 			/* Also entered by goto while the array is starting. */
 nofit:
 			if (disk_pos == -3 || pd->pd_disk_pos == -3) {
 				g_raid_change_disk_state(disk,
 				    G_RAID_DISK_S_SPARE);
 				return (1);
 			} else {
 				g_raid_change_disk_state(disk,
 				    G_RAID_DISK_S_STALE);
 				return (0);
 			}
 		}
 		oldpd = (struct g_raid_md_sii_perdisk *)olddisk->d_md_data;
 		disk_pos = oldpd->pd_disk_pos;
 		resurrection = 1;
 	}
 
 	if (olddisk == NULL) {
 		/* Find placeholder by position. */
 		olddisk = g_raid_md_sii_get_disk(sc, disk_pos);
 		if (olddisk == NULL)
 			panic("No disk at position %d!", disk_pos);
 		if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) {
 			G_RAID_DEBUG1(1, sc, "More then one disk for pos %d",
 			    disk_pos);
 			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
 			return (0);
 		}
 		oldpd = (struct g_raid_md_sii_perdisk *)olddisk->d_md_data;
 	}
 
 	/* Replace failed disk or placeholder with new disk. */
 	TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) {
 		TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next);
 		TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 		sd->sd_disk = disk;
 	}
 	oldpd->pd_disk_pos = -2;
 	pd->pd_disk_pos = disk_pos;
 
 	/* If it was placeholder -- destroy it. */
 	if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) {
 		g_raid_destroy_disk(olddisk);
 	} else {
 		/* Otherwise, make it STALE_FAILED. */
 		g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED);
 	}
 
 	/*
 	 * Welcome the new disk.  pd_meta is only dereferenced when this is
 	 * not a resurrection, i.e. when the position came from pd_meta.
 	 */
 	if (resurrection)
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
 	else if (pd->pd_meta->disk_status == SII_S_CURRENT ||
 	    pd->pd_meta->disk_status == SII_S_REBUILD)
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
 	else
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
 	TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 
 		/*
 		 * Different disks may have different sizes,
 		 * in concat mode. Update from real disk size.
 		 */
 		if (meta->type == SII_T_CONCAT || meta->type == SII_T_JBOD)
 			sd->sd_size = pd->pd_disk_size - 0x800 * 512;
 
 		if (resurrection) {
 			/* New or ex-spare disk. */
 			g_raid_change_subdisk_state(sd,
 			    G_RAID_SUBDISK_S_NEW);
 		} else if (pd->pd_meta->disk_status == SII_S_REBUILD) {
 			/* Rebuilding disk. */
 			g_raid_change_subdisk_state(sd,
 			    G_RAID_SUBDISK_S_REBUILD);
 			/* Resume only if the disk's generation is current. */
 			if (pd->pd_meta->generation == meta->generation)
 				sd->sd_rebuild_pos = pd->pd_meta->rebuild_lba * 512;
 			else
 				sd->sd_rebuild_pos = 0;
 		} else if (pd->pd_meta->disk_status == SII_S_CURRENT) {
 			if (pd->pd_meta->raid_status == SII_S_ONLINE ||
 			    pd->pd_meta->generation != meta->generation) {
 				/* Dirty or resyncing disk. */
 				g_raid_change_subdisk_state(sd,
 				    G_RAID_SUBDISK_S_STALE);
 			} else {
 				/* Up to date disk. */
 				g_raid_change_subdisk_state(sd,
 				    G_RAID_SUBDISK_S_ACTIVE);
 			}
 		} else {
 			g_raid_change_subdisk_state(sd,
 			    G_RAID_SUBDISK_S_FAILED);
 		}
 		g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 		    G_RAID_EVENT_SUBDISK);
 	}
 
 	/* Update status of our need for spare. */
 	if (mdi->mdio_started) {
 		mdi->mdio_incomplete =
 		    (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
 		     mdi->mdio_total_disks);
 	}
 
 	return (resurrection);
 }
 
 /*
  * Task handler queued by g_raid_md_sii_refill(): requests a retaste of
  * the g_raid class and then frees its own task structure, which is
  * passed in as arg.  pending is not used.
  */
 static void
 g_disk_md_sii_retaste(void *arg, int pending)
 {
 
 	G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
 	g_retaste(&g_raid_class);
 	free(arg, M_MD_SII);
 }
 
 /*
  * Try to bring the array back to its full disk count using disks that
  * are already attached to the node.
  *
  * While fewer disks are ACTIVE than mdio_total_disks, STALE disks are
  * tried first and SPARE disks second.  Metadata is rewritten if any
  * g_raid_md_sii_start_disk() call asked for it.  If the array is still
  * incomplete afterwards, a task is queued to retaste the g_raid class.
  */
 static void
 g_raid_md_sii_refill(struct g_raid_softc *sc)
 {
 	struct g_raid_md_object *md;
 	struct g_raid_md_sii_object *mdi;
 	struct g_raid_disk *disk;
 	struct task *task;
 	int update, na;
 
 	md = sc->sc_md;
 	mdi = (struct g_raid_md_sii_object *)md;
 	update = 0;
 	do {
 		/* Stop as soon as no ACTIVE disk is missing. */
 		na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE);
 		if (na == mdi->mdio_total_disks)
 			break;
 
 		G_RAID_DEBUG1(1, md->mdo_softc,
 		    "Array is not complete (%d of %d), "
 		    "trying to refill.", na, mdi->mdio_total_disks);
 
 		/* Try to get use some of STALE disks. */
 		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_state == G_RAID_DISK_S_STALE) {
 				update += g_raid_md_sii_start_disk(disk);
 				if (disk->d_state == G_RAID_DISK_S_ACTIVE)
 					break;
 			}
 		}
 		/*
 		 * disk is non-NULL only when a STALE disk became ACTIVE.
 		 * 'continue' jumps to the loop condition, which then holds,
 		 * so the whole pass is repeated.
 		 */
 		if (disk != NULL)
 			continue;
 
 		/* Try to get use some of SPARE disks. */
 		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_state == G_RAID_DISK_S_SPARE) {
 				update += g_raid_md_sii_start_disk(disk);
 				if (disk->d_state == G_RAID_DISK_S_ACTIVE)
 					break;
 			}
 		}
 		/* Repeat only if this pass activated a disk. */
 	} while (disk != NULL);
 
 	/* Write new metadata if we changed something. */
 	if (update)
 		g_raid_md_write_sii(md, NULL, NULL, NULL);
 
 	/* Update status of our need for spare. */
 	mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
 	    mdi->mdio_total_disks);
 
 	/* Request retaste hoping to find spare. */
 	if (mdi->mdio_incomplete) {
 		/* The task frees itself in g_disk_md_sii_retaste(). */
 		task = malloc(sizeof(struct task),
 		    M_MD_SII, M_WAITOK | M_ZERO);
 		TASK_INIT(task, 0, g_disk_md_sii_retaste, task);
 		taskqueue_enqueue(taskqueue_swi, task);
 	}
 }
 
 /*
  * Start the array described by mdi->mdio_meta.
  *
  * Creates the single volume with its subdisks, creates one OFFLINE
  * placeholder disk per array position, lets every disk that is still in
  * state NONE take its place (closest generation first), marks the array
  * started, writes metadata, tries to refill missing positions, sends the
  * volume START event, stops the start callout and releases the root
  * mount hold.
  */
 static void
 g_raid_md_sii_start(struct g_raid_softc *sc)
 {
 	struct g_raid_md_object *md;
 	struct g_raid_md_sii_object *mdi;
 	struct g_raid_md_sii_perdisk *pd;
 	struct sii_raid_conf *meta;
 	struct g_raid_volume *vol;
 	struct g_raid_subdisk *sd;
 	struct g_raid_disk *disk, *best;
 	off_t size;
 	int j, disk_pos;
 	uint32_t gendiff, bestgendiff;
 	char buf[17];
 
 	md = sc->sc_md;
 	mdi = (struct g_raid_md_sii_object *)md;
 	meta = mdi->mdio_meta;
 
 	/* Create volumes and subdisks. */
 	sii_meta_get_name(meta, buf);
 	vol = g_raid_create_volume(sc, buf, -1);
 	vol->v_mediasize = (off_t)meta->total_sectors * 512;
 	vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
 	/*
 	 * Map the SiI type to a RAID level and derive the per-subdisk size.
 	 * NOTE(review): the RAID0, RAID01 and RAID5 divisors come straight
 	 * from metadata-derived mdio_total_disks; a record giving 0 disks
 	 * (RAID0), fewer than 2 (RAID01) or exactly 1 (RAID5) would divide
 	 * by zero here -- confirm such records are rejected earlier.
 	 */
 	if (meta->type == SII_T_RAID0) {
 		vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
 		size = vol->v_mediasize / mdi->mdio_total_disks;
 	} else if (meta->type == SII_T_RAID1) {
 		vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
 		size = vol->v_mediasize;
 	} else if (meta->type == SII_T_RAID01) {
 		vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
 		size = vol->v_mediasize / (mdi->mdio_total_disks / 2);
 	} else if (meta->type == SII_T_CONCAT) {
 		if (mdi->mdio_total_disks == 1)
 			vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE;
 		else
 			vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT;
 		size = 0;
 	} else if (meta->type == SII_T_RAID5) {
 		vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
 		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LS;
 		size = vol->v_mediasize / (mdi->mdio_total_disks - 1);
 	} else if (meta->type == SII_T_JBOD) {
 		vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE;
 		size = 0;
 	} else {
 		vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
 		size = 0;
 	}
 	vol->v_strip_size = meta->strip_sectors * 512; //ZZZ
 	vol->v_disks_count = mdi->mdio_total_disks;
 	vol->v_sectorsize = 512; //ZZZ
 	for (j = 0; j < vol->v_disks_count; j++) {
 		sd = &vol->v_subdisks[j];
 		sd->sd_offset = 0;
 		sd->sd_size = size;
 	}
 	g_raid_start_volume(vol);
 
 	/* Create disk placeholders to store data for later writing. */
 	for (disk_pos = 0; disk_pos < mdi->mdio_total_disks; disk_pos++) {
 		pd = malloc(sizeof(*pd), M_MD_SII, M_WAITOK | M_ZERO);
 		pd->pd_disk_pos = disk_pos;
 		disk = g_raid_create_disk(sc);
 		disk->d_md_data = (void *)pd;
 		disk->d_state = G_RAID_DISK_S_OFFLINE;
 		sd = &vol->v_subdisks[disk_pos];
 		sd->sd_disk = disk;
 		TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 	}
 
 	/*
 	 * Make all disks found till the moment take their places
 	 * in order of their generation numbers.
 	 */
 	do {
 		best = NULL;
 		bestgendiff = 0xffffffff;
 		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_state != G_RAID_DISK_S_NONE)
 				continue;
 			pd = disk->d_md_data;
 			/* Disks without metadata are considered last. */
 			if (pd->pd_meta == NULL)
 				gendiff = 0xfffffffe;
 			else
 				gendiff = meta->generation -
 				    pd->pd_meta->generation;
 			if (gendiff < bestgendiff) {
 				best = disk;
 				bestgendiff = gendiff;
 			}
 		}
 		if (best != NULL)
 			g_raid_md_sii_start_disk(best);
 	} while (best != NULL);
 
 	mdi->mdio_started = 1;
 	G_RAID_DEBUG1(0, sc, "Array started.");
 	g_raid_md_write_sii(md, NULL, NULL, NULL);
 
 	/* Pickup any STALE/SPARE disks to refill array if needed. */
 	g_raid_md_sii_refill(sc);
 
 	g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME);
 
 	callout_stop(&mdi->mdio_start_co);
 	G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount);
 	root_mount_rel(mdi->mdio_rootmount);
 	mdi->mdio_rootmount = NULL;
 }
 
 /*
  * Account for a disk that has just been attached to the node.
  *
  * On a started array the disk is handed to g_raid_md_sii_start_disk()
  * and metadata is rewritten if that call asks for it.  Before the start,
  * the newest metadata seen so far is tracked (by generation, compared
  * with wrap-around) together with the number of disks carrying that
  * generation; once that number reaches the expected total the array is
  * started.
  */
 static void
 g_raid_md_sii_new_disk(struct g_raid_disk *disk)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_md_object *md;
 	struct g_raid_md_sii_object *mdi;
 	struct g_raid_md_sii_perdisk *perdisk;
 	struct sii_raid_conf *dmeta;
 
 	sc = disk->d_softc;
 	md = sc->sc_md;
 	mdi = (struct g_raid_md_sii_object *)md;
 	perdisk = (struct g_raid_md_sii_perdisk *)disk->d_md_data;
 	dmeta = perdisk->pd_meta;
 
 	/* Already running: just try to fit the disk in. */
 	if (mdi->mdio_started) {
 		if (g_raid_md_sii_start_disk(disk))
 			g_raid_md_write_sii(md, NULL, NULL, NULL);
 		return;
 	}
 
 	if (mdi->mdio_meta == NULL ||
 	    ((int32_t)(dmeta->generation - mdi->mdio_generation)) > 0) {
 		/* First disk, or newer than anything seen: restart count. */
 		G_RAID_DEBUG1(1, sc, "Newer disk");
 		if (mdi->mdio_meta != NULL)
 			free(mdi->mdio_meta, M_MD_SII);
 		mdi->mdio_meta = sii_meta_copy(dmeta);
 		mdi->mdio_generation = mdi->mdio_meta->generation;
 		mdi->mdio_total_disks = sii_meta_total_disks(dmeta);
 		mdi->mdio_disks_present = 1;
 	} else if (dmeta->generation != mdi->mdio_generation) {
 		G_RAID_DEBUG1(1, sc, "Older disk");
 	} else {
 		mdi->mdio_disks_present++;
 		G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
 		    mdi->mdio_disks_present,
 		    mdi->mdio_total_disks);
 	}
 
 	/* If we collected all needed disks - start array. */
 	if (mdi->mdio_disks_present == mdi->mdio_total_disks)
 		g_raid_md_sii_start(sc);
 }
 
 /*
  * Callback taking the node softc as its argument.  If the array has not
  * been started yet, it logs that fact and sends the node a START event;
  * otherwise it does nothing.
  */
 static void
 g_raid_sii_go(void *arg)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_md_sii_object *mdi;
 
 	sc = arg;
 	mdi = (struct g_raid_md_sii_object *)sc->sc_md;
 	if (mdi->mdio_started)
 		return;
 
 	G_RAID_DEBUG1(0, sc, "Force array start due to timeout.");
 	g_raid_event_send(sc, G_RAID_NODE_E_START, 0);
 }
 
 /*
  * Create a new, empty SiI array node.
  *
  * Gives the metadata object a random timestamp and location, resets its
  * generation to 0 and creates a node named "SiI-" followed by the
  * timestamp in hex.  Returns G_RAID_MD_TASTE_NEW and stores the node's
  * geom in *gp, or returns G_RAID_MD_TASTE_FAIL if the node could not be
  * created.
  */
 static int
 g_raid_md_create_sii(struct g_raid_md_object *md, struct g_class *mp,
     struct g_geom **gp)
 {
 	struct g_raid_md_sii_object *mdi;
 	struct g_raid_softc *node;
 	char nodename[32];
 	int b;
 
 	mdi = (struct g_raid_md_sii_object *)md;
 
 	/* Random identity, filled from byte 5 down to byte 0. */
 	for (b = 5; b >= 0; b--)
 		mdi->mdio_timestamp[b] = arc4random();
 	mdi->mdio_location = arc4random();
 	mdi->mdio_generation = 0;
 
 	snprintf(nodename, sizeof(nodename), "SiI-%02x%02x%02x%02x%02x%02x",
 	    mdi->mdio_timestamp[5], mdi->mdio_timestamp[4],
 	    mdi->mdio_timestamp[3], mdi->mdio_timestamp[2],
 	    mdi->mdio_timestamp[1], mdi->mdio_timestamp[0]);
 
 	node = g_raid_create_node(mp, nodename, md);
 	if (node == NULL)
 		return (G_RAID_MD_TASTE_FAIL);
 	md->mdo_softc = node;
 	*gp = node->sc_geom;
 	return (G_RAID_MD_TASTE_NEW);
 }
 
 static int
 g_raid_md_taste_sii(struct g_raid_md_object *md, struct g_class *mp,
                               struct g_consumer *cp, struct g_geom **gp)
 {
 	struct g_consumer *rcp;
 	struct g_provider *pp;
 	struct g_raid_md_sii_object *mdi, *mdi1;
 	struct g_raid_softc *sc;
 	struct g_raid_disk *disk;
 	struct sii_raid_conf *meta;
 	struct g_raid_md_sii_perdisk *pd;
 	struct g_geom *geom;
 	int disk_pos, result, spare, len;
 	char name[32];
 	uint16_t vendor;
 
 	G_RAID_DEBUG(1, "Tasting SiI on %s", cp->provider->name);
 	mdi = (struct g_raid_md_sii_object *)md;
 	pp = cp->provider;
 
 	/* Read metadata from device. */
 	meta = NULL;
 	vendor = 0xffff;
 	g_topology_unlock();
 	len = 2;
 	if (pp->geom->rank == 1)
 		g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
 	meta = sii_meta_read(cp);
 	g_topology_lock();
 	if (meta == NULL) {
 		if (g_raid_aggressive_spare) {
 			if (vendor == 0x1095) {
 				G_RAID_DEBUG(1,
 				    "No SiI metadata, forcing spare.");
 				spare = 2;
 				goto search;
 			} else {
 				G_RAID_DEBUG(1,
 				    "SiI vendor mismatch 0x%04x != 0x1095",
 				    vendor);
 			}
 		}
 		return (G_RAID_MD_TASTE_FAIL);
 	}
 
 	/* Check this disk position in obtained metadata. */
 	disk_pos = sii_meta_disk_pos(meta, meta);
 	if (disk_pos == -1) {
 		G_RAID_DEBUG(1, "SiI disk position not found");
 		goto fail1;
 	}
 
 	/* Metadata valid. Print it. */
 	g_raid_md_sii_print(meta);
 	G_RAID_DEBUG(1, "SiI disk position %d", disk_pos);
 	spare = (meta->type == SII_T_SPARE) ? 1 : 0;
 
 search:
 	/* Search for matching node. */
 	sc = NULL;
 	mdi1 = NULL;
 	LIST_FOREACH(geom, &mp->geom, geom) {
 		sc = geom->softc;
 		if (sc == NULL)
 			continue;
 		if (sc->sc_stopping != 0)
 			continue;
 		if (sc->sc_md->mdo_class != md->mdo_class)
 			continue;
 		mdi1 = (struct g_raid_md_sii_object *)sc->sc_md;
 		if (spare) {
 			if (mdi1->mdio_incomplete)
 				break;
 		} else {
 			if (mdi1->mdio_location == meta->raid_location &&
 			    memcmp(&mdi1->mdio_timestamp,
 			     &meta->timestamp, 6) == 0)
 				break;
 		}
 	}
 
 	/* Found matching node. */
 	if (geom != NULL) {
 		G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
 		result = G_RAID_MD_TASTE_EXISTING;
 
 	} else if (spare) { /* Not found needy node -- left for later. */
 		G_RAID_DEBUG(1, "Spare is not needed at this time");
 		goto fail1;
 
 	} else { /* Not found matching node -- create one. */
 		result = G_RAID_MD_TASTE_NEW;
 		memcpy(&mdi->mdio_timestamp, &meta->timestamp, 6);
 		mdi->mdio_location = meta->raid_location;
 		snprintf(name, sizeof(name), "SiI-%02x%02x%02x%02x%02x%02x",
 		    mdi->mdio_timestamp[5], mdi->mdio_timestamp[4],
 		    mdi->mdio_timestamp[3], mdi->mdio_timestamp[2],
 		    mdi->mdio_timestamp[1], mdi->mdio_timestamp[0]);
 		sc = g_raid_create_node(mp, name, md);
 		md->mdo_softc = sc;
 		geom = sc->sc_geom;
 		callout_init(&mdi->mdio_start_co, 1);
 		callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz,
 		    g_raid_sii_go, sc);
 		mdi->mdio_rootmount = root_mount_hold("GRAID-SiI");
 		G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount);
 	}
 
 	/* There is no return after this point, so we close passed consumer. */
 	g_access(cp, -1, 0, 0);
 
 	rcp = g_new_consumer(geom);
 	g_attach(rcp, pp);
 	if (g_access(rcp, 1, 1, 1) != 0)
 		; //goto fail1;
 
 	g_topology_unlock();
 	sx_xlock(&sc->sc_lock);
 
 	pd = malloc(sizeof(*pd), M_MD_SII, M_WAITOK | M_ZERO);
 	pd->pd_meta = meta;
 	if (spare == 2) {
 		pd->pd_disk_pos = -3;
 	} else {
 		pd->pd_disk_pos = -1;
 	}
 	pd->pd_disk_size = pp->mediasize;
 	disk = g_raid_create_disk(sc);
 	disk->d_md_data = (void *)pd;
 	disk->d_consumer = rcp;
 	rcp->private = disk;
 
 	g_raid_get_disk_info(disk);
 
 	g_raid_md_sii_new_disk(disk);
 
 	sx_xunlock(&sc->sc_lock);
 	g_topology_lock();
 	*gp = geom;
 	return (result);
 fail1:
 	free(meta, M_MD_SII);
 	return (G_RAID_MD_TASTE_FAIL);
 }
 
 /*
  * Metadata-module event handler.
  *
  * With disk == NULL only G_RAID_NODE_E_START is handled: the array is
  * started unless it already is (returns 0); any other event returns -1.
  *
  * With a disk only G_RAID_DISK_E_DISCONNECTED is handled (returns 0); any
  * other event returns -2.
  */
 static int
 g_raid_md_event_sii(struct g_raid_md_object *md,
     struct g_raid_disk *disk, u_int event)
 {
 	struct g_raid_md_sii_object *mdi;
 	struct g_raid_md_sii_perdisk *pd;
 	struct g_raid_subdisk *sd;
 	struct g_raid_softc *sc;
 
 	sc = md->mdo_softc;
 	mdi = (struct g_raid_md_sii_object *)md;
 
 	/* Node-level events. */
 	if (disk == NULL) {
 		if (event != G_RAID_NODE_E_START)
 			return (-1);
 		if (!mdi->mdio_started)
 			g_raid_md_sii_start(sc);
 		return (0);
 	}
 
 	/* Disk-level events: only disconnection is handled. */
 	if (event != G_RAID_DISK_E_DISCONNECTED)
 		return (-2);
 
 	pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data;
 	if (pd->pd_disk_pos < 0) {
 		/* Disk was never assigned a position -- delete it. */
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
 		g_raid_destroy_disk(disk);
 	} else {
 		/* Assigned disk: keep it as an OFFLINE placeholder. */
 		g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 		if (disk->d_consumer) {
 			g_raid_kill_consumer(sc, disk->d_consumer);
 			disk->d_consumer = NULL;
 		}
 		TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 			g_raid_change_subdisk_state(sd,
 			    G_RAID_SUBDISK_S_NONE);
 			g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
 			    G_RAID_EVENT_SUBDISK);
 		}
 	}
 
 	/* Write updated metadata to all disks. */
 	g_raid_md_write_sii(md, NULL, NULL, NULL);
 
 	/* Destroy the node if only OFFLINE placeholders remain. */
 	if (g_raid_ndisks(sc, -1) != g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 		g_raid_md_sii_refill(sc);
 	else
 		g_raid_destroy_node(sc, 0);
 	return (0);
 }
 
 static int
 g_raid_md_ctl_sii(struct g_raid_md_object *md,
     struct gctl_req *req)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_volume *vol;
 	struct g_raid_subdisk *sd;
 	struct g_raid_disk *disk;
 	struct g_raid_md_sii_object *mdi;
 	struct g_raid_md_sii_perdisk *pd;
 	struct g_consumer *cp;
 	struct g_provider *pp;
 	char arg[16];
 	const char *verb, *volname, *levelname, *diskname;
 	int *nargs, *force;
 	off_t size, sectorsize, strip;
 	intmax_t *sizearg, *striparg;
 	int numdisks, i, len, level, qual, update;
 	int error;
 
 	sc = md->mdo_softc;
 	mdi = (struct g_raid_md_sii_object *)md;
 	verb = gctl_get_param(req, "verb", NULL);
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	error = 0;
 	if (strcmp(verb, "label") == 0) {
 
 		if (*nargs < 4) {
 			gctl_error(req, "Invalid number of arguments.");
 			return (-1);
 		}
 		volname = gctl_get_asciiparam(req, "arg1");
 		if (volname == NULL) {
 			gctl_error(req, "No volume name.");
 			return (-2);
 		}
 		levelname = gctl_get_asciiparam(req, "arg2");
 		if (levelname == NULL) {
 			gctl_error(req, "No RAID level.");
 			return (-3);
 		}
 		if (strcasecmp(levelname, "RAID5") == 0)
 			levelname = "RAID5-LS";
 		if (g_raid_volume_str2level(levelname, &level, &qual)) {
 			gctl_error(req, "Unknown RAID level '%s'.", levelname);
 			return (-4);
 		}
 		numdisks = *nargs - 3;
 		force = gctl_get_paraml(req, "force", sizeof(*force));
 		if (!g_raid_md_sii_supported(level, qual, numdisks,
 		    force ? *force : 0)) {
 			gctl_error(req, "Unsupported RAID level "
 			    "(0x%02x/0x%02x), or number of disks (%d).",
 			    level, qual, numdisks);
 			return (-5);
 		}
 
 		/* Search for disks, connect them and probe. */
 		size = 0x7fffffffffffffffllu;
 		sectorsize = 0;
 		for (i = 0; i < numdisks; i++) {
 			snprintf(arg, sizeof(arg), "arg%d", i + 3);
 			diskname = gctl_get_asciiparam(req, arg);
 			if (diskname == NULL) {
 				gctl_error(req, "No disk name (%s).", arg);
 				error = -6;
 				break;
 			}
 			if (strcmp(diskname, "NONE") == 0) {
 				cp = NULL;
 				pp = NULL;
 			} else {
 				g_topology_lock();
 				cp = g_raid_open_consumer(sc, diskname);
 				if (cp == NULL) {
 					gctl_error(req, "Can't open '%s'.",
 					    diskname);
 					g_topology_unlock();
 					error = -7;
 					break;
 				}
 				pp = cp->provider;
 			}
 			pd = malloc(sizeof(*pd), M_MD_SII, M_WAITOK | M_ZERO);
 			pd->pd_disk_pos = i;
 			disk = g_raid_create_disk(sc);
 			disk->d_md_data = (void *)pd;
 			disk->d_consumer = cp;
 			if (cp == NULL)
 				continue;
 			cp->private = disk;
 			g_topology_unlock();
 
 			g_raid_get_disk_info(disk);
 
 			pd->pd_disk_size = pp->mediasize;
 			if (size > pp->mediasize)
 				size = pp->mediasize;
 			if (sectorsize < pp->sectorsize)
 				sectorsize = pp->sectorsize;
 		}
 		if (error != 0)
 			return (error);
 
 		if (sectorsize <= 0) {
 			gctl_error(req, "Can't get sector size.");
 			return (-8);
 		}
 
 		/* Reserve space for metadata. */
 		size -= 0x800 * sectorsize;
 
 		/* Handle size argument. */
 		len = sizeof(*sizearg);
 		sizearg = gctl_get_param(req, "size", &len);
 		if (sizearg != NULL && len == sizeof(*sizearg) &&
 		    *sizearg > 0) {
 			if (*sizearg > size) {
 				gctl_error(req, "Size too big %lld > %lld.",
 				    (long long)*sizearg, (long long)size);
 				return (-9);
 			}
 			size = *sizearg;
 		}
 
 		/* Handle strip argument. */
 		strip = 131072;
 		len = sizeof(*striparg);
 		striparg = gctl_get_param(req, "strip", &len);
 		if (striparg != NULL && len == sizeof(*striparg) &&
 		    *striparg > 0) {
 			if (*striparg < sectorsize) {
 				gctl_error(req, "Strip size too small.");
 				return (-10);
 			}
 			if (*striparg % sectorsize != 0) {
 				gctl_error(req, "Incorrect strip size.");
 				return (-11);
 			}
 			if (strip > 65535 * sectorsize) {
 				gctl_error(req, "Strip size too big.");
 				return (-12);
 			}
 			strip = *striparg;
 		}
 
 		/* Round size down to strip or sector. */
 		if (level == G_RAID_VOLUME_RL_RAID1)
 			size -= (size % sectorsize);
 		else if (level == G_RAID_VOLUME_RL_RAID1E &&
 		    (numdisks & 1) != 0)
 			size -= (size % (2 * strip));
 		else
 			size -= (size % strip);
 		if (size <= 0) {
 			gctl_error(req, "Size too small.");
 			return (-13);
 		}
 		if (size > 0xffffffffffffllu * sectorsize) {
 			gctl_error(req, "Size too big.");
 			return (-14);
 		}
 
 		/* We have all we need, create things: volume, ... */
 		mdi->mdio_total_disks = numdisks;
 		mdi->mdio_started = 1;
 		vol = g_raid_create_volume(sc, volname, -1);
 		vol->v_md_data = (void *)(intptr_t)0;
 		vol->v_raid_level = level;
 		vol->v_raid_level_qualifier = qual;
 		vol->v_strip_size = strip;
 		vol->v_disks_count = numdisks;
 		if (level == G_RAID_VOLUME_RL_RAID0 ||
 		    level == G_RAID_VOLUME_RL_CONCAT ||
 		    level == G_RAID_VOLUME_RL_SINGLE)
 			vol->v_mediasize = size * numdisks;
 		else if (level == G_RAID_VOLUME_RL_RAID1)
 			vol->v_mediasize = size;
 		else if (level == G_RAID_VOLUME_RL_RAID5)
 			vol->v_mediasize = size * (numdisks - 1);
 		else { /* RAID1E */
 			vol->v_mediasize = ((size * numdisks) / strip / 2) *
 			    strip;
 		}
 		vol->v_sectorsize = sectorsize;
 		g_raid_start_volume(vol);
 
 		/* , and subdisks. */
 		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 			pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data;
 			sd = &vol->v_subdisks[pd->pd_disk_pos];
 			sd->sd_disk = disk;
 			sd->sd_offset = 0;
 			sd->sd_size = size;
 			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
 			if (sd->sd_disk->d_consumer != NULL) {
 				g_raid_change_disk_state(disk,
 				    G_RAID_DISK_S_ACTIVE);
 				g_raid_change_subdisk_state(sd,
 				    G_RAID_SUBDISK_S_ACTIVE);
 				g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
 				    G_RAID_EVENT_SUBDISK);
 			} else {
 				g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 			}
 		}
 
 		/* Write metadata based on created entities. */
 		G_RAID_DEBUG1(0, sc, "Array started.");
 		g_raid_md_write_sii(md, NULL, NULL, NULL);
 
 		/* Pickup any STALE/SPARE disks to refill array if needed. */
 		g_raid_md_sii_refill(sc);
 
 		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
 		    G_RAID_EVENT_VOLUME);
 		return (0);
 	}
 	if (strcmp(verb, "delete") == 0) {
 
 		/* Check if some volume is still open. */
 		force = gctl_get_paraml(req, "force", sizeof(*force));
 		if (force != NULL && *force == 0 &&
 		    g_raid_nopens(sc) != 0) {
 			gctl_error(req, "Some volume is still open.");
 			return (-4);
 		}
 
 		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 			if (disk->d_consumer)
 				sii_meta_erase(disk->d_consumer);
 		}
 		g_raid_destroy_node(sc, 0);
 		return (0);
 	}
 	if (strcmp(verb, "remove") == 0 ||
 	    strcmp(verb, "fail") == 0) {
 		if (*nargs < 2) {
 			gctl_error(req, "Invalid number of arguments.");
 			return (-1);
 		}
 		for (i = 1; i < *nargs; i++) {
 			snprintf(arg, sizeof(arg), "arg%d", i);
 			diskname = gctl_get_asciiparam(req, arg);
 			if (diskname == NULL) {
 				gctl_error(req, "No disk name (%s).", arg);
 				error = -2;
 				break;
 			}
 			if (strncmp(diskname, "/dev/", 5) == 0)
 				diskname += 5;
 
 			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 				if (disk->d_consumer != NULL && 
 				    disk->d_consumer->provider != NULL &&
 				    strcmp(disk->d_consumer->provider->name,
 				     diskname) == 0)
 					break;
 			}
 			if (disk == NULL) {
 				gctl_error(req, "Disk '%s' not found.",
 				    diskname);
 				error = -3;
 				break;
 			}
 
 			if (strcmp(verb, "fail") == 0) {
 				g_raid_md_fail_disk_sii(md, NULL, disk);
 				continue;
 			}
 
 			pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data;
 
 			/* Erase metadata on deleting disk. */
 			sii_meta_erase(disk->d_consumer);
 
 			/* If disk was assigned, just update statuses. */
 			if (pd->pd_disk_pos >= 0) {
 				g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
 				g_raid_kill_consumer(sc, disk->d_consumer);
 				disk->d_consumer = NULL;
 				TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
 					g_raid_change_subdisk_state(sd,
 					    G_RAID_SUBDISK_S_NONE);
 					g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
 					    G_RAID_EVENT_SUBDISK);
 				}
 			} else {
 				/* Otherwise -- delete. */
 				g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
 				g_raid_destroy_disk(disk);
 			}
 		}
 
 		/* Write updated metadata to remaining disks. */
 		g_raid_md_write_sii(md, NULL, NULL, NULL);
 
 		/* Check if anything left except placeholders. */
 		if (g_raid_ndisks(sc, -1) ==
 		    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 			g_raid_destroy_node(sc, 0);
 		else
 			g_raid_md_sii_refill(sc);
 		return (error);
 	}
 	if (strcmp(verb, "insert") == 0) {
 		if (*nargs < 2) {
 			gctl_error(req, "Invalid number of arguments.");
 			return (-1);
 		}
 		update = 0;
 		for (i = 1; i < *nargs; i++) {
 			/* Get disk name. */
 			snprintf(arg, sizeof(arg), "arg%d", i);
 			diskname = gctl_get_asciiparam(req, arg);
 			if (diskname == NULL) {
 				gctl_error(req, "No disk name (%s).", arg);
 				error = -3;
 				break;
 			}
 
 			/* Try to find provider with specified name. */
 			g_topology_lock();
 			cp = g_raid_open_consumer(sc, diskname);
 			if (cp == NULL) {
 				gctl_error(req, "Can't open disk '%s'.",
 				    diskname);
 				g_topology_unlock();
 				error = -4;
 				break;
 			}
 			pp = cp->provider;
 
 			pd = malloc(sizeof(*pd), M_MD_SII, M_WAITOK | M_ZERO);
 			pd->pd_disk_pos = -3;
 			pd->pd_disk_size = pp->mediasize;
 
 			disk = g_raid_create_disk(sc);
 			disk->d_consumer = cp;
 			disk->d_md_data = (void *)pd;
 			cp->private = disk;
 			g_topology_unlock();
 
 			g_raid_get_disk_info(disk);
 
 			/* Welcome the "new" disk. */
 			update += g_raid_md_sii_start_disk(disk);
 			if (disk->d_state == G_RAID_DISK_S_SPARE) {
 				sii_meta_write_spare(cp);
 				g_raid_destroy_disk(disk);
 			} else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
 				gctl_error(req, "Disk '%s' doesn't fit.",
 				    diskname);
 				g_raid_destroy_disk(disk);
 				error = -8;
 				break;
 			}
 		}
 
 		/* Write new metadata if we changed something. */
 		if (update)
 			g_raid_md_write_sii(md, NULL, NULL, NULL);
 		return (error);
 	}
 	gctl_error(req, "Command '%s' is not supported.", verb);
 	return (-100);
 }
 
 /*
  * Regenerate the SiI metadata for the node's single volume and write it to
  * every disk in G_RAID_DISK_S_ACTIVE state.
  *
  * The tvol, tsd and tdisk arguments are not referenced by this function.
  * Nothing is written (and 0 is returned) when the node is being hard
  * destroyed.  Each call bumps the generation counter.  Always returns 0.
  */
 static int
 g_raid_md_write_sii(struct g_raid_md_object *md, struct g_raid_volume *tvol,
     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 {
 	struct g_raid_softc *sc;
 	struct g_raid_volume *vol;
 	struct g_raid_subdisk *sd;
 	struct g_raid_disk *disk;
 	struct g_raid_md_sii_object *mdi;
 	struct g_raid_md_sii_perdisk *pd;
 	struct sii_raid_conf *meta;
-	int i;
+	u_int i;
 
 	sc = md->mdo_softc;
 	mdi = (struct g_raid_md_sii_object *)md;
 
 	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
 		return (0);
 
 	/* Bump generation. Newly written metadata may differ from previous. */
 	mdi->mdio_generation++;
 
 	/* There is only one volume. */
 	vol = TAILQ_FIRST(&sc->sc_volumes);
 
 	/* Fill global fields. */
 	meta = malloc(sizeof(*meta), M_MD_SII, M_WAITOK | M_ZERO);
 	/* Start from the previously stored metadata, if there is any. */
 	if (mdi->mdio_meta)
 		memcpy(meta, mdi->mdio_meta, sizeof(*meta));
 	meta->total_sectors = vol->v_mediasize / vol->v_sectorsize;
 	meta->vendor_id = 0x1095;
 	meta->version_minor = 0;
 	meta->version_major = 2;
 	memcpy(&meta->timestamp, &mdi->mdio_timestamp, 6);
 	meta->strip_sectors = vol->v_strip_size / vol->v_sectorsize;
 	/* Map the volume's RAID level to the SiI type and disk counts. */
 	if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0) {
 		meta->type = SII_T_RAID0;
 		meta->raid0_disks = vol->v_disks_count;
 		meta->raid1_disks = 0xff;
 	} else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) {
 		meta->type = SII_T_RAID1;
 		meta->raid0_disks = 0xff;
 		meta->raid1_disks = vol->v_disks_count;
 	} else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) {
 		meta->type = SII_T_RAID01;
 		meta->raid0_disks = vol->v_disks_count / 2;
 		meta->raid1_disks = 2;
 	} else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT ||
 	    vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE) {
 		meta->type = SII_T_JBOD;
 		meta->raid0_disks = vol->v_disks_count;
 		meta->raid1_disks = 0xff;
 	} else {
 		/* Any remaining level is recorded as RAID5. */
 		meta->type = SII_T_RAID5;
 		meta->raid0_disks = vol->v_disks_count;
 		meta->raid1_disks = 0xff;
 	}
 	meta->generation = mdi->mdio_generation;
 	/*
 	 * ONLINE if the volume is dirty or any subdisk is STALE or RESYNC,
 	 * AVAILABLE otherwise.
 	 */
 	meta->raid_status = vol->v_dirty ? SII_S_ONLINE : SII_S_AVAILABLE;
 	for (i = 0; i < vol->v_disks_count; i++) {
 		sd = &vol->v_subdisks[i];
 		if (sd->sd_state == G_RAID_SUBDISK_S_STALE ||
 		    sd->sd_state == G_RAID_SUBDISK_S_RESYNC)
 			meta->raid_status = SII_S_ONLINE;
 	}
 	meta->raid_location = mdi->mdio_location;
 	sii_meta_put_name(meta, vol->v_name);
 
 	/* We are done. Print meta data and store them to disks. */
 	if (mdi->mdio_meta != NULL)
 		free(mdi->mdio_meta, M_MD_SII);
 	mdi->mdio_meta = meta;
 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 		pd = (struct g_raid_md_sii_perdisk *)disk->d_md_data;
 		/* Only ACTIVE disks receive metadata. */
 		if (disk->d_state != G_RAID_DISK_S_ACTIVE)
 			continue;
 		/* Replace the per-disk copy with one based on the new meta. */
 		if (pd->pd_meta != NULL) {
 			free(pd->pd_meta, M_MD_SII);
 			pd->pd_meta = NULL;
 		}
 		pd->pd_meta = sii_meta_copy(meta);
 		/* Per-disk fields are derived from the disk's first subdisk. */
 		if ((sd = TAILQ_FIRST(&disk->d_subdisks)) != NULL) {
 			if (sd->sd_state < G_RAID_SUBDISK_S_NEW)
 				pd->pd_meta->disk_status = SII_S_DROPPED;
 			else if (sd->sd_state < G_RAID_SUBDISK_S_STALE) {
 				pd->pd_meta->disk_status = SII_S_REBUILD;
 				pd->pd_meta->rebuild_lba =
 				    sd->sd_rebuild_pos / vol->v_sectorsize;
 			} else
 				pd->pd_meta->disk_status = SII_S_CURRENT;
 			if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) {
 				pd->pd_meta->disk_number = sd->sd_pos;
 				pd->pd_meta->raid0_ident = 0xff;
 				pd->pd_meta->raid1_ident = 0;
 			} else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) {
 				pd->pd_meta->disk_number = sd->sd_pos / meta->raid1_disks;
 				pd->pd_meta->raid0_ident = sd->sd_pos % meta->raid1_disks;
 				pd->pd_meta->raid1_ident = sd->sd_pos / meta->raid1_disks;
 			} else {
 				pd->pd_meta->disk_number = sd->sd_pos;
 				pd->pd_meta->raid0_ident = 0;
 				pd->pd_meta->raid1_ident = 0xff;
 			}
 		}
 		G_RAID_DEBUG(1, "Writing SiI metadata to %s",
 		    g_raid_get_diskname(disk));
 		g_raid_md_sii_print(pd->pd_meta);
 		sii_meta_write(disk->d_consumer, pd->pd_meta);
 	}
 	return (0);
 }
 
 /*
  * Mark a disk as failed.
  *
  * Returns -1 without side effects when the disk holds no array position
  * (pd_disk_pos < 0).  Otherwise the disk and its subdisks are moved to the
  * FAILED state, metadata is rewritten, and 0 is returned.  The tsd argument
  * is not referenced.
  */
 static int
 g_raid_md_fail_disk_sii(struct g_raid_md_object *md,
     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
 {
 	struct g_raid_md_sii_perdisk *pd;
 	struct g_raid_subdisk *sd;
 	struct g_raid_softc *sc;
 
 	sc = md->mdo_softc;
 	pd = (struct g_raid_md_sii_perdisk *)tdisk->d_md_data;
 
 	/* We can't fail disk that is not a part of array now. */
 	if (pd->pd_disk_pos < 0)
 		return (-1);
 
 	/*
 	 * Record the failure on the disk itself so that it is not picked up
 	 * again later as STALE: rewrite its metadata with the REMOVED status,
 	 * or erase the metadata area when there is no in-memory copy.
 	 */
 	if (tdisk->d_consumer) {
 		if (!pd->pd_meta) {
 			sii_meta_erase(tdisk->d_consumer);
 		} else {
 			pd->pd_meta->disk_status = SII_S_REMOVED;
 			sii_meta_write(tdisk->d_consumer, pd->pd_meta);
 		}
 	}
 
 	/* Change states. */
 	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
 	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
 		g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_FAILED);
 		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
 		    G_RAID_EVENT_SUBDISK);
 	}
 
 	/* Write updated metadata to remaining disks. */
 	g_raid_md_write_sii(md, NULL, NULL, tdisk);
 
 	/* Destroy the node if only OFFLINE placeholders remain. */
 	if (g_raid_ndisks(sc, -1) != g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
 		g_raid_md_sii_refill(sc);
 	else
 		g_raid_destroy_node(sc, 0);
 	return (0);
 }
 
 /*
  * Release the SiI per-disk data hanging off a disk: the metadata copy (if
  * any) and the per-disk structure itself.  Always returns 0.
  */
 static int
 g_raid_md_free_disk_sii(struct g_raid_md_object *md,
     struct g_raid_disk *disk)
 {
 	struct g_raid_md_sii_perdisk *perdisk;
 
 	perdisk = (struct g_raid_md_sii_perdisk *)disk->d_md_data;
 
 	/* Drop the per-disk metadata copy first. */
 	if (perdisk->pd_meta != NULL) {
 		free(perdisk->pd_meta, M_MD_SII);
 		perdisk->pd_meta = NULL;
 	}
 
 	/* Then the container, and clear the disk's reference to it. */
 	free(perdisk, M_MD_SII);
 	disk->d_md_data = NULL;
 
 	return (0);
 }
 
 /*
  * Release SiI module state attached to a metadata object.
  *
  * If the array was never started, the start callout is stopped and the
  * root mount hold is released.  The stored metadata copy is freed in
  * either case.  Always returns 0.
  */
 static int
 g_raid_md_free_sii(struct g_raid_md_object *md)
 {
 	struct g_raid_md_sii_object *mdi;
 
 	mdi = (struct g_raid_md_sii_object *)md;
 
 	if (mdi->mdio_started == 0) {
 		mdi->mdio_started = 0;
 		callout_stop(&mdi->mdio_start_co);
 		G_RAID_DEBUG1(1, md->mdo_softc,
 		    "root_mount_rel %p", mdi->mdio_rootmount);
 		root_mount_rel(mdi->mdio_rootmount);
 		mdi->mdio_rootmount = NULL;
 	}
 
 	if (mdi->mdio_meta == NULL)
 		return (0);
 
 	free(mdi->mdio_meta, M_MD_SII);
 	mdi->mdio_meta = NULL;
 	return (0);
 }
 
 G_RAID_MD_DECLARE(sii, "SiI");
Index: stable/9/sys/geom/virstor/g_virstor.c
===================================================================
--- stable/9/sys/geom/virstor/g_virstor.c	(revision 299397)
+++ stable/9/sys/geom/virstor/g_virstor.c	(revision 299398)
@@ -1,1897 +1,1897 @@
 /*-
  * Copyright (c) 2006-2007 Ivan Voras <ivoras@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /* Implementation notes:
  * - "Components" are wrappers around providers that make up the
  *   virtual storage (i.e. a virstor has "physical" components)
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sx.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/time.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
 #include <sys/mutex.h>
 #include <vm/uma.h>
 #include <geom/geom.h>
 
 #include <geom/virstor/g_virstor.h>
 #include <geom/virstor/g_virstor_md.h>
 
 FEATURE(g_virstor, "GEOM virtual storage support");
 
 /* Declare malloc(9) label */
 static MALLOC_DEFINE(M_GVIRSTOR, "gvirstor", "GEOM_VIRSTOR Data");
 
 /* GEOM class methods */
 static g_init_t g_virstor_init;
 static g_fini_t g_virstor_fini;
 static g_taste_t g_virstor_taste;
 static g_ctl_req_t g_virstor_config;
 static g_ctl_destroy_geom_t g_virstor_destroy_geom;
 
 /*
  * Declare & initialize class structure ("geom class"): the class name and
  * version plus the per-class callbacks declared above.
  */
 struct g_class g_virstor_class = {
 	.name =		G_VIRSTOR_CLASS_NAME,
 	.version =	G_VERSION,
 	.init =		g_virstor_init,
 	.fini =		g_virstor_fini,
 	.taste =	g_virstor_taste,
 	.ctlreq =	g_virstor_config,
 	.destroy_geom = g_virstor_destroy_geom
 	/* The .dumpconf and the rest are only usable for a geom instance, so
 	 * they will be set when such instance is created. */
 };
 
 /* Declare sysctl's and loader tunables */
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, virstor, CTLFLAG_RW, 0,
     "GEOM_GVIRSTOR information");
 
 static u_int g_virstor_debug = 2; /* default level; scale is in the sysctl description */
 TUNABLE_INT("kern.geom.virstor.debug", &g_virstor_debug);
 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, debug, CTLFLAG_RW, &g_virstor_debug,
     0, "Debug level (2=production, 5=normal, 15=excessive)");
 
 static u_int g_virstor_chunk_watermark = 100;
 TUNABLE_INT("kern.geom.virstor.chunk_watermark", &g_virstor_chunk_watermark);
 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, chunk_watermark, CTLFLAG_RW,
     &g_virstor_chunk_watermark, 0,
     "Minimum number of free chunks before issuing administrative warning");
 
 static u_int g_virstor_component_watermark = 1;
 TUNABLE_INT("kern.geom.virstor.component_watermark",
     &g_virstor_component_watermark);
 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, component_watermark, CTLFLAG_RW,
     &g_virstor_component_watermark, 0,
     "Minimum number of free components before issuing administrative warning");
 
 static int read_metadata(struct g_consumer *, struct g_virstor_metadata *);
 static void write_metadata(struct g_consumer *, struct g_virstor_metadata *);
 static int clear_metadata(struct g_virstor_component *);
 static int add_provider_to_geom(struct g_virstor_softc *, struct g_provider *,
     struct g_virstor_metadata *);
 static struct g_geom *create_virstor_geom(struct g_class *,
     struct g_virstor_metadata *);
 static void virstor_check_and_run(struct g_virstor_softc *);
 static u_int virstor_valid_components(struct g_virstor_softc *);
 static int virstor_geom_destroy(struct g_virstor_softc *, boolean_t,
     boolean_t);
 static void remove_component(struct g_virstor_softc *,
     struct g_virstor_component *, boolean_t);
 static void bioq_dismantle(struct bio_queue_head *);
 static int allocate_chunk(struct g_virstor_softc *,
     struct g_virstor_component **, u_int *, u_int *);
 static void delay_destroy_consumer(void *, int);
 static void dump_component(struct g_virstor_component *comp);
 #if 0
 static void dump_me(struct virstor_map_entry *me, unsigned int nr);
 #endif
 
 static void virstor_ctl_stop(struct gctl_req *, struct g_class *);
 static void virstor_ctl_add(struct gctl_req *, struct g_class *);
 static void virstor_ctl_remove(struct gctl_req *, struct g_class *);
 static struct g_virstor_softc * virstor_find_geom(const struct g_class *,
     const char *);
 static void update_metadata(struct g_virstor_softc *);
 static void fill_metadata(struct g_virstor_softc *, struct g_virstor_metadata *,
     u_int, u_int);
 
 static void g_virstor_orphan(struct g_consumer *);
 static int g_virstor_access(struct g_provider *, int, int, int);
 static void g_virstor_start(struct bio *);
 static void g_virstor_dumpconf(struct sbuf *, const char *, struct g_geom *,
     struct g_consumer *, struct g_provider *);
 static void g_virstor_done(struct bio *);
 
 static void invalid_call(void);
 /*
  * Per-class init callback.  Performs no run-time work; it only hosts a
  * compile-time sanity check.
  */
 static void
 g_virstor_init(struct g_class *mp __unused)
 {
 
 	/*
 	 * Compile-time check for a map struct size mismatch: a block of map
 	 * entries must fill MAXPHYS exactly, with no wasted space.
 	 */
 	CTASSERT(MAXPHYS == VIRSTOR_MAP_BLOCK_ENTRIES * VIRSTOR_MAP_ENTRY_SIZE);
 
 	/* Placeholder: UMA zones, TAILQs and other globals would go here. */
 }
 
 /*
  * Finalise GEOM class (per-class callback).  Currently does nothing; the
  * class argument is unused.
  */
 static void
 g_virstor_fini(struct g_class *mp __unused)
 {
 
 	/* Placeholder: deinit UMA zones & global vars */
 }
 
 /*
  * Per-class control request entry point.
  *
  * Checks that the request's "version" parameter equals G_VIRSTOR_VERSION,
  * then dispatches on the verb ("add", "stop"/"destroy", "remove").  The
  * topology lock is asserted on entry, released while the verb handler runs
  * and re-acquired before returning.
  */
 static void
 g_virstor_config(struct gctl_req *req, struct g_class *cp, char const *verb)
 {
 	uint32_t *ver;
 
 	g_topology_assert();
 
 	ver = gctl_get_paraml(req, "version", sizeof(*ver));
 	if (ver == NULL) {
 		gctl_error(req, "Failed to get 'version' argument");
 		return;
 	}
 	if (*ver != G_VIRSTOR_VERSION) {
 		gctl_error(req, "Userland and kernel versions out of sync");
 		return;
 	}
 
 	/* The verb handlers take the topology lock themselves as needed. */
 	g_topology_unlock();
 	if (strcmp(verb, "add") == 0) {
 		virstor_ctl_add(req, cp);
 	} else if (strcmp(verb, "remove") == 0) {
 		virstor_ctl_remove(req, cp);
 	} else if (strcmp(verb, "stop") == 0 || strcmp(verb, "destroy") == 0) {
 		virstor_ctl_stop(req, cp);
 	} else {
 		gctl_error(req, "unknown verb: '%s'", verb);
 	}
 	g_topology_lock();
 }
 
 /*
  * "stop" verb from userland (also used for "destroy").
  *
  * Each argument arg0..arg(nargs-1) names a virstor geom.  For every one,
  * the metadata is updated and the geom is destroyed.  Processing stops at
  * the first missing argument or unknown geom; a failed destroy is logged
  * and the loop continues.  The "force" parameter must be present but its
  * value is not otherwise used here.
  */
 static void
 virstor_ctl_stop(struct gctl_req *req, struct g_class *cp)
 {
 	int *force, *nargs;
 	int i;
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof *nargs);
 	if (nargs == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "nargs");
 		return;
 	}
 	if (*nargs < 1) {
 		gctl_error(req, "Invalid number of arguments");
 		return;
 	}
 	force = gctl_get_paraml(req, "force", sizeof *force);
 	if (force == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "force");
 		return;
 	}
 
 	g_topology_lock();
 	for (i = 0; i < *nargs; i++) {
 		char param[8];
 		const char *name;
 		struct g_virstor_softc *sc;
 		int error;
 
 		/*
 		 * Bounded formatting: "arg%d" no longer fits in param[]
 		 * once i reaches 10000, so never use plain sprintf() here.
 		 */
 		snprintf(param, sizeof(param), "arg%d", i);
 		name = gctl_get_asciiparam(req, param);
 		if (name == NULL) {
 			gctl_error(req, "No 'arg%d' argument", i);
 			g_topology_unlock();
 			return;
 		}
 		sc = virstor_find_geom(cp, name);
 		if (sc == NULL) {
 			gctl_error(req, "Don't know anything about '%s'", name);
 			g_topology_unlock();
 			return;
 		}
 
 		LOG_MSG(LVL_INFO, "Stopping %s by the userland command",
 		    sc->geom->name);
 		update_metadata(sc);
 		if ((error = virstor_geom_destroy(sc, TRUE, TRUE)) != 0) {
 			LOG_MSG(LVL_ERROR, "Cannot destroy %s: %d",
 			    sc->geom->name, error);
 		}
 	}
 	g_topology_unlock();
 }
 
 /*
  * "add" verb from userland - add new component(s) to the structure.
  * This will be done all at once in here, without going through the
  * .taste function for new components.
  */
 static void
 virstor_ctl_add(struct gctl_req *req, struct g_class *cp)
 {
 	/* Note: while this is going on, I/O is being done on
 	 * the g_up and g_down threads. The idea is to make changes
 	 * to softc members in a way that can atomically activate
 	 * them all at once. */
 	struct g_virstor_softc *sc;
 	int *hardcode, *nargs;
 	const char *geom_name;	/* geom to add a component to */
 	struct g_consumer *fcp;
 	struct g_virstor_bio_q *bq;
 	u_int added;
 	int error;
 	int i;
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "nargs");
 		return;
 	}
 	if (*nargs < 2) {
 		gctl_error(req, "Invalid number of arguments");
 		return;
 	}
 	hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode));
 	if (hardcode == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "hardcode");
 		return;
 	}
 
 	/* Find "our" geom */
 	geom_name = gctl_get_asciiparam(req, "arg0");
 	if (geom_name == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)");
 		return;
 	}
 	sc = virstor_find_geom(cp, geom_name);
 	if (sc == NULL) {
 		gctl_error(req, "Don't know anything about '%s'", geom_name);
 		return;
 	}
 
 	if (virstor_valid_components(sc) != sc->n_components) {
 		LOG_MSG(LVL_ERROR, "Cannot add components to incomplete "
 		    "virstor %s", sc->geom->name);
 		gctl_error(req, "Virstor %s is incomplete", sc->geom->name);
 		return;
 	}
 
 	fcp = sc->components[0].gcons;
 	added = 0;
 	g_topology_lock();
 	for (i = 1; i < *nargs; i++) {
 		struct g_virstor_metadata md;
 		char aname[8];
 		const char *prov_name;
 		struct g_provider *pp;
 		struct g_consumer *cp;
 		u_int nc;
 		u_int j;
 
 		snprintf(aname, sizeof aname, "arg%d", i);
 		prov_name = gctl_get_asciiparam(req, aname);
 		if (prov_name == NULL) {
 			gctl_error(req, "Error fetching argument '%s'", aname);
 			g_topology_unlock();
 			return;
 		}
 		if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
 			prov_name += sizeof(_PATH_DEV) - 1;
 
 		pp = g_provider_by_name(prov_name);
 		if (pp == NULL) {
 			/* This is the most common error so be verbose about it */
 			if (added != 0) {
 				gctl_error(req, "Invalid provider: '%s' (added"
 				    " %u components)", prov_name, added);
 				update_metadata(sc);
 			} else {
 				gctl_error(req, "Invalid provider: '%s'",
 				    prov_name);
 			}
 			g_topology_unlock();
 			return;
 		}
 		cp = g_new_consumer(sc->geom);
 		if (cp == NULL) {
 			gctl_error(req, "Cannot create consumer");
 			g_topology_unlock();
 			return;
 		}
 		error = g_attach(cp, pp);
 		if (error != 0) {
 			gctl_error(req, "Cannot attach a consumer to %s",
 			    pp->name);
 			g_destroy_consumer(cp);
 			g_topology_unlock();
 			return;
 		}
 		if (fcp->acr != 0 || fcp->acw != 0 || fcp->ace != 0) {
 			error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
 			if (error != 0) {
 				gctl_error(req, "Access request failed for %s",
 				    pp->name);
 				g_destroy_consumer(cp);
 				g_topology_unlock();
 				return;
 			}
 		}
 		if (fcp->provider->sectorsize != pp->sectorsize) {
 			gctl_error(req, "Sector size doesn't fit for %s",
 			    pp->name);
 			g_destroy_consumer(cp);
 			g_topology_unlock();
 			return;
 		}
 		for (j = 0; j < sc->n_components; j++) {
 			if (strcmp(sc->components[j].gcons->provider->name,
 			    pp->name) == 0) {
 				gctl_error(req, "Component %s already in %s",
 				    pp->name, sc->geom->name);
 				g_destroy_consumer(cp);
 				g_topology_unlock();
 				return;
 			}
 		}
 		sc->components = realloc(sc->components,
 		    sizeof(*sc->components) * (sc->n_components + 1),
 		    M_GVIRSTOR, M_WAITOK);
 
 		nc = sc->n_components;
 		sc->components[nc].gcons = cp;
 		sc->components[nc].sc = sc;
 		sc->components[nc].index = nc;
 		sc->components[nc].chunk_count = cp->provider->mediasize /
 		    sc->chunk_size;
 		sc->components[nc].chunk_next = 0;
 		sc->components[nc].chunk_reserved = 0;
 
 		if (sc->components[nc].chunk_count < 4) {
 			gctl_error(req, "Provider too small: %s",
 			    cp->provider->name);
 			g_destroy_consumer(cp);
 			g_topology_unlock();
 			return;
 		}
 		fill_metadata(sc, &md, nc, *hardcode);
 		write_metadata(cp, &md);
 		/* The new component becomes visible when n_components is
 		 * incremented */
 		sc->n_components++;
 		added++;
 
 	}
 	/* This call to update_metadata() is critical. In case there's a
 	 * power failure in the middle of it and some components are updated
 	 * while others are not, there will be trouble on next .taste() iff
 	 * a non-updated component is detected first */
 	update_metadata(sc);
 	g_topology_unlock();
 	LOG_MSG(LVL_INFO, "Added %d component(s) to %s", added,
 	    sc->geom->name);
 	/* Fire off BIOs previously queued because there wasn't any
 	 * physical space left. If the BIOs still can't be satisfied
 	 * they will again be added to the end of the queue (during
 	 * which the mutex will be recursed) */
 	bq = malloc(sizeof(*bq), M_GVIRSTOR, M_WAITOK);
 	bq->bio = NULL;
 	mtx_lock(&sc->delayed_bio_q_mtx);
 	/* First, insert a sentinel to the queue end, so we don't
 	 * end up in an infinite loop if there's still no free
 	 * space available. */
 	STAILQ_INSERT_TAIL(&sc->delayed_bio_q, bq, linkage);
 	while (!STAILQ_EMPTY(&sc->delayed_bio_q)) {
 		bq = STAILQ_FIRST(&sc->delayed_bio_q);
 		if (bq->bio != NULL) {
 			g_virstor_start(bq->bio);
 			STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
 			free(bq, M_GVIRSTOR);
 		} else {
 			STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
 			free(bq, M_GVIRSTOR);
 			break;
 		}
 	}
 	mtx_unlock(&sc->delayed_bio_q_mtx);
 
 }
 
 /*
  * Look up a geom of class `cp' by its name and hand back its softc.
  * Returns NULL when no geom with that name exists (the softc pointer of
  * a matching geom is returned as-is, whatever its value).
  */
 static struct g_virstor_softc *
 virstor_find_geom(const struct g_class *cp, const char *name)
 {
 	struct g_virstor_softc *match;
 	struct g_geom *candidate;
 
 	match = NULL;
 	LIST_FOREACH(candidate, &cp->geom, geom) {
 		if (strcmp(name, candidate->name) != 0)
 			continue;
 		/* First geom with this name wins */
 		match = candidate->softc;
 		break;
 	}
 	return (match);
 }
 
 /*
  * Update metadata on all components to reflect the current state
  * of these fields:
  *    - chunk_next
  *    - flags
  *    - md_count
  * Expects things to be set up so write_metadata() can work, i.e.
  * the topology lock must be held.
  *
  * Does nothing when the device is incomplete, i.e. when not every
  * component slot has a consumer attached. All other on-disk fields are
  * carried over from what read_metadata() returned for that component.
  */
 static void
 update_metadata(struct g_virstor_softc *sc)
 {
 	struct g_virstor_metadata md;
-	int n;
+	u_int n;
 
 	if (virstor_valid_components(sc) != sc->n_components)
 		return; /* Incomplete device */
 	LOG_MSG(LVL_DEBUG, "Updating metadata on components for %s",
 	    sc->geom->name);
 	/* Update metadata on components */
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__,
 	    sc->geom->class->name, sc->geom->name);
 	g_topology_assert();
 	for (n = 0; n < sc->n_components; n++) {
 		/* Read-modify-write of the component's metadata sector.
 		 * NOTE(review): the return value of read_metadata() is not
 		 * checked, so on a read error `md' is written back with
 		 * whatever it contained before - confirm this is intended. */
 		read_metadata(sc->components[n].gcons, &md);
 		md.chunk_next = sc->components[n].chunk_next;
 		md.flags = sc->components[n].flags;
 		md.md_count = sc->n_components;
 		write_metadata(sc->components[n].gcons, &md);
 	}
 }
 
 /*
  * Build a complete metadata record in *md for component number `nc' of
  * the virstor described by `sc'. The record is zeroed first; the provider
  * name is only stored when `hardcode' is non-zero.
  */
 static void
 fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md,
     u_int nc, u_int hardcode)
 {
 	struct g_virstor_component *comp = &sc->components[nc];
 
 	bzero(md, sizeof(*md));
 
 	/* Fields shared by every component of the device */
 	strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof(md->md_magic));
 	md->md_version = G_VIRSTOR_VERSION;
 	strncpy(md->md_name, sc->geom->name, sizeof(md->md_name));
 	md->md_id = sc->id;
 	md->md_virsize = sc->virsize;
 	md->md_chunk_size = sc->chunk_size;
 	md->md_count = sc->n_components;
 
 	/* Fields describing this particular component */
 	if (hardcode != 0)
 		strncpy(md->provider, comp->gcons->provider->name,
 		    sizeof(md->provider));
 	md->no = nc;
 	md->provsize = comp->gcons->provider->mediasize;
 	md->chunk_count = comp->chunk_count;
 	md->chunk_next = comp->chunk_next;
 	md->chunk_reserved = comp->chunk_reserved;
 	md->flags = comp->flags;
 }
 
 /*
  * Remove a component from virstor device.
  * Can only be done if the component is unallocated.
  */
 static void
 virstor_ctl_remove(struct gctl_req *req, struct g_class *cp)
 {
 	/* As this is executed in parallel to I/O, operations on virstor
 	 * structures must be as atomic as possible. */
 	struct g_virstor_softc *sc;
 	int *nargs;
 	const char *geom_name;
 	u_int removed;
 	int i;
 
 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
 	if (nargs == NULL) {
 		gctl_error(req, "Error fetching argument '%s'", "nargs");
 		return;
 	}
 	if (*nargs < 2) {
 		gctl_error(req, "Invalid number of arguments");
 		return;
 	}
 	/* Find "our" geom */
 	geom_name = gctl_get_asciiparam(req, "arg0");
 	if (geom_name == NULL) {
 		gctl_error(req, "Error fetching argument '%s'",
 		    "geom_name (arg0)");
 		return;
 	}
 	sc = virstor_find_geom(cp, geom_name);
 	if (sc == NULL) {
 		gctl_error(req, "Don't know anything about '%s'", geom_name);
 		return;
 	}
 
 	if (virstor_valid_components(sc) != sc->n_components) {
 		LOG_MSG(LVL_ERROR, "Cannot remove components from incomplete "
 		    "virstor %s", sc->geom->name);
 		gctl_error(req, "Virstor %s is incomplete", sc->geom->name);
 		return;
 	}
 
 	removed = 0;
 	for (i = 1; i < *nargs; i++) {
 		char param[8];
 		const char *prov_name;
 		int j, found;
 		struct g_virstor_component *newcomp, *compbak;
 
 		sprintf(param, "arg%d", i);
 		prov_name = gctl_get_asciiparam(req, param);
 		if (prov_name == NULL) {
 			gctl_error(req, "Error fetching argument '%s'", param);
 			return;
 		}
 		if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
 			prov_name += sizeof(_PATH_DEV) - 1;
 
 		found = -1;
 		for (j = 0; j < sc->n_components; j++) {
 			if (strcmp(sc->components[j].gcons->provider->name,
 			    prov_name) == 0) {
 				found = j;
 				break;
 			}
 		}
 		if (found == -1) {
 			LOG_MSG(LVL_ERROR, "No %s component in %s",
 			    prov_name, sc->geom->name);
 			continue;
 		}
 
 		compbak = sc->components;
 		newcomp = malloc(sc->n_components * sizeof(*sc->components),
 		    M_GVIRSTOR, M_WAITOK | M_ZERO);
 		bcopy(sc->components, newcomp, found * sizeof(*sc->components));
 		bcopy(&sc->components[found + 1], newcomp + found,
 		    found * sizeof(*sc->components));
 		if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) != 0) {
 			LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be "
 			    "removed from %s",
 			    prov_name, sc->geom->name);
 			free(newcomp, M_GVIRSTOR);
 			/* We'll consider this non-fatal error */
 			continue;
 		}
 		/* Renumerate unallocated components */
 		for (j = 0; j < sc->n_components-1; j++) {
 			if ((sc->components[j].flags &
 			    VIRSTOR_PROVIDER_ALLOCATED) == 0) {
 				sc->components[j].index = j;
 			}
 		}
 		/* This is the critical section. If a component allocation
 		 * event happens while both variables are not yet set,
 		 * there will be trouble. Something will panic on encountering
 		 * NULL sc->components[x].gcomp member.
 		 * Luckily, component allocation happens very rarely and
 		 * removing components is an abnormal action in any case. */
 		sc->components = newcomp;
 		sc->n_components--;
 		/* End critical section */
 
 		g_topology_lock();
 		if (clear_metadata(&compbak[found]) != 0) {
 			LOG_MSG(LVL_WARNING, "Trouble ahead: cannot clear "
 			    "metadata on %s", prov_name);
 		}
 		g_detach(compbak[found].gcons);
 		g_destroy_consumer(compbak[found].gcons);
 		g_topology_unlock();
 
 		free(compbak, M_GVIRSTOR);
 
 		removed++;
 	}
 
 	/* This call to update_metadata() is critical. In case there's a
 	 * power failure in the middle of it and some components are updated
 	 * while others are not, there will be trouble on next .taste() iff
 	 * a non-updated component is detected first */
 	g_topology_lock();
 	update_metadata(sc);
 	g_topology_unlock();
 	LOG_MSG(LVL_INFO, "Removed %d component(s) from %s", removed,
 	    sc->geom->name);
 }
 
 /*
  * Clear metadata sector on component
  */
 static int
 clear_metadata(struct g_virstor_component *comp)
 {
 	char *buf;
 	int error;
 
 	LOG_MSG(LVL_INFO, "Clearing metadata on %s",
 	    comp->gcons->provider->name);
 	g_topology_assert();
 	error = g_access(comp->gcons, 0, 1, 0);
 	if (error != 0)
 		return (error);
 	buf = malloc(comp->gcons->provider->sectorsize, M_GVIRSTOR,
 	    M_WAITOK | M_ZERO);
 	error = g_write_data(comp->gcons,
 	    comp->gcons->provider->mediasize -
 	    comp->gcons->provider->sectorsize,
 	    buf,
 	    comp->gcons->provider->sectorsize);
 	free(buf, M_GVIRSTOR);
 	g_access(comp->gcons, 0, -1, 0);
 	return (error);
 }
 
 /*
  * Destroy geom forcibly.
  *
  * When the geom still has a softc, the component metadata is updated and
  * the geom is torn down via virstor_geom_destroy(); EAGAIN is returned in
  * that case, 0 otherwise. Kernels built with INVARIANTS additionally count
  * the allocated map entries and compare the in-memory allocation table
  * with the copy stored on the first component before tearing down.
  */
 static int
 g_virstor_destroy_geom(struct gctl_req *req __unused, struct g_class *mp,
     struct g_geom *gp)
 {
 	struct g_virstor_softc *sc;
 	int exitval;
 
 	sc = gp->softc;
 	KASSERT(sc != NULL, ("%s: NULL sc", __func__));
 	
 	exitval = 0;
 	LOG_MSG(LVL_DEBUG, "%s called for %s, sc=%p", __func__, gp->name,
 	    gp->softc);
 
 	if (sc != NULL) {
 #ifdef INVARIANTS
 		char *buf;
 		int error;
 		off_t off;
 		int isclean, count;
 		int n;
 
 		LOG_MSG(LVL_INFO, "INVARIANTS detected");
 		LOG_MSG(LVL_INFO, "Verifying allocation "
 		    "table for %s", sc->geom->name);
 		count = 0;
 		for (n = 0; n < sc->chunk_count; n++) {
 			/* Bitwise test: count only entries that really
 			 * have the ALLOCATED flag set */
 			if ((sc->map[n].flags & VIRSTOR_MAP_ALLOCATED) != 0)
 				count++;
 		}
 		LOG_MSG(LVL_INFO, "Device %s has %d allocated chunks",
 		    sc->geom->name, count);
 		n = off = count = 0;
 		isclean = 1;
 		if (virstor_valid_components(sc) != sc->n_components) {
 			/* This is an incomplete virstor device (not all
 			 * components have been found) */
 			LOG_MSG(LVL_ERROR, "Device %s is incomplete",
 			    sc->geom->name);
 			goto bailout;
 		}
 		error = g_access(sc->components[0].gcons, 1, 0, 0);
 		KASSERT(error == 0, ("%s: g_access failed (%d)", __func__,
 		    error));
 		/* Compare the whole on-disk allocation table with what's
 		 * currently in memory, one sector at a time; from here on
 		 * `count' is the number of mismatching sectors */
 		while (n < sc->chunk_count) {
 			buf = g_read_data(sc->components[0].gcons, off,
 			    sc->sectorsize, &error);
 			KASSERT(buf != NULL, ("g_read_data returned NULL (%d) "
 			    "for read at %jd", error, off));
 			if (bcmp(buf, &sc->map[n], sc->sectorsize) != 0) {
 				LOG_MSG(LVL_ERROR, "ERROR in allocation table, "
 				    "entry %d, offset %jd", n, off);
 				isclean = 0;
 				count++;
 			}
 			n += sc->me_per_sector;
 			off += sc->sectorsize;
 			g_free(buf);
 		}
 		error = g_access(sc->components[0].gcons, -1, 0, 0);
 		KASSERT(error == 0, ("%s: g_access failed (%d) on exit",
 		    __func__, error));
 		if (isclean != 1) {
 			LOG_MSG(LVL_ERROR, "ALLOCATION TABLE CORRUPTED FOR %s "
 			    "(%d sectors don't match, max %zu allocations)",
 			    sc->geom->name, count,
 			    count * sc->me_per_sector);
 		} else {
 			LOG_MSG(LVL_INFO, "Allocation table ok for %s",
 			    sc->geom->name);
 		}
 bailout:
 #endif
 		update_metadata(sc);
 		virstor_geom_destroy(sc, FALSE, FALSE);
 		exitval = EAGAIN;
 	} else
 		exitval = 0;
 	return (exitval);
 }
 
 /*
  * Taste event (per-class callback)
  * Examines a provider and creates geom instances if needed
  */
 static struct g_geom *
 g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags)
 {
 	struct g_virstor_metadata md;
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct g_virstor_softc *sc;
 	int error;
 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 	LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name);
 
 	/* We need a dummy geom to attach a consumer to the given provider */
 	gp = g_new_geomf(mp, "virstor:taste.helper");
 	gp->start = (void *)invalid_call;	/* XXX: hacked up so the        */
 	gp->access = (void *)invalid_call;	/* compiler doesn't complain.   */
 	gp->orphan = (void *)invalid_call;	/* I really want these to fail. */
 
 	cp = g_new_consumer(gp);
 	g_attach(cp, pp);
 	error = read_metadata(cp, &md);
 	g_detach(cp);
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 
 	if (error != 0)
 		return (NULL);
 
 	if (strcmp(md.md_magic, G_VIRSTOR_MAGIC) != 0)
 		return (NULL);
 	if (md.md_version != G_VIRSTOR_VERSION) {
 		LOG_MSG(LVL_ERROR, "Kernel module version invalid "
 		    "to handle %s (%s) : %d should be %d",
 		    md.md_name, pp->name, md.md_version, G_VIRSTOR_VERSION);
 		return (NULL);
 	}
 	if (md.provsize != pp->mediasize)
 		return (NULL);
 
 	/* If the provider name is hardcoded, use the offered provider only
 	 * if it's been offered with its proper name (the one used in
 	 * the label command). */
 	if (md.provider[0] != '\0' &&
 	    !g_compare_names(md.provider, pp->name))
 		return (NULL);
 
 	/* Iterate all geoms this class already knows about to see if a new
 	 * geom instance of this class needs to be created (in case the provider
 	 * is first from a (possibly) multi-consumer geom) or it just needs
 	 * to be added to an existing instance. */
 	sc = NULL;
 	gp = NULL;
 	LIST_FOREACH(gp, &mp->geom, geom) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		if (strcmp(md.md_name, sc->geom->name) != 0)
 			continue;
 		if (md.md_id != sc->id)
 			continue;
 		break;
 	}
 	if (gp != NULL) { /* We found an existing geom instance; add to it */
 		LOG_MSG(LVL_INFO, "Adding %s to %s", pp->name, md.md_name);
 		error = add_provider_to_geom(sc, pp, &md);
 		if (error != 0) {
 			LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)",
 			    pp->name, md.md_name, error);
 			return (NULL);
 		}
 	} else { /* New geom instance needs to be created */
 		gp = create_virstor_geom(mp, &md);
 		if (gp == NULL) {
 			LOG_MSG(LVL_ERROR, "Error creating new instance of "
 			    "class %s: %s", mp->name, md.md_name);
 			LOG_MSG(LVL_DEBUG, "Error creating %s at %s",
 			    md.md_name, pp->name);
 			return (NULL);
 		}
 		sc = gp->softc;
 		LOG_MSG(LVL_INFO, "Adding %s to %s (first found)", pp->name,
 		    md.md_name);
 		error = add_provider_to_geom(sc, pp, &md);
 		if (error != 0) {
 			LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)",
 			    pp->name, md.md_name, error);
 			virstor_geom_destroy(sc, TRUE, FALSE);
 			return (NULL);
 		}
 	}
 
 	return (gp);
 }
 
 /*
  * Event callback: detach and destroy the consumer handed over in `arg'.
  * Posted onto the g_event queue by remove_component().
  */
 static void
 delay_destroy_consumer(void *arg, int flags __unused)
 {
 	struct g_consumer *cons;
 
 	cons = (struct g_consumer *)arg;
 	KASSERT(cons != NULL, ("%s: invalid consumer", __func__));
 	LOG_MSG(LVL_DEBUG, "Consumer %s destroyed with delay",
 	    cons->provider->name);
 
 	g_detach(cons);
 	g_destroy_consumer(cons);
 }
 
 /*
  * Take a component (consumer) out of a geom instance. If the geom still
  * offers its provider at that point, the provider is orphaned to announce
  * that the geom is being dismantled. With `delay' set, the consumer is
  * destroyed later from the event queue instead of right away.
  */
 static void
 remove_component(struct g_virstor_softc *sc, struct g_virstor_component *comp,
     boolean_t delay)
 {
 	struct g_consumer *cons;
 	struct g_provider *vpp;
 
 	KASSERT(comp->gcons != NULL, ("Component with no consumer in %s",
 	    sc->geom->name));
 
 	/* Unhook the consumer from the component slot first */
 	cons = comp->gcons;
 	comp->gcons = NULL;
 	KASSERT(cons->provider != NULL, ("%s: no provider", __func__));
 	LOG_MSG(LVL_DEBUG, "Component %s removed from %s",
 	    cons->provider->name, sc->geom->name);
 
 	vpp = sc->provider;
 	if (vpp != NULL) {
 		/* Whither, GEOM? */
 		vpp->flags |= G_PF_WITHER;
 		g_orphan_provider(vpp, ENXIO);
 		sc->provider = NULL;
 		LOG_MSG(LVL_INFO, "Removing provider %s", sc->geom->name);
 	}
 
 	/* Give back whatever access counts the consumer still holds */
 	if (cons->acr > 0 || cons->acw > 0 || cons->ace > 0)
 		g_access(cons, -cons->acr, -cons->acw, -cons->ace);
 
 	if (!delay) {
 		g_detach(cons);
 		g_destroy_consumer(cons);
 		return;
 	}
 	/* Destroy consumer after it's tasted */
 	g_post_event(delay_destroy_consumer, cons, M_WAITOK, NULL);
 }
 
 /*
  * Destroy geom - called internally
  * See g_virstor_destroy_geom for the other one
  *
  * Requires the topology lock. Returns ENXIO for a NULL softc, EBUSY when
  * the provider is still open and `force' is not set, 0 otherwise. `delay'
  * is passed on to remove_component() for every component that still has
  * a consumer.
  */
 static int
 virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force,
     boolean_t delay)
 {
 	struct g_provider *pp;
 	struct g_geom *gp;
-	int n;
+	u_int n;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 
 	/* Refuse (unless forced) while somebody has the device open */
 	pp = sc->provider;
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		LOG_MSG(force ? LVL_WARNING : LVL_ERROR,
 		    "Device %s is still open.", pp->name);
 		if (!force)
 			return (EBUSY);
 	}
 
 	/* Removing the first live component also orphans sc->provider and
 	 * clears it (see remove_component()) */
 	for (n = 0; n < sc->n_components; n++) {
 		if (sc->components[n].gcons != NULL)
 			remove_component(sc, &sc->components[n], delay);
 	}
 
 	/* Detach the softc from the geom before it is freed below */
 	gp = sc->geom;
 	gp->softc = NULL;
 
 	KASSERT(sc->provider == NULL, ("Provider still exists for %s",
 	    gp->name));
 
 	/* XXX: This might or might not work, since we're called with
 	 * the topology lock held. Also, it might panic the kernel if
 	 * the error'd BIO is in softupdates code. */
 	/* Fail every BIO that was parked waiting for free space.
 	 * NOTE(review): bq->bio is dereferenced unconditionally; this
 	 * presumes no NULL-bio sentinel entry is left on the queue -
 	 * confirm against the queue users. */
 	mtx_lock(&sc->delayed_bio_q_mtx);
 	while (!STAILQ_EMPTY(&sc->delayed_bio_q)) {
 		struct g_virstor_bio_q *bq;
 		bq = STAILQ_FIRST(&sc->delayed_bio_q);
 		bq->bio->bio_error = ENOSPC;
 		g_io_deliver(bq->bio, EIO);
 		STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
 		free(bq, M_GVIRSTOR);
 	}
 	mtx_unlock(&sc->delayed_bio_q_mtx);
 	mtx_destroy(&sc->delayed_bio_q_mtx);
 
 	/* Release the softc; it is wiped before being freed */
 	free(sc->map, M_GVIRSTOR);
 	free(sc->components, M_GVIRSTOR);
 	bzero(sc, sizeof *sc);
 	free(sc, M_GVIRSTOR);
 
 	pp = LIST_FIRST(&gp->provider); /* We only offer one provider */
 	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
 		LOG_MSG(LVL_DEBUG, "Device %s destroyed", gp->name);
 
 	g_wither_geom(gp, ENXIO);
 
 	return (0);
 }
 
 /*
  * Utility function: read metadata & decode. Wants topology lock to be
  * held.
  */
 static int
 read_metadata(struct g_consumer *cp, struct g_virstor_metadata *md)
 {
 	struct g_provider *pp;
 	char *buf;
 	int error;
 
 	g_topology_assert();
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		return (error);
 	pp = cp->provider;
 	g_topology_unlock();
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	g_access(cp, -1, 0, 0);
 	if (buf == NULL)
 		return (error);
 
 	virstor_metadata_decode(buf, md);
 	g_free(buf);
 
 	return (0);
 }
 
 /**
  * Utility function: encode & write metadata. Assumes topology lock is
  * held.
  *
  * There is no useful way of recovering from errors in this function,
  * not involving panicking the kernel. If the metadata cannot be written
  * the most we can do is notify the operator and hope he spots it and
  * replaces the broken drive.
  */
 static void
 write_metadata(struct g_consumer *cp, struct g_virstor_metadata *md)
 {
 	struct g_provider *pp;
 	char *buf;
 	int error;
 
 	KASSERT(cp != NULL && md != NULL && cp->provider != NULL,
 	    ("Something's fishy in %s", __func__));
 	LOG_MSG(LVL_DEBUG, "Writing metadata on %s", cp->provider->name);
 	g_topology_assert();
 	error = g_access(cp, 0, 1, 0);
 	if (error != 0) {
 		LOG_MSG(LVL_ERROR, "g_access(0,1,0) failed for %s: %d",
 		    cp->provider->name, error);
 		return;
 	}
 	pp = cp->provider;
 
 	buf = malloc(pp->sectorsize, M_GVIRSTOR, M_WAITOK);
 	virstor_metadata_encode(md, buf);
 	g_topology_unlock();
 	error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf,
 	    pp->sectorsize);
 	g_topology_lock();
 	g_access(cp, 0, -1, 0);
 	free(buf, M_GVIRSTOR);
 
 	if (error != 0)
 		LOG_MSG(LVL_ERROR, "Error %d writing metadata to %s",
 		    error, cp->provider->name);
 }
 
 /*
  * Create a new geom of this class from the given metadata and set up its
  * softc (component array sized by md_count, delayed-BIO queue and mutex).
  * Returns NULL when the metadata is nonsensical or a geom with the same
  * name already exists. The provider itself is created later.
  */
 static struct g_geom *
 create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md)
 {
 	struct g_virstor_softc *softc;
 	struct g_geom *g;
 
 	LOG_MSG(LVL_DEBUG, "Creating geom instance for %s (id=%u)",
 	    md->md_name, md->md_id);
 
 	if (md->md_count < 1 || md->md_chunk_size < 1 ||
 	    md->md_virsize < md->md_chunk_size) {
 		/* This is bogus configuration, and probably means data is
 		 * somehow corrupted. Panic, maybe? */
 		LOG_MSG(LVL_ERROR, "Nonsensical metadata information for %s",
 		    md->md_name);
 		return (NULL);
 	}
 
 	/* Refuse to create a second geom with the same name */
 	LIST_FOREACH(g, &mp->geom, geom) {
 		softc = g->softc;
 		if (softc == NULL ||
 		    strcmp(softc->geom->name, md->md_name) != 0)
 			continue;
 
 		LOG_MSG(LVL_WARNING, "Geom %s already exists",
 		    md->md_name);
 		if (softc->id != md->md_id) {
 			LOG_MSG(LVL_ERROR,
 			    "Some stale or invalid components "
 			    "exist for virstor device named %s. "
 			    "You will need to <CLEAR> all stale "
 			    "components and maybe reconfigure "
 			    "the virstor device. Tune "
 			    "kern.geom.virstor.debug sysctl up "
 			    "for more information.",
 			    softc->geom->name);
 		}
 		return (NULL);
 	}
 
 	g = g_new_geomf(mp, "%s", md->md_name);
 	g->softc = NULL; /* to circumvent races that test softc */
 
 	/* Class methods of the new geom */
 	g->start = g_virstor_start;
 	g->access = g_virstor_access;
 	g->orphan = g_virstor_orphan;
 	g->spoiled = g_virstor_orphan;
 	g->dumpconf = g_virstor_dumpconf;
 
 	/* Fresh, zeroed softc seeded from the metadata */
 	softc = malloc(sizeof(*softc), M_GVIRSTOR, M_WAITOK | M_ZERO);
 	softc->id = md->md_id;
 	softc->n_components = md->md_count;
 	softc->components = malloc(sizeof(struct g_virstor_component) *
 	    md->md_count, M_GVIRSTOR, M_WAITOK | M_ZERO);
 	softc->chunk_size = md->md_chunk_size;
 	softc->virsize = md->md_virsize;
 	STAILQ_INIT(&softc->delayed_bio_q);
 	mtx_init(&softc->delayed_bio_q_mtx, "gvirstor_delayed_bio_q_mtx",
 	    "gvirstor", MTX_DEF | MTX_RECURSE);
 
 	/* Link geom and softc only once the softc is fully set up */
 	softc->geom = g;
 	softc->provider = NULL; /* virstor_check_and_run will create it */
 	g->softc = softc;
 
 	LOG_MSG(LVL_ANNOUNCE, "Device %s created", softc->geom->name);
 
 	return (g);
 }
 
 /*
  * Add provider to a GEOM class instance.
  *
  * Attaches a new consumer of sc's geom to `pp' and installs it in the
  * component slot given by md->no. Returns EINVAL when md->no is out of
  * range or the sector size differs from the geom's first consumer, EEXIST
  * when the slot is already occupied, or the error from g_attach() /
  * g_access(). On every failure the new consumer is detached and destroyed
  * again. On success virstor_check_and_run() is invoked and 0 is returned.
  */
 static int
 add_provider_to_geom(struct g_virstor_softc *sc, struct g_provider *pp,
     struct g_virstor_metadata *md)
 {
 	struct g_virstor_component *component;
 	struct g_consumer *cp, *fcp;
 	struct g_geom *gp;
 	int error;
 
 	if (md->no >= sc->n_components)
 		return (EINVAL);
 
 	/* "Current" component */
 	component = &(sc->components[md->no]);
 	if (component->gcons != NULL)
 		return (EEXIST);
 
 	gp = sc->geom;
 	/* Must be sampled before our own consumer joins the list */
 	fcp = LIST_FIRST(&gp->consumer);
 
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 
 	if (error != 0) {
 		g_destroy_consumer(cp);
 		return (error);
 	}
 
 	if (fcp != NULL) {
 		if (fcp->provider->sectorsize != pp->sectorsize) {
 			/* TODO: this can be made to work */
 			LOG_MSG(LVL_ERROR, "Provider %s of %s has invalid "
 			    "sector size (%d)", pp->name, sc->geom->name,
 			    pp->sectorsize);
 			/* Do not leak the attached consumer */
 			g_detach(cp);
 			g_destroy_consumer(cp);
 			return (EINVAL);
 		}
 		if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) {
 			/* Replicate access permissions from first "live" consumer
 			 * to the new one */
 			error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
 			if (error != 0) {
 				g_detach(cp);
 				g_destroy_consumer(cp);
 				return (error);
 			}
 		}
 	}
 
 	/* Bring up a new component */
 	cp->private = component;
 	component->gcons = cp;
 	component->sc = sc;
 	component->index = md->no;
 	component->chunk_count = md->chunk_count;
 	component->chunk_next = md->chunk_next;
 	component->chunk_reserved = md->chunk_reserved;
 	component->flags = md->flags;
 
 	LOG_MSG(LVL_DEBUG, "%s attached to %s", pp->name, sc->geom->name);
 
 	virstor_check_and_run(sc);
 	return (0);
 }
 
 /*
  * Check if everything's ready to create the geom provider & device entry,
  * create and start provider.
  * Called ultimately by .taste, from g_event thread
  *
  * Does nothing until every component slot has a consumer. Then the
  * allocation map is read from the start of the first component, validated
  * against the component table, per-component chunk_next counters are
  * recomputed from it, and finally the provider "virstor/<name>" is created.
  * sc->provider is only assigned at the very end, so log messages before
  * that point identify the device by its geom name.
  */
 static void
 virstor_check_and_run(struct g_virstor_softc *sc)
 {
 	off_t off;
 	size_t n, count;
 	int index;
 	int error;
 
 	if (virstor_valid_components(sc) != sc->n_components)
 		return;
 
 	if (virstor_valid_components(sc) == 0) {
 		/* This is actually a candidate for panic() */
 		LOG_MSG(LVL_ERROR, "No valid components for %s?",
 		    sc->geom->name);
 		return;
 	}
 
 	sc->sectorsize = sc->components[0].gcons->provider->sectorsize;
 
 	/* Initialise allocation map from the first consumer */
 	sc->chunk_count = sc->virsize / sc->chunk_size;
 	if (sc->chunk_count * (off_t)sc->chunk_size != sc->virsize) {
 		LOG_MSG(LVL_WARNING, "Device %s truncated to %ju bytes",
 		    sc->geom->name,
 		    (uintmax_t)(sc->chunk_count * (off_t)sc->chunk_size));
 	}
 	sc->map_size = sc->chunk_count * sizeof *(sc->map);
 	/* The following allocation is in order of 4MB - 8MB */
 	sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK);
 	KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s",
 	    __func__, sc->map_size, sc->geom->name));
 	sc->map_sectors = sc->map_size / sc->sectorsize;
 
 	count = 0;
 	for (n = 0; n < sc->n_components; n++)
 		count += sc->components[n].chunk_count;
 	LOG_MSG(LVL_INFO, "Device %s has %zu physical chunks and %zu virtual "
 	    "(%zu KB chunks)",
 	    sc->geom->name, count, sc->chunk_count, sc->chunk_size / 1024);
 
 	error = g_access(sc->components[0].gcons, 1, 0, 0);
 	if (error != 0) {
 		LOG_MSG(LVL_ERROR, "Cannot acquire read access for %s to "
 		    "read allocation map for %s",
 		    sc->components[0].gcons->provider->name,
 		    sc->geom->name);
 		return;
 	}
 	/* Read in the allocation map, at most MAXPHYS bytes per request.
 	 * `count' is the number of bytes read so far, `n' the index of the
 	 * next map entry to fill. */
 	LOG_MSG(LVL_DEBUG, "Reading map for %s from %s", sc->geom->name,
 	    sc->components[0].gcons->provider->name);
 	off = count = n = 0;
 	while (count < sc->map_size) {
 		struct g_virstor_map_entry *mapbuf;
 		size_t bs;
 
 		bs = MIN(MAXPHYS, sc->map_size - count);
 		if (bs % sc->sectorsize != 0) {
 			/* Check for alignment errors */
 			bs = (bs / sc->sectorsize) * sc->sectorsize;
 			if (bs == 0)
 				break;
 			LOG_MSG(LVL_ERROR, "Trouble: map is not sector-aligned "
 			    "for %s on %s", sc->geom->name,
 			    sc->components[0].gcons->provider->name);
 		}
 		mapbuf = g_read_data(sc->components[0].gcons, off, bs, &error);
 		if (mapbuf == NULL) {
 			LOG_MSG(LVL_ERROR, "Error reading allocation map "
 			    "for %s from %s (offset %ju) (error %d)",
 			    sc->geom->name,
 			    sc->components[0].gcons->provider->name,
 			    (uintmax_t)off, error);
 			/* Give back the read access taken above and leave
 			 * no dangling map behind: virstor_geom_destroy()
 			 * frees sc->map again. */
 			g_access(sc->components[0].gcons, -1, 0, 0);
 			free(sc->map, M_GVIRSTOR);
 			sc->map = NULL;
 			sc->map_size = 0;
 			sc->chunk_count = 0;
 			return;
 		}
 
 		bcopy(mapbuf, &sc->map[n], bs);
 		off += bs;
 		count += bs;
 		n += bs / sizeof *(sc->map);
 		g_free(mapbuf);
 	}
 	g_access(sc->components[0].gcons, -1, 0, 0);
 	LOG_MSG(LVL_DEBUG, "Read map for %s", sc->geom->name);
 
 	/* find first component with allocatable chunks */
 	index = -1;
 	for (n = 0; n < sc->n_components; n++) {
 		if (sc->components[n].chunk_next <
 		    sc->components[n].chunk_count) {
 			index = n;
 			break;
 		}
 	}
 	if (index == -1)
 		/* not found? set it to the last component and handle it
 		 * later */
 		index = sc->n_components - 1;
 
 	if (index >= sc->n_components - g_virstor_component_watermark - 1) {
 		LOG_MSG(LVL_WARNING, "Device %s running out of components "
 		    "(%d/%u: %s)", sc->geom->name,
 		    index+1,
 		    sc->n_components,
 		    sc->components[index].gcons->provider->name);
 	}
 	sc->curr_component = index;
 
 	if (sc->components[index].chunk_next >=
 	    sc->components[index].chunk_count - g_virstor_chunk_watermark) {
 		LOG_MSG(LVL_WARNING,
 		    "Component %s of %s is running out of free space "
 		    "(%u chunks left)",
 		    sc->components[index].gcons->provider->name,
 		    sc->geom->name, sc->components[index].chunk_count -
 		    sc->components[index].chunk_next);
 	}
 
 	sc->me_per_sector = sc->sectorsize / sizeof *(sc->map);
 	if (sc->sectorsize % sizeof *(sc->map) != 0) {
 		LOG_MSG(LVL_ERROR,
 		    "%s: Map entries don't fit exactly in a sector (%s)",
 		    __func__, sc->geom->name);
 		return;
 	}
 
 	/* Recalculate allocated chunks in components & at the same time
 	 * verify map data is sane. We could trust metadata on this, but
 	 * we want to make sure. */
 	for (n = 0; n < sc->n_components; n++)
 		sc->components[n].chunk_next = sc->components[n].chunk_reserved;
 
 	for (n = 0; n < sc->chunk_count; n++) {
 		if (sc->map[n].provider_no >= sc->n_components) {
 			/* provider_no cannot be used to index components[]
 			 * here, so no chunk_count is reported */
 			LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s",
 			    __func__, (u_int)n, sc->geom->name);
 			LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u"
 			    " provider_chunk: %u", __func__,
 			    sc->map[n].provider_no, sc->n_components,
 			    sc->map[n].provider_chunk);
 			return;
 		}
 		if (sc->map[n].provider_chunk >=
 		    sc->components[sc->map[n].provider_no].chunk_count) {
 			LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s",
 			    __func__, (u_int)n, sc->geom->name);
 			LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u"
 			    " provider_chunk: %u, chunk_count: %u", __func__,
 			    sc->map[n].provider_no, sc->n_components,
 			    sc->map[n].provider_chunk,
 			    sc->components[sc->map[n].provider_no].chunk_count);
 			return;
 		}
 		if (sc->map[n].flags & VIRSTOR_MAP_ALLOCATED)
 			sc->components[sc->map[n].provider_no].chunk_next++;
 	}
 
 	sc->provider = g_new_providerf(sc->geom, "virstor/%s",
 	    sc->geom->name);
 
 	sc->provider->sectorsize = sc->sectorsize;
 	sc->provider->mediasize = sc->virsize;
 	g_error_provider(sc->provider, 0);
 
 	LOG_MSG(LVL_INFO, "%s activated", sc->provider->name);
 	LOG_MSG(LVL_DEBUG, "%s starting with current component %u, starting "
 	    "chunk %u", sc->provider->name, sc->curr_component,
 	    sc->components[sc->curr_component].chunk_next);
 }
 
 /*
  * Count the component slots of this geom instance that currently have a
  * consumer attached.
  */
 static u_int
 virstor_valid_components(struct g_virstor_softc *sc)
 {
 	unsigned int idx, live;
 
 	KASSERT(sc != NULL, ("%s: softc is NULL", __func__));
 	KASSERT(sc->components != NULL, ("%s: sc->components is NULL", __func__));
 
 	live = 0;
 	for (idx = 0; idx < sc->n_components; idx++) {
 		if (sc->components[idx].gcons == NULL)
 			continue;
 		live++;
 	}
 	return (live);
 }
 
 /*
  * Orphan/spoil callback for a consumer: drop the component the consumer
  * belongs to and, once no component is left, destroy the whole geom.
  * Nothing happens if the geom has no softc any more.
  */
 static void
 g_virstor_orphan(struct g_consumer *cp)
 {
 	struct g_virstor_component *comp;
 	struct g_virstor_softc *sc;
 
 	g_topology_assert();
 
 	sc = cp->geom->softc;
 	if (sc == NULL)
 		return;
 
 	/* The consumer carries a back-pointer to its component */
 	comp = cp->private;
 	KASSERT(comp != NULL, ("%s: No component in private part of consumer",
 	    __func__));
 	remove_component(sc, comp, FALSE);
 
 	if (virstor_valid_components(sc) != 0)
 		return;
 	virstor_geom_destroy(sc, TRUE, FALSE);
 }
 
 /*
  * GEOM .access method: called when the access counts (read, write,
  * exclusive) of provider pp are about to change by dr, dw and de.
  *
  * The change is forwarded to every consumer of the geom. If one consumer
  * refuses it, the change is reverted on the consumers that had already
  * accepted it and that error is returned. Returns ENXIO when the geom has
  * no consumers and 0 on success.
  */
 static int
 g_virstor_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_consumer *c, *c2;
 	struct g_virstor_softc *sc;
 	struct g_geom *gp;
 	int error;
 
 	KASSERT(pp != NULL, ("%s: NULL provider", __func__));
 	gp = pp->geom;
 	KASSERT(gp != NULL, ("%s: NULL geom", __func__));
 	sc = gp->softc;
 
 	if (sc == NULL) {
 		/*
 		 * No softc: this should only happen while the geom is
 		 * withering, so only releases (non-positive deltas) are
 		 * expected. Log the call so this can be observed.
 		 */
 		LOG_MSG(LVL_WARNING, "access(%d, %d, %d) for %s",
 		    dr, dw, de, pp->name);
 		KASSERT(dr <= 0 && dw <= 0 && de <= 0,
 		    ("%s: Positive access for %s", __func__, pp->name));
 		if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0)
 			LOG_MSG(LVL_DEBUG, "Device %s definitely destroyed",
 			    pp->name);
 		return (0);
 	}
 
 	/* First open: take an extra exclusive bit to pass to our consumers */
 	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
 		de++;
 	/* Last close: give that bit back and flush the metadata */
 	if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) {
 		de--;
 		update_metadata(sc);	/* Writes statistical information */
 	}
 
 	/* Propagate the request, stopping at the first consumer that fails */
 	error = ENXIO;
 	LIST_FOREACH(c, &gp->consumer, consumer) {
 		KASSERT(c != NULL, ("%s: consumer is NULL", __func__));
 		error = g_access(c, dr, dw, de);
 		if (error != 0)
 			break;
 	}
 
 	if (error != 0) {
 		/* Back out the change on every consumer visited before c */
 		LIST_FOREACH(c2, &gp->consumer, consumer) {
 			if (c2 == c)
 				break;
 			g_access(c2, -dr, -dw, -de);
 		}
 	}
 
 	return (error);
 }
 
 /*
  * Percentage (0..100) of `total' that is not taken up by `used'.
  * Yields 100 when nothing is used or when there is no capacity at all, and
  * clamps at 0 when `used' exceeds `total', so the result can neither divide
  * by zero nor wrap around below zero. The arithmetic is done in 64 bits so
  * that `used * 100' cannot overflow for 32-bit chunk counts.
  */
 static unsigned int
 virstor_percent_free(uint64_t used, uint64_t total)
 {
 	uint64_t pct_used;
 
 	if (used == 0 || total == 0)
 		return (100);
 	pct_used = (used * 100) / total;
 	return (pct_used >= 100 ? 0 : (unsigned int)(100 - pct_used));
 }
 
 /*
  * Generate XML dump of current state.
  *
  * Nothing is emitted for provider nodes (pp != NULL) or when the geom has
  * no softc. With a consumer (cp != NULL) the per-component counters are
  * written; otherwise the totals for the whole geom are written.
  */
 static void
 g_virstor_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_virstor_softc *sc;
 
 	g_topology_assert();
 	sc = gp->softc;
 
 	if (sc == NULL || pp != NULL)
 		return;
 
 	if (cp != NULL) {
 		/* For each component */
 		struct g_virstor_component *comp;
 
 		comp = cp->private;
 		if (comp == NULL)
 			return;
 		sbuf_printf(sb, "%s<ComponentIndex>%u</ComponentIndex>\n",
 		    indent, comp->index);
 		sbuf_printf(sb, "%s<ChunkCount>%u</ChunkCount>\n",
 		    indent, comp->chunk_count);
 		sbuf_printf(sb, "%s<ChunksUsed>%u</ChunksUsed>\n",
 		    indent, comp->chunk_next);
 		sbuf_printf(sb, "%s<ChunksReserved>%u</ChunksReserved>\n",
 		    indent, comp->chunk_reserved);
 		/*
 		 * NOTE(review): chunk_next appears to start at chunk_reserved
 		 * (see allocate_chunk()), so adding chunk_reserved again may
 		 * count the reserved chunks twice. The formula is kept as it
 		 * was; only the division by zero and the wrap-around below
 		 * zero are guarded against.
 		 */
 		sbuf_printf(sb, "%s<StorageFree>%u%%</StorageFree>\n",
 		    indent,
 		    comp->chunk_next > 0 ? virstor_percent_free(
 		    (uint64_t)comp->chunk_next + comp->chunk_reserved,
 		    comp->chunk_count) : 100);
 	} else {
 		/* For the whole thing */
 		u_int count, used, i;
 		off_t size;
 
 		count = 0;
 		used = 0;
 		size = 0;
 		/* Only components that still have a consumer contribute */
 		for (i = 0; i < sc->n_components; i++) {
 			if (sc->components[i].gcons != NULL) {
 				count += sc->components[i].chunk_count;
 				used += sc->components[i].chunk_next +
 				    sc->components[i].chunk_reserved;
 				size += sc->components[i].gcons->
 				    provider->mediasize;
 			}
 		}
 
 		sbuf_printf(sb, "%s<Status>"
 		    "Components=%u, Online=%u</Status>\n", indent,
 		    sc->n_components, virstor_valid_components(sc));
 		/* count is 0 when no component is online: must not divide */
 		sbuf_printf(sb, "%s<State>%u%% physical free</State>\n",
 		    indent, virstor_percent_free(used, count));
 		sbuf_printf(sb, "%s<ChunkSize>%zu</ChunkSize>\n", indent,
 		    sc->chunk_size);
 		sbuf_printf(sb, "%s<PhysicalFree>%u%%</PhysicalFree>\n",
 		    indent, virstor_percent_free(used, count));
 		sbuf_printf(sb, "%s<ChunkPhysicalCount>%u</ChunkPhysicalCount>\n",
 		    indent, count);
 		sbuf_printf(sb, "%s<ChunkVirtualCount>%zu</ChunkVirtualCount>\n",
 		    indent, sc->chunk_count);
 		/* Widen before multiplying; guard an empty virtual map */
 		sbuf_printf(sb, "%s<PhysicalBacking>%zu%%</PhysicalBacking>\n",
 		    indent,
 		    sc->chunk_count > 0 ?
 		    (size_t)(((uint64_t)count * 100) / sc->chunk_count) :
 		    (size_t)0);
 		sbuf_printf(sb, "%s<PhysicalBackingSize>%jd</PhysicalBackingSize>\n",
 		    indent, (intmax_t)size);
 		sbuf_printf(sb, "%s<VirtualSize>%jd</VirtualSize>\n", indent,
 		    (intmax_t)sc->virsize);
 	}
 }
 
 /*
  * GEOM .done handler for the bios that g_virstor_start() clones.
  * The standard handler can't be used because one requested IO may
  * fork into additional data IOs.
  *
  * The first child error is copied into the parent. When the last child
  * has come in, the parent is marked fully completed and delivered with
  * whatever error was recorded. The child bio is always destroyed.
  */
 static void
 g_virstor_done(struct bio *b)
 {
 	struct g_virstor_softc *sc;
 	struct bio *parent;
 
 	parent = b->bio_parent;
 	sc = parent->bio_to->geom->softc;
 
 	if (b->bio_error != 0) {
 		LOG_MSG(LVL_ERROR, "Error %d for offset=%ju, length=%ju, %s",
 		    b->bio_error, b->bio_offset, b->bio_length,
 		    b->bio_to->name);
 		/* Keep only the first error seen among the children */
 		if (parent->bio_error == 0)
 			parent->bio_error = b->bio_error;
 	}
 
 	/* Account for this child in the parent */
 	parent->bio_inbed++;
 	parent->bio_completed += b->bio_completed;
 
 	/* Was this the last outstanding child? */
 	if (parent->bio_inbed == parent->bio_children) {
 		parent->bio_completed = parent->bio_length;
 		g_io_deliver(parent, parent->bio_error);
 	}
 	g_destroy_bio(b);
 }
 
 /*
  * GEOM .start method: I/O starts here.
  * Called in g_down thread.
  *
  * Only BIO_READ, BIO_WRITE and BIO_DELETE are handled; any other command
  * is completed with EOPNOTSUPP. The request is walked one virtual chunk at
  * a time: each piece is looked up in sc->map and, where it is backed by a
  * physical chunk, a clone of b aimed at the backing component is put on a
  * local queue. A write to an unallocated chunk first allocates a physical
  * chunk and queues an extra clone that writes the changed map sector to
  * component 0. After the walk all queued clones are issued; if none were
  * queued, the request is completed here.
  */
 static void
 g_virstor_start(struct bio *b)
 {
 	struct g_virstor_softc *sc;
 	struct g_virstor_component *comp;
 	struct bio *cb;
 	struct g_provider *pp;
 	char *addr;
 	off_t offset, length;
 	struct bio_queue_head bq;
 	size_t chunk_size;	/* cached for convenience */
 	u_int count;
 
 	pp = b->bio_to;
 	sc = pp->geom->softc;
 	KASSERT(sc != NULL, ("%s: no softc (error=%d, device=%s)", __func__,
 	    b->bio_to->error, b->bio_to->name));
 
 	LOG_REQ(LVL_MOREDEBUG, b, "%s", __func__);
 
 	/* Reject everything except read, write and delete */
 	switch (b->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_DELETE:
 		break;
 	default:
 		g_io_deliver(b, EOPNOTSUPP);
 		return;
 	}
 
 	LOG_MSG(LVL_DEBUG2, "BIO arrived, size=%ju", b->bio_length);
 	/* Clones are collected on bq and only issued after the whole walk */
 	bioq_init(&bq);
 
 	chunk_size = sc->chunk_size;
 	addr = b->bio_data;
 	offset = b->bio_offset;	/* virtual offset and length */
 	length = b->bio_length;
 
 	/* Each iteration handles the part of the request inside one chunk */
 	while (length > 0) {
 		size_t chunk_index, in_chunk_offset, in_chunk_length;
 		struct virstor_map_entry *me;
 
 		chunk_index = offset / chunk_size; /* round downwards */
 		in_chunk_offset = offset % chunk_size;
 		/* Do not run past the end of the current chunk */
 		in_chunk_length = min(length, chunk_size - in_chunk_offset);
 		/* NOTE(review): BIO_DELETE is logged as "W" here */
 		LOG_MSG(LVL_DEBUG, "Mapped %s(%ju, %ju) to (%zu,%zu,%zu)",
 		    b->bio_cmd == BIO_READ ? "R" : "W",
 		    offset, length,
 		    chunk_index, in_chunk_offset, in_chunk_length);
 		me = &sc->map[chunk_index];
 
 		if (b->bio_cmd == BIO_READ || b->bio_cmd == BIO_DELETE) {
 			if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) {
 				/* Reads from unallocated chunks return zeroed
 				 * buffers; deletes of unallocated chunks need
 				 * no work at all */
 				if (b->bio_cmd == BIO_READ)
 					bzero(addr, in_chunk_length);
 			} else {
 				comp = &sc->components[me->provider_no];
 
 				cb = g_clone_bio(b);
 				if (cb == NULL) {
 					/* Drop what was queued so far and
 					 * fail the whole request */
 					bioq_dismantle(&bq);
 					if (b->bio_error == 0)
 						b->bio_error = ENOMEM;
 					g_io_deliver(b, b->bio_error);
 					return;
 				}
 				/* Translate to an offset on the component */
 				cb->bio_to = comp->gcons->provider;
 				cb->bio_done = g_virstor_done;
 				cb->bio_offset =
 				    (off_t)me->provider_chunk * (off_t)chunk_size
 				    + in_chunk_offset;
 				cb->bio_length = in_chunk_length;
 				cb->bio_data = addr;
 				/* Remember the target for the issue loop */
 				cb->bio_caller1 = comp;
 				bioq_disksort(&bq, cb);
 			}
 		} else { /* handle BIO_WRITE */
 			KASSERT(b->bio_cmd == BIO_WRITE,
 			    ("%s: Unknown command %d", __func__,
 			    b->bio_cmd));
 
 			if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) {
 				/* We have a virtual chunk, represented by
 				 * the "me" entry, but it's not yet allocated
 				 * (tied to) a physical chunk. So do it now. */
 				struct virstor_map_entry *data_me;
 				u_int phys_chunk, comp_no;
 				off_t s_offset;
 				int error;
 
 				error = allocate_chunk(sc, &comp, &comp_no,
 				    &phys_chunk);
 				if (error != 0) {
 					/* We cannot allocate a physical chunk
 					 * to satisfy this request, so we'll
 					 * delay it to when we can...
 					 * XXX: this will prevent the fs from
 					 * being umounted!
 					 * NOTE(review): clones already placed
 					 * on bq by earlier iterations are not
 					 * dismantled on this path - confirm
 					 * whether that leaks them. */
 					struct g_virstor_bio_q *biq;
 					biq = malloc(sizeof *biq, M_GVIRSTOR,
 					    M_NOWAIT);
 					if (biq == NULL) {
 						bioq_dismantle(&bq);
 						if (b->bio_error == 0)
 							b->bio_error = ENOMEM;
 						g_io_deliver(b, b->bio_error);
 						return;
 					}
 					/* Park the original bio on the
 					 * delayed queue; it is not delivered
 					 * here */
 					biq->bio = b;
 					mtx_lock(&sc->delayed_bio_q_mtx);
 					STAILQ_INSERT_TAIL(&sc->delayed_bio_q,
 					    biq, linkage);
 					mtx_unlock(&sc->delayed_bio_q_mtx);
 					LOG_MSG(LVL_WARNING, "Delaying BIO "
 					    "(size=%ju) until free physical "
 					    "space can be found on %s",
 					    b->bio_length,
 					    sc->provider->name);
 					return;
 				}
 				LOG_MSG(LVL_DEBUG, "Allocated chunk %u on %s "
 				    "for %s",
 				    phys_chunk,
 				    comp->gcons->provider->name,
 				    sc->provider->name);
 
 				/* Bind the virtual chunk to the new
 				 * physical chunk in the in-memory map */
 				me->provider_no = comp_no;
 				me->provider_chunk = phys_chunk;
 				me->flags |= VIRSTOR_MAP_ALLOCATED;
 
 				cb = g_clone_bio(b);
 				if (cb == NULL) {
 					/* Undo the map change made above
 					 * before failing the request */
 					me->flags &= ~VIRSTOR_MAP_ALLOCATED;
 					me->provider_no = 0;
 					me->provider_chunk = 0;
 					bioq_dismantle(&bq);
 					if (b->bio_error == 0)
 						b->bio_error = ENOMEM;
 					g_io_deliver(b, b->bio_error);
 					return;
 				}
 
 				/* The allocation table is stored contiguously
 				 * at the start of the drive. We need to
 				 * calculate the offset of the sector that holds
 				 * this map entry both on the drive and in the
 				 * map array.
 				 * s_offset will end up pointing to the drive
 				 * sector. */
 				s_offset = chunk_index * sizeof *me;
 				s_offset = (s_offset / sc->sectorsize) *
 				    sc->sectorsize;
 
 				/* data_me points to map entry sector
 				 * in memory (analogous to s_offset) */
 				data_me = &sc->map[(chunk_index /
 				    sc->me_per_sector) * sc->me_per_sector];
 
 				/* Commit sector with map entry to storage;
 				 * this write goes to component 0 */
 				cb->bio_to = sc->components[0].gcons->provider;
 				cb->bio_done = g_virstor_done;
 				cb->bio_offset = s_offset;
 				cb->bio_data = (char *)data_me;
 				cb->bio_length = sc->sectorsize;
 				cb->bio_caller1 = &sc->components[0];
 				bioq_disksort(&bq, cb);
 			}
 
 			/* The chunk is allocated by now (either it already
 			 * was, or it was allocated just above) */
 			comp = &sc->components[me->provider_no];
 			cb = g_clone_bio(b);
 			if (cb == NULL) {
 				bioq_dismantle(&bq);
 				if (b->bio_error == 0)
 					b->bio_error = ENOMEM;
 				g_io_deliver(b, b->bio_error);
 				return;
 			}
 			/* Finally, handle the data */
 			cb->bio_to = comp->gcons->provider;
 			cb->bio_done = g_virstor_done;
 			cb->bio_offset = (off_t)me->provider_chunk*(off_t)chunk_size +
 			    in_chunk_offset;
 			cb->bio_length = in_chunk_length;
 			cb->bio_data = addr;
 			cb->bio_caller1 = comp;
 			bioq_disksort(&bq, cb);
 		}
 		/* Advance to the part of the request in the next chunk */
 		addr += in_chunk_length;
 		length -= in_chunk_length;
 		offset += in_chunk_length;
 	}
 
 	/* Fire off bio's here */
 	count = 0;
 	for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) {
 		bioq_remove(&bq, cb);
 		LOG_REQ(LVL_MOREDEBUG, cb, "Firing request");
 		/* bio_caller1 only carried the target component up to here */
 		comp = cb->bio_caller1;
 		cb->bio_caller1 = NULL;
 		LOG_MSG(LVL_DEBUG, " firing bio, offset=%ju, length=%ju",
 		    cb->bio_offset, cb->bio_length);
 		g_io_request(cb, comp->gcons);
 		count++;
 	}
 	if (count == 0) { /* We handled everything locally */
 		b->bio_completed = b->bio_length;
 		g_io_deliver(b, 0);
 	}
 
 }
 
 /*
  * Allocate a chunk from a physical provider.
  *
  * Chunks are handed out in increasing order from the component indexed by
  * sc->curr_component. When that component has no chunk left, the next
  * component becomes the current one: the CURRENT flag moves over, the new
  * component is also flagged ALLOCATED, and its chunk_next is raised to
  * chunk_reserved if it was below it, so reserved chunks are never handed
  * out.
  *
  * On success returns 0 and stores the physical component in *comp, the
  * component's index in *comp_no_p and the chunk index relative to the
  * component in *chunk. Returns -1 when the current component is full and
  * it is the last one; *comp then points at that full component and the
  * other outputs are left untouched.
  *
  * NOTE(review): the component switched to is not itself checked for being
  * full before a chunk is taken from it - confirm that cannot happen.
  */
 static int
 allocate_chunk(struct g_virstor_softc *sc, struct g_virstor_component **comp,
     u_int *comp_no_p, u_int *chunk)
 {
 	struct g_virstor_component *cur;
 	u_int comp_no;
 
 	KASSERT(sc->curr_component < sc->n_components,
 	    ("%s: Invalid curr_component: %u",  __func__, sc->curr_component));
 
 	comp_no = sc->curr_component;
 	cur = &sc->components[comp_no];
 	*comp = cur;
 	dump_component(cur);
 	if (cur->chunk_next >= cur->chunk_count) {
 		/* This component is full. Allocate next component */
 		if (comp_no + 1 >= sc->n_components) {
 			LOG_MSG(LVL_ERROR, "All physical space allocated for %s",
 			    sc->geom->name);
 			return (-1);
 		}
 		cur->flags &= ~VIRSTOR_PROVIDER_CURRENT;
 		sc->curr_component = ++comp_no;
 
 		cur = &sc->components[comp_no];
 		*comp = cur;
 		/*
 		 * Warn when only a watermark's worth of components is left.
 		 * The test is written as an addition because the unsigned
 		 * subtraction "n_components - watermark - 1" wraps around
 		 * once the watermark reaches n_components, which would
 		 * silence the warning exactly when it matters.
 		 */
 		if (comp_no + g_virstor_component_watermark + 1 >=
 		    sc->n_components)
 			LOG_MSG(LVL_WARNING, "Device %s running out of components "
 			    "(switching to %u/%u: %s)", sc->geom->name,
 			    comp_no+1, sc->n_components,
 			    cur->gcons->provider->name);
 		/* Take care not to overwrite reserved chunks */
 		if (cur->chunk_reserved > 0 &&
 		    cur->chunk_next < cur->chunk_reserved)
 			cur->chunk_next = cur->chunk_reserved;
 
 		cur->flags |=
 		    VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT;
 		dump_component(cur);
 	}
 
 	/* Hand out the next free chunk of the (possibly new) component */
 	*comp_no_p = comp_no;
 	*chunk = cur->chunk_next++;
 	return (0);
 }
 
 /*
  * Debugging aid: print a component's index, provider name, chunk_count,
  * chunk_next and flags. Prints nothing unless g_virstor_debug is at least
  * LVL_DEBUG2.
  */
 static void
 dump_component(struct g_virstor_component *comp)
 {
 
 	if (g_virstor_debug >= LVL_DEBUG2) {
 		printf("Component %d: %s\n", comp->index,
 		    comp->gcons->provider->name);
 		printf("  chunk_count: %u\n", comp->chunk_count);
 		printf("   chunk_next: %u\n", comp->chunk_next);
 		printf("        flags: %u\n", comp->flags);
 	}
 }
 
 #if 0
 /*
  * Dump a map entry (debugging aid; sits inside an #if 0 region).
  * Prints whether virtual chunk number nr is allocated and, if so, the
  * provider and provider chunk it maps to. Prints nothing unless
  * g_virstor_debug is at least LVL_DEBUG.
  */
 static void
 dump_me(struct virstor_map_entry *me, unsigned int nr)
 {
 	if (g_virstor_debug < LVL_DEBUG)
 		return;
 	/* nr is unsigned, so it is printed with %u */
 	printf("VIRT. CHUNK #%u: ", nr);
 	if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0)
 		printf("(unallocated)\n");
 	else
 		printf("allocated at provider %u, provider_chunk %u\n",
 		    me->provider_no, me->provider_chunk);
 }
 #endif
 
 /*
  * Empty a bio queue: take every bio off bq in turn and destroy it.
  */
 static void
 bioq_dismantle(struct bio_queue_head *bq)
 {
 	struct bio *head;
 
 	while ((head = bioq_first(bq)) != NULL) {
 		bioq_remove(bq, head);
 		g_destroy_bio(head);
 	}
 }
 
 /*
  * The function that shouldn't be called.
  * It takes no arguments and panics unconditionally. According to the
  * original author, by the time it runs the stack is already garbled
  * because of an argument mismatch, so there is nothing left to do but
  * panic - which is the whole purpose of this function.
  * Motivation: to guard against accidentally calling geom methods when
  * they shouldn't be called. (see g_..._taste)
  */
 static void
 invalid_call(void)
 {
 	/* Never returns: stop the system with a recognizable message */
 	panic("invalid_call() has just been called. Something's fishy here.");
 }
 
 DECLARE_GEOM_CLASS(g_virstor_class, g_virstor); /* Let there be light: pass g_virstor_class to GEOM's DECLARE_GEOM_CLASS() under the name g_virstor */
Index: stable/9/sys
===================================================================
--- stable/9/sys	(revision 299397)
+++ stable/9/sys	(revision 299398)

Property changes on: stable/9/sys
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys:r298698